mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-07 22:42:04 +00:00
Mostly performance and bug fixes, plus some cleanups. The one new
feature this merge window is a new ioctl EXT4_IOC_SWAP_BOOT which allows installation of a hidden inode designed for boot loaders. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQIcBAABCAAGBQJRfpDwAAoJENNvdpvBGATwsjwP/17V3AN6XTEhZK80p3/qN5YD N2QIHeyYIqCGpczLs2TQEkxWX6nqpDggAPXY956wvgeEMQV+pQ+DLO4Ol9+p5WD2 hrklleYhtOjFQ3Xh4lqrEi5FzKVzWagVDLqgUjALJ+D+hkDB7ZQT/fm2sH45rzot xBp3aVqANU8GqAAbEW4/Ng9ZGMx0dpANiU2svbjM71sv2dCLFmWAkz+GgZsMbuJZ vnKIZP6I6plwP3LuZzEbVCA7F2PzC4ywEOJKjIEvgHpX6uMDR3FX8pD5Dlo/o6e2 eP+KLnD43mJMxBmTn22x5Sm0N6DUzJCEELRJWB9wCZoLdEvbEWRxT3qsPXfLWelG 2jj4bImXF2CqYEsJww5FV2WdXXdnuM57pZym5vMZGAFyKPSCJobA4Y3XRdXkBfXf Gq/cFoPYv2EcBIhz3zrRj+tbY8esbO9wOnF6+x+AF10BspD2V7nuoVdWVhOf0A3v i9ifGPwLk3e3xHr9oXheo7IWn52oviZeyD77d7D7MLhgn+xU4LaVhW3R63Q+mI4D 0TXG25R1CVcE7wyFy3gqSVXSCDO0JcQBL5LgcL+wAGXcHPAXqBpN2DFTPo+9fJH2 g3YMwr+wMbci1XRVQ2vdTt/nBZYjOCh6PgRmg3KjTz11Ra5EsjQvYjKWYwqf2RGn QhCgbzd/qtZfNJztLvr7 =GCT2 -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "Mostly performance and bug fixes, plus some cleanups. The one new feature this merge window is a new ioctl EXT4_IOC_SWAP_BOOT which allows installation of a hidden inode designed for boot loaders." * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits) ext4: fix type-widening bug in inode table readahead code ext4: add check for inodes_count overflow in new resize ioctl ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG ext4: fix online resizing for ext3-compat file systems jbd2: trace when lock_buffer in do_get_write_access takes a long time ext4: mark metadata blocks using bh flags buffer: add BH_Prio and BH_Meta flags ext4: mark all metadata I/O with REQ_META ext4: fix readdir error in case inline_data+^dir_index. 
ext4: fix readdir error in the case of inline_data+dir_index jbd2: use kmem_cache_zalloc instead of kmem_cache_alloc/memset ext4: mext_insert_extents should update extent block checksum ext4: move quota initialization out of inode allocation transaction ext4: reserve xattr index for Rich ACL support jbd2: reduce journal_head size ext4: clear buffer_uninit flag when submitting IO ext4: use io_end for multiple bios ext4: make ext4_bio_write_page() use BH_Async_Write flags ext4: Use kstrtoul() instead of parse_strtoul() ext4: defragmentation code cleanup ...
This commit is contained in:
commit
149b306089
@ -494,6 +494,17 @@ Files in /sys/fs/ext4/<devname>
|
||||
session_write_kbytes This file is read-only and shows the number of
|
||||
kilobytes of data that have been written to this
|
||||
filesystem since it was mounted.
|
||||
|
||||
reserved_clusters This is a RW file and contains the number of reserved
|
||||
clusters in the file system which will be used
|
||||
in the specific situations to avoid costly
|
||||
zeroout, unexpected ENOSPC, or possible data
|
||||
loss. The default is 2% or 4096 clusters,
|
||||
whichever is smaller; this can be changed,
|
||||
however it can never exceed the number of clusters
|
||||
in the file system. If there is not enough space
|
||||
for the reserved space when mounting the file system, the
|
||||
mount will _not_ fail.
|
||||
..............................................................................
|
||||
|
||||
Ioctls
|
||||
@ -587,6 +598,16 @@ Table of Ext4 specific ioctls
|
||||
bitmaps and inode table, the userspace tool thus
|
||||
just passes the new number of blocks.
|
||||
|
||||
EXT4_IOC_SWAP_BOOT Swap i_blocks and associated attributes
|
||||
(like i_blocks, i_size, i_flags, ...) from
|
||||
the specified inode with inode
|
||||
EXT4_BOOT_LOADER_INO (#5). This is typically
|
||||
used to store a boot loader in a secure part of
|
||||
the filesystem, where it can't be changed by a
|
||||
normal user by accident.
|
||||
The data blocks of the previous boot loader
|
||||
will be associated with the given inode.
|
||||
|
||||
..............................................................................
|
||||
|
||||
References
|
||||
|
@ -2987,6 +2987,11 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
|
||||
/* Take care of bh's that straddle the end of the device */
|
||||
guard_bh_eod(rw, bio, bh);
|
||||
|
||||
if (buffer_meta(bh))
|
||||
rw |= REQ_META;
|
||||
if (buffer_prio(bh))
|
||||
rw |= REQ_PRIO;
|
||||
|
||||
bio_get(bio);
|
||||
submit_bio(rw, bio);
|
||||
|
||||
|
@ -71,4 +71,5 @@ config EXT4_DEBUG
|
||||
Enables run-time debugging support for the ext4 filesystem.
|
||||
|
||||
If you select Y here, then you will be able to turn on debugging
|
||||
with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
|
||||
with a command such as:
|
||||
echo 1 > /sys/module/ext4/parameters/mballoc_debug
|
||||
|
@ -29,6 +29,23 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
|
||||
* balloc.c contains the blocks allocation and deallocation routines
|
||||
*/
|
||||
|
||||
/*
|
||||
* Calculate block group number for a given block number
|
||||
*/
|
||||
ext4_group_t ext4_get_group_number(struct super_block *sb,
|
||||
ext4_fsblk_t block)
|
||||
{
|
||||
ext4_group_t group;
|
||||
|
||||
if (test_opt2(sb, STD_GROUP_SIZE))
|
||||
group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
|
||||
block) >>
|
||||
(EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
|
||||
else
|
||||
ext4_get_group_no_and_offset(sb, block, &group, NULL);
|
||||
return group;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the block group number and offset into the block/cluster
|
||||
* allocation bitmap, given a block number
|
||||
@ -49,14 +66,18 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
|
||||
|
||||
}
|
||||
|
||||
static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
|
||||
ext4_group_t block_group)
|
||||
/*
|
||||
* Check whether the 'block' lives within the 'block_group'. Returns 1 if so
|
||||
* and 0 otherwise.
|
||||
*/
|
||||
static inline int ext4_block_in_group(struct super_block *sb,
|
||||
ext4_fsblk_t block,
|
||||
ext4_group_t block_group)
|
||||
{
|
||||
ext4_group_t actual_group;
|
||||
ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
|
||||
if (actual_group == block_group)
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
actual_group = ext4_get_group_number(sb, block);
|
||||
return (actual_group == block_group) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* Return the number of clusters used for file system metadata; this
|
||||
@ -420,7 +441,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
|
||||
trace_ext4_read_block_bitmap_load(sb, block_group);
|
||||
bh->b_end_io = ext4_end_bitmap_read;
|
||||
get_bh(bh);
|
||||
submit_bh(READ, bh);
|
||||
submit_bh(READ | REQ_META | REQ_PRIO, bh);
|
||||
return bh;
|
||||
verify:
|
||||
ext4_validate_block_bitmap(sb, desc, block_group, bh);
|
||||
@ -478,20 +499,22 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
|
||||
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
|
||||
s64 nclusters, unsigned int flags)
|
||||
{
|
||||
s64 free_clusters, dirty_clusters, root_clusters;
|
||||
s64 free_clusters, dirty_clusters, rsv, resv_clusters;
|
||||
struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
|
||||
struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
|
||||
|
||||
free_clusters = percpu_counter_read_positive(fcc);
|
||||
dirty_clusters = percpu_counter_read_positive(dcc);
|
||||
resv_clusters = atomic64_read(&sbi->s_resv_clusters);
|
||||
|
||||
/*
|
||||
* r_blocks_count should always be multiple of the cluster ratio so
|
||||
* we are safe to do a plain bit shift only.
|
||||
*/
|
||||
root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
|
||||
rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
|
||||
resv_clusters;
|
||||
|
||||
if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
|
||||
if (free_clusters - (nclusters + rsv + dirty_clusters) <
|
||||
EXT4_FREECLUSTERS_WATERMARK) {
|
||||
free_clusters = percpu_counter_sum_positive(fcc);
|
||||
dirty_clusters = percpu_counter_sum_positive(dcc);
|
||||
@ -499,15 +522,21 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
|
||||
/* Check whether we have space after accounting for current
|
||||
* dirty clusters & root reserved clusters.
|
||||
*/
|
||||
if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
|
||||
if (free_clusters >= (rsv + nclusters + dirty_clusters))
|
||||
return 1;
|
||||
|
||||
/* Hm, nope. Are (enough) root reserved clusters available? */
|
||||
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
|
||||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
|
||||
capable(CAP_SYS_RESOURCE) ||
|
||||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
|
||||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
|
||||
|
||||
if (free_clusters >= (nclusters + dirty_clusters +
|
||||
resv_clusters))
|
||||
return 1;
|
||||
}
|
||||
/* No free blocks. Let's see if we can dip into reserved pool */
|
||||
if (flags & EXT4_MB_USE_RESERVED) {
|
||||
if (free_clusters >= (nclusters + dirty_clusters))
|
||||
return 1;
|
||||
}
|
||||
|
@ -46,7 +46,8 @@ static int is_dx_dir(struct inode *inode)
|
||||
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
|
||||
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
|
||||
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
|
||||
((inode->i_size >> sb->s_blocksize_bits) == 1)))
|
||||
((inode->i_size >> sb->s_blocksize_bits) == 1) ||
|
||||
ext4_has_inline_data(inode)))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -115,14 +116,6 @@ static int ext4_readdir(struct file *filp,
|
||||
int ret = 0;
|
||||
int dir_has_error = 0;
|
||||
|
||||
if (ext4_has_inline_data(inode)) {
|
||||
int has_inline_data = 1;
|
||||
ret = ext4_read_inline_dir(filp, dirent, filldir,
|
||||
&has_inline_data);
|
||||
if (has_inline_data)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (is_dx_dir(inode)) {
|
||||
err = ext4_dx_readdir(filp, dirent, filldir);
|
||||
if (err != ERR_BAD_DX_DIR) {
|
||||
@ -136,6 +129,15 @@ static int ext4_readdir(struct file *filp,
|
||||
ext4_clear_inode_flag(file_inode(filp),
|
||||
EXT4_INODE_INDEX);
|
||||
}
|
||||
|
||||
if (ext4_has_inline_data(inode)) {
|
||||
int has_inline_data = 1;
|
||||
ret = ext4_read_inline_dir(filp, dirent, filldir,
|
||||
&has_inline_data);
|
||||
if (has_inline_data)
|
||||
return ret;
|
||||
}
|
||||
|
||||
stored = 0;
|
||||
offset = filp->f_pos & (sb->s_blocksize - 1);
|
||||
|
||||
|
101
fs/ext4/ext4.h
101
fs/ext4/ext4.h
@ -121,6 +121,8 @@ typedef unsigned int ext4_group_t;
|
||||
#define EXT4_MB_STREAM_ALLOC 0x0800
|
||||
/* Use reserved root blocks if needed */
|
||||
#define EXT4_MB_USE_ROOT_BLOCKS 0x1000
|
||||
/* Use blocks from reserved pool */
|
||||
#define EXT4_MB_USE_RESERVED 0x2000
|
||||
|
||||
struct ext4_allocation_request {
|
||||
/* target inode for block we're allocating */
|
||||
@ -196,19 +198,8 @@ struct mpage_da_data {
|
||||
#define EXT4_IO_END_ERROR 0x0002
|
||||
#define EXT4_IO_END_DIRECT 0x0004
|
||||
|
||||
struct ext4_io_page {
|
||||
struct page *p_page;
|
||||
atomic_t p_count;
|
||||
};
|
||||
|
||||
#define MAX_IO_PAGES 128
|
||||
|
||||
/*
|
||||
* For converting uninitialized extents on a work queue.
|
||||
*
|
||||
* 'page' is only used from the writepage() path; 'pages' is only used for
|
||||
* buffered writes; they are used to keep page references until conversion
|
||||
* takes place. For AIO/DIO, neither field is filled in.
|
||||
*/
|
||||
typedef struct ext4_io_end {
|
||||
struct list_head list; /* per-file finished IO list */
|
||||
@ -218,15 +209,13 @@ typedef struct ext4_io_end {
|
||||
ssize_t size; /* size of the extent */
|
||||
struct kiocb *iocb; /* iocb struct for AIO */
|
||||
int result; /* error value for AIO */
|
||||
int num_io_pages; /* for writepages() */
|
||||
struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
|
||||
atomic_t count; /* reference counter */
|
||||
} ext4_io_end_t;
|
||||
|
||||
struct ext4_io_submit {
|
||||
int io_op;
|
||||
struct bio *io_bio;
|
||||
ext4_io_end_t *io_end;
|
||||
struct ext4_io_page *io_page;
|
||||
sector_t io_next_block;
|
||||
};
|
||||
|
||||
@ -403,7 +392,7 @@ struct flex_groups {
|
||||
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
|
||||
|
||||
#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
|
||||
#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
|
||||
#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
|
||||
|
||||
/* Flags that should be inherited by new inodes from their parent. */
|
||||
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
|
||||
@ -557,9 +546,8 @@ enum {
|
||||
#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
|
||||
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
|
||||
EXT4_GET_BLOCKS_CREATE)
|
||||
/* Caller is from the delayed allocation writeout path,
|
||||
so set the magic i_delalloc_reserve_flag after taking the
|
||||
inode allocation semaphore for */
|
||||
/* Caller is from the delayed allocation writeout path
|
||||
* finally doing the actual allocation of delayed blocks */
|
||||
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
|
||||
/* caller is from the direct IO path, request the creation of
|
||||
uninitialized extents if not allocated, split the uninitialized
|
||||
@ -571,8 +559,9 @@ enum {
|
||||
/* Convert extent to initialized after IO complete */
|
||||
#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
|
||||
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
|
||||
/* Punch out blocks of an extent */
|
||||
#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
|
||||
/* Eventual metadata allocation (due to growing extent tree)
|
||||
* should not fail, so try to use reserved blocks for that.*/
|
||||
#define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020
|
||||
/* Don't normalize allocation size (used for fallocate) */
|
||||
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
|
||||
/* Request will not result in inode size update (used for fallocate) */
|
||||
@ -616,6 +605,7 @@ enum {
|
||||
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
|
||||
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
|
||||
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
|
||||
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
|
||||
|
||||
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
||||
/*
|
||||
@ -949,7 +939,7 @@ struct ext4_inode_info {
|
||||
#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
|
||||
|
||||
/*
|
||||
* Mount flags
|
||||
* Mount flags set via mount options or defaults
|
||||
*/
|
||||
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
|
||||
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
|
||||
@ -981,8 +971,16 @@ struct ext4_inode_info {
|
||||
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
|
||||
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
|
||||
|
||||
/*
|
||||
* Mount flags set either automatically (could not be set by mount option)
|
||||
* based on per file system feature or property or in special cases such as
|
||||
* distinguishing between explicit mount option definition and default.
|
||||
*/
|
||||
#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
|
||||
specified delalloc */
|
||||
#define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group
|
||||
size of blocksize * 8
|
||||
blocks */
|
||||
|
||||
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
|
||||
~EXT4_MOUNT_##opt
|
||||
@ -1179,6 +1177,7 @@ struct ext4_sb_info {
|
||||
unsigned int s_mount_flags;
|
||||
unsigned int s_def_mount_opt;
|
||||
ext4_fsblk_t s_sb_block;
|
||||
atomic64_t s_resv_clusters;
|
||||
kuid_t s_resuid;
|
||||
kgid_t s_resgid;
|
||||
unsigned short s_mount_state;
|
||||
@ -1333,6 +1332,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
||||
return ino == EXT4_ROOT_INO ||
|
||||
ino == EXT4_USR_QUOTA_INO ||
|
||||
ino == EXT4_GRP_QUOTA_INO ||
|
||||
ino == EXT4_BOOT_LOADER_INO ||
|
||||
ino == EXT4_JOURNAL_INO ||
|
||||
ino == EXT4_RESIZE_INO ||
|
||||
(ino >= EXT4_FIRST_INO(sb) &&
|
||||
@ -1374,6 +1374,7 @@ enum {
|
||||
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
|
||||
nolocking */
|
||||
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
|
||||
EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
|
||||
};
|
||||
|
||||
#define EXT4_INODE_BIT_FNS(name, field, offset) \
|
||||
@ -1784,9 +1785,6 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
|
||||
*/
|
||||
#define ERR_BAD_DX_DIR -75000
|
||||
|
||||
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
|
||||
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
|
||||
|
||||
/*
|
||||
* Timeout and state flag for lazy initialization inode thread.
|
||||
*/
|
||||
@ -1908,6 +1906,13 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
|
||||
struct buffer_head *bh);
|
||||
|
||||
/* balloc.c */
|
||||
extern void ext4_get_group_no_and_offset(struct super_block *sb,
|
||||
ext4_fsblk_t blocknr,
|
||||
ext4_group_t *blockgrpp,
|
||||
ext4_grpblk_t *offsetp);
|
||||
extern ext4_group_t ext4_get_group_number(struct super_block *sb,
|
||||
ext4_fsblk_t block);
|
||||
|
||||
extern void ext4_validate_block_bitmap(struct super_block *sb,
|
||||
struct ext4_group_desc *desc,
|
||||
unsigned int block_group,
|
||||
@ -2108,8 +2113,9 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
|
||||
unsigned long nr_segs);
|
||||
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
|
||||
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
|
||||
extern void ext4_ind_truncate(struct inode *inode);
|
||||
extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
|
||||
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
|
||||
extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t first, ext4_lblk_t stop);
|
||||
|
||||
/* ioctl.c */
|
||||
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
|
||||
@ -2117,6 +2123,7 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
|
||||
|
||||
/* migrate.c */
|
||||
extern int ext4_ext_migrate(struct inode *);
|
||||
extern int ext4_ind_migrate(struct inode *inode);
|
||||
|
||||
/* namei.c */
|
||||
extern int ext4_dirent_csum_verify(struct inode *inode,
|
||||
@ -2511,6 +2518,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
|
||||
extern int ext4_read_inline_dir(struct file *filp,
|
||||
void *dirent, filldir_t filldir,
|
||||
int *has_inline_data);
|
||||
extern int htree_inlinedir_to_tree(struct file *dir_file,
|
||||
struct inode *dir, ext4_lblk_t block,
|
||||
struct dx_hash_info *hinfo,
|
||||
__u32 start_hash, __u32 start_minor_hash,
|
||||
int *has_inline_data);
|
||||
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
|
||||
const struct qstr *d_name,
|
||||
struct ext4_dir_entry_2 **res_dir,
|
||||
@ -2547,6 +2559,24 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
|
||||
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *bh);
|
||||
#define S_SHIFT 12
|
||||
static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
|
||||
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
|
||||
[S_IFDIR >> S_SHIFT] = EXT4_FT_DIR,
|
||||
[S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV,
|
||||
[S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV,
|
||||
[S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO,
|
||||
[S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK,
|
||||
[S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK,
|
||||
};
|
||||
|
||||
static inline void ext4_set_de_type(struct super_block *sb,
|
||||
struct ext4_dir_entry_2 *de,
|
||||
umode_t mode) {
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
|
||||
de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
|
||||
}
|
||||
|
||||
|
||||
/* symlink.c */
|
||||
extern const struct inode_operations ext4_symlink_inode_operations;
|
||||
@ -2573,9 +2603,9 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
|
||||
int chunk);
|
||||
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
struct ext4_map_blocks *map, int flags);
|
||||
extern void ext4_ext_truncate(struct inode *);
|
||||
extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
|
||||
loff_t length);
|
||||
extern void ext4_ext_truncate(handle_t *, struct inode *);
|
||||
extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
ext4_lblk_t end);
|
||||
extern void ext4_ext_init(struct super_block *);
|
||||
extern void ext4_ext_release(struct super_block *);
|
||||
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
|
||||
@ -2609,17 +2639,26 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
|
||||
|
||||
/* move_extent.c */
|
||||
extern void ext4_double_down_write_data_sem(struct inode *first,
|
||||
struct inode *second);
|
||||
extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
|
||||
struct inode *donor_inode);
|
||||
void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
|
||||
void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
|
||||
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
__u64 start_orig, __u64 start_donor,
|
||||
__u64 len, __u64 *moved_len);
|
||||
|
||||
/* page-io.c */
|
||||
extern int __init ext4_init_pageio(void);
|
||||
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
|
||||
extern void ext4_exit_pageio(void);
|
||||
extern void ext4_ioend_shutdown(struct inode *);
|
||||
extern void ext4_free_io_end(ext4_io_end_t *io);
|
||||
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
|
||||
extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
|
||||
extern int ext4_put_io_end(ext4_io_end_t *io_end);
|
||||
extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
|
||||
extern void ext4_io_submit_init(struct ext4_io_submit *io,
|
||||
struct writeback_control *wbc);
|
||||
extern void ext4_end_io_work(struct work_struct *work);
|
||||
extern void ext4_io_submit(struct ext4_io_submit *io);
|
||||
extern int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
|
@ -270,5 +270,10 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
|
||||
0xffff);
|
||||
}
|
||||
|
||||
#define ext4_ext_dirty(handle, inode, path) \
|
||||
__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
|
||||
int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
|
||||
struct inode *inode, struct ext4_ext_path *path);
|
||||
|
||||
#endif /* _EXT4_EXTENTS */
|
||||
|
||||
|
@ -43,6 +43,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
|
||||
{
|
||||
journal_t *journal;
|
||||
|
||||
might_sleep();
|
||||
|
||||
trace_ext4_journal_start(sb, nblocks, _RET_IP_);
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return ERR_PTR(-EROFS);
|
||||
@ -113,6 +115,8 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (ext4_handle_valid(handle)) {
|
||||
err = jbd2_journal_get_write_access(handle, bh);
|
||||
if (err)
|
||||
@ -209,6 +213,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
might_sleep();
|
||||
|
||||
set_buffer_meta(bh);
|
||||
set_buffer_prio(bh);
|
||||
if (ext4_handle_valid(handle)) {
|
||||
err = jbd2_journal_dirty_metadata(handle, bh);
|
||||
if (err) {
|
||||
|
@ -29,11 +29,13 @@
|
||||
* block to complete the transaction.
|
||||
*
|
||||
* For extents-enabled fs we may have to allocate and modify up to
|
||||
* 5 levels of tree + root which are stored in the inode. */
|
||||
* 5 levels of tree, data block (for each of these we need bitmap + group
|
||||
* summaries), root which is stored in the inode, sb
|
||||
*/
|
||||
|
||||
#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
|
||||
(EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
|
||||
? 27U : 8U)
|
||||
? 20U : 8U)
|
||||
|
||||
/* Extended attribute operations touch at most two data buffers,
|
||||
* two bitmap buffers, and two group summaries, in addition to the inode
|
||||
@ -194,16 +196,20 @@ static inline void ext4_journal_callback_add(handle_t *handle,
|
||||
* ext4_journal_callback_del: delete a registered callback
|
||||
* @handle: active journal transaction handle on which callback was registered
|
||||
* @jce: registered journal callback entry to unregister
|
||||
* Return true if object was successfully removed
|
||||
*/
|
||||
static inline void ext4_journal_callback_del(handle_t *handle,
|
||||
static inline bool ext4_journal_callback_try_del(handle_t *handle,
|
||||
struct ext4_journal_cb_entry *jce)
|
||||
{
|
||||
bool deleted;
|
||||
struct ext4_sb_info *sbi =
|
||||
EXT4_SB(handle->h_transaction->t_journal->j_private);
|
||||
|
||||
spin_lock(&sbi->s_md_lock);
|
||||
deleted = !list_empty(&jce->jce_list);
|
||||
list_del_init(&jce->jce_list);
|
||||
spin_unlock(&sbi->s_md_lock);
|
||||
return deleted;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -157,11 +157,8 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
|
||||
* - ENOMEM
|
||||
* - EIO
|
||||
*/
|
||||
#define ext4_ext_dirty(handle, inode, path) \
|
||||
__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
|
||||
static int __ext4_ext_dirty(const char *where, unsigned int line,
|
||||
handle_t *handle, struct inode *inode,
|
||||
struct ext4_ext_path *path)
|
||||
int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
|
||||
struct inode *inode, struct ext4_ext_path *path)
|
||||
{
|
||||
int err;
|
||||
if (path->p_bh) {
|
||||
@ -1813,39 +1810,101 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
depth = ext_depth(inode);
|
||||
ex = path[depth].p_ext;
|
||||
eh = path[depth].p_hdr;
|
||||
if (unlikely(path[depth].p_hdr == NULL)) {
|
||||
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* try to insert block into found extent and return */
|
||||
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
|
||||
&& ext4_can_extents_be_merged(inode, ex, newext)) {
|
||||
ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
|
||||
ext4_ext_is_uninitialized(newext),
|
||||
ext4_ext_get_actual_len(newext),
|
||||
le32_to_cpu(ex->ee_block),
|
||||
ext4_ext_is_uninitialized(ex),
|
||||
ext4_ext_get_actual_len(ex),
|
||||
ext4_ext_pblock(ex));
|
||||
err = ext4_ext_get_access(handle, inode, path + depth);
|
||||
if (err)
|
||||
return err;
|
||||
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
|
||||
|
||||
/*
|
||||
* ext4_can_extents_be_merged should have checked that either
|
||||
* both extents are uninitialized, or both aren't. Thus we
|
||||
* need to check only one of them here.
|
||||
* Try to see whether we should rather test the extent on
|
||||
* right from ex, or from the left of ex. This is because
|
||||
* ext4_ext_find_extent() can return either extent on the
|
||||
* left, or on the right from the searched position. This
|
||||
* will make merging more effective.
|
||||
*/
|
||||
if (ext4_ext_is_uninitialized(ex))
|
||||
uninitialized = 1;
|
||||
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
||||
if (ex < EXT_LAST_EXTENT(eh) &&
|
||||
(le32_to_cpu(ex->ee_block) +
|
||||
ext4_ext_get_actual_len(ex) <
|
||||
le32_to_cpu(newext->ee_block))) {
|
||||
ex += 1;
|
||||
goto prepend;
|
||||
} else if ((ex > EXT_FIRST_EXTENT(eh)) &&
|
||||
(le32_to_cpu(newext->ee_block) +
|
||||
ext4_ext_get_actual_len(newext) <
|
||||
le32_to_cpu(ex->ee_block)))
|
||||
ex -= 1;
|
||||
|
||||
/* Try to append newex to the ex */
|
||||
if (ext4_can_extents_be_merged(inode, ex, newext)) {
|
||||
ext_debug("append [%d]%d block to %u:[%d]%d"
|
||||
"(from %llu)\n",
|
||||
ext4_ext_is_uninitialized(newext),
|
||||
ext4_ext_get_actual_len(newext),
|
||||
le32_to_cpu(ex->ee_block),
|
||||
ext4_ext_is_uninitialized(ex),
|
||||
ext4_ext_get_actual_len(ex),
|
||||
ext4_ext_pblock(ex));
|
||||
err = ext4_ext_get_access(handle, inode,
|
||||
path + depth);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* ext4_can_extents_be_merged should have checked
|
||||
* that either both extents are uninitialized, or
|
||||
* both aren't. Thus we need to check only one of
|
||||
* them here.
|
||||
*/
|
||||
if (ext4_ext_is_uninitialized(ex))
|
||||
uninitialized = 1;
|
||||
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
||||
+ ext4_ext_get_actual_len(newext));
|
||||
if (uninitialized)
|
||||
ext4_ext_mark_uninitialized(ex);
|
||||
eh = path[depth].p_hdr;
|
||||
nearex = ex;
|
||||
goto merge;
|
||||
if (uninitialized)
|
||||
ext4_ext_mark_uninitialized(ex);
|
||||
eh = path[depth].p_hdr;
|
||||
nearex = ex;
|
||||
goto merge;
|
||||
}
|
||||
|
||||
prepend:
|
||||
/* Try to prepend newex to the ex */
|
||||
if (ext4_can_extents_be_merged(inode, newext, ex)) {
|
||||
ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
|
||||
"(from %llu)\n",
|
||||
le32_to_cpu(newext->ee_block),
|
||||
ext4_ext_is_uninitialized(newext),
|
||||
ext4_ext_get_actual_len(newext),
|
||||
le32_to_cpu(ex->ee_block),
|
||||
ext4_ext_is_uninitialized(ex),
|
||||
ext4_ext_get_actual_len(ex),
|
||||
ext4_ext_pblock(ex));
|
||||
err = ext4_ext_get_access(handle, inode,
|
||||
path + depth);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* ext4_can_extents_be_merged should have checked
|
||||
* that either both extents are uninitialized, or
|
||||
* both aren't. Thus we need to check only one of
|
||||
* them here.
|
||||
*/
|
||||
if (ext4_ext_is_uninitialized(ex))
|
||||
uninitialized = 1;
|
||||
ex->ee_block = newext->ee_block;
|
||||
ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
|
||||
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
||||
+ ext4_ext_get_actual_len(newext));
|
||||
if (uninitialized)
|
||||
ext4_ext_mark_uninitialized(ex);
|
||||
eh = path[depth].p_hdr;
|
||||
nearex = ex;
|
||||
goto merge;
|
||||
}
|
||||
}
|
||||
|
||||
depth = ext_depth(inode);
|
||||
@ -1880,8 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
|
||||
* There is no free space in the found leaf.
|
||||
* We're gonna add a new leaf in the tree.
|
||||
*/
|
||||
if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
|
||||
flags = EXT4_MB_USE_ROOT_BLOCKS;
|
||||
if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
|
||||
flags = EXT4_MB_USE_RESERVED;
|
||||
err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
|
||||
if (err)
|
||||
goto cleanup;
|
||||
@ -2599,8 +2658,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
ext4_lblk_t end)
|
||||
int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
ext4_lblk_t end)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
int depth = ext_depth(inode);
|
||||
@ -2667,12 +2726,14 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
|
||||
/*
|
||||
* Split the extent in two so that 'end' is the last
|
||||
* block in the first new extent
|
||||
* block in the first new extent. Also we should not
|
||||
* fail removing space due to ENOSPC so try to use
|
||||
* reserved block if that happens.
|
||||
*/
|
||||
err = ext4_split_extent_at(handle, inode, path,
|
||||
end + 1, split_flag,
|
||||
EXT4_GET_BLOCKS_PRE_IO |
|
||||
EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
|
||||
end + 1, split_flag,
|
||||
EXT4_GET_BLOCKS_PRE_IO |
|
||||
EXT4_GET_BLOCKS_METADATA_NOFAIL);
|
||||
|
||||
if (err < 0)
|
||||
goto out;
|
||||
@ -3147,35 +3208,35 @@ static int ext4_split_extent(handle_t *handle,
|
||||
static int ext4_ext_convert_to_initialized(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct ext4_map_blocks *map,
|
||||
struct ext4_ext_path *path)
|
||||
struct ext4_ext_path *path,
|
||||
int flags)
|
||||
{
|
||||
struct ext4_sb_info *sbi;
|
||||
struct ext4_extent_header *eh;
|
||||
struct ext4_map_blocks split_map;
|
||||
struct ext4_extent zero_ex;
|
||||
struct ext4_extent *ex;
|
||||
struct ext4_extent *ex, *abut_ex;
|
||||
ext4_lblk_t ee_block, eof_block;
|
||||
unsigned int ee_len, depth;
|
||||
int allocated, max_zeroout = 0;
|
||||
unsigned int ee_len, depth, map_len = map->m_len;
|
||||
int allocated = 0, max_zeroout = 0;
|
||||
int err = 0;
|
||||
int split_flag = 0;
|
||||
|
||||
ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
|
||||
"block %llu, max_blocks %u\n", inode->i_ino,
|
||||
(unsigned long long)map->m_lblk, map->m_len);
|
||||
(unsigned long long)map->m_lblk, map_len);
|
||||
|
||||
sbi = EXT4_SB(inode->i_sb);
|
||||
eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
|
||||
inode->i_sb->s_blocksize_bits;
|
||||
if (eof_block < map->m_lblk + map->m_len)
|
||||
eof_block = map->m_lblk + map->m_len;
|
||||
if (eof_block < map->m_lblk + map_len)
|
||||
eof_block = map->m_lblk + map_len;
|
||||
|
||||
depth = ext_depth(inode);
|
||||
eh = path[depth].p_hdr;
|
||||
ex = path[depth].p_ext;
|
||||
ee_block = le32_to_cpu(ex->ee_block);
|
||||
ee_len = ext4_ext_get_actual_len(ex);
|
||||
allocated = ee_len - (map->m_lblk - ee_block);
|
||||
zero_ex.ee_len = 0;
|
||||
|
||||
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
|
||||
@ -3186,77 +3247,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
|
||||
|
||||
/*
|
||||
* Attempt to transfer newly initialized blocks from the currently
|
||||
* uninitialized extent to its left neighbor. This is much cheaper
|
||||
* uninitialized extent to its neighbor. This is much cheaper
|
||||
* than an insertion followed by a merge as those involve costly
|
||||
* memmove() calls. This is the common case in steady state for
|
||||
* workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
|
||||
* writes.
|
||||
* memmove() calls. Transferring to the left is the common case in
|
||||
* steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
|
||||
* followed by append writes.
|
||||
*
|
||||
* Limitations of the current logic:
|
||||
* - L1: we only deal with writes at the start of the extent.
|
||||
* The approach could be extended to writes at the end
|
||||
* of the extent but this scenario was deemed less common.
|
||||
* - L2: we do not deal with writes covering the whole extent.
|
||||
* - L1: we do not deal with writes covering the whole extent.
|
||||
* This would require removing the extent if the transfer
|
||||
* is possible.
|
||||
* - L3: we only attempt to merge with an extent stored in the
|
||||
* - L2: we only attempt to merge with an extent stored in the
|
||||
* same extent tree node.
|
||||
*/
|
||||
if ((map->m_lblk == ee_block) && /*L1*/
|
||||
(map->m_len < ee_len) && /*L2*/
|
||||
(ex > EXT_FIRST_EXTENT(eh))) { /*L3*/
|
||||
struct ext4_extent *prev_ex;
|
||||
if ((map->m_lblk == ee_block) &&
|
||||
/* See if we can merge left */
|
||||
(map_len < ee_len) && /*L1*/
|
||||
(ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
|
||||
ext4_lblk_t prev_lblk;
|
||||
ext4_fsblk_t prev_pblk, ee_pblk;
|
||||
unsigned int prev_len, write_len;
|
||||
unsigned int prev_len;
|
||||
|
||||
prev_ex = ex - 1;
|
||||
prev_lblk = le32_to_cpu(prev_ex->ee_block);
|
||||
prev_len = ext4_ext_get_actual_len(prev_ex);
|
||||
prev_pblk = ext4_ext_pblock(prev_ex);
|
||||
abut_ex = ex - 1;
|
||||
prev_lblk = le32_to_cpu(abut_ex->ee_block);
|
||||
prev_len = ext4_ext_get_actual_len(abut_ex);
|
||||
prev_pblk = ext4_ext_pblock(abut_ex);
|
||||
ee_pblk = ext4_ext_pblock(ex);
|
||||
write_len = map->m_len;
|
||||
|
||||
/*
|
||||
* A transfer of blocks from 'ex' to 'prev_ex' is allowed
|
||||
* A transfer of blocks from 'ex' to 'abut_ex' is allowed
|
||||
* upon those conditions:
|
||||
* - C1: prev_ex is initialized,
|
||||
* - C2: prev_ex is logically abutting ex,
|
||||
* - C3: prev_ex is physically abutting ex,
|
||||
* - C4: prev_ex can receive the additional blocks without
|
||||
* - C1: abut_ex is initialized,
|
||||
* - C2: abut_ex is logically abutting ex,
|
||||
* - C3: abut_ex is physically abutting ex,
|
||||
* - C4: abut_ex can receive the additional blocks without
|
||||
* overflowing the (initialized) length limit.
|
||||
*/
|
||||
if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/
|
||||
if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
|
||||
((prev_lblk + prev_len) == ee_block) && /*C2*/
|
||||
((prev_pblk + prev_len) == ee_pblk) && /*C3*/
|
||||
(prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/
|
||||
(prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
|
||||
err = ext4_ext_get_access(handle, inode, path + depth);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
trace_ext4_ext_convert_to_initialized_fastpath(inode,
|
||||
map, ex, prev_ex);
|
||||
map, ex, abut_ex);
|
||||
|
||||
/* Shift the start of ex by 'write_len' blocks */
|
||||
ex->ee_block = cpu_to_le32(ee_block + write_len);
|
||||
ext4_ext_store_pblock(ex, ee_pblk + write_len);
|
||||
ex->ee_len = cpu_to_le16(ee_len - write_len);
|
||||
/* Shift the start of ex by 'map_len' blocks */
|
||||
ex->ee_block = cpu_to_le32(ee_block + map_len);
|
||||
ext4_ext_store_pblock(ex, ee_pblk + map_len);
|
||||
ex->ee_len = cpu_to_le16(ee_len - map_len);
|
||||
ext4_ext_mark_uninitialized(ex); /* Restore the flag */
|
||||
|
||||
/* Extend prev_ex by 'write_len' blocks */
|
||||
prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
|
||||
|
||||
/* Mark the block containing both extents as dirty */
|
||||
ext4_ext_dirty(handle, inode, path + depth);
|
||||
|
||||
/* Update path to point to the right extent */
|
||||
path[depth].p_ext = prev_ex;
|
||||
/* Extend abut_ex by 'map_len' blocks */
|
||||
abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
|
||||
|
||||
/* Result: number of initialized blocks past m_lblk */
|
||||
allocated = write_len;
|
||||
goto out;
|
||||
allocated = map_len;
|
||||
}
|
||||
} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
|
||||
(map_len < ee_len) && /*L1*/
|
||||
ex < EXT_LAST_EXTENT(eh)) { /*L2*/
|
||||
/* See if we can merge right */
|
||||
ext4_lblk_t next_lblk;
|
||||
ext4_fsblk_t next_pblk, ee_pblk;
|
||||
unsigned int next_len;
|
||||
|
||||
abut_ex = ex + 1;
|
||||
next_lblk = le32_to_cpu(abut_ex->ee_block);
|
||||
next_len = ext4_ext_get_actual_len(abut_ex);
|
||||
next_pblk = ext4_ext_pblock(abut_ex);
|
||||
ee_pblk = ext4_ext_pblock(ex);
|
||||
|
||||
/*
|
||||
* A transfer of blocks from 'ex' to 'abut_ex' is allowed
|
||||
* upon those conditions:
|
||||
* - C1: abut_ex is initialized,
|
||||
* - C2: abut_ex is logically abutting ex,
|
||||
* - C3: abut_ex is physically abutting ex,
|
||||
* - C4: abut_ex can receive the additional blocks without
|
||||
* overflowing the (initialized) length limit.
|
||||
*/
|
||||
if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
|
||||
((map->m_lblk + map_len) == next_lblk) && /*C2*/
|
||||
((ee_pblk + ee_len) == next_pblk) && /*C3*/
|
||||
(next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
|
||||
err = ext4_ext_get_access(handle, inode, path + depth);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
trace_ext4_ext_convert_to_initialized_fastpath(inode,
|
||||
map, ex, abut_ex);
|
||||
|
||||
/* Shift the start of abut_ex by 'map_len' blocks */
|
||||
abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
|
||||
ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
|
||||
ex->ee_len = cpu_to_le16(ee_len - map_len);
|
||||
ext4_ext_mark_uninitialized(ex); /* Restore the flag */
|
||||
|
||||
/* Extend abut_ex by 'map_len' blocks */
|
||||
abut_ex->ee_len = cpu_to_le16(next_len + map_len);
|
||||
|
||||
/* Result: number of initialized blocks past m_lblk */
|
||||
allocated = map_len;
|
||||
}
|
||||
}
|
||||
if (allocated) {
|
||||
/* Mark the block containing both extents as dirty */
|
||||
ext4_ext_dirty(handle, inode, path + depth);
|
||||
|
||||
/* Update path to point to the right extent */
|
||||
path[depth].p_ext = abut_ex;
|
||||
goto out;
|
||||
} else
|
||||
allocated = ee_len - (map->m_lblk - ee_block);
|
||||
|
||||
WARN_ON(map->m_lblk < ee_block);
|
||||
/*
|
||||
@ -3330,7 +3435,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
|
||||
}
|
||||
|
||||
allocated = ext4_split_extent(handle, inode, path,
|
||||
&split_map, split_flag, 0);
|
||||
&split_map, split_flag, flags);
|
||||
if (allocated < 0)
|
||||
err = allocated;
|
||||
|
||||
@ -3650,6 +3755,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
||||
flags, allocated);
|
||||
ext4_ext_show_leaf(inode, path);
|
||||
|
||||
/*
|
||||
* When writing into uninitialized space, we should not fail to
|
||||
* allocate metadata blocks for the new extent block if needed.
|
||||
*/
|
||||
flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
|
||||
|
||||
trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
|
||||
allocated, newblock);
|
||||
|
||||
@ -3713,7 +3824,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
|
||||
/* buffered write, writepage time, convert */
|
||||
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
|
||||
ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
|
||||
if (ret >= 0)
|
||||
ext4_update_inode_fsync_trans(handle, inode, 1);
|
||||
out:
|
||||
@ -4257,47 +4368,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
return err ? err : allocated;
|
||||
}
|
||||
|
||||
void ext4_ext_truncate(struct inode *inode)
|
||||
void ext4_ext_truncate(handle_t *handle, struct inode *inode)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
ext4_lblk_t last_block;
|
||||
handle_t *handle;
|
||||
loff_t page_len;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* finish any pending end_io work so we won't run the risk of
|
||||
* converting any truncated blocks to initialized later
|
||||
*/
|
||||
ext4_flush_unwritten_io(inode);
|
||||
|
||||
/*
|
||||
* probably first extent we're gonna free will be last in block
|
||||
*/
|
||||
err = ext4_writepage_trans_blocks(inode);
|
||||
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
|
||||
if (IS_ERR(handle))
|
||||
return;
|
||||
|
||||
if (inode->i_size % PAGE_CACHE_SIZE != 0) {
|
||||
page_len = PAGE_CACHE_SIZE -
|
||||
(inode->i_size & (PAGE_CACHE_SIZE - 1));
|
||||
|
||||
err = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, inode->i_size, page_len, 0);
|
||||
|
||||
if (err)
|
||||
goto out_stop;
|
||||
}
|
||||
|
||||
if (ext4_orphan_add(handle, inode))
|
||||
goto out_stop;
|
||||
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
/*
|
||||
* TODO: optimization is possible here.
|
||||
* Probably we need not scan at all,
|
||||
@ -4313,29 +4389,6 @@ void ext4_ext_truncate(struct inode *inode)
|
||||
err = ext4_es_remove_extent(inode, last_block,
|
||||
EXT_MAX_BLOCKS - last_block);
|
||||
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
||||
|
||||
/* In a multi-transaction truncate, we only make the final
|
||||
* transaction synchronous.
|
||||
*/
|
||||
if (IS_SYNC(inode))
|
||||
ext4_handle_sync(handle);
|
||||
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
|
||||
out_stop:
|
||||
/*
|
||||
* If this was a simple ftruncate() and the file will remain alive,
|
||||
* then we need to clear up the orphan record which we created above.
|
||||
* However, if this was a real unlink then we were called by
|
||||
* ext4_delete_inode(), and we allow that function to clean up the
|
||||
* orphan info for us.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(handle, inode);
|
||||
|
||||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
}
|
||||
|
||||
static void ext4_falloc_update_inode(struct inode *inode,
|
||||
@ -4623,187 +4676,6 @@ static int ext4_xattr_fiemap(struct inode *inode,
|
||||
return (error < 0 ? error : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_ext_punch_hole
|
||||
*
|
||||
* Punches a hole of "length" bytes in a file starting
|
||||
* at byte "offset"
|
||||
*
|
||||
* @inode: The inode of the file to punch a hole in
|
||||
* @offset: The starting byte offset of the hole
|
||||
* @length: The length of the hole
|
||||
*
|
||||
* Returns the number of blocks removed or negative on err
|
||||
*/
|
||||
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
ext4_lblk_t first_block, stop_block;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
handle_t *handle;
|
||||
loff_t first_page, last_page, page_len;
|
||||
loff_t first_page_offset, last_page_offset;
|
||||
int credits, err = 0;
|
||||
|
||||
/*
|
||||
* Write out all dirty pages to avoid race conditions
|
||||
* Then release them.
|
||||
*/
|
||||
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
||||
err = filemap_write_and_wait_range(mapping,
|
||||
offset, offset + length - 1);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
/* It's not possible to punch a hole in an append-only or immutable file */
|
||||
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
|
||||
err = -EPERM;
|
||||
goto out_mutex;
|
||||
}
|
||||
if (IS_SWAPFILE(inode)) {
|
||||
err = -ETXTBSY;
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
/* No need to punch hole beyond i_size */
|
||||
if (offset >= inode->i_size)
|
||||
goto out_mutex;
|
||||
|
||||
/*
|
||||
* If the hole extends beyond i_size, set the hole
|
||||
* to end after the page that contains i_size
|
||||
*/
|
||||
if (offset + length > inode->i_size) {
|
||||
length = inode->i_size +
|
||||
PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
|
||||
offset;
|
||||
}
|
||||
|
||||
first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
last_page = (offset + length) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
first_page_offset = first_page << PAGE_CACHE_SHIFT;
|
||||
last_page_offset = last_page << PAGE_CACHE_SHIFT;
|
||||
|
||||
/* Now release the pages */
|
||||
if (last_page_offset > first_page_offset) {
|
||||
truncate_pagecache_range(inode, first_page_offset,
|
||||
last_page_offset - 1);
|
||||
}
|
||||
|
||||
/* Wait for all existing dio workers; newcomers will block on i_mutex */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
err = ext4_flush_unwritten_io(inode);
|
||||
if (err)
|
||||
goto out_dio;
|
||||
inode_dio_wait(inode);
|
||||
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
|
||||
if (IS_ERR(handle)) {
|
||||
err = PTR_ERR(handle);
|
||||
goto out_dio;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Now we need to zero out the non-page-aligned data in the
|
||||
* pages at the start and tail of the hole, and unmap the buffer
|
||||
* heads for the block aligned regions of the page that were
|
||||
* completely zeroed.
|
||||
*/
|
||||
if (first_page > last_page) {
|
||||
/*
|
||||
* If the file space being truncated is contained within a page
|
||||
* just zero out and unmap the middle of that page
|
||||
*/
|
||||
err = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, offset, length, 0);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
} else {
|
||||
/*
|
||||
* zero out and unmap the partial page that contains
|
||||
* the start of the hole
|
||||
*/
|
||||
page_len = first_page_offset - offset;
|
||||
if (page_len > 0) {
|
||||
err = ext4_discard_partial_page_buffers(handle, mapping,
|
||||
offset, page_len, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* zero out and unmap the partial page that contains
|
||||
* the end of the hole
|
||||
*/
|
||||
page_len = offset + length - last_page_offset;
|
||||
if (page_len > 0) {
|
||||
err = ext4_discard_partial_page_buffers(handle, mapping,
|
||||
last_page_offset, page_len, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If i_size is contained in the last page, we need to
|
||||
* unmap and zero the partial page after i_size
|
||||
*/
|
||||
if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
|
||||
inode->i_size % PAGE_CACHE_SIZE != 0) {
|
||||
|
||||
page_len = PAGE_CACHE_SIZE -
|
||||
(inode->i_size & (PAGE_CACHE_SIZE - 1));
|
||||
|
||||
if (page_len > 0) {
|
||||
err = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, inode->i_size, page_len, 0);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
first_block = (offset + sb->s_blocksize - 1) >>
|
||||
EXT4_BLOCK_SIZE_BITS(sb);
|
||||
stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
|
||||
|
||||
/* If there are no blocks to remove, return now */
|
||||
if (first_block >= stop_block)
|
||||
goto out;
|
||||
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
err = ext4_es_remove_extent(inode, first_block,
|
||||
stop_block - first_block);
|
||||
err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
if (IS_SYNC(inode))
|
||||
ext4_handle_sync(handle);
|
||||
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
|
||||
out:
|
||||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
out_dio:
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
out_mutex:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
__u64 start, __u64 len)
|
||||
{
|
||||
|
@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
if (journal->j_flags & JBD2_BARRIER &&
|
||||
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
|
||||
needs_barrier = true;
|
||||
jbd2_log_start_commit(journal, commit_tid);
|
||||
ret = jbd2_log_wait_commit(journal, commit_tid);
|
||||
ret = jbd2_complete_transaction(journal, commit_tid);
|
||||
if (needs_barrier) {
|
||||
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
if (!ret)
|
||||
|
@ -166,7 +166,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
|
||||
trace_ext4_load_inode_bitmap(sb, block_group);
|
||||
bh->b_end_io = ext4_end_bitmap_read;
|
||||
get_bh(bh);
|
||||
submit_bh(READ, bh);
|
||||
submit_bh(READ | REQ_META | REQ_PRIO, bh);
|
||||
wait_on_buffer(bh);
|
||||
if (!buffer_uptodate(bh)) {
|
||||
put_bh(bh);
|
||||
@ -666,6 +666,23 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
ei = EXT4_I(inode);
|
||||
sbi = EXT4_SB(sb);
|
||||
|
||||
/*
|
||||
* Initialize owners and quota early so that we don't have to account
|
||||
* for quota initialization worst case in standard inode creating
|
||||
* transaction
|
||||
*/
|
||||
if (owner) {
|
||||
inode->i_mode = mode;
|
||||
i_uid_write(inode, owner[0]);
|
||||
i_gid_write(inode, owner[1]);
|
||||
} else if (test_opt(sb, GRPID)) {
|
||||
inode->i_mode = mode;
|
||||
inode->i_uid = current_fsuid();
|
||||
inode->i_gid = dir->i_gid;
|
||||
} else
|
||||
inode_init_owner(inode, dir, mode);
|
||||
dquot_initialize(inode);
|
||||
|
||||
if (!goal)
|
||||
goal = sbi->s_inode_goal;
|
||||
|
||||
@ -697,7 +714,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
|
||||
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
|
||||
if (!gdp)
|
||||
goto fail;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Check free inodes count before loading bitmap.
|
||||
@ -711,7 +728,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
brelse(inode_bitmap_bh);
|
||||
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
|
||||
if (!inode_bitmap_bh)
|
||||
goto fail;
|
||||
goto out;
|
||||
|
||||
repeat_in_this_group:
|
||||
ino = ext4_find_next_zero_bit((unsigned long *)
|
||||
@ -733,13 +750,16 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
handle_type, nblocks);
|
||||
if (IS_ERR(handle)) {
|
||||
err = PTR_ERR(handle);
|
||||
goto fail;
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
|
||||
err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
ext4_lock_group(sb, group);
|
||||
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
|
||||
ext4_unlock_group(sb, group);
|
||||
@ -755,8 +775,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
got:
|
||||
BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* We may have to initialize the block bitmap if it isn't already */
|
||||
if (ext4_has_group_desc_csum(sb) &&
|
||||
@ -768,7 +790,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
err = ext4_journal_get_write_access(handle, block_bitmap_bh);
|
||||
if (err) {
|
||||
brelse(block_bitmap_bh);
|
||||
goto fail;
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
|
||||
@ -787,14 +810,18 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
ext4_unlock_group(sb, group);
|
||||
brelse(block_bitmap_bh);
|
||||
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
BUFFER_TRACE(group_desc_bh, "get_write_access");
|
||||
err = ext4_journal_get_write_access(handle, group_desc_bh);
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Update the relevant bg descriptor fields */
|
||||
if (ext4_has_group_desc_csum(sb)) {
|
||||
@ -840,8 +867,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
|
||||
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
|
||||
if (err)
|
||||
goto fail;
|
||||
if (err) {
|
||||
ext4_std_error(sb, err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
percpu_counter_dec(&sbi->s_freeinodes_counter);
|
||||
if (S_ISDIR(mode))
|
||||
@ -851,16 +880,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
flex_group = ext4_flex_group(sbi, group);
|
||||
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
|
||||
}
|
||||
if (owner) {
|
||||
inode->i_mode = mode;
|
||||
i_uid_write(inode, owner[0]);
|
||||
i_gid_write(inode, owner[1]);
|
||||
} else if (test_opt(sb, GRPID)) {
|
||||
inode->i_mode = mode;
|
||||
inode->i_uid = current_fsuid();
|
||||
inode->i_gid = dir->i_gid;
|
||||
} else
|
||||
inode_init_owner(inode, dir, mode);
|
||||
|
||||
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
|
||||
/* This is the optimal IO size (for stat), not the fs block size */
|
||||
@ -889,7 +908,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
* twice.
|
||||
*/
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
|
||||
inode->i_ino);
|
||||
goto out;
|
||||
}
|
||||
spin_lock(&sbi->s_next_gen_lock);
|
||||
inode->i_generation = sbi->s_next_generation++;
|
||||
@ -899,7 +920,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
|
||||
__u32 csum;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
__le32 inum = cpu_to_le32(inode->i_ino);
|
||||
__le32 gen = cpu_to_le32(inode->i_generation);
|
||||
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
|
||||
@ -918,7 +938,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
|
||||
|
||||
ret = inode;
|
||||
dquot_initialize(inode);
|
||||
err = dquot_alloc_inode(inode);
|
||||
if (err)
|
||||
goto fail_drop;
|
||||
@ -952,24 +971,17 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
|
||||
|
||||
ext4_debug("allocating inode %lu\n", inode->i_ino);
|
||||
trace_ext4_allocate_inode(inode, dir, mode);
|
||||
goto really_out;
|
||||
fail:
|
||||
ext4_std_error(sb, err);
|
||||
out:
|
||||
iput(inode);
|
||||
ret = ERR_PTR(err);
|
||||
really_out:
|
||||
brelse(inode_bitmap_bh);
|
||||
return ret;
|
||||
|
||||
fail_free_drop:
|
||||
dquot_free_inode(inode);
|
||||
|
||||
fail_drop:
|
||||
dquot_drop(inode);
|
||||
inode->i_flags |= S_NOQUOTA;
|
||||
clear_nlink(inode);
|
||||
unlock_new_inode(inode);
|
||||
out:
|
||||
dquot_drop(inode);
|
||||
inode->i_flags |= S_NOQUOTA;
|
||||
iput(inode);
|
||||
brelse(inode_bitmap_bh);
|
||||
return ERR_PTR(err);
|
||||
|
@ -291,131 +291,6 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* ext4_alloc_blocks: allocate the multiple blocks needed for a branch
|
||||
* @handle: handle for this transaction
|
||||
* @inode: inode which needs allocated blocks
|
||||
* @iblock: the logical block to start allocating at
|
||||
* @goal: preferred physical block of allocation
|
||||
* @indirect_blks: the number of blocks that need to be allocated for indirect
|
||||
* blocks
|
||||
* @blks: number of desired blocks
|
||||
* @new_blocks: on return it will store the new block numbers for
|
||||
* the indirect blocks(if needed) and the first direct block,
|
||||
* @err: on return it will store the error code
|
||||
*
|
||||
* This function will return the number of blocks allocated as
|
||||
* requested by the passed-in parameters.
|
||||
*/
|
||||
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t iblock, ext4_fsblk_t goal,
|
||||
int indirect_blks, int blks,
|
||||
ext4_fsblk_t new_blocks[4], int *err)
|
||||
{
|
||||
struct ext4_allocation_request ar;
|
||||
int target, i;
|
||||
unsigned long count = 0, blk_allocated = 0;
|
||||
int index = 0;
|
||||
ext4_fsblk_t current_block = 0;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Here we try to allocate the requested multiple blocks at once,
|
||||
* on a best-effort basis.
|
||||
* To build a branch, we should allocate blocks for
|
||||
* the indirect blocks(if not allocated yet), and at least
|
||||
* the first direct block of this branch. That's the
|
||||
* minimum number of blocks that need to be allocated (required)
|
||||
*/
|
||||
/* first we try to allocate the indirect blocks */
|
||||
target = indirect_blks;
|
||||
while (target > 0) {
|
||||
count = target;
|
||||
/* allocating blocks for indirect blocks and direct blocks */
|
||||
current_block = ext4_new_meta_blocks(handle, inode, goal,
|
||||
0, &count, err);
|
||||
if (*err)
|
||||
goto failed_out;
|
||||
|
||||
if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
|
||||
EXT4_ERROR_INODE(inode,
|
||||
"current_block %llu + count %lu > %d!",
|
||||
current_block, count,
|
||||
EXT4_MAX_BLOCK_FILE_PHYS);
|
||||
*err = -EIO;
|
||||
goto failed_out;
|
||||
}
|
||||
|
||||
target -= count;
|
||||
/* allocate blocks for indirect blocks */
|
||||
while (index < indirect_blks && count) {
|
||||
new_blocks[index++] = current_block++;
|
||||
count--;
|
||||
}
|
||||
if (count > 0) {
|
||||
/*
|
||||
* save the new block number
|
||||
* for the first direct block
|
||||
*/
|
||||
new_blocks[index] = current_block;
|
||||
WARN(1, KERN_INFO "%s returned more blocks than "
|
||||
"requested\n", __func__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
target = blks - count ;
|
||||
blk_allocated = count;
|
||||
if (!target)
|
||||
goto allocated;
|
||||
/* Now allocate data blocks */
|
||||
memset(&ar, 0, sizeof(ar));
|
||||
ar.inode = inode;
|
||||
ar.goal = goal;
|
||||
ar.len = target;
|
||||
ar.logical = iblock;
|
||||
if (S_ISREG(inode->i_mode))
|
||||
/* enable in-core preallocation only for regular files */
|
||||
ar.flags = EXT4_MB_HINT_DATA;
|
||||
|
||||
current_block = ext4_mb_new_blocks(handle, &ar, err);
|
||||
if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
|
||||
EXT4_ERROR_INODE(inode,
|
||||
"current_block %llu + ar.len %d > %d!",
|
||||
current_block, ar.len,
|
||||
EXT4_MAX_BLOCK_FILE_PHYS);
|
||||
*err = -EIO;
|
||||
goto failed_out;
|
||||
}
|
||||
|
||||
if (*err && (target == blks)) {
|
||||
/*
|
||||
* if the allocation failed and we didn't allocate
|
||||
* any blocks before
|
||||
*/
|
||||
goto failed_out;
|
||||
}
|
||||
if (!*err) {
|
||||
if (target == blks) {
|
||||
/*
|
||||
* save the new block number
|
||||
* for the first direct block
|
||||
*/
|
||||
new_blocks[index] = current_block;
|
||||
}
|
||||
blk_allocated += ar.len;
|
||||
}
|
||||
allocated:
|
||||
/* total number of blocks allocated for direct blocks */
|
||||
ret = blk_allocated;
|
||||
*err = 0;
|
||||
return ret;
|
||||
failed_out:
|
||||
for (i = 0; i < index; i++)
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ext4_alloc_branch - allocate and set up a chain of blocks.
|
||||
* @handle: handle for this transaction
|
||||
@ -448,60 +323,59 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
|
||||
int *blks, ext4_fsblk_t goal,
|
||||
ext4_lblk_t *offsets, Indirect *branch)
|
||||
{
|
||||
int blocksize = inode->i_sb->s_blocksize;
|
||||
int i, n = 0;
|
||||
int err = 0;
|
||||
struct buffer_head *bh;
|
||||
int num;
|
||||
ext4_fsblk_t new_blocks[4];
|
||||
ext4_fsblk_t current_block;
|
||||
struct ext4_allocation_request ar;
|
||||
struct buffer_head * bh;
|
||||
ext4_fsblk_t b, new_blocks[4];
|
||||
__le32 *p;
|
||||
int i, j, err, len = 1;
|
||||
|
||||
num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
|
||||
*blks, new_blocks, &err);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
branch[0].key = cpu_to_le32(new_blocks[0]);
|
||||
/*
|
||||
* metadata blocks and data blocks are allocated.
|
||||
* Set up for the direct block allocation
|
||||
*/
|
||||
for (n = 1; n <= indirect_blks; n++) {
|
||||
/*
|
||||
* Get buffer_head for parent block, zero it out
|
||||
* and set the pointer to new one, then send
|
||||
* parent to disk.
|
||||
*/
|
||||
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
|
||||
memset(&ar, 0, sizeof(ar));
|
||||
ar.inode = inode;
|
||||
ar.len = *blks;
|
||||
ar.logical = iblock;
|
||||
if (S_ISREG(inode->i_mode))
|
||||
ar.flags = EXT4_MB_HINT_DATA;
|
||||
|
||||
for (i = 0; i <= indirect_blks; i++) {
|
||||
if (i == indirect_blks) {
|
||||
ar.goal = goal;
|
||||
new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
|
||||
} else
|
||||
goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
|
||||
goal, 0, NULL, &err);
|
||||
if (err) {
|
||||
i--;
|
||||
goto failed;
|
||||
}
|
||||
branch[i].key = cpu_to_le32(new_blocks[i]);
|
||||
if (i == 0)
|
||||
continue;
|
||||
|
||||
bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
|
||||
if (unlikely(!bh)) {
|
||||
err = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
branch[n].bh = bh;
|
||||
lock_buffer(bh);
|
||||
BUFFER_TRACE(bh, "call get_create_access");
|
||||
err = ext4_journal_get_create_access(handle, bh);
|
||||
if (err) {
|
||||
/* Don't brelse(bh) here; it's done in
|
||||
* ext4_journal_forget() below */
|
||||
unlock_buffer(bh);
|
||||
goto failed;
|
||||
}
|
||||
|
||||
memset(bh->b_data, 0, blocksize);
|
||||
branch[n].p = (__le32 *) bh->b_data + offsets[n];
|
||||
branch[n].key = cpu_to_le32(new_blocks[n]);
|
||||
*branch[n].p = branch[n].key;
|
||||
if (n == indirect_blks) {
|
||||
current_block = new_blocks[n];
|
||||
/*
|
||||
* End of chain, update the last new metablock of
|
||||
* the chain to point to the new allocated
|
||||
* data blocks numbers
|
||||
*/
|
||||
for (i = 1; i < num; i++)
|
||||
*(branch[n].p + i) = cpu_to_le32(++current_block);
|
||||
}
|
||||
memset(bh->b_data, 0, bh->b_size);
|
||||
p = branch[i].p = (__le32 *) bh->b_data + offsets[i];
|
||||
b = new_blocks[i];
|
||||
|
||||
if (i == indirect_blks)
|
||||
len = ar.len;
|
||||
for (j = 0; j < len; j++)
|
||||
*p++ = cpu_to_le32(b++);
|
||||
|
||||
BUFFER_TRACE(bh, "marking uptodate");
|
||||
set_buffer_uptodate(bh);
|
||||
unlock_buffer(bh);
|
||||
@ -511,25 +385,16 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
|
||||
if (err)
|
||||
goto failed;
|
||||
}
|
||||
*blks = num;
|
||||
return err;
|
||||
*blks = ar.len;
|
||||
return 0;
|
||||
failed:
|
||||
/* Allocation failed, free what we already allocated */
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
|
||||
for (i = 1; i <= n ; i++) {
|
||||
/*
|
||||
* branch[i].bh is newly allocated, so there is no
|
||||
* need to revoke the block, which is why we don't
|
||||
* need to set EXT4_FREE_BLOCKS_METADATA.
|
||||
*/
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
|
||||
EXT4_FREE_BLOCKS_FORGET);
|
||||
for (; i >= 0; i--) {
|
||||
if (i != indirect_blks && branch[i].bh)
|
||||
ext4_forget(handle, 1, inode, branch[i].bh,
|
||||
branch[i].bh->b_blocknr);
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[i],
|
||||
(i == indirect_blks) ? ar.len : 1, 0);
|
||||
}
|
||||
for (i = n+1; i < indirect_blks; i++)
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
|
||||
|
||||
ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -941,26 +806,9 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
* be able to restart the transaction at a convenient checkpoint to make
|
||||
* sure we don't overflow the journal.
|
||||
*
|
||||
* start_transaction gets us a new handle for a truncate transaction,
|
||||
* and extend_transaction tries to extend the existing one a bit. If
|
||||
* Try to extend this transaction for the purposes of truncation. If
|
||||
* extend fails, we need to propagate the failure up and restart the
|
||||
* transaction in the top-level truncate loop. --sct
|
||||
*/
|
||||
static handle_t *start_transaction(struct inode *inode)
|
||||
{
|
||||
handle_t *result;
|
||||
|
||||
result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
|
||||
ext4_blocks_for_truncate(inode));
|
||||
if (!IS_ERR(result))
|
||||
return result;
|
||||
|
||||
ext4_std_error(inode->i_sb, PTR_ERR(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to extend this transaction for the purposes of truncation.
|
||||
*
|
||||
* Returns 0 if we managed to create more room. If we can't create more
|
||||
* room, and the transaction must be restarted we return 1.
|
||||
@ -1353,68 +1201,30 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
void ext4_ind_truncate(struct inode *inode)
|
||||
void ext4_ind_truncate(handle_t *handle, struct inode *inode)
|
||||
{
|
||||
handle_t *handle;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
__le32 *i_data = ei->i_data;
|
||||
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
ext4_lblk_t offsets[4];
|
||||
Indirect chain[4];
|
||||
Indirect *partial;
|
||||
__le32 nr = 0;
|
||||
int n = 0;
|
||||
ext4_lblk_t last_block, max_block;
|
||||
loff_t page_len;
|
||||
unsigned blocksize = inode->i_sb->s_blocksize;
|
||||
int err;
|
||||
|
||||
handle = start_transaction(inode);
|
||||
if (IS_ERR(handle))
|
||||
return; /* AKPM: return what? */
|
||||
|
||||
last_block = (inode->i_size + blocksize-1)
|
||||
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
|
||||
max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
|
||||
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
|
||||
|
||||
if (inode->i_size % PAGE_CACHE_SIZE != 0) {
|
||||
page_len = PAGE_CACHE_SIZE -
|
||||
(inode->i_size & (PAGE_CACHE_SIZE - 1));
|
||||
|
||||
err = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, inode->i_size, page_len, 0);
|
||||
|
||||
if (err)
|
||||
goto out_stop;
|
||||
}
|
||||
|
||||
if (last_block != max_block) {
|
||||
n = ext4_block_to_path(inode, last_block, offsets, NULL);
|
||||
if (n == 0)
|
||||
goto out_stop; /* error */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* OK. This truncate is going to happen. We add the inode to the
|
||||
* orphan list, so that if this truncate spans multiple transactions,
|
||||
* and we crash, we will resume the truncate when the filesystem
|
||||
* recovers. It also marks the inode dirty, to catch the new size.
|
||||
*
|
||||
* Implication: the file must always be in a sane, consistent
|
||||
* truncatable state while each transaction commits.
|
||||
*/
|
||||
if (ext4_orphan_add(handle, inode))
|
||||
goto out_stop;
|
||||
|
||||
/*
|
||||
* From here we block out all ext4_get_block() callers who want to
|
||||
* modify the block allocation tree.
|
||||
*/
|
||||
down_write(&ei->i_data_sem);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
|
||||
|
||||
/*
|
||||
@ -1431,7 +1241,7 @@ void ext4_ind_truncate(struct inode *inode)
|
||||
* It is unnecessary to free any data blocks if last_block is
|
||||
* equal to the indirect block limit.
|
||||
*/
|
||||
goto out_unlock;
|
||||
return;
|
||||
} else if (n == 1) { /* direct blocks */
|
||||
ext4_free_data(handle, inode, NULL, i_data+offsets[0],
|
||||
i_data + EXT4_NDIR_BLOCKS);
|
||||
@ -1491,31 +1301,6 @@ void ext4_ind_truncate(struct inode *inode)
|
||||
case EXT4_TIND_BLOCK:
|
||||
;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
up_write(&ei->i_data_sem);
|
||||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
|
||||
/*
|
||||
* In a multi-transaction truncate, we only make the final transaction
|
||||
* synchronous
|
||||
*/
|
||||
if (IS_SYNC(inode))
|
||||
ext4_handle_sync(handle);
|
||||
out_stop:
|
||||
/*
|
||||
* If this was a simple ftruncate(), and the file will remain alive
|
||||
* then we need to clear up the orphan record which we created above.
|
||||
* However, if this was a real unlink then we were called by
|
||||
* ext4_delete_inode(), and we allow that function to clean up the
|
||||
* orphan info for us.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(handle, inode);
|
||||
|
||||
ext4_journal_stop(handle);
|
||||
trace_ext4_truncate_exit(inode);
|
||||
}
|
||||
|
||||
static int free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
@ -1569,8 +1354,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t first, ext4_lblk_t stop)
|
||||
int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t first, ext4_lblk_t stop)
|
||||
{
|
||||
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
|
||||
int level, ret = 0;
|
||||
@ -1604,157 +1389,3 @@ static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * ext4_ind_punch_hole - punch a hole in a file
 * @file:   open file whose inode is to be punched
 * @offset: byte offset at which the hole starts
 * @length: length of the hole in bytes
 *
 * Writes back dirty pages in the range, drops the fully covered pages from
 * the page cache, zeroes and unmaps the partial pages at both ends of the
 * hole, and finally frees the whole filesystem blocks inside the hole via
 * ext4_free_hole_blocks().
 *
 * Returns 0 on success, or when @offset is at or beyond i_size and there is
 * nothing to do.  Returns -EPERM for append-only or immutable inodes,
 * -ETXTBSY for swap files, or the negative error produced by write-back,
 * by starting the journal handle, by discarding partial page buffers, by
 * ext4_es_remove_extent() or by ext4_free_hole_blocks().
 */
int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle = NULL;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	int err = 0;

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);
		if (err)
			return err;
	}

	mutex_lock(&inode->i_mutex);
	/* It's not possible to punch a hole in an append-only file */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
		err = -EPERM;
		goto out_mutex;
	}
	if (IS_SWAPFILE(inode)) {
		err = -ETXTBSY;
		goto out_mutex;
	}

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_pagecache_range(inode, first_page_offset,
					 last_page_offset - 1);
	}

	/* Wait all existing dio works, newcomers will block on i_mutex */
	inode_dio_wait(inode);

	handle = start_transaction(inode);
	if (IS_ERR(handle)) {
		/* Propagate the failure instead of returning success. */
		err = PTR_ERR(handle);
		goto out_mutex;
	}

	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zeroed.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);
		if (err)
			goto out;
	} else {
		/*
		 * Zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * Zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}

	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	    inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
				mapping, inode->i_size, page_len, 0);
			if (err)
				goto out;
		}
	}

	/* Only whole blocks that lie entirely inside the hole are freed. */
	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	if (first_block >= stop_block)
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	/*
	 * Do not free the blocks if the extent status entries for the range
	 * could not be removed; report that error rather than letting the
	 * next call overwrite it.
	 */
	err = ext4_es_remove_extent(inode, first_block,
				    stop_block - first_block);
	if (!err)
		err = ext4_free_hole_blocks(handle, inode, first_block,
					    stop_block);

	ext4_discard_preallocations(inode);

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);

out_mutex:
	mutex_unlock(&inode->i_mutex);

	return err;
}
|
||||
|
178
fs/ext4/inline.c
178
fs/ext4/inline.c
@ -19,7 +19,8 @@
|
||||
|
||||
#define EXT4_XATTR_SYSTEM_DATA "data"
|
||||
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
|
||||
#define EXT4_INLINE_DOTDOT_SIZE 4
|
||||
#define EXT4_INLINE_DOTDOT_OFFSET 2
|
||||
#define EXT4_INLINE_DOTDOT_SIZE 4
|
||||
|
||||
int ext4_get_inline_size(struct inode *inode)
|
||||
{
|
||||
@ -1289,6 +1290,120 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function fills a red-black tree with information from an
|
||||
* inlined dir. It returns the number directory entries loaded
|
||||
* into the tree. If there is an error it is returned in err.
|
||||
*/
|
||||
int htree_inlinedir_to_tree(struct file *dir_file,
|
||||
struct inode *dir, ext4_lblk_t block,
|
||||
struct dx_hash_info *hinfo,
|
||||
__u32 start_hash, __u32 start_minor_hash,
|
||||
int *has_inline_data)
|
||||
{
|
||||
int err = 0, count = 0;
|
||||
unsigned int parent_ino;
|
||||
int pos;
|
||||
struct ext4_dir_entry_2 *de;
|
||||
struct inode *inode = file_inode(dir_file);
|
||||
int ret, inline_size = 0;
|
||||
struct ext4_iloc iloc;
|
||||
void *dir_buf = NULL;
|
||||
struct ext4_dir_entry_2 fake;
|
||||
|
||||
ret = ext4_get_inode_loc(inode, &iloc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
down_read(&EXT4_I(inode)->xattr_sem);
|
||||
if (!ext4_has_inline_data(inode)) {
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
*has_inline_data = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
inline_size = ext4_get_inline_size(inode);
|
||||
dir_buf = kmalloc(inline_size, GFP_NOFS);
|
||||
if (!dir_buf) {
|
||||
ret = -ENOMEM;
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
pos = 0;
|
||||
parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
|
||||
while (pos < inline_size) {
|
||||
/*
|
||||
* As inlined dir doesn't store any information about '.' and
|
||||
* only the inode number of '..' is stored, we have to handle
|
||||
* them differently.
|
||||
*/
|
||||
if (pos == 0) {
|
||||
fake.inode = cpu_to_le32(inode->i_ino);
|
||||
fake.name_len = 1;
|
||||
strcpy(fake.name, ".");
|
||||
fake.rec_len = ext4_rec_len_to_disk(
|
||||
EXT4_DIR_REC_LEN(fake.name_len),
|
||||
inline_size);
|
||||
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
|
||||
de = &fake;
|
||||
pos = EXT4_INLINE_DOTDOT_OFFSET;
|
||||
} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
|
||||
fake.inode = cpu_to_le32(parent_ino);
|
||||
fake.name_len = 2;
|
||||
strcpy(fake.name, "..");
|
||||
fake.rec_len = ext4_rec_len_to_disk(
|
||||
EXT4_DIR_REC_LEN(fake.name_len),
|
||||
inline_size);
|
||||
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
|
||||
de = &fake;
|
||||
pos = EXT4_INLINE_DOTDOT_SIZE;
|
||||
} else {
|
||||
de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
|
||||
pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
|
||||
if (ext4_check_dir_entry(inode, dir_file, de,
|
||||
iloc.bh, dir_buf,
|
||||
inline_size, pos)) {
|
||||
ret = count;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ext4fs_dirhash(de->name, de->name_len, hinfo);
|
||||
if ((hinfo->hash < start_hash) ||
|
||||
((hinfo->hash == start_hash) &&
|
||||
(hinfo->minor_hash < start_minor_hash)))
|
||||
continue;
|
||||
if (de->inode == 0)
|
||||
continue;
|
||||
err = ext4_htree_store_dirent(dir_file,
|
||||
hinfo->hash, hinfo->minor_hash, de);
|
||||
if (err) {
|
||||
count = err;
|
||||
goto out;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
ret = count;
|
||||
out:
|
||||
kfree(dir_buf);
|
||||
brelse(iloc.bh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* So this function is called when the volume is mkfsed with
|
||||
* dir_index disabled. In order to keep f_pos persistent
|
||||
* after we convert from an inlined dir to a blocked based,
|
||||
* we just pretend that we are a normal dir and return the
|
||||
* offset as if '.' and '..' really take place.
|
||||
*
|
||||
*/
|
||||
int ext4_read_inline_dir(struct file *filp,
|
||||
void *dirent, filldir_t filldir,
|
||||
int *has_inline_data)
|
||||
@ -1302,6 +1417,7 @@ int ext4_read_inline_dir(struct file *filp,
|
||||
int ret, inline_size = 0;
|
||||
struct ext4_iloc iloc;
|
||||
void *dir_buf = NULL;
|
||||
int dotdot_offset, dotdot_size, extra_offset, extra_size;
|
||||
|
||||
ret = ext4_get_inode_loc(inode, &iloc);
|
||||
if (ret)
|
||||
@ -1330,8 +1446,21 @@ int ext4_read_inline_dir(struct file *filp,
|
||||
sb = inode->i_sb;
|
||||
stored = 0;
|
||||
parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
|
||||
offset = filp->f_pos;
|
||||
|
||||
while (!error && !stored && filp->f_pos < inode->i_size) {
|
||||
/*
|
||||
* dotdot_offset and dotdot_size is the real offset and
|
||||
* size for ".." and "." if the dir is block based while
|
||||
* the real size for them are only EXT4_INLINE_DOTDOT_SIZE.
|
||||
* So we will use extra_offset and extra_size to indicate them
|
||||
* during the inline dir iteration.
|
||||
*/
|
||||
dotdot_offset = EXT4_DIR_REC_LEN(1);
|
||||
dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
|
||||
extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
|
||||
extra_size = extra_offset + inline_size;
|
||||
|
||||
while (!error && !stored && filp->f_pos < extra_size) {
|
||||
revalidate:
|
||||
/*
|
||||
* If the version has changed since the last call to
|
||||
@ -1340,15 +1469,23 @@ int ext4_read_inline_dir(struct file *filp,
|
||||
* dir to make sure.
|
||||
*/
|
||||
if (filp->f_version != inode->i_version) {
|
||||
for (i = 0;
|
||||
i < inode->i_size && i < offset;) {
|
||||
for (i = 0; i < extra_size && i < offset;) {
|
||||
/*
|
||||
* "." is with offset 0 and
|
||||
* ".." is dotdot_offset.
|
||||
*/
|
||||
if (!i) {
|
||||
/* skip "." and ".." if needed. */
|
||||
i += EXT4_INLINE_DOTDOT_SIZE;
|
||||
i = dotdot_offset;
|
||||
continue;
|
||||
} else if (i == dotdot_offset) {
|
||||
i = dotdot_size;
|
||||
continue;
|
||||
}
|
||||
/* for other entry, the real offset in
|
||||
* the buf has to be tuned accordingly.
|
||||
*/
|
||||
de = (struct ext4_dir_entry_2 *)
|
||||
(dir_buf + i);
|
||||
(dir_buf + i - extra_offset);
|
||||
/* It's too expensive to do a full
|
||||
* dirent test each time round this
|
||||
* loop, but we do have to test at
|
||||
@ -1356,43 +1493,47 @@ int ext4_read_inline_dir(struct file *filp,
|
||||
* failure will be detected in the
|
||||
* dirent test below. */
|
||||
if (ext4_rec_len_from_disk(de->rec_len,
|
||||
inline_size) < EXT4_DIR_REC_LEN(1))
|
||||
extra_size) < EXT4_DIR_REC_LEN(1))
|
||||
break;
|
||||
i += ext4_rec_len_from_disk(de->rec_len,
|
||||
inline_size);
|
||||
extra_size);
|
||||
}
|
||||
offset = i;
|
||||
filp->f_pos = offset;
|
||||
filp->f_version = inode->i_version;
|
||||
}
|
||||
|
||||
while (!error && filp->f_pos < inode->i_size) {
|
||||
while (!error && filp->f_pos < extra_size) {
|
||||
if (filp->f_pos == 0) {
|
||||
error = filldir(dirent, ".", 1, 0, inode->i_ino,
|
||||
DT_DIR);
|
||||
if (error)
|
||||
break;
|
||||
stored++;
|
||||
filp->f_pos = dotdot_offset;
|
||||
continue;
|
||||
}
|
||||
|
||||
error = filldir(dirent, "..", 2, 0, parent_ino,
|
||||
DT_DIR);
|
||||
if (filp->f_pos == dotdot_offset) {
|
||||
error = filldir(dirent, "..", 2,
|
||||
dotdot_offset,
|
||||
parent_ino, DT_DIR);
|
||||
if (error)
|
||||
break;
|
||||
stored++;
|
||||
|
||||
filp->f_pos = offset = EXT4_INLINE_DOTDOT_SIZE;
|
||||
filp->f_pos = dotdot_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
de = (struct ext4_dir_entry_2 *)(dir_buf + offset);
|
||||
de = (struct ext4_dir_entry_2 *)
|
||||
(dir_buf + filp->f_pos - extra_offset);
|
||||
if (ext4_check_dir_entry(inode, filp, de,
|
||||
iloc.bh, dir_buf,
|
||||
inline_size, offset)) {
|
||||
extra_size, filp->f_pos)) {
|
||||
ret = stored;
|
||||
goto out;
|
||||
}
|
||||
offset += ext4_rec_len_from_disk(de->rec_len,
|
||||
inline_size);
|
||||
if (le32_to_cpu(de->inode)) {
|
||||
/* We might block in the next section
|
||||
* if the data destination is
|
||||
@ -1415,9 +1556,8 @@ int ext4_read_inline_dir(struct file *filp,
|
||||
stored++;
|
||||
}
|
||||
filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
|
||||
inline_size);
|
||||
extra_size);
|
||||
}
|
||||
offset = 0;
|
||||
}
|
||||
out:
|
||||
kfree(dir_buf);
|
||||
|
580
fs/ext4/inode.c
580
fs/ext4/inode.c
@ -55,21 +55,21 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
|
||||
__u16 csum_hi = 0;
|
||||
__u32 csum;
|
||||
|
||||
csum_lo = raw->i_checksum_lo;
|
||||
csum_lo = le16_to_cpu(raw->i_checksum_lo);
|
||||
raw->i_checksum_lo = 0;
|
||||
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
|
||||
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
|
||||
csum_hi = raw->i_checksum_hi;
|
||||
csum_hi = le16_to_cpu(raw->i_checksum_hi);
|
||||
raw->i_checksum_hi = 0;
|
||||
}
|
||||
|
||||
csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
|
||||
EXT4_INODE_SIZE(inode->i_sb));
|
||||
|
||||
raw->i_checksum_lo = csum_lo;
|
||||
raw->i_checksum_lo = cpu_to_le16(csum_lo);
|
||||
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
|
||||
EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
|
||||
raw->i_checksum_hi = csum_hi;
|
||||
raw->i_checksum_hi = cpu_to_le16(csum_hi);
|
||||
|
||||
return csum;
|
||||
}
|
||||
@ -210,8 +210,7 @@ void ext4_evict_inode(struct inode *inode)
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
|
||||
|
||||
jbd2_log_start_commit(journal, commit_tid);
|
||||
jbd2_log_wait_commit(journal, commit_tid);
|
||||
jbd2_complete_transaction(journal, commit_tid);
|
||||
filemap_write_and_wait(&inode->i_data);
|
||||
}
|
||||
truncate_inode_pages(&inode->i_data, 0);
|
||||
@ -1081,20 +1080,42 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
|
||||
/* For write_end() in data=journal mode */
|
||||
static int write_end_fn(handle_t *handle, struct buffer_head *bh)
|
||||
{
|
||||
int ret;
|
||||
if (!buffer_mapped(bh) || buffer_freed(bh))
|
||||
return 0;
|
||||
set_buffer_uptodate(bh);
|
||||
return ext4_handle_dirty_metadata(handle, NULL, bh);
|
||||
ret = ext4_handle_dirty_metadata(handle, NULL, bh);
|
||||
clear_buffer_meta(bh);
|
||||
clear_buffer_prio(bh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ext4_generic_write_end(struct file *file,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
/*
|
||||
* We need to pick up the new inode size which generic_commit_write gave us
|
||||
* `file' can be NULL - eg, when called from page_symlink().
|
||||
*
|
||||
* ext4 never places buffers on inode->i_mapping->private_list. metadata
|
||||
* buffers are managed internally.
|
||||
*/
|
||||
static int ext4_write_end(struct file *file,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
int i_size_changed = 0;
|
||||
struct inode *inode = mapping->host;
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0, ret2;
|
||||
int i_size_changed = 0;
|
||||
|
||||
trace_ext4_write_end(inode, pos, len, copied);
|
||||
if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
|
||||
ret = ext4_jbd2_file_inode(handle, inode);
|
||||
if (ret) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
goto errout;
|
||||
}
|
||||
}
|
||||
|
||||
if (ext4_has_inline_data(inode))
|
||||
copied = ext4_write_inline_data_end(inode, pos, len,
|
||||
@ -1105,7 +1126,7 @@ static int ext4_generic_write_end(struct file *file,
|
||||
|
||||
/*
|
||||
* No need to use i_size_read() here, the i_size
|
||||
* cannot change under us because we hold i_mutex.
|
||||
* cannot change under us because we hold i_mutex.
|
||||
*
|
||||
* But it's important to update i_size while still holding page lock:
|
||||
* page writeout could otherwise come in and zero beyond i_size.
|
||||
@ -1115,10 +1136,10 @@ static int ext4_generic_write_end(struct file *file,
|
||||
i_size_changed = 1;
|
||||
}
|
||||
|
||||
if (pos + copied > EXT4_I(inode)->i_disksize) {
|
||||
if (pos + copied > EXT4_I(inode)->i_disksize) {
|
||||
/* We need to mark inode dirty even if
|
||||
* new_i_size is less than inode->i_size
|
||||
* bu greater than i_disksize.(hint delalloc)
|
||||
* but greater than i_disksize. (hint delalloc)
|
||||
*/
|
||||
ext4_update_i_disksize(inode, (pos + copied));
|
||||
i_size_changed = 1;
|
||||
@ -1135,87 +1156,15 @@ static int ext4_generic_write_end(struct file *file,
|
||||
if (i_size_changed)
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to pick up the new inode size which generic_commit_write gave us
|
||||
* `file' can be NULL - eg, when called from page_symlink().
|
||||
*
|
||||
* ext4 never places buffers on inode->i_mapping->private_list. metadata
|
||||
* buffers are managed internally.
|
||||
*/
|
||||
static int ext4_ordered_write_end(struct file *file,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0, ret2;
|
||||
|
||||
trace_ext4_ordered_write_end(inode, pos, len, copied);
|
||||
ret = ext4_jbd2_file_inode(handle, inode);
|
||||
|
||||
if (ret == 0) {
|
||||
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
|
||||
page, fsdata);
|
||||
copied = ret2;
|
||||
if (pos + len > inode->i_size && ext4_can_truncate(inode))
|
||||
/* if we have allocated more blocks and copied
|
||||
* less. We will have blocks allocated outside
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
if (ret2 < 0)
|
||||
ret = ret2;
|
||||
} else {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
|
||||
if (pos + len > inode->i_size) {
|
||||
ext4_truncate_failed_write(inode);
|
||||
/*
|
||||
* If truncate failed early the inode might still be
|
||||
* on the orphan list; we need to make sure the inode
|
||||
* is removed from the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
|
||||
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
static int ext4_writeback_write_end(struct file *file,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0, ret2;
|
||||
|
||||
trace_ext4_writeback_write_end(inode, pos, len, copied);
|
||||
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
|
||||
page, fsdata);
|
||||
copied = ret2;
|
||||
if (copied < 0)
|
||||
ret = copied;
|
||||
if (pos + len > inode->i_size && ext4_can_truncate(inode))
|
||||
/* if we have allocated more blocks and copied
|
||||
* less. We will have blocks allocated outside
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
if (ret2 < 0)
|
||||
ret = ret2;
|
||||
|
||||
errout:
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
@ -1538,7 +1487,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
|
||||
struct ext4_io_submit io_submit;
|
||||
|
||||
BUG_ON(mpd->next_page <= mpd->first_page);
|
||||
memset(&io_submit, 0, sizeof(io_submit));
|
||||
ext4_io_submit_init(&io_submit, mpd->wbc);
|
||||
io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
|
||||
if (!io_submit.io_end)
|
||||
return -ENOMEM;
|
||||
/*
|
||||
* We need to start from the first_page to the next_page - 1
|
||||
* to make sure we also write the mapped dirty buffer_heads.
|
||||
@ -1626,6 +1578,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
|
||||
pagevec_release(&pvec);
|
||||
}
|
||||
ext4_io_submit(&io_submit);
|
||||
/* Drop io_end reference we got from init */
|
||||
ext4_put_io_end_defer(io_submit.io_end);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1670,22 +1624,25 @@ static void ext4_print_free_blocks(struct inode *inode)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
|
||||
ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
|
||||
EXT4_C2B(EXT4_SB(inode->i_sb),
|
||||
ext4_count_free_clusters(inode->i_sb)));
|
||||
ext4_count_free_clusters(sb)));
|
||||
ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
|
||||
ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
|
||||
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
|
||||
(long long) EXT4_C2B(EXT4_SB(sb),
|
||||
percpu_counter_sum(&sbi->s_freeclusters_counter)));
|
||||
ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
|
||||
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
|
||||
(long long) EXT4_C2B(EXT4_SB(sb),
|
||||
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
|
||||
ext4_msg(sb, KERN_CRIT, "Block reservation details");
|
||||
ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
|
||||
EXT4_I(inode)->i_reserved_data_blocks);
|
||||
ei->i_reserved_data_blocks);
|
||||
ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
|
||||
EXT4_I(inode)->i_reserved_meta_blocks);
|
||||
ei->i_reserved_meta_blocks);
|
||||
ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
|
||||
ei->i_allocated_meta_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1740,12 +1697,21 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
|
||||
*/
|
||||
map.m_lblk = next;
|
||||
map.m_len = max_blocks;
|
||||
get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
|
||||
/*
|
||||
* We're in delalloc path and it is possible that we're going to
|
||||
* need more metadata blocks than previously reserved. However
|
||||
* we must not fail because we're in writeback and there is
|
||||
* nothing we can do about it so it might result in data loss.
|
||||
* So use reserved blocks to allocate metadata if possible.
|
||||
*/
|
||||
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
|
||||
EXT4_GET_BLOCKS_METADATA_NOFAIL;
|
||||
if (ext4_should_dioread_nolock(mpd->inode))
|
||||
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
|
||||
if (mpd->b_state & (1 << BH_Delay))
|
||||
get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
|
||||
|
||||
|
||||
blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
|
||||
if (blks < 0) {
|
||||
struct super_block *sb = mpd->inode->i_sb;
|
||||
@ -2272,9 +2238,16 @@ static int ext4_writepage(struct page *page,
|
||||
*/
|
||||
return __ext4_journalled_writepage(page, len);
|
||||
|
||||
memset(&io_submit, 0, sizeof(io_submit));
|
||||
ext4_io_submit_init(&io_submit, wbc);
|
||||
io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
|
||||
if (!io_submit.io_end) {
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
return -ENOMEM;
|
||||
}
|
||||
ret = ext4_bio_write_page(&io_submit, page, len, wbc);
|
||||
ext4_io_submit(&io_submit);
|
||||
/* Drop io_end reference we got from init */
|
||||
ext4_put_io_end_defer(io_submit.io_end);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2661,7 +2634,7 @@ static int ext4_da_writepages(struct address_space *mapping,
|
||||
|
||||
static int ext4_nonda_switch(struct super_block *sb)
|
||||
{
|
||||
s64 free_blocks, dirty_blocks;
|
||||
s64 free_clusters, dirty_clusters;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
/*
|
||||
@ -2672,17 +2645,18 @@ static int ext4_nonda_switch(struct super_block *sb)
|
||||
* Delalloc need an accurate free block accounting. So switch
|
||||
* to non delalloc when we are near to error range.
|
||||
*/
|
||||
free_blocks = EXT4_C2B(sbi,
|
||||
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
|
||||
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
|
||||
free_clusters =
|
||||
percpu_counter_read_positive(&sbi->s_freeclusters_counter);
|
||||
dirty_clusters =
|
||||
percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
|
||||
/*
|
||||
* Start pushing delalloc when 1/2 of free blocks are dirty.
|
||||
*/
|
||||
if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
|
||||
if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
|
||||
try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
|
||||
|
||||
if (2 * free_blocks < 3 * dirty_blocks ||
|
||||
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
|
||||
if (2 * free_clusters < 3 * dirty_clusters ||
|
||||
free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
|
||||
/*
|
||||
* free block count is less than 150% of dirty blocks
|
||||
* or free blocks is less than watermark
|
||||
@ -2818,18 +2792,9 @@ static int ext4_da_write_end(struct file *file,
|
||||
unsigned long start, end;
|
||||
int write_mode = (int)(unsigned long)fsdata;
|
||||
|
||||
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
|
||||
switch (ext4_inode_journal_mode(inode)) {
|
||||
case EXT4_INODE_ORDERED_DATA_MODE:
|
||||
return ext4_ordered_write_end(file, mapping, pos,
|
||||
len, copied, page, fsdata);
|
||||
case EXT4_INODE_WRITEBACK_DATA_MODE:
|
||||
return ext4_writeback_write_end(file, mapping, pos,
|
||||
len, copied, page, fsdata);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
if (write_mode == FALL_BACK_TO_NONDELALLOC)
|
||||
return ext4_write_end(file, mapping, pos,
|
||||
len, copied, page, fsdata);
|
||||
|
||||
trace_ext4_da_write_end(inode, pos, len, copied);
|
||||
start = pos & (PAGE_CACHE_SIZE - 1);
|
||||
@ -3113,9 +3078,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
ext4_io_end_t *io_end = iocb->private;
|
||||
|
||||
/* if not async direct IO or dio with 0 bytes write, just return */
|
||||
if (!io_end || !size)
|
||||
goto out;
|
||||
/* if not async direct IO just return */
|
||||
if (!io_end) {
|
||||
inode_dio_done(inode);
|
||||
if (is_async)
|
||||
aio_complete(iocb, ret, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
ext_debug("ext4_end_io_dio(): io_end 0x%p "
|
||||
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
|
||||
@ -3123,25 +3092,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||
size);
|
||||
|
||||
iocb->private = NULL;
|
||||
|
||||
/* if not aio dio with unwritten extents, just free io and return */
|
||||
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
|
||||
ext4_free_io_end(io_end);
|
||||
out:
|
||||
inode_dio_done(inode);
|
||||
if (is_async)
|
||||
aio_complete(iocb, ret, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
io_end->offset = offset;
|
||||
io_end->size = size;
|
||||
if (is_async) {
|
||||
io_end->iocb = iocb;
|
||||
io_end->result = ret;
|
||||
}
|
||||
|
||||
ext4_add_complete_io(io_end);
|
||||
ext4_put_io_end_defer(io_end);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3175,6 +3132,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
get_block_t *get_block_func = NULL;
|
||||
int dio_flags = 0;
|
||||
loff_t final_size = offset + count;
|
||||
ext4_io_end_t *io_end = NULL;
|
||||
|
||||
/* Use the old path for reads and writes beyond i_size. */
|
||||
if (rw != WRITE || final_size > inode->i_size)
|
||||
@ -3213,13 +3171,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
iocb->private = NULL;
|
||||
ext4_inode_aio_set(inode, NULL);
|
||||
if (!is_sync_kiocb(iocb)) {
|
||||
ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
|
||||
io_end = ext4_init_io_end(inode, GFP_NOFS);
|
||||
if (!io_end) {
|
||||
ret = -ENOMEM;
|
||||
goto retake_lock;
|
||||
}
|
||||
io_end->flag |= EXT4_IO_END_DIRECT;
|
||||
iocb->private = io_end;
|
||||
/*
|
||||
* Grab reference for DIO. Will be dropped in ext4_end_io_dio()
|
||||
*/
|
||||
iocb->private = ext4_get_io_end(io_end);
|
||||
/*
|
||||
* we save the io structure for current async direct
|
||||
* IO, so that later ext4_map_blocks() could flag the
|
||||
@ -3243,26 +3204,27 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
NULL,
|
||||
dio_flags);
|
||||
|
||||
if (iocb->private)
|
||||
ext4_inode_aio_set(inode, NULL);
|
||||
/*
|
||||
* The io_end structure takes a reference to the inode, that
|
||||
* structure needs to be destroyed and the reference to the
|
||||
* inode need to be dropped, when IO is complete, even with 0
|
||||
* byte write, or failed.
|
||||
*
|
||||
* In the successful AIO DIO case, the io_end structure will
|
||||
* be destroyed and the reference to the inode will be dropped
|
||||
* after the end_io call back function is called.
|
||||
*
|
||||
* In the case there is 0 byte write, or error case, since VFS
|
||||
* direct IO won't invoke the end_io call back function, we
|
||||
* need to free the end_io structure here.
|
||||
* Put our reference to io_end. This can free the io_end structure e.g.
|
||||
* in sync IO case or in case of error. It can even perform extent
|
||||
* conversion if all bios we submitted finished before we got here.
|
||||
* Note that in that case iocb->private can be already set to NULL
|
||||
* here.
|
||||
*/
|
||||
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
|
||||
ext4_free_io_end(iocb->private);
|
||||
iocb->private = NULL;
|
||||
} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
|
||||
if (io_end) {
|
||||
ext4_inode_aio_set(inode, NULL);
|
||||
ext4_put_io_end(io_end);
|
||||
/*
|
||||
* In case of error or no write ext4_end_io_dio() was not
|
||||
* called so we have to put iocb's reference.
|
||||
*/
|
||||
if (ret <= 0 && ret != -EIOCBQUEUED) {
|
||||
WARN_ON(iocb->private != io_end);
|
||||
ext4_put_io_end(io_end);
|
||||
iocb->private = NULL;
|
||||
}
|
||||
}
|
||||
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
|
||||
EXT4_STATE_DIO_UNWRITTEN)) {
|
||||
int err;
|
||||
/*
|
||||
@ -3334,27 +3296,12 @@ static int ext4_journalled_set_page_dirty(struct page *page)
|
||||
return __set_page_dirty_nobuffers(page);
|
||||
}
|
||||
|
||||
static const struct address_space_operations ext4_ordered_aops = {
|
||||
static const struct address_space_operations ext4_aops = {
|
||||
.readpage = ext4_readpage,
|
||||
.readpages = ext4_readpages,
|
||||
.writepage = ext4_writepage,
|
||||
.write_begin = ext4_write_begin,
|
||||
.write_end = ext4_ordered_write_end,
|
||||
.bmap = ext4_bmap,
|
||||
.invalidatepage = ext4_invalidatepage,
|
||||
.releasepage = ext4_releasepage,
|
||||
.direct_IO = ext4_direct_IO,
|
||||
.migratepage = buffer_migrate_page,
|
||||
.is_partially_uptodate = block_is_partially_uptodate,
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
};
|
||||
|
||||
static const struct address_space_operations ext4_writeback_aops = {
|
||||
.readpage = ext4_readpage,
|
||||
.readpages = ext4_readpages,
|
||||
.writepage = ext4_writepage,
|
||||
.write_begin = ext4_write_begin,
|
||||
.write_end = ext4_writeback_write_end,
|
||||
.write_end = ext4_write_end,
|
||||
.bmap = ext4_bmap,
|
||||
.invalidatepage = ext4_invalidatepage,
|
||||
.releasepage = ext4_releasepage,
|
||||
@ -3399,23 +3346,21 @@ void ext4_set_aops(struct inode *inode)
|
||||
{
|
||||
switch (ext4_inode_journal_mode(inode)) {
|
||||
case EXT4_INODE_ORDERED_DATA_MODE:
|
||||
if (test_opt(inode->i_sb, DELALLOC))
|
||||
inode->i_mapping->a_ops = &ext4_da_aops;
|
||||
else
|
||||
inode->i_mapping->a_ops = &ext4_ordered_aops;
|
||||
ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE);
|
||||
break;
|
||||
case EXT4_INODE_WRITEBACK_DATA_MODE:
|
||||
if (test_opt(inode->i_sb, DELALLOC))
|
||||
inode->i_mapping->a_ops = &ext4_da_aops;
|
||||
else
|
||||
inode->i_mapping->a_ops = &ext4_writeback_aops;
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE);
|
||||
break;
|
||||
case EXT4_INODE_JOURNAL_DATA_MODE:
|
||||
inode->i_mapping->a_ops = &ext4_journalled_aops;
|
||||
break;
|
||||
return;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
if (test_opt(inode->i_sb, DELALLOC))
|
||||
inode->i_mapping->a_ops = &ext4_da_aops;
|
||||
else
|
||||
inode->i_mapping->a_ops = &ext4_aops;
|
||||
}
|
||||
|
||||
|
||||
@ -3646,20 +3591,190 @@ int ext4_can_truncate(struct inode *inode)
|
||||
int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
ext4_lblk_t first_block, stop_block;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
loff_t first_page, last_page, page_len;
|
||||
loff_t first_page_offset, last_page_offset;
|
||||
handle_t *handle;
|
||||
unsigned int credits;
|
||||
int ret = 0;
|
||||
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||
return ext4_ind_punch_hole(file, offset, length);
|
||||
|
||||
if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
|
||||
if (EXT4_SB(sb)->s_cluster_ratio > 1) {
|
||||
/* TODO: Add support for bigalloc file systems */
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
trace_ext4_punch_hole(inode, offset, length);
|
||||
|
||||
return ext4_ext_punch_hole(file, offset, length);
|
||||
/*
|
||||
* Write out all dirty pages to avoid race conditions
|
||||
* Then release them.
|
||||
*/
|
||||
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
||||
ret = filemap_write_and_wait_range(mapping, offset,
|
||||
offset + length - 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
/* It's not possible to punch a hole in an append-only or immutable file */
|
||||
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
|
||||
ret = -EPERM;
|
||||
goto out_mutex;
|
||||
}
|
||||
if (IS_SWAPFILE(inode)) {
|
||||
ret = -ETXTBSY;
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
/* No need to punch hole beyond i_size */
|
||||
if (offset >= inode->i_size)
|
||||
goto out_mutex;
|
||||
|
||||
/*
|
||||
* If the hole extends beyond i_size, set the hole
|
||||
* to end after the page that contains i_size
|
||||
*/
|
||||
if (offset + length > inode->i_size) {
|
||||
length = inode->i_size +
|
||||
PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
|
||||
offset;
|
||||
}
|
||||
|
||||
first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
last_page = (offset + length) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
first_page_offset = first_page << PAGE_CACHE_SHIFT;
|
||||
last_page_offset = last_page << PAGE_CACHE_SHIFT;
|
||||
|
||||
/* Now release the pages */
|
||||
if (last_page_offset > first_page_offset) {
|
||||
truncate_pagecache_range(inode, first_page_offset,
|
||||
last_page_offset - 1);
|
||||
}
|
||||
|
||||
/* Wait all existing dio workers, newcomers will block on i_mutex */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
ret = ext4_flush_unwritten_io(inode);
|
||||
if (ret)
|
||||
goto out_dio;
|
||||
inode_dio_wait(inode);
|
||||
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
else
|
||||
credits = ext4_blocks_for_truncate(inode);
|
||||
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
|
||||
if (IS_ERR(handle)) {
|
||||
ret = PTR_ERR(handle);
|
||||
ext4_std_error(sb, ret);
|
||||
goto out_dio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we need to zero out the non-page-aligned data in the
|
||||
* pages at the start and tail of the hole, and unmap the
|
||||
* buffer heads for the block aligned regions of the page that
|
||||
* were completely zeroed.
|
||||
*/
|
||||
if (first_page > last_page) {
|
||||
/*
|
||||
* If the file space being truncated is contained
|
||||
* within a page just zero out and unmap the middle of
|
||||
* that page
|
||||
*/
|
||||
ret = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, offset, length, 0);
|
||||
|
||||
if (ret)
|
||||
goto out_stop;
|
||||
} else {
|
||||
/*
|
||||
* zero out and unmap the partial page that contains
|
||||
* the start of the hole
|
||||
*/
|
||||
page_len = first_page_offset - offset;
|
||||
if (page_len > 0) {
|
||||
ret = ext4_discard_partial_page_buffers(handle, mapping,
|
||||
offset, page_len, 0);
|
||||
if (ret)
|
||||
goto out_stop;
|
||||
}
|
||||
|
||||
/*
|
||||
* zero out and unmap the partial page that contains
|
||||
* the end of the hole
|
||||
*/
|
||||
page_len = offset + length - last_page_offset;
|
||||
if (page_len > 0) {
|
||||
ret = ext4_discard_partial_page_buffers(handle, mapping,
|
||||
last_page_offset, page_len, 0);
|
||||
if (ret)
|
||||
goto out_stop;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If i_size is contained in the last page, we need to
|
||||
* unmap and zero the partial page after i_size
|
||||
*/
|
||||
if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
|
||||
inode->i_size % PAGE_CACHE_SIZE != 0) {
|
||||
page_len = PAGE_CACHE_SIZE -
|
||||
(inode->i_size & (PAGE_CACHE_SIZE - 1));
|
||||
|
||||
if (page_len > 0) {
|
||||
ret = ext4_discard_partial_page_buffers(handle,
|
||||
mapping, inode->i_size, page_len, 0);
|
||||
|
||||
if (ret)
|
||||
goto out_stop;
|
||||
}
|
||||
}
|
||||
|
||||
first_block = (offset + sb->s_blocksize - 1) >>
|
||||
EXT4_BLOCK_SIZE_BITS(sb);
|
||||
stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
|
||||
|
||||
/* If there are no blocks to remove, return now */
|
||||
if (first_block >= stop_block)
|
||||
goto out_stop;
|
||||
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
ret = ext4_es_remove_extent(inode, first_block,
|
||||
stop_block - first_block);
|
||||
if (ret) {
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
goto out_stop;
|
||||
}
|
||||
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||
ret = ext4_ext_remove_space(inode, first_block,
|
||||
stop_block - 1);
|
||||
else
|
||||
ret = ext4_free_hole_blocks(handle, inode, first_block,
|
||||
stop_block);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
if (IS_SYNC(inode))
|
||||
ext4_handle_sync(handle);
|
||||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
out_stop:
|
||||
ext4_journal_stop(handle);
|
||||
out_dio:
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
out_mutex:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3692,6 +3807,19 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
|
||||
*/
|
||||
void ext4_truncate(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
unsigned int credits;
|
||||
handle_t *handle;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
loff_t page_len;
|
||||
|
||||
/*
|
||||
* There is a possibility that we're either freeing the inode
|
||||
* or it is a completely new inode. In those cases we might not
|
||||
* have i_mutex locked because it's not necessary.
|
||||
*/
|
||||
if (!(inode->i_state & (I_NEW|I_FREEING)))
|
||||
WARN_ON(!mutex_is_locked(&inode->i_mutex));
|
||||
trace_ext4_truncate_enter(inode);
|
||||
|
||||
if (!ext4_can_truncate(inode))
|
||||
@ -3710,10 +3838,72 @@ void ext4_truncate(struct inode *inode)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* finish any pending end_io work so we won't run the risk of
|
||||
* converting any truncated blocks to initialized later
|
||||
*/
|
||||
ext4_flush_unwritten_io(inode);
|
||||
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||
ext4_ext_truncate(inode);
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
else
|
||||
ext4_ind_truncate(inode);
|
||||
credits = ext4_blocks_for_truncate(inode);
|
||||
|
||||
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
|
||||
if (IS_ERR(handle)) {
|
||||
ext4_std_error(inode->i_sb, PTR_ERR(handle));
|
||||
return;
|
||||
}
|
||||
|
||||
if (inode->i_size % PAGE_CACHE_SIZE != 0) {
|
||||
page_len = PAGE_CACHE_SIZE -
|
||||
(inode->i_size & (PAGE_CACHE_SIZE - 1));
|
||||
|
||||
if (ext4_discard_partial_page_buffers(handle,
|
||||
mapping, inode->i_size, page_len, 0))
|
||||
goto out_stop;
|
||||
}
|
||||
|
||||
/*
|
||||
* We add the inode to the orphan list, so that if this
|
||||
* truncate spans multiple transactions, and we crash, we will
|
||||
* resume the truncate when the filesystem recovers. It also
|
||||
* marks the inode dirty, to catch the new size.
|
||||
*
|
||||
* Implication: the file must always be in a sane, consistent
|
||||
* truncatable state while each transaction commits.
|
||||
*/
|
||||
if (ext4_orphan_add(handle, inode))
|
||||
goto out_stop;
|
||||
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||
ext4_ext_truncate(handle, inode);
|
||||
else
|
||||
ext4_ind_truncate(handle, inode);
|
||||
|
||||
up_write(&ei->i_data_sem);
|
||||
|
||||
if (IS_SYNC(inode))
|
||||
ext4_handle_sync(handle);
|
||||
|
||||
out_stop:
|
||||
/*
|
||||
* If this was a simple ftruncate() and the file will remain alive,
|
||||
* then we need to clear up the orphan record which we created above.
|
||||
* However, if this was a real unlink then we were called by
|
||||
* ext4_delete_inode(), and we allow that function to clean up the
|
||||
* orphan info for us.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(handle, inode);
|
||||
|
||||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
trace_ext4_truncate_exit(inode);
|
||||
}
|
||||
@ -3821,13 +4011,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
|
||||
if (EXT4_SB(sb)->s_inode_readahead_blks) {
|
||||
ext4_fsblk_t b, end, table;
|
||||
unsigned num;
|
||||
__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
|
||||
|
||||
table = ext4_inode_table(sb, gdp);
|
||||
/* s_inode_readahead_blks is always a power of 2 */
|
||||
b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
|
||||
b = block & ~((ext4_fsblk_t) ra_blks - 1);
|
||||
if (table > b)
|
||||
b = table;
|
||||
end = b + EXT4_SB(sb)->s_inode_readahead_blks;
|
||||
end = b + ra_blks;
|
||||
num = EXT4_INODES_PER_GROUP(sb);
|
||||
if (ext4_has_group_desc_csum(sb))
|
||||
num -= ext4_itable_unused_count(sb, gdp);
|
||||
@ -4024,8 +4215,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
||||
* NeilBrown 1999oct15
|
||||
*/
|
||||
if (inode->i_nlink == 0) {
|
||||
if (inode->i_mode == 0 ||
|
||||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
|
||||
if ((inode->i_mode == 0 ||
|
||||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
|
||||
ino != EXT4_BOOT_LOADER_INO) {
|
||||
/* this inode is deleted */
|
||||
ret = -ESTALE;
|
||||
goto bad_inode;
|
||||
@ -4033,7 +4225,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
||||
/* The only unlinked inodes we let through here have
|
||||
* valid i_mode and are being read by the orphan
|
||||
* recovery code: that's fine, we're about to complete
|
||||
* the process of deleting those. */
|
||||
* the process of deleting those.
|
||||
* OR it is the EXT4_BOOT_LOADER_INO which is
|
||||
* not initialized on a new filesystem. */
|
||||
}
|
||||
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
|
||||
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
|
||||
@ -4153,6 +4347,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
||||
else
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
|
||||
} else if (ino == EXT4_BOOT_LOADER_INO) {
|
||||
make_bad_inode(inode);
|
||||
} else {
|
||||
ret = -EIO;
|
||||
EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
|
||||
|
218
fs/ext4/ioctl.c
218
fs/ext4/ioctl.c
@ -17,9 +17,201 @@
|
||||
#include <asm/uaccess.h>
|
||||
#include "ext4_jbd2.h"
|
||||
#include "ext4.h"
|
||||
#include "ext4_extents.h"
|
||||
|
||||
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
|
||||
|
||||
/**
 * memswap() - exchange the contents of two memory areas
 * @a:   pointer to the first memory area
 * @b:   pointer to the second memory area
 * @len: number of bytes to exchange
 *
 * The areas are swapped one byte at a time, from the lowest address
 * upwards. A @len of zero leaves both areas untouched.
 */
static void memswap(void *a, void *b, size_t len)
{
	unsigned char *left = a;
	unsigned char *right = b;
	size_t i;

	for (i = 0; i < len; i++) {
		unsigned char saved = left[i];

		left[i] = right[i];
		right[i] = saved;
	}
}
|
||||
|
||||
/**
|
||||
* Swap i_data and associated attributes between @inode1 and @inode2.
|
||||
* This function is used for the primary swap between inode1 and inode2
|
||||
* and also to revert this primary swap in case of errors.
|
||||
*
|
||||
* Therefore you have to make sure, that calling this method twice
|
||||
* will revert all changes.
|
||||
*
|
||||
* @inode1: pointer to first inode
|
||||
* @inode2: pointer to second inode
|
||||
*/
|
||||
static void swap_inode_data(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
loff_t isize;
|
||||
struct ext4_inode_info *ei1;
|
||||
struct ext4_inode_info *ei2;
|
||||
|
||||
ei1 = EXT4_I(inode1);
|
||||
ei2 = EXT4_I(inode2);
|
||||
|
||||
memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
|
||||
memswap(&inode1->i_version, &inode2->i_version,
|
||||
sizeof(inode1->i_version));
|
||||
memswap(&inode1->i_blocks, &inode2->i_blocks,
|
||||
sizeof(inode1->i_blocks));
|
||||
memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
|
||||
memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
|
||||
memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));
|
||||
|
||||
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
|
||||
memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
|
||||
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
|
||||
memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
|
||||
memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
|
||||
|
||||
isize = i_size_read(inode1);
|
||||
i_size_write(inode1, i_size_read(inode2));
|
||||
i_size_write(inode2, isize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap the information from the given @inode and the inode
|
||||
* EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
|
||||
* important fields of the inodes.
|
||||
*
|
||||
* @sb: the super block of the filesystem
|
||||
* @inode: the inode to swap with EXT4_BOOT_LOADER_INO
|
||||
*
|
||||
*/
|
||||
static long swap_inode_boot_loader(struct super_block *sb,
|
||||
struct inode *inode)
|
||||
{
|
||||
handle_t *handle;
|
||||
int err;
|
||||
struct inode *inode_bl;
|
||||
struct ext4_inode_info *ei;
|
||||
struct ext4_inode_info *ei_bl;
|
||||
struct ext4_sb_info *sbi;
|
||||
|
||||
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) {
|
||||
err = -EINVAL;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
|
||||
err = -EPERM;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
sbi = EXT4_SB(sb);
|
||||
ei = EXT4_I(inode);
|
||||
|
||||
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
|
||||
if (IS_ERR(inode_bl)) {
|
||||
err = PTR_ERR(inode_bl);
|
||||
goto swap_boot_out;
|
||||
}
|
||||
ei_bl = EXT4_I(inode_bl);
|
||||
|
||||
filemap_flush(inode->i_mapping);
|
||||
filemap_flush(inode_bl->i_mapping);
|
||||
|
||||
/* Protect orig inodes against a truncate and make sure,
|
||||
* that only 1 swap_inode_boot_loader is running. */
|
||||
ext4_inode_double_lock(inode, inode_bl);
|
||||
|
||||
truncate_inode_pages(&inode->i_data, 0);
|
||||
truncate_inode_pages(&inode_bl->i_data, 0);
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
ext4_inode_block_unlocked_dio(inode_bl);
|
||||
inode_dio_wait(inode);
|
||||
inode_dio_wait(inode_bl);
|
||||
|
||||
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
err = -EINVAL;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
/* Protect extent tree against block allocations via delalloc */
|
||||
ext4_double_down_write_data_sem(inode, inode_bl);
|
||||
|
||||
if (inode_bl->i_nlink == 0) {
|
||||
/* this inode has never been used as a BOOT_LOADER */
|
||||
set_nlink(inode_bl, 1);
|
||||
i_uid_write(inode_bl, 0);
|
||||
i_gid_write(inode_bl, 0);
|
||||
inode_bl->i_flags = 0;
|
||||
ei_bl->i_flags = 0;
|
||||
inode_bl->i_version = 1;
|
||||
i_size_write(inode_bl, 0);
|
||||
inode_bl->i_mode = S_IFREG;
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_INCOMPAT_EXTENTS)) {
|
||||
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
|
||||
ext4_ext_tree_init(handle, inode_bl);
|
||||
} else
|
||||
memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
|
||||
}
|
||||
|
||||
swap_inode_data(inode, inode_bl);
|
||||
|
||||
inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode);
|
||||
|
||||
spin_lock(&sbi->s_next_gen_lock);
|
||||
inode->i_generation = sbi->s_next_generation++;
|
||||
inode_bl->i_generation = sbi->s_next_generation++;
|
||||
spin_unlock(&sbi->s_next_gen_lock);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
err = ext4_mark_inode_dirty(handle, inode);
|
||||
if (err < 0) {
|
||||
ext4_warning(inode->i_sb,
|
||||
"couldn't mark inode #%lu dirty (err %d)",
|
||||
inode->i_ino, err);
|
||||
/* Revert all changes: */
|
||||
swap_inode_data(inode, inode_bl);
|
||||
} else {
|
||||
err = ext4_mark_inode_dirty(handle, inode_bl);
|
||||
if (err < 0) {
|
||||
ext4_warning(inode_bl->i_sb,
|
||||
"couldn't mark inode #%lu dirty (err %d)",
|
||||
inode_bl->i_ino, err);
|
||||
/* Revert all changes: */
|
||||
swap_inode_data(inode, inode_bl);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
}
|
||||
}
|
||||
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
ext4_double_up_write_data_sem(inode, inode_bl);
|
||||
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
ext4_inode_resume_unlocked_dio(inode_bl);
|
||||
|
||||
ext4_inode_double_unlock(inode, inode_bl);
|
||||
|
||||
iput(inode_bl);
|
||||
|
||||
swap_boot_out:
|
||||
return err;
|
||||
}
|
||||
|
||||
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(filp);
|
||||
@ -83,17 +275,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
if (!capable(CAP_SYS_RESOURCE))
|
||||
goto flags_out;
|
||||
}
|
||||
if (oldflags & EXT4_EXTENTS_FL) {
|
||||
/* We don't support clearning extent flags */
|
||||
if (!(flags & EXT4_EXTENTS_FL)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto flags_out;
|
||||
}
|
||||
} else if (flags & EXT4_EXTENTS_FL) {
|
||||
/* migrate the file */
|
||||
if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
|
||||
migrate = 1;
|
||||
flags &= ~EXT4_EXTENTS_FL;
|
||||
}
|
||||
|
||||
if (flags & EXT4_EOFBLOCKS_FL) {
|
||||
/* we don't support adding EOFBLOCKS flag */
|
||||
@ -137,8 +320,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
err = ext4_change_inode_journal_flag(inode, jflag);
|
||||
if (err)
|
||||
goto flags_out;
|
||||
if (migrate)
|
||||
err = ext4_ext_migrate(inode);
|
||||
if (migrate) {
|
||||
if (flags & EXT4_EXTENTS_FL)
|
||||
err = ext4_ext_migrate(inode);
|
||||
else
|
||||
err = ext4_ind_migrate(inode);
|
||||
}
|
||||
|
||||
flags_out:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
mnt_drop_write_file(filp);
|
||||
@ -357,9 +545,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
return err;
|
||||
}
|
||||
|
||||
case EXT4_IOC_SWAP_BOOT:
|
||||
if (!(filp->f_mode & FMODE_WRITE))
|
||||
return -EBADF;
|
||||
return swap_inode_boot_loader(sb, inode);
|
||||
|
||||
case EXT4_IOC_RESIZE_FS: {
|
||||
ext4_fsblk_t n_blocks_count;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
int err = 0, err2 = 0;
|
||||
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
|
||||
|
||||
|
@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr)
|
||||
ext4_clear_bit(bit, addr);
|
||||
}
|
||||
|
||||
/*
 * Clear @bit in the bitmap at @addr and return whatever
 * ext4_test_and_clear_bit() reports for it. The address and the bit index
 * are first passed through mb_correct_addr_and_bit(), which may adjust both.
 */
static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	void *fixed = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_and_clear_bit(bit, fixed);
}
|
||||
|
||||
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
|
||||
{
|
||||
int fix = 0, ret, tmpmax;
|
||||
@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
|
||||
spin_unlock(&EXT4_SB(sb)->s_bal_lock);
|
||||
}
|
||||
|
||||
static void mb_regenerate_buddy(struct ext4_buddy *e4b)
|
||||
{
|
||||
int count;
|
||||
int order = 1;
|
||||
void *buddy;
|
||||
|
||||
while ((buddy = mb_find_buddy(e4b, order++, &count))) {
|
||||
ext4_set_bits(buddy, 0, count);
|
||||
}
|
||||
e4b->bd_info->bb_fragments = 0;
|
||||
memset(e4b->bd_info->bb_counters, 0,
|
||||
sizeof(*e4b->bd_info->bb_counters) *
|
||||
(e4b->bd_sb->s_blocksize_bits + 2));
|
||||
|
||||
ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
|
||||
e4b->bd_bitmap, e4b->bd_group);
|
||||
}
|
||||
|
||||
/* The buddy information is attached to the buddy cache inode
|
||||
* for convenience. The information regarding each group
|
||||
* is loaded via ext4_mb_load_buddy. The information involve
|
||||
@ -860,8 +884,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
|
||||
first_block = page->index * blocks_per_page;
|
||||
for (i = 0; i < blocks_per_page; i++) {
|
||||
int group;
|
||||
|
||||
group = (first_block + i) >> 1;
|
||||
if (group >= ngroups)
|
||||
break;
|
||||
@ -1011,6 +1033,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
|
||||
struct page *page;
|
||||
int ret = 0;
|
||||
|
||||
might_sleep();
|
||||
mb_debug(1, "init group %u\n", group);
|
||||
this_grp = ext4_get_group_info(sb, group);
|
||||
/*
|
||||
@ -1082,6 +1105,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct inode *inode = sbi->s_buddy_cache;
|
||||
|
||||
might_sleep();
|
||||
mb_debug(1, "load group %u\n", group);
|
||||
|
||||
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
|
||||
@ -1244,6 +1268,33 @@ static void mb_clear_bits(void *bm, int cur, int len)
|
||||
}
|
||||
}
|
||||
|
||||
/* clear bits in given range
|
||||
* will return first found zero bit if any, -1 otherwise
|
||||
*/
|
||||
static int mb_test_and_clear_bits(void *bm, int cur, int len)
|
||||
{
|
||||
__u32 *addr;
|
||||
int zero_bit = -1;
|
||||
|
||||
len = cur + len;
|
||||
while (cur < len) {
|
||||
if ((cur & 31) == 0 && (len - cur) >= 32) {
|
||||
/* fast path: clear whole word at once */
|
||||
addr = bm + (cur >> 3);
|
||||
if (*addr != (__u32)(-1) && zero_bit == -1)
|
||||
zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
|
||||
*addr = 0;
|
||||
cur += 32;
|
||||
continue;
|
||||
}
|
||||
if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
|
||||
zero_bit = cur;
|
||||
cur++;
|
||||
}
|
||||
|
||||
return zero_bit;
|
||||
}
|
||||
|
||||
void ext4_set_bits(void *bm, int cur, int len)
|
||||
{
|
||||
__u32 *addr;
|
||||
@ -1262,17 +1313,90 @@ void ext4_set_bits(void *bm, int cur, int len)
|
||||
}
|
||||
}
|
||||
|
||||
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
|
||||
int first, int count)
|
||||
/*
|
||||
* _________________________________________________________________ */
|
||||
|
||||
/*
 * Fix up one border of a range that is being freed in a buddy bitmap.
 *
 * @bit:    in/out, index of the border bit inside @bitmap
 * @bitmap: buddy bitmap of the order being processed
 * @side:   direction of the neighbour to inspect; callers pass -1 for the
 *          left border and +1 for the right border
 *
 * If the neighbour (*bit + side) is set, the border bit itself is cleared
 * and *bit is moved one step away from that neighbour, shrinking the
 * range; 1 is returned.
 *
 * Otherwise *bit is moved onto the neighbour, that bit is set and -1 is
 * returned, extending the range.
 *
 * The return value is the delta callers add to bb_counters[] for this order.
 */
static inline int mb_buddy_adjust_border(int *bit, void *bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	} else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}
|
||||
|
||||
static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
|
||||
{
|
||||
int max;
|
||||
int order = 1;
|
||||
void *buddy = mb_find_buddy(e4b, order, &max);
|
||||
|
||||
while (buddy) {
|
||||
void *buddy2;
|
||||
|
||||
/* Bits in range [first; last] are known to be set since
|
||||
* corresponding blocks were allocated. Bits in range
|
||||
* (first; last) will stay set because they form buddies on
|
||||
* upper layer. We just deal with borders if they don't
|
||||
* align with upper layer and then go up.
|
||||
* Releasing entire group is all about clearing
|
||||
* single bit of highest order buddy.
|
||||
*/
|
||||
|
||||
/* Example:
|
||||
* ---------------------------------
|
||||
* | 1 | 1 | 1 | 1 |
|
||||
* ---------------------------------
|
||||
* | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
|
||||
* ---------------------------------
|
||||
* 0 1 2 3 4 5 6 7
|
||||
* \_____________________/
|
||||
*
|
||||
* Neither [1] nor [6] is aligned to above layer.
|
||||
* Left neighbour [0] is free, so mark it busy,
|
||||
* decrease bb_counters and extend range to
|
||||
* [0; 6]
|
||||
* Right neighbour [7] is busy. It can't be coaleasced with [6], so
|
||||
* mark [6] free, increase bb_counters and shrink range to
|
||||
* [0; 5].
|
||||
* Then shift range to [0; 2], go up and do the same.
|
||||
*/
|
||||
|
||||
|
||||
if (first & 1)
|
||||
e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
|
||||
if (!(last & 1))
|
||||
e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
|
||||
if (first > last)
|
||||
break;
|
||||
order++;
|
||||
|
||||
if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
|
||||
mb_clear_bits(buddy, first, last - first + 1);
|
||||
e4b->bd_info->bb_counters[order - 1] += last - first + 1;
|
||||
break;
|
||||
}
|
||||
first >>= 1;
|
||||
last >>= 1;
|
||||
buddy = buddy2;
|
||||
}
|
||||
}
|
||||
|
||||
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
|
||||
int first, int count)
|
||||
{
|
||||
int left_is_free = 0;
|
||||
int right_is_free = 0;
|
||||
int block;
|
||||
int last = first + count - 1;
|
||||
struct super_block *sb = e4b->bd_sb;
|
||||
|
||||
BUG_ON(first + count > (sb->s_blocksize << 3));
|
||||
BUG_ON(last >= (sb->s_blocksize << 3));
|
||||
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
|
||||
mb_check_buddy(e4b);
|
||||
mb_free_blocks_double(inode, e4b, first, count);
|
||||
@ -1281,67 +1405,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
|
||||
if (first < e4b->bd_info->bb_first_free)
|
||||
e4b->bd_info->bb_first_free = first;
|
||||
|
||||
/* let's maintain fragments counter */
|
||||
/* access memory sequentially: check left neighbour,
|
||||
* clear range and then check right neighbour
|
||||
*/
|
||||
if (first != 0)
|
||||
block = !mb_test_bit(first - 1, e4b->bd_bitmap);
|
||||
if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
|
||||
max = !mb_test_bit(first + count, e4b->bd_bitmap);
|
||||
if (block && max)
|
||||
left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
|
||||
block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
|
||||
if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
|
||||
right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
|
||||
|
||||
if (unlikely(block != -1)) {
|
||||
ext4_fsblk_t blocknr;
|
||||
|
||||
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
|
||||
blocknr += EXT4_C2B(EXT4_SB(sb), block);
|
||||
ext4_grp_locked_error(sb, e4b->bd_group,
|
||||
inode ? inode->i_ino : 0,
|
||||
blocknr,
|
||||
"freeing already freed block "
|
||||
"(bit %u)", block);
|
||||
mb_regenerate_buddy(e4b);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* let's maintain fragments counter */
|
||||
if (left_is_free && right_is_free)
|
||||
e4b->bd_info->bb_fragments--;
|
||||
else if (!block && !max)
|
||||
else if (!left_is_free && !right_is_free)
|
||||
e4b->bd_info->bb_fragments++;
|
||||
|
||||
/* let's maintain buddy itself */
|
||||
while (count-- > 0) {
|
||||
block = first++;
|
||||
order = 0;
|
||||
|
||||
if (!mb_test_bit(block, e4b->bd_bitmap)) {
|
||||
ext4_fsblk_t blocknr;
|
||||
|
||||
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
|
||||
blocknr += EXT4_C2B(EXT4_SB(sb), block);
|
||||
ext4_grp_locked_error(sb, e4b->bd_group,
|
||||
inode ? inode->i_ino : 0,
|
||||
blocknr,
|
||||
"freeing already freed block "
|
||||
"(bit %u)", block);
|
||||
}
|
||||
mb_clear_bit(block, e4b->bd_bitmap);
|
||||
e4b->bd_info->bb_counters[order]++;
|
||||
|
||||
/* start of the buddy */
|
||||
buddy = mb_find_buddy(e4b, order, &max);
|
||||
|
||||
do {
|
||||
block &= ~1UL;
|
||||
if (mb_test_bit(block, buddy) ||
|
||||
mb_test_bit(block + 1, buddy))
|
||||
break;
|
||||
|
||||
/* both the buddies are free, try to coalesce them */
|
||||
buddy2 = mb_find_buddy(e4b, order + 1, &max);
|
||||
|
||||
if (!buddy2)
|
||||
break;
|
||||
|
||||
if (order > 0) {
|
||||
/* for special purposes, we don't set
|
||||
* free bits in bitmap */
|
||||
mb_set_bit(block, buddy);
|
||||
mb_set_bit(block + 1, buddy);
|
||||
}
|
||||
e4b->bd_info->bb_counters[order]--;
|
||||
e4b->bd_info->bb_counters[order]--;
|
||||
|
||||
block = block >> 1;
|
||||
order++;
|
||||
e4b->bd_info->bb_counters[order]++;
|
||||
|
||||
mb_clear_bit(block, buddy2);
|
||||
buddy = buddy2;
|
||||
} while (1);
|
||||
/* buddy[0] == bd_bitmap is a special case, so handle
|
||||
* it right away and let mb_buddy_mark_free stay free of
|
||||
* zero order checks.
|
||||
* Check if neighbours are to be coaleasced,
|
||||
* adjust bitmap bb_counters and borders appropriately.
|
||||
*/
|
||||
if (first & 1) {
|
||||
first += !left_is_free;
|
||||
e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
|
||||
}
|
||||
if (!(last & 1)) {
|
||||
last -= !right_is_free;
|
||||
e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
|
||||
}
|
||||
|
||||
if (first <= last)
|
||||
mb_buddy_mark_free(e4b, first >> 1, last >> 1);
|
||||
|
||||
done:
|
||||
mb_set_largest_free_order(sb, e4b->bd_info);
|
||||
mb_check_buddy(e4b);
|
||||
}
|
||||
@ -3342,7 +3453,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
|
||||
if (pa->pa_type == MB_GROUP_PA)
|
||||
grp_blk--;
|
||||
|
||||
ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
|
||||
grp = ext4_get_group_number(sb, grp_blk);
|
||||
|
||||
/*
|
||||
* possible race:
|
||||
@ -3807,7 +3918,7 @@ void ext4_discard_preallocations(struct inode *inode)
|
||||
|
||||
list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
|
||||
BUG_ON(pa->pa_type != MB_INODE_PA);
|
||||
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
|
||||
group = ext4_get_group_number(sb, pa->pa_pstart);
|
||||
|
||||
err = ext4_mb_load_buddy(sb, group, &e4b);
|
||||
if (err) {
|
||||
@ -4069,7 +4180,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
|
||||
|
||||
list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
|
||||
|
||||
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
|
||||
group = ext4_get_group_number(sb, pa->pa_pstart);
|
||||
if (ext4_mb_load_buddy(sb, group, &e4b)) {
|
||||
ext4_error(sb, "Error loading buddy information for %u",
|
||||
group);
|
||||
@ -4217,6 +4328,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
||||
unsigned int inquota = 0;
|
||||
unsigned int reserv_clstrs = 0;
|
||||
|
||||
might_sleep();
|
||||
sb = ar->inode->i_sb;
|
||||
sbi = EXT4_SB(sb);
|
||||
|
||||
@ -4420,11 +4532,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
|
||||
node = rb_prev(new_node);
|
||||
if (node) {
|
||||
entry = rb_entry(node, struct ext4_free_data, efd_node);
|
||||
if (can_merge(entry, new_entry)) {
|
||||
if (can_merge(entry, new_entry) &&
|
||||
ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
|
||||
new_entry->efd_start_cluster = entry->efd_start_cluster;
|
||||
new_entry->efd_count += entry->efd_count;
|
||||
rb_erase(node, &(db->bb_free_root));
|
||||
ext4_journal_callback_del(handle, &entry->efd_jce);
|
||||
kmem_cache_free(ext4_free_data_cachep, entry);
|
||||
}
|
||||
}
|
||||
@ -4432,10 +4544,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
|
||||
node = rb_next(new_node);
|
||||
if (node) {
|
||||
entry = rb_entry(node, struct ext4_free_data, efd_node);
|
||||
if (can_merge(new_entry, entry)) {
|
||||
if (can_merge(new_entry, entry) &&
|
||||
ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
|
||||
new_entry->efd_count += entry->efd_count;
|
||||
rb_erase(node, &(db->bb_free_root));
|
||||
ext4_journal_callback_del(handle, &entry->efd_jce);
|
||||
kmem_cache_free(ext4_free_data_cachep, entry);
|
||||
}
|
||||
}
|
||||
@ -4470,6 +4582,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
|
||||
int err = 0;
|
||||
int ret;
|
||||
|
||||
might_sleep();
|
||||
if (bh) {
|
||||
if (block)
|
||||
BUG_ON(block != bh->b_blocknr);
|
||||
|
@ -426,7 +426,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
|
||||
return retval;
|
||||
}
|
||||
return retval;
|
||||
|
||||
}
|
||||
|
||||
int ext4_ext_migrate(struct inode *inode)
|
||||
@ -606,3 +605,64 @@ int ext4_ext_migrate(struct inode *inode)
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Migrate a simple extent-based inode to use the i_blocks[] array
|
||||
*/
|
||||
int ext4_ind_migrate(struct inode *inode)
|
||||
{
|
||||
struct ext4_extent_header *eh;
|
||||
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct ext4_extent *ex;
|
||||
unsigned int i, len;
|
||||
ext4_fsblk_t blk;
|
||||
handle_t *handle;
|
||||
int ret;
|
||||
|
||||
if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
|
||||
EXT4_FEATURE_INCOMPAT_EXTENTS) ||
|
||||
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
||||
return -EINVAL;
|
||||
|
||||
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
|
||||
EXT4_FEATURE_RO_COMPAT_BIGALLOC))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ret = ext4_ext_check_inode(inode);
|
||||
if (ret)
|
||||
goto errout;
|
||||
|
||||
eh = ext_inode_hdr(inode);
|
||||
ex = EXT_FIRST_EXTENT(eh);
|
||||
if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS ||
|
||||
eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto errout;
|
||||
}
|
||||
if (eh->eh_entries == 0)
|
||||
blk = len = 0;
|
||||
else {
|
||||
len = le16_to_cpu(ex->ee_len);
|
||||
blk = ext4_ext_pblock(ex);
|
||||
if (len > EXT4_NDIR_BLOCKS) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto errout;
|
||||
}
|
||||
}
|
||||
|
||||
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
|
||||
memset(ei->i_data, 0, sizeof(ei->i_data));
|
||||
for (i=0; i < len; i++)
|
||||
ei->i_data[i] = cpu_to_le32(blk++);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
errout:
|
||||
ext4_journal_stop(handle);
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
return ret;
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "ext4.h"
|
||||
|
||||
/* Checksumming functions */
|
||||
static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
|
||||
static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
int offset = offsetof(struct mmp_struct, mmp_checksum);
|
||||
@ -54,7 +54,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
|
||||
lock_buffer(bh);
|
||||
bh->b_end_io = end_buffer_write_sync;
|
||||
get_bh(bh);
|
||||
submit_bh(WRITE_SYNC, bh);
|
||||
submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
|
||||
wait_on_buffer(bh);
|
||||
sb_end_write(sb);
|
||||
if (unlikely(!buffer_uptodate(bh)))
|
||||
@ -86,7 +86,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
|
||||
get_bh(*bh);
|
||||
lock_buffer(*bh);
|
||||
(*bh)->b_end_io = end_buffer_read_sync;
|
||||
submit_bh(READ_SYNC, *bh);
|
||||
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
|
||||
wait_on_buffer(*bh);
|
||||
if (!buffer_uptodate(*bh)) {
|
||||
brelse(*bh);
|
||||
|
@ -144,12 +144,13 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
|
||||
}
|
||||
|
||||
/**
|
||||
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
|
||||
* ext4_double_down_write_data_sem - Acquire two inodes' write lock
|
||||
* of i_data_sem
|
||||
*
|
||||
* Acquire write lock of i_data_sem of the two inodes
|
||||
*/
|
||||
static void
|
||||
double_down_write_data_sem(struct inode *first, struct inode *second)
|
||||
void
|
||||
ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
|
||||
{
|
||||
if (first < second) {
|
||||
down_write(&EXT4_I(first)->i_data_sem);
|
||||
@ -162,14 +163,15 @@ double_down_write_data_sem(struct inode *first, struct inode *second)
|
||||
}
|
||||
|
||||
/**
|
||||
* double_up_write_data_sem - Release two inodes' write lock of i_data_sem
|
||||
* ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
|
||||
*
|
||||
* @orig_inode: original inode structure to be released its lock first
|
||||
* @donor_inode: donor inode structure to be released its lock second
|
||||
* Release write lock of i_data_sem of two inodes (orig and donor).
|
||||
*/
|
||||
static void
|
||||
double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
|
||||
void
|
||||
ext4_double_up_write_data_sem(struct inode *orig_inode,
|
||||
struct inode *donor_inode)
|
||||
{
|
||||
up_write(&EXT4_I(orig_inode)->i_data_sem);
|
||||
up_write(&EXT4_I(donor_inode)->i_data_sem);
|
||||
@ -407,18 +409,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,
|
||||
mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
|
||||
end_ext, eh, range_to_move);
|
||||
|
||||
if (depth) {
|
||||
ret = ext4_handle_dirty_metadata(handle, orig_inode,
|
||||
orig_path->p_bh);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
ret = ext4_mark_inode_dirty(handle, orig_inode);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ext4_ext_dirty(handle, orig_inode, orig_path);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -737,6 +728,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
||||
donor_off += dext_alen;
|
||||
orig_off += dext_alen;
|
||||
|
||||
BUG_ON(replaced_count > count);
|
||||
/* Already moved the expected blocks */
|
||||
if (replaced_count >= count)
|
||||
break;
|
||||
@ -814,7 +806,13 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
|
||||
page_cache_release(page[0]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* grab_cache_page_write_begin() may not wait on page's writeback if
|
||||
* BDI not demand that. But it is reasonable to be very conservative
|
||||
* here and explicitly wait on page's writeback
|
||||
*/
|
||||
wait_on_page_writeback(page[0]);
|
||||
wait_on_page_writeback(page[1]);
|
||||
if (inode1 > inode2) {
|
||||
struct page *tmp;
|
||||
tmp = page[0];
|
||||
@ -856,7 +854,6 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
|
||||
if (buffer_uptodate(bh))
|
||||
continue;
|
||||
if (!buffer_mapped(bh)) {
|
||||
int err = 0;
|
||||
err = ext4_get_block(inode, block, bh, 0);
|
||||
if (err) {
|
||||
SetPageError(page);
|
||||
@ -976,7 +973,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
||||
* necessary, just swap data blocks between orig and donor.
|
||||
*/
|
||||
if (uninit) {
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
/* If any of extents in range became initialized we have to
|
||||
* fallback to data copying */
|
||||
uninit = mext_check_coverage(orig_inode, orig_blk_offset,
|
||||
@ -990,7 +987,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
||||
goto drop_data_sem;
|
||||
|
||||
if (!uninit) {
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
goto data_copy;
|
||||
}
|
||||
if ((page_has_private(pagep[0]) &&
|
||||
@ -1004,7 +1001,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
||||
donor_inode, orig_blk_offset,
|
||||
block_len_in_page, err);
|
||||
drop_data_sem:
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
goto unlock_pages;
|
||||
}
|
||||
data_copy:
|
||||
@ -1033,7 +1030,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
||||
}
|
||||
/* Perform all necessary steps similar write_begin()/write_end()
|
||||
* but keeping in mind that i_size will not change */
|
||||
*err = __block_write_begin(pagep[0], from, from + replaced_size,
|
||||
*err = __block_write_begin(pagep[0], from, replaced_size,
|
||||
ext4_get_block);
|
||||
if (!*err)
|
||||
*err = block_commit_write(pagep[0], from, from + replaced_size);
|
||||
@ -1065,11 +1062,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
||||
* Extents are swapped already, but we are not able to copy data.
|
||||
* Try to swap extents to it's original places
|
||||
*/
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
|
||||
orig_blk_offset,
|
||||
block_len_in_page, &err2);
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
if (replaced_count != block_len_in_page) {
|
||||
EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
|
||||
"Unable to copy data block,"
|
||||
@ -1209,15 +1206,15 @@ mext_check_arguments(struct inode *orig_inode,
|
||||
}
|
||||
|
||||
/**
|
||||
* mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
|
||||
* ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
|
||||
*
|
||||
* @inode1: the inode structure
|
||||
* @inode2: the inode structure
|
||||
*
|
||||
* Lock two inodes' i_mutex
|
||||
*/
|
||||
static void
|
||||
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
||||
void
|
||||
ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
BUG_ON(inode1 == inode2);
|
||||
if (inode1 < inode2) {
|
||||
@ -1230,15 +1227,15 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
||||
}
|
||||
|
||||
/**
|
||||
* mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
|
||||
* ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
|
||||
*
|
||||
* @inode1: the inode that is released first
|
||||
* @inode2: the inode that is released second
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
|
||||
void
|
||||
ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
mutex_unlock(&inode1->i_mutex);
|
||||
mutex_unlock(&inode2->i_mutex);
|
||||
@ -1333,7 +1330,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
return -EINVAL;
|
||||
}
|
||||
/* Protect orig and donor inodes against a truncate */
|
||||
mext_inode_double_lock(orig_inode, donor_inode);
|
||||
ext4_inode_double_lock(orig_inode, donor_inode);
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(orig_inode);
|
||||
@ -1342,7 +1339,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
inode_dio_wait(donor_inode);
|
||||
|
||||
/* Protect extent tree against block allocations via delalloc */
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
/* Check the filesystem environment whether move_extent can be done */
|
||||
ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
|
||||
donor_start, &len);
|
||||
@ -1466,7 +1463,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
* b. racing with ->readpage, ->write_begin, and ext4_get_block
|
||||
* in move_extent_per_page
|
||||
*/
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
|
||||
while (orig_page_offset <= seq_end_page) {
|
||||
|
||||
@ -1500,7 +1497,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
block_len_in_page = rest_blocks;
|
||||
}
|
||||
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
@ -1538,10 +1535,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
ext4_ext_drop_refs(holecheck_path);
|
||||
kfree(holecheck_path);
|
||||
}
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_inode_resume_unlocked_dio(orig_inode);
|
||||
ext4_inode_resume_unlocked_dio(donor_inode);
|
||||
mext_inode_double_unlock(orig_inode, donor_inode);
|
||||
ext4_inode_double_unlock(orig_inode, donor_inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -416,15 +416,16 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
__u32 csum, old_csum;
|
||||
__u32 csum;
|
||||
__le32 save_csum;
|
||||
int size;
|
||||
|
||||
size = count_offset + (count * sizeof(struct dx_entry));
|
||||
old_csum = t->dt_checksum;
|
||||
save_csum = t->dt_checksum;
|
||||
t->dt_checksum = 0;
|
||||
csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
|
||||
csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
|
||||
t->dt_checksum = old_csum;
|
||||
t->dt_checksum = save_csum;
|
||||
|
||||
return cpu_to_le32(csum);
|
||||
}
|
||||
@ -971,6 +972,17 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
|
||||
hinfo.hash_version +=
|
||||
EXT4_SB(dir->i_sb)->s_hash_unsigned;
|
||||
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
|
||||
if (ext4_has_inline_data(dir)) {
|
||||
int has_inline_data = 1;
|
||||
count = htree_inlinedir_to_tree(dir_file, dir, 0,
|
||||
&hinfo, start_hash,
|
||||
start_minor_hash,
|
||||
&has_inline_data);
|
||||
if (has_inline_data) {
|
||||
*next_hash = ~0;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
|
||||
start_hash, start_minor_hash);
|
||||
*next_hash = ~0;
|
||||
@ -1455,24 +1467,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
|
||||
return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
|
||||
}
|
||||
|
||||
#define S_SHIFT 12
|
||||
static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
|
||||
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
|
||||
[S_IFDIR >> S_SHIFT] = EXT4_FT_DIR,
|
||||
[S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV,
|
||||
[S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV,
|
||||
[S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO,
|
||||
[S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK,
|
||||
[S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK,
|
||||
};
|
||||
|
||||
static inline void ext4_set_de_type(struct super_block *sb,
|
||||
struct ext4_dir_entry_2 *de,
|
||||
umode_t mode) {
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
|
||||
de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
|
||||
}
|
||||
|
||||
/*
|
||||
* Move count entries from end of map between two memory locations.
|
||||
* Returns pointer to last entry moved.
|
||||
@ -2251,8 +2245,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dquot_initialize(dir);
|
||||
|
||||
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
||||
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
|
||||
retry:
|
||||
inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
|
||||
NULL, EXT4_HT_DIR, credits);
|
||||
@ -2286,8 +2279,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
|
||||
dquot_initialize(dir);
|
||||
|
||||
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
||||
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
|
||||
retry:
|
||||
inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
|
||||
NULL, EXT4_HT_DIR, credits);
|
||||
@ -2396,8 +2388,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
dquot_initialize(dir);
|
||||
|
||||
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
||||
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
|
||||
retry:
|
||||
inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
|
||||
&dentry->d_name,
|
||||
@ -2826,8 +2817,7 @@ static int ext4_symlink(struct inode *dir,
|
||||
* quota blocks, sb is already counted in previous macros).
|
||||
*/
|
||||
credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
||||
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
|
||||
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
|
||||
}
|
||||
retry:
|
||||
inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
|
||||
|
@ -29,25 +29,19 @@
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
|
||||
static struct kmem_cache *io_page_cachep, *io_end_cachep;
|
||||
static struct kmem_cache *io_end_cachep;
|
||||
|
||||
int __init ext4_init_pageio(void)
|
||||
{
|
||||
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
|
||||
if (io_page_cachep == NULL)
|
||||
return -ENOMEM;
|
||||
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
|
||||
if (io_end_cachep == NULL) {
|
||||
kmem_cache_destroy(io_page_cachep);
|
||||
if (io_end_cachep == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ext4_exit_pageio(void)
|
||||
{
|
||||
kmem_cache_destroy(io_end_cachep);
|
||||
kmem_cache_destroy(io_page_cachep);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -67,29 +61,28 @@ void ext4_ioend_shutdown(struct inode *inode)
|
||||
cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
|
||||
}
|
||||
|
||||
static void put_io_page(struct ext4_io_page *io_page)
|
||||
static void ext4_release_io_end(ext4_io_end_t *io_end)
|
||||
{
|
||||
if (atomic_dec_and_test(&io_page->p_count)) {
|
||||
end_page_writeback(io_page->p_page);
|
||||
put_page(io_page->p_page);
|
||||
kmem_cache_free(io_page_cachep, io_page);
|
||||
}
|
||||
BUG_ON(!list_empty(&io_end->list));
|
||||
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
|
||||
|
||||
if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
|
||||
wake_up_all(ext4_ioend_wq(io_end->inode));
|
||||
if (io_end->flag & EXT4_IO_END_DIRECT)
|
||||
inode_dio_done(io_end->inode);
|
||||
if (io_end->iocb)
|
||||
aio_complete(io_end->iocb, io_end->result, 0);
|
||||
kmem_cache_free(io_end_cachep, io_end);
|
||||
}
|
||||
|
||||
void ext4_free_io_end(ext4_io_end_t *io)
|
||||
static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
|
||||
{
|
||||
int i;
|
||||
struct inode *inode = io_end->inode;
|
||||
|
||||
BUG_ON(!io);
|
||||
BUG_ON(!list_empty(&io->list));
|
||||
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
|
||||
|
||||
for (i = 0; i < io->num_io_pages; i++)
|
||||
put_io_page(io->pages[i]);
|
||||
io->num_io_pages = 0;
|
||||
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
|
||||
wake_up_all(ext4_ioend_wq(io->inode));
|
||||
kmem_cache_free(io_end_cachep, io);
|
||||
io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
|
||||
/* Wake up anyone waiting on unwritten extent conversion */
|
||||
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
|
||||
wake_up_all(ext4_ioend_wq(inode));
|
||||
}
|
||||
|
||||
/* check a range of space and convert unwritten extents to written. */
|
||||
@ -112,13 +105,8 @@ static int ext4_end_io(ext4_io_end_t *io)
|
||||
"(inode %lu, offset %llu, size %zd, error %d)",
|
||||
inode->i_ino, offset, size, ret);
|
||||
}
|
||||
/* Wake up anyone waiting on unwritten extent conversion */
|
||||
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
|
||||
wake_up_all(ext4_ioend_wq(inode));
|
||||
if (io->flag & EXT4_IO_END_DIRECT)
|
||||
inode_dio_done(inode);
|
||||
if (io->iocb)
|
||||
aio_complete(io->iocb, io->result, 0);
|
||||
ext4_clear_io_unwritten_flag(io);
|
||||
ext4_release_io_end(io);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -149,7 +137,7 @@ static void dump_completed_IO(struct inode *inode)
|
||||
}
|
||||
|
||||
/* Add the io_end to per-inode completed end_io list. */
|
||||
void ext4_add_complete_io(ext4_io_end_t *io_end)
|
||||
static void ext4_add_complete_io(ext4_io_end_t *io_end)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
||||
struct workqueue_struct *wq;
|
||||
@ -186,8 +174,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
|
||||
err = ext4_end_io(io);
|
||||
if (unlikely(!ret && err))
|
||||
ret = err;
|
||||
io->flag &= ~EXT4_IO_END_UNWRITTEN;
|
||||
ext4_free_io_end(io);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -219,10 +205,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
||||
atomic_inc(&EXT4_I(inode)->i_ioend_count);
|
||||
io->inode = inode;
|
||||
INIT_LIST_HEAD(&io->list);
|
||||
atomic_set(&io->count, 1);
|
||||
}
|
||||
return io;
|
||||
}
|
||||
|
||||
/*
 * Drop one reference on io_end. When the last reference goes away the
 * io_end is either released immediately (EXT4_IO_END_UNWRITTEN is not set,
 * or its size is zero) or handed to ext4_add_complete_io().
 */
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
|
||||
{
|
||||
if (atomic_dec_and_test(&io_end->count)) {
|
||||
/* No unwritten range recorded on this io_end: release it right here */
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
|
||||
ext4_release_io_end(io_end);
|
||||
return;
|
||||
}
|
||||
ext4_add_complete_io(io_end);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Drop one reference on io_end. On the final put, if EXT4_IO_END_UNWRITTEN
 * is set, call ext4_convert_unwritten_extents() on the io_end's
 * offset/size and clear the flag; then release the io_end.
 *
 * Returns the result of ext4_convert_unwritten_extents() when it was
 * called, 0 otherwise (including when this was not the last reference).
 */
int ext4_put_io_end(ext4_io_end_t *io_end)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (atomic_dec_and_test(&io_end->count)) {
|
||||
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
||||
err = ext4_convert_unwritten_extents(io_end->inode,
|
||||
io_end->offset, io_end->size);
|
||||
ext4_clear_io_unwritten_flag(io_end);
|
||||
}
|
||||
/* Released even if the conversion failed; err is passed back to the caller */
ext4_release_io_end(io_end);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Take an additional reference on io_end (atomically increments
 * io_end->count) and return the same pointer.
 */
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
|
||||
{
|
||||
atomic_inc(&io_end->count);
|
||||
return io_end;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print a buffer I/O error compatible with fs/buffer.c. This
|
||||
* provides compatibility with dmesg scrapers that look for a specific
|
||||
@ -243,45 +262,56 @@ static void ext4_end_bio(struct bio *bio, int error)
|
||||
ext4_io_end_t *io_end = bio->bi_private;
|
||||
struct inode *inode;
|
||||
int i;
|
||||
int blocksize;
|
||||
sector_t bi_sector = bio->bi_sector;
|
||||
|
||||
BUG_ON(!io_end);
|
||||
inode = io_end->inode;
|
||||
blocksize = 1 << inode->i_blkbits;
|
||||
bio->bi_private = NULL;
|
||||
bio->bi_end_io = NULL;
|
||||
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
error = 0;
|
||||
bio_put(bio);
|
||||
|
||||
for (i = 0; i < io_end->num_io_pages; i++) {
|
||||
struct page *page = io_end->pages[i]->p_page;
|
||||
for (i = 0; i < bio->bi_vcnt; i++) {
|
||||
struct bio_vec *bvec = &bio->bi_io_vec[i];
|
||||
struct page *page = bvec->bv_page;
|
||||
struct buffer_head *bh, *head;
|
||||
loff_t offset;
|
||||
loff_t io_end_offset;
|
||||
unsigned bio_start = bvec->bv_offset;
|
||||
unsigned bio_end = bio_start + bvec->bv_len;
|
||||
unsigned under_io = 0;
|
||||
unsigned long flags;
|
||||
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
if (error) {
|
||||
SetPageError(page);
|
||||
set_bit(AS_EIO, &page->mapping->flags);
|
||||
head = page_buffers(page);
|
||||
BUG_ON(!head);
|
||||
|
||||
io_end_offset = io_end->offset + io_end->size;
|
||||
|
||||
offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
|
||||
bh = head;
|
||||
do {
|
||||
if ((offset >= io_end->offset) &&
|
||||
(offset+bh->b_size <= io_end_offset))
|
||||
buffer_io_error(bh);
|
||||
|
||||
offset += bh->b_size;
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
}
|
||||
|
||||
put_io_page(io_end->pages[i]);
|
||||
bh = head = page_buffers(page);
|
||||
/*
|
||||
* We check all buffers in the page under BH_Uptodate_Lock
|
||||
* to avoid races with other end io clearing async_write flags
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
||||
do {
|
||||
if (bh_offset(bh) < bio_start ||
|
||||
bh_offset(bh) + blocksize > bio_end) {
|
||||
if (buffer_async_write(bh))
|
||||
under_io++;
|
||||
continue;
|
||||
}
|
||||
clear_buffer_async_write(bh);
|
||||
if (error)
|
||||
buffer_io_error(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
||||
local_irq_restore(flags);
|
||||
if (!under_io)
|
||||
end_page_writeback(page);
|
||||
}
|
||||
io_end->num_io_pages = 0;
|
||||
inode = io_end->inode;
|
||||
bio_put(bio);
|
||||
|
||||
if (error) {
|
||||
io_end->flag |= EXT4_IO_END_ERROR;
|
||||
@ -294,12 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
|
||||
bi_sector >> (inode->i_blkbits - 9));
|
||||
}
|
||||
|
||||
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
|
||||
ext4_free_io_end(io_end);
|
||||
return;
|
||||
}
|
||||
|
||||
ext4_add_complete_io(io_end);
|
||||
ext4_put_io_end_defer(io_end);
|
||||
}
|
||||
|
||||
void ext4_io_submit(struct ext4_io_submit *io)
|
||||
@ -313,76 +338,59 @@ void ext4_io_submit(struct ext4_io_submit *io)
|
||||
bio_put(io->io_bio);
|
||||
}
|
||||
io->io_bio = NULL;
|
||||
io->io_op = 0;
|
||||
}
|
||||
|
||||
/*
 * Initialize an ext4_io_submit: pick the write op from the writeback
 * control's sync mode and start with no bio and no io_end attached.
 */
void ext4_io_submit_init(struct ext4_io_submit *io,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
/* WB_SYNC_ALL writeback uses WRITE_SYNC, everything else plain WRITE */
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
|
||||
io->io_bio = NULL;
|
||||
io->io_end = NULL;
|
||||
}
|
||||
|
||||
static int io_submit_init(struct ext4_io_submit *io,
|
||||
struct inode *inode,
|
||||
struct writeback_control *wbc,
|
||||
struct buffer_head *bh)
|
||||
static int io_submit_init_bio(struct ext4_io_submit *io,
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
ext4_io_end_t *io_end;
|
||||
struct page *page = bh->b_page;
|
||||
int nvecs = bio_get_nr_vecs(bh->b_bdev);
|
||||
struct bio *bio;
|
||||
|
||||
io_end = ext4_init_io_end(inode, GFP_NOFS);
|
||||
if (!io_end)
|
||||
return -ENOMEM;
|
||||
bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
|
||||
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
||||
bio->bi_bdev = bh->b_bdev;
|
||||
bio->bi_private = io->io_end = io_end;
|
||||
bio->bi_end_io = ext4_end_bio;
|
||||
|
||||
io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
|
||||
|
||||
bio->bi_private = ext4_get_io_end(io->io_end);
|
||||
if (!io->io_end->size)
|
||||
io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
|
||||
+ bh_offset(bh);
|
||||
io->io_bio = bio;
|
||||
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
|
||||
io->io_next_block = bh->b_blocknr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_submit_add_bh(struct ext4_io_submit *io,
|
||||
struct ext4_io_page *io_page,
|
||||
struct inode *inode,
|
||||
struct writeback_control *wbc,
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
ext4_io_end_t *io_end;
|
||||
int ret;
|
||||
|
||||
if (buffer_new(bh)) {
|
||||
clear_buffer_new(bh);
|
||||
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
|
||||
}
|
||||
|
||||
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
|
||||
submit_and_retry:
|
||||
ext4_io_submit(io);
|
||||
}
|
||||
if (io->io_bio == NULL) {
|
||||
ret = io_submit_init(io, inode, wbc, bh);
|
||||
ret = io_submit_init_bio(io, bh);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
io_end = io->io_end;
|
||||
if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
|
||||
(io_end->pages[io_end->num_io_pages-1] != io_page))
|
||||
goto submit_and_retry;
|
||||
if (buffer_uninit(bh))
|
||||
ext4_set_io_unwritten_flag(inode, io_end);
|
||||
io->io_end->size += bh->b_size;
|
||||
io->io_next_block++;
|
||||
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
|
||||
if (ret != bh->b_size)
|
||||
goto submit_and_retry;
|
||||
if ((io_end->num_io_pages == 0) ||
|
||||
(io_end->pages[io_end->num_io_pages-1] != io_page)) {
|
||||
io_end->pages[io_end->num_io_pages++] = io_page;
|
||||
atomic_inc(&io_page->p_count);
|
||||
}
|
||||
io_end = io->io_end;
|
||||
if (test_clear_buffer_uninit(bh))
|
||||
ext4_set_io_unwritten_flag(inode, io_end);
|
||||
io_end->size += bh->b_size;
|
||||
io->io_next_block++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -392,33 +400,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
unsigned block_start, block_end, blocksize;
|
||||
struct ext4_io_page *io_page;
|
||||
unsigned block_start, blocksize;
|
||||
struct buffer_head *bh, *head;
|
||||
int ret = 0;
|
||||
int nr_submitted = 0;
|
||||
|
||||
blocksize = 1 << inode->i_blkbits;
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
BUG_ON(PageWriteback(page));
|
||||
|
||||
io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
|
||||
if (!io_page) {
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
unlock_page(page);
|
||||
return -ENOMEM;
|
||||
}
|
||||
io_page->p_page = page;
|
||||
atomic_set(&io_page->p_count, 1);
|
||||
get_page(page);
|
||||
set_page_writeback(page);
|
||||
ClearPageError(page);
|
||||
|
||||
for (bh = head = page_buffers(page), block_start = 0;
|
||||
bh != head || !block_start;
|
||||
block_start = block_end, bh = bh->b_this_page) {
|
||||
|
||||
block_end = block_start + blocksize;
|
||||
/*
|
||||
* In the first loop we prepare and mark buffers to submit. We have to
|
||||
* mark all buffers in the page before submitting so that
|
||||
* end_page_writeback() cannot be called from ext4_end_bio() when IO
|
||||
* on the first buffer finishes and we are still working on submitting
|
||||
* the second buffer.
|
||||
*/
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
block_start = bh_offset(bh);
|
||||
if (block_start >= len) {
|
||||
/*
|
||||
* Comments copied from block_write_full_page_endio:
|
||||
@ -431,7 +435,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
* mapped, and writes to that region are not written
|
||||
* out to the file."
|
||||
*/
|
||||
zero_user_segment(page, block_start, block_end);
|
||||
zero_user_segment(page, block_start,
|
||||
block_start + blocksize);
|
||||
clear_buffer_dirty(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
continue;
|
||||
@ -445,7 +450,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
ext4_io_submit(io);
|
||||
continue;
|
||||
}
|
||||
ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
|
||||
if (buffer_new(bh)) {
|
||||
clear_buffer_new(bh);
|
||||
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
|
||||
}
|
||||
set_buffer_async_write(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
/* Now submit buffers to write */
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
if (!buffer_async_write(bh))
|
||||
continue;
|
||||
ret = io_submit_add_bh(io, inode, bh);
|
||||
if (ret) {
|
||||
/*
|
||||
* We only get here on ENOMEM. Not much else
|
||||
@ -455,17 +472,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
break;
|
||||
}
|
||||
nr_submitted++;
|
||||
clear_buffer_dirty(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
/* Error stopped previous loop? Clean up buffers... */
|
||||
if (ret) {
|
||||
do {
|
||||
clear_buffer_async_write(bh);
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
}
|
||||
unlock_page(page);
|
||||
/*
|
||||
* If the page was truncated before we could do the writeback,
|
||||
* or we had a memory allocation error while trying to write
|
||||
* the first buffer head, we won't have submitted any pages for
|
||||
* I/O. In that case we need to make sure we've cleared the
|
||||
* PageWriteback bit from the page to prevent the system from
|
||||
* wedging later on.
|
||||
*/
|
||||
put_io_page(io_page);
|
||||
/* Nothing submitted - we have to end page writeback */
|
||||
if (!nr_submitted)
|
||||
end_page_writeback(page);
|
||||
return ret;
|
||||
}
|
||||
|
@ -272,7 +272,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
|
||||
if (start_blk >= last_blk)
|
||||
goto next_group;
|
||||
group_data[bb_index].block_bitmap = start_blk++;
|
||||
ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
|
||||
group = ext4_get_group_number(sb, start_blk - 1);
|
||||
group -= group_data[0].group;
|
||||
group_data[group].free_blocks_count--;
|
||||
if (flexbg_size > 1)
|
||||
@ -284,7 +284,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
|
||||
if (start_blk >= last_blk)
|
||||
goto next_group;
|
||||
group_data[ib_index].inode_bitmap = start_blk++;
|
||||
ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
|
||||
group = ext4_get_group_number(sb, start_blk - 1);
|
||||
group -= group_data[0].group;
|
||||
group_data[group].free_blocks_count--;
|
||||
if (flexbg_size > 1)
|
||||
@ -296,7 +296,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
|
||||
if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
|
||||
goto next_group;
|
||||
group_data[it_index].inode_table = start_blk;
|
||||
ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
|
||||
group = ext4_get_group_number(sb, start_blk - 1);
|
||||
group -= group_data[0].group;
|
||||
group_data[group].free_blocks_count -=
|
||||
EXT4_SB(sb)->s_itb_per_group;
|
||||
@ -392,7 +392,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
|
||||
ext4_group_t group;
|
||||
int err;
|
||||
|
||||
ext4_get_group_no_and_offset(sb, block, &group, NULL);
|
||||
group = ext4_get_group_number(sb, block);
|
||||
start = ext4_group_first_block_no(sb, group);
|
||||
group -= flex_gd->groups[0].group;
|
||||
|
||||
@ -1341,6 +1341,8 @@ static void ext4_update_super(struct super_block *sb,
|
||||
|
||||
/* Update the global fs size fields */
|
||||
sbi->s_groups_count += flex_gd->count;
|
||||
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
|
||||
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
|
||||
|
||||
/* Update the reserved block counts only once the new group is
|
||||
* active. */
|
||||
@ -1879,7 +1881,11 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
|
||||
/* Nothing to do */
|
||||
return 0;
|
||||
|
||||
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
|
||||
n_group = ext4_get_group_number(sb, n_blocks_count - 1);
|
||||
if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
|
||||
ext4_warning(sb, "resize would cause inodes_count overflow");
|
||||
return -EINVAL;
|
||||
}
|
||||
ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
|
||||
|
||||
n_desc_blocks = num_desc_blocks(sb, n_group + 1);
|
||||
|
131
fs/ext4/super.c
131
fs/ext4/super.c
@ -81,6 +81,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
|
||||
static void ext4_destroy_lazyinit_thread(void);
|
||||
static void ext4_unregister_li_request(struct super_block *sb);
|
||||
static void ext4_clear_request_list(void);
|
||||
static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
|
||||
|
||||
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
|
||||
static struct file_system_type ext2_fs_type = {
|
||||
@ -353,10 +354,13 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
|
||||
struct super_block *sb = journal->j_private;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
int error = is_journal_aborted(journal);
|
||||
struct ext4_journal_cb_entry *jce, *tmp;
|
||||
struct ext4_journal_cb_entry *jce;
|
||||
|
||||
BUG_ON(txn->t_state == T_FINISHED);
|
||||
spin_lock(&sbi->s_md_lock);
|
||||
list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
|
||||
while (!list_empty(&txn->t_private_list)) {
|
||||
jce = list_entry(txn->t_private_list.next,
|
||||
struct ext4_journal_cb_entry, jce_list);
|
||||
list_del_init(&jce->jce_list);
|
||||
spin_unlock(&sbi->s_md_lock);
|
||||
jce->jce_func(sb, jce, error);
|
||||
@ -1948,16 +1952,16 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
|
||||
if ((sbi->s_es->s_feature_ro_compat &
|
||||
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
|
||||
/* Use new metadata_csum algorithm */
|
||||
__u16 old_csum;
|
||||
__le16 save_csum;
|
||||
__u32 csum32;
|
||||
|
||||
old_csum = gdp->bg_checksum;
|
||||
save_csum = gdp->bg_checksum;
|
||||
gdp->bg_checksum = 0;
|
||||
csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
|
||||
sizeof(le_group));
|
||||
csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
|
||||
sbi->s_desc_size);
|
||||
gdp->bg_checksum = old_csum;
|
||||
gdp->bg_checksum = save_csum;
|
||||
|
||||
crc = csum32 & 0xFFFF;
|
||||
goto out;
|
||||
@ -2379,17 +2383,15 @@ struct ext4_attr {
|
||||
int offset;
|
||||
};
|
||||
|
||||
static int parse_strtoul(const char *buf,
|
||||
unsigned long max, unsigned long *value)
|
||||
static int parse_strtoull(const char *buf,
|
||||
unsigned long long max, unsigned long long *value)
|
||||
{
|
||||
char *endp;
|
||||
int ret;
|
||||
|
||||
*value = simple_strtoul(skip_spaces(buf), &endp, 0);
|
||||
endp = skip_spaces(endp);
|
||||
if (*endp || *value > max)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
ret = kstrtoull(skip_spaces(buf), 0, value);
|
||||
if (!ret && *value > max)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
|
||||
@ -2431,11 +2433,13 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
unsigned long t;
|
||||
int ret;
|
||||
|
||||
if (parse_strtoul(buf, 0x40000000, &t))
|
||||
return -EINVAL;
|
||||
ret = kstrtoul(skip_spaces(buf), 0, &t);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (t && !is_power_of_2(t))
|
||||
if (t && (!is_power_of_2(t) || t > 0x40000000))
|
||||
return -EINVAL;
|
||||
|
||||
sbi->s_inode_readahead_blks = t;
|
||||
@ -2456,13 +2460,36 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
|
||||
{
|
||||
unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
|
||||
unsigned long t;
|
||||
int ret;
|
||||
|
||||
if (parse_strtoul(buf, 0xffffffff, &t))
|
||||
return -EINVAL;
|
||||
ret = kstrtoul(skip_spaces(buf), 0, &t);
|
||||
if (ret)
|
||||
return ret;
|
||||
*ui = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
 * Show handler for the reserved_clusters attribute: prints the current
 * value of sbi->s_resv_clusters as an unsigned decimal followed by a
 * newline, writing at most PAGE_SIZE bytes into buf.
 */
static ssize_t reserved_clusters_show(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(unsigned long long) atomic64_read(&sbi->s_resv_clusters));
|
||||
}
|
||||
|
||||
/*
 * Store handler for the reserved_clusters attribute: parses buf as an
 * unsigned 64-bit number and passes it to ext4_reserve_clusters().
 *
 * Returns count on success, -EINVAL if parsing fails, or the error
 * returned by ext4_reserve_clusters().
 */
static ssize_t reserved_clusters_store(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
unsigned long long val;
|
||||
int ret;
|
||||
|
||||
/* -1ULL as the max means no upper bound is applied at parse time */
if (parse_strtoull(buf, -1ULL, &val))
|
||||
return -EINVAL;
|
||||
ret = ext4_reserve_clusters(sbi, val);
|
||||
|
||||
return ret ? ret : count;
|
||||
}
|
||||
|
||||
static ssize_t trigger_test_error(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi,
|
||||
const char *buf, size_t count)
|
||||
@ -2500,6 +2527,7 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
|
||||
EXT4_RO_ATTR(delayed_allocation_blocks);
|
||||
EXT4_RO_ATTR(session_write_kbytes);
|
||||
EXT4_RO_ATTR(lifetime_write_kbytes);
|
||||
EXT4_RW_ATTR(reserved_clusters);
|
||||
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
|
||||
inode_readahead_blks_store, s_inode_readahead_blks);
|
||||
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
|
||||
@ -2517,6 +2545,7 @@ static struct attribute *ext4_attrs[] = {
|
||||
ATTR_LIST(delayed_allocation_blocks),
|
||||
ATTR_LIST(session_write_kbytes),
|
||||
ATTR_LIST(lifetime_write_kbytes),
|
||||
ATTR_LIST(reserved_clusters),
|
||||
ATTR_LIST(inode_readahead_blks),
|
||||
ATTR_LIST(inode_goal),
|
||||
ATTR_LIST(mb_stats),
|
||||
@ -3192,6 +3221,40 @@ int ext4_calculate_overhead(struct super_block *sb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Compute the default size of the reserved cluster pool: the filesystem's
 * block count converted to clusters, divided by 50, capped at 4096.
 */
static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
|
||||
{
|
||||
ext4_fsblk_t resv_clusters;
|
||||
|
||||
/*
|
||||
* By default we reserve 2% or 4096 clusters, whichever is smaller.
|
||||
* This should cover the situations where we can not afford to run
|
||||
* out of space like for example punch hole, or converting
|
||||
* uninitialized extents in delalloc path. In most cases such
|
||||
* allocation would require 1, or 2 blocks, higher numbers are
|
||||
* very rare.
|
||||
*/
|
||||
resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
|
||||
|
||||
/* do_div() divides resv_clusters in place; 1/50 is the 2% above */
do_div(resv_clusters, 50);
|
||||
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
|
||||
|
||||
return resv_clusters;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Set the size of the reserved cluster pool to count.
 *
 * Returns -EINVAL if count is not smaller than the total number of
 * clusters in the filesystem, otherwise stores count in
 * sbi->s_resv_clusters and returns 0.
 */
static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
|
||||
{
|
||||
ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
|
||||
sbi->s_cluster_bits;
|
||||
|
||||
if (count >= clusters)
|
||||
return -EINVAL;
|
||||
|
||||
atomic64_set(&sbi->s_resv_clusters, count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
{
|
||||
char *orig_data = kstrdup(data, GFP_KERNEL);
|
||||
@ -3526,6 +3589,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
|
||||
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
|
||||
|
||||
/* Do we have standard group size of blocksize * 8 blocks ? */
|
||||
if (sbi->s_blocks_per_group == blocksize << 3)
|
||||
set_opt2(sb, STD_GROUP_SIZE);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
|
||||
sbi->s_def_hash_version = es->s_def_hash_version;
|
||||
@ -3698,6 +3765,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sbi->s_err_report.function = print_daily_error_info;
|
||||
sbi->s_err_report.data = (unsigned long) sb;
|
||||
|
||||
/* Register extent status tree shrinker */
|
||||
ext4_es_register_shrinker(sb);
|
||||
|
||||
err = percpu_counter_init(&sbi->s_freeclusters_counter,
|
||||
ext4_count_free_clusters(sb));
|
||||
if (!err) {
|
||||
@ -3723,9 +3793,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sbi->s_max_writeback_mb_bump = 128;
|
||||
sbi->s_extent_max_zeroout_kb = 32;
|
||||
|
||||
/* Register extent status tree shrinker */
|
||||
ext4_es_register_shrinker(sb);
|
||||
|
||||
/*
|
||||
* set up enough so that it can read an inode
|
||||
*/
|
||||
@ -3911,6 +3978,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
"available");
|
||||
}
|
||||
|
||||
err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
|
||||
if (err) {
|
||||
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
|
||||
"reserved pool", ext4_calculate_resv_clusters(sbi));
|
||||
goto failed_mount4a;
|
||||
}
|
||||
|
||||
err = ext4_setup_system_zone(sb);
|
||||
if (err) {
|
||||
ext4_msg(sb, KERN_ERR, "failed to initialize system "
|
||||
@ -4010,6 +4084,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sbi->s_journal = NULL;
|
||||
}
|
||||
failed_mount3:
|
||||
ext4_es_unregister_shrinker(sb);
|
||||
del_timer(&sbi->s_err_report);
|
||||
if (sbi->s_flex_groups)
|
||||
ext4_kvfree(sbi->s_flex_groups);
|
||||
@ -4177,7 +4252,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
|
||||
goto out_bdev;
|
||||
}
|
||||
journal->j_private = sb;
|
||||
ll_rw_block(READ, 1, &journal->j_sb_buffer);
|
||||
ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
|
||||
wait_on_buffer(journal->j_sb_buffer);
|
||||
if (!buffer_uptodate(journal->j_sb_buffer)) {
|
||||
ext4_msg(sb, KERN_ERR, "I/O error on journal device");
|
||||
@ -4742,9 +4817,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
ext4_fsblk_t overhead = 0;
|
||||
ext4_fsblk_t overhead = 0, resv_blocks;
|
||||
u64 fsid;
|
||||
s64 bfree;
|
||||
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
|
||||
|
||||
if (!test_opt(sb, MINIX_DF))
|
||||
overhead = sbi->s_overhead;
|
||||
@ -4756,8 +4832,9 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
|
||||
/* prevent underflow in case little free space is available */
|
||||
buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
|
||||
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
|
||||
if (buf->f_bfree < ext4_r_blocks_count(es))
|
||||
buf->f_bavail = buf->f_bfree -
|
||||
(ext4_r_blocks_count(es) + resv_blocks);
|
||||
if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
|
||||
buf->f_bavail = 0;
|
||||
buf->f_files = le32_to_cpu(es->s_inodes_count);
|
||||
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
|
||||
@ -4945,6 +5022,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
|
||||
return PTR_ERR(qf_inode);
|
||||
}
|
||||
|
||||
/* Don't account quota for quota files to avoid recursion */
|
||||
qf_inode->i_flags |= S_NOQUOTA;
|
||||
err = dquot_enable(qf_inode, type, format_id, flags);
|
||||
iput(qf_inode);
|
||||
|
||||
|
@ -122,17 +122,18 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
|
||||
struct ext4_xattr_header *hdr)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
__u32 csum, old;
|
||||
__u32 csum;
|
||||
__le32 save_csum;
|
||||
__le64 dsk_block_nr = cpu_to_le64(block_nr);
|
||||
|
||||
old = hdr->h_checksum;
|
||||
save_csum = hdr->h_checksum;
|
||||
hdr->h_checksum = 0;
|
||||
block_nr = cpu_to_le64(block_nr);
|
||||
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
|
||||
sizeof(block_nr));
|
||||
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
|
||||
sizeof(dsk_block_nr));
|
||||
csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
|
||||
EXT4_BLOCK_SIZE(inode->i_sb));
|
||||
|
||||
hdr->h_checksum = old;
|
||||
hdr->h_checksum = save_csum;
|
||||
return cpu_to_le32(csum);
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#define EXT4_XATTR_INDEX_LUSTRE 5
|
||||
#define EXT4_XATTR_INDEX_SECURITY 6
|
||||
#define EXT4_XATTR_INDEX_SYSTEM 7
|
||||
#define EXT4_XATTR_INDEX_RICHACL 8
|
||||
|
||||
struct ext4_xattr_header {
|
||||
__le32 h_magic; /* magic number for identification */
|
||||
|
@ -382,7 +382,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
int space_left = 0;
|
||||
int first_tag = 0;
|
||||
int tag_flag;
|
||||
int i, to_free = 0;
|
||||
int i;
|
||||
int tag_bytes = journal_tag_bytes(journal);
|
||||
struct buffer_head *cbh = NULL; /* For transactional checksums */
|
||||
__u32 crc32_sum = ~0;
|
||||
@ -1134,7 +1134,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
|
||||
spin_unlock(&journal->j_history_lock);
|
||||
|
||||
commit_transaction->t_state = T_FINISHED;
|
||||
commit_transaction->t_state = T_COMMIT_CALLBACK;
|
||||
J_ASSERT(commit_transaction == journal->j_committing_transaction);
|
||||
journal->j_commit_sequence = commit_transaction->t_tid;
|
||||
journal->j_committing_transaction = NULL;
|
||||
@ -1149,38 +1149,44 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
journal->j_average_commit_time*3) / 4;
|
||||
else
|
||||
journal->j_average_commit_time = commit_time;
|
||||
|
||||
write_unlock(&journal->j_state_lock);
|
||||
|
||||
if (commit_transaction->t_checkpoint_list == NULL &&
|
||||
commit_transaction->t_checkpoint_io_list == NULL) {
|
||||
__jbd2_journal_drop_transaction(journal, commit_transaction);
|
||||
to_free = 1;
|
||||
if (journal->j_checkpoint_transactions == NULL) {
|
||||
journal->j_checkpoint_transactions = commit_transaction;
|
||||
commit_transaction->t_cpnext = commit_transaction;
|
||||
commit_transaction->t_cpprev = commit_transaction;
|
||||
} else {
|
||||
if (journal->j_checkpoint_transactions == NULL) {
|
||||
journal->j_checkpoint_transactions = commit_transaction;
|
||||
commit_transaction->t_cpnext = commit_transaction;
|
||||
commit_transaction->t_cpprev = commit_transaction;
|
||||
} else {
|
||||
commit_transaction->t_cpnext =
|
||||
journal->j_checkpoint_transactions;
|
||||
commit_transaction->t_cpprev =
|
||||
commit_transaction->t_cpnext->t_cpprev;
|
||||
commit_transaction->t_cpnext->t_cpprev =
|
||||
commit_transaction->t_cpnext =
|
||||
journal->j_checkpoint_transactions;
|
||||
commit_transaction->t_cpprev =
|
||||
commit_transaction->t_cpnext->t_cpprev;
|
||||
commit_transaction->t_cpnext->t_cpprev =
|
||||
commit_transaction;
|
||||
commit_transaction->t_cpprev->t_cpnext =
|
||||
commit_transaction;
|
||||
commit_transaction->t_cpprev->t_cpnext =
|
||||
commit_transaction;
|
||||
}
|
||||
}
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
|
||||
/* Drop all spin_locks because commit_callback may block.
|
||||
* __journal_remove_checkpoint() can not destroy transaction
|
||||
* under us because it is not marked as T_FINISHED yet */
|
||||
if (journal->j_commit_callback)
|
||||
journal->j_commit_callback(journal, commit_transaction);
|
||||
|
||||
trace_jbd2_end_commit(journal, commit_transaction);
|
||||
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
|
||||
journal->j_commit_sequence, journal->j_tail_sequence);
|
||||
if (to_free)
|
||||
jbd2_journal_free_transaction(commit_transaction);
|
||||
|
||||
write_lock(&journal->j_state_lock);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
commit_transaction->t_state = T_FINISHED;
|
||||
/* Recheck checkpoint lists after j_list_lock was dropped */
|
||||
if (commit_transaction->t_checkpoint_list == NULL &&
|
||||
commit_transaction->t_checkpoint_io_list == NULL) {
|
||||
__jbd2_journal_drop_transaction(journal, commit_transaction);
|
||||
jbd2_journal_free_transaction(commit_transaction);
|
||||
}
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
wake_up(&journal->j_wait_done_commit);
|
||||
}
|
||||
|
@ -707,6 +707,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* When this function returns the transaction corresponding to tid
|
||||
* will be completed. If the transaction is currently running, start
|
||||
* committing that transaction before waiting for it to complete. If
|
||||
* the transaction id is stale, it is by definition already completed,
|
||||
* so just return SUCCESS.
|
||||
*
* Returns 0 when no wait is needed, otherwise the return value of
* jbd2_log_wait_commit().
*/
|
||||
int jbd2_complete_transaction(journal_t *journal, tid_t tid)
|
||||
{
|
||||
int need_to_wait = 1;
|
||||
|
||||
read_lock(&journal->j_state_lock);
|
||||
if (journal->j_running_transaction &&
|
||||
journal->j_running_transaction->t_tid == tid) {
|
||||
if (journal->j_commit_request != tid) {
|
||||
/* transaction not yet started, so request it */
|
||||
read_unlock(&journal->j_state_lock);
|
||||
jbd2_log_start_commit(journal, tid);
|
||||
goto wait_commit;
|
||||
}
|
||||
/* tid is neither the running nor the committing transaction: nothing to wait for */
} else if (!(journal->j_committing_transaction &&
|
||||
journal->j_committing_transaction->t_tid == tid))
|
||||
need_to_wait = 0;
|
||||
read_unlock(&journal->j_state_lock);
|
||||
if (!need_to_wait)
|
||||
return 0;
|
||||
/* Reached with j_state_lock already dropped on every path */
wait_commit:
|
||||
return jbd2_log_wait_commit(journal, tid);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_complete_transaction);
|
||||
|
||||
/*
|
||||
* Log buffer allocation routines:
|
||||
*/
|
||||
|
@ -332,7 +332,6 @@ static handle_t *new_handle(int nblocks)
|
||||
handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
|
||||
if (!handle)
|
||||
return NULL;
|
||||
memset(handle, 0, sizeof(*handle));
|
||||
handle->h_buffer_credits = nblocks;
|
||||
handle->h_ref = 1;
|
||||
|
||||
@ -640,6 +639,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
|
||||
int error;
|
||||
char *frozen_buffer = NULL;
|
||||
int need_copy = 0;
|
||||
unsigned long start_lock, time_lock;
|
||||
|
||||
if (is_handle_aborted(handle))
|
||||
return -EROFS;
|
||||
@ -655,9 +655,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
|
||||
|
||||
/* @@@ Need to check for errors here at some point. */
|
||||
|
||||
start_lock = jiffies;
|
||||
lock_buffer(bh);
|
||||
jbd_lock_bh_state(bh);
|
||||
|
||||
/* If it takes too long to lock the buffer, trace it */
|
||||
time_lock = jbd2_time_diff(start_lock, jiffies);
|
||||
if (time_lock > HZ/10)
|
||||
trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
|
||||
jiffies_to_msecs(time_lock));
|
||||
|
||||
/* We now hold the buffer lock so it is safe to query the buffer
|
||||
* state. Is the buffer dirty?
|
||||
*
|
||||
|
@ -34,6 +34,8 @@ enum bh_state_bits {
|
||||
BH_Write_EIO, /* I/O error on write */
|
||||
BH_Unwritten, /* Buffer is allocated on disk but not written */
|
||||
BH_Quiet, /* Buffer Error Printks to be quiet */
|
||||
BH_Meta, /* Buffer contains metadata */
|
||||
BH_Prio, /* Buffer should be submitted with REQ_PRIO */
|
||||
|
||||
BH_PrivateStart,/* not a state bit, but the first bit available
|
||||
* for private allocation by other entities
|
||||
@ -124,6 +126,8 @@ BUFFER_FNS(Delay, delay)
|
||||
BUFFER_FNS(Boundary, boundary)
|
||||
BUFFER_FNS(Write_EIO, write_io_error)
|
||||
BUFFER_FNS(Unwritten, unwritten)
|
||||
BUFFER_FNS(Meta, meta)
|
||||
BUFFER_FNS(Prio, prio)
|
||||
|
||||
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
|
||||
|
||||
|
@ -480,6 +480,7 @@ struct transaction_s
|
||||
T_COMMIT,
|
||||
T_COMMIT_DFLUSH,
|
||||
T_COMMIT_JFLUSH,
|
||||
T_COMMIT_CALLBACK,
|
||||
T_FINISHED
|
||||
} t_state;
|
||||
|
||||
@ -1144,7 +1145,7 @@ extern struct kmem_cache *jbd2_handle_cache;
|
||||
|
||||
static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
|
||||
{
|
||||
return kmem_cache_alloc(jbd2_handle_cache, gfp_flags);
|
||||
return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags);
|
||||
}
|
||||
|
||||
static inline void jbd2_free_handle(handle_t *handle)
|
||||
@ -1200,6 +1201,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
|
||||
int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
|
||||
int jbd2_journal_force_commit_nested(journal_t *journal);
|
||||
int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
|
||||
int jbd2_complete_transaction(journal_t *journal, tid_t tid);
|
||||
int jbd2_log_do_checkpoint(journal_t *journal);
|
||||
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
|
||||
|
||||
|
@ -31,21 +31,14 @@ struct journal_head {
|
||||
/*
|
||||
* Journalling list for this buffer [jbd_lock_bh_state()]
|
||||
*/
|
||||
unsigned b_jlist;
|
||||
unsigned b_jlist:4;
|
||||
|
||||
/*
|
||||
* This flag signals the buffer has been modified by
|
||||
* the currently running transaction
|
||||
* [jbd_lock_bh_state()]
|
||||
*/
|
||||
unsigned b_modified;
|
||||
|
||||
/*
|
||||
* This field tracks the last transaction id in which this buffer
|
||||
* has been cowed
|
||||
* [jbd_lock_bh_state()]
|
||||
*/
|
||||
tid_t b_cow_tid;
|
||||
unsigned b_modified:1;
|
||||
|
||||
/*
|
||||
* Copy of the buffer data frozen for writing to the log.
|
||||
|
@ -257,15 +257,7 @@ DECLARE_EVENT_CLASS(ext4__write_end,
|
||||
__entry->pos, __entry->len, __entry->copied)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
|
||||
|
||||
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
|
||||
unsigned int copied),
|
||||
|
||||
TP_ARGS(inode, pos, len, copied)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
|
||||
DEFINE_EVENT(ext4__write_end, ext4_write_end,
|
||||
|
||||
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
|
||||
unsigned int copied),
|
||||
@ -1956,7 +1948,7 @@ TRACE_EVENT(ext4_remove_blocks,
|
||||
__entry->to = to;
|
||||
__entry->partial = partial_cluster;
|
||||
__entry->ee_pblk = ext4_ext_pblock(ex);
|
||||
__entry->ee_lblk = cpu_to_le32(ex->ee_block);
|
||||
__entry->ee_lblk = le32_to_cpu(ex->ee_block);
|
||||
__entry->ee_len = ext4_ext_get_actual_len(ex);
|
||||
),
|
||||
|
||||
@ -2060,7 +2052,7 @@ TRACE_EVENT(ext4_ext_remove_space,
|
||||
|
||||
TRACE_EVENT(ext4_ext_remove_space_done,
|
||||
TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
|
||||
ext4_lblk_t partial, unsigned short eh_entries),
|
||||
ext4_lblk_t partial, __le16 eh_entries),
|
||||
|
||||
TP_ARGS(inode, start, depth, partial, eh_entries),
|
||||
|
||||
@ -2079,7 +2071,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
|
||||
__entry->start = start;
|
||||
__entry->depth = depth;
|
||||
__entry->partial = partial;
|
||||
__entry->eh_entries = eh_entries;
|
||||
__entry->eh_entries = le16_to_cpu(eh_entries);
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "
|
||||
|
@ -358,6 +358,27 @@ TRACE_EVENT(jbd2_write_superblock,
|
||||
MINOR(__entry->dev), __entry->write_op)
|
||||
);
|
||||
|
||||
TRACE_EVENT(jbd2_lock_buffer_stall,
|
||||
|
||||
TP_PROTO(dev_t dev, unsigned long stall_ms),
|
||||
|
||||
TP_ARGS(dev, stall_ms),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( dev_t, dev )
|
||||
__field(unsigned long, stall_ms )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev;
|
||||
__entry->stall_ms = stall_ms;
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d stall_ms %lu",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->stall_ms)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_JBD2_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
Loading…
Reference in New Issue
Block a user