mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-28 16:56:26 +00:00
Lots of cleanups and bug fixes this cycle, primarily in the block
allocation, extent management, fast commit, and journalling. -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmbsGRcACgkQ8vlZVpUN gaP+pwgAop3LUpOFQ9dPRTR3+37AJI8adfabfLIDkEkoVA7lyYY/6Q8pcQ0rklq3 wE1WxrJ7MaE1GaFCwRIDIL6TP+uYRK0pPjqbFBxGakhDc+WXrTcALOWWofb7J7PL FLwP264lRRfKfpMHdK8bx6YHnEN8425PR+ZNXGVPsw+wjo72mmnq54w+ct1iOKiw dKfIrwwCGKlBsNdYHS/XsSx7MMK8e7nsKoSq0UtpJ4PqF11/asOtlYYODc4hd27U E3I3UDKuntmz+meAscDejOJqQk5FT184HIt/Y5JfetKU2zpUFj9IKqXDzMjijdaj vGn9RkTXfJdxMPm1ouF2R6KIRJollg== =V7+A -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "Lots of cleanups and bug fixes this cycle, primarily in the block allocation, extent management, fast commit, and journalling" * tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (93 commits) ext4: convert EXT4_B2C(sbi->s_stripe) users to EXT4_NUM_B2C ext4: check stripe size compatibility on remount as well ext4: fix i_data_sem unlock order in ext4_ind_migrate() ext4: remove the special buffer dirty handling in do_journal_get_write_access ext4: fix a potential assertion failure due to improperly dirtied buffer ext4: hoist ext4_block_write_begin and replace the __block_write_begin ext4: persist the new uptodate buffers in ext4_journalled_zero_new_buffers ext4: dax: keep orphan list before truncate overflow allocated blocks ext4: fix error message when rejecting the default hash ext4: save unnecessary indentation in ext4_ext_create_new_leaf() ext4: make some fast commit functions reuse extents path ext4: refactor ext4_swap_extents() to reuse extents path ext4: get rid of ppath in convert_initialized_extent() ext4: get rid of ppath in ext4_ext_handle_unwritten_extents() ext4: get rid of ppath in ext4_ext_convert_to_initialized() ext4: get rid of ppath in ext4_convert_unwritten_extents_endio() ext4: get rid of ppath in ext4_split_convert_extents() ext4: get rid of ppath in ext4_split_extent() ext4: get rid of ppath in ext4_force_split_extent_at() ext4: get rid of ppath in ext4_split_extent_at() ...
This commit is contained in:
commit
056f8c437d
@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
|
||||
that ext4's inode table readahead algorithm will pre-read into the
|
||||
buffer cache. The default value is 32 blocks.
|
||||
|
||||
nouser_xattr
|
||||
Disables Extended User Attributes. See the attr(5) manual page for
|
||||
more information about extended attributes.
|
||||
|
||||
noacl
|
||||
This option disables POSIX Access Control List support. If ACL support
|
||||
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
|
||||
is enabled by default on mount. See the acl(5) manual page for more
|
||||
information about acl.
|
||||
|
||||
bsddf (*)
|
||||
Make 'df' act like BSD.
|
||||
|
||||
|
@ -18,15 +18,17 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
|
||||
|
||||
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
|
||||
struct ext4_group_desc *gdp,
|
||||
struct buffer_head *bh, int sz)
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
__u32 hi;
|
||||
__u32 provided, calculated;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
int sz;
|
||||
|
||||
if (!ext4_has_metadata_csum(sb))
|
||||
return 1;
|
||||
|
||||
sz = EXT4_INODES_PER_GROUP(sb) >> 3;
|
||||
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
|
||||
calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
|
||||
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
|
||||
@ -40,14 +42,16 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb,
|
||||
|
||||
void ext4_inode_bitmap_csum_set(struct super_block *sb,
|
||||
struct ext4_group_desc *gdp,
|
||||
struct buffer_head *bh, int sz)
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
__u32 csum;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
int sz;
|
||||
|
||||
if (!ext4_has_metadata_csum(sb))
|
||||
return;
|
||||
|
||||
sz = EXT4_INODES_PER_GROUP(sb) >> 3;
|
||||
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
|
||||
gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
|
||||
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
|
||||
|
@ -280,12 +280,20 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
|
||||
struct fscrypt_str de_name =
|
||||
FSTR_INIT(de->name,
|
||||
de->name_len);
|
||||
u32 hash;
|
||||
u32 minor_hash;
|
||||
|
||||
if (IS_CASEFOLDED(inode)) {
|
||||
hash = EXT4_DIRENT_HASH(de);
|
||||
minor_hash = EXT4_DIRENT_MINOR_HASH(de);
|
||||
} else {
|
||||
hash = 0;
|
||||
minor_hash = 0;
|
||||
}
|
||||
|
||||
/* Directory is encrypted */
|
||||
err = fscrypt_fname_disk_to_usr(inode,
|
||||
EXT4_DIRENT_HASH(de),
|
||||
EXT4_DIRENT_MINOR_HASH(de),
|
||||
&de_name, &fstr);
|
||||
hash, minor_hash, &de_name, &fstr);
|
||||
de_name = fstr;
|
||||
fstr.len = save_len;
|
||||
if (err)
|
||||
|
@ -1058,6 +1058,7 @@ struct ext4_inode_info {
|
||||
|
||||
/* Number of ongoing updates on this inode */
|
||||
atomic_t i_fc_updates;
|
||||
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
|
||||
|
||||
/* Fast commit wait queue for this inode */
|
||||
wait_queue_head_t i_fc_wait;
|
||||
@ -1106,6 +1107,10 @@ struct ext4_inode_info {
|
||||
|
||||
/* mballoc */
|
||||
atomic_t i_prealloc_active;
|
||||
|
||||
/* allocation reservation info for delalloc */
|
||||
/* In case of bigalloc, this refer to clusters rather than blocks */
|
||||
unsigned int i_reserved_data_blocks;
|
||||
struct rb_root i_prealloc_node;
|
||||
rwlock_t i_prealloc_lock;
|
||||
|
||||
@ -1122,10 +1127,6 @@ struct ext4_inode_info {
|
||||
/* ialloc */
|
||||
ext4_group_t i_last_alloc_group;
|
||||
|
||||
/* allocation reservation info for delalloc */
|
||||
/* In case of bigalloc, this refer to clusters rather than blocks */
|
||||
unsigned int i_reserved_data_blocks;
|
||||
|
||||
/* pending cluster reservations for bigalloc file systems */
|
||||
struct ext4_pending_tree i_pending_tree;
|
||||
|
||||
@ -1149,7 +1150,6 @@ struct ext4_inode_info {
|
||||
*/
|
||||
struct list_head i_rsv_conversion_list;
|
||||
struct work_struct i_rsv_conversion_work;
|
||||
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
|
||||
|
||||
spinlock_t i_block_reservation_lock;
|
||||
|
||||
@ -2338,9 +2338,9 @@ struct ext4_dir_entry_2 {
|
||||
((struct ext4_dir_entry_hash *) \
|
||||
(((void *)(entry)) + \
|
||||
((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)))
|
||||
#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(de)->hash)
|
||||
#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash)
|
||||
#define EXT4_DIRENT_MINOR_HASH(entry) \
|
||||
le32_to_cpu(EXT4_DIRENT_HASHES(de)->minor_hash)
|
||||
le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash)
|
||||
|
||||
static inline bool ext4_hash_in_dirent(const struct inode *inode)
|
||||
{
|
||||
@ -2462,6 +2462,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
|
||||
#define DX_HASH_HALF_MD4_UNSIGNED 4
|
||||
#define DX_HASH_TEA_UNSIGNED 5
|
||||
#define DX_HASH_SIPHASH 6
|
||||
#define DX_HASH_LAST DX_HASH_SIPHASH
|
||||
|
||||
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
|
||||
const void *address, unsigned int length)
|
||||
@ -2695,10 +2696,10 @@ struct mmpd_data {
|
||||
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
|
||||
void ext4_inode_bitmap_csum_set(struct super_block *sb,
|
||||
struct ext4_group_desc *gdp,
|
||||
struct buffer_head *bh, int sz);
|
||||
struct buffer_head *bh);
|
||||
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
|
||||
struct ext4_group_desc *gdp,
|
||||
struct buffer_head *bh, int sz);
|
||||
struct buffer_head *bh);
|
||||
void ext4_block_bitmap_csum_set(struct super_block *sb,
|
||||
struct ext4_group_desc *gdp,
|
||||
struct buffer_head *bh);
|
||||
@ -3712,11 +3713,12 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
|
||||
int num,
|
||||
struct ext4_ext_path *path);
|
||||
extern int ext4_ext_insert_extent(handle_t *, struct inode *,
|
||||
struct ext4_ext_path **,
|
||||
struct ext4_extent *, int);
|
||||
extern struct ext4_ext_path *ext4_ext_insert_extent(
|
||||
handle_t *handle, struct inode *inode,
|
||||
struct ext4_ext_path *path,
|
||||
struct ext4_extent *newext, int gb_flags);
|
||||
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
|
||||
struct ext4_ext_path **,
|
||||
struct ext4_ext_path *,
|
||||
int flags);
|
||||
extern void ext4_free_ext_path(struct ext4_ext_path *);
|
||||
extern int ext4_ext_check_inode(struct inode *inode);
|
||||
@ -3853,6 +3855,9 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
|
||||
return buffer_uptodate(bh);
|
||||
}
|
||||
|
||||
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
|
||||
loff_t pos, unsigned len,
|
||||
get_block_t *get_block);
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#define EFSBADCRC EBADMSG /* Bad CRC detected */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -558,8 +558,8 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
|
||||
if (ext4_es_is_hole(es1))
|
||||
return 1;
|
||||
|
||||
/* we need to check delayed extent is without unwritten status */
|
||||
if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
|
||||
/* we need to check delayed extent */
|
||||
if (ext4_es_is_delayed(es1))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -848,11 +848,12 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
|
||||
*/
|
||||
void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||
unsigned int status)
|
||||
unsigned int status, int flags)
|
||||
{
|
||||
struct extent_status newes;
|
||||
ext4_lblk_t end = lblk + len - 1;
|
||||
int err1 = 0, err2 = 0, err3 = 0;
|
||||
int resv_used = 0, pending = 0;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct extent_status *es1 = NULL;
|
||||
struct extent_status *es2 = NULL;
|
||||
@ -862,21 +863,14 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||
return;
|
||||
|
||||
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
|
||||
lblk, len, pblk, status, inode->i_ino);
|
||||
es_debug("add [%u/%u) %llu %x %x to extent status tree of inode %lu\n",
|
||||
lblk, len, pblk, status, flags, inode->i_ino);
|
||||
|
||||
if (!len)
|
||||
return;
|
||||
|
||||
BUG_ON(end < lblk);
|
||||
|
||||
if ((status & EXTENT_STATUS_DELAYED) &&
|
||||
(status & EXTENT_STATUS_WRITTEN)) {
|
||||
ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
|
||||
" delayed and written which can potentially "
|
||||
" cause data loss.", lblk, len);
|
||||
WARN_ON(1);
|
||||
}
|
||||
WARN_ON_ONCE(status & EXTENT_STATUS_DELAYED);
|
||||
|
||||
newes.es_lblk = lblk;
|
||||
newes.es_len = len;
|
||||
@ -894,11 +888,11 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
es1 = __es_alloc_extent(true);
|
||||
if ((err1 || err2) && !es2)
|
||||
es2 = __es_alloc_extent(true);
|
||||
if ((err1 || err2 || err3) && revise_pending && !pr)
|
||||
if ((err1 || err2 || err3 < 0) && revise_pending && !pr)
|
||||
pr = __alloc_pending(true);
|
||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||
|
||||
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
|
||||
err1 = __es_remove_extent(inode, lblk, end, &resv_used, es1);
|
||||
if (err1 != 0)
|
||||
goto error;
|
||||
/* Free preallocated extent if it didn't get used. */
|
||||
@ -922,16 +916,38 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
|
||||
if (revise_pending) {
|
||||
err3 = __revise_pending(inode, lblk, len, &pr);
|
||||
if (err3 != 0)
|
||||
if (err3 < 0)
|
||||
goto error;
|
||||
if (pr) {
|
||||
__free_pending(pr);
|
||||
pr = NULL;
|
||||
}
|
||||
pending = err3;
|
||||
}
|
||||
error:
|
||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||
if (err1 || err2 || err3)
|
||||
/*
|
||||
* Reduce the reserved cluster count to reflect successful deferred
|
||||
* allocation of delayed allocated clusters or direct allocation of
|
||||
* clusters discovered to be delayed allocated. Once allocated, a
|
||||
* cluster is not included in the reserved count.
|
||||
*
|
||||
* When direct allocating (from fallocate, filemap, DIO, or clusters
|
||||
* allocated when delalloc has been disabled by ext4_nonda_switch())
|
||||
* an extent either 1) contains delayed blocks but start with
|
||||
* non-delayed allocated blocks (e.g. hole) or 2) contains non-delayed
|
||||
* allocated blocks which belong to delayed allocated clusters when
|
||||
* bigalloc feature is enabled, quota has already been claimed by
|
||||
* ext4_mb_new_blocks(), so release the quota reservations made for
|
||||
* any previously delayed allocated clusters instead of claim them
|
||||
* again.
|
||||
*/
|
||||
resv_used += pending;
|
||||
if (resv_used)
|
||||
ext4_da_update_reserve_space(inode, resv_used,
|
||||
flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE);
|
||||
|
||||
if (err1 || err2 || err3 < 0)
|
||||
goto retry;
|
||||
|
||||
ext4_es_print_tree(inode);
|
||||
@ -1051,7 +1067,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
}
|
||||
|
||||
struct rsvd_count {
|
||||
int ndelonly;
|
||||
int ndelayed;
|
||||
bool first_do_lblk_found;
|
||||
ext4_lblk_t first_do_lblk;
|
||||
ext4_lblk_t last_do_lblk;
|
||||
@ -1077,10 +1093,10 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct rb_node *node;
|
||||
|
||||
rc->ndelonly = 0;
|
||||
rc->ndelayed = 0;
|
||||
|
||||
/*
|
||||
* for bigalloc, note the first delonly block in the range has not
|
||||
* for bigalloc, note the first delayed block in the range has not
|
||||
* been found, record the extent containing the block to the left of
|
||||
* the region to be removed, if any, and note that there's no partial
|
||||
* cluster to track
|
||||
@ -1100,9 +1116,8 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
|
||||
}
|
||||
|
||||
/*
|
||||
* count_rsvd - count the clusters containing delayed and not unwritten
|
||||
* (delonly) blocks in a range within an extent and add to
|
||||
* the running tally in rsvd_count
|
||||
* count_rsvd - count the clusters containing delayed blocks in a range
|
||||
* within an extent and add to the running tally in rsvd_count
|
||||
*
|
||||
* @inode - file containing extent
|
||||
* @lblk - first block in range
|
||||
@ -1119,13 +1134,13 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
ext4_lblk_t i, end, nclu;
|
||||
|
||||
if (!ext4_es_is_delonly(es))
|
||||
if (!ext4_es_is_delayed(es))
|
||||
return;
|
||||
|
||||
WARN_ON(len <= 0);
|
||||
|
||||
if (sbi->s_cluster_ratio == 1) {
|
||||
rc->ndelonly += (int) len;
|
||||
rc->ndelayed += (int) len;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1135,7 +1150,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
|
||||
end = lblk + (ext4_lblk_t) len - 1;
|
||||
end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
|
||||
|
||||
/* record the first block of the first delonly extent seen */
|
||||
/* record the first block of the first delayed extent seen */
|
||||
if (!rc->first_do_lblk_found) {
|
||||
rc->first_do_lblk = i;
|
||||
rc->first_do_lblk_found = true;
|
||||
@ -1149,7 +1164,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
|
||||
* doesn't start with it, count it and stop tracking
|
||||
*/
|
||||
if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
|
||||
rc->ndelonly++;
|
||||
rc->ndelayed++;
|
||||
rc->partial = false;
|
||||
}
|
||||
|
||||
@ -1159,7 +1174,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
|
||||
*/
|
||||
if (EXT4_LBLK_COFF(sbi, i) != 0) {
|
||||
if (end >= EXT4_LBLK_CFILL(sbi, i)) {
|
||||
rc->ndelonly++;
|
||||
rc->ndelayed++;
|
||||
rc->partial = false;
|
||||
i = EXT4_LBLK_CFILL(sbi, i) + 1;
|
||||
}
|
||||
@ -1167,11 +1182,11 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
|
||||
|
||||
/*
|
||||
* if the current cluster starts on a cluster boundary, count the
|
||||
* number of whole delonly clusters in the extent
|
||||
* number of whole delayed clusters in the extent
|
||||
*/
|
||||
if ((i + sbi->s_cluster_ratio - 1) <= end) {
|
||||
nclu = (end - i + 1) >> sbi->s_cluster_bits;
|
||||
rc->ndelonly += nclu;
|
||||
rc->ndelayed += nclu;
|
||||
i += nclu << sbi->s_cluster_bits;
|
||||
}
|
||||
|
||||
@ -1231,10 +1246,9 @@ static struct pending_reservation *__pr_tree_search(struct rb_root *root,
|
||||
* @rc - pointer to reserved count data
|
||||
*
|
||||
* The number of reservations to be released is equal to the number of
|
||||
* clusters containing delayed and not unwritten (delonly) blocks within
|
||||
* the range, minus the number of clusters still containing delonly blocks
|
||||
* at the ends of the range, and minus the number of pending reservations
|
||||
* within the range.
|
||||
* clusters containing delayed blocks within the range, minus the number of
|
||||
* clusters still containing delayed blocks at the ends of the range, and
|
||||
* minus the number of pending reservations within the range.
|
||||
*/
|
||||
static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
struct extent_status *right_es,
|
||||
@ -1245,33 +1259,33 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
|
||||
struct rb_node *node;
|
||||
ext4_lblk_t first_lclu, last_lclu;
|
||||
bool left_delonly, right_delonly, count_pending;
|
||||
bool left_delayed, right_delayed, count_pending;
|
||||
struct extent_status *es;
|
||||
|
||||
if (sbi->s_cluster_ratio > 1) {
|
||||
/* count any remaining partial cluster */
|
||||
if (rc->partial)
|
||||
rc->ndelonly++;
|
||||
rc->ndelayed++;
|
||||
|
||||
if (rc->ndelonly == 0)
|
||||
if (rc->ndelayed == 0)
|
||||
return 0;
|
||||
|
||||
first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
|
||||
last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
|
||||
|
||||
/*
|
||||
* decrease the delonly count by the number of clusters at the
|
||||
* ends of the range that still contain delonly blocks -
|
||||
* decrease the delayed count by the number of clusters at the
|
||||
* ends of the range that still contain delayed blocks -
|
||||
* these clusters still need to be reserved
|
||||
*/
|
||||
left_delonly = right_delonly = false;
|
||||
left_delayed = right_delayed = false;
|
||||
|
||||
es = rc->left_es;
|
||||
while (es && ext4_es_end(es) >=
|
||||
EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
|
||||
if (ext4_es_is_delonly(es)) {
|
||||
rc->ndelonly--;
|
||||
left_delonly = true;
|
||||
if (ext4_es_is_delayed(es)) {
|
||||
rc->ndelayed--;
|
||||
left_delayed = true;
|
||||
break;
|
||||
}
|
||||
node = rb_prev(&es->rb_node);
|
||||
@ -1279,7 +1293,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
break;
|
||||
es = rb_entry(node, struct extent_status, rb_node);
|
||||
}
|
||||
if (right_es && (!left_delonly || first_lclu != last_lclu)) {
|
||||
if (right_es && (!left_delayed || first_lclu != last_lclu)) {
|
||||
if (end < ext4_es_end(right_es)) {
|
||||
es = right_es;
|
||||
} else {
|
||||
@ -1289,9 +1303,9 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
}
|
||||
while (es && es->es_lblk <=
|
||||
EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
|
||||
if (ext4_es_is_delonly(es)) {
|
||||
rc->ndelonly--;
|
||||
right_delonly = true;
|
||||
if (ext4_es_is_delayed(es)) {
|
||||
rc->ndelayed--;
|
||||
right_delayed = true;
|
||||
break;
|
||||
}
|
||||
node = rb_next(&es->rb_node);
|
||||
@ -1305,21 +1319,21 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
/*
|
||||
* Determine the block range that should be searched for
|
||||
* pending reservations, if any. Clusters on the ends of the
|
||||
* original removed range containing delonly blocks are
|
||||
* original removed range containing delayed blocks are
|
||||
* excluded. They've already been accounted for and it's not
|
||||
* possible to determine if an associated pending reservation
|
||||
* should be released with the information available in the
|
||||
* extents status tree.
|
||||
*/
|
||||
if (first_lclu == last_lclu) {
|
||||
if (left_delonly | right_delonly)
|
||||
if (left_delayed | right_delayed)
|
||||
count_pending = false;
|
||||
else
|
||||
count_pending = true;
|
||||
} else {
|
||||
if (left_delonly)
|
||||
if (left_delayed)
|
||||
first_lclu++;
|
||||
if (right_delonly)
|
||||
if (right_delayed)
|
||||
last_lclu--;
|
||||
if (first_lclu <= last_lclu)
|
||||
count_pending = true;
|
||||
@ -1330,13 +1344,13 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
/*
|
||||
* a pending reservation found between first_lclu and last_lclu
|
||||
* represents an allocated cluster that contained at least one
|
||||
* delonly block, so the delonly total must be reduced by one
|
||||
* delayed block, so the delayed total must be reduced by one
|
||||
* for each pending reservation found and released
|
||||
*/
|
||||
if (count_pending) {
|
||||
pr = __pr_tree_search(&tree->root, first_lclu);
|
||||
while (pr && pr->lclu <= last_lclu) {
|
||||
rc->ndelonly--;
|
||||
rc->ndelayed--;
|
||||
node = rb_next(&pr->rb_node);
|
||||
rb_erase(&pr->rb_node, &tree->root);
|
||||
__free_pending(pr);
|
||||
@ -1347,7 +1361,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc->ndelonly;
|
||||
return rc->ndelayed;
|
||||
}
|
||||
|
||||
|
||||
@ -1940,7 +1954,7 @@ static struct pending_reservation *__get_pending(struct inode *inode,
|
||||
* @lblk - logical block in the cluster to be added
|
||||
* @prealloc - preallocated pending entry
|
||||
*
|
||||
* Returns 0 on successful insertion and -ENOMEM on failure. If the
|
||||
* Returns 1 on successful insertion and -ENOMEM on failure. If the
|
||||
* pending reservation is already in the set, returns successfully.
|
||||
*/
|
||||
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
|
||||
@ -1984,6 +1998,7 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
|
||||
|
||||
rb_link_node(&pr->rb_node, parent, p);
|
||||
rb_insert_color(&pr->rb_node, &tree->root);
|
||||
ret = 1;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
@ -2105,7 +2120,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
es1 = __es_alloc_extent(true);
|
||||
if ((err1 || err2) && !es2)
|
||||
es2 = __es_alloc_extent(true);
|
||||
if (err1 || err2 || err3) {
|
||||
if (err1 || err2 || err3 < 0) {
|
||||
if (lclu_allocated && !pr1)
|
||||
pr1 = __alloc_pending(true);
|
||||
if (end_allocated && !pr2)
|
||||
@ -2135,7 +2150,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
|
||||
if (lclu_allocated) {
|
||||
err3 = __insert_pending(inode, lblk, &pr1);
|
||||
if (err3 != 0)
|
||||
if (err3 < 0)
|
||||
goto error;
|
||||
if (pr1) {
|
||||
__free_pending(pr1);
|
||||
@ -2144,7 +2159,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
}
|
||||
if (end_allocated) {
|
||||
err3 = __insert_pending(inode, end, &pr2);
|
||||
if (err3 != 0)
|
||||
if (err3 < 0)
|
||||
goto error;
|
||||
if (pr2) {
|
||||
__free_pending(pr2);
|
||||
@ -2153,7 +2168,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
}
|
||||
error:
|
||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||
if (err1 || err2 || err3)
|
||||
if (err1 || err2 || err3 < 0)
|
||||
goto retry;
|
||||
|
||||
ext4_es_print_tree(inode);
|
||||
@ -2161,94 +2176,6 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* __es_delayed_clu - count number of clusters containing blocks that
|
||||
* are delayed only
|
||||
*
|
||||
* @inode - file containing block range
|
||||
* @start - logical block defining start of range
|
||||
* @end - logical block defining end of range
|
||||
*
|
||||
* Returns the number of clusters containing only delayed (not delayed
|
||||
* and unwritten) blocks in the range specified by @start and @end. Any
|
||||
* cluster or part of a cluster within the range and containing a delayed
|
||||
* and not unwritten block within the range is counted as a whole cluster.
|
||||
*/
|
||||
static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
|
||||
ext4_lblk_t end)
|
||||
{
|
||||
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
|
||||
struct extent_status *es;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct rb_node *node;
|
||||
ext4_lblk_t first_lclu, last_lclu;
|
||||
unsigned long long last_counted_lclu;
|
||||
unsigned int n = 0;
|
||||
|
||||
/* guaranteed to be unequal to any ext4_lblk_t value */
|
||||
last_counted_lclu = ~0ULL;
|
||||
|
||||
es = __es_tree_search(&tree->root, start);
|
||||
|
||||
while (es && (es->es_lblk <= end)) {
|
||||
if (ext4_es_is_delonly(es)) {
|
||||
if (es->es_lblk <= start)
|
||||
first_lclu = EXT4_B2C(sbi, start);
|
||||
else
|
||||
first_lclu = EXT4_B2C(sbi, es->es_lblk);
|
||||
|
||||
if (ext4_es_end(es) >= end)
|
||||
last_lclu = EXT4_B2C(sbi, end);
|
||||
else
|
||||
last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
|
||||
|
||||
if (first_lclu == last_counted_lclu)
|
||||
n += last_lclu - first_lclu;
|
||||
else
|
||||
n += last_lclu - first_lclu + 1;
|
||||
last_counted_lclu = last_lclu;
|
||||
}
|
||||
node = rb_next(&es->rb_node);
|
||||
if (!node)
|
||||
break;
|
||||
es = rb_entry(node, struct extent_status, rb_node);
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_es_delayed_clu - count number of clusters containing blocks that
|
||||
* are both delayed and unwritten
|
||||
*
|
||||
* @inode - file containing block range
|
||||
* @lblk - logical block defining start of range
|
||||
* @len - number of blocks in range
|
||||
*
|
||||
* Locking for external use of __es_delayed_clu().
|
||||
*/
|
||||
unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
ext4_lblk_t end;
|
||||
unsigned int n;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
end = lblk + len - 1;
|
||||
WARN_ON(end < lblk);
|
||||
|
||||
read_lock(&ei->i_es_lock);
|
||||
|
||||
n = __es_delayed_clu(inode, lblk, end);
|
||||
|
||||
read_unlock(&ei->i_es_lock);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* __revise_pending - makes, cancels, or leaves unchanged pending cluster
|
||||
* reservations for a specified block range depending
|
||||
@ -2263,7 +2190,9 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
||||
*
|
||||
* Used after a newly allocated extent is added to the extents status tree.
|
||||
* Requires that the extents in the range have either written or unwritten
|
||||
* status. Must be called while holding i_es_lock.
|
||||
* status. Must be called while holding i_es_lock. Returns number of new
|
||||
* inserts pending cluster on insert pendings, returns 0 on remove pendings,
|
||||
* return -ENOMEM on failure.
|
||||
*/
|
||||
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len,
|
||||
@ -2273,6 +2202,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t end = lblk + len - 1;
|
||||
ext4_lblk_t first, last;
|
||||
bool f_del = false, l_del = false;
|
||||
int pendings = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (len == 0)
|
||||
@ -2294,49 +2224,53 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
|
||||
if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
|
||||
first = EXT4_LBLK_CMASK(sbi, lblk);
|
||||
if (first != lblk)
|
||||
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
|
||||
f_del = __es_scan_range(inode, &ext4_es_is_delayed,
|
||||
first, lblk - 1);
|
||||
if (f_del) {
|
||||
ret = __insert_pending(inode, first, prealloc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
pendings += ret;
|
||||
} else {
|
||||
last = EXT4_LBLK_CMASK(sbi, end) +
|
||||
sbi->s_cluster_ratio - 1;
|
||||
if (last != end)
|
||||
l_del = __es_scan_range(inode,
|
||||
&ext4_es_is_delonly,
|
||||
&ext4_es_is_delayed,
|
||||
end + 1, last);
|
||||
if (l_del) {
|
||||
ret = __insert_pending(inode, last, prealloc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
pendings += ret;
|
||||
} else
|
||||
__remove_pending(inode, last);
|
||||
}
|
||||
} else {
|
||||
first = EXT4_LBLK_CMASK(sbi, lblk);
|
||||
if (first != lblk)
|
||||
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
|
||||
f_del = __es_scan_range(inode, &ext4_es_is_delayed,
|
||||
first, lblk - 1);
|
||||
if (f_del) {
|
||||
ret = __insert_pending(inode, first, prealloc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
pendings += ret;
|
||||
} else
|
||||
__remove_pending(inode, first);
|
||||
|
||||
last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
|
||||
if (last != end)
|
||||
l_del = __es_scan_range(inode, &ext4_es_is_delonly,
|
||||
l_del = __es_scan_range(inode, &ext4_es_is_delayed,
|
||||
end + 1, last);
|
||||
if (l_del) {
|
||||
ret = __insert_pending(inode, last, prealloc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
pendings += ret;
|
||||
} else
|
||||
__remove_pending(inode, last);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
return (ret < 0) ? ret : pendings;
|
||||
}
|
||||
|
@ -42,6 +42,10 @@ enum {
|
||||
#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
|
||||
#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
|
||||
|
||||
/*
|
||||
* Besides EXTENT_STATUS_REFERENCED, all these extent type masks
|
||||
* are exclusive, only one type can be set at a time.
|
||||
*/
|
||||
#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B)
|
||||
#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B)
|
||||
#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B)
|
||||
@ -51,7 +55,9 @@ enum {
|
||||
#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
|
||||
EXTENT_STATUS_UNWRITTEN | \
|
||||
EXTENT_STATUS_DELAYED | \
|
||||
EXTENT_STATUS_HOLE) << ES_SHIFT)
|
||||
EXTENT_STATUS_HOLE))
|
||||
|
||||
#define ES_TYPE_VALID(type) ((type) && !((type) & ((type) - 1)))
|
||||
|
||||
struct ext4_sb_info;
|
||||
struct ext4_extent;
|
||||
@ -129,7 +135,7 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
|
||||
|
||||
extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||
unsigned int status);
|
||||
unsigned int status, int flags);
|
||||
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||
unsigned int status);
|
||||
@ -156,7 +162,7 @@ static inline unsigned int ext4_es_status(struct extent_status *es)
|
||||
|
||||
static inline unsigned int ext4_es_type(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
|
||||
return (es->es_pblk >> ES_SHIFT) & ES_TYPE_MASK;
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_written(struct extent_status *es)
|
||||
@ -184,11 +190,6 @@ static inline int ext4_es_is_mapped(struct extent_status *es)
|
||||
return (ext4_es_is_written(es) || ext4_es_is_unwritten(es));
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_delonly(struct extent_status *es)
|
||||
{
|
||||
return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es));
|
||||
}
|
||||
|
||||
static inline void ext4_es_set_referenced(struct extent_status *es)
|
||||
{
|
||||
es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
|
||||
@ -224,17 +225,12 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
|
||||
es->es_pblk = block;
|
||||
}
|
||||
|
||||
static inline void ext4_es_store_status(struct extent_status *es,
|
||||
unsigned int status)
|
||||
{
|
||||
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
|
||||
(es->es_pblk & ~ES_MASK);
|
||||
}
|
||||
|
||||
static inline void ext4_es_store_pblock_status(struct extent_status *es,
|
||||
ext4_fsblk_t pb,
|
||||
unsigned int status)
|
||||
{
|
||||
WARN_ON_ONCE(!ES_TYPE_VALID(status & ES_TYPE_MASK));
|
||||
|
||||
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
|
||||
(pb & ~ES_MASK);
|
||||
}
|
||||
@ -252,8 +248,6 @@ extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
|
||||
extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len, bool lclu_allocated,
|
||||
bool end_allocated);
|
||||
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t len);
|
||||
extern void ext4_clear_inode_es(struct inode *inode);
|
||||
|
||||
#endif /* _EXT4_EXTENTS_STATUS_H */
|
||||
|
@ -339,22 +339,29 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
tid_t tid;
|
||||
bool has_transaction = true;
|
||||
bool is_ineligible;
|
||||
|
||||
if (ext4_fc_disabled(sb))
|
||||
return;
|
||||
|
||||
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
|
||||
if (handle && !IS_ERR(handle))
|
||||
tid = handle->h_transaction->t_tid;
|
||||
else {
|
||||
read_lock(&sbi->s_journal->j_state_lock);
|
||||
tid = sbi->s_journal->j_running_transaction ?
|
||||
sbi->s_journal->j_running_transaction->t_tid : 0;
|
||||
if (sbi->s_journal->j_running_transaction)
|
||||
tid = sbi->s_journal->j_running_transaction->t_tid;
|
||||
else
|
||||
has_transaction = false;
|
||||
read_unlock(&sbi->s_journal->j_state_lock);
|
||||
}
|
||||
spin_lock(&sbi->s_fc_lock);
|
||||
if (tid_gt(tid, sbi->s_fc_ineligible_tid))
|
||||
is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
|
||||
if (has_transaction &&
|
||||
(!is_ineligible ||
|
||||
(is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid))))
|
||||
sbi->s_fc_ineligible_tid = tid;
|
||||
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
|
||||
spin_unlock(&sbi->s_fc_lock);
|
||||
WARN_ON(reason >= EXT4_FC_REASON_MAX);
|
||||
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
|
||||
@ -1288,8 +1295,21 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
|
||||
list_del_init(&iter->i_fc_list);
|
||||
ext4_clear_inode_state(&iter->vfs_inode,
|
||||
EXT4_STATE_FC_COMMITTING);
|
||||
if (tid_geq(tid, iter->i_sync_tid))
|
||||
if (tid_geq(tid, iter->i_sync_tid)) {
|
||||
ext4_fc_reset_inode(&iter->vfs_inode);
|
||||
} else if (full) {
|
||||
/*
|
||||
* We are called after a full commit, inode has been
|
||||
* modified while the commit was running. Re-enqueue
|
||||
* the inode into STAGING, which will then be splice
|
||||
* back into MAIN. This cannot happen during
|
||||
* fastcommit because the journal is locked all the
|
||||
* time in that case (and tid doesn't increase so
|
||||
* tid check above isn't reliable).
|
||||
*/
|
||||
list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list,
|
||||
&sbi->s_fc_q[FC_Q_STAGING]);
|
||||
}
|
||||
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
|
||||
smp_mb();
|
||||
#if (BITS_PER_LONG < 64)
|
||||
@ -1772,7 +1792,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
|
||||
|
||||
if (ret == 0) {
|
||||
/* Range is not mapped */
|
||||
path = ext4_find_extent(inode, cur, NULL, 0);
|
||||
path = ext4_find_extent(inode, cur, path, 0);
|
||||
if (IS_ERR(path))
|
||||
goto out;
|
||||
memset(&newex, 0, sizeof(newex));
|
||||
@ -1783,11 +1803,10 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
|
||||
if (ext4_ext_is_unwritten(ex))
|
||||
ext4_ext_mark_unwritten(&newex);
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ret = ext4_ext_insert_extent(
|
||||
NULL, inode, &path, &newex, 0);
|
||||
path = ext4_ext_insert_extent(NULL, inode,
|
||||
path, &newex, 0);
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
ext4_free_ext_path(path);
|
||||
if (ret)
|
||||
if (IS_ERR(path))
|
||||
goto out;
|
||||
goto next;
|
||||
}
|
||||
@ -1836,6 +1855,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
|
||||
ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
|
||||
sb->s_blocksize_bits);
|
||||
out:
|
||||
ext4_free_ext_path(path);
|
||||
iput(inode);
|
||||
return 0;
|
||||
}
|
||||
@ -1936,12 +1956,13 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
|
||||
break;
|
||||
|
||||
if (ret > 0) {
|
||||
path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
|
||||
path = ext4_find_extent(inode, map.m_lblk, path, 0);
|
||||
if (!IS_ERR(path)) {
|
||||
for (j = 0; j < path->p_depth; j++)
|
||||
ext4_mb_mark_bb(inode->i_sb,
|
||||
path[j].p_block, 1, true);
|
||||
ext4_free_ext_path(path);
|
||||
} else {
|
||||
path = NULL;
|
||||
}
|
||||
cur += ret;
|
||||
ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
|
||||
@ -1952,6 +1973,8 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
|
||||
}
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
ext4_free_ext_path(path);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -306,7 +306,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
|
||||
}
|
||||
|
||||
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
|
||||
ssize_t count)
|
||||
ssize_t written, ssize_t count)
|
||||
{
|
||||
handle_t *handle;
|
||||
|
||||
@ -315,7 +315,7 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
if (ext4_update_inode_size(inode, offset + count)) {
|
||||
if (ext4_update_inode_size(inode, offset + written)) {
|
||||
int ret = ext4_mark_inode_dirty(handle, inode);
|
||||
if (unlikely(ret)) {
|
||||
ext4_journal_stop(handle);
|
||||
@ -323,21 +323,21 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
|
||||
}
|
||||
}
|
||||
|
||||
if (inode->i_nlink)
|
||||
if ((written == count) && inode->i_nlink)
|
||||
ext4_orphan_del(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
return count;
|
||||
return written;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up the inode after DIO or DAX extending write has completed and the
|
||||
* inode size has been updated using ext4_handle_inode_extension().
|
||||
*/
|
||||
static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
|
||||
static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
|
||||
{
|
||||
lockdep_assert_held_write(&inode->i_rwsem);
|
||||
if (count < 0) {
|
||||
if (need_trunc) {
|
||||
ext4_truncate_failed_write(inode);
|
||||
/*
|
||||
* If the truncate operation failed early, then the inode may
|
||||
@ -393,7 +393,7 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
|
||||
if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) &&
|
||||
pos + size <= i_size_read(inode))
|
||||
return size;
|
||||
return ext4_handle_inode_extension(inode, pos, size);
|
||||
return ext4_handle_inode_extension(inode, pos, size, size);
|
||||
}
|
||||
|
||||
static const struct iomap_dio_ops ext4_dio_write_ops = {
|
||||
@ -586,7 +586,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
* writeback of delalloc blocks.
|
||||
*/
|
||||
WARN_ON_ONCE(ret == -EIOCBQUEUED);
|
||||
ext4_inode_extension_cleanup(inode, ret);
|
||||
ext4_inode_extension_cleanup(inode, ret < 0);
|
||||
}
|
||||
|
||||
out:
|
||||
@ -669,8 +669,8 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
|
||||
|
||||
if (extend) {
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret);
|
||||
ext4_inode_extension_cleanup(inode, ret);
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret, count);
|
||||
ext4_inode_extension_cleanup(inode, ret < (ssize_t)count);
|
||||
}
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
|
@ -87,10 +87,10 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
|
||||
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||
return 0;
|
||||
|
||||
grp = ext4_get_group_info(sb, block_group);
|
||||
|
||||
if (buffer_verified(bh))
|
||||
return 0;
|
||||
|
||||
grp = ext4_get_group_info(sb, block_group);
|
||||
if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
@ -98,8 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
|
||||
if (buffer_verified(bh))
|
||||
goto verified;
|
||||
blk = ext4_inode_bitmap(sb, desc);
|
||||
if (!ext4_inode_bitmap_csum_verify(sb, desc, bh,
|
||||
EXT4_INODES_PER_GROUP(sb) / 8) ||
|
||||
if (!ext4_inode_bitmap_csum_verify(sb, desc, bh) ||
|
||||
ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
|
||||
ext4_unlock_group(sb, block_group);
|
||||
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
|
||||
@ -327,8 +326,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
||||
if (percpu_counter_initialized(&sbi->s_dirs_counter))
|
||||
percpu_counter_dec(&sbi->s_dirs_counter);
|
||||
}
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh,
|
||||
EXT4_INODES_PER_GROUP(sb) / 8);
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh);
|
||||
ext4_group_desc_csum_set(sb, block_group, gdp);
|
||||
ext4_unlock_group(sb, block_group);
|
||||
|
||||
@ -514,6 +512,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
if (min_inodes < 1)
|
||||
min_inodes = 1;
|
||||
min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
|
||||
if (min_clusters < 0)
|
||||
min_clusters = 0;
|
||||
|
||||
/*
|
||||
* Start looking in the flex group where we last allocated an
|
||||
@ -755,10 +755,10 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
|
||||
struct ext4_group_desc *gdp;
|
||||
ext4_group_t group;
|
||||
int bit;
|
||||
int err = -EFSCORRUPTED;
|
||||
int err;
|
||||
|
||||
if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
|
||||
goto out;
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
|
||||
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
|
||||
@ -772,7 +772,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
|
||||
}
|
||||
|
||||
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
|
||||
if (!gdp || !group_desc_bh) {
|
||||
if (!gdp) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@ -851,8 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
|
||||
|
||||
ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
|
||||
if (ext4_has_group_desc_csum(sb)) {
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
|
||||
EXT4_INODES_PER_GROUP(sb) / 8);
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
|
||||
ext4_group_desc_csum_set(sb, group, gdp);
|
||||
}
|
||||
|
||||
@ -860,6 +859,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
|
||||
err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh);
|
||||
sync_dirty_buffer(group_desc_bh);
|
||||
out:
|
||||
brelse(inode_bitmap_bh);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1053,14 +1053,14 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
|
||||
brelse(inode_bitmap_bh);
|
||||
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
|
||||
/* Skip groups with suspicious inode tables */
|
||||
if (((!(sbi->s_mount_state & EXT4_FC_REPLAY))
|
||||
&& EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ||
|
||||
IS_ERR(inode_bitmap_bh)) {
|
||||
if (IS_ERR(inode_bitmap_bh)) {
|
||||
inode_bitmap_bh = NULL;
|
||||
goto next_group;
|
||||
}
|
||||
if (!(sbi->s_mount_state & EXT4_FC_REPLAY) &&
|
||||
EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
|
||||
goto next_group;
|
||||
|
||||
repeat_in_this_group:
|
||||
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
|
||||
if (!ret2)
|
||||
goto next_group;
|
||||
@ -1110,8 +1110,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
|
||||
if (!ret2)
|
||||
goto got; /* we grabbed the inode! */
|
||||
|
||||
if (ino < EXT4_INODES_PER_GROUP(sb))
|
||||
goto repeat_in_this_group;
|
||||
next_group:
|
||||
if (++group == ngroups)
|
||||
group = 0;
|
||||
@ -1224,8 +1222,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
|
||||
}
|
||||
}
|
||||
if (ext4_has_group_desc_csum(sb)) {
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
|
||||
EXT4_INODES_PER_GROUP(sb) / 8);
|
||||
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
|
||||
ext4_group_desc_csum_set(sb, group, gdp);
|
||||
}
|
||||
ext4_unlock_group(sb, group);
|
||||
|
@ -652,13 +652,6 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_update_inode_fsync_trans(handle, inode, 1);
|
||||
count = ar.len;
|
||||
|
||||
/*
|
||||
* Update reserved blocks/metadata blocks after successful block
|
||||
* allocation which had been deferred till now.
|
||||
*/
|
||||
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
||||
ext4_da_update_reserve_space(inode, count, 1);
|
||||
|
||||
got_it:
|
||||
map->m_flags |= EXT4_MAP_MAPPED;
|
||||
map->m_pblk = le32_to_cpu(chain[depth-1].key);
|
||||
|
@ -601,10 +601,11 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
|
||||
goto out;
|
||||
|
||||
if (ext4_should_dioread_nolock(inode)) {
|
||||
ret = __block_write_begin(folio, from, to,
|
||||
ext4_get_block_unwritten);
|
||||
ret = ext4_block_write_begin(handle, folio, from, to,
|
||||
ext4_get_block_unwritten);
|
||||
} else
|
||||
ret = __block_write_begin(folio, from, to, ext4_get_block);
|
||||
ret = ext4_block_write_begin(handle, folio, from, to,
|
||||
ext4_get_block);
|
||||
|
||||
if (!ret && ext4_should_journal_data(inode)) {
|
||||
ret = ext4_walk_page_buffers(handle, inode,
|
||||
@ -856,8 +857,8 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __block_write_begin(folio, 0, inline_size,
|
||||
ext4_da_get_block_prep);
|
||||
ret = ext4_block_write_begin(NULL, folio, 0, inline_size,
|
||||
ext4_da_get_block_prep);
|
||||
if (ret) {
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
folio_unlock(folio);
|
||||
@ -1665,24 +1666,36 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
|
||||
struct ext4_dir_entry_2 **res_dir,
|
||||
int *has_inline_data)
|
||||
{
|
||||
struct ext4_xattr_ibody_find is = {
|
||||
.s = { .not_found = -ENODATA, },
|
||||
};
|
||||
struct ext4_xattr_info i = {
|
||||
.name_index = EXT4_XATTR_INDEX_SYSTEM,
|
||||
.name = EXT4_XATTR_SYSTEM_DATA,
|
||||
};
|
||||
int ret;
|
||||
struct ext4_iloc iloc;
|
||||
void *inline_start;
|
||||
int inline_size;
|
||||
|
||||
if (ext4_get_inode_loc(dir, &iloc))
|
||||
return NULL;
|
||||
ret = ext4_get_inode_loc(dir, &is.iloc);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
down_read(&EXT4_I(dir)->xattr_sem);
|
||||
|
||||
ret = ext4_xattr_ibody_find(dir, &i, &is);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!ext4_has_inline_data(dir)) {
|
||||
*has_inline_data = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
|
||||
inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block +
|
||||
EXT4_INLINE_DOTDOT_SIZE;
|
||||
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
|
||||
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
|
||||
ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
|
||||
dir, fname, 0, res_dir);
|
||||
if (ret == 1)
|
||||
goto out_find;
|
||||
@ -1692,20 +1705,23 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
|
||||
if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
|
||||
goto out;
|
||||
|
||||
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
|
||||
inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc);
|
||||
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
|
||||
|
||||
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
|
||||
ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
|
||||
dir, fname, 0, res_dir);
|
||||
if (ret == 1)
|
||||
goto out_find;
|
||||
|
||||
out:
|
||||
brelse(iloc.bh);
|
||||
iloc.bh = NULL;
|
||||
brelse(is.iloc.bh);
|
||||
if (ret < 0)
|
||||
is.iloc.bh = ERR_PTR(ret);
|
||||
else
|
||||
is.iloc.bh = NULL;
|
||||
out_find:
|
||||
up_read(&EXT4_I(dir)->xattr_sem);
|
||||
return iloc.bh;
|
||||
return is.iloc.bh;
|
||||
}
|
||||
|
||||
int ext4_delete_inline_entry(handle_t *handle,
|
||||
|
292
fs/ext4/inode.c
292
fs/ext4/inode.c
@ -49,6 +49,11 @@
|
||||
|
||||
#include <trace/events/ext4.h>
|
||||
|
||||
static void ext4_journalled_zero_new_buffers(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct folio *folio,
|
||||
unsigned from, unsigned to);
|
||||
|
||||
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
|
||||
struct ext4_inode_info *ei)
|
||||
{
|
||||
@ -478,7 +483,89 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
|
||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
||||
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
||||
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||
map->m_pblk, status);
|
||||
map->m_pblk, status, 0);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
|
||||
struct ext4_map_blocks *map, int flags)
|
||||
{
|
||||
struct extent_status es;
|
||||
unsigned int status;
|
||||
int err, retval = 0;
|
||||
|
||||
/*
|
||||
* We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE
|
||||
* indicates that the blocks and quotas has already been
|
||||
* checked when the data was copied into the page cache.
|
||||
*/
|
||||
if (map->m_flags & EXT4_MAP_DELAYED)
|
||||
flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
|
||||
|
||||
/*
|
||||
* Here we clear m_flags because after allocating an new extent,
|
||||
* it will be set again.
|
||||
*/
|
||||
map->m_flags &= ~EXT4_MAP_FLAGS;
|
||||
|
||||
/*
|
||||
* We need to check for EXT4 here because migrate could have
|
||||
* changed the inode type in between.
|
||||
*/
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
|
||||
retval = ext4_ext_map_blocks(handle, inode, map, flags);
|
||||
} else {
|
||||
retval = ext4_ind_map_blocks(handle, inode, map, flags);
|
||||
|
||||
/*
|
||||
* We allocated new blocks which will result in i_data's
|
||||
* format changing. Force the migrate to fail by clearing
|
||||
* migrate flags.
|
||||
*/
|
||||
if (retval > 0 && map->m_flags & EXT4_MAP_NEW)
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
|
||||
}
|
||||
if (retval <= 0)
|
||||
return retval;
|
||||
|
||||
if (unlikely(retval != map->m_len)) {
|
||||
ext4_warning(inode->i_sb,
|
||||
"ES len assertion failed for inode %lu: "
|
||||
"retval %d != map->m_len %d",
|
||||
inode->i_ino, retval, map->m_len);
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to zeroout blocks before inserting them into extent
|
||||
* status tree. Otherwise someone could look them up there and
|
||||
* use them before they are really zeroed. We also have to
|
||||
* unmap metadata before zeroing as otherwise writeback can
|
||||
* overwrite zeros with stale data from block device.
|
||||
*/
|
||||
if (flags & EXT4_GET_BLOCKS_ZERO &&
|
||||
map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) {
|
||||
err = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk,
|
||||
map->m_len);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the extent has been zeroed out, we don't need to update
|
||||
* extent status tree.
|
||||
*/
|
||||
if (flags & EXT4_GET_BLOCKS_PRE_IO &&
|
||||
ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
|
||||
if (ext4_es_is_written(&es))
|
||||
return retval;
|
||||
}
|
||||
|
||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
||||
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
||||
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||
map->m_pblk, status, flags);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -576,32 +663,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||
* file system block.
|
||||
*/
|
||||
down_read(&EXT4_I(inode)->i_data_sem);
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
|
||||
retval = ext4_ext_map_blocks(handle, inode, map, 0);
|
||||
} else {
|
||||
retval = ext4_ind_map_blocks(handle, inode, map, 0);
|
||||
}
|
||||
if (retval > 0) {
|
||||
unsigned int status;
|
||||
|
||||
if (unlikely(retval != map->m_len)) {
|
||||
ext4_warning(inode->i_sb,
|
||||
"ES len assertion failed for inode "
|
||||
"%lu: retval %d != map->m_len %d",
|
||||
inode->i_ino, retval, map->m_len);
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
}
retval = ext4_map_query_blocks(handle, inode, map);
up_read((&EXT4_I(inode)->i_data_sem));

found:
@ -630,12 +692,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
return retval;

/*
* Here we clear m_flags because after allocating an new extent,
* it will be set again.
*/
map->m_flags &= ~EXT4_MAP_FLAGS;

/*
* New blocks allocate and/or writing to unwritten extent
* will possibly result in updating i_data, so we take
@ -643,76 +699,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* with create == 1 flag.
*/
down_write(&EXT4_I(inode)->i_data_sem);

/*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
*/
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags);
} else {
retval = ext4_ind_map_blocks(handle, inode, map, flags);

if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
/*
* We allocated new blocks which will result in
* i_data's format changing. Force the migrate
* to fail by clearing migrate flags
*/
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
}
}

if (retval > 0) {
unsigned int status;

if (unlikely(retval != map->m_len)) {
ext4_warning(inode->i_sb,
"ES len assertion failed for inode "
"%lu: retval %d != map->m_len %d",
inode->i_ino, retval, map->m_len);
WARN_ON(1);
}

/*
* We have to zeroout blocks before inserting them into extent
* status tree. Otherwise someone could look them up there and
* use them before they are really zeroed. We also have to
* unmap metadata before zeroing as otherwise writeback can
* overwrite zeros with stale data from block device.
*/
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
retval = ret;
goto out_sem;
}
}

/*
* If the extent has been zeroed out, we don't need to update
* extent status tree.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
if (ext4_es_is_written(&es))
goto out_sem;
}
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
}

out_sem:
retval = ext4_map_create_blocks(handle, inode, map, flags);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
@ -1018,32 +1005,16 @@ static int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh)
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
int dirty = buffer_dirty(bh);
int ret;

if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
/*
* __block_write_begin() could have dirtied some buffers. Clean
* the dirty bit as jbd2_journal_get_write_access() could complain
* otherwise about fs integrity issues. Setting of the dirty bit
* by __block_write_begin() isn't a real problem here as we clear
* the bit before releasing a page lock and thus writeback cannot
* ever write the buffer.
*/
if (dirty)
clear_buffer_dirty(bh);
BUFFER_TRACE(bh, "get write access");
ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
return ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (!ret && dirty)
ret = ext4_dirty_journalled_data(handle, bh);
return ret;
}

#ifdef CONFIG_FS_ENCRYPTION
static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
get_block_t *get_block)
int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
@ -1056,6 +1027,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
struct buffer_head *bh, *head, *wait[2];
int nr_wait = 0;
int i;
bool should_journal_data = ext4_should_journal_data(inode);

BUG_ON(!folio_test_locked(folio));
BUG_ON(from > PAGE_SIZE);
@ -1085,10 +1057,22 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
if (err)
break;
if (buffer_new(bh)) {
/*
* We may be zeroing partial buffers or all new
* buffers in case of failure. Prepare JBD2 for
* that.
*/
if (should_journal_data)
do_journal_get_write_access(handle,
inode, bh);
if (folio_test_uptodate(folio)) {
clear_buffer_new(bh);
/*
* Unlike __block_write_begin() we leave
* dirtying of new uptodate buffers to
* ->write_end() time or
* folio_zero_new_buffers().
*/
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
@ -1118,7 +1102,11 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err)) {
folio_zero_new_buffers(folio, from, to);
if (should_journal_data)
ext4_journalled_zero_new_buffers(handle, inode, folio,
from, to);
else
folio_zero_new_buffers(folio, from, to);
} else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
for (i = 0; i < nr_wait; i++) {
int err2;
@ -1134,7 +1122,6 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,

return err;
}
#endif

/*
* To preserve ordering, it is essential that the hole instantiation and
@ -1216,19 +1203,12 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
/* In case writeback began while the folio was unlocked */
folio_wait_stable(folio);

#ifdef CONFIG_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
ret = ext4_block_write_begin(folio, pos, len,
ret = ext4_block_write_begin(handle, folio, pos, len,
ext4_get_block_unwritten);
else
ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
#else
if (ext4_should_dioread_nolock(inode))
ret = __block_write_begin(folio, pos, len,
ext4_get_block_unwritten);
else
ret = __block_write_begin(folio, pos, len, ext4_get_block);
#endif
ret = ext4_block_write_begin(handle, folio, pos, len,
ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
folio_buffers(folio), from, to,
@ -1241,7 +1221,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,

folio_unlock(folio);
/*
* __block_write_begin may have instantiated a few blocks
* ext4_block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
* i_size_read because we hold i_rwsem.
*
@ -1388,9 +1368,9 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
size = min(to, block_end) - start;

folio_zero_range(folio, start, size);
write_end_fn(handle, inode, bh);
}
clear_buffer_new(bh);
write_end_fn(handle, inode, bh);
}
}
block_start = block_end;
@ -1661,7 +1641,7 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
int ret;

/* Has delalloc reservation? */
if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
if (ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
return 1;

/* Already been allocated? */
@ -1782,7 +1762,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
if (ext4_es_is_delonly(&es)) {
if (ext4_es_is_delayed(&es)) {
map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}
@ -2217,11 +2197,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
* writeback and there is nothing we can do about it so it might result
* in data loss. So use reserved blocks to allocate metadata if
* possible.
*
* We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
* the blocks in question are delalloc blocks. This indicates
* that the blocks and quotas has already been checked when
* the data was copied into the page cache.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL |
@ -2229,8 +2204,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
dioread_nolock = ext4_should_dioread_nolock(inode);
if (dioread_nolock)
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (map->m_flags & BIT(BH_Delay))
get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;

err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0)
@ -2959,11 +2932,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR(folio))
return PTR_ERR(folio);

#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
ret = __block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#endif
ret = ext4_block_write_begin(NULL, folio, pos, len,
ext4_da_get_block_prep);
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
@ -4067,7 +4037,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
stop_block);

ext4_es_insert_extent(inode, first_block, hole_len, ~0,
EXTENT_STATUS_HOLE);
EXTENT_STATUS_HOLE, 0);
up_write(&EXT4_I(inode)->i_data_sem);
}
ext4_fc_track_range(handle, inode, first_block, stop_block);
@ -5276,8 +5246,9 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = 0;
tid_t commit_tid;
int ret;
bool has_transaction;

offset = inode->i_size & (PAGE_SIZE - 1);
/*
@ -5302,12 +5273,14 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
folio_put(folio);
if (ret != -EBUSY)
return;
commit_tid = 0;
has_transaction = false;
read_lock(&journal->j_state_lock);
if (journal->j_committing_transaction)
if (journal->j_committing_transaction) {
commit_tid = journal->j_committing_transaction->t_tid;
has_transaction = true;
}
read_unlock(&journal->j_state_lock);
if (commit_tid)
if (has_transaction)
jbd2_log_wait_commit(journal, commit_tid);
}
}
@ -6216,7 +6189,8 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
if (folio_pos(folio) + len > size)
len = size - folio_pos(folio);

err = __block_write_begin(folio, 0, len, ext4_get_block);
err = ext4_block_write_begin(handle, folio, 0, len,
ext4_get_block);
if (!err) {
ret = VM_FAULT_SIGBUS;
if (ext4_journal_folio_buffers(handle, folio, len))
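Editor's note: the hunks above thread the journal handle into ext4_block_write_begin() so that, in data=journal mode, new buffers get JBD2 write access before they are zeroed or dirtied; callers that have no transaction yet (the delalloc write path below) pass NULL. A minimal sketch of the calling convention implied by this diff — the wrapper function is hypothetical, only the ext4_block_write_begin() signature comes from the hunks above:

/* Sketch only; not a function from this patch set. */
static int sketch_write_begin(handle_t *handle, struct folio *folio,
			      loff_t pos, unsigned len, bool delalloc)
{
	if (delalloc)
		/* no transaction yet, so no handle to pass */
		return ext4_block_write_begin(NULL, folio, pos, len,
					      ext4_da_get_block_prep);
	/* journalled and dioread_nolock paths hand their handle through */
	return ext4_block_write_begin(handle, folio, pos, len,
				      ext4_get_block);
}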
@ -2356,7 +2356,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ex.fe_logical = 0xDEADFA11; /* debug value */

if (max >= ac->ac_g_ex.fe_len &&
ac->ac_g_ex.fe_len == EXT4_B2C(sbi, sbi->s_stripe)) {
ac->ac_g_ex.fe_len == EXT4_NUM_B2C(sbi, sbi->s_stripe)) {
ext4_fsblk_t start;

start = ext4_grp_offs_to_block(ac->ac_sb, &ex);
@ -2553,7 +2553,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
do_div(a, sbi->s_stripe);
i = (a * sbi->s_stripe) - first_group_block;

stripe = EXT4_B2C(sbi, sbi->s_stripe);
stripe = EXT4_NUM_B2C(sbi, sbi->s_stripe);
i = EXT4_B2C(sbi, i);
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
if (!mb_test_bit(i, bitmap)) {
@ -2928,9 +2928,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
if (cr == CR_POWER2_ALIGNED)
ext4_mb_simple_scan_group(ac, &e4b);
else {
bool is_stripe_aligned = sbi->s_stripe &&
bool is_stripe_aligned =
(sbi->s_stripe >=
sbi->s_cluster_ratio) &&
!(ac->ac_g_ex.fe_len %
EXT4_B2C(sbi, sbi->s_stripe));
EXT4_NUM_B2C(sbi, sbi->s_stripe));

if ((cr == CR_GOAL_LEN_FAST ||
cr == CR_BEST_AVAIL_LEN) &&
@ -3075,8 +3077,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
seq_puts(seq, " ]");
if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
seq_puts(seq, " Block bitmap corrupted!");
seq_puts(seq, "\n");

seq_putc(seq, '\n');
return 0;
}

@ -3707,7 +3708,7 @@ int ext4_mb_init(struct super_block *sb)
*/
if (sbi->s_stripe > 1) {
sbi->s_mb_group_prealloc = roundup(
sbi->s_mb_group_prealloc, EXT4_B2C(sbi, sbi->s_stripe));
sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe));
}

sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
@ -3887,11 +3888,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
/*
* Clear the trimmed flag for the group so that the next
* ext4_trim_fs can trim it.
* If the volume is mounted with -o discard, online discard
* is supported and the free blocks will be trimmed online.
*/
if (!test_opt(sb, DISCARD))
EXT4_MB_GRP_CLEAR_TRIMMED(db);
EXT4_MB_GRP_CLEAR_TRIMMED(db);

if (!db->bb_free_root.rb_node) {
/* No more items in the per group rb tree
@ -6515,8 +6513,9 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
" group:%u block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
} else
EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
}

EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);

ext4_lock_group(sb, block_group);
mb_free_blocks(inode, &e4b, bit, count_clusters);
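Editor's note: the mballoc hunks above swap EXT4_B2C() for EXT4_NUM_B2C() when converting the stripe size into clusters. The difference is rounding: EXT4_B2C() maps a block number down to its cluster, while EXT4_NUM_B2C() converts a block count and rounds up, so a stripe smaller than one cluster no longer collapses to zero. A rough sketch of the semantics (simplified, not the ext4 macro definitions verbatim):

/* cluster_ratio = blocks per cluster (a power of two in ext4) */
#define SKETCH_B2C(ratio, blk)       ((blk) / (ratio))                   /* round down */
#define SKETCH_NUM_B2C(ratio, blks)  (((blks) + (ratio) - 1) / (ratio))  /* round up */

With a 16-block cluster and an 8-block stripe, the first form yields 0 clusters and the second yields 1, which is what the stripe-aligned allocation paths above need.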
@ -37,7 +37,6 @@ static int finish_range(handle_t *handle, struct inode *inode,
path = ext4_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
path = NULL;
goto err_out;
}

@ -53,7 +52,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
if (retval < 0)
goto err_out;
retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
path = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
if (IS_ERR(path))
retval = PTR_ERR(path);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
ext4_free_ext_path(path);
@ -663,8 +664,8 @@ int ext4_ind_migrate(struct inode *inode)
if (unlikely(ret2 && !ret))
ret = ret2;
errout:
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
out_unlock:
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return ret;
@ -17,27 +17,23 @@
* get_ext_path() - Find an extent path for designated logical block number.
* @inode: inode to be searched
* @lblock: logical block number to find an extent path
* @ppath: pointer to an extent path pointer (for output)
* @path: pointer to an extent path
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
* ext4_find_extent wrapper. Return an extent path pointer on success,
* or an error pointer on failure.
*/
static inline int
static inline struct ext4_ext_path *
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
struct ext4_ext_path **ppath)
struct ext4_ext_path *path)
{
struct ext4_ext_path *path;

path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
path = ext4_find_extent(inode, lblock, path, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
return path;
if (path[ext_depth(inode)].p_ext == NULL) {
ext4_free_ext_path(path);
*ppath = NULL;
return -ENODATA;
return ERR_PTR(-ENODATA);
}
*ppath = path;
return 0;
return path;
}

/**
@ -95,9 +91,11 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
int ret = 0;
ext4_lblk_t last = from + count;
while (from < last) {
*err = get_ext_path(inode, from, &path);
if (*err)
goto out;
path = get_ext_path(inode, from, path);
if (IS_ERR(path)) {
*err = PTR_ERR(path);
return ret;
}
ext = path[ext_depth(inode)].p_ext;
if (unwritten != ext4_ext_is_unwritten(ext))
goto out;
@ -166,15 +164,16 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
return 0;
}

/* Force page buffers uptodate w/o dropping page's lock */
static int
mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
/* Force folio buffers uptodate w/o dropping folio's lock */
static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
{
struct inode *inode = folio->mapping->host;
sector_t block;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
struct buffer_head *bh, *head;
unsigned int blocksize, block_start, block_end;
int i, err, nr = 0, partial = 0;
int nr = 0;
bool partial = false;

BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));

@ -186,19 +185,21 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
if (!head)
head = create_empty_buffers(folio, blocksize, 0);

block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
block = folio_pos(folio) >> inode->i_blkbits;
block_end = 0;
bh = head;
do {
block_start = block_end;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
partial = true;
continue;
}
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0);
int err = ext4_get_block(inode, block, bh, 0);
if (err)
return err;
if (!buffer_mapped(bh)) {
@ -207,21 +208,30 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
}
}
BUG_ON(nr >= MAX_BUF_PER_PAGE);
arr[nr++] = bh;
}
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
continue;
}
ext4_read_bh_nowait(bh, 0, NULL);
nr++;
} while (block++, (bh = bh->b_this_page) != head);

/* No io required */
if (!nr)
goto out;

for (i = 0; i < nr; i++) {
bh = arr[i];
if (!bh_uptodate_or_lock(bh)) {
err = ext4_read_bh(bh, 0, NULL);
if (err)
return err;
}
}
bh = head;
do {
if (bh_offset(bh) + blocksize <= from)
continue;
if (bh_offset(bh) > to)
break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))
continue;
return -EIO;
} while ((bh = bh->b_this_page) != head);
out:
if (!partial)
folio_mark_uptodate(folio);
@ -624,9 +634,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
int offset_in_page;
int unwritten, cur_len;

ret = get_ext_path(orig_inode, o_start, &path);
if (ret)
path = get_ext_path(orig_inode, o_start, path);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
goto out;
}
ex = path[path->p_depth].p_ext;
cur_blk = le32_to_cpu(ex->ee_block);
cur_len = ext4_ext_get_actual_len(ex);
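Editor's note: this is the calling convention behind the whole "get rid of ppath" series in this pull — helpers such as get_ext_path() and ext4_ext_insert_extent() now take the extents path and return it (possibly reallocated), reporting errors as ERR_PTR values instead of through an output parameter. A hedged sketch of how a caller is expected to use it, based only on the hunks above (the surrounding variables are assumed context):

struct ext4_ext_path *path = NULL;
int err = 0;

path = get_ext_path(inode, lblk, path);		/* may reuse or reallocate */
if (IS_ERR(path)) {
	err = PTR_ERR(path);
	path = NULL;				/* nothing to free on error */
} else {
	/* ... use path[ext_depth(inode)].p_ext ... */
	ext4_free_ext_path(path);
}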
@ -1482,7 +1482,7 @@ static bool ext4_match(struct inode *parent,
}

/*
* Returns 0 if not found, -1 on failure, and 1 on success
* Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success
*/
int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct inode *dir, struct ext4_filename *fname,
@ -1503,7 +1503,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
buf_size, offset))
return -1;
return -EFSCORRUPTED;
*res_dir = de;
return 1;
}
@ -1511,7 +1511,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
if (de_len <= 0)
return -1;
return -EFSCORRUPTED;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
@ -1574,7 +1574,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
&has_inline_data);
if (inlined)
*inlined = has_inline_data;
if (has_inline_data)
if (has_inline_data || IS_ERR(ret))
goto cleanup_and_exit;
}

@ -1663,8 +1663,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
goto cleanup_and_exit;
} else {
brelse(bh);
if (i < 0)
if (i < 0) {
ret = ERR_PTR(i);
goto cleanup_and_exit;
}
}
next:
if (++block >= nblocks)
@ -1758,7 +1760,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
if (retval == 1)
goto success;
brelse(bh);
if (retval == -1) {
if (retval < 0) {
bh = ERR_PTR(ERR_BAD_DX_DIR);
goto errout;
}
@ -1999,7 +2001,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
split = count/2;

hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
continued = split > 0 ? hash2 == map[split - 1].hash : 0;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
(unsigned long)dx_get_block(frame->at),
hash2, split, count-split));
@ -221,7 +221,7 @@ int ext4_mpage_readpages(struct inode *inode,
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
sector_t blocks[MAX_BUF_PER_PAGE];
sector_t first_block;
unsigned page_block;
struct block_device *bdev = inode->i_sb->s_bdev;
int length;
@ -263,6 +263,7 @@ int ext4_mpage_readpages(struct inode *inode,
unsigned map_offset = block_in_file - map.m_lblk;
unsigned last = map.m_len - map_offset;

first_block = map.m_pblk + map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
/* needed? */
@ -271,8 +272,6 @@ int ext4_mpage_readpages(struct inode *inode,
}
if (page_block == blocks_per_page)
break;
blocks[page_block] = map.m_pblk + map_offset +
relative_block;
page_block++;
block_in_file++;
}
@ -307,7 +306,9 @@ int ext4_mpage_readpages(struct inode *inode,
goto confused; /* hole -> non-hole */

/* Contiguous blocks? */
if (page_block && blocks[page_block-1] != map.m_pblk-1)
if (!page_block)
first_block = map.m_pblk;
else if (first_block + page_block != map.m_pblk)
goto confused;
for (relative_block = 0; ; relative_block++) {
if (relative_block == map.m_len) {
@ -316,7 +317,6 @@ int ext4_mpage_readpages(struct inode *inode,
break;
} else if (page_block == blocks_per_page)
break;
blocks[page_block] = map.m_pblk+relative_block;
page_block++;
block_in_file++;
}
@ -339,7 +339,7 @@ int ext4_mpage_readpages(struct inode *inode,
* This folio will go to BIO. Do we need to send this
* BIO off first?
*/
if (bio && (last_block_in_bio != blocks[0] - 1 ||
if (bio && (last_block_in_bio != first_block - 1 ||
!fscrypt_mergeable_bio(bio, inode, next_block))) {
submit_and_realloc:
submit_bio(bio);
@ -355,7 +355,7 @@ int ext4_mpage_readpages(struct inode *inode,
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, folio->index);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_iter.bi_sector = first_block << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
if (rac)
bio->bi_opf |= REQ_RAHEAD;
@ -371,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
submit_bio(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
last_block_in_bio = first_block + blocks_per_page - 1;
continue;
confused:
if (bio) {
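Editor's note: the readpage hunks above drop the per-page blocks[] array and instead track only the first physical block of the run plus a running count, so the "is this mapping contiguous with the run so far?" test becomes a single comparison. A small sketch of that test as implied by the diff (the helper name is hypothetical):

/* true if pblk continues the run that started at first_block */
static bool sketch_is_contiguous(sector_t first_block,
				 unsigned int page_block, sector_t pblk)
{
	return page_block == 0 || first_block + page_block == pblk;
}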
@ -1319,8 +1319,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
if (!bh)
return -EIO;
ext4_inode_bitmap_csum_set(sb, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_inode_bitmap_csum_set(sb, gdp, bh);
brelse(bh);

bh = ext4_get_bitmap(sb, group_data->block_bitmap);
@ -735,11 +735,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,

ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible before
* ->s_flags update
* EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
* modifications. We don't set SB_RDONLY because that requires
* sb->s_umount semaphore and setting it without proper remount
* procedure is confusing code such as freeze_super() leading to
* deadlocks and other problems.
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
}

static void update_super_work(struct work_struct *work)
@ -3045,7 +3046,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset)

seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
seq_puts(seq, "\n");
seq_putc(seq, '\n');
return rc;
}

@ -5087,16 +5088,27 @@ static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
return ret;
}

static void ext4_hash_info_init(struct super_block *sb)
static int ext4_hash_info_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
unsigned int i;

sbi->s_def_hash_version = es->s_def_hash_version;

if (sbi->s_def_hash_version > DX_HASH_LAST) {
ext4_msg(sb, KERN_ERR,
"Invalid default hash set in the superblock");
return -EINVAL;
} else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
ext4_msg(sb, KERN_ERR,
"SIPHASH is not a valid default hash value");
return -EINVAL;
}

for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);

sbi->s_def_hash_version = es->s_def_hash_version;
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags);
if (i & EXT2_FLAGS_UNSIGNED_HASH)
@ -5114,6 +5126,7 @@ static void ext4_hash_info_init(struct super_block *sb)
#endif
}
}
return 0;
}

static int ext4_block_group_meta_init(struct super_block *sb, int silent)
@ -5165,6 +5178,18 @@ static int ext4_block_group_meta_init(struct super_block *sb, int silent)
return 0;
}

/*
* It's hard to get stripe aligned blocks if stripe is not aligned with
* cluster, just disable stripe and alert user to simplify code and avoid
* stripe aligned allocation which will rarely succeed.
*/
static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
stripe % sbi->s_cluster_ratio != 0);
}

static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{
struct ext4_super_block *es = NULL;
@ -5249,7 +5274,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount;

ext4_hash_info_init(sb);
err = ext4_hash_info_init(sb);
if (err)
goto failed_mount;

err = ext4_handle_clustersize(sb);
if (err)
@ -5272,13 +5299,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount3;

sbi->s_stripe = ext4_get_stripe_size(sbi);
/*
* It's hard to get stripe aligned blocks if stripe is not aligned with
* cluster, just disable stripe and alert user to simpfy code and avoid
* stripe aligned allocation which will rarely successes.
*/
if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
sbi->s_stripe % sbi->s_cluster_ratio != 0) {
if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
ext4_msg(sb, KERN_WARNING,
"stripe (%lu) is not aligned with cluster size (%u), "
"stripe is disabled",
@ -5313,6 +5334,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);

spin_lock_init(&sbi->s_bdev_wb_lock);

ext4_fast_commit_init(sb);

sb->s_root = NULL;
@ -5534,7 +5557,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* Save the original bdev mapping's wb_err value which could be
* used to detect the metadata async write error.
*/
spin_lock_init(&sbi->s_bdev_wb_lock);
errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
&sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
@ -5614,8 +5636,8 @@ failed_mount8: __maybe_unused
failed_mount3:
/* flush s_sb_upd_work before sbi destroy */
flush_work(&sbi->s_sb_upd_work);
del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
del_timer_sync(&sbi->s_err_report);
ext4_group_desc_free(sbi);
failed_mount:
if (sbi->s_chksum_driver)
@ -6441,6 +6463,15 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)

}

if ((ctx->spec & EXT4_SPEC_s_stripe) &&
ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
ext4_msg(sb, KERN_WARNING,
"stripe (%lu) is not aligned with cluster size (%u), "
"stripe is disabled",
ctx->s_stripe, sbi->s_cluster_ratio);
ctx->s_stripe = 0;
}

/*
* Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
* two calls to ext4_should_dioread_nolock() to return inconsistent
@ -458,7 +458,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
ext4_xattr_inode_set_ref(inode, 1);
} else {
inode_lock(inode);
inode_lock_nested(inode, I_MUTEX_XATTR);
inode->i_flags |= S_NOQUOTA;
inode_unlock(inode);
}
@ -1039,7 +1039,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
s64 ref_count;
int ret;

inode_lock(ea_inode);
inode_lock_nested(ea_inode, I_MUTEX_XATTR);

ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
if (ret)
@ -2879,33 +2879,31 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
if (*ea_inode_array == NULL) {
/*
* Start with 15 inodes, so it fits into a power-of-two size.
* If *ea_inode_array is NULL, this is essentially offsetof()
*/
(*ea_inode_array) =
kmalloc(offsetof(struct ext4_xattr_inode_array,
inodes[EIA_MASK]),
GFP_NOFS);
(*ea_inode_array) = kmalloc(
struct_size(*ea_inode_array, inodes, EIA_MASK),
GFP_NOFS);
if (*ea_inode_array == NULL)
return -ENOMEM;
(*ea_inode_array)->count = 0;
} else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
/* expand the array once all 15 + n * 16 slots are full */
struct ext4_xattr_inode_array *new_array = NULL;
int count = (*ea_inode_array)->count;

/* if new_array is NULL, this is essentially offsetof() */
new_array = kmalloc(
offsetof(struct ext4_xattr_inode_array,
inodes[count + EIA_INCR]),
GFP_NOFS);
struct_size(*ea_inode_array, inodes,
(*ea_inode_array)->count + EIA_INCR),
GFP_NOFS);
if (new_array == NULL)
return -ENOMEM;
memcpy(new_array, *ea_inode_array,
offsetof(struct ext4_xattr_inode_array, inodes[count]));
struct_size(*ea_inode_array, inodes,
(*ea_inode_array)->count));
kfree(*ea_inode_array);
*ea_inode_array = new_array;
}
(*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
(*ea_inode_array)->count++;
(*ea_inode_array)->inodes[(*ea_inode_array)->count - 1] = inode;
return 0;
}

@ -3036,8 +3034,6 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
*
* Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
*
* Returns 0, or a negative error number on failure.
*/
static void
ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
@ -3065,8 +3061,7 @@ ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
*
* Compare two extended attribute blocks for equality.
*
* Returns 0 if the blocks are equal, 1 if they differ, and
* a negative error number on errors.
* Returns 0 if the blocks are equal, 1 if they differ.
*/
static int
ext4_xattr_cmp(struct ext4_xattr_header *header1,
@ -32,8 +32,7 @@ struct ext4_xattr_header {
__le32 h_refcount; /* reference count */
__le32 h_blocks; /* number of disk blocks used */
__le32 h_hash; /* hash value of all attributes */
__le32 h_checksum; /* crc32c(uuid+id+xattrblock) */
/* id = inum if refcount=1, blknum otherwise */
__le32 h_checksum; /* crc32c(uuid+blknum+xattrblock) */
__u32 h_reserved[3]; /* zero right now */
};

@ -130,8 +129,8 @@ struct ext4_xattr_ibody_find {
};

struct ext4_xattr_inode_array {
unsigned int count; /* # of used items in the array */
struct inode *inodes[];
unsigned int count;
struct inode *inodes[] __counted_by(count);
};

extern const struct xattr_handler ext4_xattr_user_handler;
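Editor's note: the xattr hunks above replace open-coded offsetof() sizing with struct_size() and annotate the flexible array with __counted_by(count), which lets the compiler and fortify checks bound accesses by the count field. A minimal sketch of the allocation idiom, assuming a layout like the header hunk above (the struct and function names here are illustrative, not from the patch):

struct sketch_inode_array {
	unsigned int count;
	struct inode *inodes[] __counted_by(count);
};

static struct sketch_inode_array *sketch_alloc(unsigned int n)
{
	struct sketch_inode_array *a;

	/* struct_size() = sizeof(*a) + n * sizeof(a->inodes[0]), overflow-checked */
	a = kmalloc(struct_size(a, inodes, n), GFP_NOFS);
	if (a)
		a->count = 0;
	return a;
}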
@ -79,17 +79,23 @@
__releases(&journal->j_state_lock)
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
tid_t tid = 0;
bool has_transaction = false;

if (journal->j_committing_transaction)
if (journal->j_committing_transaction) {
tid = journal->j_committing_transaction->t_tid;
has_transaction = true;
}
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) {
/* We were able to recover space; yay! */
} else if (jbd2_cleanup_journal_tail(journal) <= 0) {
/*
* We were able to recover space or the
* journal was aborted due to an error.
*/
;
} else if (tid) {
} else if (has_transaction) {
/*
* jbd2_journal_commit_transaction() may want
* to take the checkpoint_mutex if JBD2_FLUSHED
@ -407,6 +413,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid_t tid = 0;
unsigned long nr_freed = 0;
unsigned long freed;
bool first_set = false;

again:
spin_lock(&journal->j_list_lock);
@ -426,8 +433,10 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
else
transaction = journal->j_checkpoint_transactions;

if (!first_tid)
if (!first_set) {
first_tid = transaction->t_tid;
first_set = true;
}
last_transaction = journal->j_checkpoint_transactions->t_cpprev;
next_transaction = transaction;
last_tid = last_transaction->t_tid;
@ -457,7 +466,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
spin_unlock(&journal->j_list_lock);
cond_resched();

if (*nr_to_scan && next_tid)
if (*nr_to_scan && journal->j_shrink_transaction)
goto again;
out:
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
write_unlock(&journal->j_state_lock);
|
||||
}
|
||||
|
||||
static inline bool jbd2_data_needs_escaping(char *data)
|
||||
{
|
||||
return *((__be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER);
|
||||
}
|
||||
|
||||
static inline void jbd2_data_do_escape(char *data)
|
||||
{
|
||||
*((unsigned int *)data) = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
|
||||
*
|
||||
@ -318,9 +328,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||
struct buffer_head **bh_out,
|
||||
sector_t blocknr)
|
||||
{
|
||||
int done_copy_out = 0;
|
||||
int do_escape = 0;
|
||||
char *mapped_data;
|
||||
struct buffer_head *new_bh;
|
||||
struct folio *new_folio;
|
||||
unsigned int new_offset;
|
||||
@ -349,37 +357,33 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||
* we use that version of the data for the commit.
|
||||
*/
|
||||
if (jh_in->b_frozen_data) {
|
||||
done_copy_out = 1;
|
||||
new_folio = virt_to_folio(jh_in->b_frozen_data);
|
||||
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
|
||||
do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data);
|
||||
if (do_escape)
|
||||
jbd2_data_do_escape(jh_in->b_frozen_data);
|
||||
} else {
|
||||
char *tmp;
|
||||
char *mapped_data;
|
||||
|
||||
new_folio = bh_in->b_folio;
|
||||
new_offset = offset_in_folio(new_folio, bh_in->b_data);
|
||||
}
|
||||
|
||||
mapped_data = kmap_local_folio(new_folio, new_offset);
|
||||
/*
|
||||
* Fire data frozen trigger if data already wasn't frozen. Do this
|
||||
* before checking for escaping, as the trigger may modify the magic
|
||||
* offset. If a copy-out happens afterwards, it will have the correct
|
||||
* data in the buffer.
|
||||
*/
|
||||
if (!done_copy_out)
|
||||
mapped_data = kmap_local_folio(new_folio, new_offset);
|
||||
/*
|
||||
* Fire data frozen trigger if data already wasn't frozen. Do
|
||||
* this before checking for escaping, as the trigger may modify
|
||||
* the magic offset. If a copy-out happens afterwards, it will
|
||||
* have the correct data in the buffer.
|
||||
*/
|
||||
jbd2_buffer_frozen_trigger(jh_in, mapped_data,
|
||||
jh_in->b_triggers);
|
||||
|
||||
/*
|
||||
* Check for escaping
|
||||
*/
|
||||
if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER))
|
||||
do_escape = 1;
|
||||
kunmap_local(mapped_data);
|
||||
|
||||
/*
|
||||
* Do we need to do a data copy?
|
||||
*/
|
||||
if (do_escape && !done_copy_out) {
|
||||
char *tmp;
|
||||
do_escape = jbd2_data_needs_escaping(mapped_data);
|
||||
kunmap_local(mapped_data);
|
||||
/*
|
||||
* Do we need to do a data copy?
|
||||
*/
|
||||
if (!do_escape)
|
||||
goto escape_done;
|
||||
|
||||
spin_unlock(&jh_in->b_state_lock);
|
||||
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
|
||||
@ -406,18 +410,10 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||
copy_done:
|
||||
new_folio = virt_to_folio(jh_in->b_frozen_data);
|
||||
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
|
||||
done_copy_out = 1;
|
||||
jbd2_data_do_escape(jh_in->b_frozen_data);
|
||||
}
|
||||
|
||||
/*
|
||||
* Did we need to do an escaping? Now we've done all the
|
||||
* copying, we can finally do so.
|
||||
* b_frozen_data is from jbd2_alloc() which always provides an
|
||||
* address from the direct kernels mapping.
|
||||
*/
|
||||
if (do_escape)
|
||||
*((unsigned int *)jh_in->b_frozen_data) = 0;
|
||||
|
||||
escape_done:
|
||||
folio_set_bh(new_bh, new_folio, new_offset);
|
||||
new_bh->b_size = bh_in->b_size;
|
||||
new_bh->b_bdev = journal->j_dev;
|
||||
@ -710,7 +706,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
|
||||
return -EINVAL;
|
||||
|
||||
write_lock(&journal->j_state_lock);
|
||||
if (tid <= journal->j_commit_sequence) {
|
||||
if (tid_geq(journal->j_commit_sequence, tid)) {
|
||||
write_unlock(&journal->j_state_lock);
|
||||
return -EALREADY;
|
||||
}
|
||||
@ -740,9 +736,9 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
|
||||
*/
|
||||
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
|
||||
{
|
||||
jbd2_journal_unlock_updates(journal);
|
||||
if (journal->j_fc_cleanup_callback)
|
||||
journal->j_fc_cleanup_callback(journal, 0, tid);
|
||||
jbd2_journal_unlock_updates(journal);
|
||||
write_lock(&journal->j_state_lock);
|
||||
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
|
||||
if (fallback)
|
||||
@ -841,17 +837,12 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
|
||||
|
||||
*bh_out = NULL;
|
||||
|
||||
if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
|
||||
fc_off = journal->j_fc_off;
|
||||
blocknr = journal->j_fc_first + fc_off;
|
||||
journal->j_fc_off++;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last)
|
||||
return -EINVAL;
|
||||
|
||||
fc_off = journal->j_fc_off;
|
||||
blocknr = journal->j_fc_first + fc_off;
|
||||
journal->j_fc_off++;
|
||||
ret = jbd2_journal_bmap(journal, blocknr, &pblock);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -860,7 +851,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
|
||||
if (!bh)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
journal->j_fc_wbuf[fc_off] = bh;
|
||||
|
||||
*bh_out = bh;
|
||||
@ -903,7 +893,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_fc_wait_bufs);
|
||||
|
||||
int jbd2_fc_release_bufs(journal_t *journal)
|
||||
void jbd2_fc_release_bufs(journal_t *journal)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
int i, j_fc_off;
|
||||
@ -917,8 +907,6 @@ int jbd2_fc_release_bufs(journal_t *journal)
|
||||
put_bh(bh);
|
||||
journal->j_fc_wbuf[i] = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_fc_release_bufs);
|
||||
|
||||
@ -1944,7 +1932,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
|
||||
if (had_fast_commit)
|
||||
jbd2_set_feature_fast_commit(journal);
|
||||
|
||||
/* Log is no longer empty */
|
||||
/* Log is empty */
|
||||
write_lock(&journal->j_state_lock);
|
||||
journal->j_flags |= JBD2_FLUSHED;
|
||||
write_unlock(&journal->j_state_lock);
|
||||
@ -2866,8 +2854,7 @@ static struct journal_head *journal_alloc_journal_head(void)
|
||||
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
}
|
||||
if (ret)
|
||||
spin_lock_init(&ret->b_state_lock);
|
||||
spin_lock_init(&ret->b_state_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
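Editor's note: in the jbd2_fc_begin_commit() hunk above, the plain "tid <= j_commit_sequence" test is replaced by tid_geq(), because transaction IDs are 32-bit sequence counters that can wrap around, so ordering has to be decided by signed distance rather than a raw comparison. A rough sketch of the comparison (a simplified form of the jbd2 helper, not copied from it):

static inline int sketch_tid_geq(u32 x, u32 y)
{
	s32 diff = (s32)(x - y);	/* wrap-safe signed distance */

	return diff >= 0;
}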
@ -1086,7 +1086,7 @@ struct journal_s
int j_revoke_records_per_block;

/**
* @j_transaction_overhead:
* @j_transaction_overhead_buffers:
*
* Number of blocks each transaction needs for its own bookkeeping
*/
@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);
void jbd2_fc_release_bufs(journal_t *journal);

/*
* is_journal_abort