A large number of cleanups and bug fixes, with many of the bug fixes
found by Syzbot and fuzzing.  (Many of the bug fixes involve less-used
 ext4 features such as fast_commit, inline_data and bigalloc.)
 
 In addition, remove the writepage function for ext4, since the
 medium-term plan is to remove ->writepage() entirely.  (The VM doesn't
 need or want writepage() for writeback, since it is fine with
 ->writepages() so long as ->migrate_folio() is implemented.)
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmOWqrMACgkQ8vlZVpUN
 gaMvmgf+P2C6vzjn13ZdF+GwFTi4fx4TJ5BZT78LQqvTZqhkfk4k1q2SFfHI7nXT
 ZWdu1KUQ0SYLo64oaSU9W+2B2pmGi/KgUlrwNhy8DFeGStogPuDVfmGWB63p1UQL
 ld42mE9q7bjY6nCZSKYXPp2jfSwsHuliHBJ4UfzVNAIwjiUEJ7pGeIrMFdLAEkVm
 TVNzvlUZaHUnVxhpsP6hs+5WNhHQ2IhWz4rwX01ussNgHTijYac4iaL05wpTvF5e
 6NtvfmpOEMAbYrmIkJX4RVss4JNsHNOC0E8fjEHlgXJxBiAI6w8GxTxrS52Y4ELH
 nHXl/pc0L+I8+yh9B9+s0LBaSuPuTg==
 =lezv
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A large number of cleanups and bug fixes, with many of the bug fixes
  found by Syzbot and fuzzing. (Many of the bug fixes involve less-used
  ext4 features such as fast_commit, inline_data and bigalloc)

  In addition, remove the writepage function for ext4, since the
  medium-term plan is to remove ->writepage() entirely. (The VM doesn't
  need or want writepage() for writeback, since it is fine with
  ->writepages() so long as ->migrate_folio() is implemented)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (58 commits)
  ext4: fix reserved cluster accounting in __es_remove_extent()
  ext4: fix inode leak in ext4_xattr_inode_create() on an error path
  ext4: allocate extended attribute value in vmalloc area
  ext4: avoid unaccounted block allocation when expanding inode
  ext4: initialize quota before expanding inode in setproject ioctl
  ext4: stop providing .writepage hook
  mm: export buffer_migrate_folio_norefs()
  ext4: switch to using write_cache_pages() for data=journal writeout
  jbd2: switch jbd2_submit_inode_data() to use fs-provided hook for data writeout
  ext4: switch to using ext4_do_writepages() for ordered data writeout
  ext4: move percpu_rwsem protection into ext4_writepages()
  ext4: provide ext4_do_writepages()
  ext4: add support for writepages calls that cannot map blocks
  ext4: drop pointless IO submission from ext4_bio_write_page()
  ext4: remove nr_submitted from ext4_bio_write_page()
  ext4: move keep_towrite handling to ext4_bio_write_page()
  ext4: handle redirtying in ext4_bio_write_page()
  ext4: fix kernel BUG in 'ext4_write_inline_data_end()'
  ext4: make ext4_mb_initialize_context return void
  ext4: fix deadlock due to mbcache entry corruption
  ...
This commit is contained in:
Linus Torvalds 2022-12-12 19:56:37 -08:00
commit deb9acc122
32 changed files with 573 additions and 369 deletions

View File

@ -803,6 +803,7 @@ process the parameters it is given.
int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *value,
bool want_bdev,
unsigned int flags,
struct path *_path);
This takes a parameter that carries a string or filename type and attempts

View File

@ -558,7 +558,7 @@ enum {
*
* It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/
#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
static inline void ext4_check_flag_values(void)
@ -2964,7 +2964,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode,
typedef enum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@ -2999,6 +3000,7 @@ extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
@ -3619,8 +3621,8 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
unsigned int blocksize);
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
struct buffer_head *bh);
extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode);
extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
struct inode *inode, struct dentry *dentry);
extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry);
@ -3756,8 +3758,7 @@ extern void ext4_end_io_rsv_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
bool keep_towrite);
int len);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);

View File

@ -86,15 +86,21 @@ static int ext4_journal_check_start(struct super_block *sb)
return 0;
}
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
handle_t *__ext4_journal_start_sb(struct inode *inode,
struct super_block *sb, unsigned int line,
int type, int blocks, int rsv_blocks,
int revoke_creds)
{
journal_t *journal;
int err;
trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
_RET_IP_);
if (inode)
trace_ext4_journal_start_inode(inode, blocks, rsv_blocks,
revoke_creds, type,
_RET_IP_);
else
trace_ext4_journal_start_sb(sb, blocks, rsv_blocks,
revoke_creds, type,
_RET_IP_);
err = ext4_journal_check_start(sb);
if (err < 0)
return ERR_PTR(err);

View File

@ -261,9 +261,9 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
(bh))
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
int type, int blocks, int rsv_blocks,
int revoke_creds);
handle_t *__ext4_journal_start_sb(struct inode *inode, struct super_block *sb,
unsigned int line, int type, int blocks,
int rsv_blocks, int revoke_creds);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@ -303,7 +303,7 @@ static inline int ext4_trans_default_revoke_credits(struct super_block *sb)
}
#define ext4_journal_start_sb(sb, type, nblocks) \
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0, \
__ext4_journal_start_sb(NULL, (sb), __LINE__, (type), (nblocks), 0,\
ext4_trans_default_revoke_credits(sb))
#define ext4_journal_start(inode, type, nblocks) \
@ -323,7 +323,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
int blocks, int rsv_blocks,
int revoke_creds)
{
return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
return __ext4_journal_start_sb(inode, inode->i_sb, line, type, blocks,
rsv_blocks, revoke_creds);
}

View File

@ -2635,9 +2635,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
unwritten, ex_ee_len);
path[depth].p_ext = ex;
a = ex_ee_block > start ? ex_ee_block : start;
b = ex_ee_block+ex_ee_len - 1 < end ?
ex_ee_block+ex_ee_len - 1 : end;
a = max(ex_ee_block, start);
b = min(ex_ee_block + ex_ee_len - 1, end);
ext_debug(inode, " border %u:%u\n", a, b);
@ -5567,8 +5566,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
* ee_start_lblk to shift extents
*/
ret = ext4_ext_shift_extents(inode, handle,
ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
len_lblk, SHIFT_RIGHT);
max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT);
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
@ -5799,6 +5797,14 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
struct ext4_extent *extent;
ext4_lblk_t first_lblk, first_lclu, last_lclu;
/*
* if data can be stored inline, the logical cluster isn't
* mapped - no physical clusters have been allocated, and the
* file has no extents
*/
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
return 0;
/* search for the extent closest to the first block in the cluster */
path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
if (IS_ERR(path)) {

View File

@ -155,9 +155,7 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
int __init ext4_init_es(void)
{
ext4_es_cachep = kmem_cache_create("ext4_extent_status",
sizeof(struct extent_status),
0, (SLAB_RECLAIM_ACCOUNT), NULL);
ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
if (ext4_es_cachep == NULL)
return -ENOMEM;
return 0;
@ -1371,7 +1369,7 @@ retry:
if (count_reserved)
count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
&orig_es, &rc);
goto out;
goto out_get_reserved;
}
if (len1 > 0) {
@ -1413,6 +1411,7 @@ retry:
}
}
out_get_reserved:
if (count_reserved)
*reserved = get_rsvd(inode, end, es, &rc);
out:
@ -1807,9 +1806,7 @@ static void ext4_print_pending_tree(struct inode *inode)
int __init ext4_init_pending(void)
{
ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
sizeof(struct pending_reservation),
0, (SLAB_RECLAIM_ACCOUNT), NULL);
ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT);
if (ext4_pending_cachep == NULL)
return -ENOMEM;
return 0;

View File

@ -420,25 +420,34 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
struct __track_dentry_update_args *dentry_update =
(struct __track_dentry_update_args *)arg;
struct dentry *dentry = dentry_update->dentry;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct inode *dir = dentry->d_parent->d_inode;
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
mutex_unlock(&ei->i_fc_lock);
if (IS_ENCRYPTED(dir)) {
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME,
NULL);
mutex_lock(&ei->i_fc_lock);
return -EOPNOTSUPP;
}
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
node->fcd_op = dentry_update->op;
node->fcd_parent = dentry->d_parent->d_inode->i_ino;
node->fcd_parent = dir->i_ino;
node->fcd_ino = inode->i_ino;
if (dentry->d_name.len > DNAME_INLINE_LEN) {
node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_NOMEM, NULL);
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@ -666,18 +675,6 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
/* Ext4 commit path routines */
/* memzero and update CRC */
static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
u32 *crc)
{
void *ret;
ret = memset(dst, 0, len);
if (crc)
*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
return ret;
}
/*
* Allocate len bytes on a fast commit buffer.
*
@ -691,62 +688,60 @@ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
*/
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
struct ext4_fc_tl *tl;
struct ext4_fc_tl tl;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct buffer_head *bh;
int bsize = sbi->s_journal->j_blocksize;
int ret, off = sbi->s_fc_bytes % bsize;
int pad_len;
int remaining;
u8 *dst;
/*
* After allocating len, we should have space at least for a 0 byte
* padding.
* If 'len' is too long to fit in any block alongside a PAD tlv, then we
* cannot fulfill the request.
*/
if (len + EXT4_FC_TAG_BASE_LEN > bsize)
if (len > bsize - EXT4_FC_TAG_BASE_LEN)
return NULL;
if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
/*
* Only allocate from current buffer if we have enough space for
* this request AND we have space to add a zero byte padding.
*/
if (!sbi->s_fc_bh) {
ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
if (ret)
return NULL;
sbi->s_fc_bh = bh;
}
sbi->s_fc_bytes += len;
return sbi->s_fc_bh->b_data + off;
if (!sbi->s_fc_bh) {
ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
if (ret)
return NULL;
sbi->s_fc_bh = bh;
}
/* Need to add PAD tag */
tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
tl->fc_len = cpu_to_le16(pad_len);
if (crc)
*crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
if (pad_len > 0)
ext4_fc_memzero(sb, tl + 1, pad_len, crc);
dst = sbi->s_fc_bh->b_data + off;
/*
* Allocate the bytes in the current block if we can do so while still
* leaving enough space for a PAD tlv.
*/
remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
if (len <= remaining) {
sbi->s_fc_bytes += len;
return dst;
}
/*
* Else, terminate the current block with a PAD tlv, then allocate a new
* block and allocate the bytes at the start of that new block.
*/
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
tl.fc_len = cpu_to_le16(remaining);
memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN);
memset(dst + EXT4_FC_TAG_BASE_LEN, 0, remaining);
*crc = ext4_chksum(sbi, *crc, sbi->s_fc_bh->b_data, bsize);
ext4_fc_submit_bh(sb, false);
ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
if (ret)
return NULL;
sbi->s_fc_bh = bh;
sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
sbi->s_fc_bytes += bsize - off + len;
return sbi->s_fc_bh->b_data;
}
/* memcpy to fc reserved space and update CRC */
static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
int len, u32 *crc)
{
if (crc)
*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
return memcpy(dst, src, len);
}
/*
* Complete a fast commit by writing tail tag.
*
@ -774,16 +769,20 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
off = sbi->s_fc_bytes % bsize;
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail));
sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN);
dst += EXT4_FC_TAG_BASE_LEN;
tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
memcpy(dst, &tail.fc_tid, sizeof(tail.fc_tid));
dst += sizeof(tail.fc_tid);
crc = ext4_chksum(sbi, crc, sbi->s_fc_bh->b_data,
dst - (u8 *)sbi->s_fc_bh->b_data);
tail.fc_crc = cpu_to_le32(crc);
ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
memcpy(dst, &tail.fc_crc, sizeof(tail.fc_crc));
dst += sizeof(tail.fc_crc);
memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */
ext4_fc_submit_bh(sb, true);
@ -807,8 +806,8 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
tl.fc_tag = cpu_to_le16(tag);
tl.fc_len = cpu_to_le16(len);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN);
memcpy(dst + EXT4_FC_TAG_BASE_LEN, val, len);
return true;
}
@ -830,11 +829,11 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN);
dst += EXT4_FC_TAG_BASE_LEN;
ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
memcpy(dst, &fcd, sizeof(fcd));
dst += sizeof(fcd);
ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
memcpy(dst, fc_dentry->fcd_name.name, dlen);
return true;
}
@ -872,15 +871,11 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
if (!dst)
goto err;
if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
goto err;
memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN);
dst += EXT4_FC_TAG_BASE_LEN;
if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
goto err;
memcpy(dst, &fc_inode, sizeof(fc_inode));
dst += sizeof(fc_inode);
if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
inode_len, crc))
goto err;
memcpy(dst, (u8 *)ext4_raw_inode(&iloc), inode_len);
ret = 0;
err:
brelse(iloc.bh);
@ -986,7 +981,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
finish_wait(&ei->i_fc_wait, &wait);
}
spin_unlock(&sbi->s_fc_lock);
ret = jbd2_submit_inode_data(ei->jinode);
ret = jbd2_submit_inode_data(journal, ei->jinode);
if (ret)
return ret;
spin_lock(&sbi->s_fc_lock);
@ -1388,7 +1383,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
return 0;
}
ret = __ext4_unlink(NULL, old_parent, &entry, inode);
ret = __ext4_unlink(old_parent, &entry, inode, NULL);
/* -ENOENT ok coz it might not exist anymore. */
if (ret == -ENOENT)
ret = 0;
@ -1977,32 +1972,31 @@ void ext4_fc_replay_cleanup(struct super_block *sb)
kfree(sbi->s_fc_replay_state.fc_modified_inodes);
}
static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
u8 *val, u8 *end)
static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi,
int tag, int len)
{
if (val + tl->fc_len > end)
return false;
/* Here only check ADD_RANGE/TAIL/HEAD which will read data when do
* journal rescan before do CRC check. Other tags length check will
* rely on CRC check.
*/
switch (tl->fc_tag) {
switch (tag) {
case EXT4_FC_TAG_ADD_RANGE:
return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
case EXT4_FC_TAG_TAIL:
return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
case EXT4_FC_TAG_HEAD:
return (sizeof(struct ext4_fc_head) == tl->fc_len);
return len == sizeof(struct ext4_fc_add_range);
case EXT4_FC_TAG_DEL_RANGE:
return len == sizeof(struct ext4_fc_del_range);
case EXT4_FC_TAG_CREAT:
case EXT4_FC_TAG_LINK:
case EXT4_FC_TAG_UNLINK:
case EXT4_FC_TAG_CREAT:
len -= sizeof(struct ext4_fc_dentry_info);
return len >= 1 && len <= EXT4_NAME_LEN;
case EXT4_FC_TAG_INODE:
len -= sizeof(struct ext4_fc_inode);
return len >= EXT4_GOOD_OLD_INODE_SIZE &&
len <= sbi->s_inode_size;
case EXT4_FC_TAG_PAD:
default:
return true;
return true; /* padding can have any length */
case EXT4_FC_TAG_TAIL:
return len >= sizeof(struct ext4_fc_tail);
case EXT4_FC_TAG_HEAD:
return len == sizeof(struct ext4_fc_head);
}
return false;
}
/*
@ -2040,7 +2034,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
state = &sbi->s_fc_replay_state;
start = (u8 *)bh->b_data;
end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
end = start + journal->j_blocksize;
if (state->fc_replay_expected_off == 0) {
state->fc_cur_tag = 0;
@ -2061,11 +2055,12 @@ static int ext4_fc_replay_scan(journal_t *journal,
}
state->fc_replay_expected_off++;
for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
if (tl.fc_len > end - val ||
!ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) {
ret = state->fc_replay_num_tags ?
JBD2_FC_REPLAY_STOP : -ECANCELED;
goto out_err;
@ -2178,9 +2173,9 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
#endif
start = (u8 *)bh->b_data;
end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
end = start + journal->j_blocksize;
for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
@ -2249,17 +2244,17 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
journal->j_fc_cleanup_callback = ext4_fc_cleanup;
}
static const char *fc_ineligible_reasons[] = {
"Extended attributes changed",
"Cross rename",
"Journal flag changed",
"Insufficient memory",
"Swap boot",
"Resize",
"Dir renamed",
"Falloc range op",
"Data journalling",
"FC Commit Failed"
static const char * const fc_ineligible_reasons[] = {
[EXT4_FC_REASON_XATTR] = "Extended attributes changed",
[EXT4_FC_REASON_CROSS_RENAME] = "Cross rename",
[EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed",
[EXT4_FC_REASON_NOMEM] = "Insufficient memory",
[EXT4_FC_REASON_SWAP_BOOT] = "Swap boot",
[EXT4_FC_REASON_RESIZE] = "Resize",
[EXT4_FC_REASON_RENAME_DIR] = "Dir renamed",
[EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op",
[EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling",
[EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename",
};
int ext4_fc_info_show(struct seq_file *seq, void *v)

View File

@ -58,7 +58,7 @@ struct ext4_fc_dentry_info {
__u8 fc_dname[];
};
/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
/* Value structure for EXT4_FC_TAG_INODE. */
struct ext4_fc_inode {
__le32 fc_ino;
__u8 fc_raw_inode[];
@ -96,6 +96,7 @@ enum {
EXT4_FC_REASON_RENAME_DIR,
EXT4_FC_REASON_FALLOC_RANGE,
EXT4_FC_REASON_INODE_JOURNAL_DATA,
EXT4_FC_REASON_ENCRYPTED_FILENAME,
EXT4_FC_REASON_MAX
};

View File

@ -1076,8 +1076,8 @@ repeat_in_this_group:
if ((!(sbi->s_mount_state & EXT4_FC_REPLAY)) && !handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
handle_type, nblocks, 0,
handle = __ext4_journal_start_sb(NULL, dir->i_sb,
line_no, handle_type, nblocks, 0,
ext4_trans_default_revoke_credits(sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);

View File

@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
unsigned int key;
int ret = -EIO;
*err = 0;
@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
bh = sb_getblk(sb, le32_to_cpu(p->key));
key = le32_to_cpu(p->key);
if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
/* the block was out of range */
ret = -EFSCORRUPTED;
goto failure;
}
bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;

View File

@ -180,8 +180,7 @@ static int ext4_read_inline_data(struct inode *inode, void *buffer,
BUG_ON(len > EXT4_I(inode)->i_inline_size);
cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
len : EXT4_MIN_INLINE_DATA_SIZE;
cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE);
raw_inode = ext4_raw_inode(iloc);
memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

View File

@ -222,13 +222,13 @@ void ext4_evict_inode(struct inode *inode)
/*
* For inodes with journalled data, transaction commit could have
* dirtied the inode. Flush worker is ignoring it because of I_FREEING
* flag but we still need to remove the inode from the writeback lists.
* dirtied the inode. And for inodes with dioread_nolock, unwritten
* extents converting worker could merge extents and also have dirtied
* the inode. Flush worker is ignoring it because of I_FREEING flag but
* we still need to remove the inode from the writeback lists.
*/
if (!list_empty_careful(&inode->i_io_list)) {
WARN_ON_ONCE(!ext4_should_journal_data(inode));
if (!list_empty_careful(&inode->i_io_list))
inode_io_list_del(inode);
}
/*
* Protect us against freezing - iput() caller didn't have to have any
@ -335,6 +335,12 @@ stop_handle:
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
/*
* Check out some where else accidentally dirty the evicting inode,
* which may probably cause inode use-after-free issues later.
*/
WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list));
if (!list_empty(&EXT4_I(inode)->i_fc_list))
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@ -1309,7 +1315,8 @@ static int ext4_write_end(struct file *file,
trace_ext4_write_end(inode, pos, len, copied);
if (ext4_has_inline_data(inode))
if (ext4_has_inline_data(inode) &&
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
return ext4_write_inline_data_end(inode, pos, len, copied, page);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@ -1543,9 +1550,12 @@ void ext4_da_release_space(struct inode *inode, int to_free)
*/
struct mpage_da_data {
/* These are input fields for ext4_do_writepages() */
struct inode *inode;
struct writeback_control *wbc;
unsigned int can_map:1; /* Can writepages call map blocks? */
/* These are internal state of ext4_do_writepages() */
pgoff_t first_page; /* The first page to write */
pgoff_t next_page; /* Current page to examine */
pgoff_t last_page; /* Last page to examine */
@ -2009,7 +2019,6 @@ static int ext4_writepage(struct page *page,
struct buffer_head *page_bufs = NULL;
struct inode *inode = page->mapping->host;
struct ext4_io_submit io_submit;
bool keep_towrite = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
folio_invalidate(folio, 0, folio_size(folio));
@ -2067,7 +2076,6 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return 0;
}
keep_towrite = true;
}
if (PageChecked(page) && ext4_should_journal_data(inode))
@ -2084,7 +2092,7 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return -ENOMEM;
}
ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite);
ret = ext4_bio_write_page(&io_submit, page, len);
ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
@ -2118,7 +2126,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
err = ext4_bio_write_page(&mpd->io_submit, page, len, false);
err = ext4_bio_write_page(&mpd->io_submit, page, len);
if (!err)
mpd->wbc->nr_to_write--;
mpd->first_page++;
@ -2551,18 +2559,33 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
}
/* Return true if the page needs to be written as part of transaction commit */
static bool ext4_page_nomap_can_writeout(struct page *page)
{
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
do {
if (buffer_dirty(bh) && buffer_mapped(bh) && !buffer_delay(bh))
return true;
} while ((bh = bh->b_this_page) != head);
return false;
}
/*
* mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
* and underlying extent to map
* needing mapping, submit mapped pages
*
* @mpd - where to look for pages
*
* Walk dirty pages in the mapping. If they are fully mapped, submit them for
* IO immediately. When we find a page which isn't mapped we start accumulating
* extent of buffers underlying these pages that needs mapping (formed by
* either delayed or unwritten buffers). We also lock the pages containing
* these buffers. The extent found is returned in @mpd structure (starting at
* mpd->lblk with length mpd->len blocks).
* IO immediately. If we cannot map blocks, we submit just already mapped
* buffers in the page for IO and keep page dirty. When we can map blocks and
* we find a page which isn't mapped we start accumulating extent of buffers
* underlying these pages that needs mapping (formed by either delayed or
* unwritten buffers). We also lock the pages containing these buffers. The
* extent found is returned in @mpd structure (starting at mpd->lblk with
* length mpd->len blocks).
*
* Note that this function can attach bios to one io_end structure which are
* neither logically nor physically contiguous. Although it may seem as an
@ -2653,14 +2676,30 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (mpd->map.m_len == 0)
mpd->first_page = page->index;
mpd->next_page = page->index + 1;
/* Add all dirty buffers to mpd */
lblk = ((ext4_lblk_t)page->index) <<
(PAGE_SHIFT - blkbits);
head = page_buffers(page);
err = mpage_process_page_bufs(mpd, head, head, lblk);
if (err <= 0)
goto out;
err = 0;
/*
* Writeout for transaction commit where we cannot
* modify metadata is simple. Just submit the page.
*/
if (!mpd->can_map) {
if (ext4_page_nomap_can_writeout(page)) {
err = mpage_submit_page(mpd, page);
if (err < 0)
goto out;
} else {
unlock_page(page);
mpd->first_page++;
}
} else {
/* Add all dirty buffers to mpd */
lblk = ((ext4_lblk_t)page->index) <<
(PAGE_SHIFT - blkbits);
head = page_buffers(page);
err = mpage_process_page_bufs(mpd, head, head,
lblk);
if (err <= 0)
goto out;
err = 0;
}
left--;
}
pagevec_release(&pvec);
@ -2673,25 +2712,27 @@ out:
return err;
}
static int ext4_writepages(struct address_space *mapping,
struct writeback_control *wbc)
static int ext4_writepage_cb(struct page *page, struct writeback_control *wbc,
void *data)
{
return ext4_writepage(page, wbc);
}
static int ext4_do_writepages(struct mpage_da_data *mpd)
{
struct writeback_control *wbc = mpd->wbc;
pgoff_t writeback_index = 0;
long nr_to_write = wbc->nr_to_write;
int range_whole = 0;
int cycled = 1;
handle_t *handle = NULL;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
int needed_blocks, rsv_blocks = 0, ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
struct blk_plug plug;
bool give_up_on_write = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
percpu_down_read(&sbi->s_writepages_rwsem);
trace_ext4_writepages(inode, wbc);
/*
@ -2703,7 +2744,9 @@ static int ext4_writepages(struct address_space *mapping,
goto out_writepages;
if (ext4_should_journal_data(inode)) {
ret = generic_writepages(mapping, wbc);
blk_start_plug(&plug);
ret = write_cache_pages(mapping, wbc, ext4_writepage_cb, NULL);
blk_finish_plug(&plug);
goto out_writepages;
}
@ -2757,19 +2800,18 @@ static int ext4_writepages(struct address_space *mapping,
writeback_index = mapping->writeback_index;
if (writeback_index)
cycled = 0;
mpd.first_page = writeback_index;
mpd.last_page = -1;
mpd->first_page = writeback_index;
mpd->last_page = -1;
} else {
mpd.first_page = wbc->range_start >> PAGE_SHIFT;
mpd.last_page = wbc->range_end >> PAGE_SHIFT;
mpd->first_page = wbc->range_start >> PAGE_SHIFT;
mpd->last_page = wbc->range_end >> PAGE_SHIFT;
}
mpd.inode = inode;
mpd.wbc = wbc;
ext4_io_submit_init(&mpd.io_submit, wbc);
ext4_io_submit_init(&mpd->io_submit, wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
tag_pages_for_writeback(mapping, mpd->first_page,
mpd->last_page);
blk_start_plug(&plug);
/*
@ -2778,31 +2820,32 @@ retry:
* in the block layer on device congestion while having transaction
* started.
*/
mpd.do_map = 0;
mpd.scanned_until_end = 0;
mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
if (!mpd.io_submit.io_end) {
mpd->do_map = 0;
mpd->scanned_until_end = 0;
mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
if (!mpd->io_submit.io_end) {
ret = -ENOMEM;
goto unplug;
}
ret = mpage_prepare_extent_to_map(&mpd);
ret = mpage_prepare_extent_to_map(mpd);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, false);
mpage_release_unused_pages(mpd, false);
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
ext4_put_io_end_defer(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL;
ext4_io_submit(&mpd->io_submit);
ext4_put_io_end_defer(mpd->io_submit.io_end);
mpd->io_submit.io_end = NULL;
if (ret < 0)
goto unplug;
while (!mpd.scanned_until_end && wbc->nr_to_write > 0) {
while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
/* For each extent of pages we use new io_end */
mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
if (!mpd.io_submit.io_end) {
mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
if (!mpd->io_submit.io_end) {
ret = -ENOMEM;
break;
}
WARN_ON_ONCE(!mpd->can_map);
/*
* We have two constraints: We find one extent to map and we
* must always write out whole page (makes a difference when
@ -2822,16 +2865,16 @@ retry:
"%ld pages, ino %lu; err %d", __func__,
wbc->nr_to_write, inode->i_ino, ret);
/* Release allocated io_end */
ext4_put_io_end(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL;
ext4_put_io_end(mpd->io_submit.io_end);
mpd->io_submit.io_end = NULL;
break;
}
mpd.do_map = 1;
mpd->do_map = 1;
trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
ret = mpage_prepare_extent_to_map(&mpd);
if (!ret && mpd.map.m_len)
ret = mpage_map_and_submit_extent(handle, &mpd,
trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
ret = mpage_prepare_extent_to_map(mpd);
if (!ret && mpd->map.m_len)
ret = mpage_map_and_submit_extent(handle, mpd,
&give_up_on_write);
/*
* Caution: If the handle is synchronous,
@ -2846,12 +2889,12 @@ retry:
if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
ext4_journal_stop(handle);
handle = NULL;
mpd.do_map = 0;
mpd->do_map = 0;
}
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write);
mpage_release_unused_pages(mpd, give_up_on_write);
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
ext4_io_submit(&mpd->io_submit);
/*
* Drop our io_end reference we got from init. We have
@ -2861,11 +2904,11 @@ retry:
* up doing unwritten extent conversion.
*/
if (handle) {
ext4_put_io_end_defer(mpd.io_submit.io_end);
ext4_put_io_end_defer(mpd->io_submit.io_end);
ext4_journal_stop(handle);
} else
ext4_put_io_end(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL;
ext4_put_io_end(mpd->io_submit.io_end);
mpd->io_submit.io_end = NULL;
if (ret == -ENOSPC && sbi->s_journal) {
/*
@ -2885,8 +2928,8 @@ unplug:
blk_finish_plug(&plug);
if (!ret && !cycled && wbc->nr_to_write > 0) {
cycled = 1;
mpd.last_page = writeback_index - 1;
mpd.first_page = 0;
mpd->last_page = writeback_index - 1;
mpd->first_page = 0;
goto retry;
}
@ -2896,15 +2939,51 @@ unplug:
* Set the writeback_index so that range_cyclic
* mode will write it back later
*/
mapping->writeback_index = mpd.first_page;
mapping->writeback_index = mpd->first_page;
out_writepages:
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
percpu_up_read(&sbi->s_writepages_rwsem);
return ret;
}
/*
 * ->writepages() handler installed in the ext4 address_space operations.
 *
 * Describes the request in an mpage_da_data (with can_map set) and hands
 * it to ext4_do_writepages() while s_writepages_rwsem is held for reading.
 *
 * Returns -EIO, without taking the semaphore, when the filesystem has been
 * forcibly shut down; otherwise returns whatever ext4_do_writepages()
 * returned.
 */
static int ext4_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct mpage_da_data mpd = {
		.can_map = 1,
		.wbc = wbc,
		.inode = mapping->host,
	};
	struct super_block *sb = mapping->host->i_sb;
	int err;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return -EIO;

	/* Hold the per-sb writepages semaphore (read side) across the writeback. */
	percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
	err = ext4_do_writepages(&mpd);
	percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);

	return err;
}
int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = jinode->i_dirty_start,
.range_end = jinode->i_dirty_end,
};
struct mpage_da_data mpd = {
.inode = jinode->i_vfs_inode,
.wbc = &wbc,
.can_map = 0,
};
return ext4_do_writepages(&mpd);
}
static int ext4_dax_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@ -3646,7 +3725,6 @@ static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
static const struct address_space_operations ext4_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_write_end,
@ -3664,7 +3742,6 @@ static const struct address_space_operations ext4_aops = {
static const struct address_space_operations ext4_journalled_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_journalled_write_end,
@ -3673,6 +3750,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
.swap_activate = ext4_iomap_swap_activate,
@ -3681,7 +3759,6 @@ static const struct address_space_operations ext4_journalled_aops = {
static const struct address_space_operations ext4_da_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_da_write_begin,
.write_end = ext4_da_write_end,
@ -4225,7 +4302,8 @@ int ext4_truncate(struct inode *inode)
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
if (ext4_inode_attach_jinode(inode) < 0)
err = ext4_inode_attach_jinode(inode);
if (err)
goto out_trace;
}
@ -4473,9 +4551,17 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((ino - 1) %
EXT4_INODES_PER_GROUP(sb));
block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
block = ext4_inode_table(sb, gdp);
if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
(block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
ext4_error(sb, "Invalid inode table block %llu in "
"block_group %u", block, iloc->block_group);
return -EFSCORRUPTED;
}
block += (inode_offset / inodes_per_block);
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
@ -5044,8 +5130,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
brelse(iloc.bh);
if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
ext4_error_inode(inode, function, line, 0,
"bad inode without EXT4_IGET_BAD flag");
ret = -EUCLEAN;
goto bad_inode;
}
brelse(iloc.bh);
unlock_new_inode(inode);
return inode;
@ -5853,6 +5945,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
return 0;
}
/*
* We may need to allocate external xattr block so we need quotas
* initialized. Here we can be called with various locks held so we
* cannot afford to initialize quotas ourselves. So just bail.
*/
if (dquot_initialize_needed(inode))
return -EAGAIN;
/* try to expand with EAs present */
error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);

View File

@ -374,7 +374,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
blkcnt_t blocks;
unsigned short bytes;
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO,
EXT4_IGET_SPECIAL | EXT4_IGET_BAD);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
@ -424,7 +425,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
if (inode_bl->i_nlink == 0) {
if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) {
/* this inode has never been used as a BOOT_LOADER */
set_nlink(inode_bl, 1);
i_uid_write(inode_bl, 0);
@ -731,6 +732,10 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
if (ext4_is_quota_file(inode))
return err;
err = dquot_initialize(inode);
if (err)
return err;
err = ext4_get_inode_loc(inode, &iloc);
if (err)
return err;
@ -746,10 +751,6 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
brelse(iloc.bh);
}
err = dquot_initialize(inode);
if (err)
return err;
handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
EXT4_QUOTA_INIT_BLOCKS(sb) +
EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
@ -1153,19 +1154,22 @@ static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi,
if (fsuuid.fsu_len == 0) {
fsuuid.fsu_len = UUID_SIZE;
if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid.fsu_len)))
if (copy_to_user(&ufsuuid->fsu_len, &fsuuid.fsu_len,
sizeof(fsuuid.fsu_len)))
return -EFAULT;
return -EINVAL;
return 0;
}
if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0)
if (fsuuid.fsu_len < UUID_SIZE || fsuuid.fsu_flags != 0)
return -EINVAL;
lock_buffer(sbi->s_sbh);
memcpy(uuid, sbi->s_es->s_uuid, UUID_SIZE);
unlock_buffer(sbi->s_sbh);
if (copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE))
fsuuid.fsu_len = UUID_SIZE;
if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid)) ||
copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE))
return -EFAULT;
return 0;
}

View File

@ -5204,7 +5204,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
mutex_lock(&ac->ac_lg->lg_mutex);
}
static noinline_for_stack int
static noinline_for_stack void
ext4_mb_initialize_context(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
@ -5253,8 +5253,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
(unsigned) ar->lleft, (unsigned) ar->pleft,
(unsigned) ar->lright, (unsigned) ar->pright,
inode_is_open_for_write(ar->inode) ? "" : "non-");
return 0;
}
static noinline_for_stack void
@ -5591,11 +5589,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
goto out;
}
*errp = ext4_mb_initialize_context(ac, ar);
if (*errp) {
ar->len = 0;
goto out;
}
ext4_mb_initialize_context(ac, ar);
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
seq = this_cpu_read(discard_pa_seq);

View File

@ -3204,14 +3204,20 @@ end_rmdir:
return retval;
}
int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode)
int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
struct inode *inode,
struct dentry *dentry /* NULL during fast_commit recovery */)
{
int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
handle_t *handle;
int skip_remove_dentry = 0;
/*
* Keep this outside the transaction; it may have to set up the
* directory's encryption key, which isn't GFP_NOFS-safe.
*/
bh = ext4_find_entry(dir, d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
@ -3228,7 +3234,14 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
skip_remove_dentry = 1;
else
goto out;
goto out_bh;
}
handle = ext4_journal_start(dir, EXT4_HT_DIR,
EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
goto out_bh;
}
if (IS_DIRSYNC(dir))
@ -3237,12 +3250,12 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
if (!skip_remove_dentry) {
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto out;
goto out_handle;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
goto out;
goto out_handle;
} else {
retval = 0;
}
@ -3255,15 +3268,17 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
ext4_orphan_add(handle, inode);
inode->i_ctime = current_time(inode);
retval = ext4_mark_inode_dirty(handle, inode);
out:
if (dentry && !retval)
ext4_fc_track_unlink(handle, dentry);
out_handle:
ext4_journal_stop(handle);
out_bh:
brelse(bh);
return retval;
}
static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
handle_t *handle;
int retval;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
@ -3281,16 +3296,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (retval)
goto out_trace;
handle = ext4_journal_start(dir, EXT4_HT_DIR,
EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
goto out_trace;
}
retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
if (!retval)
ext4_fc_track_unlink(handle, dentry);
retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
@ -3301,8 +3307,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
if (handle)
ext4_journal_stop(handle);
out_trace:
trace_ext4_unlink_exit(dentry, retval);
@ -3792,6 +3796,9 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
return -EXDEV;
retval = dquot_initialize(old.dir);
if (retval)
return retval;
retval = dquot_initialize(old.inode);
if (retval)
return retval;
retval = dquot_initialize(new.dir);

View File

@ -412,7 +412,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
/* don't clear list on RO mount w/ errors */
if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
"clearing orphan list.\n");
"clearing orphan list.");
es->s_last_orphan = 0;
}
ext4_debug("Skipping orphan recovery on fs with errors.\n");

View File

@ -430,25 +430,20 @@ submit_and_retry:
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
bool keep_towrite)
int len)
{
struct page *bounce_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
struct writeback_control *wbc = io->io_wbc;
bool keep_towrite = false;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (keep_towrite)
set_page_writeback_keepwrite(page);
else
set_page_writeback(page);
ClearPageError(page);
/*
@ -482,16 +477,31 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* A hole? We can safely clear the dirty bit */
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
/*
* Keeping dirty some buffer we cannot write? Make sure
* to redirty the page and keep TOWRITE tag so that
* racing WB_SYNC_ALL writeback does not skip the page.
* This happens e.g. when doing writeout for
* transaction commit.
*/
if (buffer_dirty(bh)) {
if (!PageDirty(page))
redirty_page_for_writepage(wbc, page);
keep_towrite = true;
}
continue;
}
if (buffer_new(bh))
clear_buffer_new(bh);
set_buffer_async_write(bh);
clear_buffer_dirty(bh);
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
/* Nothing to submit? Just unlock the page... */
if (!nr_to_submit)
goto unlock;
bh = head = page_buffers(page);
/*
@ -532,27 +542,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
clear_buffer_async_write(bh);
if (buffer_async_write(bh)) {
clear_buffer_async_write(bh);
set_buffer_dirty(bh);
}
bh = bh->b_this_page;
} while (bh != head);
goto unlock;
}
}
if (keep_towrite)
set_page_writeback_keepwrite(page);
else
set_page_writeback(page);
/* Now submit buffers to write */
do {
if (!buffer_async_write(bh))
continue;
io_submit_add_bh(io, inode,
bounce_page ? bounce_page : page, bh);
nr_submitted++;
clear_buffer_dirty(bh);
} while ((bh = bh->b_this_page) != head);
unlock:
unlock_page(page);
/* Nothing submitted - we have to end page writeback */
if (!nr_submitted)
end_page_writeback(page);
return ret;
}

View File

@ -410,9 +410,8 @@ int ext4_mpage_readpages(struct inode *inode,
int __init ext4_init_post_read_processing(void)
{
bio_post_read_ctx_cache =
kmem_cache_create("ext4_bio_post_read_ctx",
sizeof(struct bio_post_read_ctx), 0, 0, NULL);
bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT);
if (!bio_post_read_ctx_cache)
goto fail;
bio_post_read_ctx_pool =

View File

@ -1110,6 +1110,16 @@ exit_free:
return err;
}
/*
 * Stamp a superblock image held in @data with block group number @group.
 *
 * @data is interpreted as a struct ext4_super_block. After the group
 * number is stored (little-endian, 16 bit), the superblock checksum is
 * recomputed, but only when the filesystem uses metadata checksums.
 */
static inline void ext4_set_block_group_nr(struct super_block *sb, char *data,
					   ext4_group_t group)
{
	struct ext4_super_block *sb_image = (struct ext4_super_block *) data;

	sb_image->s_block_group_nr = cpu_to_le16(group);

	/* Without metadata checksums there is nothing left to refresh. */
	if (!ext4_has_metadata_csum(sb))
		return;

	sb_image->s_checksum = ext4_superblock_csum(sb, sb_image);
}
/*
* Update the backup copies of the ext4 metadata. These don't need to be part
* of the main resize transaction, because e2fsck will re-write them if there
@ -1158,7 +1168,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
while (group < sbi->s_groups_count) {
struct buffer_head *bh;
ext4_fsblk_t backup_block;
struct ext4_super_block *es;
int has_super = ext4_bg_has_super(sb, group);
ext4_fsblk_t first_block = ext4_group_first_block_no(sb, group);
/* Out of journal space, and can't get more - abort - so sad */
err = ext4_resize_ensure_credits_batch(handle, 1);
@ -1168,8 +1179,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
if (meta_bg == 0)
backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
backup_block = first_block + has_super;
bh = sb_getblk(sb, backup_block);
if (unlikely(!bh)) {
@ -1187,10 +1197,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
memcpy(bh->b_data, data, size);
if (rest)
memset(bh->b_data + size, 0, rest);
es = (struct ext4_super_block *) bh->b_data;
es->s_block_group_nr = cpu_to_le16(group);
if (ext4_has_metadata_csum(sb))
es->s_checksum = ext4_superblock_csum(sb, es);
if (has_super && (backup_block == first_block))
ext4_set_block_group_nr(sb, bh->b_data, group);
set_buffer_uptodate(bh);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
@ -1476,8 +1484,6 @@ static void ext4_update_super(struct super_block *sb,
* active. */
ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
reserved_blocks);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
@ -1513,6 +1519,8 @@ static void ext4_update_super(struct super_block *sb,
ext4_calculate_overhead(sb);
es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:"
"%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
@ -1596,8 +1604,8 @@ exit_journal:
int meta_bg = ext4_has_feature_meta_bg(sb);
sector_t old_gdb = 0;
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block), 0);
update_backups(sb, ext4_group_first_block_no(sb, 0),
(char *)es, sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
@ -1808,7 +1816,7 @@ errout:
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
update_backups(sb, ext4_group_first_block_no(sb, 0),
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
@ -1831,7 +1839,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_grpblk_t last;
ext4_grpblk_t add;
struct buffer_head *bh;
int err;
ext4_group_t group;
o_blocks_count = ext4_blocks_count(es);
@ -1886,8 +1893,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
}
brelse(bh);
err = ext4_group_extend_no_check(sb, o_blocks_count, add);
return err;
return ext4_group_extend_no_check(sb, o_blocks_count, add);
} /* ext4_group_extend */

View File

@ -540,8 +540,7 @@ static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
if (ext4_should_journal_data(jinode->i_vfs_inode))
ret = ext4_journalled_submit_inode_data_buffers(jinode);
else
ret = jbd2_journal_submit_inode_data_buffers(jinode);
ret = ext4_normal_submit_inode_data_buffers(jinode);
return ret;
}
@ -1206,7 +1205,8 @@ static void ext4_put_super(struct super_block *sb)
ext4_unregister_sysfs(sb);
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
ext4_msg(sb, KERN_INFO, "unmounting filesystem.");
ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
&sb->s_uuid);
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
@ -1323,6 +1323,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
return NULL;
inode_set_iversion(&ei->vfs_inode, 1);
ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
atomic_set(&ei->i_prealloc_active, 0);
@ -2247,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -EINVAL;
}
error = fs_lookup_param(fc, param, 1, &path);
error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
if (error) {
ext4_msg(NULL, KERN_ERR, "error: could not find "
"journal device path");
@ -5286,16 +5287,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount3a;
} else {
/* Nojournal mode, all journal mount options are illegal */
if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_checksum, fs mounted w/o journal");
goto failed_mount3a;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_async_commit, fs mounted w/o journal");
goto failed_mount3a;
}
if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_checksum, fs mounted w/o journal");
goto failed_mount3a;
}
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"commit=%lu, fs mounted w/o journal",
@ -5654,8 +5656,9 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
descr = "out journal";
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Quota mode: %s.", descr, ext4_quota_mode(sb));
ext4_msg(sb, KERN_INFO, "mounted filesystem %pU with%s. "
"Quota mode: %s.", &sb->s_uuid, descr,
ext4_quota_mode(sb));
/* Update the s_overhead_clusters if necessary */
ext4_update_overhead(sb, false);
@ -5722,7 +5725,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
ext4_debug("Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
if (!S_ISREG(journal_inode->i_mode)) {
if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return NULL;
@ -6610,8 +6613,8 @@ static int ext4_reconfigure(struct fs_context *fc)
if (ret < 0)
return ret;
ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.",
ext4_quota_mode(sb));
ext4_msg(sb, KERN_INFO, "re-mounted %pU. Quota mode: %s.",
&sb->s_uuid, ext4_quota_mode(sb));
return 0;
}
@ -6885,6 +6888,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
/*
 * Check that @qf_inum is an acceptable inode number for a quota file of
 * the given @type.
 *
 * User and group quota files must be exactly EXT4_USR_QUOTA_INO and
 * EXT4_GRP_QUOTA_INO respectively; a project quota file may be any inode
 * number at or above EXT4_GOOD_OLD_FIRST_INO.
 *
 * Returns true when the number is acceptable. Any other @type is a caller
 * bug and hits BUG().
 */
static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
{
	if (type == USRQUOTA)
		return qf_inum == EXT4_USR_QUOTA_INO;

	if (type == GRPQUOTA)
		return qf_inum == EXT4_GRP_QUOTA_INO;

	if (type == PRJQUOTA)
		return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;

	/* Unknown quota type. */
	BUG();
}
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
@ -6901,9 +6918,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
if (!ext4_check_quota_inum(type, qf_inums[type])) {
ext4_error(sb, "Bad quota inum: %lu, type: %d",
qf_inums[type], type);
return -EUCLEAN;
}
qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
ext4_error(sb, "Bad quota inode: %lu, type: %d",
qf_inums[type], type);
return PTR_ERR(qf_inode);
}
@ -6942,8 +6966,9 @@ int ext4_enable_quotas(struct super_block *sb)
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
"(type=%d, err=%d). Please run "
"e2fsck to fix.", type, err);
"(type=%d, err=%d, ino=%lu). "
"Please run e2fsck to fix.", type,
err, qf_inums[type]);
for (type--; type >= 0; type--) {
struct inode *inode;
@ -7030,8 +7055,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
tocopy = sb->s_blocksize - offset < toread ?
sb->s_blocksize - offset : toread;
tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
bh = ext4_bread(NULL, inode, blk, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);

View File

@ -79,7 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
struct page *page;
void *fsdata;
void *fsdata = NULL;
int res;
res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);

View File

@ -1281,7 +1281,7 @@ retry_ref:
ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
ce->e_reusable = 1;
set_bit(MBE_REUSABLE_B, &ce->e_flags);
mb_cache_entry_put(ea_block_cache, ce);
}
}
@ -1441,6 +1441,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
if (!err)
err = ext4_inode_attach_jinode(ea_inode);
if (err) {
if (ext4_xattr_inode_dec_ref(handle, ea_inode))
ext4_warning_inode(ea_inode,
"cleanup dec ref error %d", err);
iput(ea_inode);
return ERR_PTR(err);
}
@ -1540,7 +1543,8 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
if (err) {
ext4_xattr_inode_dec_ref(handle, ea_inode);
if (ext4_xattr_inode_dec_ref(handle, ea_inode))
ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
iput(ea_inode);
return err;
}
@ -2042,7 +2046,7 @@ inserted:
}
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
clear_bit(MBE_REUSABLE_B, &ce->e_flags);
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
ext4_xattr_block_csum_set(inode, new_bh);
@ -2070,19 +2074,11 @@ inserted:
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
if (error)
goto cleanup;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
ea_idebug(inode, "creating block %llu",
(unsigned long long)block);
@ -2555,7 +2551,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
buffer = kmalloc(value_size, GFP_NOFS);
buffer = kvmalloc(value_size, GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
if (!is || !bs || !buffer || !b_entry_name) {
error = -ENOMEM;
@ -2607,7 +2603,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
error = 0;
out:
kfree(b_entry_name);
kfree(buffer);
kvfree(buffer);
if (is)
brelse(is->iloc.bh);
if (bs)

View File

@ -138,15 +138,16 @@ EXPORT_SYMBOL(__fs_parse);
* @fc: The filesystem context to log errors through.
* @param: The parameter.
* @want_bdev: T if want a blockdev
* @flags: Pathwalk flags passed to filename_lookup()
* @_path: The result of the lookup
*/
int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *param,
bool want_bdev,
unsigned int flags,
struct path *_path)
{
struct filename *f;
unsigned int flags = 0;
bool put_f;
int ret;

View File

@ -207,14 +207,13 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
}
/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(struct jbd2_inode *jinode)
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
return 0;
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
return jbd2_journal_submit_inode_data_buffers(jinode);
return journal->j_submit_inode_data_buffers(jinode);
}
EXPORT_SYMBOL(jbd2_submit_inode_data);

View File

@ -100,8 +100,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
atomic_set(&entry->e_refcnt, 2);
entry->e_key = key;
entry->e_value = value;
entry->e_reusable = reusable;
entry->e_referenced = 0;
entry->e_flags = 0;
if (reusable)
set_bit(MBE_REUSABLE_B, &entry->e_flags);
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@ -165,7 +166,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
while (node) {
entry = hlist_bl_entry(node, struct mb_cache_entry,
e_hash_list);
if (entry->e_key == key && entry->e_reusable &&
if (entry->e_key == key &&
test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
atomic_inc_not_zero(&entry->e_refcnt))
goto out;
node = node->next;
@ -284,7 +286,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
void mb_cache_entry_touch(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
entry->e_referenced = 1;
set_bit(MBE_REFERENCED_B, &entry->e_flags);
}
EXPORT_SYMBOL(mb_cache_entry_touch);
@ -309,9 +311,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
entry = list_first_entry(&cache->c_list,
struct mb_cache_entry, e_list);
/* Drop initial hash reference if there is no user */
if (entry->e_referenced ||
if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
entry->e_referenced = 0;
clear_bit(MBE_REFERENCED_B, &entry->e_flags);
list_move_tail(&entry->e_list, &cache->c_list);
continue;
}

View File

@ -2324,6 +2324,8 @@ static int vfs_setup_quota_inode(struct inode *inode, int type)
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
if (is_bad_inode(inode))
return -EUCLEAN;
if (!S_ISREG(inode->i_mode))
return -EACCES;
if (IS_RDONLY(inode))

View File

@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc,
extern int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *param,
bool want_bdev,
unsigned int flags,
struct path *_path);
extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);

View File

@ -1662,7 +1662,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
int jbd2_fc_end_commit(journal_t *journal);
int jbd2_fc_end_commit_fallback(journal_t *journal);
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(struct jbd2_inode *jinode);
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);

View File

@ -10,6 +10,12 @@
struct mb_cache;
/*
 * Cache entry flags.
 *
 * These are bit numbers (not masks) within mb_cache_entry.e_flags and are
 * meant to be used with set_bit()/clear_bit()/test_bit().
 */
enum {
/* Entry was touched recently; set by mb_cache_entry_touch(), cleared by the shrinker */
MBE_REFERENCED_B = 0,
/* Entry may be returned by cache lookups for reuse */
MBE_REUSABLE_B
};
struct mb_cache_entry {
/* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_list;
@ -26,8 +32,7 @@ struct mb_cache_entry {
atomic_t e_refcnt;
/* Key in hash - stable during lifetime of the entry */
u32 e_key;
u32 e_referenced:1;
u32 e_reusable:1;
unsigned long e_flags;
/* User provided value - stable during lifetime of the entry */
u64 e_value;
};

View File

@ -104,6 +104,7 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
#define show_fc_reason(reason) \
@ -116,7 +117,8 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
{ EXT4_FC_REASON_RESIZE, "RESIZE"}, \
{ EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \
{ EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \
{ EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"})
{ EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \
{ EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"})
TRACE_EVENT(ext4_other_inode_update_time,
TP_PROTO(struct inode *inode, ino_t orig_ino),
@ -1744,18 +1746,19 @@ TRACE_EVENT(ext4_load_inode,
(unsigned long) __entry->ino)
);
TRACE_EVENT(ext4_journal_start,
TRACE_EVENT(ext4_journal_start_sb,
TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks,
int revoke_creds, unsigned long IP),
int revoke_creds, int type, unsigned long IP),
TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, IP),
TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, type, IP),
TP_STRUCT__entry(
__field( dev_t, dev )
__field(unsigned long, ip )
__field( int, blocks )
__field( int, rsv_blocks )
__field( int, revoke_creds )
__field( dev_t, dev )
__field( unsigned long, ip )
__field( int, blocks )
__field( int, rsv_blocks )
__field( int, revoke_creds )
__field( int, type )
),
TP_fast_assign(
@ -1764,11 +1767,45 @@ TRACE_EVENT(ext4_journal_start,
__entry->blocks = blocks;
__entry->rsv_blocks = rsv_blocks;
__entry->revoke_creds = revoke_creds;
__entry->type = type;
),
TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d, "
"caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blocks, __entry->rsv_blocks, __entry->revoke_creds,
TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d,"
" type %d, caller %pS", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks,
__entry->revoke_creds, __entry->type, (void *)__entry->ip)
);
/*
 * ext4_journal_start_inode - trace event that takes an inode together with
 * the blocks, rsv_blocks, revoke_creds and type values and the caller's IP.
 *
 * The recorded entry holds the inode number (inode->i_ino), the device of
 * the inode's superblock (inode->i_sb->s_dev), and the remaining arguments
 * copied unchanged. The output line reports the device as "major,minor",
 * then blocks, rsv_blocks, revoke_creds, type and ino, and finally the
 * stored IP formatted with %pS under the label "caller".
 */
TRACE_EVENT(ext4_journal_start_inode,
TP_PROTO(struct inode *inode, int blocks, int rsv_blocks,
int revoke_creds, int type, unsigned long IP),
TP_ARGS(inode, blocks, rsv_blocks, revoke_creds, type, IP),
TP_STRUCT__entry(
__field( unsigned long, ino )
__field( dev_t, dev )
__field( unsigned long, ip )
__field( int, blocks )
__field( int, rsv_blocks )
__field( int, revoke_creds )
__field( int, type )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ip = IP;
__entry->blocks = blocks;
__entry->rsv_blocks = rsv_blocks;
__entry->revoke_creds = revoke_creds;
__entry->type = type;
__entry->ino = inode->i_ino;
),
/* ino is declared first in the entry but printed after type. */
TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d,"
" type %d, ino %lu, caller %pS", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks,
__entry->revoke_creds, __entry->type, __entry->ino,
(void *)__entry->ip)
);
@ -2764,7 +2801,7 @@ TRACE_EVENT(ext4_fc_stats,
),
TP_printk("dev %d,%d fc ineligible reasons:\n"
"%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u "
"%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u"
"num_commits:%lu, ineligible: %lu, numblks: %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
@ -2776,6 +2813,7 @@ TRACE_EVENT(ext4_fc_stats,
FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
FC_REASON_NAME_STAT(EXT4_FC_REASON_ENCRYPTED_FILENAME),
__entry->fc_commits, __entry->fc_ineligible_commits,
__entry->fc_numblks)
);

View File

@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( tid_t, transaction )
),
TP_fast_assign(
@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %d,%d transaction %d sync %d",
TP_printk("dev %d,%d transaction %u sync %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit)
);
@ -97,8 +97,8 @@ TRACE_EVENT(jbd2_end_commit,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
__field( tid_t, transaction )
__field( tid_t, head )
),
TP_fast_assign(
@ -108,7 +108,7 @@ TRACE_EVENT(jbd2_end_commit,
__entry->head = journal->j_tail_sequence;
),
TP_printk("dev %d,%d transaction %d sync %d head %d",
TP_printk("dev %d,%d transaction %u sync %d head %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->transaction, __entry->sync_commit, __entry->head)
);
@ -134,14 +134,14 @@ TRACE_EVENT(jbd2_submit_inode_data,
);
DECLARE_EVENT_CLASS(jbd2_handle_start_class,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( tid_t, tid )
__field( unsigned int, type )
__field( unsigned int, line_no )
__field( int, requested_blocks)
@ -155,28 +155,28 @@ DECLARE_EVENT_CLASS(jbd2_handle_start_class,
__entry->requested_blocks = requested_blocks;
),
TP_printk("dev %d,%d tid %lu type %u line_no %u "
TP_printk("dev %d,%d tid %u type %u line_no %u "
"requested_blocks %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
__entry->type, __entry->line_no, __entry->requested_blocks)
);
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
TRACE_EVENT(jbd2_handle_extend,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
unsigned int line_no, int buffer_credits,
int requested_blocks),
@ -184,7 +184,7 @@ TRACE_EVENT(jbd2_handle_extend,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( tid_t, tid )
__field( unsigned int, type )
__field( unsigned int, line_no )
__field( int, buffer_credits )
@ -200,7 +200,7 @@ TRACE_EVENT(jbd2_handle_extend,
__entry->requested_blocks = requested_blocks;
),
TP_printk("dev %d,%d tid %lu type %u line_no %u "
TP_printk("dev %d,%d tid %u type %u line_no %u "
"buffer_credits %d requested_blocks %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
__entry->type, __entry->line_no, __entry->buffer_credits,
@ -208,7 +208,7 @@ TRACE_EVENT(jbd2_handle_extend,
);
TRACE_EVENT(jbd2_handle_stats,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
unsigned int line_no, int interval, int sync,
int requested_blocks, int dirtied_blocks),
@ -217,7 +217,7 @@ TRACE_EVENT(jbd2_handle_stats,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( tid_t, tid )
__field( unsigned int, type )
__field( unsigned int, line_no )
__field( int, interval )
@ -237,7 +237,7 @@ TRACE_EVENT(jbd2_handle_stats,
__entry->dirtied_blocks = dirtied_blocks;
),
TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d "
TP_printk("dev %d,%d tid %u type %u line_no %u interval %d "
"sync %d requested_blocks %d dirtied_blocks %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
__entry->type, __entry->line_no, __entry->interval,
@ -246,14 +246,14 @@ TRACE_EVENT(jbd2_handle_stats,
);
TRACE_EVENT(jbd2_run_stats,
TP_PROTO(dev_t dev, unsigned long tid,
TP_PROTO(dev_t dev, tid_t tid,
struct transaction_run_stats_s *stats),
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( tid_t, tid )
__field( unsigned long, wait )
__field( unsigned long, request_delay )
__field( unsigned long, running )
@ -279,7 +279,7 @@ TRACE_EVENT(jbd2_run_stats,
__entry->blocks_logged = stats->rs_blocks_logged;
),
TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u "
TP_printk("dev %d,%d tid %u wait %u request_delay %u running %u "
"locked %u flushing %u logging %u handle_count %u "
"blocks %u blocks_logged %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
@ -294,14 +294,14 @@ TRACE_EVENT(jbd2_run_stats,
);
TRACE_EVENT(jbd2_checkpoint_stats,
TP_PROTO(dev_t dev, unsigned long tid,
TP_PROTO(dev_t dev, tid_t tid,
struct transaction_chp_stats_s *stats),
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( tid_t, tid )
__field( unsigned long, chp_time )
__field( __u32, forced_to_close )
__field( __u32, written )
@ -317,7 +317,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
__entry->dropped = stats->cs_dropped;
),
TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
TP_printk("dev %d,%d tid %u chp_time %u forced_to_close %u "
"written %u dropped %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
jiffies_to_msecs(__entry->chp_time),

View File

@ -829,6 +829,7 @@ int buffer_migrate_folio_norefs(struct address_space *mapping,
{
return __buffer_migrate_folio(mapping, dst, src, mode, true);
}
EXPORT_SYMBOL_GPL(buffer_migrate_folio_norefs);
#endif
int filemap_migrate_folio(struct address_space *mapping,