mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-17 10:26:09 +00:00
ext4: fix races between changing inode journal mode and ext4_writepages
In ext4, there is a race condition between changing an inode's journal mode and ext4_writepages(). While ext4_writepages() is executing on an inode in non-journaled mode, the inode's journal mode could be enabled by ioctl(), and then some pages dirtied after the journal mode switch will still be exposed to ext4_writepages() in non-journaled mode. To resolve this problem, we use an fs-wide per-cpu rw semaphore, at Jan Kara's suggestion, because we don't want to waste ext4_inode_info's space for this extremely rare case. Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz>
This commit is contained in:
parent
4c54659269
commit
c8585c6fca
@ -33,6 +33,7 @@
|
|||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
#include <crypto/hash.h>
|
#include <crypto/hash.h>
|
||||||
#include <linux/falloc.h>
|
#include <linux/falloc.h>
|
||||||
|
#include <linux/percpu-rwsem.h>
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
#include <linux/compat.h>
|
#include <linux/compat.h>
|
||||||
#endif
|
#endif
|
||||||
@ -1508,6 +1509,9 @@ struct ext4_sb_info {
|
|||||||
struct ratelimit_state s_err_ratelimit_state;
|
struct ratelimit_state s_err_ratelimit_state;
|
||||||
struct ratelimit_state s_warning_ratelimit_state;
|
struct ratelimit_state s_warning_ratelimit_state;
|
||||||
struct ratelimit_state s_msg_ratelimit_state;
|
struct ratelimit_state s_msg_ratelimit_state;
|
||||||
|
|
||||||
|
/* Barrier between changing inodes' journal flags and writepages ops. */
|
||||||
|
struct percpu_rw_semaphore s_journal_flag_rwsem;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
|
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
|
||||||
|
@ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping,
|
|||||||
struct blk_plug plug;
|
struct blk_plug plug;
|
||||||
bool give_up_on_write = false;
|
bool give_up_on_write = false;
|
||||||
|
|
||||||
|
percpu_down_read(&sbi->s_journal_flag_rwsem);
|
||||||
trace_ext4_writepages(inode, wbc);
|
trace_ext4_writepages(inode, wbc);
|
||||||
|
|
||||||
if (dax_mapping(mapping))
|
if (dax_mapping(mapping)) {
|
||||||
return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
|
ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
|
||||||
wbc);
|
wbc);
|
||||||
|
goto out_writepages;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No pages to write? This is mainly a kludge to avoid starting
|
* No pages to write? This is mainly a kludge to avoid starting
|
||||||
@ -2786,6 +2789,7 @@ retry:
|
|||||||
out_writepages:
|
out_writepages:
|
||||||
trace_ext4_writepages_result(inode, wbc, ret,
|
trace_ext4_writepages_result(inode, wbc, ret,
|
||||||
nr_to_write - wbc->nr_to_write);
|
nr_to_write - wbc->nr_to_write);
|
||||||
|
percpu_up_read(&sbi->s_journal_flag_rwsem);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
journal_t *journal;
|
journal_t *journal;
|
||||||
handle_t *handle;
|
handle_t *handle;
|
||||||
int err;
|
int err;
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have to be very careful here: changing a data block's
|
* We have to be very careful here: changing a data block's
|
||||||
@ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
percpu_down_write(&sbi->s_journal_flag_rwsem);
|
||||||
jbd2_journal_lock_updates(journal);
|
jbd2_journal_lock_updates(journal);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
err = jbd2_journal_flush(journal);
|
err = jbd2_journal_flush(journal);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
|
percpu_up_write(&sbi->s_journal_flag_rwsem);
|
||||||
ext4_inode_resume_unlocked_dio(inode);
|
ext4_inode_resume_unlocked_dio(inode);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
ext4_set_aops(inode);
|
ext4_set_aops(inode);
|
||||||
|
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
|
percpu_up_write(&sbi->s_journal_flag_rwsem);
|
||||||
|
|
||||||
if (val)
|
if (val)
|
||||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||||
ext4_inode_resume_unlocked_dio(inode);
|
ext4_inode_resume_unlocked_dio(inode);
|
||||||
|
@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb)
|
|||||||
percpu_counter_destroy(&sbi->s_freeinodes_counter);
|
percpu_counter_destroy(&sbi->s_freeinodes_counter);
|
||||||
percpu_counter_destroy(&sbi->s_dirs_counter);
|
percpu_counter_destroy(&sbi->s_dirs_counter);
|
||||||
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
|
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
|
||||||
|
percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
|
||||||
brelse(sbi->s_sbh);
|
brelse(sbi->s_sbh);
|
||||||
#ifdef CONFIG_QUOTA
|
#ifdef CONFIG_QUOTA
|
||||||
for (i = 0; i < EXT4_MAXQUOTAS; i++)
|
for (i = 0; i < EXT4_MAXQUOTAS; i++)
|
||||||
@ -3930,6 +3931,9 @@ no_journal:
|
|||||||
if (!err)
|
if (!err)
|
||||||
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
|
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
|
if (!err)
|
||||||
|
err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
|
||||||
|
|
||||||
if (err) {
|
if (err) {
|
||||||
ext4_msg(sb, KERN_ERR, "insufficient memory");
|
ext4_msg(sb, KERN_ERR, "insufficient memory");
|
||||||
goto failed_mount6;
|
goto failed_mount6;
|
||||||
|
@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
|
|||||||
free_percpu(brw->fast_read_ctr);
|
free_percpu(brw->fast_read_ctr);
|
||||||
brw->fast_read_ctr = NULL; /* catch use after free bugs */
|
brw->fast_read_ctr = NULL; /* catch use after free bugs */
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the fast-path for down_read/up_read. If it succeeds we rely
|
* This is the fast-path for down_read/up_read. If it succeeds we rely
|
||||||
|
Loading…
x
Reference in New Issue
Block a user