2017-12-18 03:00:59 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0+
|
2006-10-11 08:20:57 +00:00
|
|
|
/*
|
2007-05-09 05:51:49 +00:00
|
|
|
* linux/fs/jbd2/checkpoint.c
|
2006-10-11 08:20:57 +00:00
|
|
|
*
|
|
|
|
* Written by Stephen C. Tweedie <sct@redhat.com>, 1999
|
|
|
|
*
|
|
|
|
* Copyright 1999 Red Hat Software --- All Rights Reserved
|
|
|
|
*
|
|
|
|
* Checkpoint routines for the generic filesystem journaling code.
|
|
|
|
* Part of the ext2fs journaling system.
|
|
|
|
*
|
|
|
|
* Checkpointing is the process of ensuring that a section of the log is
|
|
|
|
* committed fully to disk, so that that portion of the log can be
|
|
|
|
* reused.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/time.h>
|
|
|
|
#include <linux/fs.h>
|
2006-10-11 08:20:59 +00:00
|
|
|
#include <linux/jbd2.h>
|
2006-10-11 08:20:57 +00:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/slab.h>
|
2009-12-23 11:52:08 +00:00
|
|
|
#include <linux/blkdev.h>
|
2009-06-17 15:47:48 +00:00
|
|
|
#include <trace/events/jbd2.h>
|
2006-10-11 08:20:57 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlink a buffer from a transaction checkpoint list.
|
|
|
|
*
|
|
|
|
* Called with j_list_lock held.
|
|
|
|
*/
|
2023-06-06 13:59:24 +00:00
|
|
|
static inline void __buffer_unlink(struct journal_head *jh)
|
2006-10-11 08:20:57 +00:00
|
|
|
{
|
|
|
|
transaction_t *transaction = jh->b_cp_transaction;
|
|
|
|
|
|
|
|
jh->b_cpnext->b_cpprev = jh->b_cpprev;
|
|
|
|
jh->b_cpprev->b_cpnext = jh->b_cpnext;
|
|
|
|
if (transaction->t_checkpoint_list == jh) {
|
|
|
|
transaction->t_checkpoint_list = jh->b_cpnext;
|
|
|
|
if (transaction->t_checkpoint_list == jh)
|
|
|
|
transaction->t_checkpoint_list = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * __jbd2_log_wait_for_space: wait until there is space in the journal.
 *
 * Called under j_state_lock *only*.  It will be unlocked if we have to wait
 * for a checkpoint to free up some space in the log.  On return the lock is
 * held again (hence the sparse __acquires/__releases annotations).
 */
void __jbd2_log_wait_for_space(journal_t *journal)
__acquires(&journal->j_state_lock)
__releases(&journal->j_state_lock)
{
	int nblocks, space_left;
	/* assert_spin_locked(&journal->j_state_lock); */

	/* Worst-case space one transaction may need. */
	nblocks = journal->j_max_transaction_buffers;
	while (jbd2_log_space_left(journal) < nblocks) {
		/* Lock ordering: j_checkpoint_mutex must not nest inside
		 * j_state_lock, so drop the latter before blocking. */
		write_unlock(&journal->j_state_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);

		/*
		 * Test again, another process may have checkpointed while we
		 * were waiting for the checkpoint lock. If there are no
		 * transactions ready to be checkpointed, try to recover
		 * journal space by calling cleanup_journal_tail(), and if
		 * that doesn't work, by waiting for the currently committing
		 * transaction to complete. If there is absolutely no way
		 * to make progress, this is either a BUG or corrupted
		 * filesystem, so abort the journal and leave a stack
		 * trace for forensic evidence.
		 */
		write_lock(&journal->j_state_lock);
		if (journal->j_flags & JBD2_ABORT) {
			mutex_unlock(&journal->j_checkpoint_mutex);
			return;
		}
		spin_lock(&journal->j_list_lock);
		space_left = jbd2_log_space_left(journal);
		if (space_left < nblocks) {
			int chkpt = journal->j_checkpoint_transactions != NULL;
			tid_t tid = 0;
			bool has_transaction = false;

			if (journal->j_committing_transaction) {
				tid = journal->j_committing_transaction->t_tid;
				has_transaction = true;
			}
			spin_unlock(&journal->j_list_lock);
			write_unlock(&journal->j_state_lock);
			if (chkpt) {
				jbd2_log_do_checkpoint(journal);
			} else if (jbd2_cleanup_journal_tail(journal) <= 0) {
				/*
				 * We were able to recover space or the
				 * journal was aborted due to an error.
				 * (<= 0 also covers I/O errors from updating
				 * the journal superblock: stop retrying.)
				 */
				;
			} else if (has_transaction) {
				/*
				 * jbd2_journal_commit_transaction() may want
				 * to take the checkpoint_mutex if JBD2_FLUSHED
				 * is set. So we need to temporarily drop it.
				 */
				mutex_unlock(&journal->j_checkpoint_mutex);
				jbd2_log_wait_commit(journal, tid);
				write_lock(&journal->j_state_lock);
				continue;
			} else {
				/* No checkpoint list, no committing
				 * transaction, no tail to clean: we cannot
				 * make progress.  BUG or corruption. */
				printk(KERN_ERR "%s: needed %d blocks and "
				       "only had %d space available\n",
				       __func__, nblocks, space_left);
				printk(KERN_ERR "%s: no way to get more "
				       "journal space in %s\n", __func__,
				       journal->j_devname);
				WARN_ON(1);
				jbd2_journal_abort(journal, -EIO);
			}
			write_lock(&journal->j_state_lock);
		} else {
			spin_unlock(&journal->j_list_lock);
		}
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
}
|
|
|
|
|
|
|
|
static void
|
2008-11-05 05:09:22 +00:00
|
|
|
__flush_batch(journal_t *journal, int *batch_count)
|
2006-10-11 08:20:57 +00:00
|
|
|
{
|
|
|
|
int i;
|
2011-06-27 16:36:29 +00:00
|
|
|
struct blk_plug plug;
|
2006-10-11 08:20:57 +00:00
|
|
|
|
2011-06-27 16:36:29 +00:00
|
|
|
blk_start_plug(&plug);
|
2010-08-11 15:06:24 +00:00
|
|
|
for (i = 0; i < *batch_count; i++)
|
2016-11-01 13:40:10 +00:00
|
|
|
write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
|
2011-06-27 16:36:29 +00:00
|
|
|
blk_finish_plug(&plug);
|
2010-08-11 15:06:24 +00:00
|
|
|
|
2006-10-11 08:20:57 +00:00
|
|
|
for (i = 0; i < *batch_count; i++) {
|
2008-11-05 05:09:22 +00:00
|
|
|
struct buffer_head *bh = journal->j_chkpt_bhs[i];
|
2006-10-11 08:20:57 +00:00
|
|
|
BUFFER_TRACE(bh, "brelse");
|
|
|
|
__brelse(bh);
|
2023-06-06 13:59:23 +00:00
|
|
|
journal->j_chkpt_bhs[i] = NULL;
|
2006-10-11 08:20:57 +00:00
|
|
|
}
|
|
|
|
*batch_count = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 *
 * Returns 0 on success, a negative error code from
 * jbd2_cleanup_journal_tail() on failure.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	struct journal_head	*jh;
	struct buffer_head	*bh;
	transaction_t		*transaction;
	tid_t			this_tid;
	int			result, batch_count = 0;

	jbd2_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks. Take one transaction
	 * and write it.
	 */
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	/* Record when we first started checkpointing this transaction. */
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	/* checkpoint all of the transaction's buffers */
	while (transaction->t_checkpoint_list) {
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

		/*
		 * Buffer still attached to a running/committing transaction:
		 * we must wait for that commit before it can be written back.
		 */
		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			transaction->t_chp_stats.cs_forced_to_close++;
			spin_unlock(&journal->j_list_lock);
			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
				/*
				 * The journal thread is dead; so
				 * starting and waiting for a commit
				 * to finish will cause us to wait for
				 * a _very_ long time.
				 */
				printk(KERN_ERR
		"JBD2: %s: Waiting for Godot: block %llu\n",
		journal->j_devname, (unsigned long long) bh->b_blocknr);

			/* Flush what we have queued before sleeping. */
			if (batch_count)
				__flush_batch(journal, &batch_count);
			jbd2_log_start_commit(journal, tid);
			/*
			 * jbd2_journal_commit_transaction() may want
			 * to take the checkpoint_mutex if JBD2_FLUSHED
			 * is set, jbd2_update_log_tail() called by
			 * jbd2_journal_commit_transaction() may also take
			 * checkpoint_mutex.  So we need to temporarily
			 * drop it.
			 */
			mutex_unlock(&journal->j_checkpoint_mutex);
			jbd2_log_wait_commit(journal, tid);
			mutex_lock_io(&journal->j_checkpoint_mutex);
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		if (!trylock_buffer(bh)) {
			/*
			 * The buffer is locked, it may be writing back, or
			 * flushing out in the last couple of cycles, or
			 * re-adding into a new transaction, need to check
			 * it again until it's unlocked.
			 */
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto retry;
		} else if (!buffer_dirty(bh)) {
			unlock_buffer(bh);
			BUFFER_TRACE(bh, "remove from checkpoint");
			/*
			 * If the transaction was released or the checkpoint
			 * list was empty, we're done.
			 */
			if (__jbd2_journal_remove_checkpoint(jh) ||
			    !transaction->t_checkpoint_list)
				goto out;
		} else {
			unlock_buffer(bh);
			/*
			 * We are about to write the buffer, it could be
			 * raced by some other transaction shrink or buffer
			 * re-log logic once we release the j_list_lock,
			 * leave it on the checkpoint list and check status
			 * again to make sure it's clean.
			 */
			BUFFER_TRACE(bh, "queue");
			get_bh(bh);
			J_ASSERT_BH(bh, !buffer_jwrite(bh));
			journal->j_chkpt_bhs[batch_count++] = bh;
			transaction->t_chp_stats.cs_written++;
			transaction->t_checkpoint_list = jh->b_cpnext;
		}

		/*
		 * Flush the batch when it is full, when we should yield the
		 * CPU or the lock, or when the scan has wrapped back to the
		 * first queued buffer.
		 */
		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
			goto unlock_and_flush;
	}

	if (batch_count) {
		unlock_and_flush:
			spin_unlock(&journal->j_list_lock);
		retry:
			if (batch_count)
				__flush_batch(journal, &batch_count);
			spin_lock(&journal->j_list_lock);
			goto restart;
	}

out:
	spin_unlock(&journal->j_list_lock);
	/* Roll the log tail forward over whatever we just checkpointed. */
	result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check the list of checkpoint transactions for the journal to see if
|
|
|
|
* we have already got rid of any since the last update of the log tail
|
|
|
|
* in the journal superblock. If so, we can instantly roll the
|
|
|
|
* superblock forward to remove those transactions from the log.
|
|
|
|
*
|
|
|
|
* Return <0 on error, 0 on success, 1 if there was nothing to clean up.
|
|
|
|
*
|
|
|
|
* Called with the journal lock held.
|
|
|
|
*
|
|
|
|
* This is the only part of the journaling code which really needs to be
|
|
|
|
* aware of transaction aborts. Checkpointing involves writing to the
|
|
|
|
* main filesystem area rather than to the journal, so it can proceed
|
2008-10-11 00:29:13 +00:00
|
|
|
* even in abort state, but we must not update the super block if
|
|
|
|
* checkpointing may have failed. Otherwise, we would lose some metadata
|
|
|
|
* buffers which should be written-back to the filesystem.
|
2006-10-11 08:20:57 +00:00
|
|
|
*/
|
|
|
|
|
2006-10-11 08:20:59 +00:00
|
|
|
int jbd2_cleanup_journal_tail(journal_t *journal)
|
2006-10-11 08:20:57 +00:00
|
|
|
{
|
|
|
|
tid_t first_tid;
|
2012-03-14 02:22:54 +00:00
|
|
|
unsigned long blocknr;
|
2006-10-11 08:20:57 +00:00
|
|
|
|
2008-10-11 00:29:13 +00:00
|
|
|
if (is_journal_aborted(journal))
|
2015-06-15 18:36:01 +00:00
|
|
|
return -EIO;
|
2008-10-11 00:29:13 +00:00
|
|
|
|
2012-03-14 02:22:54 +00:00
|
|
|
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
|
2006-10-11 08:20:57 +00:00
|
|
|
return 1;
|
2012-03-14 02:22:54 +00:00
|
|
|
J_ASSERT(blocknr != 0);
|
2009-12-23 11:52:08 +00:00
|
|
|
|
|
|
|
/*
|
2012-03-14 02:22:54 +00:00
|
|
|
* We need to make sure that any blocks that were recently written out
|
|
|
|
* --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
|
|
|
|
* we drop the transactions from the journal. It's unlikely this will
|
|
|
|
* be necessary, especially with an appropriately sized journal, but we
|
|
|
|
* need this to guarantee correctness. Fortunately
|
|
|
|
* jbd2_cleanup_journal_tail() doesn't get called all that often.
|
2009-12-23 11:52:08 +00:00
|
|
|
*/
|
2012-03-14 02:22:54 +00:00
|
|
|
if (journal->j_flags & JBD2_BARRIER)
|
2021-01-26 14:52:35 +00:00
|
|
|
blkdev_issue_flush(journal->j_fs_dev);
|
2012-03-14 02:22:54 +00:00
|
|
|
|
2015-06-15 18:36:01 +00:00
|
|
|
return __jbd2_update_log_tail(journal, first_tid, blocknr);
|
2006-10-11 08:20:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Checkpoint list management */
|
|
|
|
|
2021-06-10 11:24:37 +00:00
|
|
|
/*
 * journal_shrink_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list
 * and try to release them. If the whole transaction is released, set
 * the 'released' parameter. Return the number of released checkpointed
 * buffers.
 *
 * Called with j_list_lock held.
 */
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
						enum jbd2_shrink_type type,
						bool *released)
{
	struct journal_head *last_jh;
	struct journal_head *next_jh = jh;
	unsigned long nr_freed = 0;
	int ret;

	*released = false;
	if (!jh)
		return 0;

	/* Remember the list tail so we scan each element at most once. */
	last_jh = jh->b_cpprev;
	do {
		jh = next_jh;
		next_jh = jh->b_cpnext;

		if (type == JBD2_SHRINK_DESTROY) {
			/* Destroy mode: remove unconditionally. */
			ret = __jbd2_journal_remove_checkpoint(jh);
		} else {
			/* Best effort: only clean, unpinned buffers go. */
			ret = jbd2_journal_try_remove_checkpoint(jh);
			if (ret < 0) {
				/*
				 * Busy buffer: skip it and keep scanning
				 * (BUSY_SKIP), or stop scanning this list
				 * entirely (BUSY_STOP) to avoid wasting
				 * cycles on buffers that cannot be freed.
				 */
				if (type == JBD2_SHRINK_BUSY_SKIP)
					continue;
				break;
			}
		}

		nr_freed++;
		/* Non-zero means the whole transaction got released. */
		if (ret) {
			*released = true;
			break;
		}

		if (need_resched())
			break;
	} while (jh != last_jh);

	return nr_freed;
}
|
|
|
|
|
|
|
|
/*
 * jbd2_journal_shrink_checkpoint_list
 *
 * Find 'nr_to_scan' written-back checkpoint buffers in the journal
 * and try to release them. Return the number of released checkpointed
 * buffers.
 *
 * Takes and releases j_list_lock internally; may loop, dropping the
 * lock between passes, until *nr_to_scan is exhausted or there is
 * nothing left to resume from.
 */
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
						  unsigned long *nr_to_scan)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	bool __maybe_unused released;
	tid_t first_tid = 0, last_tid = 0, next_tid = 0;
	tid_t tid = 0;
	unsigned long nr_freed = 0;
	unsigned long freed;
	bool first_set = false;

again:
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions) {
		spin_unlock(&journal->j_list_lock);
		goto out;
	}

	/*
	 * Get next shrink transaction, resume previous scan or start
	 * over again. If some others do checkpoint and drop transaction
	 * from the checkpoint list, we ignore saved j_shrink_transaction
	 * and start over unconditionally.
	 */
	if (journal->j_shrink_transaction)
		transaction = journal->j_shrink_transaction;
	else
		transaction = journal->j_checkpoint_transactions;

	/* Capture the first scanned tid once, for tracing only. */
	if (!first_set) {
		first_tid = transaction->t_tid;
		first_set = true;
	}
	last_transaction = journal->j_checkpoint_transactions->t_cpprev;
	next_transaction = transaction;
	last_tid = last_transaction->t_tid;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		tid = transaction->t_tid;

		freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
						   JBD2_SHRINK_BUSY_SKIP, &released);
		nr_freed += freed;
		(*nr_to_scan) -= min(*nr_to_scan, freed);
		if (*nr_to_scan == 0)
			break;
		/* Drop out early to yield the CPU or the spinlock. */
		if (need_resched() || spin_needbreak(&journal->j_list_lock))
			break;
	} while (transaction != last_transaction);

	/* Save our position so the next pass can resume where we stopped. */
	if (transaction != last_transaction) {
		journal->j_shrink_transaction = next_transaction;
		next_tid = next_transaction->t_tid;
	} else {
		journal->j_shrink_transaction = NULL;
		next_tid = 0;
	}

	spin_unlock(&journal->j_list_lock);
	cond_resched();

	if (*nr_to_scan && journal->j_shrink_transaction)
		goto again;
out:
	trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
					  nr_freed, next_tid);

	return nr_freed;
}
|
|
|
|
|
2006-10-11 08:20:57 +00:00
|
|
|
/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 * If 'type' is JBD2_SHRINK_DESTROY, release all buffers unconditionally. If
 * 'type' is JBD2_SHRINK_BUSY_STOP, will stop release buffers if encounters a
 * busy buffer. To avoid wasting CPU cycles scanning the buffer list in some
 * cases, don't pass JBD2_SHRINK_BUSY_SKIP 'type' for this function.
 *
 * Called with j_list_lock held.
 */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal,
					  enum jbd2_shrink_type type)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	bool released;

	/* BUSY_SKIP would defeat the "stop on busy" early exit below. */
	WARN_ON_ONCE(type == JBD2_SHRINK_BUSY_SKIP);

	transaction = journal->j_checkpoint_transactions;
	if (!transaction)
		return;

	/*
	 * Walk the circular list of checkpoint transactions once, starting
	 * from the oldest; remember the last entry so we know when to stop.
	 */
	last_transaction = transaction->t_cpprev;
	next_transaction = transaction;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		journal_shrink_one_cp_list(transaction->t_checkpoint_list,
					   type, &released);
		/*
		 * This function only frees up some memory if possible so we
		 * don't have an obligation to finish processing. Bail out if
		 * preemption requested:
		 */
		if (need_resched())
			return;
		/*
		 * Stop scanning if we couldn't free the transaction. This
		 * avoids pointless scanning of transactions which still
		 * weren't checkpointed.
		 */
		if (!released)
			return;
	} while (transaction != last_transaction);
}
|
|
|
|
|
2015-07-28 18:57:14 +00:00
|
|
|
/*
 * Remove buffers from all checkpoint lists as journal is aborted and we just
 * need to free memory
 */
void jbd2_journal_destroy_checkpoint(journal_t *journal)
{
	/*
	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
	 * early due to a need of rescheduling.
	 */
	while (1) {
		spin_lock(&journal->j_list_lock);
		if (!journal->j_checkpoint_transactions) {
			/* All checkpoint transactions gone — we're done. */
			spin_unlock(&journal->j_list_lock);
			break;
		}
		/* DESTROY: release every buffer unconditionally. */
		__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_DESTROY);
		spin_unlock(&journal->j_list_lock);
		/* Drop the lock between passes so we don't hog the CPU. */
		cond_resched();
	}
}
|
|
|
|
|
2006-10-11 08:20:57 +00:00
|
|
|
/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk.  To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with j_list_lock held.
 */
int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
	struct transaction_chp_stats_s *stats;
	transaction_t *transaction;
	journal_t *journal;

	JBUFFER_TRACE(jh, "entry");

	transaction = jh->b_cp_transaction;
	if (!transaction) {
		/* Not on any checkpoint list — nothing to do. */
		JBUFFER_TRACE(jh, "not on transaction");
		return 0;
	}
	journal = transaction->t_journal;

	JBUFFER_TRACE(jh, "removing from transaction");

	/* Unlink from the checkpoint list and drop the checkpoint ref. */
	__buffer_unlink(jh);
	jh->b_cp_transaction = NULL;
	percpu_counter_dec(&journal->j_checkpoint_jh_count);
	jbd2_journal_put_journal_head(jh);

	/* Is this transaction empty? */
	if (transaction->t_checkpoint_list)
		return 0;

	/*
	 * There is one special case to worry about: if we have just pulled the
	 * buffer off a running or committing transaction's checkpoint list,
	 * then even if the checkpoint list is empty, the transaction obviously
	 * cannot be dropped!
	 *
	 * The locking here around t_state is a bit sleazy.
	 * See the comment at the end of jbd2_journal_commit_transaction().
	 */
	if (transaction->t_state != T_FINISHED)
		return 0;

	/*
	 * OK, that was the last buffer for the transaction, we can now
	 * safely remove this transaction from the log.
	 */
	stats = &transaction->t_chp_stats;
	if (stats->cs_chp_time)
		stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
						    jiffies);
	trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
				    transaction->t_tid, stats);

	__jbd2_journal_drop_transaction(journal, transaction);
	jbd2_journal_free_transaction(transaction);
	return 1;
}
|
|
|
|
|
2023-06-06 13:59:27 +00:00
|
|
|
/*
 * Check the checkpoint buffer and try to remove it from the checkpoint
 * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
 * it frees the transaction, 0 otherwise.
 *
 * This function is called with j_list_lock held.
 */
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	/* Buffer attached to a running/committing transaction is busy. */
	if (jh->b_transaction)
		return -EBUSY;
	/* Don't block under j_list_lock — only try to take the buffer lock. */
	if (!trylock_buffer(bh))
		return -EBUSY;
	if (buffer_dirty(bh)) {
		/* Still needs writeback; can't checkpoint it away yet. */
		unlock_buffer(bh);
		return -EBUSY;
	}
	unlock_buffer(bh);

	/*
	 * Buffer is clean and the IO has finished (we held the buffer
	 * lock) so the checkpoint is done. We can safely remove the
	 * buffer from this transaction.
	 */
	JBUFFER_TRACE(jh, "remove from checkpoint list");
	return __jbd2_journal_remove_checkpoint(jh);
}
|
|
|
|
|
2006-10-11 08:20:57 +00:00
|
|
|
/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
 * the log.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
				      transaction_t *transaction)
{
	JBUFFER_TRACE(jh, "entry");
	/* Only dirty (or journal-dirty) buffers belong on a checkpoint list. */
	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

	/* Get reference for checkpointing transaction */
	jbd2_journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	/* Insert into the transaction's circular doubly-linked cp list. */
	if (!transaction->t_checkpoint_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_list;
		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_list = jh;
	percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
}
|
|
|
|
|
|
|
|
/*
 * We've finished with this transaction structure: adios...
 *
 * The transaction must have no links except for the checkpoint by this
 * point.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
	assert_spin_locked(&journal->j_list_lock);

	/* Invalidate any cached shrink cursor pointing into this list. */
	journal->j_shrink_transaction = NULL;
	if (transaction->t_cpnext) {
		/* Unlink from the circular checkpoint-transaction list. */
		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions =
				transaction->t_cpnext;
		/* If it was the only entry, the list is now empty. */
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions = NULL;
	}

	/* The transaction must be fully drained before it can be dropped. */
	J_ASSERT(transaction->t_state == T_FINISHED);
	J_ASSERT(transaction->t_buffers == NULL);
	J_ASSERT(transaction->t_forget == NULL);
	J_ASSERT(transaction->t_shadow_list == NULL);
	J_ASSERT(transaction->t_checkpoint_list == NULL);
	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
	J_ASSERT(journal->j_committing_transaction != transaction);
	J_ASSERT(journal->j_running_transaction != transaction);

	trace_jbd2_drop_transaction(journal, transaction);

	jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
|