for-6.6-rc1-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmT/hwAACgkQxWXV+ddt
 WDsn7hAAngwEMKEAH9Jvu/BtHgRYcAdsGh5Mxw34aQf1+DAaH03GGsZjN6hfHYo4
 FMsnnvoZD5VPfuaFaQVd+mS9mRzikm503W7KfZFAPAQTOjz50RZbohLnZWa3eFbI
 46OcpoHusxwoYosEmIAt+dcw/gDlT9fpj+W11dKYtwOEjCqGA/OeKoVenfk38hVJ
 r+XhLwZFf4dPIqE3Ht26UtJk87Xs2X0/LQxOX3vM1MZ+l38N4dyo7TQnwfTHlQNw
 AK9sK6vp3rpRR96rvTV1dWr9lnmE7wky+Vh36DN/jxpzbW7Wx8IVoobBpcsO4Tyk
 Vw/rdjB7g7LfBmjLFhWvvQ73jv0WjIUUzXH17RuxOeyAQJ9tXFztVMh+QoVVC/Ka
 NxwA5uqyJKR7DIA+kLL06abUnASUVgP6Krdv9Fk7rYCKWluWk1k9ls9XaFFhytvg
 eeno/UB0px1rwps5P5zfaSXLIXEl53Luy5rFhTMCCNQfXyo+Qe6PJyTafR3E0uP8
 aXJV1lPG+o7qi9Vwg+20yy//1sE5gR0dLrcTaup3/20RK6eljZ/bNSkl3GJR9mlS
 YF+J/Ccia06y8Qo0xaeCofxkoI3J/PK6KPOTt8yZDgYoetYgHhrfBRO0I7ZU4Edq
 10512hAeskzPt6+5348+/jOEENASffXKP3FJSdDEzWd33vtlaHE=
 =mHTa
 -----END PGP SIGNATURE-----

Merge tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - several fixes for handling directory items (insertion, removal,
   iteration, error handling)

 - fix transaction commit stalls when auto relocation is running and
   blocks other tasks that want to commit

 - fix a build error when DEBUG is enabled

 - fix lockdep warning in inode number lookup ioctl

 - fix race when finishing block group creation

 - remove links to the obsolete wiki in several files

* tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org
  btrfs: assert delayed node locked when removing delayed item
  btrfs: remove BUG() after failure to insert delayed dir index item
  btrfs: improve error message after failure to add delayed dir index item
  btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
  btrfs: check for BTRFS_FS_ERROR in pending ordered assert
  btrfs: fix lockdep splat and potential deadlock after failure running delayed items
  btrfs: do not block starts waiting on previous transaction commit
  btrfs: release path before inode lookup during the ino lookup ioctl
  btrfs: fix race between finishing block group creation and its item update
commit 3669558bdf
Linus Torvalds, 2023-09-12 11:28:00 -07:00
11 changed files with 128 additions and 66 deletions

Documentation/filesystems/btrfs.rst

@@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki
 
   https://btrfs.readthedocs.io
-  https://btrfs.wiki.kernel.org
 
 that maintains information about administration tasks, frequently asked
 questions, use cases, mount options, comprehensible changelogs, features,

MAINTAINERS

@@ -4378,7 +4378,6 @@ M:	David Sterba <dsterba@suse.com>
 L:	linux-btrfs@vger.kernel.org
 S:	Maintained
 W:	https://btrfs.readthedocs.io
-W:	https://btrfs.wiki.kernel.org/
 Q:	https://patchwork.kernel.org/project/linux-btrfs/list/
 C:	irc://irc.libera.chat/btrfs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git

fs/btrfs/Kconfig

@@ -31,7 +31,7 @@ config BTRFS_FS
	  continue to be mountable and usable by newer kernels.

	  For more information, please see the web pages at
-	  http://btrfs.wiki.kernel.org.
+	  https://btrfs.readthedocs.io

	  To compile this file system support as a module, choose M here. The
	  module will be called btrfs.

fs/btrfs/block-group.c

@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
	btrfs_mark_buffer_dirty(leaf);
 fail:
	btrfs_release_path(path);
-	/* We didn't update the block group item, need to revert @commit_used. */
-	if (ret < 0) {
+	/*
+	 * We didn't update the block group item, need to revert commit_used
+	 * unless the block group item didn't exist yet - this is to prevent a
+	 * race with a concurrent insertion of the block group item, with
+	 * insert_block_group_item(), that happened just after we attempted to
+	 * update. In that case we would reset commit_used to 0 just after the
+	 * insertion set it to a value greater than 0 - if the block group later
+	 * becomes with 0 used bytes, we would incorrectly skip its update.
+	 */
+	if (ret < 0 && ret != -ENOENT) {
		spin_lock(&cache->lock);
		cache->commit_used = old_commit_used;
		spin_unlock(&cache->lock);
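
The interleaving the new comment describes reduces to a few lines of userspace C. The sketch below is a hypothetical model (toy names, no btrfs API): try_update() stands in for the failed update_block_group_item() and insert_item() for the concurrent insert_block_group_item() that lands between the failure and the error path.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool item_exists;       /* the block group item in the extent tree */
static long long commit_used;  /* models cache->commit_used */

/* Models insert_block_group_item(): creates the item, publishes used bytes. */
static void insert_item(long long used)
{
	item_exists = true;
	commit_used = used;
}

/* Models the start of update_block_group_item(): speculatively update
 * commit_used, then fail with -ENOENT if the item is not in the tree yet. */
static int try_update(long long used, long long *old_commit_used)
{
	*old_commit_used = commit_used;
	commit_used = used;
	return item_exists ? 0 : -ENOENT;
}

static void run(bool fixed)
{
	long long old;
	int ret;

	item_exists = false;
	commit_used = 0;

	ret = try_update(0, &old); /* updater loses the race: -ENOENT */
	insert_item(4096);         /* concurrent insert publishes 4096 */

	/* The error path: the old code reverted on any ret < 0. */
	if (ret < 0 && (!fixed || ret != -ENOENT))
		commit_used = old;

	printf("%s: commit_used = %lld\n", fixed ? "fixed" : "buggy",
	       commit_used);
}

int main(void)
{
	run(false); /* buggy: commit_used = 0, the insert's value is lost */
	run(true);  /* fixed: commit_used = 4096 */
	return 0;
}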

fs/btrfs/delayed-inode.c

@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
 
 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 {
+	struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
	struct rb_root_cached *root;
	struct btrfs_delayed_root *delayed_root;
 
@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
	if (RB_EMPTY_NODE(&delayed_item->rb_node))
		return;
 
-	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
+	/* If it's in a rbtree, then we need to have delayed node locked. */
+	lockdep_assert_held(&delayed_node->mutex);
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
 
	BUG_ON(!delayed_root);
 
	if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
-		root = &delayed_item->delayed_node->ins_root;
+		root = &delayed_node->ins_root;
	else
-		root = &delayed_item->delayed_node->del_root;
+		root = &delayed_node->del_root;
 
	rb_erase_cached(&delayed_item->rb_node, root);
	RB_CLEAR_NODE(&delayed_item->rb_node);
-	delayed_item->delayed_node->count--;
+	delayed_node->count--;
 
	finish_one_item(delayed_root);
 }
@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
		ret = __btrfs_commit_inode_delayed_items(trans, path,
							 curr_node);
		if (ret) {
-			btrfs_release_delayed_node(curr_node);
-			curr_node = NULL;
			btrfs_abort_transaction(trans, ret);
			break;
		}
 
		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
+		/*
+		 * See the comment below about releasing path before releasing
+		 * node. If the commit of delayed items was successful the path
+		 * should always be released, but in case of an error, it may
+		 * point to locked extent buffers (a leaf at the very least).
+		 */
+		ASSERT(path->nodes[0] == NULL);
		btrfs_release_delayed_node(prev_node);
	}
 
+	/*
+	 * Release the path to avoid a potential deadlock and lockdep splat when
+	 * releasing the delayed node, as that requires taking the delayed node's
+	 * mutex. If another task starts running delayed items before we take
+	 * the mutex, it will first lock the mutex and then it may try to lock
+	 * the same btree path (leaf).
+	 */
+	btrfs_free_path(path);
+
	if (curr_node)
		btrfs_release_delayed_node(curr_node);
-	btrfs_free_path(path);
	trans->block_rsv = block_rsv;
 
	return ret;
@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
 }
 
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+		return;
+
+	/*
+	 * Adding the new dir index item does not require touching another
+	 * leaf, so we can release 1 unit of metadata that was previously
+	 * reserved when starting the transaction. This applies only to
+	 * the case where we had a transaction start and excludes the
+	 * transaction join case (when replaying log trees).
+	 */
+	trace_btrfs_space_reservation(fs_info, "transaction",
+				      trans->transid, bytes, 0);
+	btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+	ASSERT(trans->bytes_reserved >= bytes);
+	trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   const char *name, int name_len,
				   struct btrfs_inode *dir,
@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 
	mutex_lock(&delayed_node->mutex);
 
+	/*
+	 * First attempt to insert the delayed item. This is to make the error
+	 * handling path simpler in case we fail (-EEXIST). There's no risk of
+	 * any other task coming in and running the delayed item before we do
+	 * the metadata space reservation below, because we are holding the
+	 * delayed node's mutex and that mutex must also be locked before the
+	 * node's delayed items can be run.
+	 */
+	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+	if (unlikely(ret)) {
+		btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+			  name_len, name, index, btrfs_root_id(delayed_node->root),
+			  delayed_node->inode_id, dir->index_cnt,
+			  delayed_node->index_cnt, ret);
+		btrfs_release_delayed_item(delayed_item);
+		btrfs_release_dir_index_item_space(trans);
+		mutex_unlock(&delayed_node->mutex);
+		goto release_node;
+	}
+
	if (delayed_node->index_item_leaves == 0 ||
	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
		delayed_node->curr_index_batch_size = data_len;
@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
		 * impossible.
		 */
		if (WARN_ON(ret)) {
-			mutex_unlock(&delayed_node->mutex);
			btrfs_release_delayed_item(delayed_item);
+			mutex_unlock(&delayed_node->mutex);
			goto release_node;
		}
 
		delayed_node->index_item_leaves++;
-	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
-		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
-		/*
-		 * Adding the new dir index item does not require touching another
-		 * leaf, so we can release 1 unit of metadata that was previously
-		 * reserved when starting the transaction. This applies only to
-		 * the case where we had a transaction start and excludes the
-		 * transaction join case (when replaying log trees).
-		 */
-		trace_btrfs_space_reservation(fs_info, "transaction",
-					      trans->transid, bytes, 0);
-		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
-		ASSERT(trans->bytes_reserved >= bytes);
-		trans->bytes_reserved -= bytes;
-	}
-
-	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
-	if (unlikely(ret)) {
-		btrfs_err(trans->fs_info,
-			  "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
-			  name_len, name, delayed_node->root->root_key.objectid,
-			  delayed_node->inode_id, ret);
-		BUG();
+	} else {
+		btrfs_release_dir_index_item_space(trans);
	}
	mutex_unlock(&delayed_node->mutex);
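
The ordering rule behind both new comments above (drop the btree leaf locked through the path before taking the delayed node's mutex, because item runners acquire the mutex first and the leaf second) can be shown with a hypothetical two-mutex pthread model; "leaf" and "node" are stand-ins, not kernel objects:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t leaf = PTHREAD_MUTEX_INITIALIZER; /* btree leaf via path */
static pthread_mutex_t node = PTHREAD_MUTEX_INITIALIZER; /* delayed node mutex */

/* Another task running delayed items: node mutex first, then the leaf. */
static void *run_delayed_items(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&node);
	pthread_mutex_lock(&leaf);
	pthread_mutex_unlock(&leaf);
	pthread_mutex_unlock(&node);
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* Error path: __btrfs_commit_inode_delayed_items() failed and the
	 * path still points to a locked leaf. */
	pthread_mutex_lock(&leaf);

	pthread_create(&t, NULL, run_delayed_items, NULL);

	/* Fixed order: release the leaf (btrfs_free_path()) BEFORE taking
	 * the node mutex (btrfs_release_delayed_node()). Taking node while
	 * still holding leaf is the AB-BA pattern lockdep complained about. */
	pthread_mutex_unlock(&leaf);
	pthread_mutex_lock(&node);
	pthread_mutex_unlock(&node);

	pthread_join(t, NULL);
	printf("no deadlock: leaf released before node mutex\n");
	return 0;
}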

fs/btrfs/disk-io.c

@@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
		struct folio *folio)
 {
	struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+	struct btrfs_subpage_info *spi = fs_info->subpage_info;
	struct btrfs_subpage *subpage;
	struct extent_buffer *eb;
	int cur_bit = 0;
@@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping,
		btrfs_assert_tree_write_locked(eb);
		return filemap_dirty_folio(mapping, folio);
	}
+
+	ASSERT(spi);
	subpage = folio_get_private(folio);
 
-	ASSERT(subpage->dirty_bitmap);
-	while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
+	for (cur_bit = spi->dirty_offset;
+	     cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
+	     cur_bit++) {
		unsigned long flags;
		u64 cur;
-		u16 tmp = (1 << cur_bit);
 
		spin_lock_irqsave(&subpage->lock, flags);
-		if (!(tmp & subpage->dirty_bitmap)) {
+		if (!test_bit(cur_bit, subpage->bitmaps)) {
			spin_unlock_irqrestore(&subpage->lock, flags);
-			cur_bit++;
			continue;
		}
		spin_unlock_irqrestore(&subpage->lock, flags);
@@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
		btrfs_assert_tree_write_locked(eb);
		free_extent_buffer(eb);
 
-		cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
+		cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
	}
	return filemap_dirty_folio(mapping, folio);
 }
@@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg)
		delta = ktime_get_seconds() - cur->start_time;
		if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
-		    cur->state < TRANS_STATE_COMMIT_START &&
+		    cur->state < TRANS_STATE_COMMIT_PREP &&
		    delta < fs_info->commit_interval) {
			spin_unlock(&fs_info->trans_lock);
			delay -= msecs_to_jiffies((delta - 1) * 1000);
@@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
	btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
	btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
	btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
-	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
-				     BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
+				     BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
				     BTRFS_LOCKDEP_TRANS_UNBLOCKED);
	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
@@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
	while (!list_empty(&fs_info->trans_list)) {
		t = list_first_entry(&fs_info->trans_list,
				     struct btrfs_transaction, list);
-		if (t->state >= TRANS_STATE_COMMIT_START) {
+		if (t->state >= TRANS_STATE_COMMIT_PREP) {
			refcount_inc(&t->use_count);
			spin_unlock(&fs_info->trans_lock);
			btrfs_wait_for_commit(fs_info, t->transid);
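
The "- 1" in the btree_dirty_folio() hunk above is the subtle part of this file's fix: the old while loop incremented cur_bit manually, while the new for loop also increments it once per iteration on its own, so the skip over one extent buffer's sectors must be one bit shorter. A hypothetical userspace reduction (toy bitmap, no kernel API):

#include <stdio.h>

#define BITS_PER_EB 4  /* e.g. 16K nodesize / 4K sectorsize */
#define NR_BITS     16 /* dirty bits tracked for one folio */

static int test(unsigned long bitmap, int bit)
{
	return (bitmap >> bit) & 1;
}

int main(void)
{
	/* Two adjacent dirty extent buffers: bits 0-3 and 4-7. */
	const unsigned long dirty = 0x00ff;
	int cur_bit;

	for (cur_bit = 0; cur_bit < NR_BITS; cur_bit++) {
		if (!test(dirty, cur_bit))
			continue;
		printf("extent buffer at bit %d\n", cur_bit); /* 0, then 4 */
		/* The loop's own cur_bit++ supplies the last step, hence -1.
		 * "cur_bit += BITS_PER_EB" would land on bit 5 and report the
		 * second buffer at a misaligned offset. */
		cur_bit += BITS_PER_EB - 1;
	}
	return 0;
}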

fs/btrfs/ioctl.c

@@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
				goto out_put;
			}
 
+			/*
+			 * We don't need the path anymore, so release it and
+			 * avoid deadlocks and lockdep warnings in case
+			 * btrfs_iget() needs to lookup the inode from its root
+			 * btree and lock the same leaf.
+			 */
+			btrfs_release_path(path);
			temp_inode = btrfs_iget(sb, key2.objectid, root);
			if (IS_ERR(temp_inode)) {
				ret = PTR_ERR(temp_inode);
@@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
				goto out_put;
			}
 
-			btrfs_release_path(path);
			key.objectid = key.offset;
			key.offset = (u64)-1;
			dirid = key.objectid;

fs/btrfs/locking.h

@@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
 };
 
 enum btrfs_lockdep_trans_states {
-	BTRFS_LOCKDEP_TRANS_COMMIT_START,
+	BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
	BTRFS_LOCKDEP_TRANS_COMPLETED,

fs/btrfs/ordered-data.c

@@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
			refcount_inc(&trans->use_count);
		spin_unlock(&fs_info->trans_lock);
 
-		ASSERT(trans);
+		ASSERT(trans || BTRFS_FS_ERROR(fs_info));
		if (trans) {
			if (atomic_dec_and_test(&trans->pending_ordered))
				wake_up(&trans->pending_wait);

fs/btrfs/transaction.c

@@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
 * | Call btrfs_commit_transaction() on any trans handle attached to
 * | transaction N
 * V
+ * Transaction N [[TRANS_STATE_COMMIT_PREP]]
+ * |
+ * | If there are simultaneous calls to btrfs_commit_transaction() one will win
+ * | the race and the rest will wait for the winner to commit the transaction.
+ * |
+ * | The winner will wait for previous running transaction to completely finish
+ * | if there is one.
+ * |
 * Transaction N [[TRANS_STATE_COMMIT_START]]
 * |
- * | Will wait for previous running transaction to completely finish if there
- * | is one
- * |
- * | Then one of the following happes:
+ * | Then one of the following happens:
 * | - Wait for all other trans handle holders to release.
 * |   The btrfs_commit_transaction() caller will do the commit work.
 * | - Wait for current transaction to be committed by others.
@@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
 */
 static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
	[TRANS_STATE_RUNNING]		= 0U,
+	[TRANS_STATE_COMMIT_PREP]	= 0U,
	[TRANS_STATE_COMMIT_START]	= (__TRANS_START | __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_START |
					   __TRANS_ATTACH |
@ -1982,7 +1988,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
* Wait for the current transaction commit to start and block * Wait for the current transaction commit to start and block
* subsequent transaction joins * subsequent transaction joins
*/ */
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
wait_event(fs_info->transaction_blocked_wait, wait_event(fs_info->transaction_blocked_wait,
cur_trans->state >= TRANS_STATE_COMMIT_START || cur_trans->state >= TRANS_STATE_COMMIT_START ||
TRANS_ABORTED(cur_trans)); TRANS_ABORTED(cur_trans));
@@ -2129,7 +2135,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
		return;
 
	lockdep_assert_held(&trans->fs_info->trans_lock);
-	ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+	ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
 
	list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
 }
@@ -2153,7 +2159,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
	ktime_t interval;
 
	ASSERT(refcount_read(&trans->use_count) == 1);
-	btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
 
	clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
@@ -2213,7 +2219,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
	}
 
	spin_lock(&fs_info->trans_lock);
-	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
+	if (cur_trans->state >= TRANS_STATE_COMMIT_PREP) {
		enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
 
		add_pending_snapshot(trans);
@@ -2225,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
			want_state = TRANS_STATE_SUPER_COMMITTED;
 
		btrfs_trans_state_lockdep_release(fs_info,
-						  BTRFS_LOCKDEP_TRANS_COMMIT_START);
+						  BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
		ret = btrfs_end_transaction(trans);
		wait_for_commit(cur_trans, want_state);
@@ -2237,9 +2243,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
		return ret;
	}
 
-	cur_trans->state = TRANS_STATE_COMMIT_START;
+	cur_trans->state = TRANS_STATE_COMMIT_PREP;
	wake_up(&fs_info->transaction_blocked_wait);
-	btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
 
	if (cur_trans->list.prev != &fs_info->trans_list) {
		enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
@@ -2260,11 +2266,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
			btrfs_put_transaction(prev_trans);
			if (ret)
				goto lockdep_release;
-		} else {
-			spin_unlock(&fs_info->trans_lock);
+			spin_lock(&fs_info->trans_lock);
		}
	} else {
-		spin_unlock(&fs_info->trans_lock);
		/*
		 * The previous transaction was aborted and was already removed
		 * from the list of transactions at fs_info->trans_list. So we
@@ -2272,11 +2276,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
		 * corrupt state (pointing to trees with unwritten nodes/leafs).
		 */
		if (BTRFS_FS_ERROR(fs_info)) {
+			spin_unlock(&fs_info->trans_lock);
			ret = -EROFS;
			goto lockdep_release;
		}
	}
 
+	cur_trans->state = TRANS_STATE_COMMIT_START;
+	wake_up(&fs_info->transaction_blocked_wait);
+	spin_unlock(&fs_info->trans_lock);
+
	/*
	 * Get the time spent on the work done by the commit thread and not
	 * the time spent waiting on a previous commit
@@ -2586,7 +2595,7 @@ lockdep_release:
	goto cleanup_transaction;
 
 lockdep_trans_commit_start_release:
-	btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
	btrfs_end_transaction(trans);
	return ret;
 }
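
Why a separate TRANS_STATE_COMMIT_PREP helps is visible in the btrfs_blocked_trans_types table above: PREP blocks nothing, so transaction starts and attaches keep flowing while the commit winner waits for the previous transaction to finish; only the later move to COMMIT_START blocks them. A hypothetical standalone reduction of that lookup:

#include <stdbool.h>
#include <stdio.h>

enum trans_state {
	TRANS_STATE_RUNNING,
	TRANS_STATE_COMMIT_PREP,  /* winner waits on the previous commit */
	TRANS_STATE_COMMIT_START, /* joins are now blocked */
	TRANS_STATE_MAX,
};

#define __TRANS_START  (1U << 0)
#define __TRANS_ATTACH (1U << 1)

static const unsigned int blocked_trans_types[TRANS_STATE_MAX] = {
	[TRANS_STATE_RUNNING]      = 0U,
	[TRANS_STATE_COMMIT_PREP]  = 0U, /* starts/attaches still allowed */
	[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
};

static bool join_blocked(enum trans_state state, unsigned int type)
{
	return (blocked_trans_types[state] & type) != 0;
}

int main(void)
{
	printf("PREP blocks a start?  %d\n",
	       join_blocked(TRANS_STATE_COMMIT_PREP, __TRANS_START));  /* 0 */
	printf("START blocks a start? %d\n",
	       join_blocked(TRANS_STATE_COMMIT_START, __TRANS_START)); /* 1 */
	return 0;
}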

fs/btrfs/transaction.h

@@ -14,6 +14,7 @@
 
 enum btrfs_trans_state {
	TRANS_STATE_RUNNING,
+	TRANS_STATE_COMMIT_PREP,
	TRANS_STATE_COMMIT_START,
	TRANS_STATE_COMMIT_DOING,
	TRANS_STATE_UNBLOCKED,