We had a problem with I/O hanging because it was waiting for the lock on
c->root to be released.
crash> cache_set.root -l cache_set.list ffffa03fde4c0050
  root = 0xffff802ef454c800

crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
  [ffff802ef454c858] struct rw_semaphore lock;

crash> struct rw_semaphore ffff802ef454c858
struct rw_semaphore {
  count = {
    counter = -4294967297
  },
  wait_list = {
    next = 0xffff00006786fc28,
    prev = 0xffff00005d0efac8
  },
  wait_lock = {
    raw_lock = {
      {
        val = {
          counter = 0
        },
        {
          locked = 0 '\000',
          pending = 0 '\000'
        },
        {
          locked_pending = 0,
          tail = 0
        }
      }
    }
  },
  osq = {
    tail = {
      counter = 0
    }
  },
  owner = 0xffffa03fdc586603
}
The "counter = -4294967297" means that lock count is -1 and a write lock
is being attempted. Then, we found that there is a btree with a counter
of 1 in btree_cache_freeable.
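As a quick sanity check on that raw value, the 64-bit count splits into
32-bit halves as shown below. The arithmetic itself is exact; how the halves
map onto the rwsem reader count and waiter bias depends on the rwsem
implementation in the kernel this dump came from, so treat that part as an
assumption:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int64_t count = -4294967297LL;  /* rw_semaphore.count.counter from the dump */

        printf("count        = %lld (0x%016llx)\n",
               (long long)count, (unsigned long long)count);
        /* low 32 bits: 0xffffffff, i.e. -1 as a signed 32-bit value */
        printf("low 32 bits  = 0x%08x (%d)\n",
               (uint32_t)count, (int32_t)(uint32_t)count);
        /* high 32 bits: 0xfffffffe, i.e. -2 as a signed 32-bit value */
        printf("high 32 bits = 0x%08x (%d)\n",
               (uint32_t)((uint64_t)count >> 32),
               (int32_t)((uint64_t)count >> 32));
        return 0;
}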
crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
  [ffffa03fde4c1140] struct list_head btree_cache;
  [ffffa03fde4c1150] struct list_head btree_cache_freeable;
  [ffffa03fde4c1160] struct list_head btree_cache_freed;
  [ffffa03fde4c1170] unsigned int btree_cache_used;
  [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
  [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;

crash> list -H ffffa03fde4c1140|wc -l
973

crash> list -H ffffa03fde4c1150|wc -l
1123

crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
  btree_cache_used = 2097

crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt
crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt

[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
/var/crash/127.0.0.1-2023-08-04-16:40:28
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
  counter = 1
We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
(1). Thread X has locked c->root(A) write.
(2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
(3). Thread X bch_btree_set_root() changes c->root from A to B.
(4). Thread X releases the lock(c->root A).
(5). Thread Y successfully locks c->root(A).
(6). Thread Y releases the lock(c->root B).
down_write locked ---(1)----------------┐
      |                                 |
      |   down_read waiting ---(2)----┐ |
      |     |                 ┌-------------┐     ┌-------------┐
bch_btree_set_root ===(3)==>> |  c->root A  |     |  c->root B  |
      |     |                 └-------------┘     └-------------┘
up_write ---(4)-------------------┘     |                |
            |                           |                |
      down_read locked ---(5)-----------┘                |
            |                                            |
      up_read ---(6)-------------------------------------┘
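In code, the broken sequence boils down to taking the lock through c->root
and later releasing it through c->root again, even though the pointer may
have been switched in between. The snippet below is only a simplified
illustration of the race, not the actual bcache source:

        /* Thread Y, simplified */
        rw_lock(0, c->root, c->root->level);    /* sleeps while X holds the lock; c->root is A here */
        /*
         * While Y sleeps, thread X calls bch_btree_set_root() and c->root
         * starts pointing to B.  Y wakes up holding the lock on A.
         */
        rw_unlock(0, c->root);                  /* re-reads c->root: unlocks B, A stays locked */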
Since c->root may change, the correct way to lock c->root is the same as in
bch_root_usage(): take the lock first, then compare the node against c->root
and retry if it has changed.
static unsigned int bch_root_usage(struct cache_set *c)
{
        unsigned int bytes = 0;
        struct bkey *k;
        struct btree *b;
        struct btree_iter iter;

        goto lock_root;

        do {
                rw_unlock(false, b);
lock_root:
                b = c->root;
                rw_lock(false, b, b->level);
        } while (b != c->root);

        for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
                bytes += bkey_bytes(k);

        rw_unlock(false, b);

        return (bytes * 100) / btree_bytes(c);
}
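Applied to bch_sectors_dirty_init(), the same retry idiom would look roughly
like the sketch below. This is a minimal illustration of the pattern, under
the assumption of a local variable b and a retry_lock label, not the literal
patch:

        struct btree *b;

retry_lock:
        b = c->root;
        rw_lock(0, b, b->level);

        /* c->root may have been replaced while we slept on the lock. */
        if (b != c->root) {
                rw_unlock(0, b);
                goto retry_lock;
        }

        /* ... walk the root node's keys ... */

        rw_unlock(0, b);        /* release the node that was actually locked */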
Fixes: