From 10398ef57aa189153406c110f5957145030f08fe Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Thu, 11 Jan 2024 17:42:58 +0000 Subject: [PATCH 01/27] gfs2: Improve gfs2_consist_inode() usage gfs2_consist_inode() logs an error message with the source file and line number. When we jump before calling it, the line number becomes less useful as it no longer relates to the source of the error. To aid troubleshooting, replace the gotos with the gfs2_consist_inode() calls so that the error messages are more informative. Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher --- fs/gfs2/dir.c | 31 +++++++++++++++++-------------- fs/gfs2/glops.c | 34 ++++++++++++++++++++-------------- fs/gfs2/xattr.c | 28 ++++++++++++++++------------ 3 files changed, 53 insertions(+), 40 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 560e4624c09f..dbf1aede744c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -562,15 +562,18 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, int ret = 0; ret = gfs2_dirent_offset(GFS2_SB(inode), buf); - if (ret < 0) - goto consist_inode; - + if (ret < 0) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } offset = ret; prev = NULL; dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); - if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) - goto consist_inode; + if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } do { ret = scan(dent, name, opaque); if (ret) @@ -582,8 +585,10 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, - len, 0)) - goto consist_inode; + len, 0)) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } } while(1); switch(ret) { @@ -597,10 +602,6 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, BUG_ON(ret > 0); return ERR_PTR(ret); } - -consist_inode: - gfs2_consist_inode(GFS2_I(inode)); - return ERR_PTR(-EIO); } static int dirent_check_reclen(struct gfs2_inode *dip, @@ -609,14 +610,16 @@ static int dirent_check_reclen(struct gfs2_inode *dip, const void *ptr = d; u16 rec_len = be16_to_cpu(d->de_rec_len); - if (unlikely(rec_len < sizeof(struct gfs2_dirent))) - goto broken; + if (unlikely(rec_len < sizeof(struct gfs2_dirent))) { + gfs2_consist_inode(dip); + return -EIO; + } ptr += rec_len; if (ptr < end_p) return rec_len; if (ptr == end_p) return -ENOENT; -broken: + gfs2_consist_inode(dip); return -EIO; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 45653cbc8a87..ae8d4731907d 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -409,10 +409,14 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct inode *inode = &ip->i_inode; bool is_new = inode->i_state & I_NEW; - if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) - goto corrupt; - if (unlikely(!is_new && inode_wrong_type(inode, mode))) - goto corrupt; + if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) { + gfs2_consist_inode(ip); + return -EIO; + } + if (unlikely(!is_new && inode_wrong_type(inode, mode))) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); inode->i_mode = mode; if (is_new) { @@ -449,26 +453,28 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ 
gfs2_set_inode_flags(inode); height = be16_to_cpu(str->di_height); - if (unlikely(height > sdp->sd_max_height)) - goto corrupt; + if (unlikely(height > sdp->sd_max_height)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_height = (u8)height; depth = be16_to_cpu(str->di_depth); - if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) - goto corrupt; + if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) - goto corrupt; - + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) { + gfs2_consist_inode(ip); + return -EIO; + } if (S_ISREG(inode->i_mode)) gfs2_set_aops(inode); return 0; -corrupt: - gfs2_consist_inode(ip); - return -EIO; } /** diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 8c96ba6230d1..17ae5070a90e 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -96,30 +96,34 @@ static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh, return -EIO; for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) { - if (!GFS2_EA_REC_LEN(ea)) - goto fail; + if (!GFS2_EA_REC_LEN(ea)) { + gfs2_consist_inode(ip); + return -EIO; + } if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <= - bh->b_data + bh->b_size)) - goto fail; - if (!gfs2_eatype_valid(sdp, ea->ea_type)) - goto fail; + bh->b_data + bh->b_size)) { + gfs2_consist_inode(ip); + return -EIO; + } + if (!gfs2_eatype_valid(sdp, ea->ea_type)) { + gfs2_consist_inode(ip); + return -EIO; + } error = ea_call(ip, bh, ea, prev, data); if (error) return error; if (GFS2_EA_IS_LAST(ea)) { if ((char *)GFS2_EA2NEXT(ea) != - bh->b_data + bh->b_size) - goto fail; + bh->b_data + bh->b_size) { + gfs2_consist_inode(ip); + return -EIO; + } break; } } return error; - -fail: - gfs2_consist_inode(ip); - return -EIO; } static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) From b204b1b61eff8d5ac1aefcb00547a5f4002be47d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Feb 2024 20:42:42 +0100 Subject: [PATCH 02/27] gfs2: Get rid of newlines in log messages Get rid of attempts to create multi-line syslog entries; this only makes the messages harder to read. 
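As a rough user-space illustration of why such entries are hard to read (this is not gfs2 code; model_log() and FS_ID are invented for the example), the filesystem-id prefix is emitted once per logging call, so any continuation line produced by an embedded newline ends up in the log without the context that identifies the filesystem:

#include <stdarg.h>
#include <stdio.h>

#define FS_ID "GFS2: fsid=sda1.0"        /* invented prefix for the example */

/* Model of fs_err()/gfs2_lm(): the prefix is emitted once per call. */
static void model_log(const char *fmt, ...)
{
        va_list ap;

        printf("%s: ", FS_ID);
        va_start(ap, fmt);
        vprintf(fmt, ap);
        va_end(ap);
}

int main(void)
{
        /* Embedded newline: the second line loses the fsid prefix. */
        model_log("fatal: I/O error\n  block = %llu\n", 12345ULL);
        /* Single line: all of the context stays together. */
        model_log("fatal: I/O error - block = %llu\n", 12345ULL);
        return 0;
}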
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/rgrp.c | 10 +++++----- fs/gfs2/util.c | 50 ++++++++++++++++++++++++++------------------------ 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 643175498d1c..b8d64f2259d6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1827,7 +1827,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { - fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u," + fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 26d6c1eea559..29c772816765 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -814,11 +814,11 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { gfs2_lm(sdp, - "ri_addr = %llu\n" - "ri_length = %u\n" - "ri_data0 = %llu\n" - "ri_data = %u\n" - "ri_bitbytes = %u\n" + "ri_addr=%llu " + "ri_length=%u " + "ri_data0=%llu " + "ri_data=%u " + "ri_bitbytes=%u " "start=%u len=%u offset=%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_length, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f52141ce9485..fd65a2e34102 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -376,8 +376,8 @@ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, return; fs_err(sdp, - "fatal: assertion \"%s\" failed\n" - " function = %s, file = %s, line = %u\n", + "fatal: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", assertion, function, file, line); /* @@ -407,7 +407,8 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, return; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) - fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", + fs_warn(sdp, "warning: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", assertion, function, file, line); if (sdp->sd_args.ar_debug) @@ -416,10 +417,10 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, dump_stack(); if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + panic("GFS2: fsid=%s: warning: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + function, file, line); sdp->sd_last_warning = jiffies; } @@ -432,7 +433,8 @@ void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { gfs2_lm(sdp, - "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "function = %s, file = %s, line = %u\n", function, file, line); gfs2_withdraw(sdp); } @@ -447,9 +449,9 @@ void gfs2_consist_inode_i(struct gfs2_inode *ip, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); gfs2_lm(sdp, - "fatal: filesystem consistency error\n" - " inode = %llu %llu\n" - " function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "inode = %llu %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, function, file, line); @@ -470,9 +472,9 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); 
gfs2_rgrp_dump(NULL, rgd, fs_id_buf); gfs2_lm(sdp, - "fatal: filesystem consistency error\n" - " RG = %llu\n" - " function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "RG = %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)rgd->rd_addr, function, file, line); gfs2_dump_glock(NULL, rgd->rd_gl, 1); @@ -492,9 +494,9 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, int me; gfs2_lm(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (%s)\n" - " function = %s, file = %s, line = %u\n", + "fatal: invalid metadata block - " + "bh = %llu (%s), " + "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, type, function, file, line); me = gfs2_withdraw(sdp); @@ -514,9 +516,9 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, int me; gfs2_lm(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (type: exp=%u, found=%u)\n" - " function = %s, file = %s, line = %u\n", + "fatal: invalid metadata block - " + "bh = %llu (type: exp=%u, found=%u), " + "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, type, t, function, file, line); me = gfs2_withdraw(sdp); @@ -533,8 +535,8 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { gfs2_lm(sdp, - "fatal: I/O error\n" - " function = %s, file = %s, line = %u\n", + "fatal: I/O error - " + "function = %s, file = %s, line = %u\n", function, file, line); return gfs2_withdraw(sdp); } @@ -551,9 +553,9 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, if (gfs2_withdrawing_or_withdrawn(sdp)) return; - fs_err(sdp, "fatal: I/O error\n" - " block = %llu\n" - " function = %s, file = %s, line = %u\n", + fs_err(sdp, "fatal: I/O error - " + "block = %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, function, file, line); if (withdraw) gfs2_withdraw(sdp); From 795405c4b995efc4f6b6b561200c384bdda07c02 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Feb 2024 22:57:08 +0100 Subject: [PATCH 03/27] gfs2: Remove unnecessary gfs2_meta_check_ii argument The type argument of gfs2_meta_check_ii() is always set to "magic number", so remove that argument and hardcode the string in gfs2_meta_check_ii(). Change the string to "bad magic number" to emphasize that the problem is the incorrect magic number. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 6 +++--- fs/gfs2/util.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index fd65a2e34102..963d77c4ca21 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -488,16 +488,16 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, */ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *type, const char *function, char *file, + const char *function, char *file, unsigned int line) { int me; gfs2_lm(sdp, "fatal: invalid metadata block - " - "bh = %llu (%s), " + "bh = %llu (bad magic number), " "function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, + (unsigned long long)bh->b_blocknr, function, file, line); me = gfs2_withdraw(sdp); return (me) ? 
-1 : -2; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index ba071998461f..d8d62bf0424d 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -92,7 +92,7 @@ gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__) int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *type, const char *function, + const char *function, char *file, unsigned int line); static inline int gfs2_meta_check(struct gfs2_sbd *sdp, @@ -123,7 +123,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, u32 magic = be32_to_cpu(mh->mh_magic); u16 t = be32_to_cpu(mh->mh_type); if (unlikely(magic != GFS2_MAGIC)) - return gfs2_meta_check_ii(sdp, bh, "magic number", function, + return gfs2_meta_check_ii(sdp, bh, function, file, line); if (unlikely(t != type)) return gfs2_metatype_check_ii(sdp, bh, type, t, function, From 52c2d389fe0a5cbfd88bc68e5b31d8811d2e5933 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 13 Mar 2024 13:24:35 +0100 Subject: [PATCH 04/27] gfs2: Follow-up to flag rename in sysfs status file Follow up to commit e7beb8b6de1a ("gfs2: Rename SDF_DEACTIVATING to SDF_KILL") and change the description of the SDF_KILL flag from "Deactivating" to "Killing". Signed-off-by: Andreas Gruenbacher --- fs/gfs2/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 250f340cb44d..32d3484270f3 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -88,7 +88,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "Withdraw In Prog: %d\n" "Remote Withdraw: %d\n" "Withdraw Recovery: %d\n" - "Deactivating: %d\n" + "Killing: %d\n" "sd_log_error: %d\n" "sd_log_flush_lock: %d\n" "sd_log_num_revoke: %u\n" From 3592bfaf746a4d0fefe6fe11527aa335073674c1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 15 Jan 2024 22:01:12 +0100 Subject: [PATCH 05/27] gfs2: Use [NO_]CREATE consistently for gfs2_glock_get When calling gfs2_glock_get(), consistently pass the CREATE or NO_CREATE flag for the create argument. This is the only place that passes 0 instead. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 32d3484270f3..ecc699f8d9fc 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -336,7 +336,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len return -EINVAL; if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) fs_info(sdp, "demote interface used\n"); - rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl); + rv = gfs2_glock_get(sdp, glnum, glops, NO_CREATE, &gl); if (rv) return rv; gfs2_glock_cb(gl, glmode); From b01189333ee91c1ae6cd96dfd1e3a3c2e69202f0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 26 Jan 2024 11:49:44 +0100 Subject: [PATCH 06/27] gfs2: Don't forget to complete delayed withdraw Commit fffe9bee14b0 ("gfs2: Delay withdraw from atomic context") switched from gfs2_withdraw() to gfs2_withdraw_delayed() in gfs2_ail_error(), but failed to then check if a delayed withdraw had occurred. Fix that by adding the missing check in __gfs2_ail_flush(), where the spin locks are already dropped and a withdraw is possible. 
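The shape of the fix can be sketched in plain user-space C (the names below are invented stand-ins for gfs2_withdraw_delayed(), gfs2_withdrawing() and gfs2_withdraw(); this illustrates the pattern, not the gfs2 code): the atomic-context side only records that a withdraw is needed, and a later caller that holds no locks checks the flag and does the actual work.

#include <stdbool.h>
#include <stdio.h>

static bool withdraw_requested;         /* set under spinlocks in the kernel */

/* Safe in atomic context: just remember that a withdraw is needed. */
static void request_withdraw(void)
{
        withdraw_requested = true;
}

/* Called with no locks held: complete the deferred, possibly sleeping work. */
static void complete_withdraw_if_needed(void)
{
        if (withdraw_requested) {
                withdraw_requested = false;
                puts("withdrawing the file system");
        }
}

int main(void)
{
        request_withdraw();             /* e.g. an error path under sd_ail_lock */
        complete_withdraw_if_needed();  /* e.g. the end of __gfs2_ail_flush() */
        return 0;
}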
Fixes: fffe9bee14b0 ("gfs2: Delay withdraw from atomic context") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index ae8d4731907d..68677fb69a73 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -82,6 +82,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (gfs2_withdrawing(sdp)) + gfs2_withdraw(sdp); } From 35264909e9d1973ab9aaa2a1b07cda70f12bb828 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 11 Mar 2024 15:51:59 +0100 Subject: [PATCH 07/27] gfs2: Fix NULL pointer dereference in gfs2_log_flush In gfs2_jindex_free(), set sdp->sd_jdesc to NULL under the log flush lock to provide exclusion against gfs2_log_flush(). In gfs2_log_flush(), check if sdp->sd_jdesc is non-NULL before dereferencing it. Otherwise, we could run into a NULL pointer dereference when outstanding glock work races with an unmount (glock_work_func -> run_queue -> do_xmote -> inode_go_sync -> gfs2_log_flush). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 3 ++- fs/gfs2/super.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 8cddf955ebc0..a6dd68b458ce 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1108,7 +1108,8 @@ repeat: lops_before_commit(sdp, tr); if (gfs2_withdrawing_or_withdrawn(sdp)) goto out_withdraw; - gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); + if (sdp->sd_jdesc) + gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); if (gfs2_withdrawing_or_withdrawn(sdp)) goto out_withdraw; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5f79466340d..9de789b78bc5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -67,9 +67,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) sdp->sd_journals = 0; spin_unlock(&sdp->sd_jindex_spin); + down_write(&sdp->sd_log_flush_lock); sdp->sd_jdesc = NULL; + up_write(&sdp->sd_log_flush_lock); + while (!list_empty(&list)) { jd = list_first_entry(&list, struct gfs2_jdesc, jd_list); + BUG_ON(jd->jd_log_bio); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); From f80d882edcf242d0256d9e51b09d5fb7a3a0d3b4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Mar 2024 16:56:09 +0100 Subject: [PATCH 08/27] gfs2: Get rid of gfs2_glock_queue_put in signal_our_withdraw In function signal_our_withdraw(), we are calling gfs2_glock_queue_put() in a context in which we are actually allowed to sleep, so replace that with a simple call to gfs2_glock_put(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 963d77c4ca21..0535a0206512 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -255,7 +255,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_glock_nq(&sdp->sd_live_gh); } - gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ + gfs2_glock_put(live_gl); /* drop extra reference we acquired */ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* From ee2be7d7c7f32783f60ee5fe59b91548a4571f10 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Mar 2024 16:45:39 +0100 Subject: [PATCH 09/27] gfs2: Replace gfs2_glock_queue_put with gfs2_glock_put_async Function gfs2_glock_queue_put() puts a glock reference by enqueuing glock work instead of putting the reference directly. 
This ensures that the operation won't sleep, but it is costly and really only necessary when putting the final glock reference. Replace it with a new gfs2_glock_put_async() function that only queues glock work when putting the last glock reference. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 27 +++++++++++++++++---------- fs/gfs2/glock.h | 2 +- fs/gfs2/log.c | 2 +- fs/gfs2/super.c | 4 ++-- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 34540f9d011c..ed90033b9c72 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -285,14 +285,6 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } -/* - * Cause the glock to be put in work queue context. - */ -void gfs2_glock_queue_put(struct gfs2_glock *gl) -{ - gfs2_glock_queue_work(gl, 0); -} - /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put @@ -307,6 +299,22 @@ void gfs2_glock_put(struct gfs2_glock *gl) __gfs2_glock_put(gl); } +/* + * gfs2_glock_put_async - Decrement reference count without sleeping + * @gl: The glock to put + * + * Decrement the reference count on glock immediately unless it is the last + * reference. Defer putting the last reference to work queue context. + */ +void gfs2_glock_put_async(struct gfs2_glock *gl) +{ + if (lockref_put_or_lock(&gl->gl_lockref)) + return; + + __gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); +} + /** * may_grant - check if it's ok to grant a new lock * @gl: The glock @@ -2529,8 +2537,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) if (gl) { if (n == 0) return; - if (!lockref_put_not_zero(&gl->gl_lockref)) - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); } for (;;) { gl = rhashtable_walk_next(&gi->hti); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 0114f3e0ebe0..2c697674a86f 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -172,7 +172,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, int create, struct gfs2_glock **glp); struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); void gfs2_glock_put(struct gfs2_glock *gl); -void gfs2_glock_queue_put(struct gfs2_glock *gl); +void gfs2_glock_put_async(struct gfs2_glock *gl); void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index a6dd68b458ce..6ee6013fb825 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -786,7 +786,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) { if (atomic_dec_return(&gl->gl_revokes) == 0) { clear_bit(GLF_LFLUSH, &gl->gl_flags); - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); } } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9de789b78bc5..d481db9510ac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1049,7 +1049,7 @@ static int gfs2_drop_inode(struct inode *inode) gfs2_glock_hold(gl); if (!gfs2_queue_try_to_evict(gl)) - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); return 0; } @@ -1255,7 +1255,7 @@ out_qs: static void gfs2_glock_put_eventually(struct gfs2_glock *gl) { if (current->flags & PF_MEMALLOC) - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); else gfs2_glock_put(gl); } From 927cfc90d27cb7732a62464f95fd9aa7edfa9b70 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 9 Apr 2024 07:11:48 +0200 Subject: [PATCH 10/27] gfs2: Don't set GLF_LOCK in gfs2_dispose_glock_lru In gfs2_dispose_glock_lru(), we want to skip glocks which are in the process of transitioning state 
(as indicated by the set GLF_LOCK flag), but we don't need to set that flag for requesting a state transition. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ed90033b9c72..a700e424f790 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2017,14 +2017,13 @@ add_back_to_lru: atomic_inc(&lru_count); continue; } - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (test_bit(GLF_LOCK, &gl->gl_flags)) { spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); __gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); From 262ee3a07e98e8e56230547d4fe75d485348a55a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Mar 2024 15:46:32 +0100 Subject: [PATCH 11/27] gfs2: Get rid of unnecessary test_and_set_bit The GLF_LOCK flag is protected by the gl->gl_lockref.lock spin lock which is held when entering run_queue(), so we can use test_bit() and set_bit() here. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a700e424f790..fa047331ea3c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -842,8 +842,9 @@ __acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh = NULL; - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + if (test_bit(GLF_LOCK, &gl->gl_flags)) return; + set_bit(GLF_LOCK, &gl->gl_flags); GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); From 5d9231111966b6c5a65016d58dcbeab91055bc91 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 5 Apr 2024 13:47:51 +0200 Subject: [PATCH 12/27] gfs2: Fix "ignore unlock failures after withdraw" Commit 3e11e53041502 tries to suppress dlm_lock() lock conversion errors that occur when the lockspace has already been released. It does that by setting and checking the SDF_SKIP_DLM_UNLOCK flag. This conflicts with the intended meaning of the SDF_SKIP_DLM_UNLOCK flag, so check whether the lockspace is still allocated instead. (Given the current DLM API, checking for this kind of error after the fact seems easier than making sure that the lockspace is still allocated before calling dlm_lock(). Changing the DLM API so that users maintain the lockspace references themselves would be an option.)
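A minimal sketch of the resulting error interpretation, in stand-alone C rather than the dlm/gfs2 API (fake_dlm_lock() and lockspace_released are invented for the example; the real check uses the DFL_UNMOUNT recovery flag): a failed unlock conversion is only ignored when we know the lockspace has already gone away.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool lockspace_released;         /* stands in for DFL_UNMOUNT */

static int fake_dlm_lock(void)
{
        /* Conversions fail once the lockspace is gone. */
        return lockspace_released ? -EINVAL : 0;
}

static void drop_lock(void)
{
        int ret = fake_dlm_lock();

        if (ret == -EINVAL && lockspace_released)
                puts("lock went away with the lockspace - nothing to do");
        else if (ret)
                printf("unexpected locking error: %d\n", ret);
        else
                puts("unlock request submitted");
}

int main(void)
{
        drop_lock();                    /* normal case */
        lockspace_released = true;
        drop_lock();                    /* after the lockspace was released */
        return 0;
}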
Fixes: 3e11e53041502 ("GFS2: ignore unlock failures after withdraw") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 +++- fs/gfs2/util.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fa047331ea3c..ec1f8eb28950 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -810,11 +810,13 @@ skip_inval: } if (sdp->sd_lockstruct.ls_ops->lm_lock) { + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED && - test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { + test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); } else if (ret) { diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 0535a0206512..09238604d741 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -350,7 +350,6 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) fs_err(sdp, "telling LM to unmount\n"); lm->lm_unmount(sdp); } - set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); fs_err(sdp, "File system withdrawn\n"); dump_stack(); clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); From c9a0a4b028e4fe16c79e685b2135ce2f097d0d0e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Mar 2024 19:14:07 +0100 Subject: [PATCH 13/27] Revert "gfs2: fix glock shrinker ref issues" This reverts commit 62862485a4c3a52029fc30f4bdde9af04afdafc9. Commit 62862485a4c3 tried to fix issues introduced by commit 228804a35caa ("gfs2: Make glock lru list scanning safer"), but like that commit, it failed to account for the bias state_change() adds to the glock reference count for locked glocks. Revert commit 62862485a4c3 so that we can fix commit 228804a35caa properly. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ec1f8eb28950..2882a42e88aa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2056,9 +2056,7 @@ static long gfs2_scan_glock_lru(int nr) if (!test_bit(GLF_LOCK, &gl->gl_flags)) { if (!spin_trylock(&gl->gl_lockref.lock)) continue; - if (gl->gl_lockref.count <= 1 && - (gl->gl_state == LM_ST_UNLOCKED || - demote_ok(gl))) { + if (!gl->gl_lockref.count) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; From acf1f42faf5a27913eb7b8140cc255f1b3d14e48 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Mar 2024 19:59:45 +0100 Subject: [PATCH 14/27] gfs2: Fix "Make glock lru list scanning safer" Commit 228804a35caa tried to add a refcount check to gfs2_scan_glock_lru() to make sure that glocks that are still referenced cannot be freed. It failed to account for the bias state_change() adds to the refcount for held glocks, so held glocks are no longer removed from the glock cache, which can lead to out-of-memory problems. Fix that. (The inodes those glocks are associated with do get shrunk and do get pushed out of memory.) In addition, use the same eligibility check in gfs2_scan_glock_lru() and gfs2_dispose_glock_lru(). 
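The eligibility check itself is small enough to model in stand-alone C (simplified stand-in types, not the gfs2 structures): a glock that is held in any DLM mode carries one extra reference, so "unused" means a count of one for held glocks and zero for unlocked ones, and glocks in the middle of a state change are skipped.

#include <stdbool.h>
#include <stdio.h>

enum state { ST_UNLOCKED, ST_SHARED, ST_EXCLUSIVE };

struct glock {
        enum state state;
        int refcount;
        bool state_change_in_progress;  /* models the GLF_LOCK flag */
};

static bool can_free_glock(const struct glock *gl)
{
        int held_bias = (gl->state != ST_UNLOCKED) ? 1 : 0;

        return !gl->state_change_in_progress && gl->refcount == held_bias;
}

int main(void)
{
        struct glock held_idle = { .state = ST_SHARED, .refcount = 1 };
        struct glock still_used = { .state = ST_SHARED, .refcount = 2 };
        struct glock unlocked = { .state = ST_UNLOCKED, .refcount = 0 };

        printf("held but idle: %d\n", can_free_glock(&held_idle));     /* 1 */
        printf("still in use:  %d\n", can_free_glock(&still_used));    /* 0 */
        printf("unlocked:      %d\n", can_free_glock(&unlocked));      /* 1 */
        return 0;
}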
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2882a42e88aa..04e0a8ac61d7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1987,6 +1987,14 @@ static int glock_cmp(void *priv, const struct list_head *a, return 0; } +static bool can_free_glock(struct gfs2_glock *gl) +{ + bool held = gl->gl_state != LM_ST_UNLOCKED; + + return !test_bit(GLF_LOCK, &gl->gl_flags) && + gl->gl_lockref.count == held; +} + /** * gfs2_dispose_glock_lru - Demote a list of glocks * @list: The list to dispose of @@ -2020,7 +2028,7 @@ add_back_to_lru: atomic_inc(&lru_count); continue; } - if (test_bit(GLF_LOCK, &gl->gl_flags)) { + if (!can_free_glock(gl)) { spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } @@ -2052,16 +2060,10 @@ static long gfs2_scan_glock_lru(int nr) list_for_each_entry_safe(gl, next, &lru_list, gl_lru) { if (nr-- <= 0) break; - /* Test for being demotable */ - if (!test_bit(GLF_LOCK, &gl->gl_flags)) { - if (!spin_trylock(&gl->gl_lockref.lock)) - continue; - if (!gl->gl_lockref.count) { - list_move(&gl->gl_lru, &dispose); - atomic_dec(&lru_count); - freed++; - } - spin_unlock(&gl->gl_lockref.lock); + if (can_free_glock(gl)) { + list_move(&gl->gl_lru, &dispose); + atomic_dec(&lru_count); + freed++; } } if (!list_empty(&dispose)) From 7a1ad9d8120e3d510ee5de8924f14de089aa2e2d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 9 Apr 2024 13:54:26 +0200 Subject: [PATCH 15/27] gfs2: Fix lru_count accounting Currently, gfs2_scan_glock_lru() decrements lru_count when a glock is moved onto the dispose list. When such a glock is then stolen from the dispose list while gfs2_dispose_glock_lru() doesn't hold the lru_lock, lru_count will be decremented again, so the counter will eventually go negative. This bug has existed in one form or another since at least commit 97cc1025b1a91 ("GFS2: Kill two daemons with one patch"). Fix this by only decrementing lru_count when we actually remove a glock and schedule for it to be unlocked and dropped. We also don't need to remove and then re-add glocks when we can just as well move them back onto the lru_list when necessary. In addition, return the number of glocks freed as we should, not the number of glocks moved onto the dispose list. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 04e0a8ac61d7..8e6936b79460 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2009,29 +2009,30 @@ static bool can_free_glock(struct gfs2_glock *gl) * private) */ -static void gfs2_dispose_glock_lru(struct list_head *list) +static unsigned long gfs2_dispose_glock_lru(struct list_head *list) __releases(&lru_lock) __acquires(&lru_lock) { struct gfs2_glock *gl; + unsigned long freed = 0; list_sort(NULL, list, glock_cmp); while(!list_empty(list)) { gl = list_first_entry(list, struct gfs2_glock, gl_lru); - list_del_init(&gl->gl_lru); - clear_bit(GLF_LRU, &gl->gl_flags); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: - list_add(&gl->gl_lru, &lru_list); - set_bit(GLF_LRU, &gl->gl_flags); - atomic_inc(&lru_count); + list_move(&gl->gl_lru, &lru_list); continue; } if (!can_free_glock(gl)) { spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); + freed++; gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -2039,6 +2040,7 @@ add_back_to_lru: spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } + return freed; } /** @@ -2050,24 +2052,21 @@ add_back_to_lru: * gfs2_dispose_glock_lru() above. */ -static long gfs2_scan_glock_lru(int nr) +static unsigned long gfs2_scan_glock_lru(unsigned long nr) { struct gfs2_glock *gl, *next; LIST_HEAD(dispose); - long freed = 0; + unsigned long freed = 0; spin_lock(&lru_lock); list_for_each_entry_safe(gl, next, &lru_list, gl_lru) { - if (nr-- <= 0) + if (!nr--) break; - if (can_free_glock(gl)) { + if (can_free_glock(gl)) list_move(&gl->gl_lru, &dispose); - atomic_dec(&lru_count); - freed++; - } } if (!list_empty(&dispose)) - gfs2_dispose_glock_lru(&dispose); + freed = gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); return freed; From 59f60005797b4018d7b46620037e0c53d690795e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 10 Apr 2024 04:24:56 +0200 Subject: [PATCH 16/27] gfs2: Remove ill-placed consistency check This consistency check was originally added by commit 9287c6452d2b1 ("gfs2: Fix occasional glock use-after-free"). It is ill-placed in gfs2_glock_free() because if it holds there, it must equally hold in __gfs2_glock_put() already. Either way, the check doesn't seem necessary anymore. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8e6936b79460..bf7538274985 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -170,7 +170,6 @@ void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); From d98779e687726d8f8860f1c54b5687eec5f63a73 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 10 Apr 2024 04:50:18 +0200 Subject: [PATCH 17/27] gfs2: Fix potential glock use-after-free on unmount When a DLM lockspace is released and there are still locks in that lockspace, DLM will unlock those locks automatically. Commit fb6791d100d1b started exploiting this behavior to speed up filesystem unmount: gfs2 would simply free glocks it didn't want to unlock and then release the lockspace.
This didn't take the bast callbacks for asynchronous lock contention notifications into account, which remain active until a lock is unlocked or its lockspace is released. To prevent those callbacks from accessing deallocated objects, put the glocks that should not be unlocked on the sd_dead_glocks list, release the lockspace, and only then free those glocks. As an additional measure, ignore unexpected ast and bast callbacks if the receiving glock is dead. Fixes: fb6791d100d1b ("GFS2: skip dlm_unlock calls in unmount") Signed-off-by: Andreas Gruenbacher Cc: David Teigland --- fs/gfs2/glock.c | 35 ++++++++++++++++++++++++++++++--- fs/gfs2/glock.h | 1 + fs/gfs2/incore.h | 1 + fs/gfs2/lock_dlm.c | 32 ++++++++++++++++++++++---------- fs/gfs2/ops_fstype.c | 1 + fs/gfs2/super.c | 3 --- 6 files changed, 57 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index bf7538274985..4cf8971ce8ee 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -166,18 +166,45 @@ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) return true; } -void gfs2_glock_free(struct gfs2_glock *gl) +static void __gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); +} + +void gfs2_glock_free(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + __gfs2_glock_free(gl); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_kill_wait); } +void gfs2_glock_free_later(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + spin_lock(&lru_lock); + list_add(&gl->gl_lru, &sdp->sd_dead_glocks); + spin_unlock(&lru_lock); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_kill_wait); +} + +static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp) +{ + struct list_head *list = &sdp->sd_dead_glocks; + + while(!list_empty(list)) { + struct gfs2_glock *gl; + + gl = list_first_entry(list, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + __gfs2_glock_free(gl); + } +} + /** * gfs2_glock_hold() - increment reference count on glock * @gl: The glock to hold @@ -2233,6 +2260,8 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) wait_event_timeout(sdp->sd_kill_wait, atomic_read(&sdp->sd_glock_disposal) == 0, HZ * 600); + gfs2_lm_unmount(sdp); + gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2c697674a86f..19aef6d53267 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -252,6 +252,7 @@ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); void gfs2_glock_thaw(struct gfs2_sbd *sdp); void gfs2_glock_add_to_lru(struct gfs2_glock *gl); void gfs2_glock_free(struct gfs2_glock *gl); +void gfs2_glock_free_later(struct gfs2_glock *gl); int __init gfs2_glock_init(void); void gfs2_glock_exit(void); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 95a334d64da2..60abd7050c99 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -838,6 +838,7 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; struct mutex sd_freeze_mutex; + struct list_head sd_dead_glocks; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index d1ac5d0679ea..e028e55e67d9 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -121,6 +121,11 @@ static void gdlm_ast(void *arg) struct gfs2_glock *gl =
arg; unsigned ret = gl->gl_state; + /* If the glock is dead, we only react to a dlm_unlock() reply. */ + if (__lockref_is_dead(&gl->gl_lockref) && + gl->gl_lksb.sb_status != -DLM_EUNLOCK) + return; + gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -171,6 +176,9 @@ static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg; + if (__lockref_is_dead(&gl->gl_lockref)) + return; + switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); @@ -291,8 +299,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (gl->gl_lksb.sb_lkid == 0) - goto out_free; + BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); + + if (gl->gl_lksb.sb_lkid == 0) { + gfs2_glock_free(gl); + return; + } clear_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); @@ -300,13 +312,17 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_update_request_times(gl); /* don't want to call dlm if we've unmounted the lock protocol */ - if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) - goto out_free; + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + gfs2_glock_free(gl); + return; + } /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !gl->gl_lksb.sb_lvbptr) - goto out_free; + !gl->gl_lksb.sb_lvbptr) { + gfs2_glock_free_later(gl); + return; + } again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, @@ -321,10 +337,6 @@ again: gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); } - return; - -out_free: - gfs2_glock_free(gl); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1281e60be639..db0df091a6a7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -136,6 +136,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); mutex_init(&sdp->sd_freeze_mutex); + INIT_LIST_HEAD(&sdp->sd_dead_glocks); return sdp; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d481db9510ac..32a37daaccbe 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -650,10 +650,7 @@ restart: gfs2_gl_hash_clear(sdp); truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); free_sbd(sdp); } From a3730c5ec57b033ba6e437f7881a894d57b28a4a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 10 Apr 2024 05:23:19 +0200 Subject: [PATCH 18/27] gfs2: Unlock fewer glocks on unmount At unmount time, we would generally like to explicitly unlock as few glocks as possible for efficiency. We are already skipping glocks that don't have a lock value block (LVB), but we can also skip glocks which are not held in DLM_LOCK_EX or DLM_LOCK_PW mode (of which gfs2 only uses DLM_LOCK_EX under the name LM_ST_EXCLUSIVE). 
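Reduced to a stand-alone predicate (simplified types, not the dlm or gfs2 definitions, and ignoring the SDF_SKIP_DLM_UNLOCK check that also applies), the unmount-time decision is: an explicit unlock is only worth the round trip when the lock has a lock value block and is held in a mode in which releasing the lockspace would discard that LVB.

#include <stdbool.h>
#include <stdio.h>

enum mode { MODE_UNLOCKED, MODE_SHARED, MODE_EXCLUSIVE };

struct lock {
        bool has_lvb;           /* lock value block attached */
        enum mode mode;
};

static bool needs_explicit_unlock(const struct lock *lk)
{
        /* Only an exclusively held LVB would be lost with the lockspace. */
        return lk->has_lvb && lk->mode == MODE_EXCLUSIVE;
}

int main(void)
{
        struct lock no_lvb = { .has_lvb = false, .mode = MODE_EXCLUSIVE };
        struct lock shared_lvb = { .has_lvb = true, .mode = MODE_SHARED };
        struct lock ex_lvb = { .has_lvb = true, .mode = MODE_EXCLUSIVE };

        printf("%d %d %d\n",
               needs_explicit_unlock(&no_lvb),         /* 0 */
               needs_explicit_unlock(&shared_lvb),     /* 0 */
               needs_explicit_unlock(&ex_lvb));        /* 1 */
        return 0;
}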
Signed-off-by: Andreas Gruenbacher Cc: David Teigland --- fs/gfs2/lock_dlm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index e028e55e67d9..49059274a528 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -316,10 +316,16 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_glock_free(gl); return; } - /* don't want to skip dlm_unlock writing the lvb when lock has one */ + + /* + * When the lockspace is released, all remaining glocks will be + * unlocked automatically. This is more efficient than unlocking them + * individually, but when the lock is held in DLM_LOCK_EX or + * DLM_LOCK_PW mode, the lock value block (LVB) will be lost. + */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !gl->gl_lksb.sb_lvbptr) { + (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) { gfs2_glock_free_later(gl); return; } From 1cd28e15864054f3c48baee9eecda1c0441c48ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 12 Apr 2024 19:16:58 +0200 Subject: [PATCH 19/27] gfs2: finish_xmote cleanup Currently, function finish_xmote() takes and releases the glock spinlock. However, all of its callers immediately take that spinlock again, so it makes more sense to take the spin lock before calling finish_xmote() already. With that, thaw_glock() is the only place that sets the GLF_HAVE_REPLY flag outside of the glock spinlock, but it also takes that spinlock immediately thereafter. Change that to set the bit when the spinlock is already held. This allows to switch from test_and_clear_bit() to test_bit() and clear_bit() in glock_work_func(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4cf8971ce8ee..b1a2862d431d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -625,7 +625,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) struct gfs2_holder *gh; unsigned state = ret & LM_OUT_ST_MASK; - spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -673,7 +672,6 @@ retry: gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_lockref.lock); return; } @@ -696,7 +694,6 @@ retry: } out: clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_lockref.lock); } static bool is_system_glock(struct gfs2_glock *gl) @@ -843,15 +840,19 @@ skip_inval: if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED && test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + spin_lock(&gl->gl_lockref.lock); finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); + __gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp)); } } else { /* lock_nolock */ + spin_lock(&gl->gl_lockref.lock); finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); + __gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); } out: spin_lock(&gl->gl_lockref.lock); @@ -1108,11 +1109,12 @@ static void glock_work_func(struct work_struct *work) struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); unsigned int drop_refs = 1; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + spin_lock(&gl->gl_lockref.lock); + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + clear_bit(GLF_REPLY_PENDING, &gl->gl_flags); finish_xmote(gl, gl->gl_reply); 
drop_refs++; } - spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { @@ -2183,8 +2185,11 @@ static void thaw_glock(struct gfs2_glock *gl) return; if (!lockref_get_not_dead(&gl->gl_lockref)) return; + + spin_lock(&gl->gl_lockref.lock); set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - gfs2_glock_queue_work(gl, 0); + __gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); } /** From 9947a06d29c0a30da88cdc6376ca5fd87083e130 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 15 Apr 2024 11:23:04 +0200 Subject: [PATCH 20/27] gfs2: do_xmote fixes Function do_xmote() is called with the glock spinlock held. Commit 86934198eefa added a 'goto skip_inval' statement at the beginning of the function to further below where the glock spinlock is expected not to be held anymore. Then it added code there that requires the glock spinlock to be held. This doesn't make sense; fix this up by dropping and retaking the spinlock where needed. In addition, when ->lm_lock() returned an error, do_xmote() didn't fail the locking operation, and simply left the glock hanging; fix that as well. (This is a much older error.) Fixes: 86934198eefa ("gfs2: Clear flags when withdraw prevents xmote") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b1a2862d431d..fa374617d2f1 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -721,6 +721,7 @@ __acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; @@ -749,6 +750,9 @@ __acquires(&gl->gl_lockref.lock) (gl->gl_state == LM_ST_EXCLUSIVE) || (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); + if (!glops->go_inval && !glops->go_sync) + goto skip_inval; + spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) { ret = glops->go_sync(gl); @@ -761,6 +765,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } + spin_lock(&gl->gl_lockref.lock); goto skip_inval; } } @@ -781,9 +786,10 @@ __acquires(&gl->gl_lockref.lock) glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } + spin_lock(&gl->gl_lockref.lock); skip_inval: - gfs2_glock_hold(gl); + gl->gl_lockref.count++; /* * Check for an error encountered since we called go_sync and go_inval. 
* If so, we can't withdraw from the glock code because the withdraw @@ -825,37 +831,37 @@ skip_inval: */ clear_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); - goto out; + __gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + return; } else { clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } } - if (sdp->sd_lockstruct.ls_ops->lm_lock) { - struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (ls->ls_ops->lm_lock) { + spin_unlock(&gl->gl_lockref.lock); + ret = ls->ls_ops->lm_lock(gl, target, lck_flags); + spin_lock(&gl->gl_lockref.lock); - /* lock_dlm */ - ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED && test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { - spin_lock(&gl->gl_lockref.lock); - finish_xmote(gl, target); - __gfs2_glock_queue_work(gl, 0); - spin_unlock(&gl->gl_lockref.lock); + /* + * The lockspace has been released and the lock has + * been unlocked implicitly. + */ } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp)); + target = gl->gl_state | LM_OUT_ERROR; + } else { + /* The operation will be completed asynchronously. */ + return; } - } else { /* lock_nolock */ - spin_lock(&gl->gl_lockref.lock); - finish_xmote(gl, target); - __gfs2_glock_queue_work(gl, 0); - spin_unlock(&gl->gl_lockref.lock); } -out: - spin_lock(&gl->gl_lockref.lock); + + /* Complete the operation now. */ + finish_xmote(gl, target); + __gfs2_glock_queue_work(gl, 0); } /** From 1e86044402c45b70a9b31beeaefb5cc732a7470c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 12 Apr 2024 21:58:15 +0200 Subject: [PATCH 21/27] gfs2: Remove and replace gfs2_glock_queue_work There are no more callers of gfs2_glock_queue_work() left, so remove that helper. With that, we can now rename __gfs2_glock_queue_work() back to gfs2_glock_queue_work() to get rid of some unnecessary clutter. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fa374617d2f1..9f11fc1e79eb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -274,7 +274,7 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) * Enqueue the glock on the work queue. Passes one glock reference on to the * work queue. 
*/ -static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { +static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { /* * We are holding the lockref spinlock, and the work was still @@ -287,12 +287,6 @@ static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) } } -static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { - spin_lock(&gl->gl_lockref.lock); - __gfs2_glock_queue_work(gl, delay); - spin_unlock(&gl->gl_lockref.lock); -} - static void __gfs2_glock_put(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -337,7 +331,8 @@ void gfs2_glock_put_async(struct gfs2_glock *gl) if (lockref_put_or_lock(&gl->gl_lockref)) return; - __gfs2_glock_queue_work(gl, 0); + GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -831,7 +826,7 @@ skip_inval: */ clear_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - __gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); return; } else { clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); @@ -861,7 +856,7 @@ skip_inval: /* Complete the operation now. */ finish_xmote(gl, target); - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); } /** @@ -909,7 +904,7 @@ out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); smp_mb__after_atomic(); gl->gl_lockref.count++; - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); return; out_unlock: @@ -1141,12 +1136,12 @@ static void glock_work_func(struct work_struct *work) drop_refs--; if (gl->gl_name.ln_type != LM_TYPE_INODE) delay = 0; - __gfs2_glock_queue_work(gl, delay); + gfs2_glock_queue_work(gl, delay); } /* * Drop the remaining glock references manually here. (Mind that - * __gfs2_glock_queue_work depends on the lockref spinlock begin held + * gfs2_glock_queue_work depends on the lockref spinlock begin held * here as well.) 
*/ gl->gl_lockref.count -= drop_refs; @@ -1651,7 +1646,7 @@ unlock: test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gl->gl_lockref.count++; - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); } run_queue(gl, 1); spin_unlock(&gl->gl_lockref.lock); @@ -1717,7 +1712,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags) && gl->gl_name.ln_type == LM_TYPE_INODE) delay = gl->gl_hold_time; - __gfs2_glock_queue_work(gl, delay); + gfs2_glock_queue_work(gl, delay); } } @@ -1941,7 +1936,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) delay = gl->gl_hold_time; } handle_callback(gl, state, delay, true); - __gfs2_glock_queue_work(gl, delay); + gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); } @@ -2001,7 +1996,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -2070,7 +2065,7 @@ add_back_to_lru: gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } @@ -2194,7 +2189,7 @@ static void thaw_glock(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -2213,7 +2208,7 @@ static void clear_glock(struct gfs2_glock *gl) gl->gl_lockref.count++; if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); } spin_unlock(&gl->gl_lockref.lock); } From fcd63086bc14b1689866d7b0c61958f6b7e48604 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 7 Apr 2024 12:55:44 +0200 Subject: [PATCH 22/27] gfs2: gfs2_freeze_unlock cleanup Function gfs2_freeze_unlock() is always called with &sdp->sd_freeze_gh as its argument, so clean up the code by passing in sdp instead. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/super.c | 12 ++++++------ fs/gfs2/util.c | 6 +++--- fs/gfs2/util.h | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index db0df091a6a7..a9f7f0d44227 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1289,7 +1289,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) error = gfs2_make_fs_rw(sdp); if (error) { - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); gfs2_destroy_threads(sdp); fs_err(sdp, "can't make FS RW: %d\n", error); goto fail_per_node; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 32a37daaccbe..713dd24082c9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -358,7 +358,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) list_add(&lfcc->list, &list); } - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOPID, @@ -382,7 +382,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) if (!error) goto out; /* success */ - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); relock_shared: error2 = gfs2_freeze_lock_shared(sdp); @@ -621,7 +621,7 @@ restart: /* Release stuff */ - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); iput(sdp->sd_jindex); iput(sdp->sd_statfs_inode); @@ -707,7 +707,7 @@ void gfs2_freeze_func(struct work_struct *work) if (error) goto freeze_failed; - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); set_bit(SDF_FROZEN, &sdp->sd_flags); error = gfs2_do_thaw(sdp); @@ -812,7 +812,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) } atomic_inc(&sb->s_active); - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); error = gfs2_do_thaw(sdp); @@ -833,7 +833,7 @@ void gfs2_thaw_freeze_initiator(struct super_block *sb) if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) goto out; - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(sdp); out: mutex_unlock(&sdp->sd_freeze_mutex); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 09238604d741..af4758d8d894 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -109,10 +109,10 @@ int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) return error; } -void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) +void gfs2_freeze_unlock(struct gfs2_sbd *sdp) { - if (gfs2_holder_initialized(freeze_gh)) - gfs2_glock_dq_uninit(freeze_gh); + if (gfs2_holder_initialized(&sdp->sd_freeze_gh)) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); } static void signal_our_withdraw(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index d8d62bf0424d..27d03b641024 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -150,7 +150,7 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); -void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); +void gfs2_freeze_unlock(struct gfs2_sbd *sdp); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) From f3851fed07327b6a19e7ff8c2106e2b424f44cca Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:23:48 +0100 Subject: [PATCH 23/27] gfs2: Convert gfs2_page_mkwrite() to use a folio Convert the incoming page to a folio and use it throughout, saving several calls to compound_head(). 
Also use 'pos' for file position rather than the ambiguous 'offset' and convert 'length' to type size_t in case we get some truly ridiculous sized folios in the future. This function should now be large-folio safe, but I may have missed something. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 59 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 992ca4effb50..649f808db9cc 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,23 +376,23 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) } /** - * gfs2_allocate_page_backing - Allocate blocks for a write fault - * @page: The (locked) page to allocate backing for + * gfs2_allocate_folio_backing - Allocate blocks for a write fault + * @folio: The (locked) folio to allocate backing for * @length: Size of the allocation * - * We try to allocate all the blocks required for the page in one go. This + * We try to allocate all the blocks required for the folio in one go. This * might fail for various reasons, so we keep trying until all the blocks to - * back this page are allocated. If some of the blocks are already allocated, + * back this folio are allocated. If some of the blocks are already allocated, * that is ok too. */ -static int gfs2_allocate_page_backing(struct page *page, unsigned int length) +static int gfs2_allocate_folio_backing(struct folio *folio, size_t length) { - u64 pos = page_offset(page); + u64 pos = folio_pos(folio); do { struct iomap iomap = { }; - if (gfs2_iomap_alloc(page->mapping->host, pos, length, &iomap)) + if (gfs2_iomap_alloc(folio->mapping->host, pos, length, &iomap)) return -EIO; if (length < iomap.length) @@ -414,16 +414,16 @@ static int gfs2_allocate_page_backing(struct page *page, unsigned int length) static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = {}; - u64 offset = page_offset(page); + u64 pos = folio_pos(folio); unsigned int data_blocks, ind_blocks, rblocks; vm_fault_t ret = VM_FAULT_LOCKED; struct gfs2_holder gh; - unsigned int length; + size_t length; loff_t size; int err; @@ -436,23 +436,23 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) goto out_uninit; } - /* Check page index against inode size */ + /* Check folio index against inode size */ size = i_size_read(inode); - if (offset >= size) { + if (pos >= size) { ret = VM_FAULT_SIGBUS; goto out_unlock; } - /* Update file times before taking page lock */ + /* Update file times before taking folio lock */ file_update_time(vmf->vma->vm_file); - /* page is wholly or partially inside EOF */ - if (size - offset < PAGE_SIZE) - length = size - offset; + /* folio is wholly or partially inside EOF */ + if (size - pos < folio_size(folio)) + length = size - pos; else - length = PAGE_SIZE; + length = folio_size(folio); - gfs2_size_hint(vmf->vma->vm_file, offset, length); + gfs2_size_hint(vmf->vma->vm_file, pos, length); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); @@ -463,11 +463,12 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) */ if (!gfs2_is_stuffed(ip) && - !gfs2_write_alloc_required(ip, offset, length)) { - lock_page(page); - if (!PageUptodate(page) || page->mapping != 
inode->i_mapping) { + !gfs2_write_alloc_required(ip, pos, length)) { + folio_lock(folio); + if (!folio_test_uptodate(folio) || + folio->mapping != inode->i_mapping) { ret = VM_FAULT_NOPAGE; - unlock_page(page); + folio_unlock(folio); } goto out_unlock; } @@ -504,7 +505,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) goto out_trans_fail; } - /* Unstuff, if required, and allocate backing blocks for page */ + /* Unstuff, if required, and allocate backing blocks for folio */ if (gfs2_is_stuffed(ip)) { err = gfs2_unstuff_dinode(ip); if (err) { @@ -513,22 +514,22 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) } } - lock_page(page); + folio_lock(folio); /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ - if (!PageUptodate(page) || page->mapping != inode->i_mapping) { + if (!folio_test_uptodate(folio) || folio->mapping != inode->i_mapping) { ret = VM_FAULT_NOPAGE; goto out_page_locked; } - err = gfs2_allocate_page_backing(page, length); + err = gfs2_allocate_folio_backing(folio, length); if (err) ret = vmf_fs_error(err); out_page_locked: if (ret != VM_FAULT_LOCKED) - unlock_page(page); + folio_unlock(folio); out_trans_end: gfs2_trans_end(sdp); out_trans_fail: @@ -540,8 +541,8 @@ out_unlock: out_uninit: gfs2_holder_uninit(&gh); if (ret == VM_FAULT_LOCKED) { - set_page_dirty(page); - wait_for_stable_page(page); + folio_mark_dirty(folio); + folio_wait_stable(folio); } sb_end_pagefault(inode->i_sb); return ret; From 75377ae754c93a312e8430e9c159db3273bb679c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:23:50 +0100 Subject: [PATCH 24/27] gfs2: Simplify gfs2_read_super Use submit_bio_wait() instead of hand-rolling our own synchronous wait. Also allocate the BIO on the stack since we're not deep in the call stack at this point. There's no need to kmap the page, since it isn't allocated from HIGHMEM. Turn the GFP_NOFS allocation into GFP_KERNEL; if the page allocator enters reclaim, we cannot be called as the filesystem has not yet been initialised and so has no pages to reclaim. 
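The resulting synchronous read follows the usual on-stack bio pattern; a condensed sketch of the code in the diff below (error handling trimmed):

        struct bio_vec bvec;
        struct bio bio;
        int err;

        /* single-vector bio on the stack: no bio_alloc()/bio_put() needed */
        bio_init(&bio, sb->s_bdev, &bvec, 1, REQ_OP_READ | REQ_META);
        bio.bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
        __bio_add_page(&bio, page, PAGE_SIZE, 0);
        err = submit_bio_wait(&bio);    /* sleeps until the I/O completes */

submit_bio_wait() installs its own completion-based end_io handler, which is what makes the private end_bio_io_page() callback and the wait_on_page_locked() dance unnecessary.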
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 46 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a9f7f0d44227..10bf46867adc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -185,22 +185,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return 0; } -static void end_bio_io_page(struct bio *bio) -{ - struct page *page = bio->bi_private; - - if (!bio->bi_status) - SetPageUptodate(page); - else - pr_warn("error %d reading superblock\n", bio->bi_status); - unlock_page(page); -} - -static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) +static void gfs2_sb_in(struct gfs2_sbd *sdp, const struct gfs2_sb *str) { struct gfs2_sb_host *sb = &sdp->sd_sb; struct super_block *s = sdp->sd_vfs; - const struct gfs2_sb *str = buf; sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); sb->sb_type = be32_to_cpu(str->sb_header.mh_type); @@ -240,34 +228,26 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) { struct super_block *sb = sdp->sd_vfs; - struct gfs2_sb *p; struct page *page; - struct bio *bio; + struct bio_vec bvec; + struct bio bio; + int err; - page = alloc_page(GFP_NOFS); + page = alloc_page(GFP_KERNEL); if (unlikely(!page)) return -ENOMEM; - ClearPageUptodate(page); - ClearPageDirty(page); - lock_page(page); + bio_init(&bio, sb->s_bdev, &bvec, 1, REQ_OP_READ | REQ_META); + bio.bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); + __bio_add_page(&bio, page, PAGE_SIZE, 0); - bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS); - bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - __bio_add_page(bio, page, PAGE_SIZE, 0); - - bio->bi_end_io = end_bio_io_page; - bio->bi_private = page; - submit_bio(bio); - wait_on_page_locked(page); - bio_put(bio); - if (!PageUptodate(page)) { + err = submit_bio_wait(&bio); + if (err) { + pr_warn("error %d reading superblock\n", err); __free_page(page); - return -EIO; + return err; } - p = kmap(page); - gfs2_sb_in(sdp, p); - kunmap(page); + gfs2_sb_in(sdp, page_address(page)); __free_page(page); return gfs2_check_sb(sdp, silent); } From b844048011d3c771e7f8ae9354450f4e0c5c041a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:23:49 +0100 Subject: [PATCH 25/27] gfs2: Add a migrate_folio operation for journalled files For journalled data, folio migration currently works by writing the folio back, freeing the folio and faulting the new folio back in. We can bypass that by telling the migration code to migrate the buffer_heads attached to our folios. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 974aca9c8ea8..10d5acd3f742 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -116,8 +116,7 @@ static int gfs2_write_jdata_folio(struct folio *folio, * @folio: The folio to write * @wbc: The writeback control * - * This is shared between writepage and writepages and implements the - * core of the writepage operation. If a transaction is required then + * Implements the core of write back. If a transaction is required then * the checked flag will have been set and the transaction will have * already been started before this is called. 
*/ @@ -755,6 +754,7 @@ static const struct address_space_operations gfs2_jdata_aops = { .readahead = gfs2_readahead, .dirty_folio = jdata_dirty_folio, .bmap = gfs2_bmap, + .migrate_folio = buffer_migrate_folio, .invalidate_folio = gfs2_invalidate_folio, .release_folio = gfs2_release_folio, .is_partially_uptodate = block_is_partially_uptodate, From 50fabd42cb2fa02b727e7786fb80f6e172334066 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:23:51 +0100 Subject: [PATCH 26/27] gfs2: Convert gfs2_aspace_writepage() to use a folio Convert the incoming struct page to a folio and use it throughout. Saves six calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher --- fs/gfs2/meta_io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index f814054c8cd0..2b26e8d529aa 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -32,14 +32,14 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); struct buffer_head *bh, *head; int nr_underway = 0; blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); - BUG_ON(!PageLocked(page)); - BUG_ON(!page_has_buffers(page)); + BUG_ON(!folio_test_locked(folio)); - head = page_buffers(page); + head = folio_buffers(folio); bh = head; do { @@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb if (wbc->sync_mode != WB_SYNC_NONE) { lock_buffer(bh); } else if (!trylock_buffer(bh)) { - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); continue; } if (test_clear_buffer_dirty(bh)) { @@ -69,8 +69,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb * The page and its buffers are protected by PageWriteback(), so we can * drop the bh refcounts early. */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); + BUG_ON(folio_test_writeback(folio)); + folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; @@ -80,10 +80,10 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb } bh = next; } while (bh != head); - unlock_page(page); + folio_unlock(folio); if (nr_underway == 0) - end_page_writeback(page); + folio_end_writeback(folio); return 0; } From c1c53c26e3380a79b65e6b53dac6c3c797a7e8f1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 May 2024 11:04:49 +0200 Subject: [PATCH 27/27] gfs2: make timeout values more explicit 'timeout' is a vague name for the return value of wait_event_*_timeout because it actually returns the time left. Because the variable is never used later, just drop the return value. Since variable 'timeout' is then only used to carry a fixed timeout value, drop this in favor of a fixed function argument as in the other call to wait_event_timeout() above. 
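For context, wait_event_interruptible_timeout() returns the remaining jiffies (at least 1) if the condition became true, 0 if it timed out with the condition still false, or -ERESTARTSYS if a signal arrived; reusing the variable that carried the timeout is what made the name misleading. A minimal sketch of the before/after shape, where wq and condition stand in for the real waitqueue and wake-up condition:

        /* before: the return value (time left) overwrites the timeout */
        long timeout = 5 * HZ;
        timeout = wait_event_interruptible_timeout(wq, condition, timeout);

        /* after: the return value is unused, so pass the constant directly */
        wait_event_interruptible_timeout(wq, condition, 5 * HZ);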
Signed-off-by: Wolfram Sang Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 713dd24082c9..7a5aedfcd52a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1262,7 +1262,6 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder *gh = &ip->i_iopen_gh; - long timeout = 5 * HZ; int error; gh->gh_flags |= GL_NOCACHE; @@ -1293,10 +1292,10 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) if (error) return false; - timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait, + wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), - timeout); + 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); return false;