From 820ce8ed53ce2111aa5171f7349f289d7e9d0693 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 21 Aug 2024 22:02:05 +0200 Subject: [PATCH 01/21] gfs2: Rename GLF_VERIFY_EVICT to GLF_VERIFY_DELETE Rename the GLF_VERIFY_EVICT flag to GLF_VERIFY_DELETE: that flag indicates that we want to delete an inode / verify that it has been deleted. To match, rename gfs2_queue_verify_evict() to gfs2_queue_verify_delete(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 14 +++++++------- fs/gfs2/incore.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 269c3bc7fced..5addf4ebf33b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1013,11 +1013,11 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) &gl->gl_delete, 0); } -static bool gfs2_queue_verify_evict(struct gfs2_glock *gl) +static bool gfs2_queue_verify_delete(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (test_and_set_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) + if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) return false; return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 5 * HZ); @@ -1052,19 +1052,19 @@ static void delete_work_func(struct work_struct *work) if (gfs2_try_evict(gl)) { if (test_bit(SDF_KILL, &sdp->sd_flags)) goto out; - if (gfs2_queue_verify_evict(gl)) + if (gfs2_queue_verify_delete(gl)) return; } goto out; } - if (test_and_clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) { + if (test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) { inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, GFS2_BLKST_UNLINKED); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -EAGAIN && !test_bit(SDF_KILL, &sdp->sd_flags) && - gfs2_queue_verify_evict(gl)) + gfs2_queue_verify_delete(gl)) return; } else { d_prune_aliases(inode); @@ -2118,7 +2118,7 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) void gfs2_cancel_delete_work(struct gfs2_glock *gl) { clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags); - clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags); + clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); if (cancel_delayed_work(&gl->gl_delete)) gfs2_glock_put(gl); } @@ -2371,7 +2371,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'N'; if (test_bit(GLF_TRY_TO_EVICT, gflags)) *p++ = 'e'; - if (test_bit(GLF_VERIFY_EVICT, gflags)) + if (test_bit(GLF_VERIFY_DELETE, gflags)) *p++ = 'E'; *p = 0; return buf; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aa4ef67a34e0..bd1348bff90e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -329,7 +329,7 @@ enum { GLF_BLOCKING = 15, GLF_UNLOCKED = 16, /* Wait for glock to be unlocked */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ - GLF_VERIFY_EVICT = 18, /* iopen glocks only */ + GLF_VERIFY_DELETE = 18, /* iopen glocks only */ }; struct gfs2_glock { From 1072b3aa6863bc4d91006038b032bfb4dcc98dec Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Sep 2024 15:42:57 +0200 Subject: [PATCH 02/21] gfs2: Initialize gl_no_formal_ino earlier Set gl_no_formal_ino of the iopen glock to the generation of the associated inode (ip->i_no_formal_ino) as soon as that value is known. This saves us from setting it later, possibly repeatedly, when queuing GLF_VERIFY_DELETE work. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 - fs/gfs2/glops.c | 9 ++++++++- fs/gfs2/inode.c | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5addf4ebf33b..96894243d03b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -980,7 +980,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - gl->gl_no_formal_ino = ip->i_no_formal_ino; set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); iput(&ip->i_inode); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 95d8081681dc..dbc444bfc630 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -494,11 +494,18 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) static int inode_go_instantiate(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; + struct gfs2_glock *io_gl; + int error; if (!ip) /* no inode to populate - read it in later */ return 0; - return gfs2_inode_refresh(ip); + error = gfs2_inode_refresh(ip); + if (error) + return error; + io_gl = ip->i_iopen_gh.gh_gl; + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; + return 0; } static int inode_go_held(struct gfs2_holder *gh) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1b95db2c3aac..6fbbaaad1cd0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -750,6 +750,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; gfs2_cancel_delete_work(io_gl); + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); From 160bc9555d8654464cbbd7bb1f6687048471d2f6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 Sep 2024 18:38:00 +0200 Subject: [PATCH 03/21] gfs2: Allow immediate GLF_VERIFY_DELETE work Add an argument to gfs2_queue_verify_delete() that allows it to queue GLF_VERIFY_DELETE work for immediate execution. This is used in the next patch. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 96894243d03b..7119037a866c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1012,14 +1012,15 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) &gl->gl_delete, 0); } -static bool gfs2_queue_verify_delete(struct gfs2_glock *gl) +static bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + unsigned long delay; if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) return false; - return queue_delayed_work(sdp->sd_delete_wq, - &gl->gl_delete, 5 * HZ); + delay = later ? 5 * HZ : 0; + return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay); } static void delete_work_func(struct work_struct *work) @@ -1051,7 +1052,7 @@ static void delete_work_func(struct work_struct *work) if (gfs2_try_evict(gl)) { if (test_bit(SDF_KILL, &sdp->sd_flags)) goto out; - if (gfs2_queue_verify_delete(gl)) + if (gfs2_queue_verify_delete(gl, true)) return; } goto out; @@ -1063,7 +1064,7 @@ static void delete_work_func(struct work_struct *work) if (IS_ERR(inode)) { if (PTR_ERR(inode) == -EAGAIN && !test_bit(SDF_KILL, &sdp->sd_flags) && - gfs2_queue_verify_delete(gl)) + gfs2_queue_verify_delete(gl, true)) return; } else { d_prune_aliases(inode); From 7c6f714d88475ceae5342264858a641eafa19632 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 26 Aug 2024 20:06:21 +0200 Subject: [PATCH 04/21] gfs2: Fix unlinked inode cleanup Before commit f0e56edc2ec7 ("gfs2: Split the two kinds of glock "delete" work"), function delete_work_func() was used to trigger the eviction of in-memory inodes from remote as well as deleting unlinked inodes at a later point. These two kinds of work were then split into two kinds of work, and the two places in the code were deferred deletion of inodes is required accidentally ended up queuing the wrong kind of work. This caused unlinked inodes to be left behind, which could in the worst case fill up filesystems and require a filesystem check to recover. Fix that by queuing the right kind of work in try_rgrp_unlink() and gfs2_drop_inode(). Fixes: f0e56edc2ec7 ("gfs2: Split the two kinds of glock "delete" work") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- fs/gfs2/glock.h | 1 + fs/gfs2/rgrp.c | 2 +- fs/gfs2/super.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7119037a866c..9273ec5345ed 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1012,7 +1012,7 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) &gl->gl_delete, 0); } -static bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) +bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned long delay; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index adf0091cc98f..63e101d448e9 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -245,6 +245,7 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); void gfs2_glock_complete(struct gfs2_glock *gl, int ret); bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); +bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); void gfs2_cancel_delete_work(struct gfs2_glock *gl); void gfs2_flush_delete_work(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 29c772816765..539303129715 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1879,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip */ ip = gl->gl_object; - if (ip || !gfs2_queue_try_to_evict(gl)) + if (ip || !gfs2_queue_verify_delete(gl, false)) gfs2_glock_put(gl); else found++; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6678060ed4d2..e22c1edc32b3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1045,7 +1045,7 @@ static int gfs2_drop_inode(struct inode *inode) struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; gfs2_glock_hold(gl); - if (!gfs2_queue_try_to_evict(gl)) + if (!gfs2_queue_verify_delete(gl, true)) gfs2_glock_put_async(gl); return 0; } From f9417fcfca3c5e30a0b961e7250fab92cfa5d123 Mon Sep 17 00:00:00 2001 From: Qianqiang Liu Date: Mon, 21 Oct 2024 22:58:01 +0200 Subject: [PATCH 05/21] KMSAN: uninit-value in inode_go_dump (5) When mounting of a corrupted disk image fails, the error message printed can reference uninitialized inode fields. To prevent that from happening, always initialize those fields. Reported-by: syzbot+aa0730b0a42646eb1359@syzkaller.appspotmail.com Signed-off-by: Qianqiang Liu Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e22c1edc32b3..b9cef63c7871 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1537,11 +1537,13 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) if (!ip) return NULL; ip->i_no_addr = 0; + ip->i_no_formal_ino = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); + ip->i_diskflags = 0; ip->i_rahead = 0; return &ip->i_inode; } From ee51baa817eec7c5182c1e4450c4d1e8469faa96 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 Sep 2024 23:38:59 +0200 Subject: [PATCH 06/21] gfs2: Faster gfs2_upgrade_iopen_glock wakeups Move function needs_demote() to glock.h and rename it to glock_needs_demote(). In handle_callback(), wake up the glock when setting the GLF_PENDING_DEMOTE flag as well. (Setting the GLF_DEMOTE flag already triggered a wake-up.) With that, check for glock_needs_demote() in gfs2_upgrade_iopen_glock() to wake up when either of those flags is set for the inode glock: the faster we can react to contention, the better. The GLF_PENDING_DEMOTE flag is only used for inode glocks (see gfs2_glock_cb()) so it's okay to only check for the GLF_DEMOTE flag in gfs2_drop_inode(). Still, using glock_needs_demote() there as well makes the code a little easier to read. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 23 +++++++---------------- fs/gfs2/glock.h | 6 ++++++ fs/gfs2/super.c | 4 ++-- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9273ec5345ed..4567c8c60a03 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -563,11 +563,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) gl->gl_tchange = jiffies; } -static void gfs2_set_demote(struct gfs2_glock *gl) +static void gfs2_set_demote(int nr, struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - set_bit(GLF_DEMOTE, &gl->gl_flags); + set_bit(nr, &gl->gl_flags); smp_mb(); wake_up(&sdp->sd_async_glock_wait); } @@ -1101,7 +1101,7 @@ static void glock_work_func(struct work_struct *work) if (!delay) { clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - gfs2_set_demote(gl); + gfs2_set_demote(GLF_DEMOTE, gl); } } run_queue(gl, 0); @@ -1443,10 +1443,7 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) static void request_demote(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote) { - if (delay) - set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - else - gfs2_set_demote(gl); + gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; @@ -1636,12 +1633,6 @@ int gfs2_glock_poll(struct gfs2_holder *gh) return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } -static inline bool needs_demote(struct gfs2_glock *gl) -{ - return (test_bit(GLF_DEMOTE, &gl->gl_flags) || - test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); -} - static void __gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; @@ -1650,8 +1641,8 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) /* * This holder should not be cached, so mark it for demote. - * Note: this should be done before the check for needs_demote - * below. + * Note: this should be done before the glock_needs_demote + * check below. */ if (gh->gh_flags & GL_NOCACHE) request_demote(gl, LM_ST_UNLOCKED, 0, false); @@ -1664,7 +1655,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) * If there hasn't been a demote request we are done. * (Let the remaining holders, if any, keep holding it.) */ - if (!needs_demote(gl)) { + if (!glock_needs_demote(gl)) { if (list_empty(&gl->gl_holders)) fast_path = 1; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 63e101d448e9..c171f745650f 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -285,4 +285,10 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +static inline bool glock_needs_demote(struct gfs2_glock *gl) +{ + return (test_bit(GLF_DEMOTE, &gl->gl_flags) || + test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); +} + #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b9cef63c7871..ea9af6d4a042 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1030,7 +1030,7 @@ static int gfs2_drop_inode(struct inode *inode) if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + if (glock_needs_demote(gl)) clear_nlink(inode); } @@ -1294,7 +1294,7 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || - test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), + glock_needs_demote(ip->i_gl), 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); From 9fb794aac6ddd08a9c4982372250f06137696e90 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 Sep 2024 22:22:12 +0200 Subject: [PATCH 07/21] gfs2: Rename GIF_{DEFERRED -> DEFER}_DELETE The GIF_DEFERRED_DELETE flag indicates an action that gfs2_evict_inode() should take, so rename the flag to GIF_DEFER_DELETE to clarify. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/incore.h | 2 +- fs/gfs2/super.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4567c8c60a03..c095ff2efe1d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -980,7 +980,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + set_bit(GIF_DEFER_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); iput(&ip->i_inode); @@ -988,7 +988,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; if (ip) { - clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + clear_bit(GIF_DEFER_DELETE, &ip->i_flags); if (!igrab(&ip->i_inode)) ip = NULL; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bd1348bff90e..4e19cce3d906 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -376,7 +376,7 @@ enum { GIF_SW_PAGED = 3, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, - GIF_DEFERRED_DELETE = 7, + GIF_DEFER_DELETE = 7, }; struct gfs2_inode { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ea9af6d4a042..c85c07b30b20 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1324,7 +1324,7 @@ static enum dinode_demise evict_should_delete(struct inode *inode, if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; - if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) + if (test_bit(GIF_DEFER_DELETE, &ip->i_flags)) return SHOULD_DEFER_EVICTION; /* Deletes should never happen under memory pressure anymore. */ From c79ba4be351a06e0ac4c51143a83023bb37888d6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 14 Sep 2024 00:37:03 +0200 Subject: [PATCH 08/21] gfs2: Rename dinode_demise to evict_behavior Rename enum dinode_demise to evict_behavior and its items SHOULD_DELETE_DINODE to EVICT_SHOULD_DELETE, SHOULD_NOT_DELETE_DINODE to EVICT_SHOULD_SKIP_DELETE, and SHOULD_DEFER_EVICTION to EVICT_SHOULD_DEFER_DELETE. In gfs2_evict_inode(), add a separate variable of type enum evict_behavior instead of implicitly casting to int. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c85c07b30b20..f43b238ccaf1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -44,10 +44,10 @@ #include "xattr.h" #include "lops.h" -enum dinode_demise { - SHOULD_DELETE_DINODE, - SHOULD_NOT_DELETE_DINODE, - SHOULD_DEFER_EVICTION, +enum evict_behavior { + EVICT_SHOULD_DELETE, + EVICT_SHOULD_SKIP_DELETE, + EVICT_SHOULD_DEFER_DELETE, }; /** @@ -1313,8 +1313,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * * Returns: the fate of the dinode */ -static enum dinode_demise evict_should_delete(struct inode *inode, - struct gfs2_holder *gh) +static enum evict_behavior evict_should_delete(struct inode *inode, + struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); struct super_block *sb = inode->i_sb; @@ -1325,11 +1325,11 @@ static enum dinode_demise evict_should_delete(struct inode *inode, goto should_delete; if (test_bit(GIF_DEFER_DELETE, &ip->i_flags)) - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; /* Deletes should never happen under memory pressure anymore. */ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; /* Must not read inode block until block type has been verified */ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh); @@ -1337,34 +1337,34 @@ static enum dinode_demise evict_should_delete(struct inode *inode, glock_clear_object(ip->i_iopen_gh.gh_gl, ip); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; } if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino)) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; ret = gfs2_instantiate(gh); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; /* * The inode may have been recreated in the meantime. */ if (inode->i_nlink) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { if (!gfs2_upgrade_iopen_glock(inode)) { gfs2_holder_uninit(&ip->i_iopen_gh); - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; } } - return SHOULD_DELETE_DINODE; + return EVICT_SHOULD_DELETE; } /** @@ -1475,6 +1475,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + enum evict_behavior behavior; int ret; if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) @@ -1489,10 +1490,10 @@ static void gfs2_evict_inode(struct inode *inode) goto out; gfs2_holder_mark_uninitialized(&gh); - ret = evict_should_delete(inode, &gh); - if (ret == SHOULD_DEFER_EVICTION) + behavior = evict_should_delete(inode, &gh); + if (behavior == EVICT_SHOULD_DEFER_DELETE) goto out; - if (ret == SHOULD_DELETE_DINODE) + if (behavior == EVICT_SHOULD_DELETE) ret = evict_unlinked_inode(inode); else ret = evict_linked_inode(inode); From a94dafe87d5fdded799fc25b82b123fb93959421 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Sep 2024 01:17:40 +0200 Subject: [PATCH 09/21] gfs2: Return enum evict_behavior from gfs2_upgrade_iopen_glock In case an iopen glock cannot be upgraded, function gfs2_upgrade_iopen_glock() needs to communicate to gfs2_evict_inode() whether deleting the inode should be deferred or skipped altogether. Change the function to return the appropriate enum evict_behavior value to indicate that. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index f43b238ccaf1..46d325c2ab88 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1257,7 +1257,7 @@ static void gfs2_glock_put_eventually(struct gfs2_glock *gl) gfs2_glock_put(gl); } -static bool gfs2_upgrade_iopen_glock(struct inode *inode) +static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1290,7 +1290,7 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) - return false; + return EVICT_SHOULD_SKIP_DELETE; wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || @@ -1298,9 +1298,14 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); - return false; + if (glock_needs_demote(ip->i_gl)) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DEFER_DELETE; } - return gfs2_glock_holder_ready(gh) == 0; + error = gfs2_glock_holder_ready(gh); + if (error) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DELETE; } /** @@ -1359,9 +1364,12 @@ static enum evict_behavior evict_should_delete(struct inode *inode, should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - if (!gfs2_upgrade_iopen_glock(inode)) { + enum evict_behavior behavior = + gfs2_upgrade_iopen_glock(inode); + + if (behavior != EVICT_SHOULD_DELETE) { gfs2_holder_uninit(&ip->i_iopen_gh); - return EVICT_SHOULD_SKIP_DELETE; + return behavior; } } return EVICT_SHOULD_DELETE; From b4100457d02d90149129ba2230130954a03fdf0b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 22 Aug 2024 19:33:51 +0200 Subject: [PATCH 10/21] gfs2: Minor delete_work_func cleanup Move those definitions into the the scope in which they are used. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c095ff2efe1d..9ec305b8d4d4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1028,8 +1028,6 @@ static void delete_work_func(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct inode *inode; - u64 no_addr = gl->gl_name.ln_number; if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) { /* @@ -1059,6 +1057,9 @@ static void delete_work_func(struct work_struct *work) } if (test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) { + u64 no_addr = gl->gl_name.ln_number; + struct inode *inode; + inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, GFS2_BLKST_UNLINKED); if (IS_ERR(inode)) { From 0baa10b60cddb587a1a252a8db76b0cea439d1be Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 Sep 2024 23:29:41 +0200 Subject: [PATCH 11/21] gfs2: Clean up delete work processing Function delete_work_func() was previously assuming that the GLF_TRY_TO_EVICT and GLF_VERIFY_DELETE flags won't both be set at the same time, but there probably are races in which that can happen, so handle that case correctly. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9ec305b8d4d4..95f082f13a8c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1028,6 +1028,7 @@ static void delete_work_func(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) { /* @@ -1048,15 +1049,15 @@ static void delete_work_func(struct work_struct *work) * step entirely. */ if (gfs2_try_evict(gl)) { - if (test_bit(SDF_KILL, &sdp->sd_flags)) - goto out; - if (gfs2_queue_verify_delete(gl, true)) - return; + if (!test_bit(SDF_KILL, &sdp->sd_flags)) { + gfs2_glock_hold(gl); + if (!gfs2_queue_verify_delete(gl, true)) + gfs2_glock_put(gl); + } } - goto out; } - if (test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) { + if (verify_delete) { u64 no_addr = gl->gl_name.ln_number; struct inode *inode; @@ -1073,7 +1074,6 @@ static void delete_work_func(struct work_struct *work) } } -out: gfs2_glock_put(gl); } From 8c21c2c71e668a5eed9fe9981a2306f9178e6c3e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 Sep 2024 22:32:46 +0200 Subject: [PATCH 12/21] gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode Move calls to gfs2_queue_verify_delete() into gfs2_evict_inode(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 13 ++----------- fs/gfs2/super.c | 9 ++++++++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 95f082f13a8c..3736e7f3b381 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -959,10 +959,9 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) gfs2_holder_uninit(&gh); } -static bool gfs2_try_evict(struct gfs2_glock *gl) +static void gfs2_try_evict(struct gfs2_glock *gl) { struct gfs2_inode *ip; - bool evicted = false; /* * If there is contention on the iopen glock and we have an inode, try @@ -997,9 +996,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) gfs2_glock_poke(ip->i_gl); iput(&ip->i_inode); } - evicted = !ip; } - return evicted; } bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) @@ -1048,13 +1045,7 @@ static void delete_work_func(struct work_struct *work) * care about compatibility with such nodes, we can skip this * step entirely. */ - if (gfs2_try_evict(gl)) { - if (!test_bit(SDF_KILL, &sdp->sd_flags)) { - gfs2_glock_hold(gl); - if (!gfs2_queue_verify_delete(gl, true)) - gfs2_glock_put(gl); - } - } + gfs2_try_evict(gl); } if (verify_delete) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 46d325c2ab88..340bc21de218 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1499,8 +1499,15 @@ static void gfs2_evict_inode(struct inode *inode) gfs2_holder_mark_uninitialized(&gh); behavior = evict_should_delete(inode, &gh); - if (behavior == EVICT_SHOULD_DEFER_DELETE) + if (behavior == EVICT_SHOULD_DEFER_DELETE && + !test_bit(SDF_KILL, &sdp->sd_flags)) { + struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; + + gfs2_glock_hold(io_gl); + if (!gfs2_queue_verify_delete(io_gl, true)) + gfs2_glock_put(io_gl); goto out; + } if (behavior == EVICT_SHOULD_DELETE) ret = evict_unlinked_inode(inode); else From a6033333ccce01ecada39b3ddabc03fd967e60c0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 23 Sep 2024 22:27:48 +0200 Subject: [PATCH 13/21] gfs2: Update to the evict / remote delete documentation Try to be a bit more clear and remove some duplications. We cannot actually get rid of the verification step eventually, so remove the comment saying so. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 31 ++++++++----------------------- fs/gfs2/super.c | 6 +++--- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3736e7f3b381..8fff36846145 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -965,13 +965,16 @@ static void gfs2_try_evict(struct gfs2_glock *gl) /* * If there is contention on the iopen glock and we have an inode, try - * to grab and release the inode so that it can be evicted. This will - * allow the remote node to go ahead and delete the inode without us - * having to do it, which will avoid rgrp glock thrashing. + * to grab and release the inode so that it can be evicted. The + * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode + * should not be deleted locally. This will allow the remote node to + * go ahead and delete the inode without us having to do it, which will + * avoid rgrp glock thrashing. * * The remote node is likely still holding the corresponding inode * glock, so it will run before we get to verify that the delete has - * happened below. + * happened below. (Verification is triggered by the call to + * gfs2_queue_verify_delete() in gfs2_evict_inode().) */ spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; @@ -1027,26 +1030,8 @@ static void delete_work_func(struct work_struct *work) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); - if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) { - /* - * If we can evict the inode, give the remote node trying to - * delete the inode some time before verifying that the delete - * has happened. Otherwise, if we cause contention on the inode glock - * immediately, the remote node will think that we still have - * the inode in use, and so it will give up waiting. - * - * If we can't evict the inode, signal to the remote node that - * the inode is still in use. We'll later try to delete the - * inode locally in gfs2_evict_inode. - * - * FIXME: We only need to verify that the remote node has - * deleted the inode because nodes before this remote delete - * rework won't cooperate. At a later time, when we no longer - * care about compatibility with such nodes, we can skip this - * step entirely. - */ + if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) gfs2_try_evict(gl); - } if (verify_delete) { u64 no_addr = gl->gl_name.ln_number; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 340bc21de218..40158396e8db 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1272,9 +1272,9 @@ static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode) * exclusive access to the iopen glock here. * * Otherwise, the other nodes holding the lock will be notified about - * our locking request. If they do not have the inode open, they are - * expected to evict the cached inode and release the lock, allowing us - * to proceed. + * our locking request (see iopen_go_callback()). If they do not have + * the inode open, they are expected to evict the cached inode and + * release the lock, allowing us to proceed. * * Otherwise, if they cannot evict the inode, they are expected to poke * the inode glock (note: not the iopen glock). We will notice that From f6ca45e3d2b97ddb4bfcbbe44d1dd18374cd6f85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Sep 2024 22:56:53 +0200 Subject: [PATCH 14/21] gfs2: Use mod_delayed_work in gfs2_queue_try_to_evict In the unlikely case that we're trying to queue GLF_TRY_TO_EVICT work for an inode that already has GLF_VERIFY_DELETE work queued, we want to make sure that the GLF_TRY_TO_EVICT work gets scheduled immediately instead of waiting for the delayed work timer to expire. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8fff36846145..5455f4dd14a3 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1008,8 +1008,7 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) return false; - return queue_delayed_work(sdp->sd_delete_wq, - &gl->gl_delete, 0); + return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0); } bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) From 085e423b4d51dfe71e1967c9e508d1cb845063d3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Sep 2024 23:07:31 +0200 Subject: [PATCH 15/21] gfs2: Randomize GLF_VERIFY_DELETE work delay Randomize the delay of GLF_VERIFY_DELETE work. This avoids thundering herd problems when multiple nodes schedule that kind of work in response to an inode being unlinked remotely. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5455f4dd14a3..46a6e97cd930 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1018,7 +1019,7 @@ bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) return false; - delay = later ? 5 * HZ : 0; + delay = later ? HZ + get_random_long() % (HZ * 9) : 0; return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay); } From 0c5bee608fbbd970e46aade6e57a0fdbbaa4621e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Sep 2024 23:10:50 +0200 Subject: [PATCH 16/21] gfs2: Use get_random_u32 in gfs2_orlov_skip Use get_random_u32() instead of get_random_bytes() to remove the last remaining call to get_random_bytes(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 539303129715..b14e54b38ee8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1987,10 +1987,8 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - u32 skip; - get_random_bytes(&skip, sizeof(skip)); - return skip % sdp->sd_rgrps; + return get_random_u32() % sdp->sd_rgrps; } static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) From 70cddf16cbfbb6f7fb4d68bb62765850a921450d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Sep 2024 14:02:41 +0200 Subject: [PATCH 17/21] gfs2: Make gfs2_inode_refresh static Function gfs2_inode_refresh() is only used in fs/gfs2/glops.c. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 2 +- fs/gfs2/inode.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index dbc444bfc630..eb4714f299ef 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -470,7 +470,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) * Returns: errno */ -int gfs2_inode_refresh(struct gfs2_inode *ip) +static int gfs2_inode_refresh(struct gfs2_inode *ip) { struct buffer_head *dibh; int error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index fd15d1c6b6fb..9e5e1622d50a 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -93,8 +93,6 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 no_formal_ino, unsigned int blktype); -int gfs2_inode_refresh(struct gfs2_inode *ip); - struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); int gfs2_permission(struct mnt_idmap *idmap, From 03ff3781bf6c149554d88e7b702a3abd5e400dc0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Oct 2024 14:10:47 +0200 Subject: [PATCH 18/21] gfs2: gfs2_evict_inode clarification When function evict_should_delete() returns SHOULD_DEFER_EVICTION, gh is never initialized, but that isn't obvious; if it did initialize gh and then return SHOULD_DEFER_EVICTION, gfs2_evict_inode() would fail to release it. To clarify the code, change gfs2_evict_inode() to always check if gh needs to be released, no matter what evict_should_delete() returns. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 40158396e8db..aadb83e38c17 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1486,6 +1486,7 @@ static void gfs2_evict_inode(struct inode *inode) enum evict_behavior behavior; int ret; + gfs2_holder_mark_uninitialized(&gh); if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; @@ -1497,7 +1498,6 @@ static void gfs2_evict_inode(struct inode *inode) if (!sdp->sd_jdesc) goto out; - gfs2_holder_mark_uninitialized(&gh); behavior = evict_should_delete(inode, &gh); if (behavior == EVICT_SHOULD_DEFER_DELETE && !test_bit(SDF_KILL, &sdp->sd_flags)) { @@ -1516,11 +1516,11 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (gfs2_holder_initialized(&gh)) - gfs2_glock_dq_uninit(&gh); if (ret && ret != GLR_TRYFAILED && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: + if (gfs2_holder_initialized(&gh)) + gfs2_glock_dq_uninit(&gh); truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); From b6900ce15191ff9e219f1974b5db107ae02bb387 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Oct 2024 22:18:32 +0100 Subject: [PATCH 19/21] gfs2: Simplify DLM_LKF_QUECVT use The DLM_LKF_QUECVT flag needs to be set for "upward" lock conversions to ensure fairness, but setting it for "downward" lock conversions will lead to a failure. The flag is currently set based on the GLF_BLOCKING flag and it's not immediately obvious why this is correct. Simplify things by figuring out if a lock conversion is "upward" by looking at the before and after locking modes instead of relying on the GLF_BLOCKING flag. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lock_dlm.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index fa5134df985f..58aeeae7ed8c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -224,8 +224,21 @@ static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate) return -1; } +/* Taken from fs/dlm/lock.c. */ + +static bool middle_conversion(int cur, int req) +{ + return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) || + (cur == DLM_LOCK_CW && req == DLM_LOCK_PR); +} + +static bool down_conversion(int cur, int req) +{ + return !middle_conversion(cur, req) && req < cur; +} + static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, - const int req) + const int cur, const int req) { u32 lkf = 0; @@ -251,7 +264,14 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, if (!test_bit(GLF_INITIAL, &gl->gl_flags)) { lkf |= DLM_LKF_CONVERT; - if (test_bit(GLF_BLOCKING, &gl->gl_flags)) + + /* + * The DLM_LKF_QUECVT flag needs to be set for "first come, + * first served" semantics, but it must only be set for + * "upward" lock conversions or else DLM will reject the + * request as invalid. + */ + if (!down_conversion(cur, req)) lkf |= DLM_LKF_QUECVT; } @@ -271,13 +291,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - int req; + int cur, req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; int error; + cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state); req = make_mode(gl->gl_name.ln_sbd, req_state); - lkf = make_flags(gl, flags, req); + lkf = make_flags(gl, flags, cur, req); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); if (test_bit(GLF_INITIAL, &gl->gl_flags)) { From c5b7a2400edc458b22133d5e5394bea26eab1923 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Nov 2024 10:44:56 +0100 Subject: [PATCH 20/21] gfs2: Only defer deletes when we have an iopen glock The mechanism to defer deleting unlinked inodes is tied to delete_work_func(), which is tied to iopen glocks. When we don't have an iopen glock, we must carry out deletes immediately instead. Fixes a NULL pointer dereference in gfs2_evict_inode(). Fixes: 8c21c2c71e66 ("gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index aadb83e38c17..92a3b6ddafdc 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1503,10 +1503,13 @@ static void gfs2_evict_inode(struct inode *inode) !test_bit(SDF_KILL, &sdp->sd_flags)) { struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; - gfs2_glock_hold(io_gl); - if (!gfs2_queue_verify_delete(io_gl, true)) - gfs2_glock_put(io_gl); - goto out; + if (io_gl) { + gfs2_glock_hold(io_gl); + if (!gfs2_queue_verify_delete(io_gl, true)) + gfs2_glock_put(io_gl); + goto out; + } + behavior = EVICT_SHOULD_DELETE; } if (behavior == EVICT_SHOULD_DELETE) ret = evict_unlinked_inode(inode); From ffd1cf0443a208b80e40100ed02892d2ec74c7e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Nov 2024 12:15:26 +0100 Subject: [PATCH 21/21] gfs2: Prevent inode creation race When a request to evict an inode comes in over the network, we are trying to grab an inode reference via the iopen glock's gl_object pointer. There is a very small probability that by the time such a request comes in, inode creation hasn't completed and the I_NEW flag is still set. To deal with that, wait for the inode and then check if inode creation was successful. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 46a6e97cd930..8db92a36194a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -982,6 +982,13 @@ static void gfs2_try_evict(struct gfs2_glock *gl) if (ip && !igrab(&ip->i_inode)) ip = NULL; spin_unlock(&gl->gl_lockref.lock); + if (ip) { + wait_on_inode(&ip->i_inode); + if (is_bad_inode(&ip->i_inode)) { + iput(&ip->i_inode); + ip = NULL; + } + } if (ip) { set_bit(GIF_DEFER_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode);