fuse: Introduce a new notification type for resend pending requests

When a FUSE daemon panics and fails over, we aim to minimize the impact on
applications by reusing the existing FUSE connection. During this process,
another daemon is employed to preserve the FUSE connection's file
descriptor. The newly started FUSE daemon will take over the fd and continue
to provide service.

However, it is possible for some inflight requests to be lost and never
returned. As a result, applications awaiting replies would become stuck
forever. To address this, we can resend these pending requests to the
newly started FUSE daemon.

This patch introduces a new notification type "FUSE_NOTIFY_RESEND", which
can trigger resending of the pending requests, ensuring they are properly
processed again.

Signed-off-by: Zhao Chen <winters.zc@antgroup.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
This commit is contained in:
Zhao Chen 2024-01-09 17:24:42 +08:00 committed by Miklos Szeredi
parent e022f6a1c7
commit 760eac73f9
2 changed files with 58 additions and 0 deletions

View File

@ -1775,6 +1775,59 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
return err;
}
/*
* Resending all processing queue requests.
*
* During a FUSE daemon panics and failover, it is possible for some inflight
* requests to be lost and never returned. As a result, applications awaiting
* replies would become stuck forever. To address this, we can use notification
* to trigger resending of these pending requests to the FUSE daemon, ensuring
* they are properly processed again.
*
* Please note that this strategy is applicable only to idempotent requests or
* if the FUSE daemon takes careful measures to avoid processing duplicated
* non-idempotent requests.
*/
static void fuse_resend(struct fuse_conn *fc)
{
struct fuse_dev *fud;
struct fuse_req *req, *next;
struct fuse_iqueue *fiq = &fc->iq;
LIST_HEAD(to_queue);
unsigned int i;
spin_lock(&fc->lock);
if (!fc->connected) {
spin_unlock(&fc->lock);
return;
}
list_for_each_entry(fud, &fc->devices, entry) {
struct fuse_pqueue *fpq = &fud->pq;
spin_lock(&fpq->lock);
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
list_splice_tail_init(&fpq->processing[i], &to_queue);
spin_unlock(&fpq->lock);
}
spin_unlock(&fc->lock);
list_for_each_entry_safe(req, next, &to_queue, list) {
__set_bit(FR_PENDING, &req->flags);
}
spin_lock(&fiq->lock);
/* iq and pq requests are both oldest to newest */
list_splice(&to_queue, &fiq->pending);
fiq->ops->wake_pending_and_unlock(fiq);
}
/*
 * Handler for the FUSE_NOTIFY_RESEND notification: requeue the requests on
 * the processing queues via fuse_resend().  The notification carries no
 * payload that is read here, and the handler always reports success.
 *
 * NOTE(review): unlike the default branch of fuse_notify(), nothing on this
 * path calls fuse_copy_finish() -- confirm the caller takes care of it.
 */
static int fuse_notify_resend(struct fuse_conn *fc)
{
	fuse_resend(fc);

	return 0;
}
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
@ -1800,6 +1853,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_DELETE:
return fuse_notify_delete(fc, size, cs);
case FUSE_NOTIFY_RESEND:
return fuse_notify_resend(fc);
default:
fuse_copy_finish(cs);
return -EINVAL;

View File

@ -216,6 +216,7 @@
* - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag
* - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag
* - add FUSE_NO_EXPORT_SUPPORT init flag
* - add FUSE_NOTIFY_RESEND
*/
#ifndef _LINUX_FUSE_H
@ -645,6 +646,7 @@ enum fuse_notify_code {
FUSE_NOTIFY_STORE = 4,
FUSE_NOTIFY_RETRIEVE = 5,
FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_RESEND = 7,
FUSE_NOTIFY_CODE_MAX,
};