nfsd: allow for up to 32 callback session slots

nfsd currently only uses a single slot in the callback channel, which is
proving to be a bottleneck in some cases. Widen the callback channel to
a max of 32 slots (subject to the client's target_maxreqs value).

Change the cb_holds_slot boolean to an integer that tracks the current
slot number (with -1 meaning "unassigned").  Move the callback slot
tracking info into the session. Add a new u32 that acts as a bitmap to
track which slots are in use, and a u32 to track the latest callback
target_slotid that the client reports. To protect the new fields, add
a new per-session spinlock (the se_lock). Fix nfsd41_cb_get_slot to always
search for the lowest slotid (using ffs()).

Finally, convert the session->se_cb_seq_nr field into an array of
ints and add the necessary handling to ensure that the seqids get
reset when the slot table grows after shrinking.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
This commit is contained in:
Jeff Layton 2024-11-18 09:54:34 -05:00 committed by Chuck Lever
parent c840b8e1f0
commit 583772eec7
4 changed files with 109 additions and 40 deletions

View File

@ -374,6 +374,19 @@ encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
hdr->nops++;
}
/*
 * Compute the value encoded as csa_highest_slotid in CB_SEQUENCE.
 *
 * The result is the larger of the highest-numbered slot whose bit is
 * currently clear in se_cb_slot_avail (i.e. a slot that is in use) and
 * se_cb_highest_slot. Both fields are sampled under se_lock so the two
 * values are consistent with each other.
 */
static u32 highest_slotid(struct nfsd4_session *ses)
{
	u32 busiest, highest;

	spin_lock(&ses->se_lock);
	/*
	 * fls() numbers bits starting at 1 and returns 0 when no bit is
	 * set, so convert to a 0-based slot number only when some slot
	 * is actually in use.
	 */
	busiest = fls(~ses->se_cb_slot_avail);
	if (busiest != 0)
		busiest -= 1;
	highest = max(busiest, ses->se_cb_highest_slot);
	spin_unlock(&ses->se_lock);

	return highest;
}
/*
* CB_SEQUENCE4args
*
@ -400,15 +413,40 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
encode_sessionid4(xdr, session);
p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
*p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
*p++ = xdr_zero; /* csa_slotid */
*p++ = xdr_zero; /* csa_highest_slotid */
*p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */
*p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */
*p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */
*p++ = xdr_zero; /* csa_cachethis */
xdr_encode_empty_array(p); /* csa_referring_call_lists */
hdr->nops++;
}
/*
 * Record a new target highest slotid for the session's callback slot table.
 *
 * @ses:    session whose callback slot table is being resized
 * @target: target highest slotid decoded from the CB_SEQUENCE reply
 *
 * When the table grows, the target is clamped to the last index of
 * se_cb_seq_nr[] and the sequence number of every newly usable slot is
 * reset to 1. When it shrinks, only se_cb_highest_slot is lowered.
 */
static void update_cb_slot_table(struct nfsd4_session *ses, u32 target)
{
	int slot;

	/* Lockless fast path: the target has not changed */
	if (likely(READ_ONCE(ses->se_cb_highest_slot) == target))
		return;

	spin_lock(&ses->se_lock);
	if (ses->se_cb_highest_slot < target) {
		/* Never index past the end of se_cb_seq_nr[] */
		target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1);

		/*
		 * The slot table is growing, so restart the sequence of
		 * every slot that is being added at 1.
		 *
		 * NB: There is some debate about whether the RFC requires
		 * this, but the Linux client expects it.
		 */
		slot = ses->se_cb_highest_slot + 1;
		while (slot <= target) {
			ses->se_cb_seq_nr[slot] = 1;
			++slot;
		}
	}
	ses->se_cb_highest_slot = target;
	spin_unlock(&ses->se_lock);
}
/*
* CB_SEQUENCE4resok
*
@ -436,7 +474,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
u32 seqid, slotid, target;
/*
* If the server returns different values for sessionID, slotID or
@ -452,21 +490,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
}
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
dummy = be32_to_cpup(p++);
if (dummy != session->se_cb_seq_nr) {
seqid = be32_to_cpup(p++);
if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) {
dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
dummy = be32_to_cpup(p++);
if (dummy != 0) {
slotid = be32_to_cpup(p++);
if (slotid != cb->cb_held_slot) {
dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
/*
* FIXME: process highest slotid and target highest slotid
*/
p++; // ignore current highest slot value
target = be32_to_cpup(p++);
update_cb_slot_table(session, target);
status = 0;
out:
cb->cb_seq_status = status;
@ -1167,6 +1206,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
spin_unlock(&clp->cl_lock);
}
/*
 * Claim the lowest-numbered available callback slot of a session.
 *
 * Returns the 0-based slot number, with its bit cleared in
 * se_cb_slot_avail, or -1 if no slot is available at or below
 * se_cb_highest_slot. The bitmap is examined and updated under se_lock.
 */
static int grab_slot(struct nfsd4_session *ses)
{
	int slot;

	spin_lock(&ses->se_lock);
	/* ffs() is 1-based and returns 0 for an empty bitmap, giving -1 here */
	slot = ffs(ses->se_cb_slot_avail) - 1;
	if (slot >= 0 && slot <= ses->se_cb_highest_slot) {
		/* clear the bit for the slot */
		ses->se_cb_slot_avail &= ~BIT(slot);
	} else {
		slot = -1;
	}
	spin_unlock(&ses->se_lock);

	return slot;
}
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
@ -1175,28 +1230,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_client *clp = cb->cb_clp;
struct nfsd4_session *ses = clp->cl_cb_session;
if (!cb->cb_holds_slot &&
test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
if (cb->cb_held_slot >= 0)
return true;
cb->cb_held_slot = grab_slot(ses);
if (cb->cb_held_slot < 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
dprintk("%s slot is busy\n", __func__);
cb->cb_held_slot = grab_slot(ses);
if (cb->cb_held_slot < 0)
return false;
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
cb->cb_holds_slot = true;
return true;
}
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
struct nfsd4_session *ses = clp->cl_cb_session;
if (cb->cb_holds_slot) {
cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
if (cb->cb_held_slot >= 0) {
spin_lock(&ses->se_lock);
ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
spin_unlock(&ses->se_lock);
cb->cb_held_slot = -1;
rpc_wake_up_next(&clp->cl_cb_waitq);
}
}
@ -1213,8 +1272,8 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
}
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
* TODO: cb_sequence should support referring call lists, cachethis,
* and mark callback channel down on communication errors.
*/
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
@ -1256,7 +1315,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
return true;
}
if (!cb->cb_holds_slot)
if (cb->cb_held_slot < 0)
goto need_restart;
/* This is the operation status code for CB_SEQUENCE */
@ -1270,10 +1329,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
++session->se_cb_seq_nr;
++session->se_cb_seq_nr[cb->cb_held_slot];
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
++session->se_cb_seq_nr[cb->cb_held_slot];
nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
break;
@ -1299,17 +1358,16 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
case -NFS4ERR_BADSLOT:
goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED:
if (session->se_cb_seq_nr != 1) {
session->se_cb_seq_nr = 1;
if (session->se_cb_seq_nr[cb->cb_held_slot] != 1) {
session->se_cb_seq_nr[cb->cb_held_slot] = 1;
goto retry_nowait;
}
break;
default:
nfsd4_mark_cb_fault(cb->cb_clp);
}
nfsd41_cb_release_slot(cb);
trace_nfsd_cb_free_slot(task, cb);
nfsd41_cb_release_slot(cb);
if (RPC_SIGNALLED(task))
goto need_restart;
@ -1529,7 +1587,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
cb->cb_held_slot = -1;
}
/**

View File

@ -2010,6 +2010,10 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
}
memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
new->se_cb_slot_avail = ~0U;
new->se_cb_highest_slot = min(battrs->maxreqs - 1,
NFSD_BC_SLOT_TABLE_SIZE - 1);
spin_lock_init(&new->se_lock);
return new;
out_free:
while (i--)
@ -2140,11 +2144,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
INIT_LIST_HEAD(&new->se_conns);
new->se_cb_seq_nr = 1;
atomic_set(&new->se_ref, 0);
new->se_dead = false;
new->se_cb_prog = cses->callback_prog;
new->se_cb_sec = cses->cb_sec;
atomic_set(&new->se_ref, 0);
for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
new->se_cb_seq_nr[idx] = 1;
idx = hash_sessionid(&new->se_sessionid);
list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
spin_lock(&clp->cl_lock);
@ -3153,7 +3160,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = ktime_get_boottime_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
@ -3935,6 +3941,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_PERSIST;
/* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
/* Report the correct number of backchannel slots */
cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;
init_session(rqstp, new, conf, cr_ses);
nfsd4_get_session_locked(new);

View File

@ -71,8 +71,8 @@ struct nfsd4_callback {
struct work_struct cb_work;
int cb_seq_status;
int cb_status;
int cb_held_slot;
bool cb_need_restart;
bool cb_holds_slot;
};
struct nfsd4_callback_ops {
@ -304,6 +304,9 @@ struct nfsd4_conn {
unsigned char cn_flags;
};
/* Maximum number of slots that nfsd will use in the backchannel */
#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8)
/*
* Representation of a v4.1+ session. These are refcounted in a similar fashion
* to the nfs4_client. References are only taken when the server is actively
@ -311,6 +314,10 @@ struct nfsd4_conn {
*/
struct nfsd4_session {
atomic_t se_ref;
spinlock_t se_lock;
u32 se_cb_slot_avail; /* bitmap of available slots */
u32 se_cb_highest_slot; /* highest slot client wants */
u32 se_cb_prog;
bool se_dead;
struct list_head se_hash; /* hash by sessionid */
struct list_head se_perclnt;
@ -319,8 +326,7 @@ struct nfsd4_session {
struct nfsd4_channel_attrs se_fchannel;
struct nfsd4_cb_sec se_cb_sec;
struct list_head se_conns;
u32 se_cb_prog;
u32 se_cb_seq_nr;
u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
struct nfsd4_slot *se_slots[]; /* forward channel slots */
};
@ -454,9 +460,6 @@ struct nfs4_client {
*/
struct dentry *cl_nfsd_info_dentry;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
struct net *net;

View File

@ -1697,7 +1697,7 @@ TRACE_EVENT(nfsd_cb_free_slot,
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
__entry->slot_seqno = session->se_cb_seq_nr;
__entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",