SUNRPC: Count ops completing with tk_status < 0

We often see various error conditions with NFS4.x that show up with
a very high operation count all completing with tk_status < 0 in a
short period of time.  Add a count to rpc_iostats to record on a
per-op basis the ops that complete in this manner, which will
enable lower overhead diagnostics.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
Dave Wysochanski 2019-05-23 16:13:50 -04:00 committed by Trond Myklebust
parent 93ba048e1b
commit a332518fda
2 changed files with 12 additions and 3 deletions

View File

@ -30,7 +30,7 @@
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#define RPC_IOSTATS_VERS "1.0" #define RPC_IOSTATS_VERS "1.1"
struct rpc_iostats { struct rpc_iostats {
spinlock_t om_lock; spinlock_t om_lock;
@ -66,6 +66,11 @@ struct rpc_iostats {
ktime_t om_queue, /* queued for xmit */ ktime_t om_queue, /* queued for xmit */
om_rtt, /* RPC RTT */ om_rtt, /* RPC RTT */
om_execute; /* RPC execution */ om_execute; /* RPC execution */
/*
* The count of operations that complete with tk_status < 0.
* These statuses usually indicate error conditions.
*/
unsigned long om_error_status;
} ____cacheline_aligned; } ____cacheline_aligned;
struct rpc_task; struct rpc_task;

View File

@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
execute = ktime_sub(now, task->tk_start); execute = ktime_sub(now, task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
if (task->tk_status < 0)
op_metrics->om_error_status++;
spin_unlock(&op_metrics->om_lock); spin_unlock(&op_metrics->om_lock);
@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b)
a->om_queue = ktime_add(a->om_queue, b->om_queue); a->om_queue = ktime_add(a->om_queue, b->om_queue);
a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
a->om_execute = ktime_add(a->om_execute, b->om_execute); a->om_execute = ktime_add(a->om_execute, b->om_execute);
a->om_error_status += b->om_error_status;
} }
static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
int op, const struct rpc_procinfo *procs) int op, const struct rpc_procinfo *procs)
{ {
_print_name(seq, op, procs); _print_name(seq, op, procs);
seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
stats->om_ops, stats->om_ops,
stats->om_ntrans, stats->om_ntrans,
stats->om_timeouts, stats->om_timeouts,
@ -233,7 +236,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
stats->om_bytes_recv, stats->om_bytes_recv,
ktime_to_ms(stats->om_queue), ktime_to_ms(stats->om_queue),
ktime_to_ms(stats->om_rtt), ktime_to_ms(stats->om_rtt),
ktime_to_ms(stats->om_execute)); ktime_to_ms(stats->om_execute),
stats->om_error_status);
} }
void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)