/* * Block stat tracking code * * Copyright (C) 2016 Jens Axboe */ #include #include #include "blk-stat.h" #include "blk-mq.h" #define BLK_RQ_STAT_BATCH 64 static void blk_stat_flush_batch(struct blk_rq_stat *stat) { const s32 nr_batch = READ_ONCE(stat->nr_batch); const s32 nr_samples = READ_ONCE(stat->nr_samples); if (!nr_batch) return; if (!nr_samples) stat->mean = div64_s64(stat->batch, nr_batch); else { stat->mean = div64_s64((stat->mean * nr_samples) + stat->batch, nr_batch + nr_samples); } stat->nr_samples += nr_batch; stat->nr_batch = stat->batch = 0; } static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { blk_stat_flush_batch(src); if (!src->nr_samples) return; dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); if (!dst->nr_samples) dst->mean = src->mean; else { dst->mean = div64_s64((src->mean * src->nr_samples) + (dst->mean * dst->nr_samples), dst->nr_samples + src->nr_samples); } dst->nr_samples += src->nr_samples; } static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst) { struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; uint64_t latest = 0; int i, j, nr; blk_stat_init(&dst[READ]); blk_stat_init(&dst[WRITE]); nr = 0; do { uint64_t newest = 0; queue_for_each_hw_ctx(q, hctx, i) { hctx_for_each_ctx(hctx, ctx, j) { blk_stat_flush_batch(&ctx->stat[READ]); blk_stat_flush_batch(&ctx->stat[WRITE]); if (!ctx->stat[READ].nr_samples && !ctx->stat[WRITE].nr_samples) continue; if (ctx->stat[READ].time > newest) newest = ctx->stat[READ].time; if (ctx->stat[WRITE].time > newest) newest = ctx->stat[WRITE].time; } } /* * No samples */ if (!newest) break; if (newest > latest) latest = newest; queue_for_each_hw_ctx(q, hctx, i) { hctx_for_each_ctx(hctx, ctx, j) { if (ctx->stat[READ].time == newest) { blk_stat_sum(&dst[READ], &ctx->stat[READ]); nr++; } if (ctx->stat[WRITE].time == newest) { blk_stat_sum(&dst[WRITE], &ctx->stat[WRITE]); nr++; } } } /* * If we race on finding an entry, just loop back again. * Should be very rare. */ } while (!nr); dst[READ].time = dst[WRITE].time = latest; } void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst) { if (q->mq_ops) blk_mq_stat_get(q, dst); else { blk_stat_flush_batch(&q->rq_stats[READ]); blk_stat_flush_batch(&q->rq_stats[WRITE]); memcpy(&dst[READ], &q->rq_stats[READ], sizeof(struct blk_rq_stat)); memcpy(&dst[WRITE], &q->rq_stats[WRITE], sizeof(struct blk_rq_stat)); } } void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst) { struct blk_mq_ctx *ctx; unsigned int i, nr; nr = 0; do { uint64_t newest = 0; hctx_for_each_ctx(hctx, ctx, i) { blk_stat_flush_batch(&ctx->stat[READ]); blk_stat_flush_batch(&ctx->stat[WRITE]); if (!ctx->stat[READ].nr_samples && !ctx->stat[WRITE].nr_samples) continue; if (ctx->stat[READ].time > newest) newest = ctx->stat[READ].time; if (ctx->stat[WRITE].time > newest) newest = ctx->stat[WRITE].time; } if (!newest) break; hctx_for_each_ctx(hctx, ctx, i) { if (ctx->stat[READ].time == newest) { blk_stat_sum(&dst[READ], &ctx->stat[READ]); nr++; } if (ctx->stat[WRITE].time == newest) { blk_stat_sum(&dst[WRITE], &ctx->stat[WRITE]); nr++; } } /* * If we race on finding an entry, just loop back again. * Should be very rare, as the window is only updated * occasionally */ } while (!nr); } static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now) { stat->min = -1ULL; stat->max = stat->nr_samples = stat->mean = 0; stat->batch = stat->nr_batch = 0; stat->time = time_now & BLK_STAT_NSEC_MASK; } void blk_stat_init(struct blk_rq_stat *stat) { __blk_stat_init(stat, ktime_to_ns(ktime_get())); } static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now) { return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK); } bool blk_stat_is_current(struct blk_rq_stat *stat) { return __blk_stat_is_current(stat, ktime_to_ns(ktime_get())); } void blk_stat_add(struct blk_rq_stat *stat, struct request *rq) { s64 now, value; now = __blk_stat_time(ktime_to_ns(ktime_get())); if (now < blk_stat_time(&rq->issue_stat)) return; if (!__blk_stat_is_current(stat, now)) __blk_stat_init(stat, now); value = now - blk_stat_time(&rq->issue_stat); if (value > stat->max) stat->max = value; if (value < stat->min) stat->min = value; if (stat->batch + value < stat->batch || stat->nr_batch + 1 == BLK_RQ_STAT_BATCH) blk_stat_flush_batch(stat); stat->batch += value; stat->nr_batch++; } void blk_stat_clear(struct request_queue *q) { if (q->mq_ops) { struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; int i, j; queue_for_each_hw_ctx(q, hctx, i) { hctx_for_each_ctx(hctx, ctx, j) { blk_stat_init(&ctx->stat[READ]); blk_stat_init(&ctx->stat[WRITE]); } } } else { blk_stat_init(&q->rq_stats[READ]); blk_stat_init(&q->rq_stats[WRITE]); } } void blk_stat_set_issue_time(struct blk_issue_stat *stat) { stat->time = (stat->time & BLK_STAT_MASK) | (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK); } /* * Enable stat tracking, return whether it was enabled */ bool blk_stat_enable(struct request_queue *q) { if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { set_bit(QUEUE_FLAG_STATS, &q->queue_flags); return false; } return true; }