mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 23:39:18 +00:00
4ba67ef3a1
With the previous change, struct dql->stall_thrs will be in the hot path (at queue side), even if DQS is disabled. The other fields accessed in this function (last_obj_cnt and num_queued) are in the first cache line, so let's move this field (stall_thrs) to the very first cache line, since there is a hole there. This does not change the structure size, since it moves a short (2 bytes) into a 4-byte hole in the first cache line. This is the new structure format now: struct dql { unsigned int num_queued; unsigned int last_obj_cnt; ... short unsigned int stall_thrs; /* XXX 2 bytes hole, try to pack */ ... /* --- cacheline 1 boundary (64 bytes) --- */ ... /* Longest stall detected, reported to user */ short unsigned int stall_max; /* XXX 2 bytes hole, try to pack */ }; Also, read the stall_thrs (now in the very first cache line) earlier, together with dql->num_queued (also in the first cache line). Suggested-by: Jakub Kicinski <kuba@kernel.org> Suggested-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Breno Leitao <leitao@debian.org> Link: https://lore.kernel.org/r/20240411192241.2498631-5-leitao@debian.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
164 lines
5.4 KiB
C
164 lines
5.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Dynamic queue limits (dql) - Definitions
 *
 * Copyright (c) 2011, Tom Herbert <therbert@google.com>
 *
 * This header file contains the definitions for dynamic queue limits (dql).
 * dql would be used in conjunction with a producer/consumer type queue
 * (possibly a HW queue). Such a queue would have these general properties:
 *
 *   1) Objects are queued up to some limit specified as number of objects.
 *   2) Periodically a completion process executes which retires consumed
 *      objects.
 *   3) Starvation occurs when limit has been reached, all queued data has
 *      actually been consumed, but completion processing has not yet run
 *      so queuing new data is blocked.
 *   4) Minimizing the amount of queued data is desirable.
 *
 * The goal of dql is to calculate the limit as the minimum number of objects
 * needed to prevent starvation.
 *
 * The primary functions of dql are:
 *    dql_queued - called when objects are enqueued to record number of objects
 *    dql_avail - returns how many objects are available to be queued based
 *      on the object limit and how many objects are already enqueued
 *    dql_completed - called at completion time to indicate how many objects
 *      were retired from the queue
 *
 * The dql implementation does not implement any locking for the dql data
 * structures, the higher layer should provide this. dql_queued should
 * be serialized to prevent concurrent execution of the function; this
 * is also true for dql_completed. However, dql_queued and dql_completed can
 * be executed concurrently (i.e. they can be protected by different locks).
 */
|
|
|
|
#ifndef _LINUX_DQL_H
|
|
#define _LINUX_DQL_H
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/bitops.h>
|
|
#include <asm/bug.h>
|
|
|
|
/* Number of unsigned long words in the dql->history bitmap ring */
#define DQL_HIST_LEN		4
/* Ring accessor: the history word selected by idx modulo DQL_HIST_LEN */
#define DQL_HIST_ENT(dql, idx)	((dql)->history[(idx) % DQL_HIST_LEN])
|
|
|
|
struct dql {
|
|
/* Fields accessed in enqueue path (dql_queued) */
|
|
unsigned int num_queued; /* Total ever queued */
|
|
unsigned int adj_limit; /* limit + num_completed */
|
|
unsigned int last_obj_cnt; /* Count at last queuing */
|
|
|
|
/* Stall threshold (in jiffies), defined by user */
|
|
unsigned short stall_thrs;
|
|
|
|
unsigned long history_head; /* top 58 bits of jiffies */
|
|
/* stall entries, a bit per entry */
|
|
unsigned long history[DQL_HIST_LEN];
|
|
|
|
/* Fields accessed only by completion path (dql_completed) */
|
|
|
|
unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */
|
|
unsigned int num_completed; /* Total ever completed */
|
|
|
|
unsigned int prev_ovlimit; /* Previous over limit */
|
|
unsigned int prev_num_queued; /* Previous queue total */
|
|
unsigned int prev_last_obj_cnt; /* Previous queuing cnt */
|
|
|
|
unsigned int lowest_slack; /* Lowest slack found */
|
|
unsigned long slack_start_time; /* Time slacks seen */
|
|
|
|
/* Configuration */
|
|
unsigned int max_limit; /* Max limit */
|
|
unsigned int min_limit; /* Minimum limit */
|
|
unsigned int slack_hold_time; /* Time to measure slack */
|
|
|
|
/* Longest stall detected, reported to user */
|
|
unsigned short stall_max;
|
|
unsigned long last_reap; /* Last reap (in jiffies) */
|
|
unsigned long stall_cnt; /* Number of stalls */
|
|
};
|
|
|
|
/* Set some static maximums */
/* Largest object count accepted by a single dql_queued() call */
#define DQL_MAX_OBJECT (UINT_MAX / 16)
/* Chosen so that DQL_MAX_LIMIT + DQL_MAX_OBJECT equals UINT_MAX / 2 */
#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
|
|
|
|
/* Populate the bitmap to be processed later in dql_check_stall() */
|
|
static inline void dql_queue_stall(struct dql *dql)
|
|
{
|
|
unsigned long map, now, now_hi, i;
|
|
|
|
now = jiffies;
|
|
now_hi = now / BITS_PER_LONG;
|
|
|
|
/* The following code set a bit in the ring buffer, where each
|
|
* bit trackes time the packet was queued. The dql->history buffer
|
|
* tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot
|
|
*/
|
|
if (unlikely(now_hi != dql->history_head)) {
|
|
/* About to reuse slots, clear them */
|
|
for (i = 0; i < DQL_HIST_LEN; i++) {
|
|
/* Multiplication masks high bits */
|
|
if (now_hi * BITS_PER_LONG ==
|
|
(dql->history_head + i) * BITS_PER_LONG)
|
|
break;
|
|
DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0;
|
|
}
|
|
/* pairs with smp_rmb() in dql_check_stall() */
|
|
smp_wmb();
|
|
WRITE_ONCE(dql->history_head, now_hi);
|
|
}
|
|
|
|
/* __set_bit() does not guarantee WRITE_ONCE() semantics */
|
|
map = DQL_HIST_ENT(dql, now_hi);
|
|
|
|
/* Populate the history with an entry (bit) per queued */
|
|
if (!(map & BIT_MASK(now)))
|
|
WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
|
|
}
|
|
|
|
/*
|
|
* Record number of objects queued. Assumes that caller has already checked
|
|
* availability in the queue with dql_avail.
|
|
*/
|
|
static inline void dql_queued(struct dql *dql, unsigned int count)
|
|
{
|
|
if (WARN_ON_ONCE(count > DQL_MAX_OBJECT))
|
|
return;
|
|
|
|
dql->last_obj_cnt = count;
|
|
|
|
/* We want to force a write first, so that cpu do not attempt
|
|
* to get cache line containing last_obj_cnt, num_queued, adj_limit
|
|
* in Shared state, but directly does a Request For Ownership
|
|
* It is only a hint, we use barrier() only.
|
|
*/
|
|
barrier();
|
|
|
|
dql->num_queued += count;
|
|
|
|
/* Only populate stall information if the threshold is set */
|
|
if (READ_ONCE(dql->stall_thrs))
|
|
dql_queue_stall(dql);
|
|
}
|
|
|
|
/* Returns how many objects can be queued, < 0 indicates over limit. */
|
|
static inline int dql_avail(const struct dql *dql)
|
|
{
|
|
return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued);
|
|
}
|
|
|
|
/*
 * Record number of completed objects and recalculate the limit.
 *
 * @dql:   queue limit state to update
 * @count: number of objects retired from the queue
 */
void dql_completed(struct dql *dql, unsigned int count);
|
|
|
|
/*
 * Reset dql state.
 *
 * @dql: queue limit state to reset
 */
void dql_reset(struct dql *dql);
|
|
|
|
/*
 * Initialize dql state.
 *
 * @dql:       queue limit state to initialize
 * @hold_time: presumably the initial slack_hold_time -- TODO confirm
 *             against the implementation
 */
void dql_init(struct dql *dql, unsigned int hold_time);
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _LINUX_DQL_H */
|