net_sched: sch_sfq: handle bigger packets

SFQ assumes that it only deals with packets smaller than 64KB.

Even before BIG TCP, TCA_STAB could provide arbitrarily big values
in qdisc_pkt_len(skb).

It is time to switch (struct sfq_slot)->allot to a 32-bit field.

sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2024-10-08 11:16:03 +00:00 committed by Jakub Kicinski
parent 0a316b16a6
commit e4650d7ae4

View File

@ -77,12 +77,6 @@
#define SFQ_EMPTY_SLOT 0xffff #define SFQ_EMPTY_SLOT 0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024 #define SFQ_DEFAULT_HASH_DIVISOR 1024
/* We use 16 bits to store allot, and want to handle packets up to 64K
* Scale allot by 8 (1<<3) so that no overflow occurs.
*/
#define SFQ_ALLOT_SHIFT 3
#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index; typedef u16 sfq_index;
@ -104,7 +98,7 @@ struct sfq_slot {
sfq_index next; /* next slot in sfq RR chain */ sfq_index next; /* next slot in sfq RR chain */
struct sfq_head dep; /* anchor in dep[] chains */ struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */ unsigned short hash; /* hash value (index in ht[]) */
short allot; /* credit for this slot */ int allot; /* credit for this slot */
unsigned int backlog; unsigned int backlog;
struct red_vars vars; struct red_vars vars;
@ -120,7 +114,6 @@ struct sfq_sched_data {
siphash_key_t perturbation; siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */ u8 cur_depth; /* depth of longest slot */
u8 flags; u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list; struct tcf_proto __rcu *filter_list;
struct tcf_block *block; struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */ sfq_index *ht; /* Hash table ('divisor' slots) */
@ -456,7 +449,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
*/ */
q->tail = slot; q->tail = slot;
/* We could use a bigger initial quantum for new flows */ /* We could use a bigger initial quantum for new flows */
slot->allot = q->scaled_quantum; slot->allot = q->quantum;
} }
if (++sch->q.qlen <= q->limit) if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS; return NET_XMIT_SUCCESS;
@ -493,7 +486,7 @@ sfq_dequeue(struct Qdisc *sch)
slot = &q->slots[a]; slot = &q->slots[a];
if (slot->allot <= 0) { if (slot->allot <= 0) {
q->tail = slot; q->tail = slot;
slot->allot += q->scaled_quantum; slot->allot += q->quantum;
goto next_slot; goto next_slot;
} }
skb = slot_dequeue_head(slot); skb = slot_dequeue_head(slot);
@ -512,7 +505,7 @@ sfq_dequeue(struct Qdisc *sch)
} }
q->tail->next = next_a; q->tail->next = next_a;
} else { } else {
slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); slot->allot -= qdisc_pkt_len(skb);
} }
return skb; return skb;
} }
@ -595,7 +588,7 @@ static void sfq_rehash(struct Qdisc *sch)
q->tail->next = x; q->tail->next = x;
} }
q->tail = slot; q->tail = slot;
slot->allot = q->scaled_quantum; slot->allot = q->quantum;
} }
} }
sch->q.qlen -= dropped; sch->q.qlen -= dropped;
@ -628,7 +621,8 @@ static void sfq_perturbation(struct timer_list *t)
rcu_read_unlock(); rcu_read_unlock();
} }
static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{ {
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt *ctl = nla_data(opt);
@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL; return -EINVAL;
/* slot->allot is a short, make sure quantum is not too big. */ if ((int)ctl->quantum < 0) {
if (ctl->quantum) { NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); return -EINVAL;
if (scaled <= 0 || scaled > SHRT_MAX)
return -EINVAL;
} }
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL; return -EINVAL;
@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return -ENOMEM; return -ENOMEM;
} }
sch_tree_lock(sch); sch_tree_lock(sch);
if (ctl->quantum) { if (ctl->quantum)
q->quantum = ctl->quantum; q->quantum = ctl->quantum;
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
}
WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows) if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->divisor = SFQ_DEFAULT_HASH_DIVISOR; q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
q->maxflows = SFQ_DEFAULT_FLOWS; q->maxflows = SFQ_DEFAULT_FLOWS;
q->quantum = psched_mtu(qdisc_dev(sch)); q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0; q->perturb_period = 0;
get_random_bytes(&q->perturbation, sizeof(q->perturbation)); get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) { if (opt) {
int err = sfq_change(sch, opt); int err = sfq_change(sch, opt, extack);
if (err) if (err)
return err; return err;
} }
@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx != SFQ_EMPTY_SLOT) { if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx]; const struct sfq_slot *slot = &q->slots[idx];
xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; xstats.allot = slot->allot;
qs.qlen = slot->qlen; qs.qlen = slot->qlen;
qs.backlog = slot->backlog; qs.backlog = slot->backlog;
} }