net_sched: sch_sfq: handle bigger packets

SFQ has an assumption on dealing with packets smaller than 64KB.

Even before BIG TCP, TCA_STAB can provide arbitrary big values
in qdisc_pkt_len(skb)

It is time to switch (struct sfq_slot)->allot to a 32bit field.

sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2024-10-08 11:16:03 +00:00 committed by Jakub Kicinski
parent 0a316b16a6
commit e4650d7ae4

View File

@ -77,12 +77,6 @@
#define SFQ_EMPTY_SLOT 0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024
/* We use 16 bits to store allot, and want to handle packets up to 64K
* Scale allot by 8 (1<<3) so that no overflow occurs.
*/
#define SFQ_ALLOT_SHIFT 3
#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;
@ -104,7 +98,7 @@ struct sfq_slot {
sfq_index next; /* next slot in sfq RR chain */
struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */
short allot; /* credit for this slot */
int allot; /* credit for this slot */
unsigned int backlog;
struct red_vars vars;
@ -120,7 +114,6 @@ struct sfq_sched_data {
siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
@ -456,7 +449,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
*/
q->tail = slot;
/* We could use a bigger initial quantum for new flows */
slot->allot = q->scaled_quantum;
slot->allot = q->quantum;
}
if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS;
@ -493,7 +486,7 @@ sfq_dequeue(struct Qdisc *sch)
slot = &q->slots[a];
if (slot->allot <= 0) {
q->tail = slot;
slot->allot += q->scaled_quantum;
slot->allot += q->quantum;
goto next_slot;
}
skb = slot_dequeue_head(slot);
@ -512,7 +505,7 @@ sfq_dequeue(struct Qdisc *sch)
}
q->tail->next = next_a;
} else {
slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
slot->allot -= qdisc_pkt_len(skb);
}
return skb;
}
@ -595,7 +588,7 @@ static void sfq_rehash(struct Qdisc *sch)
q->tail->next = x;
}
q->tail = slot;
slot->allot = q->scaled_quantum;
slot->allot = q->quantum;
}
}
sch->q.qlen -= dropped;
@ -628,7 +621,8 @@ static void sfq_perturbation(struct timer_list *t)
rcu_read_unlock();
}
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
/* slot->allot is a short, make sure quantum is not too big. */
if (ctl->quantum) {
unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
if (scaled <= 0 || scaled > SHRT_MAX)
return -EINVAL;
if ((int)ctl->quantum < 0) {
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return -ENOMEM;
}
sch_tree_lock(sch);
if (ctl->quantum) {
if (ctl->quantum)
q->quantum = ctl->quantum;
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
}
WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
q->maxflows = SFQ_DEFAULT_FLOWS;
q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
int err = sfq_change(sch, opt);
int err = sfq_change(sch, opt, extack);
if (err)
return err;
}
@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx];
xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
xstats.allot = slot->allot;
qs.qlen = slot->qlen;
qs.backlog = slot->backlog;
}