net: dsa: add trace points for FDB/MDB operations

DSA performs non-trivial housekeeping of unicast and multicast addresses
on shared (CPU and DSA) ports, and puts a bit of pressure on higher
layers, requiring them to behave correctly (remove these addresses
exactly as many times as they were added). Otherwise, either addresses
linger around forever, or DSA returns -ENOENT complaining that entries
that were already deleted must be deleted again.

To aid debugging, introduce some trace points specifically for FDB and
MDB - that's where some of the bugs still are right now.

Some bugs I have seen were also due to race conditions, see:
- 630fd4822af2 ("net: dsa: flush switchdev workqueue on bridge join error path")
- a2614140dc0f ("net: dsa: mv88e6xxx: flush switchdev FDB workqueue before removing VLAN")

so it would be good to not disturb the timing too much, hence the choice
to use trace points vs regular dev_dbg().

I've had these for some time on my computer in a less polished form, and
they've proven useful. What I found most useful was to enable
CONFIG_BOOTTIME_TRACING, add "trace_event=dsa" to the kernel cmdline,
and run "cat /sys/kernel/debug/tracing/trace". This is to debug more
complex environments with network managers started by the init system,
things like that.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Vladimir Oltean 2023-04-07 17:14:50 +03:00 committed by David S. Miller
parent 18bb56ab44
commit 9538ebce88
4 changed files with 423 additions and 12 deletions

View File

@ -16,7 +16,8 @@ dsa_core-y += \
slave.o \
switch.o \
tag.o \
tag_8021q.o
tag_8021q.o \
trace.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
@ -37,3 +38,6 @@ obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
# for tracing framework to find trace.h
CFLAGS_trace.o := -I$(src)

View File

@ -18,6 +18,7 @@
#include "slave.h"
#include "switch.h"
#include "tag_8021q.h"
#include "trace.h"
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
@ -164,14 +165,20 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_add(ds, port, mdb, db);
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
err = ds->ops->port_mdb_add(ds, port, mdb, db);
trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err);
return err;
}
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (a) {
refcount_inc(&a->refcount);
trace_dsa_mdb_add_bump(dp, mdb->addr, mdb->vid, &db,
&a->refcount);
goto out;
}
@ -182,6 +189,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
}
err = ds->ops->port_mdb_add(ds, port, mdb, db);
trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err);
if (err) {
kfree(a);
goto out;
@ -209,21 +217,30 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_del(ds, port, mdb, db);
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
err = ds->ops->port_mdb_del(ds, port, mdb, db);
trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err);
return err;
}
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (!a) {
trace_dsa_mdb_del_not_found(dp, mdb->addr, mdb->vid, &db);
err = -ENOENT;
goto out;
}
if (!refcount_dec_and_test(&a->refcount))
if (!refcount_dec_and_test(&a->refcount)) {
trace_dsa_mdb_del_drop(dp, mdb->addr, mdb->vid, &db,
&a->refcount);
goto out;
}
err = ds->ops->port_mdb_del(ds, port, mdb, db);
trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@ -247,14 +264,19 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_add(ds, port, addr, vid, db);
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
trace_dsa_fdb_add_hw(dp, addr, vid, &db, err);
return err;
}
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
trace_dsa_fdb_add_bump(dp, addr, vid, &db, &a->refcount);
goto out;
}
@ -265,6 +287,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
}
err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
trace_dsa_fdb_add_hw(dp, addr, vid, &db, err);
if (err) {
kfree(a);
goto out;
@ -291,21 +314,29 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_del(ds, port, addr, vid, db);
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) {
err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
trace_dsa_fdb_del_hw(dp, addr, vid, &db, err);
return err;
}
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (!a) {
trace_dsa_fdb_del_not_found(dp, addr, vid, &db);
err = -ENOENT;
goto out;
}
if (!refcount_dec_and_test(&a->refcount))
if (!refcount_dec_and_test(&a->refcount)) {
trace_dsa_fdb_del_drop(dp, addr, vid, &db, &a->refcount);
goto out;
}
err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
trace_dsa_fdb_del_hw(dp, addr, vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@ -332,6 +363,8 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
trace_dsa_lag_fdb_add_bump(lag->dev, addr, vid, &db,
&a->refcount);
goto out;
}
@ -342,6 +375,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
}
err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db);
trace_dsa_lag_fdb_add_hw(lag->dev, addr, vid, &db, err);
if (err) {
kfree(a);
goto out;
@ -370,14 +404,19 @@ static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag,
a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
if (!a) {
trace_dsa_lag_fdb_del_not_found(lag->dev, addr, vid, &db);
err = -ENOENT;
goto out;
}
if (!refcount_dec_and_test(&a->refcount))
if (!refcount_dec_and_test(&a->refcount)) {
trace_dsa_lag_fdb_del_drop(lag->dev, addr, vid, &db,
&a->refcount);
goto out;
}
err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db);
trace_dsa_lag_fdb_del_hw(lag->dev, addr, vid, &db, err);
if (err) {
refcount_set(&a->refcount, 1);
goto out;

39
net/dsa/trace.c Normal file
View File

@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright 2022-2023 NXP
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ])
{
switch (db->type) {
case DSA_DB_PORT:
sprintf(buf, "port %s", db->dp->name);
break;
case DSA_DB_LAG:
sprintf(buf, "lag %s id %d", db->lag.dev->name, db->lag.id);
break;
case DSA_DB_BRIDGE:
sprintf(buf, "bridge %s num %d", db->bridge.dev->name,
db->bridge.num);
break;
default:
sprintf(buf, "unknown");
break;
}
}
const char *dsa_port_kind(const struct dsa_port *dp)
{
switch (dp->type) {
case DSA_PORT_TYPE_USER:
return "user";
case DSA_PORT_TYPE_CPU:
return "cpu";
case DSA_PORT_TYPE_DSA:
return "dsa";
default:
return "unused";
}
}

329
net/dsa/trace.h Normal file
View File

@ -0,0 +1,329 @@
/* SPDX-License-Identifier: GPL-2.0
* Copyright 2022-2023 NXP
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM dsa
#if !defined(_NET_DSA_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _NET_DSA_TRACE_H
#include <net/dsa.h>
#include <linux/etherdevice.h>
#include <linux/refcount.h>
#include <linux/tracepoint.h>
/* Enough to fit "bridge %s num %d" where num has 3 digits */
#define DSA_DB_BUFSIZ (IFNAMSIZ + 16)
void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ]);
const char *dsa_port_kind(const struct dsa_port *dp);
DECLARE_EVENT_CLASS(dsa_port_addr_op_hw,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
const struct dsa_db *db, int err),
TP_ARGS(dp, addr, vid, db, err),
TP_STRUCT__entry(
__string(dev, dev_name(dp->ds->dev))
__string(kind, dsa_port_kind(dp))
__field(int, port)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(int, err)
),
TP_fast_assign(
__assign_str(dev, dev_name(dp->ds->dev));
__assign_str(kind, dsa_port_kind(dp));
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->err = err;
),
TP_printk("%s %s port %d addr %pM vid %u db \"%s\" err %d",
__get_str(dev), __get_str(kind), __entry->port, __entry->addr,
__entry->vid, __entry->db_buf, __entry->err)
);
/* Add unicast/multicast address to hardware, either on user ports
* (where no refcounting is kept), or on shared ports when the entry
* is first seen and its refcount is 1.
*/
DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_add_hw,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(dp, addr, vid, db, err));
DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_add_hw,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(dp, addr, vid, db, err));
/* Delete unicast/multicast address from hardware, either on user ports or
* when the refcount on shared ports reaches 0
*/
DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_del_hw,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(dp, addr, vid, db, err));
DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_del_hw,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(dp, addr, vid, db, err));
DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
const struct dsa_db *db, const refcount_t *refcount),
TP_ARGS(dp, addr, vid, db, refcount),
TP_STRUCT__entry(
__string(dev, dev_name(dp->ds->dev))
__string(kind, dsa_port_kind(dp))
__field(int, port)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(unsigned int, refcount)
),
TP_fast_assign(
__assign_str(dev, dev_name(dp->ds->dev));
__assign_str(kind, dsa_port_kind(dp));
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->refcount = refcount_read(refcount);
),
TP_printk("%s %s port %d addr %pM vid %u db \"%s\" refcount %u",
__get_str(dev), __get_str(kind), __entry->port, __entry->addr,
__entry->vid, __entry->db_buf, __entry->refcount)
);
/* Bump the refcount of an existing unicast/multicast address on shared ports */
DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_add_bump,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db,
const refcount_t *refcount),
TP_ARGS(dp, addr, vid, db, refcount));
DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_add_bump,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db,
const refcount_t *refcount),
TP_ARGS(dp, addr, vid, db, refcount));
/* Drop the refcount of a multicast address that we still keep on
* shared ports
*/
DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_del_drop,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db,
const refcount_t *refcount),
TP_ARGS(dp, addr, vid, db, refcount));
DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_del_drop,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db,
const refcount_t *refcount),
TP_ARGS(dp, addr, vid, db, refcount));
DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid,
const struct dsa_db *db),
TP_ARGS(dp, addr, vid, db),
TP_STRUCT__entry(
__string(dev, dev_name(dp->ds->dev))
__string(kind, dsa_port_kind(dp))
__field(int, port)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
),
TP_fast_assign(
__assign_str(dev, dev_name(dp->ds->dev));
__assign_str(kind, dsa_port_kind(dp));
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
),
TP_printk("%s %s port %d addr %pM vid %u db \"%s\"",
__get_str(dev), __get_str(kind), __entry->port,
__entry->addr, __entry->vid, __entry->db_buf)
);
/* Attempt to delete a unicast/multicast address on shared ports for which
* the delete operation was called more times than the addition
*/
DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_fdb_del_not_found,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db),
TP_ARGS(dp, addr, vid, db));
DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_mdb_del_not_found,
TP_PROTO(const struct dsa_port *dp, const unsigned char *addr,
u16 vid, const struct dsa_db *db),
TP_ARGS(dp, addr, vid, db));
TRACE_EVENT(dsa_lag_fdb_add_hw,
TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(lag_dev, addr, vid, db, err),
TP_STRUCT__entry(
__string(dev, lag_dev->name)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(int, err)
),
TP_fast_assign(
__assign_str(dev, lag_dev->name);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->err = err;
),
TP_printk("%s addr %pM vid %u db \"%s\" err %d",
__get_str(dev), __entry->addr, __entry->vid,
__entry->db_buf, __entry->err)
);
TRACE_EVENT(dsa_lag_fdb_add_bump,
TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
u16 vid, const struct dsa_db *db, const refcount_t *refcount),
TP_ARGS(lag_dev, addr, vid, db, refcount),
TP_STRUCT__entry(
__string(dev, lag_dev->name)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(unsigned int, refcount)
),
TP_fast_assign(
__assign_str(dev, lag_dev->name);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->refcount = refcount_read(refcount);
),
TP_printk("%s addr %pM vid %u db \"%s\" refcount %u",
__get_str(dev), __entry->addr, __entry->vid,
__entry->db_buf, __entry->refcount)
);
TRACE_EVENT(dsa_lag_fdb_del_hw,
TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
u16 vid, const struct dsa_db *db, int err),
TP_ARGS(lag_dev, addr, vid, db, err),
TP_STRUCT__entry(
__string(dev, lag_dev->name)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(int, err)
),
TP_fast_assign(
__assign_str(dev, lag_dev->name);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->err = err;
),
TP_printk("%s addr %pM vid %u db \"%s\" err %d",
__get_str(dev), __entry->addr, __entry->vid,
__entry->db_buf, __entry->err)
);
TRACE_EVENT(dsa_lag_fdb_del_drop,
TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
u16 vid, const struct dsa_db *db, const refcount_t *refcount),
TP_ARGS(lag_dev, addr, vid, db, refcount),
TP_STRUCT__entry(
__string(dev, lag_dev->name)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
__field(unsigned int, refcount)
),
TP_fast_assign(
__assign_str(dev, lag_dev->name);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
__entry->refcount = refcount_read(refcount);
),
TP_printk("%s addr %pM vid %u db \"%s\" refcount %u",
__get_str(dev), __entry->addr, __entry->vid,
__entry->db_buf, __entry->refcount)
);
TRACE_EVENT(dsa_lag_fdb_del_not_found,
TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr,
u16 vid, const struct dsa_db *db),
TP_ARGS(lag_dev, addr, vid, db),
TP_STRUCT__entry(
__string(dev, lag_dev->name)
__array(unsigned char, addr, ETH_ALEN)
__field(u16, vid)
__array(char, db_buf, DSA_DB_BUFSIZ)
),
TP_fast_assign(
__assign_str(dev, lag_dev->name);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
),
TP_printk("%s addr %pM vid %u db \"%s\"",
__get_str(dev), __entry->addr, __entry->vid, __entry->db_buf)
);
#endif /* _NET_DSA_TRACE_H */
/* We don't want to use include/trace/events */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
/* This part must be outside protection */
#include <trace/define_trace.h>