Merge branch 'mlx5-genl-queue-stats'

Joe Damato says:

====================
mlx5: Add netdev-genl queue stats

Welcome to v5.

Switched from RFC to just a v5, because I think this is pretty close.
Minor changes from v4 summarized below in the changelog.

Note that my NIC does not seem to support PTP and I couldn't get the
mlnx-tools mlnx_qos script to work, so I was only able to test the
following cases:

- device up at boot
- adjusting queue counts
- device down (e.g. ip link set dev eth4 down)

Please see the commit message of patch 2/2 for more details on output
and test cases.

rfcv4 thread:
  https://lore.kernel.org/linux-kernel/20240604004629.299699-1-jdamato@fastly.com/T/

rfcv4 -> v5:
 - Patch 1/2: change variable name 'mlx5e_qid' to 'txq_ix'.
 - Patch 2/2:
    - remove logic in mlx5e_get_queue_stats_rx for PTP. PTP RX stats are
      always reported in base.
    - report PTP TX in mlx5e_get_base_stats only if:
      - PTP has ever been opened, and
      - either PTP is NULL (closed) or the MLX5E_PTP_STATE_TX bit in its
        state is not set

    Otherwise, PTP TX will be reported when the txq_ix is passed into
    mlx5e_get_queue_stats_tx

rfcv3 -> rfcv4:
 - Patch 1/2 now creates a mapping (priv->txq2sq_stats) which maps txq
   indices to sq_stats structures so stats can be accessed directly.
   This mapping is kept up to date alongside txq2sq.

 - Patch 2/2:
   - All mutex_lock/unlock calls on state_lock have been dropped.
   - mlx5e_get_queue_stats_rx now uses ASSERT_RTNL() and has a special
     case for PTP. If PTP was ever opened, is currently opened, and the
     channel index matches, stats for PTP RX are output.
   - mlx5e_get_queue_stats_tx rewritten to use priv->txq2sq_stats. No
     corner cases are needed here because any txq idx (passed in as i)
     will have an up to date mapping in priv->txq2sq_stats.
   - mlx5e_get_base_stats:
     - in the RX case:
       - iterates from [params.num_channels, stats_nch) collecting
         stats.
       - if ptp was ever opened but is currently closed, add the PTP
         stats.
     - in the TX case:
       - handle 2 cases:
         - the channel is available, so sum only the unavailable TCs
           [mlx5e_get_dcb_num_tc, max_opened_tc).
         - the channel is unavailable, so sum all TCs [0, max_opened_tc).
       - if ptp was ever opened but is currently closed, add the PTP
         sq stats.

v2 -> rfcv3:
 - Added patch 1/2 which creates some helpers for computing the txq_ix
   and ch_ix/tc_ix.

 - Patch 2/2 modified in several ways:
   - Fixed variable declarations in mlx5e_get_queue_stats_rx to be at
     the start of the function.
   - mlx5e_get_queue_stats_tx rewritten to access sq stats directly by
     using the helpers added in the previous patch.
   - mlx5e_get_base_stats modified in several ways:
     - Took the state_lock when accessing priv->channels.
     - For the base RX stats, code was simplified to call
       mlx5e_get_queue_stats_rx instead of repeating the same code.
     - For the base TX stats, I attempted to implement what I think
       Tariq suggested in the previous thread:
         - for available channels, only unavailable TC stats are summed
         - for unavailable channels, all stats for TCs up to
           max_opened_tc are summed.

v1 -> v2:
  - Essentially a full rewrite after comments from Jakub, Tariq, and
    Zhu.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-06-17 11:37:19 +01:00
commit 69776921e8
3 changed files with 155 additions and 3 deletions

View File

@ -867,6 +867,8 @@ struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_selq selq;
struct mlx5e_txqsq **txq2sq;
struct mlx5e_sq_stats **txq2sq_stats;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx_dp dcbx_dp;
#endif

View File

@ -170,6 +170,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
priv->txq2sq[qid] = sq;
priv->txq2sq_stats[qid] = sq->stats;
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@ -186,6 +187,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
struct mlx5e_txqsq *sq;
u16 txq_ix;
sq = mlx5e_get_qos_sq(priv, qid);
if (!sq) /* Handle the case when the SQ failed to open. */
@ -194,7 +196,10 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
qos_dbg(sq->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
txq_ix = mlx5e_qid_from_qos(&priv->channels, qid);
priv->txq2sq[txq_ix] = NULL;
priv->txq2sq_stats[txq_ix] = NULL;
/* Make the change to txq2sq visible before the queue is started again.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@ -325,6 +330,7 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
{
struct mlx5e_params *params = &c->priv->channels.params;
struct mlx5e_txqsq __rcu **qos_sqs;
u16 txq_ix;
int i;
qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
@ -342,8 +348,11 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
txq_ix = mlx5e_qid_from_qos(&c->priv->channels, qid);
/* The queue is disabled, no synchronization with datapath is needed. */
c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
c->priv->txq2sq[txq_ix] = NULL;
c->priv->txq2sq_stats[txq_ix] = NULL;
}
}

View File

@ -39,6 +39,7 @@
#include <linux/debugfs.h>
#include <linux/if_bridge.h>
#include <linux/filter.h>
#include <net/netdev_queues.h>
#include <net/page_pool/types.h>
#include <net/pkt_sched.h>
#include <net/xdp_sock_drv.h>
@ -3125,6 +3126,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
struct mlx5e_txqsq *sq = &c->sq[tc];
priv->txq2sq[sq->txq_ix] = sq;
priv->txq2sq_stats[sq->txq_ix] = sq->stats;
}
}
@ -3139,6 +3141,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq;
priv->txq2sq_stats[sq->txq_ix] = sq->stats;
}
out:
@ -5296,6 +5299,136 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
}
/*
 * mlx5e_get_queue_stats_rx - per-queue RX statistics callback.
 * @dev:   net device; its private area is a struct mlx5e_priv
 * @i:     RX queue index, used to index priv->channel_stats
 * @stats: output; packets, bytes and alloc_fail are filled in
 *
 * Reports the sum of the regular RQ and the XSK RQ counters stored for
 * channel @i. @stats is left untouched for uplink representors and for
 * indices that have no channel stats entry.
 *
 * Must be called with RTNL held.
 */
static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
				     struct netdev_queue_stats_rx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_channel_stats *channel_stats;
	struct mlx5e_rq_stats *xskrq_stats;
	struct mlx5e_rq_stats *rq_stats;

	ASSERT_RTNL();

	if (mlx5e_is_uplink_rep(priv))
		return;

	/* Only entries below priv->stats_nch are walked elsewhere in this
	 * file (see mlx5e_get_base_stats), so treat anything outside that
	 * range as "no stats" rather than dereferencing a slot that was
	 * presumably never allocated (e.g. channels not opened yet).
	 */
	if (i < 0 || i >= priv->stats_nch)
		return;

	channel_stats = priv->channel_stats[i];
	xskrq_stats = &channel_stats->xskrq;
	rq_stats = &channel_stats->rq;

	stats->packets = rq_stats->packets + xskrq_stats->packets;
	stats->bytes = rq_stats->bytes + xskrq_stats->bytes;
	stats->alloc_fail = rq_stats->buff_alloc_err +
			    xskrq_stats->buff_alloc_err;
}
/*
 * mlx5e_get_queue_stats_tx - per-queue TX statistics callback.
 * @dev:   net device; its private area is a struct mlx5e_priv
 * @i:     TX queue index, used to index priv->txq2sq_stats
 * @stats: output; packets and bytes are filled in
 *
 * @stats is left untouched when no SQ stats are mapped to @i.
 *
 * Must be called with RTNL held.
 */
static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
				     struct netdev_queue_stats_tx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_sq_stats *sq_stats;

	ASSERT_RTNL();

	/* no special case needed for ptp htb etc since txq2sq_stats is kept up
	 * to date for active sq_stats, otherwise get_base_stats takes care of
	 * inactive sqs.
	 *
	 * The table is zero-allocated and slots are reset to NULL when a QoS
	 * SQ is deactivated, so a slot without an active SQ holds NULL and
	 * must not be dereferenced.
	 */
	sq_stats = priv->txq2sq_stats[i];
	if (!sq_stats)
		return;

	stats->packets = sq_stats->packets;
	stats->bytes = sq_stats->bytes;
}
static void mlx5e_get_base_stats(struct net_device *dev,
struct netdev_queue_stats_rx *rx,
struct netdev_queue_stats_tx *tx)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_ptp *ptp_channel;
int i, tc;
ASSERT_RTNL();
if (!mlx5e_is_uplink_rep(priv)) {
rx->packets = 0;
rx->bytes = 0;
rx->alloc_fail = 0;
for (i = priv->channels.params.num_channels; i < priv->stats_nch; i++) {
struct netdev_queue_stats_rx rx_i = {0};
mlx5e_get_queue_stats_rx(dev, i, &rx_i);
rx->packets += rx_i.packets;
rx->bytes += rx_i.bytes;
rx->alloc_fail += rx_i.alloc_fail;
}
/* always report PTP RX stats from base as there is no
* corresponding channel to report them under in
* mlx5e_get_queue_stats_rx.
*/
if (priv->rx_ptp_opened) {
struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
rx->packets += rq_stats->packets;
rx->bytes += rq_stats->bytes;
}
}
tx->packets = 0;
tx->bytes = 0;
for (i = 0; i < priv->stats_nch; i++) {
struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
/* handle two cases:
*
* 1. channels which are active. In this case,
* report only deactivated TCs on these channels.
*
* 2. channels which were deactivated
* (i > priv->channels.params.num_channels)
* must have all of their TCs [0 .. priv->max_opened_tc)
* examined because deactivated channels will not be in the
* range of [0..real_num_tx_queues) and will not have their
* stats reported by mlx5e_get_queue_stats_tx.
*/
if (i < priv->channels.params.num_channels)
tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
else
tc = 0;
for (; tc < priv->max_opened_tc; tc++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[tc];
tx->packets += sq_stats->packets;
tx->bytes += sq_stats->bytes;
}
}
/* if PTP TX was opened at some point and has since either:
* - been shutdown and set to NULL, or
* - simply disabled (bit unset)
*
* report stats directly from the ptp_stats structures as these queues
* are now unavailable and there is no txq index to retrieve these
* stats via calls to mlx5e_get_queue_stats_tx.
*/
ptp_channel = priv->channels.ptp;
if (priv->tx_ptp_opened && (!ptp_channel || !test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state))) {
for (tc = 0; tc < priv->max_opened_tc; tc++) {
struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[tc];
tx->packets += sq_stats->packets;
tx->bytes += sq_stats->bytes;
}
}
}
/* Queue statistics callbacks; installed as netdev->stat_ops. */
static const struct netdev_stat_ops mlx5e_stat_ops = {
	.get_base_stats		= mlx5e_get_base_stats,
	.get_queue_stats_rx	= mlx5e_get_queue_stats_rx,
	.get_queue_stats_tx	= mlx5e_get_queue_stats_tx,
};
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@ -5313,6 +5446,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->watchdog_timeo = 15 * HZ;
netdev->stat_ops = &mlx5e_stat_ops;
netdev->ethtool_ops = &mlx5e_ethtool_ops;
netdev->vlan_features |= NETIF_F_SG;
@ -5848,9 +5982,13 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
if (!priv->txq2sq)
goto err_destroy_workqueue;
priv->txq2sq_stats = kcalloc_node(num_txqs, sizeof(*priv->txq2sq_stats), GFP_KERNEL, node);
if (!priv->txq2sq_stats)
goto err_free_txq2sq;
priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
if (!priv->tx_rates)
goto err_free_txq2sq;
goto err_free_txq2sq_stats;
priv->channel_stats =
kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
@ -5861,6 +5999,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
err_free_tx_rates:
kfree(priv->tx_rates);
err_free_txq2sq_stats:
kfree(priv->txq2sq_stats);
err_free_txq2sq:
kfree(priv->txq2sq);
err_destroy_workqueue:
@ -5884,6 +6024,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kvfree(priv->channel_stats[i]);
kfree(priv->channel_stats);
kfree(priv->tx_rates);
kfree(priv->txq2sq_stats);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
mlx5e_selq_cleanup(&priv->selq);