mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
61bf0009a7
net_dim() is currently passed a struct dim_sample argument by value. struct dim_sample is 24 bytes. Since this is greater 16 bytes, x86-64 passes it on the stack. All callers have already initialized dim_sample on the stack, so passing it by value requires pushing a duplicated copy to the stack. Either witing to the stack and immediately reading it, or perhaps dereferencing addresses relative to the stack pointer in a chain of push instructions, seems to perform quite poorly. In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time, 94% of which is attributed to the first push instruction to copy dim_sample on the stack for the call to net_dim(): // Call ktime_get() 0.26 |4ead2: call 4ead7 <mlx5e_handle_rx_dim+0x47> // Pass the address of struct dim in %rdi |4ead7: lea 0x3d0(%rbx),%rdi // Set dim_sample.pkt_ctr |4eade: mov %r13d,0x8(%rsp) // Set dim_sample.byte_ctr |4eae3: mov %r12d,0xc(%rsp) // Set dim_sample.event_ctr 0.15 |4eae8: mov %bp,0x10(%rsp) // Duplicate dim_sample on the stack 94.16 |4eaed: push 0x10(%rsp) 2.79 |4eaf1: push 0x10(%rsp) 0.07 |4eaf5: push %rax // Call net_dim() 0.21 |4eaf6: call 4eafb <mlx5e_handle_rx_dim+0x6b> To allow the caller to reuse the struct dim_sample already on the stack, pass the struct dim_sample by reference to net_dim(). Signed-off-by: Caleb Sander Mateos <csander@purestorage.com> Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com> Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com> Reviewed-by: Arthur Kiyanovski <akiyano@amazon.com> Reviewed-by: Louis Peens <louis.peens@corigine.com> Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
380 lines
9.2 KiB
C
380 lines
9.2 KiB
C
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
|
/*
|
|
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
|
|
*/
|
|
|
|
#include <linux/dim.h>
|
|
#include <linux/rtnetlink.h>
|
|
|
|
/*
|
|
* Net DIM profiles:
|
|
* There are different set of profiles for each CQ period mode.
|
|
* There are different set of profiles for RX/TX CQs.
|
|
* Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
|
|
*/
|
|
#define NET_DIM_RX_EQE_PROFILES { \
|
|
{.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
|
|
}
|
|
|
|
#define NET_DIM_RX_CQE_PROFILES { \
|
|
{.usec = 2, .pkts = 256,}, \
|
|
{.usec = 8, .pkts = 128,}, \
|
|
{.usec = 16, .pkts = 64,}, \
|
|
{.usec = 32, .pkts = 64,}, \
|
|
{.usec = 64, .pkts = 64,} \
|
|
}
|
|
|
|
#define NET_DIM_TX_EQE_PROFILES { \
|
|
{.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
|
|
{.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
|
|
}
|
|
|
|
#define NET_DIM_TX_CQE_PROFILES { \
|
|
{.usec = 5, .pkts = 128,}, \
|
|
{.usec = 8, .pkts = 64,}, \
|
|
{.usec = 16, .pkts = 32,}, \
|
|
{.usec = 32, .pkts = 32,}, \
|
|
{.usec = 64, .pkts = 32,} \
|
|
}
|
|
|
|
static const struct dim_cq_moder
|
|
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
|
|
NET_DIM_RX_EQE_PROFILES,
|
|
NET_DIM_RX_CQE_PROFILES,
|
|
};
|
|
|
|
static const struct dim_cq_moder
|
|
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
|
|
NET_DIM_TX_EQE_PROFILES,
|
|
NET_DIM_TX_CQE_PROFILES,
|
|
};
|
|
|
|
struct dim_cq_moder
|
|
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
|
|
{
|
|
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
|
|
|
|
cq_moder.cq_period_mode = cq_period_mode;
|
|
return cq_moder;
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_rx_moderation);
|
|
|
|
struct dim_cq_moder
|
|
net_dim_get_def_rx_moderation(u8 cq_period_mode)
|
|
{
|
|
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
|
|
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
|
|
|
|
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
|
|
|
|
struct dim_cq_moder
|
|
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
|
|
{
|
|
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
|
|
|
|
cq_moder.cq_period_mode = cq_period_mode;
|
|
return cq_moder;
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_tx_moderation);
|
|
|
|
struct dim_cq_moder
|
|
net_dim_get_def_tx_moderation(u8 cq_period_mode)
|
|
{
|
|
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
|
|
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
|
|
|
|
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
|
|
|
|
int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
|
|
u8 coal_flags, u8 rx_mode, u8 tx_mode,
|
|
void (*rx_dim_work)(struct work_struct *work),
|
|
void (*tx_dim_work)(struct work_struct *work))
|
|
{
|
|
struct dim_cq_moder *rxp = NULL, *txp;
|
|
struct dim_irq_moder *moder;
|
|
int len;
|
|
|
|
dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
|
|
if (!dev->irq_moder)
|
|
return -ENOMEM;
|
|
|
|
moder = dev->irq_moder;
|
|
len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);
|
|
|
|
moder->coal_flags = coal_flags;
|
|
moder->profile_flags = profile_flags;
|
|
|
|
if (profile_flags & DIM_PROFILE_RX) {
|
|
moder->rx_dim_work = rx_dim_work;
|
|
moder->dim_rx_mode = rx_mode;
|
|
rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
|
|
if (!rxp)
|
|
goto free_moder;
|
|
|
|
rcu_assign_pointer(moder->rx_profile, rxp);
|
|
}
|
|
|
|
if (profile_flags & DIM_PROFILE_TX) {
|
|
moder->tx_dim_work = tx_dim_work;
|
|
moder->dim_tx_mode = tx_mode;
|
|
txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
|
|
if (!txp)
|
|
goto free_rxp;
|
|
|
|
rcu_assign_pointer(moder->tx_profile, txp);
|
|
}
|
|
|
|
return 0;
|
|
|
|
free_rxp:
|
|
kfree(rxp);
|
|
free_moder:
|
|
kfree(moder);
|
|
return -ENOMEM;
|
|
}
|
|
EXPORT_SYMBOL(net_dim_init_irq_moder);
|
|
|
|
/* RTNL lock is held. */
|
|
void net_dim_free_irq_moder(struct net_device *dev)
|
|
{
|
|
struct dim_cq_moder *rxp, *txp;
|
|
|
|
if (!dev->irq_moder)
|
|
return;
|
|
|
|
rxp = rtnl_dereference(dev->irq_moder->rx_profile);
|
|
txp = rtnl_dereference(dev->irq_moder->tx_profile);
|
|
|
|
rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
|
|
rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
|
|
|
|
kfree_rcu(rxp, rcu);
|
|
kfree_rcu(txp, rcu);
|
|
kfree(dev->irq_moder);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_free_irq_moder);
|
|
|
|
void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx)
|
|
{
|
|
struct dim_irq_moder *irq_moder = dev->irq_moder;
|
|
|
|
if (!irq_moder)
|
|
return;
|
|
|
|
if (is_tx) {
|
|
INIT_WORK(&dim->work, irq_moder->tx_dim_work);
|
|
dim->mode = READ_ONCE(irq_moder->dim_tx_mode);
|
|
return;
|
|
}
|
|
|
|
INIT_WORK(&dim->work, irq_moder->rx_dim_work);
|
|
dim->mode = READ_ONCE(irq_moder->dim_rx_mode);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_setting);
|
|
|
|
void net_dim_work_cancel(struct dim *dim)
|
|
{
|
|
cancel_work_sync(&dim->work);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_work_cancel);
|
|
|
|
struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev,
|
|
struct dim *dim)
|
|
{
|
|
struct dim_cq_moder res, *profile;
|
|
|
|
rcu_read_lock();
|
|
profile = rcu_dereference(dev->irq_moder->rx_profile);
|
|
res = profile[dim->profile_ix];
|
|
rcu_read_unlock();
|
|
|
|
res.cq_period_mode = dim->mode;
|
|
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_rx_irq_moder);
|
|
|
|
struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev,
|
|
struct dim *dim)
|
|
{
|
|
struct dim_cq_moder res, *profile;
|
|
|
|
rcu_read_lock();
|
|
profile = rcu_dereference(dev->irq_moder->tx_profile);
|
|
res = profile[dim->profile_ix];
|
|
rcu_read_unlock();
|
|
|
|
res.cq_period_mode = dim->mode;
|
|
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL(net_dim_get_tx_irq_moder);
|
|
|
|
void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode)
|
|
{
|
|
WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_set_rx_mode);
|
|
|
|
void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode)
|
|
{
|
|
WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode);
|
|
}
|
|
EXPORT_SYMBOL(net_dim_set_tx_mode);
|
|
|
|
static int net_dim_step(struct dim *dim)
|
|
{
|
|
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
|
|
return DIM_TOO_TIRED;
|
|
|
|
switch (dim->tune_state) {
|
|
case DIM_PARKING_ON_TOP:
|
|
case DIM_PARKING_TIRED:
|
|
break;
|
|
case DIM_GOING_RIGHT:
|
|
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
|
|
return DIM_ON_EDGE;
|
|
dim->profile_ix++;
|
|
dim->steps_right++;
|
|
break;
|
|
case DIM_GOING_LEFT:
|
|
if (dim->profile_ix == 0)
|
|
return DIM_ON_EDGE;
|
|
dim->profile_ix--;
|
|
dim->steps_left++;
|
|
break;
|
|
}
|
|
|
|
dim->tired++;
|
|
return DIM_STEPPED;
|
|
}
|
|
|
|
static void net_dim_exit_parking(struct dim *dim)
|
|
{
|
|
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
|
|
net_dim_step(dim);
|
|
}
|
|
|
|
static int net_dim_stats_compare(struct dim_stats *curr,
|
|
struct dim_stats *prev)
|
|
{
|
|
if (!prev->bpms)
|
|
return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
|
|
|
|
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
|
|
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
|
|
DIM_STATS_WORSE;
|
|
|
|
if (!prev->ppms)
|
|
return curr->ppms ? DIM_STATS_BETTER :
|
|
DIM_STATS_SAME;
|
|
|
|
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
|
|
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
|
|
DIM_STATS_WORSE;
|
|
|
|
if (!prev->epms)
|
|
return DIM_STATS_SAME;
|
|
|
|
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
|
|
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
|
|
DIM_STATS_WORSE;
|
|
|
|
return DIM_STATS_SAME;
|
|
}
|
|
|
|
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
|
|
{
|
|
int prev_state = dim->tune_state;
|
|
int prev_ix = dim->profile_ix;
|
|
int stats_res;
|
|
int step_res;
|
|
|
|
switch (dim->tune_state) {
|
|
case DIM_PARKING_ON_TOP:
|
|
stats_res = net_dim_stats_compare(curr_stats,
|
|
&dim->prev_stats);
|
|
if (stats_res != DIM_STATS_SAME)
|
|
net_dim_exit_parking(dim);
|
|
break;
|
|
|
|
case DIM_PARKING_TIRED:
|
|
dim->tired--;
|
|
if (!dim->tired)
|
|
net_dim_exit_parking(dim);
|
|
break;
|
|
|
|
case DIM_GOING_RIGHT:
|
|
case DIM_GOING_LEFT:
|
|
stats_res = net_dim_stats_compare(curr_stats,
|
|
&dim->prev_stats);
|
|
if (stats_res != DIM_STATS_BETTER)
|
|
dim_turn(dim);
|
|
|
|
if (dim_on_top(dim)) {
|
|
dim_park_on_top(dim);
|
|
break;
|
|
}
|
|
|
|
step_res = net_dim_step(dim);
|
|
switch (step_res) {
|
|
case DIM_ON_EDGE:
|
|
dim_park_on_top(dim);
|
|
break;
|
|
case DIM_TOO_TIRED:
|
|
dim_park_tired(dim);
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (prev_state != DIM_PARKING_ON_TOP ||
|
|
dim->tune_state != DIM_PARKING_ON_TOP)
|
|
dim->prev_stats = *curr_stats;
|
|
|
|
return dim->profile_ix != prev_ix;
|
|
}
|
|
|
|
void net_dim(struct dim *dim, const struct dim_sample *end_sample)
|
|
{
|
|
struct dim_stats curr_stats;
|
|
u16 nevents;
|
|
|
|
switch (dim->state) {
|
|
case DIM_MEASURE_IN_PROGRESS:
|
|
nevents = BIT_GAP(BITS_PER_TYPE(u16),
|
|
end_sample->event_ctr,
|
|
dim->start_sample.event_ctr);
|
|
if (nevents < DIM_NEVENTS)
|
|
break;
|
|
if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats))
|
|
break;
|
|
if (net_dim_decision(&curr_stats, dim)) {
|
|
dim->state = DIM_APPLY_NEW_PROFILE;
|
|
schedule_work(&dim->work);
|
|
break;
|
|
}
|
|
fallthrough;
|
|
case DIM_START_MEASURE:
|
|
dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr,
|
|
end_sample->byte_ctr, &dim->start_sample);
|
|
dim->state = DIM_MEASURE_IN_PROGRESS;
|
|
break;
|
|
case DIM_APPLY_NEW_PROFILE:
|
|
break;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(net_dim);
|