linux/lib/dim/net_dim.c

380 lines
9.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi <hengqi@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-06-21 10:13:51 +00:00
#include <linux/rtnetlink.h>
/*
* Net DIM profiles:
* There are different set of profiles for each CQ period mode.
* There are different set of profiles for RX/TX CQs.
* Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
*/
#define NET_DIM_RX_EQE_PROFILES { \
dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-07 01:10:38 +00:00
{.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
{.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_RX_CQE_PROFILES { \
dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-07 01:10:38 +00:00
{.usec = 2, .pkts = 256,}, \
{.usec = 8, .pkts = 128,}, \
{.usec = 16, .pkts = 64,}, \
{.usec = 32, .pkts = 64,}, \
{.usec = 64, .pkts = 64,} \
}
#define NET_DIM_TX_EQE_PROFILES { \
dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-07 01:10:38 +00:00
{.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
{.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_TX_CQE_PROFILES { \
dim: initialize all struct fields The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-07 01:10:38 +00:00
{.usec = 5, .pkts = 128,}, \
{.usec = 8, .pkts = 64,}, \
{.usec = 16, .pkts = 32,}, \
{.usec = 32, .pkts = 32,}, \
{.usec = 64, .pkts = 32,} \
}
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_rx_moderation);
struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_tx_moderation);
struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi <hengqi@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-06-21 10:13:51 +00:00
int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
u8 coal_flags, u8 rx_mode, u8 tx_mode,
void (*rx_dim_work)(struct work_struct *work),
void (*tx_dim_work)(struct work_struct *work))
{
struct dim_cq_moder *rxp = NULL, *txp;
struct dim_irq_moder *moder;
int len;
dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
if (!dev->irq_moder)
return -ENOMEM;
moder = dev->irq_moder;
len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);
moder->coal_flags = coal_flags;
moder->profile_flags = profile_flags;
if (profile_flags & DIM_PROFILE_RX) {
moder->rx_dim_work = rx_dim_work;
moder->dim_rx_mode = rx_mode;
rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
if (!rxp)
goto free_moder;
rcu_assign_pointer(moder->rx_profile, rxp);
}
if (profile_flags & DIM_PROFILE_TX) {
moder->tx_dim_work = tx_dim_work;
moder->dim_tx_mode = tx_mode;
txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
if (!txp)
goto free_rxp;
rcu_assign_pointer(moder->tx_profile, txp);
}
return 0;
free_rxp:
kfree(rxp);
free_moder:
kfree(moder);
return -ENOMEM;
}
EXPORT_SYMBOL(net_dim_init_irq_moder);
/* RTNL lock is held. */
void net_dim_free_irq_moder(struct net_device *dev)
{
struct dim_cq_moder *rxp, *txp;
if (!dev->irq_moder)
return;
rxp = rtnl_dereference(dev->irq_moder->rx_profile);
txp = rtnl_dereference(dev->irq_moder->tx_profile);
rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
kfree_rcu(rxp, rcu);
kfree_rcu(txp, rcu);
kfree(dev->irq_moder);
}
EXPORT_SYMBOL(net_dim_free_irq_moder);
void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx)
{
struct dim_irq_moder *irq_moder = dev->irq_moder;
if (!irq_moder)
return;
if (is_tx) {
INIT_WORK(&dim->work, irq_moder->tx_dim_work);
dim->mode = READ_ONCE(irq_moder->dim_tx_mode);
return;
}
INIT_WORK(&dim->work, irq_moder->rx_dim_work);
dim->mode = READ_ONCE(irq_moder->dim_rx_mode);
}
EXPORT_SYMBOL(net_dim_setting);
void net_dim_work_cancel(struct dim *dim)
{
cancel_work_sync(&dim->work);
}
EXPORT_SYMBOL(net_dim_work_cancel);
struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev,
struct dim *dim)
{
struct dim_cq_moder res, *profile;
rcu_read_lock();
profile = rcu_dereference(dev->irq_moder->rx_profile);
res = profile[dim->profile_ix];
rcu_read_unlock();
res.cq_period_mode = dim->mode;
return res;
}
EXPORT_SYMBOL(net_dim_get_rx_irq_moder);
struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev,
struct dim *dim)
{
struct dim_cq_moder res, *profile;
rcu_read_lock();
profile = rcu_dereference(dev->irq_moder->tx_profile);
res = profile[dim->profile_ix];
rcu_read_unlock();
res.cq_period_mode = dim->mode;
return res;
}
EXPORT_SYMBOL(net_dim_get_tx_irq_moder);
void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode)
{
WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode);
}
EXPORT_SYMBOL(net_dim_set_rx_mode);
void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode)
{
WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode);
}
EXPORT_SYMBOL(net_dim_set_tx_mode);
static int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return DIM_TOO_TIRED;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return DIM_STEPPED;
}
static void net_dim_exit_parking(struct dim *dim)
{
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
net_dim_step(dim);
}
static int net_dim_stats_compare(struct dim_stats *curr,
struct dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->epms)
return DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
return DIM_STATS_SAME;
}
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case DIM_GOING_RIGHT:
case DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_BETTER)
dim_turn(dim);
if (dim_on_top(dim)) {
dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case DIM_ON_EDGE:
dim_park_on_top(dim);
break;
case DIM_TOO_TIRED:
dim_park_tired(dim);
break;
}
break;
}
if (prev_state != DIM_PARKING_ON_TOP ||
dim->tune_state != DIM_PARKING_ON_TOP)
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
void net_dim(struct dim *dim, struct dim_sample end_sample)
{
struct dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < DIM_NEVENTS)
break;
if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
break;
if (net_dim_decision(&curr_stats, dim)) {
dim->state = DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
fallthrough;
case DIM_START_MEASURE:
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
end_sample.byte_ctr, &dim->start_sample);
dim->state = DIM_MEASURE_IN_PROGRESS;
break;
case DIM_APPLY_NEW_PROFILE:
break;
}
}
EXPORT_SYMBOL(net_dim);