mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-28 00:33:16 +00:00
dim: pass dim_sample to net_dim() by reference
net_dim() is currently passed a struct dim_sample argument by value. struct dim_sample is 24 bytes. Since this is greater than 16 bytes, x86-64 passes it on the stack. All callers have already initialized dim_sample on the stack, so passing it by value requires pushing a duplicated copy to the stack. Either writing to the stack and immediately reading it, or perhaps dereferencing addresses relative to the stack pointer in a chain of push instructions, seems to perform quite poorly. In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time, 94% of which is attributed to the first push instruction to copy dim_sample on the stack for the call to net_dim(): // Call ktime_get() 0.26 |4ead2: call 4ead7 <mlx5e_handle_rx_dim+0x47> // Pass the address of struct dim in %rdi |4ead7: lea 0x3d0(%rbx),%rdi // Set dim_sample.pkt_ctr |4eade: mov %r13d,0x8(%rsp) // Set dim_sample.byte_ctr |4eae3: mov %r12d,0xc(%rsp) // Set dim_sample.event_ctr 0.15 |4eae8: mov %bp,0x10(%rsp) // Duplicate dim_sample on the stack 94.16 |4eaed: push 0x10(%rsp) 2.79 |4eaf1: push 0x10(%rsp) 0.07 |4eaf5: push %rax // Call net_dim() 0.21 |4eaf6: call 4eafb <mlx5e_handle_rx_dim+0x6b> To allow the caller to reuse the struct dim_sample already on the stack, pass the struct dim_sample by reference to net_dim(). Signed-off-by: Caleb Sander Mateos <csander@purestorage.com> Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com> Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com> Reviewed-by: Arthur Kiyanovski <akiyano@amazon.com> Reviewed-by: Louis Peens <louis.peens@corigine.com> Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
a865276872
commit
61bf0009a7
@ -156,7 +156,7 @@ usage is not complete but it should make the outline of the usage clear.
|
||||
my_entity->bytes,
|
||||
&dim_sample);
|
||||
/* Call net DIM */
|
||||
net_dim(&my_entity->dim, dim_sample);
|
||||
net_dim(&my_entity->dim, &dim_sample);
|
||||
...
|
||||
}
|
||||
|
||||
|
@ -1383,7 +1383,7 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
|
||||
rx_ring->rx_stats.bytes,
|
||||
&dim_sample);
|
||||
|
||||
net_dim(&ena_napi->dim, dim_sample);
|
||||
net_dim(&ena_napi->dim, &dim_sample);
|
||||
|
||||
rx_ring->per_napi_packets = 0;
|
||||
}
|
||||
|
@ -1029,7 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
|
||||
if (priv->dim.use_dim) {
|
||||
dim_update_sample(priv->dim.event_ctr, priv->dim.packets,
|
||||
priv->dim.bytes, &dim_sample);
|
||||
net_dim(&priv->dim.dim, dim_sample);
|
||||
net_dim(&priv->dim.dim, &dim_sample);
|
||||
}
|
||||
|
||||
return work_done;
|
||||
|
@ -3102,7 +3102,7 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
|
||||
cpr->rx_packets,
|
||||
cpr->rx_bytes,
|
||||
&dim_sample);
|
||||
net_dim(&cpr->dim, dim_sample);
|
||||
net_dim(&cpr->dim, &dim_sample);
|
||||
}
|
||||
return work_done;
|
||||
}
|
||||
@ -3233,7 +3233,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
|
||||
cpr_rx->rx_packets,
|
||||
cpr_rx->rx_bytes,
|
||||
&dim_sample);
|
||||
net_dim(&cpr->dim, dim_sample);
|
||||
net_dim(&cpr->dim, &dim_sample);
|
||||
}
|
||||
return work_done;
|
||||
}
|
||||
|
@ -2405,7 +2405,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
|
||||
if (ring->dim.use_dim) {
|
||||
dim_update_sample(ring->dim.event_ctr, ring->dim.packets,
|
||||
ring->dim.bytes, &dim_sample);
|
||||
net_dim(&ring->dim.dim, dim_sample);
|
||||
net_dim(&ring->dim.dim, &dim_sample);
|
||||
}
|
||||
|
||||
return work_done;
|
||||
|
@ -718,7 +718,7 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v)
|
||||
v->rx_ring.stats.packets,
|
||||
v->rx_ring.stats.bytes,
|
||||
&dim_sample);
|
||||
net_dim(&v->rx_dim, dim_sample);
|
||||
net_dim(&v->rx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
|
||||
|
@ -4478,7 +4478,7 @@ static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
|
||||
|
||||
dim_update_sample(tqp_vector->event_cnt, rx_group->total_packets,
|
||||
rx_group->total_bytes, &sample);
|
||||
net_dim(&rx_group->dim, sample);
|
||||
net_dim(&rx_group->dim, &sample);
|
||||
}
|
||||
|
||||
static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
|
||||
@ -4491,7 +4491,7 @@ static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
|
||||
|
||||
dim_update_sample(tqp_vector->event_cnt, tx_group->total_packets,
|
||||
tx_group->total_bytes, &sample);
|
||||
net_dim(&tx_group->dim, sample);
|
||||
net_dim(&tx_group->dim, &sample);
|
||||
}
|
||||
|
||||
static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
|
||||
|
@ -1352,14 +1352,14 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
|
||||
struct dim_sample dim_sample;
|
||||
|
||||
__ice_update_sample(q_vector, tx, &dim_sample, true);
|
||||
net_dim(&tx->dim, dim_sample);
|
||||
net_dim(&tx->dim, &dim_sample);
|
||||
}
|
||||
|
||||
if (ITR_IS_DYNAMIC(rx)) {
|
||||
struct dim_sample dim_sample;
|
||||
|
||||
__ice_update_sample(q_vector, rx, &dim_sample, false);
|
||||
net_dim(&rx->dim, dim_sample);
|
||||
net_dim(&rx->dim, &dim_sample);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3679,7 +3679,7 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector)
|
||||
|
||||
idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim,
|
||||
packets, bytes);
|
||||
net_dim(&q_vector->tx_dim, dim_sample);
|
||||
net_dim(&q_vector->tx_dim, &dim_sample);
|
||||
|
||||
check_rx_itr:
|
||||
if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode))
|
||||
@ -3698,7 +3698,7 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector)
|
||||
|
||||
idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim,
|
||||
packets, bytes);
|
||||
net_dim(&q_vector->rx_dim, dim_sample);
|
||||
net_dim(&q_vector->rx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -527,7 +527,7 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
|
||||
rx_frames + tx_frames,
|
||||
rx_bytes + tx_bytes,
|
||||
&dim_sample);
|
||||
net_dim(&cq_poll->dim, dim_sample);
|
||||
net_dim(&cq_poll->dim, &dim_sample);
|
||||
}
|
||||
|
||||
int otx2_napi_handler(struct napi_struct *napi, int budget)
|
||||
|
@ -2227,7 +2227,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
|
||||
eth->rx_bytes += bytes;
|
||||
dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
|
||||
&dim_sample);
|
||||
net_dim(&eth->rx_dim, dim_sample);
|
||||
net_dim(&eth->rx_dim, &dim_sample);
|
||||
|
||||
if (xdp_flush)
|
||||
xdp_do_flush();
|
||||
@ -2377,7 +2377,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
|
||||
|
||||
dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
|
||||
&dim_sample);
|
||||
net_dim(&eth->tx_dim, dim_sample);
|
||||
net_dim(&eth->tx_dim, &dim_sample);
|
||||
|
||||
if (mtk_queue_stopped(eth) &&
|
||||
(atomic_read(&ring->free_count) > ring->thresh))
|
||||
|
@ -55,7 +55,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
|
||||
return;
|
||||
|
||||
dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
|
||||
net_dim(sq->dim, dim_sample);
|
||||
net_dim(sq->dim, &dim_sample);
|
||||
}
|
||||
|
||||
static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
|
||||
@ -67,7 +67,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
|
||||
return;
|
||||
|
||||
dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
|
||||
net_dim(rq->dim, dim_sample);
|
||||
net_dim(rq->dim, &dim_sample);
|
||||
}
|
||||
|
||||
void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
|
||||
|
@ -1179,7 +1179,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
|
||||
} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
|
||||
|
||||
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
|
||||
net_dim(&r_vec->rx_dim, dim_sample);
|
||||
net_dim(&r_vec->rx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
|
||||
@ -1194,7 +1194,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
|
||||
} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
|
||||
|
||||
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
|
||||
net_dim(&r_vec->tx_dim, dim_sample);
|
||||
net_dim(&r_vec->tx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
return pkts_polled;
|
||||
|
@ -1289,7 +1289,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
|
||||
} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
|
||||
|
||||
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
|
||||
net_dim(&r_vec->rx_dim, dim_sample);
|
||||
net_dim(&r_vec->rx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
|
||||
@ -1304,7 +1304,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
|
||||
} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
|
||||
|
||||
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
|
||||
net_dim(&r_vec->tx_dim, dim_sample);
|
||||
net_dim(&r_vec->tx_dim, &dim_sample);
|
||||
}
|
||||
|
||||
return pkts_polled;
|
||||
|
@ -928,7 +928,7 @@ static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
|
||||
dim_update_sample(qcq->cq.bound_intr->rearm_count,
|
||||
pkts, bytes, &dim_sample);
|
||||
|
||||
net_dim(&qcq->dim, dim_sample);
|
||||
net_dim(&qcq->dim, &dim_sample);
|
||||
}
|
||||
|
||||
int ionic_tx_napi(struct napi_struct *napi, int budget)
|
||||
|
@ -2804,7 +2804,7 @@ static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue
|
||||
u64_stats_read(&rq->stats.bytes),
|
||||
&cur_sample);
|
||||
|
||||
net_dim(&rq->dim, cur_sample);
|
||||
net_dim(&rq->dim, &cur_sample);
|
||||
rq->packets_in_napi = 0;
|
||||
}
|
||||
|
||||
|
@ -891,7 +891,7 @@ void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes)
|
||||
d->frames += frames;
|
||||
|
||||
dim_update_sample(d->event_ctr, d->frames, d->bytes, &dim_sample);
|
||||
net_dim(&d->rx_dim, dim_sample);
|
||||
net_dim(&d->rx_dim, &dim_sample);
|
||||
|
||||
spin_unlock(&d->dim_lock);
|
||||
}
|
||||
|
@ -425,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
|
||||
* This is the main logic of the algorithm, where data is processed in order
|
||||
* to decide on next required action.
|
||||
*/
|
||||
void net_dim(struct dim *dim, struct dim_sample end_sample);
|
||||
void net_dim(struct dim *dim, const struct dim_sample *end_sample);
|
||||
|
||||
/* RDMA DIM */
|
||||
|
||||
|
@ -347,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
|
||||
return dim->profile_ix != prev_ix;
|
||||
}
|
||||
|
||||
void net_dim(struct dim *dim, struct dim_sample end_sample)
|
||||
void net_dim(struct dim *dim, const struct dim_sample *end_sample)
|
||||
{
|
||||
struct dim_stats curr_stats;
|
||||
u16 nevents;
|
||||
@ -355,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample)
|
||||
switch (dim->state) {
|
||||
case DIM_MEASURE_IN_PROGRESS:
|
||||
nevents = BIT_GAP(BITS_PER_TYPE(u16),
|
||||
end_sample.event_ctr,
|
||||
end_sample->event_ctr,
|
||||
dim->start_sample.event_ctr);
|
||||
if (nevents < DIM_NEVENTS)
|
||||
break;
|
||||
if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
|
||||
if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats))
|
||||
break;
|
||||
if (net_dim_decision(&curr_stats, dim)) {
|
||||
dim->state = DIM_APPLY_NEW_PROFILE;
|
||||
@ -368,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample)
|
||||
}
|
||||
fallthrough;
|
||||
case DIM_START_MEASURE:
|
||||
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
|
||||
end_sample.byte_ctr, &dim->start_sample);
|
||||
dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr,
|
||||
end_sample->byte_ctr, &dim->start_sample);
|
||||
dim->state = DIM_MEASURE_IN_PROGRESS;
|
||||
break;
|
||||
case DIM_APPLY_NEW_PROFILE:
|
||||
|
Loading…
Reference in New Issue
Block a user