mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 14:50:19 +00:00
tcp_cubic: refactor code to perform a divide only when needed
Neal Cardwell suggested to not change ca->delay_min and apply the ack delay cushion only when Hystart ACK train is still under consideration. This should avoid a 64bit divide unless needed. Tested: 40Gbit(mlx4) testbed (with sch_fq as packet scheduler) $ echo -n 'file tcp_cubic.c +p' >/sys/kernel/debug/dynamic_debug/control $ nstat -n;for f in {1..10}; do ./super_netperf 1 -H lpaa24 -l -4000000; done;nstat|egrep "Hystart" 14815 16280 15293 15563 11574 15145 14789 18548 16972 12520 TcpExtTCPHystartTrainDetect 10 0.0 TcpExtTCPHystartTrainCwnd 1396 0.0 $ dmesg | tail -10 [ 4873.951350] hystart_ack_train (116 > 93) delay_min 24 (+ ack_delay 69) cwnd 80 [ 4875.155379] hystart_ack_train (55 > 50) delay_min 21 (+ ack_delay 29) cwnd 160 [ 4876.333921] hystart_ack_train (69 > 62) delay_min 23 (+ ack_delay 39) cwnd 130 [ 4877.519037] hystart_ack_train (69 > 60) delay_min 22 (+ ack_delay 38) cwnd 130 [ 4878.701559] hystart_ack_train (87 > 63) delay_min 24 (+ ack_delay 39) cwnd 160 [ 4879.844597] hystart_ack_train (93 > 50) delay_min 21 (+ ack_delay 29) cwnd 216 [ 4880.956650] hystart_ack_train (74 > 67) delay_min 20 (+ ack_delay 47) cwnd 108 [ 4882.098500] hystart_ack_train (61 > 57) delay_min 23 (+ ack_delay 34) cwnd 130 [ 4883.262056] hystart_ack_train (72 > 67) delay_min 21 (+ ack_delay 46) cwnd 130 [ 4884.418760] hystart_ack_train (74 > 67) delay_min 29 (+ ack_delay 38) cwnd 152 10Gbit(bnx2x) testbed (with sch_fq as packet scheduler) $ echo -n 'file tcp_cubic.c +p' >/sys/kernel/debug/dynamic_debug/control $ nstat -n;for f in {1..10}; do ./super_netperf 1 -H lpk52 -l -4000000; done;nstat|egrep "Hystart" 7050 7065 7100 6900 7202 7263 7189 6869 7463 7034 TcpExtTCPHystartTrainDetect 10 0.0 TcpExtTCPHystartTrainCwnd 3199 0.0 $ dmesg | tail -10 [ 176.920012] hystart_ack_train (161 > 141) delay_min 83 (+ ack_delay 58) cwnd 264 [ 179.144645] hystart_ack_train (164 > 159) delay_min 120 (+ ack_delay 39) cwnd 444 [ 181.354527] hystart_ack_train (214 > 168) delay_min 125 (+ ack_delay 43) cwnd 436 [ 183.539565] hystart_ack_train (170 > 147) delay_min 96 (+ ack_delay 51) cwnd 326 [ 185.727309] hystart_ack_train (177 > 160) delay_min 61 (+ ack_delay 99) cwnd 128 [ 187.947142] hystart_ack_train (184 > 167) delay_min 123 (+ ack_delay 44) cwnd 367 [ 190.166680] hystart_ack_train (230 > 153) delay_min 116 (+ ack_delay 37) cwnd 444 [ 192.327285] hystart_ack_train (210 > 206) delay_min 86 (+ ack_delay 120) cwnd 152 [ 194.511392] hystart_ack_train (173 > 151) delay_min 94 (+ ack_delay 57) cwnd 239 [ 196.736023] hystart_ack_train (149 > 146) delay_min 105 (+ ack_delay 41) cwnd 399 Fixes: 42f3a8aaae66 ("tcp_cubic: tweak Hystart detection for short RTT flows") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Neal Cardwell <ncardwell@google.com> Link: https://www.spinics.net/lists/netdev/msg621886.html Link: https://www.spinics.net/lists/netdev/msg621797.html Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
ba4028105e
commit
f278b99ca6
@ -372,6 +372,26 @@ static void bictcp_state(struct sock *sk, u8 new_state)
|
||||
}
|
||||
}
|
||||
|
||||
/* Account for TSO/GRO delays.
|
||||
* Otherwise short RTT flows could get too small ssthresh, since during
|
||||
* slow start we begin with small TSO packets and ca->delay_min would
|
||||
* not account for long aggregation delay when TSO packets get bigger.
|
||||
* Ideally even with a very small RTT we would like to have at least one
|
||||
* TSO packet being sent and received by GRO, and another one in qdisc layer.
|
||||
* We apply another 100% factor because @rate is doubled at this point.
|
||||
* We cap the cushion to 1ms.
|
||||
*/
|
||||
static u32 hystart_ack_delay(struct sock *sk)
|
||||
{
|
||||
unsigned long rate;
|
||||
|
||||
rate = READ_ONCE(sk->sk_pacing_rate);
|
||||
if (!rate)
|
||||
return 0;
|
||||
return min_t(u64, USEC_PER_MSEC,
|
||||
div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
|
||||
}
|
||||
|
||||
static void hystart_update(struct sock *sk, u32 delay)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
@ -385,7 +405,8 @@ static void hystart_update(struct sock *sk, u32 delay)
|
||||
if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
|
||||
ca->last_ack = now;
|
||||
|
||||
threshold = ca->delay_min;
|
||||
threshold = ca->delay_min + hystart_ack_delay(sk);
|
||||
|
||||
/* Hystart ack train triggers if we get ack past
|
||||
* ca->delay_min/2.
|
||||
* Pacing might have delayed packets up to RTT/2
|
||||
@ -396,6 +417,9 @@ static void hystart_update(struct sock *sk, u32 delay)
|
||||
|
||||
if ((s32)(now - ca->round_start) > threshold) {
|
||||
ca->found = 1;
|
||||
pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
|
||||
now - ca->round_start, threshold,
|
||||
ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
|
||||
NET_INC_STATS(sock_net(sk),
|
||||
LINUX_MIB_TCPHYSTARTTRAINDETECT);
|
||||
NET_ADD_STATS(sock_net(sk),
|
||||
@ -447,30 +471,11 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
|
||||
delay = 1;
|
||||
|
||||
/* first time call or link delay decreases */
|
||||
if (ca->delay_min == 0 || ca->delay_min > delay) {
|
||||
unsigned long rate = READ_ONCE(sk->sk_pacing_rate);
|
||||
|
||||
/* Account for TSO/GRO delays.
|
||||
* Otherwise short RTT flows could get too small ssthresh,
|
||||
* since during slow start we begin with small TSO packets
|
||||
* and could lower ca->delay_min too much.
|
||||
* Ideally even with a very small RTT we would like to have
|
||||
* at least one TSO packet being sent and received by GRO,
|
||||
* and another one in qdisc layer.
|
||||
* We apply another 100% factor because @rate is doubled at
|
||||
* this point.
|
||||
* We cap the cushion to 1ms.
|
||||
*/
|
||||
if (rate)
|
||||
delay += min_t(u64, USEC_PER_MSEC,
|
||||
div64_ul((u64)GSO_MAX_SIZE *
|
||||
4 * USEC_PER_SEC, rate));
|
||||
if (ca->delay_min == 0 || ca->delay_min > delay)
|
||||
ca->delay_min = delay;
|
||||
}
|
||||
if (ca->delay_min == 0 || ca->delay_min > delay)
|
||||
ca->delay_min = delay;
|
||||
|
||||
/* hystart triggers when cwnd is larger than some threshold */
|
||||
if (!ca->found && hystart && tcp_in_slow_start(tp) &&
|
||||
if (!ca->found && tcp_in_slow_start(tp) && hystart &&
|
||||
tp->snd_cwnd >= hystart_low_window)
|
||||
hystart_update(sk, delay);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user