mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 21:23:23 +00:00
tcp: Measure TIME-WAIT reuse delay with millisecond precision
Prepare ground for TIME-WAIT socket reuse with subsecond delay. Today the last TS.Recent update timestamp, recorded in seconds and stored in the tp->ts_recent_stamp and tw->tw_ts_recent_stamp fields, has two purposes. Firstly, it is used to track the age of the last recorded TS.Recent value to detect when that value becomes outdated due to potential wrap-around of the other TCP timestamp clock (RFC 7323, section 5.5). For this purpose a second-based timestamp is completely sufficient as even in the worst case scenario of a peer using a high resolution microsecond timestamp, the wrap-around interval is ~36 minutes long. Secondly, it serves as a threshold value for allowing TIME-WAIT socket reuse. A TIME-WAIT socket can be reused only once the virtual 1 Hz clock, ktime_get_seconds, is past the TS.Recent update timestamp. The purpose behind delaying the TIME-WAIT socket reuse is to wait for the other TCP timestamp clock to tick at least once before reusing the connection. It is only then that the PAWS mechanism for the reopened connection can detect old duplicate segments from the previous connection incarnation (RFC 7323, appendix B.2). In this case using a timestamp with second resolution not only blocks the way toward allowing faster TIME-WAIT reuse after a shorter subsecond delay, but also makes it impossible to reliably delay TW reuse by one second. As Eric Dumazet has pointed out [1], due to timestamp rounding, the TW reuse delay will actually be between (0, 1] seconds, and 0.5 seconds on average. We delay TW reuse for one full second only when the last TS.Recent update coincides with our virtual 1 Hz clock tick. Considering the above, introduce a dedicated field to store a millisecond timestamp of transition into the TIME-WAIT state. Place it in an existing 4-byte hole inside the inet_timewait_sock structure to avoid an additional memory cost. 
Use the new timestamp to (i) reliably delay TIME-WAIT reuse by one second, and (ii) prepare for configurable subsecond reuse delay in the subsequent change. We assume here that a full one second delay was the original intention in [2] because it accounts for the worst case scenario of the other TCP using the slowest recommended 1 Hz timestamp clock. A more involved alternative would be to change the resolution of the last TS.Recent update timestamp, tw->tw_ts_recent_stamp, to milliseconds. [1] https://lore.kernel.org/netdev/CANn89iKB4GFd8sVzCbRttqw_96o3i2wDhX-3DraQtsceNGYwug@mail.gmail.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8439924316d5bcb266d165b93d632a4b4b859af Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Jason Xing <kerneljasonxing@gmail.com> Link: https://patch.msgid.link/20241209-jakub-krn-909-poc-msec-tw-tstamp-v2-1-66aca0eed03e@cloudflare.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
148328b59d
commit
19ce8cd304
@ -74,6 +74,10 @@ struct inet_timewait_sock {
|
||||
tw_tos : 8;
|
||||
u32 tw_txhash;
|
||||
u32 tw_priority;
|
||||
/**
|
||||
* @tw_entry_stamp: Time of entry into %TCP_TIME_WAIT state in msec.
|
||||
*/
|
||||
u32 tw_entry_stamp;
|
||||
struct timer_list tw_timer;
|
||||
struct inet_bind_bucket *tw_tb;
|
||||
struct inet_bind2_bucket *tw_tb2;
|
||||
|
@ -120,6 +120,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
|
||||
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int ts_recent_stamp;
|
||||
u32 reuse_thresh;
|
||||
|
||||
if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2)
|
||||
reuse = 0;
|
||||
@ -162,9 +163,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
|
||||
and use initial timestamp retrieved from peer table.
|
||||
*/
|
||||
ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
|
||||
reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + MSEC_PER_SEC;
|
||||
if (ts_recent_stamp &&
|
||||
(!twp || (reuse && time_after32(ktime_get_seconds(),
|
||||
ts_recent_stamp)))) {
|
||||
(!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) {
|
||||
/* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk
|
||||
* and releasing the bucket lock.
|
||||
*/
|
||||
|
@ -157,8 +157,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
||||
rcv_nxt);
|
||||
|
||||
if (tmp_opt.saw_tstamp) {
|
||||
u64 ts = tcp_clock_ms();
|
||||
|
||||
WRITE_ONCE(tw->tw_entry_stamp, ts);
|
||||
WRITE_ONCE(tcptw->tw_ts_recent_stamp,
|
||||
ktime_get_seconds());
|
||||
div_u64(ts, MSEC_PER_SEC));
|
||||
WRITE_ONCE(tcptw->tw_ts_recent,
|
||||
tmp_opt.rcv_tsval);
|
||||
}
|
||||
@ -316,6 +319,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
||||
tw->tw_mark = sk->sk_mark;
|
||||
tw->tw_priority = READ_ONCE(sk->sk_priority);
|
||||
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
|
||||
/* refreshed when we enter true TIME-WAIT state */
|
||||
tw->tw_entry_stamp = tcp_time_stamp_ms(tp);
|
||||
tcptw->tw_rcv_nxt = tp->rcv_nxt;
|
||||
tcptw->tw_snd_nxt = tp->snd_nxt;
|
||||
tcptw->tw_rcv_wnd = tcp_receive_window(tp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user