net-tcp: Fast Open client - sending SYN-data

This patch implements sending SYN-data in tcp_connect(). The data is
from tcp_sendmsg() with flag MSG_FASTOPEN (implemented in a later patch).

The length of the cookie in tcp_fastopen_req, init'd to 0, controls the
type of the SYN. If the cookie is not cached (len==0), the host sends
data-less SYN with Fast Open cookie request option to solicit a cookie
from the remote. If the cookie is available (len > 0), the host sends
a SYN-data with Fast Open cookie option. If cookie length is negative,
the SYN will not include any Fast Open option (for fall back operations).

To deal with middleboxes that may drop SYN with data or experimental TCP
option, the SYN-data is only sent once. SYN retransmits do not include
data or Fast Open options. The connection will fall back to regular TCP
handshake.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Yuchung Cheng 2012-07-19 06:43:07 +00:00 committed by David S. Miller
parent 1fe4c481ba
commit 783237e8da
6 changed files with 130 additions and 12 deletions

View File

@ -238,6 +238,7 @@ enum
LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */
LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */
LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */
LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */
__LINUX_MIB_MAX __LINUX_MIB_MAX
}; };

View File

@ -386,7 +386,8 @@ struct tcp_sock {
unused : 1; unused : 1;
u8 repair_queue; u8 repair_queue;
u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
early_retrans_delayed:1; /* Delayed ER timer installed */ early_retrans_delayed:1, /* Delayed ER timer installed */
syn_fastopen:1; /* SYN includes Fast Open option */
/* RTT measurement */ /* RTT measurement */
u32 srtt; /* smoothed round trip time << 3 */ u32 srtt; /* smoothed round trip time << 3 */
@ -500,6 +501,9 @@ struct tcp_sock {
struct tcp_md5sig_info __rcu *md5sig_info; struct tcp_md5sig_info __rcu *md5sig_info;
#endif #endif
/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
/* When the cookie options are generated and exchanged, then this /* When the cookie options are generated and exchanged, then this
* object holds a reference to them (cookie_values->kref). Also * object holds a reference to them (cookie_values->kref). Also
* contains related tcp_cookie_transactions fields. * contains related tcp_cookie_transactions fields.

View File

@ -1289,6 +1289,15 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
const struct tcp_md5sig_key *key); const struct tcp_md5sig_key *key);
/* State for one active (client side) Fast Open attempt. struct tcp_sock
 * refers to it through its fastopen_req pointer.
 */
struct tcp_fastopen_request {
/* Fast Open cookie. Size 0 means a cookie request; a positive size is
 * a cookie to send along with SYN-data; a negative size makes
 * tcp_syn_options() leave the Fast Open option out of the SYN.
 */
struct tcp_fastopen_cookie cookie;
/* data in MSG_FASTOPEN; tcp_send_syn_data() sets this to NULL once the
 * last iovec has been copied into the SYN in full.
 */
struct msghdr *data;
/* queued in tcp_connect(): length of the data-only skb that
 * tcp_send_syn_data() placed on the write queue.
 */
u16 copied;
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
/* write queue abstraction */ /* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk) static inline void tcp_write_queue_purge(struct sock *sk)
{ {

View File

@ -556,11 +556,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
} }
EXPORT_SYMBOL(inet_dgram_connect); EXPORT_SYMBOL(inet_dgram_connect);
static long inet_wait_for_connect(struct sock *sk, long timeo) static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sk->sk_write_pending += writebias;
/* Basic assumption: if someone sets sk->sk_err, he _must_ /* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*. * change state of the socket from TCP_SYN_*.
@ -576,6 +577,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
} }
finish_wait(sk_sleep(sk), &wait); finish_wait(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
return timeo; return timeo;
} }
@ -634,8 +636,12 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
tcp_sk(sk)->fastopen_req &&
tcp_sk(sk)->fastopen_req->data ? 1 : 0;
/* Error code is set above */ /* Error code is set above */
if (!timeo || !inet_wait_for_connect(sk, timeo)) if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
goto out; goto out;
err = sock_intr_errno(timeo); err = sock_intr_errno(timeo);

View File

@ -262,6 +262,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE),
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };

View File

@ -596,6 +596,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
tcp_cookie_size_check(cvp->cookie_desired) : tcp_cookie_size_check(cvp->cookie_desired) :
0; 0;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk); *md5 = tp->af_specific->md5_lookup(sk, sk);
@ -636,6 +637,16 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
remaining -= TCPOLEN_SACKPERM_ALIGNED; remaining -= TCPOLEN_SACKPERM_ALIGNED;
} }
if (fastopen && fastopen->cookie.len >= 0) {
u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
opts->fastopen_cookie = &fastopen->cookie;
remaining -= need;
tp->syn_fastopen = 1;
}
}
/* Note that timestamps are required by the specification. /* Note that timestamps are required by the specification.
* *
* Odd numbers of bytes are prohibited by the specification, ensuring * Odd numbers of bytes are prohibited by the specification, ensuring
@ -2824,6 +2835,96 @@ void tcp_connect_init(struct sock *sk)
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
} }
/* Append @skb to the tail of @sk's write queue during connect and update
 * the sequence and memory accounting to match. Used by tcp_connect() for
 * the SYN and by tcp_send_syn_data() for the data-only packet that
 * follows it.
 */
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* Extend the skb's sequence range by its payload length. */
tcb->end_seq += skb->len;
skb_header_release(skb);
__tcp_add_write_queue_tail(sk, skb);
/* Account the skb's truesize against the socket's send memory. */
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
/* The next byte to be written follows this skb. */
tp->write_seq = tcb->end_seq;
tp->packets_out += tcp_skb_pcount(skb);
}
/* Build and send a SYN with data and (cached) Fast Open cookie. However,
 * queue a data-only packet after the regular SYN, such that regular SYNs
 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
 * only the SYN sequence, the data are retransmitted in the first ACK.
 * If cookie is not cached or other error occurs, falls back to send a
 * regular SYN with Fast Open cookie request option.
 *
 * sk:  the connecting socket. tp->fastopen_req and its ->data pointer are
 *      dereferenced without a NULL check, so both must be set on entry.
 * syn: the data-less SYN that tcp_connect() has built and already placed
 *      on the write queue.
 *
 * Returns 0 if the SYN-data was transmitted; otherwise returns whatever
 * transmitting the regular SYN on the fallback path returned. On every
 * path the cookie length ends up as -1.
 */
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, i, err = 0, iovlen = fo->data->msg_iovlen;
struct sk_buff *syn_data = NULL, *data;
/* NOTE(review): helper not shown here; presumably fills in the cached
 * MSS and cookie for this destination - confirm against its definition.
 */
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie);
/* Without a cookie (len <= 0) no data may ride on the SYN. */
if (fo->cookie.len <= 0)
goto fallback;
/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
 * user-MSS. Reserve maximum option space for middleboxes that add
 * private TCP options. The cost is reduced data space in SYN :(
 */
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
/* Work on a private copy of the SYN with 'space' bytes of tailroom,
 * leaving the queued regular SYN untouched for retransmission.
 */
syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
/* Copy user iovecs into the SYN until 'space' bytes are filled. */
for (i = 0; i < iovlen && syn_data->len < space; ++i) {
struct iovec *iov = &fo->data->msg_iov[i];
unsigned char __user *from = iov->iov_base;
int len = iov->iov_len;
/* Truncate the chunk that would overflow the available space. */
if (syn_data->len + len > space)
len = space - syn_data->len;
else if (i + 1 == iovlen)
/* No more data pending in inet_wait_for_connect() */
fo->data = NULL;
if (skb_add_data(syn_data, from, len))
goto fallback;
}
/* Queue a data-only packet after the regular SYN for retransmission */
data = pskb_copy(syn_data, sk->sk_allocation);
if (data == NULL)
goto fallback;
/* The data starts one sequence number after the SYN. */
TCP_SKB_CB(data)->seq++;
/* NOTE(review): the flag mask below has no lasting effect; the next
 * statement overwrites tcp_flags entirely.
 */
TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
/* NOTE(review): third argument is 0 here but 1 for the regular SYN
 * below; presumably clone_it, i.e. syn_data is handed to the transmit
 * path rather than cloned - confirm.
 */
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
goto done;
}
/* Transmit failed: drop our reference so the kfree_skb() on the fallback
 * path does not touch syn_data again (presumably already consumed by
 * tcp_transmit_skb() - confirm).
 */
syn_data = NULL;
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
fo->cookie.len = 0;
err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
if (err)
tp->syn_fastopen = 0;
/* Frees the private copy when we bailed out before transmitting it;
 * syn_data is NULL on the other fallback entries.
 */
kfree_skb(syn_data);
done:
/* tcp_syn_options() only emits the option when cookie.len >= 0. */
fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
return err;
}
/* Build a SYN and send it off. */ /* Build a SYN and send it off. */
int tcp_connect(struct sock *sk) int tcp_connect(struct sock *sk)
{ {
@ -2841,17 +2942,13 @@ int tcp_connect(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER); skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
tcp_connect_queue_skb(sk, buff);
TCP_ECN_send_syn(sk, buff); TCP_ECN_send_syn(sk, buff);
/* Send it off. */ /* Send off SYN; include data in Fast Open. */
TCP_SKB_CB(buff)->when = tcp_time_stamp; err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
tp->retrans_stamp = TCP_SKB_CB(buff)->when; tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
skb_header_release(buff);
__tcp_add_write_queue_tail(sk, buff);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
tp->packets_out += tcp_skb_pcount(buff);
err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
if (err == -ECONNREFUSED) if (err == -ECONNREFUSED)
return err; return err;