Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (synced 2024-12-28 16:52:18 +00:00)
rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985]
When an rxrpc call is in its transmission phase and is sending a lot of
packets, stalls occasionally occur that cause severe performance
degradation (eg. increasing the transmission time for a 256MiB payload
from 0.7s to 2.5s over a 10G link).

rxrpc already implements TCP-style congestion control [RFC5681] and this
helps mitigate the effects, but occasionally a timer event that should
deal with a missing ACK gets missed, leading to a stall until the RTO
expires.

Fix this by implementing RACK/TLP in rxrpc.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 7c48266593
parent 4ee4c2f82b
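For orientation before the diff: the patch implements the two core rules of RFC 8985. A segment is declared lost once a segment sent after it has been ACK'd and a reordering window has elapsed; and a Tail Loss Probe is fired after roughly two smoothed RTTs if no ACK arrives. Below is a minimal sketch of those rules in plain C with integer nanosecond timestamps — the names here are illustrative only, not the kernel's actual API.

	#include <stdbool.h>
	#include <stdint.h>

	struct rack_state {
		int64_t xmit_ts; /* Send time of most recently delivered segment */
		int64_t rtt;     /* RTT measured for that segment */
		int64_t reo_wnd; /* Reordering window */
	};

	/* RACK loss detection [RFC8985 6.2], simplified: an unacked segment is
	 * deemed lost once a segment sent after it has been delivered and the
	 * reordering window has passed since its (re)transmission.
	 */
	static bool rack_segment_lost(const struct rack_state *rack,
				      int64_t seg_xmit_ts, int64_t now)
	{
		return seg_xmit_ts < rack->xmit_ts &&
		       seg_xmit_ts + rack->rtt + rack->reo_wnd <= now;
	}

	/* TLP probe timeout [RFC8985 7.2]: roughly 2 * SRTT, padded with the
	 * peer's worst-case delayed-ACK budget when only one segment is in
	 * flight.
	 */
	static int64_t tlp_probe_timeout(int64_t srtt, int64_t max_ack_delay,
					 unsigned int in_flight)
	{
		int64_t pto = 2 * srtt;

		if (in_flight == 1)
			pto += max_ack_delay;
		return pto;
	}

In the patch below these roles are played by rxrpc_rack_detect_loss() and rxrpc_tlp_calc_pto() respectively, driven from the call's ACK input path and a single shared timer (call->rack_timo_at).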
include/trace/events/rxrpc.h

@@ -305,7 +305,9 @@
#define rxrpc_txdata_traces \
	EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \
	EM(rxrpc_txdata_new_data, " ") \
	E_(rxrpc_txdata_retransmit, " *RETRANS*")
	EM(rxrpc_txdata_retransmit, " *RETRANS*") \
	EM(rxrpc_txdata_tlp_new_data, " *TLP-NEW*") \
	E_(rxrpc_txdata_tlp_retransmit, " *TLP-RETRANS*")

#define rxrpc_receive_traces \
	EM(rxrpc_receive_end, "END") \

@@ -353,11 +355,12 @@
	EM(rxrpc_timer_trace_hard, "HardLimit") \
	EM(rxrpc_timer_trace_idle, "IdleLimit") \
	EM(rxrpc_timer_trace_keepalive, "KeepAlive") \
	EM(rxrpc_timer_trace_lost_ack, "LostAck ") \
	EM(rxrpc_timer_trace_ping, "DelayPing") \
	EM(rxrpc_timer_trace_resend, "Resend ") \
	EM(rxrpc_timer_trace_resend_reset, "ResendRst") \
	E_(rxrpc_timer_trace_resend_tx, "ResendTx ")
	EM(rxrpc_timer_trace_rack_off, "RACK-OFF ") \
	EM(rxrpc_timer_trace_rack_zwp, "RACK-ZWP ") \
	EM(rxrpc_timer_trace_rack_reo, "RACK-Reo ") \
	EM(rxrpc_timer_trace_rack_tlp_pto, "TLP-PTO ") \
	E_(rxrpc_timer_trace_rack_rto, "RTO ")

#define rxrpc_propose_ack_traces \
	EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \

@@ -478,9 +481,9 @@
	EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \
	EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \
	EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \
	EM(rxrpc_txbuf_see_lost, "SEE LOST ") \
	EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \
	EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \
	E_(rxrpc_txbuf_see_unacked, "SEE UNACKED")
	E_(rxrpc_txbuf_see_send_more, "SEE SEND+ ")

#define rxrpc_tq_traces \
	EM(rxrpc_tq_alloc, "ALLOC") \

@@ -505,6 +508,24 @@
	EM(rxrpc_rotate_trace_sack, "soft-ack") \
	E_(rxrpc_rotate_trace_snak, "soft-nack")

#define rxrpc_rack_timer_modes \
	EM(RXRPC_CALL_RACKTIMER_OFF, "---") \
	EM(RXRPC_CALL_RACKTIMER_RACK_REORDER, "REO") \
	EM(RXRPC_CALL_RACKTIMER_TLP_PTO, "TLP") \
	E_(RXRPC_CALL_RACKTIMER_RTO, "RTO")

#define rxrpc_tlp_probe_traces \
	EM(rxrpc_tlp_probe_trace_busy, "busy") \
	EM(rxrpc_tlp_probe_trace_transmit_new, "transmit-new") \
	E_(rxrpc_tlp_probe_trace_retransmit, "retransmit")

#define rxrpc_tlp_ack_traces \
	EM(rxrpc_tlp_ack_trace_acked, "acked") \
	EM(rxrpc_tlp_ack_trace_dup_acked, "dup-acked") \
	EM(rxrpc_tlp_ack_trace_hard_beyond, "hard-beyond") \
	EM(rxrpc_tlp_ack_trace_incomplete, "incomplete") \
	E_(rxrpc_tlp_ack_trace_new_data, "new-data")

/*
 * Generate enums for tracing information.
 */

@@ -537,6 +558,8 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte);
enum rxrpc_tlp_ack_trace { rxrpc_tlp_ack_traces } __mode(byte);
enum rxrpc_tlp_probe_trace { rxrpc_tlp_probe_traces } __mode(byte);
enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte);
enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte);
enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte);

@@ -567,6 +590,7 @@ rxrpc_conn_traces;
rxrpc_local_traces;
rxrpc_pmtud_reduce_traces;
rxrpc_propose_ack_traces;
rxrpc_rack_timer_modes;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;

@@ -576,6 +600,8 @@ rxrpc_rtt_tx_traces;
rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
rxrpc_tlp_ack_traces;
rxrpc_tlp_probe_traces;
rxrpc_tq_traces;
rxrpc_tx_points;
rxrpc_txbuf_traces;

@@ -618,6 +644,20 @@ TRACE_EVENT(rxrpc_local,
		  __entry->usage)
);

TRACE_EVENT(rxrpc_iothread_rx,
	TP_PROTO(struct rxrpc_local *local, unsigned int nr_rx),
	TP_ARGS(local, nr_rx),
	TP_STRUCT__entry(
		__field(unsigned int, local)
		__field(unsigned int, nr_rx)
	),
	TP_fast_assign(
		__entry->local = local->debug_id;
		__entry->nr_rx = nr_rx;
	),
	TP_printk("L=%08x nrx=%u", __entry->local, __entry->nr_rx)
);

TRACE_EVENT(rxrpc_peer,
	TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why),

@@ -1684,16 +1724,15 @@ TRACE_EVENT(rxrpc_drop_ack,
TRACE_EVENT(rxrpc_retransmit,
	TP_PROTO(struct rxrpc_call *call,
		 struct rxrpc_send_data_req *req,
		 struct rxrpc_txbuf *txb, ktime_t expiry),
		 struct rxrpc_txbuf *txb),

	TP_ARGS(call, req, txb, expiry),
	TP_ARGS(call, req, txb),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(unsigned int, qbase)
		__field(rxrpc_seq_t, seq)
		__field(rxrpc_serial_t, serial)
		__field(ktime_t, expiry)
	),

	TP_fast_assign(

@@ -1701,15 +1740,13 @@ TRACE_EVENT(rxrpc_retransmit,
		__entry->qbase = req->tq->qbase;
		__entry->seq = req->seq;
		__entry->serial = txb->serial;
		__entry->expiry = expiry;
	),

	TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld",
	TP_printk("c=%08x tq=%x q=%x r=%x",
		  __entry->call,
		  __entry->qbase,
		  __entry->seq,
		  __entry->serial,
		  ktime_to_us(__entry->expiry))
		  __entry->serial)
);

TRACE_EVENT(rxrpc_congest,

@@ -1767,9 +1804,9 @@ TRACE_EVENT(rxrpc_congest,
);

TRACE_EVENT(rxrpc_reset_cwnd,
	TP_PROTO(struct rxrpc_call *call, ktime_t now),
	TP_PROTO(struct rxrpc_call *call, ktime_t since_last_tx, ktime_t rtt),

	TP_ARGS(call, now),
	TP_ARGS(call, since_last_tx, rtt),

	TP_STRUCT__entry(
		__field(unsigned int, call)

@@ -1779,6 +1816,7 @@ TRACE_EVENT(rxrpc_reset_cwnd,
		__field(rxrpc_seq_t, hard_ack)
		__field(rxrpc_seq_t, prepared)
		__field(ktime_t, since_last_tx)
		__field(ktime_t, rtt)
		__field(bool, has_data)
	),

@@ -1789,18 +1827,20 @@ TRACE_EVENT(rxrpc_reset_cwnd,
		__entry->extra = call->cong_extra;
		__entry->hard_ack = call->acks_hard_ack;
		__entry->prepared = call->send_top - call->tx_bottom;
		__entry->since_last_tx = ktime_sub(now, call->tx_last_sent);
		__entry->since_last_tx = since_last_tx;
		__entry->rtt = rtt;
		__entry->has_data = call->tx_bottom != call->tx_top;
	),

	TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u",
	TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu/%llu d=%u",
		  __entry->call,
		  __entry->hard_ack,
		  __print_symbolic(__entry->ca_state, rxrpc_ca_states),
		  __entry->cwnd,
		  __entry->extra,
		  __entry->prepared,
		  ktime_to_ns(__entry->since_last_tx),
		  ktime_to_us(__entry->since_last_tx),
		  ktime_to_us(__entry->rtt),
		  __entry->has_data)
);

@@ -1925,6 +1965,32 @@ TRACE_EVENT(rxrpc_resend,
		  __entry->transmitted)
);

TRACE_EVENT(rxrpc_resend_lost,
	TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, unsigned long lost),

	TP_ARGS(call, tq, lost),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_seq_t, qbase)
		__field(u8, nr_rep)
		__field(unsigned long, lost)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->qbase = tq->qbase;
		__entry->nr_rep = tq->nr_reported_acks;
		__entry->lost = lost;
	),

	TP_printk("c=%08x tq=%x lost=%016lx nr=%u",
		  __entry->call,
		  __entry->qbase,
		  __entry->lost,
		  __entry->nr_rep)
);

TRACE_EVENT(rxrpc_rotate,
	TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq,
		 struct rxrpc_ack_summary *summary, rxrpc_seq_t seq,

@@ -2363,6 +2429,244 @@ TRACE_EVENT(rxrpc_pmtud_reduce,
		  __entry->serial, __entry->max_data)
);

TRACE_EVENT(rxrpc_rack,
	TP_PROTO(struct rxrpc_call *call, ktime_t timo),

	TP_ARGS(call, timo),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_serial_t, ack_serial)
		__field(rxrpc_seq_t, seq)
		__field(enum rxrpc_rack_timer_mode, mode)
		__field(unsigned short, nr_sent)
		__field(unsigned short, nr_lost)
		__field(unsigned short, nr_resent)
		__field(unsigned short, nr_sacked)
		__field(ktime_t, timo)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->ack_serial = call->rx_serial;
		__entry->seq = call->rack_end_seq;
		__entry->mode = call->rack_timer_mode;
		__entry->nr_sent = call->tx_nr_sent;
		__entry->nr_lost = call->tx_nr_lost;
		__entry->nr_resent = call->tx_nr_resent;
		__entry->nr_sacked = call->acks_nr_sacks;
		__entry->timo = timo;
	),

	TP_printk("c=%08x r=%08x q=%08x %s slrs=%u,%u,%u,%u t=%lld",
		  __entry->call, __entry->ack_serial, __entry->seq,
		  __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
		  __entry->nr_sent, __entry->nr_lost,
		  __entry->nr_resent, __entry->nr_sacked,
		  ktime_to_us(__entry->timo))
);

TRACE_EVENT(rxrpc_rack_update,
	TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary),

	TP_ARGS(call, summary),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_serial_t, ack_serial)
		__field(rxrpc_seq_t, seq)
		__field(int, xmit_ts)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->ack_serial = call->rx_serial;
		__entry->seq = call->rack_end_seq;
		__entry->xmit_ts = ktime_sub(call->acks_latest_ts, call->rack_xmit_ts);
	),

	TP_printk("c=%08x r=%08x q=%08x xt=%lld",
		  __entry->call, __entry->ack_serial, __entry->seq,
		  ktime_to_us(__entry->xmit_ts))
);

TRACE_EVENT(rxrpc_rack_scan_loss,
	TP_PROTO(struct rxrpc_call *call),

	TP_ARGS(call),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(ktime_t, rack_rtt)
		__field(ktime_t, rack_reo_wnd)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->rack_rtt = call->rack_rtt;
		__entry->rack_reo_wnd = call->rack_reo_wnd;
	),

	TP_printk("c=%08x rtt=%lld reow=%lld",
		  __entry->call, ktime_to_us(__entry->rack_rtt),
		  ktime_to_us(__entry->rack_reo_wnd))
);

TRACE_EVENT(rxrpc_rack_scan_loss_tq,
	TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq,
		 unsigned long nacks),

	TP_ARGS(call, tq, nacks),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_seq_t, qbase)
		__field(unsigned long, nacks)
		__field(unsigned long, lost)
		__field(unsigned long, retrans)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->qbase = tq->qbase;
		__entry->nacks = nacks;
		__entry->lost = tq->segment_lost;
		__entry->retrans = tq->segment_retransmitted;
	),

	TP_printk("c=%08x q=%08x n=%lx l=%lx r=%lx",
		  __entry->call, __entry->qbase,
		  __entry->nacks, __entry->lost, __entry->retrans)
);

TRACE_EVENT(rxrpc_rack_detect_loss,
	TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
		 rxrpc_seq_t seq),

	TP_ARGS(call, summary, seq),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_serial_t, ack_serial)
		__field(rxrpc_seq_t, seq)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->ack_serial = call->rx_serial;
		__entry->seq = seq;
	),

	TP_printk("c=%08x r=%08x q=%08x",
		  __entry->call, __entry->ack_serial, __entry->seq)
);

TRACE_EVENT(rxrpc_rack_mark_loss_tq,
	TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq),

	TP_ARGS(call, tq),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_seq_t, qbase)
		__field(rxrpc_seq_t, trans)
		__field(unsigned long, acked)
		__field(unsigned long, lost)
		__field(unsigned long, retrans)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->qbase = tq->qbase;
		__entry->trans = call->tx_transmitted;
		__entry->acked = tq->segment_acked;
		__entry->lost = tq->segment_lost;
		__entry->retrans = tq->segment_retransmitted;
	),

	TP_printk("c=%08x tq=%08x txq=%08x a=%lx l=%lx r=%lx",
		  __entry->call, __entry->qbase, __entry->trans,
		  __entry->acked, __entry->lost, __entry->retrans)
);

TRACE_EVENT(rxrpc_tlp_probe,
	TP_PROTO(struct rxrpc_call *call, enum rxrpc_tlp_probe_trace trace),

	TP_ARGS(call, trace),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_serial_t, serial)
		__field(rxrpc_seq_t, seq)
		__field(enum rxrpc_tlp_probe_trace, trace)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->serial = call->tlp_serial;
		__entry->seq = call->tlp_seq;
		__entry->trace = trace;
	),

	TP_printk("c=%08x r=%08x pq=%08x %s",
		  __entry->call, __entry->serial, __entry->seq,
		  __print_symbolic(__entry->trace, rxrpc_tlp_probe_traces))
);

TRACE_EVENT(rxrpc_tlp_ack,
	TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
		 enum rxrpc_tlp_ack_trace trace),

	TP_ARGS(call, summary, trace),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(rxrpc_serial_t, serial)
		__field(rxrpc_seq_t, tlp_seq)
		__field(rxrpc_seq_t, hard_ack)
		__field(enum rxrpc_tlp_ack_trace, trace)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->serial = call->tlp_serial;
		__entry->tlp_seq = call->tlp_seq;
		__entry->hard_ack = call->acks_hard_ack;
		__entry->trace = trace;
	),

	TP_printk("c=%08x r=%08x pq=%08x hq=%08x %s",
		  __entry->call, __entry->serial,
		  __entry->tlp_seq, __entry->hard_ack,
		  __print_symbolic(__entry->trace, rxrpc_tlp_ack_traces))
);

TRACE_EVENT(rxrpc_rack_timer,
	TP_PROTO(struct rxrpc_call *call, ktime_t delay, bool exp),

	TP_ARGS(call, delay, exp),

	TP_STRUCT__entry(
		__field(unsigned int, call)
		__field(bool, exp)
		__field(enum rxrpc_rack_timer_mode, mode)
		__field(ktime_t, delay)
	),

	TP_fast_assign(
		__entry->call = call->debug_id;
		__entry->exp = exp;
		__entry->mode = call->rack_timer_mode;
		__entry->delay = delay;
	),

	TP_printk("c=%08x %s %s to=%lld",
		  __entry->call,
		  __entry->exp ? "Exp" : "Set",
		  __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
		  ktime_to_us(__entry->delay))
);

#undef EM
#undef E_
net/rxrpc/Makefile

@@ -16,6 +16,7 @@ rxrpc-y := \
	conn_object.o \
	conn_service.o \
	input.o \
	input_rack.o \
	insecure.o \
	io_thread.o \
	key.o \
net/rxrpc/ar-internal.h

@@ -621,6 +621,18 @@ enum rxrpc_ca_state {
	NR__RXRPC_CA_STATES
} __mode(byte);

/*
 * Current purpose of call RACK timer.  According to the RACK-TLP protocol
 * [RFC8985], the transmission timer (call->rack_timo_at) may only be used for
 * one of these at once.
 */
enum rxrpc_rack_timer_mode {
	RXRPC_CALL_RACKTIMER_OFF,		/* Timer not running */
	RXRPC_CALL_RACKTIMER_RACK_REORDER,	/* RACK reordering timer */
	RXRPC_CALL_RACKTIMER_TLP_PTO,		/* TLP timeout */
	RXRPC_CALL_RACKTIMER_RTO,		/* Retransmission timeout */
} __mode(byte);

/*
 * RxRPC call definition
 * - matched by { connection, call_id }

@@ -638,8 +650,7 @@ struct rxrpc_call {
	struct mutex user_mutex;	/* User access mutex */
	struct sockaddr_rxrpc dest_srx;	/* Destination address */
	ktime_t delay_ack_at;		/* When DELAY ACK needs to happen */
	ktime_t ack_lost_at;		/* When ACK is figured as lost */
	ktime_t resend_at;		/* When next resend needs to happen */
	ktime_t rack_timo_at;		/* When ACK is figured as lost */
	ktime_t ping_at;		/* When next to send a ping */
	ktime_t keepalive_at;		/* When next to send a keepalive ping */
	ktime_t expect_rx_by;		/* When we expect to get a packet by */

@@ -695,8 +706,12 @@ struct rxrpc_call {
	rxrpc_seq_t tx_bottom;		/* First packet in buffer */
	rxrpc_seq_t tx_transmitted;	/* Highest packet transmitted */
	rxrpc_seq_t tx_top;		/* Highest Tx slot allocated. */
	rxrpc_serial_t tx_last_serial;	/* Serial of last DATA transmitted */
	u16 tx_backoff;			/* Delay to insert due to Tx failure (ms) */
	u8 tx_winsize;			/* Maximum size of Tx window */
	u16 tx_nr_sent;			/* Number of packets sent, but unacked */
	u16 tx_nr_lost;			/* Number of packets marked lost */
	u16 tx_nr_resent;		/* Number of packets resent, but unacked */
	u16 tx_winsize;			/* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW 128
	u8 tx_jumbo_max;		/* Maximum subpkts peer will accept */
	ktime_t tx_last_sent;		/* Last time a transmission occurred */

@@ -725,6 +740,25 @@ struct rxrpc_call {
	u16 cong_cumul_acks;		/* Cumulative ACK count */
	ktime_t cong_tstamp;		/* Last time cwnd was changed */

	/* RACK-TLP [RFC8985] state. */
	ktime_t rack_xmit_ts;		/* Latest transmission timestamp */
	ktime_t rack_rtt;		/* RTT of most recently ACK'd segment */
	ktime_t rack_rtt_ts;		/* Timestamp of rack_rtt */
	ktime_t rack_reo_wnd;		/* Reordering window */
	unsigned int rack_reo_wnd_mult;	/* Multiplier applied to rack_reo_wnd */
	int rack_reo_wnd_persist;	/* Num loss recoveries before reset reo_wnd */
	rxrpc_seq_t rack_fack;		/* Highest sequence so far ACK'd */
	rxrpc_seq_t rack_end_seq;	/* Highest sequence seen */
	rxrpc_seq_t rack_dsack_round;	/* DSACK opt recv'd in latest roundtrip */
	bool rack_dsack_round_none;	/* T if dsack_round is "None" */
	bool rack_reordering_seen;	/* T if detected reordering event */
	enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */
	bool tlp_is_retrans;		/* T if unacked TLP retransmission */
	rxrpc_serial_t tlp_serial;	/* Serial of TLP probe (or 0 if none in progress) */
	rxrpc_seq_t tlp_seq;		/* Sequence of TLP probe */
	unsigned int tlp_rtt_taken;	/* Last time RTT taken */
	ktime_t tlp_max_ack_delay;	/* Sender budget for max delayed ACK interval */

	/* Receive-phase ACK management (ACKs we send). */
	u8 ackr_reason;			/* reason to ACK */
	u16 ackr_sack_base;		/* Starting slot in SACK table ring */

@@ -783,6 +817,9 @@ struct rxrpc_ack_summary {
	bool retrans_timeo:1;		/* T if reTx due to timeout happened */
	bool need_retransmit:1;		/* T if we need transmission */
	bool rtt_sample_avail:1;	/* T if RTT sample available */
	bool in_fast_or_rto_recovery:1;
	bool exiting_fast_or_rto_recovery:1;
	bool tlp_probe_acked:1;		/* T if the TLP probe seq was acked */
	u8 /*enum rxrpc_congest_change*/ change;
};

@@ -864,6 +901,7 @@ struct rxrpc_txqueue {
	unsigned long segment_lost;		/* Bit-per-buf: Set if declared lost */
	unsigned long segment_retransmitted;	/* Bit-per-buf: Set if retransmitted */
	unsigned long rtt_samples;		/* Bit-per-buf: Set if available for RTT */
	unsigned long ever_retransmitted;	/* Bit-per-buf: Set if ever retransmitted */

	/* The arrays we want to pack into as few cache lines as possible. */
	struct {

@@ -883,7 +921,9 @@ struct rxrpc_send_data_req {
	struct rxrpc_txqueue *tq;	/* Tx queue segment holding first DATA */
	rxrpc_seq_t seq;		/* Sequence of first data */
	int n;				/* Number of DATA packets to glue into jumbo */
	bool retrans;			/* T if this is a retransmission */
	bool did_send;			/* T if did actually send */
	bool tlp_probe;			/* T if this is a TLP probe */
	int /* enum rxrpc_txdata_trace */ trace;
};

@@ -943,8 +983,9 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
			enum rxrpc_propose_ack_trace why);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
			     enum rxrpc_propose_ack_trace);
void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response);

void rxrpc_resend_tlp(struct rxrpc_call *call);
void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit,
			      enum rxrpc_txdata_trace trace);
bool rxrpc_input_call_event(struct rxrpc_call *call);

/*

@@ -1123,6 +1164,32 @@ void rxrpc_congestion_degrade(struct rxrpc_call *);
void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);

/*
 * input_rack.c
 */
void rxrpc_input_rack_one(struct rxrpc_call *call,
			  struct rxrpc_ack_summary *summary,
			  struct rxrpc_txqueue *tq,
			  unsigned int ix);
void rxrpc_input_rack(struct rxrpc_call *call,
		      struct rxrpc_ack_summary *summary,
		      struct rxrpc_txqueue *tq,
		      unsigned long new_acks);
void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
					  struct rxrpc_ack_summary *summary);
ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now);
void rxrpc_tlp_send_probe(struct rxrpc_call *call);
void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary);
void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by);

/* Initialise TLP state [RFC8958 7.1]. */
static inline void rxrpc_tlp_init(struct rxrpc_call *call)
{
	call->tlp_serial = 0;
	call->tlp_seq = call->acks_hard_ack;
	call->tlp_is_retrans = false;
}

/*
 * io_thread.c
 */

@@ -1402,6 +1469,11 @@ static inline u32 latest(u32 seq1, u32 seq2)
	return after(seq1, seq2) ? seq1 : seq2;
}

static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq)
{
	return (seq & (RXRPC_NR_TXQUEUE - 1)) == tq->qbase;
}

static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
	rxrpc_get_skb(skb, rxrpc_skb_get_call_rx);

@@ -1409,6 +1481,31 @@ static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk
	rxrpc_poke_call(call, rxrpc_call_poke_rx_packet);
}

/*
 * Calculate how much space there is for transmitting more DATA packets.
 */
static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call)
{
	int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra);
	int transmitted = call->tx_top - call->tx_bottom;

	return max(winsize - transmitted, 0);
}

static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call)
{
	return call->acks_nr_sacks + call->tx_nr_lost;
}

/*
 * Calculate the number of transmitted DATA packets assumed to be in flight
 * [approx RFC6675].
 */
static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call)
{
	return call->tx_nr_sent - rxrpc_left_out(call) + call->tx_nr_resent;
}

/*
 * debug tracing
 */
net/rxrpc/call_event.c

@@ -54,35 +54,21 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
	trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack);
}

/*
 * Handle congestion being detected by the retransmit timeout.
 */
static void rxrpc_congestion_timeout(struct rxrpc_call *call)
{
	set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
}

/*
 * Retransmit one or more packets.
 */
static bool rxrpc_retransmit_data(struct rxrpc_call *call,
				  struct rxrpc_send_data_req *req,
				  ktime_t rto, bool skip_too_young)
				  struct rxrpc_send_data_req *req)
{
	struct rxrpc_txqueue *tq = req->tq;
	unsigned int ix = req->seq & RXRPC_TXQ_MASK;
	struct rxrpc_txbuf *txb = tq->bufs[ix];
	ktime_t xmit_ts, resend_at;

	_enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id);

	xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
	resend_at = ktime_add(xmit_ts, rto);
	trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now));
	if (skip_too_young && ktime_after(resend_at, req->now))
		return false;
	req->retrans = true;
	trace_rxrpc_retransmit(call, req, txb);

	__set_bit(ix, &tq->segment_retransmitted);
	txb->flags |= RXRPC_TXBUF_RESENT;
	rxrpc_send_data_packet(call, req);
	rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);

@@ -97,135 +83,78 @@ static bool rxrpc_retransmit_data(struct rxrpc_call *call,
/*
 * Perform retransmission of NAK'd and unack'd packets.
 */
void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response)
static void rxrpc_resend(struct rxrpc_call *call)
{
	struct rxrpc_send_data_req req = {
		.now	= ktime_get_real(),
		.trace	= rxrpc_txdata_retransmit,
	};
	struct rxrpc_txqueue *tq = call->tx_queue;
	ktime_t lowest_xmit_ts = KTIME_MAX;
	ktime_t rto = rxrpc_get_rto_backoff(call, false);
	bool unacked = false;
	struct rxrpc_txqueue *tq;

	_enter("{%d,%d}", call->tx_bottom, call->tx_top);

	if (call->tx_bottom == call->tx_top) {
		call->resend_at = KTIME_MAX;
		trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
		return;
	}
	trace_rxrpc_resend(call, call->acks_highest_serial);

	trace_rxrpc_resend(call, ack_serial);

	/* Scan the transmission queue, looking for explicitly NAK'd packets. */
	do {
		unsigned long naks = ~tq->segment_acked;
		rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1;
	/* Scan the transmission queue, looking for lost packets. */
	for (tq = call->tx_queue; tq; tq = tq->next) {
		unsigned long lost = tq->segment_lost;

		if (after(tq->qbase, call->tx_transmitted))
			break;

		if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
			naks &= (1UL << tq->nr_reported_acks) - 1;

		_debug("retr %16lx %u c=%08x [%x]",
		       tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase);
		_debug("nack %16lx", naks);
		_debug("lost %16lx", lost);

		while (naks) {
			unsigned int ix = __ffs(naks);
		trace_rxrpc_resend_lost(call, tq, lost);
		while (lost) {
			unsigned int ix = __ffs(lost);
			struct rxrpc_txbuf *txb = tq->bufs[ix];

			__clear_bit(ix, &naks);
			if (after(txb->serial, call->acks_highest_serial))
				continue; /* Ack point not yet reached */

			rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
			__clear_bit(ix, &lost);
			rxrpc_see_txbuf(txb, rxrpc_txbuf_see_lost);

			req.tq  = tq;
			req.seq = tq->qbase + ix;
			req.n   = 1;
			rxrpc_retransmit_data(call, &req, rto, false);
			rxrpc_retransmit_data(call, &req);
		}

		/* Anything after the soft-ACK table up to and including
		 * ack.previousPacket will get ACK'd or NACK'd in due course,
		 * so don't worry about those here.  We do, however, need to
		 * consider retransmitting anything beyond that point.
		 */
		if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE &&
		    after(tq_top, call->acks_prev_seq)) {
			rxrpc_seq_t start = latest(call->acks_prev_seq,
						   tq->qbase + tq->nr_reported_acks);
			rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted);

			_debug("unrep %x-%x", start, stop);
			for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) {
				rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK];

				if (ping_response &&
				    before(serial, call->acks_highest_serial))
					break; /* Wasn't accounted for by a more recent ping. */
				req.tq  = tq;
				req.seq = seq;
				req.n   = 1;
				if (rxrpc_retransmit_data(call, &req, rto, true))
					unacked = true;
			}
		}

		/* Work out the next retransmission timeout. */
		if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) {
			unsigned int lowest_us = UINT_MAX;

			for (int i = 0; i < RXRPC_NR_TXQUEUE; i++)
				if (!test_bit(i, &tq->segment_acked) &&
				    tq->segment_xmit_ts[i] < lowest_us)
					lowest_us = tq->segment_xmit_ts[i];
			_debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us);

			if (lowest_us != UINT_MAX) {
				ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us);

				if (ktime_before(lowest_ns, lowest_xmit_ts))
					lowest_xmit_ts = lowest_ns;
			}
		}
	} while ((tq = tq->next));

	if (lowest_xmit_ts < KTIME_MAX) {
		ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send);
		ktime_t resend_at = ktime_add(lowest_xmit_ts, delay);

		_debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now));
		call->resend_at = resend_at;
		trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now),
				      rxrpc_timer_trace_resend_reset);
	} else {
		call->resend_at = KTIME_MAX;
		trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
	}

	if (unacked)
		rxrpc_congestion_timeout(call);

	/* If there was nothing that needed retransmission then it's likely
	 * that an ACK got lost somewhere.  Send a ping to find out instead of
	 * retransmitting data.
	 */
	if (!req.did_send) {
		ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
						 call->srtt_us >> 3);

		if (ktime_sub(next_ping, req.now) <= 0)
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_0_retrans);
	}

	rxrpc_get_rto_backoff(call, req.did_send);
	_leave("");
}

/*
 * Resend the highest-seq DATA packet so far transmitted for RACK-TLP [RFC8985 7.3].
 */
void rxrpc_resend_tlp(struct rxrpc_call *call)
{
	struct rxrpc_send_data_req req = {
		.now		= ktime_get_real(),
		.seq		= call->tx_transmitted,
		.n		= 1,
		.tlp_probe	= true,
		.trace		= rxrpc_txdata_tlp_retransmit,
	};

	/* There's a chance it'll be on the tail segment of the queue. */
	req.tq = READ_ONCE(call->tx_qtail);
	if (req.tq &&
	    before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) {
		rxrpc_retransmit_data(call, &req);
		return;
	}

	for (req.tq = call->tx_queue; req.tq; req.tq = req.tq->next) {
		if (after_eq(call->tx_transmitted, req.tq->qbase) &&
		    before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) {
			rxrpc_retransmit_data(call, &req);
			return;
		}
	}
}

/*
 * Start transmitting the reply to a service.  This cancels the need to ACK the
 * request if we haven't yet done so.

@@ -259,18 +188,10 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call)
	}
}

static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call)
{
	int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra);
	int in_flight = call->tx_top - call->tx_bottom;

	return max(winsize - in_flight, 0);
}

/*
 * Transmit some as-yet untransmitted data.
 * Transmit some as-yet untransmitted data, to a maximum of the supplied limit.
 */
static void rxrpc_transmit_fresh_data(struct rxrpc_call *call,
static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, unsigned int limit,
				      enum rxrpc_txdata_trace trace)
{
	int space = rxrpc_tx_window_space(call);

@@ -335,8 +256,8 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call,
	}
}

static void rxrpc_transmit_some_data(struct rxrpc_call *call,
				     enum rxrpc_txdata_trace trace)
void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit,
			      enum rxrpc_txdata_trace trace)
{
	switch (__rxrpc_call_state(call)) {
	case RXRPC_CALL_SERVER_ACK_REQUEST:

@@ -353,7 +274,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call,
			rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow);
			return;
		}
		rxrpc_transmit_fresh_data(call, trace);
		rxrpc_transmit_fresh_data(call, limit, trace);
		break;
	default:
		return;

@@ -380,7 +301,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
{
	struct sk_buff *skb;
	ktime_t now, t;
	bool resend = false, did_receive = false, saw_ack = false;
	bool did_receive = false, saw_ack = false;
	s32 abort_code;

	rxrpc_see_call(call, rxrpc_call_see_input);

@@ -398,21 +319,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
		goto out;
	}

	while ((skb = __skb_dequeue(&call->rx_queue))) {
		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	do {
		skb = __skb_dequeue(&call->rx_queue);
		if (skb) {
			struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

		if (__rxrpc_call_is_complete(call) ||
		    skb->mark == RXRPC_SKB_MARK_ERROR) {
			if (__rxrpc_call_is_complete(call) ||
			    skb->mark == RXRPC_SKB_MARK_ERROR) {
				rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
				goto out;
			}

			saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;

			rxrpc_input_call_packet(call, skb);
			rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
			goto out;
			did_receive = true;
		}

		saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;
		t = ktime_sub(call->rack_timo_at, ktime_get_real());
		if (t <= 0) {
			trace_rxrpc_timer_exp(call, t,
					      rxrpc_timer_trace_rack_off + call->rack_timer_mode);
			call->rack_timo_at = KTIME_MAX;
			rxrpc_rack_timer_expired(call, t);
		}

		rxrpc_input_call_packet(call, skb);
		rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
		did_receive = true;
	}
	} while (!skb_queue_empty(&call->rx_queue));

	/* If we see our async-event poke, check for timeout trippage. */
	now = ktime_get_real();

@@ -445,13 +378,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
			rxrpc_propose_ack_delayed_ack);
	}

	t = ktime_sub(call->ack_lost_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
		call->ack_lost_at = KTIME_MAX;
		set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
	}

	t = ktime_sub(call->ping_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);

@@ -460,15 +386,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
			rxrpc_propose_ack_ping_for_keepalive);
	}

	t = ktime_sub(call->resend_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
		call->resend_at = KTIME_MAX;
		resend = true;
	}

	rxrpc_transmit_some_data(call, rxrpc_txdata_new_data);

	now = ktime_get_real();
	t = ktime_sub(call->keepalive_at, now);
	if (t <= 0) {

@@ -478,21 +395,30 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
			rxrpc_propose_ack_ping_for_keepalive);
	}

	if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
		rxrpc_send_initial_ping(call);

	rxrpc_transmit_some_data(call, UINT_MAX, rxrpc_txdata_new_data);

	if (saw_ack)
		rxrpc_congestion_degrade(call);

	if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
		rxrpc_send_initial_ping(call);
	if (did_receive &&
	    (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_SEND_REQUEST ||
	     __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SEND_REPLY)) {
		t = ktime_sub(call->rack_timo_at, ktime_get_real());
		trace_rxrpc_rack(call, t);
	}

	/* Process events */
	if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
		rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
			       rxrpc_propose_ack_ping_for_lost_ack);

	if (resend &&
	if (call->tx_nr_lost > 0 &&
	    __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
	    !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
		rxrpc_resend(call, 0, false);
		rxrpc_resend(call);

	if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
		rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,

@@ -520,8 +446,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
	set(call->expect_req_by);
	set(call->expect_rx_by);
	set(call->delay_ack_at);
	set(call->ack_lost_at);
	set(call->resend_at);
	set(call->rack_timo_at);
	set(call->keepalive_at);
	set(call->ping_at);
net/rxrpc/call_object.c

@@ -160,8 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
	call->ackr_window = 1;
	call->ackr_wtop = 1;
	call->delay_ack_at = KTIME_MAX;
	call->ack_lost_at = KTIME_MAX;
	call->resend_at = KTIME_MAX;
	call->rack_timo_at = KTIME_MAX;
	call->ping_at = KTIME_MAX;
	call->keepalive_at = KTIME_MAX;
	call->expect_rx_by = KTIME_MAX;
net/rxrpc/input.c

@@ -27,13 +27,13 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
}

/*
 * Do TCP-style congestion management [RFC 5681].
 * Do TCP-style congestion management [RFC5681].
 */
static void rxrpc_congestion_management(struct rxrpc_call *call,
					struct rxrpc_ack_summary *summary)
{
	summary->change = rxrpc_cong_no_change;
	summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks;
	summary->in_flight = rxrpc_tx_in_flight(call);

	if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
		summary->retrans_timeo = true;

@@ -106,9 +106,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
		call->cong_extra = 0;
		call->cong_dup_acks = 0;
		summary->need_retransmit = true;
		summary->in_fast_or_rto_recovery = true;
		goto out;

	case RXRPC_CA_FAST_RETRANSMIT:
		rxrpc_tlp_init(call);
		summary->in_fast_or_rto_recovery = true;
		if (!summary->new_low_snack) {
			if (summary->nr_new_sacks == 0)
				call->cong_cwnd += 1;

@@ -121,8 +124,10 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
		} else {
			summary->change = rxrpc_cong_progress;
			call->cong_cwnd = call->cong_ssthresh;
			if (call->acks_nr_snacks == 0)
			if (call->acks_nr_snacks == 0) {
				summary->exiting_fast_or_rto_recovery = true;
				goto resume_normality;
			}
		}
		goto out;

@@ -171,7 +176,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
 */
void rxrpc_congestion_degrade(struct rxrpc_call *call)
{
	ktime_t rtt, now;
	ktime_t rtt, now, time_since;

	if (call->cong_ca_state != RXRPC_CA_SLOW_START &&
	    call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE)

@@ -181,10 +186,11 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)

	rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8));
	now = ktime_get_real();
	if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
	time_since = ktime_sub(now, call->tx_last_sent);
	if (ktime_before(time_since, rtt))
		return;

	trace_rxrpc_reset_cwnd(call, now);
	trace_rxrpc_reset_cwnd(call, time_since, rtt);
	rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
	call->tx_last_sent = now;
	call->cong_ca_state = RXRPC_CA_SLOW_START;

@@ -200,11 +206,11 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call,
				      struct rxrpc_txqueue *tq,
				      int ix)
{
	ktime_t xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);

	rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
			   summary->acked_serial, summary->ack_serial,
			   ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]),
			   call->acks_latest_ts);
	summary->rtt_sample_avail = false;
			   xmit_ts, call->acks_latest_ts);
	__clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */
}

@@ -216,7 +222,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
{
	struct rxrpc_txqueue *tq = call->tx_queue;
	rxrpc_seq_t seq = call->tx_bottom + 1;
	bool rot_last = false;
	bool rot_last = false, trace = false;

	_enter("%x,%x", call->tx_bottom, to);

@@ -250,14 +256,16 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
			rot_last = true;
		}

		if (summary->rtt_sample_avail &&
		    summary->acked_serial == tq->segment_serial[ix] &&
		if (summary->acked_serial == tq->segment_serial[ix] &&
		    test_bit(ix, &tq->rtt_samples))
			rxrpc_add_data_rtt_sample(call, summary, tq, ix);

		if (ix == tq->nr_reported_acks) {
			/* Packet directly hard ACK'd. */
			tq->nr_reported_acks++;
			rxrpc_input_rack_one(call, summary, tq, ix);
			if (seq == call->tlp_seq)
				summary->tlp_probe_acked = true;
			summary->nr_new_hacks++;
			__set_bit(ix, &tq->segment_acked);
			trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack);

@@ -268,11 +276,21 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
		} else {
			/* Soft NAK -> hard ACK. */
			call->acks_nr_snacks--;
			rxrpc_input_rack_one(call, summary, tq, ix);
			if (seq == call->tlp_seq)
				summary->tlp_probe_acked = true;
			summary->nr_new_hacks++;
			__set_bit(ix, &tq->segment_acked);
			trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak);
		}

		call->tx_nr_sent--;
		if (__test_and_clear_bit(ix, &tq->segment_lost))
			call->tx_nr_lost--;
		if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
			call->tx_nr_resent--;
		__clear_bit(ix, &tq->ever_retransmitted);

		rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated);
		tq->bufs[ix] = NULL;

@@ -282,7 +300,10 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
					  rxrpc_txqueue_rotate));

		seq++;
		trace = true;
		if (!(seq & RXRPC_TXQ_MASK)) {
			trace_rxrpc_rack_update(call, summary);
			trace = false;
			prefetch(tq->next);
			if (tq != call->tx_qtail) {
				call->tx_qbase += RXRPC_NR_TXQUEUE;

@@ -299,6 +320,9 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,

	} while (before_eq(seq, to));

	if (trace)
		trace_rxrpc_rack_update(call, summary);

	if (rot_last) {
		set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags);
		if (tq) {

@@ -325,8 +349,10 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
	ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));

	call->resend_at = KTIME_MAX;
	trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
	call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF;
	call->rack_timo_at = KTIME_MAX;
	trace_rxrpc_rack_timer(call, 0, false);
	trace_rxrpc_timer_can(call, rxrpc_timer_trace_rack_off + call->rack_timer_mode);

	switch (__rxrpc_call_state(call)) {
	case RXRPC_CALL_CLIENT_SEND_REQUEST:

@@ -842,10 +868,13 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call,
				    rxrpc_seq_t seq,
				    rxrpc_seq_t *lowest_nak)
{
	unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a;
	unsigned long old_reported = 0, flipped, new_acks = 0;
	unsigned long a_to_n, n_to_a = 0;
	int new, a, n;

	old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks);
	if (tq->nr_reported_acks > 0)
		old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks);

	_enter("{%x,%lx,%d},%lx,%d,%x",
	       tq->qbase, tq->segment_acked, tq->nr_reported_acks,
	       extracted_acks, nr_reported, seq);

@@ -898,6 +927,18 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call,
		if (before(lowest, *lowest_nak))
			*lowest_nak = lowest;
	}

	if (summary->acked_serial)
		rxrpc_input_soft_rtt(call, summary, tq);

	new_acks |= n_to_a;
	if (new_acks)
		rxrpc_input_rack(call, summary, tq, new_acks);

	if (call->tlp_serial &&
	    rxrpc_seq_in_txq(tq, call->tlp_seq) &&
	    test_bit(call->tlp_seq - tq->qbase, &new_acks))
		summary->tlp_probe_acked = true;
}

/*

@@ -940,8 +981,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,

		_debug("bound %16lx %u", extracted, nr);

		if (summary->rtt_sample_avail)
			rxrpc_input_soft_rtt(call, summary, tq);
		rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE,
					seq - RXRPC_NR_TXQUEUE, &lowest_nak);
		extracted = ~0UL;

@@ -1063,7 +1102,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
	/* Discard any out-of-order or duplicate ACKs (outside lock). */
	if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) {
		trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt);
		goto send_response;
		goto send_response; /* Still respond if requested. */
	}

	trailer.maxMTU = 0;

@@ -1079,14 +1118,19 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
	call->acks_hard_ack = hard_ack;
	call->acks_prev_seq = prev_pkt;

	switch (summary.ack_reason) {
	case RXRPC_ACK_PING:
		break;
	default:
		if (summary.acked_serial &&
		    after(summary.acked_serial, call->acks_highest_serial))
			call->acks_highest_serial = summary.acked_serial;
		break;
	if (summary.acked_serial) {
		switch (summary.ack_reason) {
		case RXRPC_ACK_PING_RESPONSE:
			rxrpc_complete_rtt_probe(call, call->acks_latest_ts,
						 summary.acked_serial, summary.ack_serial,
						 rxrpc_rtt_rx_ping_response);
			break;
		default:
			if (after(summary.acked_serial, call->acks_highest_serial))
				call->acks_highest_serial = summary.acked_serial;
			summary.rtt_sample_avail = true;
			break;
		}
	}

	/* Parse rwind and mtu sizes if provided. */

@@ -1096,15 +1140,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
	if (hard_ack + 1 == 0)
		return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);

	if (summary.acked_serial) {
		if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE)
			rxrpc_complete_rtt_probe(call, call->acks_latest_ts,
						 summary.acked_serial, summary.ack_serial,
						 rxrpc_rtt_rx_ping_response);
		else
			summary.rtt_sample_avail = true;
	}

	/* Ignore ACKs unless we are or have just been transmitting. */
	switch (__rxrpc_call_state(call)) {
	case RXRPC_CALL_CLIENT_SEND_REQUEST:

@@ -1141,10 +1176,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
		rxrpc_propose_ping(call, summary.ack_serial,
				   rxrpc_propose_ack_ping_for_lost_reply);

	/* Drive the congestion management algorithm first and then RACK-TLP as
	 * the latter depends on the state/change in state in the former.
	 */
	rxrpc_congestion_management(call, &summary);
	if (summary.need_retransmit)
		rxrpc_resend(call, summary.ack_serial,
			     summary.ack_reason == RXRPC_ACK_PING_RESPONSE);
	rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
	rxrpc_tlp_process_ack(call, &summary);
	if (call->tlp_serial && after_eq(summary.acked_serial, call->tlp_serial))
		call->tlp_serial = 0;

send_response:
	if (summary.ack_reason == RXRPC_ACK_PING)
net/rxrpc/input_rack.c (new file, 418 lines)
@ -0,0 +1,418 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* RACK-TLP [RFC8958] Implementation
|
||||
*
|
||||
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include "ar-internal.h"
|
||||
|
||||
static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1,
|
||||
ktime_t t2, rxrpc_seq_t seq2)
|
||||
{
|
||||
if (ktime_after(t1, t2))
|
||||
return true;
|
||||
return t1 == t2 && after(seq1, seq2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a packet lost.
|
||||
*/
|
||||
static void rxrpc_rack_mark_lost(struct rxrpc_call *call,
|
||||
struct rxrpc_txqueue *tq, unsigned int ix)
|
||||
{
|
||||
if (__test_and_set_bit(ix, &tq->segment_lost)) {
|
||||
if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
|
||||
call->tx_nr_resent--;
|
||||
} else {
|
||||
call->tx_nr_lost++;
|
||||
}
|
||||
tq->segment_xmit_ts[ix] = UINT_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the transmission time of a packet in the Tx queue.
|
||||
*/
|
||||
static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix)
|
||||
{
|
||||
if (tq->segment_xmit_ts[ix] == UINT_MAX)
|
||||
return KTIME_MAX;
|
||||
return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a bitmask of nack bits for a queue segment and mask off any that aren't
|
||||
* yet reported.
|
||||
*/
|
||||
static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq)
|
||||
{
|
||||
unsigned long nacks = ~tq->segment_acked;
|
||||
|
||||
if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
|
||||
nacks &= (1UL << tq->nr_reported_acks) - 1;
|
||||
return nacks;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the RACK state for the most recently sent packet that has been
|
||||
* delivered [RFC8958 6.2 Step 2].
|
||||
*/
|
||||
static void rxrpc_rack_update(struct rxrpc_call *call,
|
||||
struct rxrpc_ack_summary *summary,
|
||||
struct rxrpc_txqueue *tq,
|
||||
unsigned int ix)
|
||||
{
|
||||
rxrpc_seq_t seq = tq->qbase + ix;
|
||||
ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
|
||||
ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts);
|
||||
|
||||
if (__test_and_clear_bit(ix, &tq->segment_lost))
|
||||
call->tx_nr_lost--;
|
||||
|
||||
if (test_bit(ix, &tq->segment_retransmitted)) {
|
||||
/* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */
|
||||
if (before(call->acks_highest_serial, tq->segment_serial[ix]))
|
||||
return;
|
||||
if (rtt < minmax_get(&call->min_rtt))
|
||||
return;
|
||||
}
|
||||
|
||||
/* The RACK algorithm requires the segment ACKs to be traversed in
|
||||
* order of segment transmission - but the only thing this seems to
|
||||
* matter for is that RACK.rtt is set to the rtt of the most recently
|
||||
* transmitted segment. We should be able to achieve the same by only
|
||||
* setting RACK.rtt if the xmit time is greater.
|
||||
*/
|
||||
if (ktime_after(xmit_ts, call->rack_rtt_ts)) {
|
||||
call->rack_rtt = rtt;
|
||||
call->rack_rtt_ts = xmit_ts;
|
||||
}
|
||||
|
||||
if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) {
|
||||
call->rack_rtt = rtt;
|
||||
call->rack_xmit_ts = xmit_ts;
|
||||
call->rack_end_seq = seq;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect data segment reordering [RFC8958 6.2 Step 3].
|
||||
*/
|
||||
static void rxrpc_rack_detect_reordering(struct rxrpc_call *call,
|
||||
struct rxrpc_ack_summary *summary,
|
||||
struct rxrpc_txqueue *tq,
|
||||
unsigned int ix)
|
||||
{
|
||||
rxrpc_seq_t seq = tq->qbase + ix;
|
||||
|
||||
/* Track the highest sequence number so far ACK'd. This is not
|
||||
* necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer
|
||||
* could put a NACK in the last SACK slot.
|
||||
*/
|
||||
if (after(seq, call->rack_fack))
|
||||
call->rack_fack = seq;
|
||||
else if (before(seq, call->rack_fack) &&
|
||||
test_bit(ix, &tq->segment_retransmitted))
|
||||
call->rack_reordering_seen = true;
|
||||
}
|
||||
|
||||
void rxrpc_input_rack_one(struct rxrpc_call *call,
|
||||
struct rxrpc_ack_summary *summary,
|
||||
struct rxrpc_txqueue *tq,
|
||||
unsigned int ix)
|
||||
{
|
||||
rxrpc_rack_update(call, summary, tq, ix);
|
||||
rxrpc_rack_detect_reordering(call, summary, tq, ix);
|
||||
}
|
||||
|
||||
void rxrpc_input_rack(struct rxrpc_call *call,
|
||||
struct rxrpc_ack_summary *summary,
|
||||
struct rxrpc_txqueue *tq,
|
||||
unsigned long new_acks)
|
||||
{
|
||||
while (new_acks) {
|
||||
unsigned int ix = __ffs(new_acks);
|
||||
|
||||
__clear_bit(ix, &new_acks);
|
||||
rxrpc_input_rack_one(call, summary, tq, ix);
|
||||
}
|
||||
|
||||
trace_rxrpc_rack_update(call, summary);
|
||||
}

/*
 * Update the reordering window [RFC8985 6.2 Step 4].  Returns the updated
 * duration of the reordering window.
 *
 * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can
 * be given a 'DUPLICATE' reason with the serial number referring to the
 * duplicated DATA packet.  Rx does not inform as to whether this was a
 * reception of the same packet twice or of a retransmission of a packet we
 * already received (though this could be determined by the transmitter based
 * on the serial number).
 */
static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call,
					 struct rxrpc_ack_summary *summary)
{
	rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
	rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */
	bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE;
	int dup_thresh = 3;

	/* DSACK-based reordering window adaptation */
	if (!call->rack_dsack_round_none &&
	    after_eq(snd_una, call->rack_dsack_round))
		call->rack_dsack_round_none = true;

	/* Grow the reordering window per round that sees DSACK.  Reset the
	 * window after 16 DSACK-free recoveries.
	 */
	if (call->rack_dsack_round_none && have_dsack_option) {
		call->rack_dsack_round_none = false;
		call->rack_dsack_round = snd_nxt;
		call->rack_reo_wnd_mult++;
		call->rack_reo_wnd_persist = 16;
	} else if (summary->exiting_fast_or_rto_recovery) {
		call->rack_reo_wnd_persist--;
		if (call->rack_reo_wnd_persist <= 0)
			call->rack_reo_wnd_mult = 1;
	}

	if (!call->rack_reordering_seen) {
		if (summary->in_fast_or_rto_recovery)
			return 0;
		if (call->acks_nr_sacks >= dup_thresh)
			return 0;
	}

	return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4,
				call->srtt_us >> 3));
}
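
The returned window is a quarter of the minimum RTT per DSACK round, capped at the smoothed RTT (srtt_us is kept scaled by 8, hence the >> 3 above). A worked example with assumed values:

	u32 min_rtt_us = 400;			/* minmax_get(&call->min_rtt) */
	u32 srtt_us    = 3200;			/* 8 * 400us */
	u32 reo_mult   = 2;			/* after one round that saw DSACK */
	u32 reo_wnd_us = umin(reo_mult * min_rtt_us / 4, srtt_us >> 3);
						/* umin(200, 400) == 200us */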

/*
 * Detect losses [RFC8985 6.2 Step 5].
 */
static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call,
				      struct rxrpc_ack_summary *summary)
{
	struct rxrpc_txqueue *tq;
	ktime_t timeout = 0, lost_after, now = ktime_get_real();

	call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary);
	lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
	trace_rxrpc_rack_scan_loss(call);

	for (tq = call->tx_queue; tq; tq = tq->next) {
		unsigned long nacks = rxrpc_tq_nacks(tq);

		if (after(tq->qbase, call->tx_transmitted))
			break;
		trace_rxrpc_rack_scan_loss_tq(call, tq, nacks);

		/* Skip ones marked lost but not yet retransmitted */
		nacks &= ~tq->segment_lost | tq->segment_retransmitted;

		while (nacks) {
			unsigned int ix = __ffs(nacks);
			rxrpc_seq_t seq = tq->qbase + ix;
			ktime_t remaining;
			ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

			__clear_bit(ix, &nacks);

			if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq,
						  xmit_ts, seq)) {
				remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now);
				if (remaining <= 0) {
					rxrpc_rack_mark_lost(call, tq, ix);
					trace_rxrpc_rack_detect_loss(call, summary, seq);
				} else {
					timeout = max(remaining, timeout);
				}
			}
		}
	}

	return timeout;
}
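
The inner test amounts to a loss deadline of xmit_ts + RACK.rtt + reo_wnd: once that instant has passed, the NACK'd segment is marked lost; otherwise the shortfall feeds the reordering timer armed below. A minimal sketch of that calculation (hypothetical helper, not part of the patch):

	/* Time left until a NACK'd segment crosses the RACK loss deadline;
	 * a result <= 0 means it is already deemed lost.
	 */
	static ktime_t rack_time_to_loss(ktime_t xmit_ts, ktime_t rack_rtt,
					 ktime_t reo_wnd, ktime_t now)
	{
		ktime_t deadline = ktime_add(xmit_ts, ktime_add(rack_rtt, reo_wnd));

		return ktime_sub(deadline, now);
	}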

/*
 * Detect losses and set a timer to retry the detection [RFC8985 6.2 Step 5].
 */
void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
					  struct rxrpc_ack_summary *summary)
{
	ktime_t timeout = rxrpc_rack_detect_loss(call, summary);

	if (timeout) {
		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER;
		call->rack_timo_at = ktime_add(ktime_get_real(), timeout);
		trace_rxrpc_rack_timer(call, timeout, false);
		trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo);
	}
}

/*
 * Handle RACK-TLP RTO expiration [RFC8985 6.3].
 */
static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call)
{
	struct rxrpc_txqueue *tq;
	rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
	ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
	ktime_t deadline = ktime_sub(ktime_get_real(), lost_after);

	for (tq = call->tx_queue; tq; tq = tq->next) {
		unsigned long unacked = ~tq->segment_acked;

		trace_rxrpc_rack_mark_loss_tq(call, tq);
		while (unacked) {
			unsigned int ix = __ffs(unacked);
			rxrpc_seq_t seq = tq->qbase + ix;
			ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

			if (after(seq, call->tx_transmitted))
				return;
			__clear_bit(ix, &unacked);

			if (seq == snd_una ||
			    ktime_before(xmit_ts, deadline))
				rxrpc_rack_mark_lost(call, tq, ix);
		}
	}
}

/*
 * Calculate the TLP loss probe timeout (PTO) [RFC8985 7.2].
 */
ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now)
{
	unsigned int flight_size = rxrpc_tx_in_flight(call);
	ktime_t rto_at = ktime_add(call->tx_last_sent,
				   rxrpc_get_rto_backoff(call, false));
	ktime_t pto;

	if (call->rtt_count > 0) {
		/* Use 2*SRTT as the timeout (srtt_us is scaled by 8). */
		pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4);
		if (flight_size)
			pto = ktime_add(pto, call->tlp_max_ack_delay);
	} else {
		pto = NSEC_PER_SEC;
	}

	if (ktime_after(ktime_add(now, pto), rto_at))
		pto = ktime_sub(rto_at, now);
	return pto;
}
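
Worked through with assumed numbers: srtt_us = 24000 (SRTT = 3ms, scaled by 8) and data in flight with a 1ms tlp_max_ack_delay give PTO = 2 * 3ms + 1ms = 7ms, which the final clamp then keeps from landing after the pending RTO:

	u32 srtt_us = 24000;					/* 8 * 3ms */
	ktime_t pto = ns_to_ktime(srtt_us * NSEC_PER_USEC / 4);	/* 2 * SRTT = 6ms */

	pto = ktime_add(pto, ms_to_ktime(1));			/* 7ms, before the RTO clamp */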

/*
 * Send a TLP loss probe on PTO expiration [RFC8985 7.3].
 */
void rxrpc_tlp_send_probe(struct rxrpc_call *call)
{
	unsigned int in_flight = rxrpc_tx_in_flight(call);

	if (after_eq(call->acks_hard_ack, call->tx_transmitted))
		return; /* Everything we transmitted has been acked. */

	/* There must be no other loss probe still in flight and we need to
	 * have taken a new RTT sample since last probe or the start of
	 * connection.
	 */
	if (!call->tlp_serial &&
	    call->tlp_rtt_taken != call->rtt_taken) {
		call->tlp_is_retrans = false;
		if (after(call->send_top, call->tx_transmitted) &&
		    rxrpc_tx_window_space(call) > 0) {
			/* Transmit the lowest-sequence unsent DATA */
			call->tx_last_serial = 0;
			rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data);
			call->tlp_serial = call->tx_last_serial;
			call->tlp_seq = call->tx_transmitted;
			trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new);
			in_flight = rxrpc_tx_in_flight(call);
		} else {
			/* Retransmit the highest-sequence DATA sent */
			call->tx_last_serial = 0;
			rxrpc_resend_tlp(call);
			call->tlp_is_retrans = true;
			trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit);
		}
	} else {
		trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy);
	}

	if (in_flight != 0) {
		ktime_t rto = rxrpc_get_rto_backoff(call, false);

		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO;
		call->rack_timo_at = ktime_add(ktime_get_real(), rto);
		trace_rxrpc_rack_timer(call, rto, false);
		trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto);
	}
}
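
The guard packs two requirements into a single test: tlp_serial doubles as an in-flight flag (non-zero while a probe awaits its ACK), and the counter comparison insists on a fresh RTT sample since the previous probe. Restated in isolation (illustrative only):

	bool can_probe = !call->tlp_serial &&			 /* no probe outstanding */
			 call->tlp_rtt_taken != call->rtt_taken; /* new RTT sample taken */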

/*
 * Detect losses using the ACK of a TLP loss probe [RFC8985 7.4].
 */
void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary)
{
	if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack))
		return;

	if (!call->tlp_is_retrans) {
		/* TLP of new data delivered */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data);
		call->tlp_serial = 0;
	} else if (summary->ack_reason == RXRPC_ACK_DUPLICATE &&
		   summary->acked_serial == call->tlp_serial) {
		/* General Case: Detected packet losses using RACK [7.4.1] */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked);
		call->tlp_serial = 0;
	} else if (after(call->acks_hard_ack, call->tlp_seq)) {
		/* Repaired the single loss */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond);
		call->tlp_serial = 0;
		// TODO: Invoke congestion control to react to the loss
		// event the probe has repaired
	} else if (summary->tlp_probe_acked) {
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked);
		/* Special Case: Detected a single loss repaired by the loss
		 * probe [7.4.2]
		 */
		call->tlp_serial = 0;
	} else {
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete);
	}
}

/*
 * Handle RACK timer expiration, dispatching on the mode it was armed with.
 */
void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by)
{
	struct rxrpc_ack_summary summary = {};
	enum rxrpc_rack_timer_mode mode = call->rack_timer_mode;

	trace_rxrpc_rack_timer(call, overran_by, true);
	call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF;

	switch (mode) {
	case RXRPC_CALL_RACKTIMER_RACK_REORDER:
		rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
		break;
	case RXRPC_CALL_RACKTIMER_TLP_PTO:
		rxrpc_tlp_send_probe(call);
		break;
	case RXRPC_CALL_RACKTIMER_RTO:
		// Might need to poke the congestion algo in some way
		rxrpc_rack_mark_losses_on_rto(call);
		break;
	//case RXRPC_CALL_RACKTIMER_ZEROWIN:
	default:
		pr_warn("Unexpected rack timer %u\n", mode);
	}
}

@ -470,6 +470,7 @@ int rxrpc_io_thread(void *data)
		spin_lock_irq(&local->rx_queue.lock);
		skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
		spin_unlock_irq(&local->rx_queue.lock);
		trace_rxrpc_iothread_rx(local, skb_queue_len(&rx_queue));
	}

	/* Distribute packets and errors. */
@ -542,12 +542,14 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
	unsigned int xmit_ts;
	rxrpc_seq_t seq = req->seq;
	size_t len = 0;
	bool start_tlp = false;

	trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);

	/* Each transmission of a Tx packet needs a new serial number */
	serial = rxrpc_get_next_serials(call->conn, req->n);

	call->tx_last_serial = serial + req->n - 1;
	call->tx_last_sent = req->now;
	xmit_ts = rxrpc_prepare_txqueue(tq, req);
	prefetch(tq->next);

@ -557,6 +559,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
		struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];

		_debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);

		/* Record (re-)transmission for RACK [RFC8985 6.1]. */
		if (__test_and_clear_bit(ix, &tq->segment_lost))
			call->tx_nr_lost--;
		if (req->retrans) {
			__set_bit(ix, &tq->ever_retransmitted);
			__set_bit(ix, &tq->segment_retransmitted);
			call->tx_nr_resent++;
		} else {
			call->tx_nr_sent++;
			start_tlp = true;
		}
		tq->segment_xmit_ts[ix] = xmit_ts;
		tq->segment_serial[ix] = serial;
		if (i + 1 == req->n)

@ -576,11 +590,24 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
	}

	/* Set timeouts */
	if (call->rtt_count > 1) {
		ktime_t delay = rxrpc_get_rto_backoff(call, false);
	if (req->tlp_probe) {
		/* Sending TLP loss probe [RFC8985 7.3]. */
		call->tlp_serial = serial - 1;
		call->tlp_seq = seq - 1;
	} else if (start_tlp) {
		/* Schedule TLP loss probe [RFC8985 7.2]. */
		ktime_t pto;

		call->ack_lost_at = ktime_add(req->now, delay);
		trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
		if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
			/* The first packet may take longer to elicit a response. */
			pto = NSEC_PER_SEC;
		else
			pto = rxrpc_tlp_calc_pto(call, req->now);

		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
		call->rack_timo_at = ktime_add(req->now, pto);
		trace_rxrpc_rack_timer(call, pto, false);
		trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
	}

	if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
@ -589,12 +616,6 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
		call->expect_rx_by = ktime_add(req->now, delay);
		trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
	}
	if (call->resend_at == KTIME_MAX) {
		ktime_t delay = rxrpc_get_rto_backoff(call, false);

		call->resend_at = ktime_add(req->now, delay);
		trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend);
	}

	rxrpc_set_keepalive(call, req->now);
	return len;