fs: dlm: implement delayed ack handling

This patch changes the ack handling so that we no longer ack every single
message. Instead, lowcomms takes care of sending an ack back after a batch
of messages has been processed. Currently this happens only once the whole
receive buffer has been processed; there might be better points at which to
send an ack, but only the lowcomms implementation knows when there is more
data to receive. This patch also has the disadvantage that we might
retransmit more on errors; however, this is a very rare case.

Tested with make_panic on gfs2 with three nodes by running:

trace-cmd record -p function -l 'dlm_send_ack' sleep 100

and

trace-cmd report | wc -l

Before patch:
- 20548
- 21376
- 21398

After patch:
- 18338
- 20679
- 19949

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
Alexander Aring 2021-08-18 16:27:13 -04:00 committed by David Teigland
parent 62699b3f0a
commit b97f85259f
3 changed files with 50 additions and 8 deletions

View File

@ -947,6 +947,7 @@ static int receive_from_sock(struct connection *con)
} }
} }
dlm_midcomms_receive_done(con->nodeid);
mutex_unlock(&con->sock_mutex); mutex_unlock(&con->sock_mutex);
return 0; return 0;

View File

@ -46,6 +46,7 @@ int dlm_lowcomms_resend_msg(struct dlm_msg *msg);
int dlm_lowcomms_connect_node(int nodeid); int dlm_lowcomms_connect_node(int nodeid);
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark); int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
/* called by lowcomms once it finished processing received data of nodeid */
void dlm_midcomms_receive_done(int nodeid);
#endif /* __LOWCOMMS_DOT_H__ */ #endif /* __LOWCOMMS_DOT_H__ */

View File

@ -109,12 +109,6 @@
* compatibility. There exists better ways to make a better handling. * compatibility. There exists better ways to make a better handling.
* However this should be changed in the next major version bump of dlm. * However this should be changed in the next major version bump of dlm.
* *
* Ack handling:
*
* Currently we send an ack message for every dlm message. However we
* can ack multiple dlm messages with one ack by just delaying the ack
* message. Will reduce some traffic but makes the drop detection slower.
*
* Tail Size checking: * Tail Size checking:
* *
* There exists a message tail payload in e.g. DLM_MSG however we don't * There exists a message tail payload in e.g. DLM_MSG however we don't
@ -169,6 +163,7 @@ struct midcomms_node {
#define DLM_NODE_FLAG_CLOSE 1 #define DLM_NODE_FLAG_CLOSE 1
#define DLM_NODE_FLAG_STOP_TX 2 #define DLM_NODE_FLAG_STOP_TX 2
#define DLM_NODE_FLAG_STOP_RX 3 #define DLM_NODE_FLAG_STOP_RX 3
#define DLM_NODE_ULP_DELIVERED 4
unsigned long flags; unsigned long flags;
wait_queue_head_t shutdown_wait; wait_queue_head_t shutdown_wait;
@ -480,11 +475,12 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
{ {
if (seq == node->seq_next) { if (seq == node->seq_next) {
node->seq_next++; node->seq_next++;
/* send ack before fin */
dlm_send_ack(node->nodeid, node->seq_next);
switch (p->header.h_cmd) { switch (p->header.h_cmd) {
case DLM_FIN: case DLM_FIN:
/* send ack before fin */
dlm_send_ack(node->nodeid, node->seq_next);
spin_lock(&node->state_lock); spin_lock(&node->state_lock);
pr_debug("receive fin msg from node %d with state %s\n", pr_debug("receive fin msg from node %d with state %s\n",
node->nodeid, dlm_state_str(node->state)); node->nodeid, dlm_state_str(node->state));
@ -534,6 +530,7 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
default: default:
WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags)); WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
dlm_receive_buffer(p, node->nodeid); dlm_receive_buffer(p, node->nodeid);
set_bit(DLM_NODE_ULP_DELIVERED, &node->flags);
break; break;
} }
} else { } else {
@ -933,6 +930,49 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
return ret; return ret;
} }
/*
 * dlm_midcomms_receive_done() - send a delayed ack for a node, if one is due
 * @nodeid: id of the node whose received data has just been processed
 *
 * Sends a single ack carrying node->seq_next instead of acking every
 * message individually. Nothing is sent when:
 *  - no midcomms node exists for @nodeid,
 *  - the node does not use protocol version DLM_VERSION_3_2,
 *  - DLM_NODE_ULP_DELIVERED is not set, i.e. nothing was handed to the
 *    upper layer since the flag was last cleared,
 *  - the node is not in DLM_ESTABLISHED state.
 *
 * The node lookup and the ack are done inside an SRCU read-side section
 * on nodes_srcu.
 */
void dlm_midcomms_receive_done(int nodeid)
{
	struct midcomms_node *node;
	int established;
	int idx;

	idx = srcu_read_lock(&nodes_srcu);

	node = nodeid2node(nodeid, 0);
	if (!node)
		goto out;

	/* only the 3.2 protocol knows about acks, older ones are left alone */
	if (node->version != DLM_VERSION_3_2)
		goto out;

	/*
	 * Consume the "delivered" marker atomically; if it was not set
	 * there is nothing new that needs to be acknowledged.
	 */
	if (!test_and_clear_bit(DLM_NODE_ULP_DELIVERED, &node->flags))
		goto out;

	/* sample the connection state under the lock, act on it afterwards */
	spin_lock(&node->state_lock);
	established = (node->state == DLM_ESTABLISHED);
	spin_unlock(&node->state_lock);

	/* any other state is skipped here: FIN handling sends its own ack */
	if (established)
		dlm_send_ack(node->nodeid, node->seq_next);

out:
	srcu_read_unlock(&nodes_srcu, idx);
}
void dlm_midcomms_unack_msg_resend(int nodeid) void dlm_midcomms_unack_msg_resend(int nodeid)
{ {
struct midcomms_node *node; struct midcomms_node *node;