mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 15:58:47 +00:00
16ad3f4022
We introduce a simple variable window congestion control for links. The algorithm is inspired by the Reno algorithm, covering the 'slow start', 'congestion avoidance', and 'fast recovery' modes. - We introduce hard lower and upper window limits per link, still different and configurable per bearer type. - We introduce a 'slow start threshold' variable, initially set to the maximum window size. - We let a link start at the minimum congestion window, i.e. in slow start mode, and then let it grow rapidly (+1 per received ACK) until it reaches the slow start threshold and enters congestion avoidance mode. - In congestion avoidance mode we increment the congestion window for each window-size number of acked packets, up to a possible maximum equal to the configured maximum window. - For each non-duplicate NACK received, we drop back to fast recovery mode, by setting both the slow start threshold and the congestion window to (current_congestion_window / 2). - If the timeout handler finds that the transmit queue has not moved since the previous timeout, it drops the link back to slow start and forces a probe containing the last sent sequence number to be sent to the peer, so that the peer can discover the stale situation. This change does in reality have effect only on unicast ethernet transport, as we have seen that there is no room whatsoever for increasing the window max size for the UDP bearer. For now, we also choose to keep the limits for the broadcast link unchanged and equal. This algorithm seems to give a 50-100% throughput improvement for messages larger than MTU. Suggested-by: Xin Long <lucien.xin@gmail.com> Acked-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
105 lines
3.7 KiB
C
105 lines
3.7 KiB
C
/*
|
|
* net/tipc/ib_media.c: Infiniband bearer support for TIPC
|
|
*
|
|
* Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
|
|
*
|
|
* Based on eth_media.c, which carries the following copyright notice:
|
|
*
|
|
* Copyright (c) 2001-2007, Ericsson AB
|
|
* Copyright (c) 2005-2008, 2011, Wind River Systems
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
* Software Foundation.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <linux/if_infiniband.h>
|
|
#include "core.h"
|
|
#include "bearer.h"
|
|
|
|
/* Maximum link window for InfiniBand bearers; used as .max_win in ib_media_info */
#define TIPC_MAX_IB_LINK_WIN 500
|
|
|
|
/* convert InfiniBand address (media address format) media address to string */
|
|
static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
|
|
int str_size)
|
|
{
|
|
if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */
|
|
return 1;
|
|
|
|
sprintf(str_buf, "%20phC", a->value);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Convert from media address format to discovery message addr format */
|
|
static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
|
|
{
|
|
memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
|
|
memcpy(msg, addr->value, INFINIBAND_ALEN);
|
|
return 0;
|
|
}
|
|
|
|
/* Convert raw InfiniBand address format to media addr format */
|
|
static int tipc_ib_raw2addr(struct tipc_bearer *b,
|
|
struct tipc_media_addr *addr,
|
|
char *msg)
|
|
{
|
|
memset(addr, 0, sizeof(*addr));
|
|
memcpy(addr->value, msg, INFINIBAND_ALEN);
|
|
addr->media_id = TIPC_MEDIA_TYPE_IB;
|
|
addr->broadcast = !memcmp(msg, b->bcast_addr.value,
|
|
INFINIBAND_ALEN);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Convert discovery message address format to InfiniBand media address
 * format. The work is delegated to tipc_ib_raw2addr(), whose result is
 * returned unchanged.
 */
static int tipc_ib_msg2addr(struct tipc_bearer *b,
			    struct tipc_media_addr *addr,
			    char *msg)
{
	int rc;

	rc = tipc_ib_raw2addr(b, addr, msg);
	return rc;
}
|
|
|
|
/* InfiniBand media registration info */
|
|
struct tipc_media ib_media_info = {
|
|
.send_msg = tipc_l2_send_msg,
|
|
.enable_media = tipc_enable_l2_media,
|
|
.disable_media = tipc_disable_l2_media,
|
|
.addr2str = tipc_ib_addr2str,
|
|
.addr2msg = tipc_ib_addr2msg,
|
|
.msg2addr = tipc_ib_msg2addr,
|
|
.raw2addr = tipc_ib_raw2addr,
|
|
.priority = TIPC_DEF_LINK_PRI,
|
|
.tolerance = TIPC_DEF_LINK_TOL,
|
|
.min_win = TIPC_DEF_LINK_WIN,
|
|
.max_win = TIPC_MAX_IB_LINK_WIN,
|
|
.type_id = TIPC_MEDIA_TYPE_IB,
|
|
.hwaddr_len = INFINIBAND_ALEN,
|
|
.name = "ib"
|
|
};
|