ipv4: Add a sysctl to control multipath hash fields

A subsequent patch will add a new multipath hash policy where the packet
fields used for multipath hash calculation are determined by user space.
This patch adds a sysctl that allows user space to set these fields.

The packet fields are represented using a bitmask and are common between
IPv4 and IPv6 to allow user space to use the same numbering across both
protocols. For example, to hash based on the standard 5-tuple:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037
 net.ipv4.fib_multipath_hash_fields = 0x0037

The kernel rejects unknown fields, for example:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000
 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument

More fields can be added in the future, if needed.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ido Schimmel 2021-05-17 21:15:18 +03:00 committed by David S. Miller
parent 2e68ea9268
commit ce5c9c20d3
5 changed files with 89 additions and 0 deletions

View File

@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER
- 1 - Layer 4
- 2 - Layer 3 or inner Layer 3 if present
fib_multipath_hash_fields - UNSIGNED INTEGER
When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
fields used for multipath hash calculation are determined by this
sysctl.
This value is a bitmask which enables various fields for multipath hash
calculation.
Possible fields are:
====== ============================
0x0001 Source IP address
0x0002 Destination IP address
0x0004 IP protocol
0x0008 Unused (Flow Label)
0x0010 Source port
0x0020 Destination port
0x0040 Inner source IP address
0x0080 Inner destination IP address
0x0100 Inner IP protocol
0x0200 Inner Flow Label
0x0400 Inner source port
0x0800 Inner destination port
====== ============================
Default: 0x0007 (source IP, destination IP and IP protocol)
fib_sync_mem - UNSIGNED INTEGER
Amount of dirty memory from fib entries that can be backlogged before
synchronize_rcu is forced.

View File

@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
/* Fields used for sysctl_fib_multipath_hash_fields.
* Common to IPv4 and IPv6.
*
* Each field is a single bit of the sysctl bitmask. Bits 0-5 select fields
* of the outer headers and bits 6-11 select the corresponding inner fields.
* The IPv4 sysctl documentation lists the outer Flow Label bit (bit 3) as
* unused; it is kept so the numbering is shared between both protocols.
*
* Add new fields at the end. This is user API.
*/
#define FIB_MULTIPATH_HASH_FIELD_SRC_IP BIT(0)
#define FIB_MULTIPATH_HASH_FIELD_DST_IP BIT(1)
#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO BIT(2)
#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL BIT(3)
#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT BIT(4)
#define FIB_MULTIPATH_HASH_FIELD_DST_PORT BIT(5)
#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP BIT(6)
#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP BIT(7)
#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO BIT(8)
#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL BIT(9)
#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT BIT(10)
#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT BIT(11)
/* Union of all outer header fields (bits 0-5). */
#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK \
(FIB_MULTIPATH_HASH_FIELD_SRC_IP | \
FIB_MULTIPATH_HASH_FIELD_DST_IP | \
FIB_MULTIPATH_HASH_FIELD_IP_PROTO | \
FIB_MULTIPATH_HASH_FIELD_FLOWLABEL | \
FIB_MULTIPATH_HASH_FIELD_SRC_PORT | \
FIB_MULTIPATH_HASH_FIELD_DST_PORT)
/* Union of all inner header fields (bits 6-11). */
#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK \
(FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP | \
FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP | \
FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO | \
FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL | \
FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT | \
FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
/* Every field currently defined. The IPv4 sysctl uses this value as the
* upper bound of what user space may write.
*/
#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK \
(FIB_MULTIPATH_HASH_FIELD_OUTER_MASK | \
FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
/* Initial per-netns value: outer source IP, destination IP and IP protocol. */
#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK \
(FIB_MULTIPATH_HASH_FIELD_SRC_IP | \
FIB_MULTIPATH_HASH_FIELD_DST_IP | \
FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys);

View File

@ -210,6 +210,7 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
u32 sysctl_fib_multipath_hash_fields;
u8 sysctl_fib_multipath_use_neigh;
u8 sysctl_fib_multipath_hash_policy;
#endif

View File

@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net)
if (err)
return err;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Default to 3-tuple */
net->ipv4.sysctl_fib_multipath_hash_fields =
FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
#endif
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);

View File

@ -19,6 +19,7 @@
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
/* Upper bound for the fib_multipath_hash_fields sysctl; its address is passed
 * as .extra2 of that table entry. Marked __maybe_unused because the entry
 * sits inside a conditionally compiled part of the table.
 */
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
.procname = "fib_multipath_hash_fields",
.data = &init_net.ipv4.sysctl_fib_multipath_hash_fields,
.maxlen = sizeof(u32),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &fib_multipath_hash_fields_all_mask,
},
#endif
{
.procname = "ip_unprivileged_port_start",