mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
ipv6: Define dscp_t and stop taking ECN bits into account in fib6-rules
Define a dscp_t type and its appropriate helpers that ensure ECN bits are not taken into account when handling DSCP. Use this new type to replace the tclass field of struct fib6_rule, so that fib6-rules don't get influenced by ECN bits anymore. Before this patch, fib6-rules didn't make any distinction between the DSCP and ECN bits. Therefore, rules specifying a DSCP (tos or dsfield options in iproute2) stopped working as soon as a packet had at least one of its ECN bits set (as a workaround, one could create four rules for each DSCP value to match, one for each possible ECN value). After this patch, fib6-rules only compare the DSCP bits. ECN doesn't influence the result anymore. Also, fib6-rules now must have the ECN bits cleared or they will be rejected. Signed-off-by: Guillaume Nault <gnault@redhat.com> Acked-by: David Ahern <dsahern@kernel.org> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
642436a1ad
commit
a410a0cf98
57
include/net/inet_dscp.h
Normal file
57
include/net/inet_dscp.h
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||||
|
/*
|
||||||
|
* inet_dscp.h: helpers for handling differentiated services codepoints (DSCP)
|
||||||
|
*
|
||||||
|
* DSCP is defined in RFC 2474:
|
||||||
|
*
|
||||||
|
* 0 1 2 3 4 5 6 7
|
||||||
|
* +---+---+---+---+---+---+---+---+
|
||||||
|
* | DSCP | CU |
|
||||||
|
* +---+---+---+---+---+---+---+---+
|
||||||
|
*
|
||||||
|
* DSCP: differentiated services codepoint
|
||||||
|
* CU: currently unused
|
||||||
|
*
|
||||||
|
* The whole DSCP + CU bits form the DS field.
|
||||||
|
* The DS field is also commonly called TOS or Traffic Class (for IPv6).
|
||||||
|
*
|
||||||
|
* Note: the CU bits are now used for Explicit Congestion Notification
|
||||||
|
* (RFC 3168).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _INET_DSCP_H
|
||||||
|
#define _INET_DSCP_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/* Special type for storing DSCP values.
|
||||||
|
*
|
||||||
|
* A dscp_t variable stores a DS field with the CU (ECN) bits cleared.
|
||||||
|
* Using dscp_t makes it possible to strictly separate DSCP and ECN bits, thus avoiding
|
||||||
|
* bugs where ECN bits are erroneously taken into account during FIB lookups
|
||||||
|
* or policy routing.
|
||||||
|
*
|
||||||
|
* Note: to get the real DSCP value contained in a dscp_t variable one would
|
||||||
|
* have to do a bit shift after calling inet_dscp_to_dsfield(). We could have
|
||||||
|
* a helper for that, but there are currently no users.
|
||||||
|
*/
|
||||||
|
typedef u8 __bitwise dscp_t;
|
||||||
|
|
||||||
|
#define INET_DSCP_MASK 0xfc
|
||||||
|
|
||||||
|
/* Convert a raw DS field byte into a dscp_t.
 *
 * The value is ANDed with INET_DSCP_MASK, so any bit outside that mask
 * (the CU/ECN bits of the DS field) is dropped before the result is cast
 * to dscp_t.
 */
static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
|
||||||
|
{
|
||||||
|
return (__force dscp_t)(dsfield & INET_DSCP_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert a dscp_t back into a plain DS field byte.
 *
 * This is a pure cast: no bits are modified, the value is returned as-is
 * with the __u8 type.
 */
static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
|
||||||
|
{
|
||||||
|
return (__force __u8)dscp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that a DS field value carries nothing but DSCP bits.
 *
 * Returns true when no bit outside INET_DSCP_MASK is set in val,
 * false as soon as at least one such bit is set.
 */
static inline bool inet_validate_dscp(__u8 val)
|
||||||
|
{
|
||||||
|
return !(val & ~INET_DSCP_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _INET_DSCP_H */
|
@ -17,6 +17,7 @@
|
|||||||
#include <net/if_inet6.h>
|
#include <net/if_inet6.h>
|
||||||
#include <net/flow.h>
|
#include <net/flow.h>
|
||||||
#include <net/flow_dissector.h>
|
#include <net/flow_dissector.h>
|
||||||
|
#include <net/inet_dscp.h>
|
||||||
#include <net/snmp.h>
|
#include <net/snmp.h>
|
||||||
#include <net/netns/hash.h>
|
#include <net/netns/hash.h>
|
||||||
|
|
||||||
@ -974,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo)
|
|||||||
return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
|
return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Get the DSCP carried in an IPv6 flowinfo word.
 *
 * The traffic class is extracted with ip6_tclass() and then converted
 * to a dscp_t with inet_dsfield_to_dscp().
 */
static inline dscp_t ip6_dscp(__be32 flowinfo)
|
||||||
|
{
|
||||||
|
return inet_dsfield_to_dscp(ip6_tclass(flowinfo));
|
||||||
|
}
|
||||||
|
|
||||||
static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
|
static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
|
||||||
{
|
{
|
||||||
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
|
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <linux/indirect_call_wrapper.h>
|
#include <linux/indirect_call_wrapper.h>
|
||||||
|
|
||||||
#include <net/fib_rules.h>
|
#include <net/fib_rules.h>
|
||||||
|
#include <net/inet_dscp.h>
|
||||||
#include <net/ipv6.h>
|
#include <net/ipv6.h>
|
||||||
#include <net/addrconf.h>
|
#include <net/addrconf.h>
|
||||||
#include <net/ip6_route.h>
|
#include <net/ip6_route.h>
|
||||||
@ -25,14 +26,14 @@ struct fib6_rule {
|
|||||||
struct fib_rule common;
|
struct fib_rule common;
|
||||||
struct rt6key src;
|
struct rt6key src;
|
||||||
struct rt6key dst;
|
struct rt6key dst;
|
||||||
u8 tclass;
|
dscp_t dscp;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool fib6_rule_matchall(const struct fib_rule *rule)
|
static bool fib6_rule_matchall(const struct fib_rule *rule)
|
||||||
{
|
{
|
||||||
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
|
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
|
||||||
|
|
||||||
if (r->dst.plen || r->src.plen || r->tclass)
|
if (r->dst.plen || r->src.plen || r->dscp)
|
||||||
return false;
|
return false;
|
||||||
return fib_rule_matchall(rule);
|
return fib_rule_matchall(rule);
|
||||||
}
|
}
|
||||||
@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
|
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
|
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
|
||||||
@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
|||||||
struct net *net = sock_net(skb->sk);
|
struct net *net = sock_net(skb->sk);
|
||||||
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
|
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
|
||||||
|
|
||||||
|
if (!inet_validate_dscp(frh->tos)) {
|
||||||
|
NL_SET_ERR_MSG(extack,
|
||||||
|
"Invalid dsfield (tos): ECN bits must be 0");
|
||||||
|
goto errout;
|
||||||
|
}
|
||||||
|
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
|
||||||
|
|
||||||
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
|
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
|
||||||
if (rule->table == RT6_TABLE_UNSPEC) {
|
if (rule->table == RT6_TABLE_UNSPEC) {
|
||||||
NL_SET_ERR_MSG(extack, "Invalid table");
|
NL_SET_ERR_MSG(extack, "Invalid table");
|
||||||
@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
|
|||||||
|
|
||||||
rule6->src.plen = frh->src_len;
|
rule6->src.plen = frh->src_len;
|
||||||
rule6->dst.plen = frh->dst_len;
|
rule6->dst.plen = frh->dst_len;
|
||||||
rule6->tclass = frh->tos;
|
|
||||||
|
|
||||||
if (fib_rule_requires_fldissect(rule))
|
if (fib_rule_requires_fldissect(rule))
|
||||||
net->ipv6.fib6_rules_require_fldissect++;
|
net->ipv6.fib6_rules_require_fldissect++;
|
||||||
@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
|
|||||||
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
|
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (frh->tos && (rule6->tclass != frh->tos))
|
if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (frh->src_len &&
|
if (frh->src_len &&
|
||||||
@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
|
|||||||
|
|
||||||
frh->dst_len = rule6->dst.plen;
|
frh->dst_len = rule6->dst.plen;
|
||||||
frh->src_len = rule6->src.plen;
|
frh->src_len = rule6->src.plen;
|
||||||
frh->tos = rule6->tclass;
|
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
|
||||||
|
|
||||||
if ((rule6->dst.plen &&
|
if ((rule6->dst.plen &&
|
||||||
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
|
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
|
||||||
|
@ -114,10 +114,25 @@ fib_rule6_test_match_n_redirect()
|
|||||||
log_test $? 0 "rule6 del by pref: $description"
|
log_test $? 0 "rule6 del by pref: $description"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Try to add an IPv6 rule built from the selector in $1 and report the
# outcome through log_test.
#   $1: rule selector passed verbatim to "ip -6 rule add"
# NOTE(review): log_test is defined outside this view; it is presumably
# called as "log_test <actual rc> <expected rc> <description>", which would
# make 2 the exit status expected from a refused rule -- confirm.
fib_rule6_test_reject()
|
||||||
|
{
|
||||||
|
local match="$1"
|
||||||
|
local rc
|
||||||
|
|
||||||
|
# stderr is discarded for this command
|
||||||
|
$IP -6 rule add $match table $RTABLE 2>/dev/null
|
||||||
|
rc=$?
|
||||||
|
log_test $rc 2 "rule6 check: $match"
|
||||||
|
|
||||||
|
# The add succeeded (exit status 0): delete the rule that was just created
|
||||||
|
if [ $rc -eq 0 ]; then
|
||||||
|
$IP -6 rule del $match table $RTABLE
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
fib_rule6_test()
|
fib_rule6_test()
|
||||||
{
|
{
|
||||||
local getmatch
|
local getmatch
|
||||||
local match
|
local match
|
||||||
|
local cnt
|
||||||
|
|
||||||
# setup the fib rule redirect route
|
# setup the fib rule redirect route
|
||||||
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
|
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
|
||||||
@ -128,8 +143,21 @@ fib_rule6_test()
|
|||||||
match="from $SRC_IP6 iif $DEV"
|
match="from $SRC_IP6 iif $DEV"
|
||||||
fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
|
fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
|
||||||
|
|
||||||
|
# Reject dsfield (tos) options which have ECN bits set
|
||||||
|
for cnt in $(seq 1 3); do
|
||||||
|
match="dsfield $cnt"
|
||||||
|
fib_rule6_test_reject "$match"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Don't take ECN bits into account when matching on dsfield
|
||||||
match="tos 0x10"
|
match="tos 0x10"
|
||||||
fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
|
for cnt in "0x10" "0x11" "0x12" "0x13"; do
|
||||||
|
# Using option 'tos' instead of 'dsfield' as old iproute2
|
||||||
|
# versions don't support 'dsfield' in ip rule show.
|
||||||
|
getmatch="tos $cnt"
|
||||||
|
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
|
||||||
|
"$getmatch redirect to table"
|
||||||
|
done
|
||||||
|
|
||||||
match="fwmark 0x64"
|
match="fwmark 0x64"
|
||||||
getmatch="mark 0x64"
|
getmatch="mark 0x64"
|
||||||
|
Loading…
Reference in New Issue
Block a user