linux-stable/include/net/flow_offload.h

746 lines
20 KiB
C
Raw Permalink Normal View History

#ifndef _NET_FLOW_OFFLOAD_H
#define _NET_FLOW_OFFLOAD_H
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netlink.h>
#include <net/flow_dissector.h>
struct flow_match {
struct flow_dissector *dissector;
void *mask;
void *key;
};
struct flow_match_meta {
struct flow_dissector_key_meta *key, *mask;
};
struct flow_match_basic {
struct flow_dissector_key_basic *key, *mask;
};
struct flow_match_control {
struct flow_dissector_key_control *key, *mask;
};
struct flow_match_eth_addrs {
struct flow_dissector_key_eth_addrs *key, *mask;
};
struct flow_match_vlan {
struct flow_dissector_key_vlan *key, *mask;
};
struct flow_match_arp {
struct flow_dissector_key_arp *key, *mask;
};
struct flow_match_ipv4_addrs {
struct flow_dissector_key_ipv4_addrs *key, *mask;
};
struct flow_match_ipv6_addrs {
struct flow_dissector_key_ipv6_addrs *key, *mask;
};
struct flow_match_ip {
struct flow_dissector_key_ip *key, *mask;
};
struct flow_match_ports {
struct flow_dissector_key_ports *key, *mask;
};
struct flow_match_ports_range {
struct flow_dissector_key_ports_range *key, *mask;
};
struct flow_match_icmp {
struct flow_dissector_key_icmp *key, *mask;
};
struct flow_match_tcp {
struct flow_dissector_key_tcp *key, *mask;
};
struct flow_match_ipsec {
struct flow_dissector_key_ipsec *key, *mask;
};
struct flow_match_mpls {
struct flow_dissector_key_mpls *key, *mask;
};
struct flow_match_enc_keyid {
struct flow_dissector_key_keyid *key, *mask;
};
struct flow_match_enc_opts {
struct flow_dissector_key_enc_opts *key, *mask;
};
struct flow_match_ct {
struct flow_dissector_key_ct *key, *mask;
};
struct flow_match_pppoe {
struct flow_dissector_key_pppoe *key, *mask;
};
struct flow_match_l2tpv3 {
struct flow_dissector_key_l2tpv3 *key, *mask;
};
struct flow_rule;
void flow_rule_match_meta(const struct flow_rule *rule,
struct flow_match_meta *out);
void flow_rule_match_basic(const struct flow_rule *rule,
struct flow_match_basic *out);
void flow_rule_match_control(const struct flow_rule *rule,
struct flow_match_control *out);
void flow_rule_match_eth_addrs(const struct flow_rule *rule,
struct flow_match_eth_addrs *out);
void flow_rule_match_vlan(const struct flow_rule *rule,
struct flow_match_vlan *out);
void flow_rule_match_cvlan(const struct flow_rule *rule,
struct flow_match_vlan *out);
void flow_rule_match_arp(const struct flow_rule *rule,
struct flow_match_arp *out);
void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
struct flow_match_ipv4_addrs *out);
void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
struct flow_match_ipv6_addrs *out);
void flow_rule_match_ip(const struct flow_rule *rule,
struct flow_match_ip *out);
void flow_rule_match_ports(const struct flow_rule *rule,
struct flow_match_ports *out);
void flow_rule_match_ports_range(const struct flow_rule *rule,
struct flow_match_ports_range *out);
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out);
void flow_rule_match_ipsec(const struct flow_rule *rule,
struct flow_match_ipsec *out);
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out);
void flow_rule_match_mpls(const struct flow_rule *rule,
struct flow_match_mpls *out);
void flow_rule_match_enc_control(const struct flow_rule *rule,
struct flow_match_control *out);
void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule,
struct flow_match_ipv4_addrs *out);
void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule,
struct flow_match_ipv6_addrs *out);
void flow_rule_match_enc_ip(const struct flow_rule *rule,
struct flow_match_ip *out);
void flow_rule_match_enc_ports(const struct flow_rule *rule,
struct flow_match_ports *out);
void flow_rule_match_enc_keyid(const struct flow_rule *rule,
struct flow_match_enc_keyid *out);
void flow_rule_match_enc_opts(const struct flow_rule *rule,
struct flow_match_enc_opts *out);
void flow_rule_match_ct(const struct flow_rule *rule,
struct flow_match_ct *out);
void flow_rule_match_pppoe(const struct flow_rule *rule,
struct flow_match_pppoe *out);
void flow_rule_match_l2tpv3(const struct flow_rule *rule,
struct flow_match_l2tpv3 *out);
enum flow_action_id {
FLOW_ACTION_ACCEPT = 0,
FLOW_ACTION_DROP,
FLOW_ACTION_TRAP,
FLOW_ACTION_GOTO,
FLOW_ACTION_REDIRECT,
FLOW_ACTION_MIRRED,
FLOW_ACTION_REDIRECT_INGRESS,
FLOW_ACTION_MIRRED_INGRESS,
FLOW_ACTION_VLAN_PUSH,
FLOW_ACTION_VLAN_POP,
FLOW_ACTION_VLAN_MANGLE,
FLOW_ACTION_TUNNEL_ENCAP,
FLOW_ACTION_TUNNEL_DECAP,
FLOW_ACTION_MANGLE,
FLOW_ACTION_ADD,
FLOW_ACTION_CSUM,
FLOW_ACTION_MARK,
FLOW_ACTION_PTYPE,
FLOW_ACTION_PRIORITY,
FLOW_ACTION_RX_QUEUE_MAPPING,
FLOW_ACTION_WAKE,
FLOW_ACTION_QUEUE,
FLOW_ACTION_SAMPLE,
FLOW_ACTION_POLICE,
net/sched: Introduce action ct Allow sending a packet to conntrack module for connection tracking. The packet will be marked with conntrack connection's state, and any metadata such as conntrack mark and label. This state metadata can later be matched against with tc classifers, for example with the flower classifier as below. In addition to committing new connections the user can optionally specific a zone to track within, set a mark/label and configure nat with an address range and port range. Usage is as follows: $ tc qdisc add dev ens1f0_0 ingress $ tc qdisc add dev ens1f0_1 ingress $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 2 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_state +trk+new \ action ct zone 2 commit mark 0xbb nat src addr 5.5.5.7 pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 1 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_0 Signed-off-by: Paul Blakey <paulb@mellanox.com> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Changelog: V5->V6: Added CONFIG_NF_DEFRAG_IPV6 in handle fragments ipv6 case V4->V5: Reordered nf_conntrack_put() in tcf_ct_skb_nfct_cached() V3->V4: Added strict_start_type for act_ct policy V2->V3: Fixed david's comments: Removed extra newline after rcu in tcf_ct_params , and indent of break in act_ct.c V1->V2: Fixed parsing of ranges TCA_CT_NAT_IPV6_MAX as 'else' case overwritten ipv4 max Refactored NAT_PORT_MIN_MAX range handling as well Added ipv4/ipv6 defragmentation Removed extra skb pull push of nw offset in exectute nat Refactored tcf_ct_skb_network_trim after pull Removed TCA_ACT_CT define Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-09 07:30:48 +00:00
FLOW_ACTION_CT,
FLOW_ACTION_CT_METADATA,
FLOW_ACTION_MPLS_PUSH,
FLOW_ACTION_MPLS_POP,
FLOW_ACTION_MPLS_MANGLE,
FLOW_ACTION_GATE,
FLOW_ACTION_PPPOE_PUSH,
FLOW_ACTION_JUMP,
FLOW_ACTION_PIPE,
FLOW_ACTION_VLAN_PUSH_ETH,
FLOW_ACTION_VLAN_POP_ETH,
FLOW_ACTION_CONTINUE,
NUM_FLOW_ACTIONS,
};
/* This is mirroring enum pedit_header_type definition for easy mapping between
* tc pedit action. Legacy TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is mapped to
* FLOW_ACT_MANGLE_UNSPEC, which is supported by no driver.
*/
enum flow_action_mangle_base {
FLOW_ACT_MANGLE_UNSPEC = 0,
FLOW_ACT_MANGLE_HDR_TYPE_ETH,
FLOW_ACT_MANGLE_HDR_TYPE_IP4,
FLOW_ACT_MANGLE_HDR_TYPE_IP6,
FLOW_ACT_MANGLE_HDR_TYPE_TCP,
FLOW_ACT_MANGLE_HDR_TYPE_UDP,
};
enum flow_action_hw_stats_bit {
FLOW_ACTION_HW_STATS_IMMEDIATE_BIT,
FLOW_ACTION_HW_STATS_DELAYED_BIT,
FLOW_ACTION_HW_STATS_DISABLED_BIT,
FLOW_ACTION_HW_STATS_NUM_BITS
};
enum flow_action_hw_stats {
FLOW_ACTION_HW_STATS_IMMEDIATE =
BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT),
FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT),
FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE |
FLOW_ACTION_HW_STATS_DELAYED,
FLOW_ACTION_HW_STATS_DISABLED =
BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT),
FLOW_ACTION_HW_STATS_DONT_CARE = BIT(FLOW_ACTION_HW_STATS_NUM_BITS) - 1,
};
typedef void (*action_destr)(void *priv);
struct flow_action_cookie {
u32 cookie_len;
u8 cookie[];
};
struct flow_action_cookie *flow_action_cookie_create(void *data,
unsigned int len,
gfp_t gfp);
void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
struct flow_action_entry {
enum flow_action_id id;
u32 hw_index;
unsigned long cookie;
u64 miss_cookie;
enum flow_action_hw_stats hw_stats;
action_destr destructor;
void *destructor_priv;
union {
u32 chain_index; /* FLOW_ACTION_GOTO */
struct net_device *dev; /* FLOW_ACTION_REDIRECT */
struct { /* FLOW_ACTION_VLAN */
u16 vid;
__be16 proto;
u8 prio;
} vlan;
struct { /* FLOW_ACTION_VLAN_PUSH_ETH */
unsigned char dst[ETH_ALEN];
unsigned char src[ETH_ALEN];
} vlan_push_eth;
struct { /* FLOW_ACTION_MANGLE */
/* FLOW_ACTION_ADD */
enum flow_action_mangle_base htype;
u32 offset;
u32 mask;
u32 val;
} mangle;
struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */
u32 csum_flags; /* FLOW_ACTION_CSUM */
u32 mark; /* FLOW_ACTION_MARK */
u16 ptype; /* FLOW_ACTION_PTYPE */
u16 rx_queue; /* FLOW_ACTION_RX_QUEUE_MAPPING */
u32 priority; /* FLOW_ACTION_PRIORITY */
struct { /* FLOW_ACTION_QUEUE */
u32 ctx;
u32 index;
u8 vf;
} queue;
struct { /* FLOW_ACTION_SAMPLE */
struct psample_group *psample_group;
u32 rate;
u32 trunc_size;
bool truncate;
} sample;
struct { /* FLOW_ACTION_POLICE */
u32 burst;
u64 rate_bytes_ps;
u64 peakrate_bytes_ps;
u32 avrate;
u16 overhead;
u64 burst_pkt;
u64 rate_pkt_ps;
u32 mtu;
struct {
enum flow_action_id act_id;
u32 extval;
} exceed, notexceed;
} police;
net/sched: Introduce action ct Allow sending a packet to conntrack module for connection tracking. The packet will be marked with conntrack connection's state, and any metadata such as conntrack mark and label. This state metadata can later be matched against with tc classifers, for example with the flower classifier as below. In addition to committing new connections the user can optionally specific a zone to track within, set a mark/label and configure nat with an address range and port range. Usage is as follows: $ tc qdisc add dev ens1f0_0 ingress $ tc qdisc add dev ens1f0_1 ingress $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 2 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_state +trk+new \ action ct zone 2 commit mark 0xbb nat src addr 5.5.5.7 pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 1 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_0 Signed-off-by: Paul Blakey <paulb@mellanox.com> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Changelog: V5->V6: Added CONFIG_NF_DEFRAG_IPV6 in handle fragments ipv6 case V4->V5: Reordered nf_conntrack_put() in tcf_ct_skb_nfct_cached() V3->V4: Added strict_start_type for act_ct policy V2->V3: Fixed david's comments: Removed extra newline after rcu in tcf_ct_params , and indent of break in act_ct.c V1->V2: Fixed parsing of ranges TCA_CT_NAT_IPV6_MAX as 'else' case overwritten ipv4 max Refactored NAT_PORT_MIN_MAX range handling as well Added ipv4/ipv6 defragmentation Removed extra skb pull push of nw offset in exectute nat Refactored tcf_ct_skb_network_trim after pull Removed TCA_ACT_CT define Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-09 07:30:48 +00:00
struct { /* FLOW_ACTION_CT */
int action;
u16 zone;
struct nf_flowtable *flow_table;
net/sched: Introduce action ct Allow sending a packet to conntrack module for connection tracking. The packet will be marked with conntrack connection's state, and any metadata such as conntrack mark and label. This state metadata can later be matched against with tc classifers, for example with the flower classifier as below. In addition to committing new connections the user can optionally specific a zone to track within, set a mark/label and configure nat with an address range and port range. Usage is as follows: $ tc qdisc add dev ens1f0_0 ingress $ tc qdisc add dev ens1f0_1 ingress $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 2 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_state +trk+new \ action ct zone 2 commit mark 0xbb nat src addr 5.5.5.7 pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_0 ingress \ prio 1 chain 2 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 0 proto ip \ flower ip_proto tcp ct_state -trk \ action ct zone 2 pipe \ action goto chain 1 $ tc filter add dev ens1f0_1 ingress \ prio 1 chain 1 proto ip \ flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \ action ct nat pipe \ action mirred egress redirect dev ens1f0_0 Signed-off-by: Paul Blakey <paulb@mellanox.com> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Changelog: V5->V6: Added CONFIG_NF_DEFRAG_IPV6 in handle fragments ipv6 case V4->V5: Reordered nf_conntrack_put() in tcf_ct_skb_nfct_cached() V3->V4: Added strict_start_type for act_ct policy V2->V3: Fixed david's comments: Removed extra newline after rcu in tcf_ct_params , and indent of break in act_ct.c V1->V2: Fixed parsing of ranges TCA_CT_NAT_IPV6_MAX as 'else' case overwritten ipv4 max Refactored NAT_PORT_MIN_MAX range handling as well Added ipv4/ipv6 defragmentation Removed extra skb pull push of nw offset in exectute nat Refactored tcf_ct_skb_network_trim after pull Removed TCA_ACT_CT define Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-09 07:30:48 +00:00
} ct;
struct {
unsigned long cookie;
u32 mark;
u32 labels[4];
bool orig_dir;
} ct_metadata;
struct { /* FLOW_ACTION_MPLS_PUSH */
u32 label;
__be16 proto;
u8 tc;
u8 bos;
u8 ttl;
} mpls_push;
struct { /* FLOW_ACTION_MPLS_POP */
__be16 proto;
} mpls_pop;
struct { /* FLOW_ACTION_MPLS_MANGLE */
u32 label;
u8 tc;
u8 bos;
u8 ttl;
} mpls_mangle;
struct {
s32 prio;
u64 basetime;
u64 cycletime;
u64 cycletimeext;
u32 num_entries;
struct action_gate_entry *entries;
} gate;
struct { /* FLOW_ACTION_PPPOE_PUSH */
u16 sid;
} pppoe;
};
struct flow_action_cookie *user_cookie; /* user defined action cookie */
};
struct flow_action {
unsigned int num_entries;
struct flow_action_entry entries[] __counted_by(num_entries);
};
static inline bool flow_action_has_entries(const struct flow_action *action)
{
return action->num_entries;
}
/**
* flow_offload_has_one_action() - check if exactly one action is present
* @action: tc filter flow offload action
*
* Return: true if exactly one action is present.
*/
static inline bool flow_offload_has_one_action(const struct flow_action *action)
{
return action->num_entries == 1;
}
static inline bool flow_action_is_last_entry(const struct flow_action *action,
const struct flow_action_entry *entry)
{
return entry == &action->entries[action->num_entries - 1];
}
#define flow_action_for_each(__i, __act, __actions) \
for (__i = 0, __act = &(__actions)->entries[0]; \
__i < (__actions)->num_entries; \
__act = &(__actions)->entries[++__i])
static inline bool
flow_action_mixed_hw_stats_check(const struct flow_action *action,
struct netlink_ext_ack *extack)
{
const struct flow_action_entry *action_entry;
treewide: Remove uninitialized_var() usage Using uninitialized_var() is dangerous as it papers over real bugs[1] (or can in the future), and suppresses unrelated compiler warnings (e.g. "unused variable"). If the compiler thinks it is uninitialized, either simply initialize the variable or make compiler changes. In preparation for removing[2] the[3] macro[4], remove all remaining needless uses with the following script: git grep '\buninitialized_var\b' | cut -d: -f1 | sort -u | \ xargs perl -pi -e \ 's/\buninitialized_var\(([^\)]+)\)/\1/g; s:\s*/\* (GCC be quiet|to make compiler happy) \*/$::g;' drivers/video/fbdev/riva/riva_hw.c was manually tweaked to avoid pathological white-space. No outstanding warnings were found building allmodconfig with GCC 9.3.0 for x86_64, i386, arm64, arm, powerpc, powerpc64le, s390x, mips, sparc64, alpha, and m68k. [1] https://lore.kernel.org/lkml/20200603174714.192027-1-glider@google.com/ [2] https://lore.kernel.org/lkml/CA+55aFw+Vbj0i=1TGqCR5vQkCzWJ0QxK6CernOU6eedsudAixw@mail.gmail.com/ [3] https://lore.kernel.org/lkml/CA+55aFwgbgqhbp1fkxvRKEpzyR5J8n1vKT1VZdz9knmPuXhOeg@mail.gmail.com/ [4] https://lore.kernel.org/lkml/CA+55aFz2500WfbKXAx8s67wrm9=yVJu65TpLgN_ybYNv0VEOKA@mail.gmail.com/ Reviewed-by: Leon Romanovsky <leonro@mellanox.com> # drivers/infiniband and mlx4/mlx5 Acked-by: Jason Gunthorpe <jgg@mellanox.com> # IB Acked-by: Kalle Valo <kvalo@codeaurora.org> # wireless drivers Reviewed-by: Chao Yu <yuchao0@huawei.com> # erofs Signed-off-by: Kees Cook <keescook@chromium.org>
2020-06-03 20:09:38 +00:00
u8 last_hw_stats;
int i;
if (flow_offload_has_one_action(action))
return true;
flow_action_for_each(i, action_entry, action) {
if (i && action_entry->hw_stats != last_hw_stats) {
NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
return false;
}
last_hw_stats = action_entry->hw_stats;
}
return true;
}
static inline const struct flow_action_entry *
flow_action_first_entry_get(const struct flow_action *action)
{
WARN_ON(!flow_action_has_entries(action));
return &action->entries[0];
}
static inline bool
__flow_action_hw_stats_check(const struct flow_action *action,
struct netlink_ext_ack *extack,
bool check_allow_bit,
enum flow_action_hw_stats_bit allow_bit)
{
const struct flow_action_entry *action_entry;
if (!flow_action_has_entries(action))
return true;
if (!flow_action_mixed_hw_stats_check(action, extack))
return false;
action_entry = flow_action_first_entry_get(action);
/* Zero is not a legal value for hw_stats, catch anyone passing it */
WARN_ON_ONCE(!action_entry->hw_stats);
if (!check_allow_bit &&
~action_entry->hw_stats & FLOW_ACTION_HW_STATS_ANY) {
NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\"");
return false;
} else if (check_allow_bit &&
!(action_entry->hw_stats & BIT(allow_bit))) {
NL_SET_ERR_MSG_MOD(extack, "Driver does not support selected HW stats type");
return false;
}
return true;
}
static inline bool
flow_action_hw_stats_check(const struct flow_action *action,
struct netlink_ext_ack *extack,
enum flow_action_hw_stats_bit allow_bit)
{
return __flow_action_hw_stats_check(action, extack, true, allow_bit);
}
static inline bool
flow_action_basic_hw_stats_check(const struct flow_action *action,
struct netlink_ext_ack *extack)
{
return __flow_action_hw_stats_check(action, extack, false, 0);
}
struct flow_rule {
struct flow_match match;
struct flow_action action;
};
struct flow_rule *flow_rule_alloc(unsigned int num_actions);
static inline bool flow_rule_match_key(const struct flow_rule *rule,
enum flow_dissector_key_id key)
{
return dissector_uses_key(rule->match.dissector, key);
}
/**
* flow_rule_is_supp_control_flags() - check for supported control flags
* @supp_flags: control flags supported by driver
* @ctrl_flags: control flags present in rule
* @extack: The netlink extended ACK for reporting errors.
*
* Return: true if only supported control flags are set, false otherwise.
*/
static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags,
const u32 ctrl_flags,
struct netlink_ext_ack *extack)
{
if (likely((ctrl_flags & ~supp_flags) == 0))
return true;
NL_SET_ERR_MSG_FMT_MOD(extack,
"Unsupported match on control.flags %#x",
ctrl_flags);
return false;
}
/**
* flow_rule_is_supp_enc_control_flags() - check for supported control flags
* @supp_enc_flags: encapsulation control flags supported by driver
* @enc_ctrl_flags: encapsulation control flags present in rule
* @extack: The netlink extended ACK for reporting errors.
*
* Return: true if only supported control flags are set, false otherwise.
*/
static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags,
const u32 enc_ctrl_flags,
struct netlink_ext_ack *extack)
{
if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0))
return true;
NL_SET_ERR_MSG_FMT_MOD(extack,
"Unsupported match on enc_control.flags %#x",
enc_ctrl_flags);
return false;
}
/**
* flow_rule_has_control_flags() - check for presence of any control flags
* @ctrl_flags: control flags present in rule
* @extack: The netlink extended ACK for reporting errors.
*
* Return: true if control flags are set, false otherwise.
*/
static inline bool flow_rule_has_control_flags(const u32 ctrl_flags,
struct netlink_ext_ack *extack)
{
return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack);
}
/**
* flow_rule_has_enc_control_flags() - check for presence of any control flags
* @enc_ctrl_flags: encapsulation control flags present in rule
* @extack: The netlink extended ACK for reporting errors.
*
* Return: true if control flags are set, false otherwise.
*/
static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags,
struct netlink_ext_ack *extack)
{
return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack);
}
/**
* flow_rule_match_has_control_flags() - match and check for any control flags
* @rule: The flow_rule under evaluation.
* @extack: The netlink extended ACK for reporting errors.
*
* Return: true if control flags are set, false otherwise.
*/
static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule,
struct netlink_ext_ack *extack)
{
struct flow_match_control match;
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL))
return false;
flow_rule_match_control(rule, &match);
return flow_rule_has_control_flags(match.mask->flags, extack);
}
struct flow_stats {
u64 pkts;
u64 bytes;
u64 drops;
u64 lastused;
enum flow_action_hw_stats used_hw_stats;
bool used_hw_stats_valid;
};
static inline void flow_stats_update(struct flow_stats *flow_stats,
u64 bytes, u64 pkts,
u64 drops, u64 lastused,
enum flow_action_hw_stats used_hw_stats)
{
flow_stats->pkts += pkts;
flow_stats->bytes += bytes;
flow_stats->drops += drops;
flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused);
/* The driver should pass value with a maximum of one bit set.
* Passing FLOW_ACTION_HW_STATS_ANY is invalid.
*/
WARN_ON(used_hw_stats == FLOW_ACTION_HW_STATS_ANY);
flow_stats->used_hw_stats |= used_hw_stats;
flow_stats->used_hw_stats_valid = true;
}
enum flow_block_command {
FLOW_BLOCK_BIND,
FLOW_BLOCK_UNBIND,
};
enum flow_block_binder_type {
FLOW_BLOCK_BINDER_TYPE_UNSPEC,
FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
FLOW_BLOCK_BINDER_TYPE_RED_MARK,
};
struct flow_block {
struct list_head cb_list;
};
struct netlink_ext_ack;
struct flow_block_offload {
enum flow_block_command command;
enum flow_block_binder_type binder_type;
bool block_shared;
bool unlocked_driver_cb;
struct net *net;
struct flow_block *block;
struct list_head cb_list;
struct list_head *driver_block_list;
struct netlink_ext_ack *extack;
struct Qdisc *sch;
struct list_head *cb_list_head;
};
enum tc_setup_type;
typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
void *cb_priv);
struct flow_block_cb;
struct flow_block_indr {
struct list_head list;
struct net_device *dev;
struct Qdisc *sch;
enum flow_block_binder_type binder_type;
void *data;
void *cb_priv;
void (*cleanup)(struct flow_block_cb *block_cb);
};
struct flow_block_cb {
struct list_head driver_list;
struct list_head list;
flow_setup_cb_t *cb;
void *cb_ident;
void *cb_priv;
void (*release)(void *cb_priv);
struct flow_block_indr indr;
unsigned int refcnt;
};
struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv));
struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv),
struct flow_block_offload *bo,
struct net_device *dev,
struct Qdisc *sch, void *data,
void *indr_cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb));
void flow_block_cb_free(struct flow_block_cb *block_cb);
struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
flow_setup_cb_t *cb, void *cb_ident);
void *flow_block_cb_priv(struct flow_block_cb *block_cb);
void flow_block_cb_incref(struct flow_block_cb *block_cb);
unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb);
static inline void flow_block_cb_add(struct flow_block_cb *block_cb,
struct flow_block_offload *offload)
{
list_add_tail(&block_cb->list, &offload->cb_list);
}
static inline void flow_block_cb_remove(struct flow_block_cb *block_cb,
struct flow_block_offload *offload)
{
list_move(&block_cb->list, &offload->cb_list);
}
static inline void flow_indr_block_cb_remove(struct flow_block_cb *block_cb,
struct flow_block_offload *offload)
{
list_del(&block_cb->indr.list);
list_move(&block_cb->list, &offload->cb_list);
}
bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list);
int flow_block_cb_setup_simple(struct flow_block_offload *f,
struct list_head *driver_list,
flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv, bool ingress_only);
enum flow_cls_command {
FLOW_CLS_REPLACE,
FLOW_CLS_DESTROY,
FLOW_CLS_STATS,
FLOW_CLS_TMPLT_CREATE,
FLOW_CLS_TMPLT_DESTROY,
};
struct flow_cls_common_offload {
u32 chain_index;
__be16 protocol;
u32 prio;
net: sched: propagate "skip_sw" flag to struct flow_cls_common_offload Background: switchdev ports offload the Linux bridge, and most of the packets they handle will never see the CPU. The ports between which there exists no hardware data path are considered 'foreign' to switchdev. These can either be normal physical NICs without switchdev offload, or incompatible switchdev ports, or virtual interfaces like veth/dummy/etc. In some cases, an offloaded filter can only do half the work, and the rest must be handled by software. Redirecting/mirroring from the ingress of a switchdev port towards a foreign interface is one example of combined hardware/software data path. The most that the switchdev port can do is to extract the matching packets from its offloaded data path and send them to the CPU. From there on, the software filter runs (a second time, after the first run in hardware) on the packet and performs the mirred action. It makes sense for switchdev drivers which allow this kind of "half offloading" to sense the "skip_sw" flag of the filter/action pair, and deny attempts from the user to install a filter that does not run in software, because that simply won't work. In fact, a mirred action on a switchdev port towards a dummy interface appears to be a valid way of (selectively) monitoring offloaded traffic that flows through it. IFF_PROMISC was also discussed years ago, but (despite initial disagreement) there seems to be consensus that this flag should not affect the destination taken by packets, but merely whether or not the NIC discards packets with unknown MAC DA for local processing. [1] https://lore.kernel.org/netdev/20190830092637.7f83d162@ceranb/ [2] https://lore.kernel.org/netdev/20191002233750.13566-1-olteanv@gmail.com/ Suggested-by: Ido Schimmel <idosch@nvidia.com> Link: https://lore.kernel.org/netdev/ZxUo0Dc0M5Y6l9qF@shredder.mtl.com/ Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Link: https://patch.msgid.link/20241023135251.1752488-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-10-23 13:52:46 +00:00
bool skip_sw;
struct netlink_ext_ack *extack;
};
struct flow_cls_offload {
struct flow_cls_common_offload common;
enum flow_cls_command command;
bool use_act_stats;
unsigned long cookie;
struct flow_rule *rule;
struct flow_stats stats;
u32 classid;
};
enum offload_act_command {
FLOW_ACT_REPLACE,
FLOW_ACT_DESTROY,
FLOW_ACT_STATS,
};
struct flow_offload_action {
struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/
enum offload_act_command command;
enum flow_action_id id;
u32 index;
unsigned long cookie;
struct flow_stats stats;
struct flow_action action;
};
struct flow_offload_action *offload_action_alloc(unsigned int num_actions);
static inline struct flow_rule *
flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
{
return flow_cmd->rule;
}
static inline void flow_block_init(struct flow_block *flow_block)
{
INIT_LIST_HEAD(&flow_block->cb_list);
}
typedef int flow_indr_block_bind_cb_t(struct net_device *dev, struct Qdisc *sch, void *cb_priv,
enum tc_setup_type type, void *type_data,
void *data,
void (*cleanup)(struct flow_block_cb *block_cb));
int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv);
void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
void (*release)(void *cb_priv));
int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb));
bool flow_indr_dev_exists(void);
#endif /* _NET_FLOW_OFFLOAD_H */