mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
Merge branch 'netfilter-flowtable-hardware-offload'
Pablo Neira Ayuso says: ==================== netfilter flowtable hardware offload The following patchset adds hardware offload support for the flowtable infrastructure [1]. This infrastructure provides a fast datapath for the classic Linux forwarding path that users can enable through policy, e.g. table inet x { flowtable f { hook ingress priority 10 devices = { eth0, eth1 } flags offload } chain y { type filter hook forward priority 0; policy accept; ip protocol tcp flow offload @f } } The example above enables the fastpath for TCP traffic between devices eth0 and eth1. Users can turn on the hardware offload through the 'offload' flag from the flowtable definition. If this new flag is not specified, the software flowtable datapath is used. This patchset is composed of 4 preparation patches: room to extend this infrastructure, e.g. accelerate bridge forwarding. And 2 patches to add the hardware offload control and data planes: hardware offload. This includes a new NFTA_FLOWTABLE_FLAGS netlink attribute to convey the optional NF_FLOWTABLE_HW_OFFLOAD flag. API available at net/core/flow_offload.h to represent the flow through two flow_rule objects to configure an exact 5-tuple matching on each direction plus the corresponding forwarding actions, that is, the MAC address, NAT and checksum updates; and port redirection in order to configure the hardware datapath. This patch only supports IPv4 and statistics collection for flow aging as an initial step. This patchset introduces a new flow_block callback type that needs to be set up to configure the flowtable hardware offload. The first client of this infrastructure follows up after this batch. I would like to thank Mellanox for developing the first upstream driver to use this infrastructure. [1] Documentation/networking/nf_flowtable.txt ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
0320d1e7a3
@ -848,6 +848,7 @@ enum tc_setup_type {
|
||||
TC_SETUP_ROOT_QDISC,
|
||||
TC_SETUP_QDISC_GRED,
|
||||
TC_SETUP_QDISC_TAPRIO,
|
||||
TC_SETUP_FT,
|
||||
};
|
||||
|
||||
/* These structures hold the attributes of bpf state that are being passed
|
||||
|
@ -8,25 +8,43 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/netfilter/nf_conntrack_tuple_common.h>
|
||||
#include <net/flow_offload.h>
|
||||
#include <net/dst.h>
|
||||
|
||||
struct nf_flowtable;
|
||||
struct nf_flow_rule;
|
||||
struct flow_offload;
|
||||
enum flow_offload_tuple_dir;
|
||||
|
||||
struct nf_flowtable_type {
|
||||
struct list_head list;
|
||||
int family;
|
||||
int (*init)(struct nf_flowtable *ft);
|
||||
int (*setup)(struct nf_flowtable *ft,
|
||||
struct net_device *dev,
|
||||
enum flow_block_command cmd);
|
||||
int (*action)(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule);
|
||||
void (*free)(struct nf_flowtable *ft);
|
||||
nf_hookfn *hook;
|
||||
struct module *owner;
|
||||
};
|
||||
|
||||
enum nf_flowtable_flags {
|
||||
NF_FLOWTABLE_HW_OFFLOAD = 0x1,
|
||||
};
|
||||
|
||||
struct nf_flowtable {
|
||||
struct list_head list;
|
||||
struct rhashtable rhashtable;
|
||||
int priority;
|
||||
const struct nf_flowtable_type *type;
|
||||
struct delayed_work gc_work;
|
||||
unsigned int flags;
|
||||
struct flow_block flow_block;
|
||||
possible_net_t net;
|
||||
};
|
||||
|
||||
enum flow_offload_tuple_dir {
|
||||
@ -69,14 +87,22 @@ struct flow_offload_tuple_rhash {
|
||||
#define FLOW_OFFLOAD_DNAT 0x2
|
||||
#define FLOW_OFFLOAD_DYING 0x4
|
||||
#define FLOW_OFFLOAD_TEARDOWN 0x8
|
||||
#define FLOW_OFFLOAD_HW 0x10
|
||||
#define FLOW_OFFLOAD_HW_DYING 0x20
|
||||
#define FLOW_OFFLOAD_HW_DEAD 0x40
|
||||
|
||||
enum flow_offload_type {
|
||||
NF_FLOW_OFFLOAD_UNSPEC = 0,
|
||||
NF_FLOW_OFFLOAD_ROUTE,
|
||||
};
|
||||
|
||||
struct flow_offload {
|
||||
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
|
||||
u32 flags;
|
||||
union {
|
||||
/* Your private driver data here. */
|
||||
u32 timeout;
|
||||
};
|
||||
struct nf_conn *ct;
|
||||
u16 flags;
|
||||
u16 type;
|
||||
u32 timeout;
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
#define NF_FLOW_TIMEOUT (30 * HZ)
|
||||
@ -87,10 +113,12 @@ struct nf_flow_route {
|
||||
} tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||
};
|
||||
|
||||
struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
|
||||
struct nf_flow_route *route);
|
||||
struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
|
||||
void flow_offload_free(struct flow_offload *flow);
|
||||
|
||||
int flow_offload_route_init(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route);
|
||||
|
||||
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
|
||||
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||
struct flow_offload_tuple *tuple);
|
||||
@ -124,4 +152,22 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
|
||||
MODULE_ALIAS("nf-flowtable-" __stringify(family))
|
||||
|
||||
void nf_flow_offload_add(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow);
|
||||
void nf_flow_offload_del(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow);
|
||||
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow);
|
||||
|
||||
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
|
||||
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
|
||||
struct net_device *dev,
|
||||
enum flow_block_command cmd);
|
||||
int nf_flow_rule_route(struct net *net, const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule);
|
||||
|
||||
int nf_flow_table_offload_init(void);
|
||||
void nf_flow_table_offload_exit(void);
|
||||
|
||||
#endif /* _NF_FLOW_TABLE_H */
|
||||
|
@ -1518,6 +1518,7 @@ enum nft_object_attributes {
|
||||
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
|
||||
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
|
||||
* @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
|
||||
* @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
|
||||
*/
|
||||
enum nft_flowtable_attributes {
|
||||
NFTA_FLOWTABLE_UNSPEC,
|
||||
@ -1527,6 +1528,7 @@ enum nft_flowtable_attributes {
|
||||
NFTA_FLOWTABLE_USE,
|
||||
NFTA_FLOWTABLE_HANDLE,
|
||||
NFTA_FLOWTABLE_PAD,
|
||||
NFTA_FLOWTABLE_FLAGS,
|
||||
__NFTA_FLOWTABLE_MAX
|
||||
};
|
||||
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
|
||||
|
@ -9,6 +9,8 @@
|
||||
static struct nf_flowtable_type flowtable_ipv4 = {
|
||||
.family = NFPROTO_IPV4,
|
||||
.init = nf_flow_table_init,
|
||||
.setup = nf_flow_table_offload_setup,
|
||||
.action = nf_flow_rule_route,
|
||||
.free = nf_flow_table_free,
|
||||
.hook = nf_flow_offload_ip_hook,
|
||||
.owner = THIS_MODULE,
|
||||
|
@ -10,6 +10,8 @@
|
||||
static struct nf_flowtable_type flowtable_ipv6 = {
|
||||
.family = NFPROTO_IPV6,
|
||||
.init = nf_flow_table_init,
|
||||
.setup = nf_flow_table_offload_setup,
|
||||
.action = nf_flow_rule_route,
|
||||
.free = nf_flow_table_free,
|
||||
.hook = nf_flow_offload_ipv6_hook,
|
||||
.owner = THIS_MODULE,
|
||||
|
@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
|
||||
|
||||
# flow table infrastructure
|
||||
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
|
||||
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
|
||||
nf_flow_table_offload.o
|
||||
|
||||
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
|
||||
|
||||
|
@ -14,24 +14,15 @@
|
||||
#include <net/netfilter/nf_conntrack_l4proto.h>
|
||||
#include <net/netfilter/nf_conntrack_tuple.h>
|
||||
|
||||
struct flow_offload_entry {
|
||||
struct flow_offload flow;
|
||||
struct nf_conn *ct;
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(flowtable_lock);
|
||||
static LIST_HEAD(flowtables);
|
||||
|
||||
static void
|
||||
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
|
||||
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
|
||||
struct dst_entry *other_dst = route->tuple[!dir].dst;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
ft->dir = dir;
|
||||
|
||||
@ -39,12 +30,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||
case NFPROTO_IPV4:
|
||||
ft->src_v4 = ctt->src.u3.in;
|
||||
ft->dst_v4 = ctt->dst.u3.in;
|
||||
ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||
break;
|
||||
case NFPROTO_IPV6:
|
||||
ft->src_v6 = ctt->src.u3.in6;
|
||||
ft->dst_v6 = ctt->dst.u3.in6;
|
||||
ft->mtu = ip6_dst_mtu_forward(dst);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -52,37 +41,24 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||
ft->l4proto = ctt->dst.protonum;
|
||||
ft->src_port = ctt->src.u.tcp.port;
|
||||
ft->dst_port = ctt->dst.u.tcp.port;
|
||||
|
||||
ft->iifidx = other_dst->dev->ifindex;
|
||||
ft->dst_cache = dst;
|
||||
}
|
||||
|
||||
struct flow_offload *
|
||||
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||
struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
|
||||
{
|
||||
struct flow_offload_entry *entry;
|
||||
struct flow_offload *flow;
|
||||
|
||||
if (unlikely(nf_ct_is_dying(ct) ||
|
||||
!atomic_inc_not_zero(&ct->ct_general.use)))
|
||||
return NULL;
|
||||
|
||||
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
|
||||
if (!entry)
|
||||
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
|
||||
if (!flow)
|
||||
goto err_ct_refcnt;
|
||||
|
||||
flow = &entry->flow;
|
||||
flow->ct = ct;
|
||||
|
||||
if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
|
||||
goto err_dst_cache_original;
|
||||
|
||||
if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
|
||||
goto err_dst_cache_reply;
|
||||
|
||||
entry->ct = ct;
|
||||
|
||||
flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
|
||||
flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_REPLY);
|
||||
|
||||
if (ct->status & IPS_SRC_NAT)
|
||||
flow->flags |= FLOW_OFFLOAD_SNAT;
|
||||
@ -91,10 +67,6 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||
|
||||
return flow;
|
||||
|
||||
err_dst_cache_reply:
|
||||
dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||
err_dst_cache_original:
|
||||
kfree(entry);
|
||||
err_ct_refcnt:
|
||||
nf_ct_put(ct);
|
||||
|
||||
@ -102,6 +74,56 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||
|
||||
static int flow_offload_fill_route(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
struct dst_entry *other_dst = route->tuple[!dir].dst;
|
||||
struct dst_entry *dst = route->tuple[dir].dst;
|
||||
|
||||
if (!dst_hold_safe(route->tuple[dir].dst))
|
||||
return -1;
|
||||
|
||||
switch (flow_tuple->l3proto) {
|
||||
case NFPROTO_IPV4:
|
||||
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||
break;
|
||||
case NFPROTO_IPV6:
|
||||
flow_tuple->mtu = ip6_dst_mtu_forward(dst);
|
||||
break;
|
||||
}
|
||||
|
||||
flow_tuple->iifidx = other_dst->dev->ifindex;
|
||||
flow_tuple->dst_cache = dst;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int flow_offload_route_init(struct flow_offload *flow,
|
||||
const struct nf_flow_route *route)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
|
||||
if (err < 0)
|
||||
goto err_route_reply;
|
||||
|
||||
flow->type = NF_FLOW_OFFLOAD_ROUTE;
|
||||
|
||||
return 0;
|
||||
|
||||
err_route_reply:
|
||||
dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_route_init);
|
||||
|
||||
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
|
||||
{
|
||||
tcp->state = TCP_CONNTRACK_ESTABLISHED;
|
||||
@ -150,17 +172,25 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
|
||||
flow_offload_fixup_ct_timeout(ct);
|
||||
}
|
||||
|
||||
void flow_offload_free(struct flow_offload *flow)
|
||||
static void flow_offload_route_release(struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_entry *e;
|
||||
|
||||
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
}
|
||||
|
||||
void flow_offload_free(struct flow_offload *flow)
|
||||
{
|
||||
switch (flow->type) {
|
||||
case NF_FLOW_OFFLOAD_ROUTE:
|
||||
flow_offload_route_release(flow);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (flow->flags & FLOW_OFFLOAD_DYING)
|
||||
nf_ct_delete(e->ct, 0, 0);
|
||||
nf_ct_put(e->ct);
|
||||
kfree_rcu(e, rcu_head);
|
||||
nf_ct_delete(flow->ct, 0, 0);
|
||||
nf_ct_put(flow->ct);
|
||||
kfree_rcu(flow, rcu_head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||
|
||||
@ -220,6 +250,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (flow_table->flags & NF_FLOWTABLE_HW_OFFLOAD)
|
||||
nf_flow_offload_add(flow_table, flow);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||
@ -232,8 +265,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
|
||||
static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_entry *e;
|
||||
|
||||
rhashtable_remove_fast(&flow_table->rhashtable,
|
||||
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||
nf_flow_offload_rhash_params);
|
||||
@ -241,25 +272,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||
nf_flow_offload_rhash_params);
|
||||
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
|
||||
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
|
||||
|
||||
if (nf_flow_has_expired(flow))
|
||||
flow_offload_fixup_ct(e->ct);
|
||||
flow_offload_fixup_ct(flow->ct);
|
||||
else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
|
||||
flow_offload_fixup_ct_timeout(e->ct);
|
||||
flow_offload_fixup_ct_timeout(flow->ct);
|
||||
|
||||
flow_offload_free(flow);
|
||||
}
|
||||
|
||||
void flow_offload_teardown(struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_entry *e;
|
||||
|
||||
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
|
||||
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
flow_offload_fixup_ct_state(e->ct);
|
||||
flow_offload_fixup_ct_state(flow->ct);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flow_offload_teardown);
|
||||
|
||||
@ -269,7 +296,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||
{
|
||||
struct flow_offload_tuple_rhash *tuplehash;
|
||||
struct flow_offload *flow;
|
||||
struct flow_offload_entry *e;
|
||||
int dir;
|
||||
|
||||
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
|
||||
@ -282,8 +308,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||
if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
|
||||
return NULL;
|
||||
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
if (unlikely(nf_ct_is_dying(e->ct)))
|
||||
if (unlikely(nf_ct_is_dying(flow->ct)))
|
||||
return NULL;
|
||||
|
||||
return tuplehash;
|
||||
@ -327,12 +352,21 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
|
||||
{
|
||||
struct nf_flowtable *flow_table = data;
|
||||
struct flow_offload_entry *e;
|
||||
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
|
||||
(flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
|
||||
flow_offload_del(flow_table, flow);
|
||||
if (flow->flags & FLOW_OFFLOAD_HW)
|
||||
nf_flow_offload_stats(flow_table, flow);
|
||||
|
||||
if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
|
||||
(flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) {
|
||||
if (flow->flags & FLOW_OFFLOAD_HW) {
|
||||
if (!(flow->flags & FLOW_OFFLOAD_HW_DYING))
|
||||
nf_flow_offload_del(flow_table, flow);
|
||||
else if (flow->flags & FLOW_OFFLOAD_HW_DEAD)
|
||||
flow_offload_del(flow_table, flow);
|
||||
} else {
|
||||
flow_offload_del(flow_table, flow);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void nf_flow_offload_work_gc(struct work_struct *work)
|
||||
@ -465,6 +499,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
|
||||
int err;
|
||||
|
||||
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
|
||||
flow_block_init(&flowtable->flow_block);
|
||||
|
||||
err = rhashtable_init(&flowtable->rhashtable,
|
||||
&nf_flow_offload_rhash_params);
|
||||
@ -485,15 +520,13 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
|
||||
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||
{
|
||||
struct net_device *dev = data;
|
||||
struct flow_offload_entry *e;
|
||||
|
||||
e = container_of(flow, struct flow_offload_entry, flow);
|
||||
|
||||
if (!dev) {
|
||||
flow_offload_teardown(flow);
|
||||
return;
|
||||
}
|
||||
if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
|
||||
|
||||
if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
|
||||
(flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
|
||||
flow->tuplehash[1].tuple.iifidx == dev->ifindex))
|
||||
flow_offload_dead(flow);
|
||||
@ -502,6 +535,7 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||
struct net_device *dev)
|
||||
{
|
||||
nf_flow_table_offload_flush(flowtable);
|
||||
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
|
||||
flush_delayed_work(&flowtable->gc_work);
|
||||
}
|
||||
@ -529,5 +563,18 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||
|
||||
static int __init nf_flow_table_module_init(void)
|
||||
{
|
||||
return nf_flow_table_offload_init();
|
||||
}
|
||||
|
||||
static void __exit nf_flow_table_module_exit(void)
|
||||
{
|
||||
nf_flow_table_offload_exit();
|
||||
}
|
||||
|
||||
module_init(nf_flow_table_module_init);
|
||||
module_exit(nf_flow_table_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||
|
@ -24,6 +24,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
|
||||
static struct nf_flowtable_type flowtable_inet = {
|
||||
.family = NFPROTO_INET,
|
||||
.init = nf_flow_table_init,
|
||||
.setup = nf_flow_table_offload_setup,
|
||||
.action = nf_flow_rule_route,
|
||||
.free = nf_flow_table_free,
|
||||
.hook = nf_flow_offload_inet_hook,
|
||||
.owner = THIS_MODULE,
|
||||
|
758
net/netfilter/nf_flow_table_offload.c
Normal file
758
net/netfilter/nf_flow_table_offload.c
Normal file
@ -0,0 +1,758 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/tc_act/tc_csum.h>
|
||||
#include <net/flow_offload.h>
|
||||
#include <net/netfilter/nf_flow_table.h>
|
||||
#include <net/netfilter/nf_conntrack.h>
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
#include <net/netfilter/nf_conntrack_tuple.h>
|
||||
|
||||
/* File-scope state for deferred hardware offload requests. */
static struct work_struct nf_flow_offload_work;

/*
 * NOTE(review): presumably this lock guards flow_offload_pending_list;
 * the users are outside this view - confirm.
 */
static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
static LIST_HEAD(flow_offload_pending_list);
|
||||
|
||||
struct flow_offload_work {
|
||||
struct list_head list;
|
||||
enum flow_cls_command cmd;
|
||||
int priority;
|
||||
struct nf_flowtable *flowtable;
|
||||
struct flow_offload *flow;
|
||||
};
|
||||
|
||||
struct nf_flow_key {
|
||||
struct flow_dissector_key_control control;
|
||||
struct flow_dissector_key_basic basic;
|
||||
union {
|
||||
struct flow_dissector_key_ipv4_addrs ipv4;
|
||||
};
|
||||
struct flow_dissector_key_tcp tcp;
|
||||
struct flow_dissector_key_ports tp;
|
||||
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
|
||||
|
||||
struct nf_flow_match {
|
||||
struct flow_dissector dissector;
|
||||
struct nf_flow_key key;
|
||||
struct nf_flow_key mask;
|
||||
};
|
||||
|
||||
struct nf_flow_rule {
|
||||
struct nf_flow_match match;
|
||||
struct flow_rule *rule;
|
||||
};
|
||||
|
||||
/*
 * Record, in the dissector of @__match, where the key of type @__type
 * lives inside struct nf_flow_key (member @__field).
 */
#define NF_FLOW_DISSECTOR(__match, __type, __field)			\
	((__match)->dissector.offset[(__type)] =			\
		offsetof(struct nf_flow_key, __field))
|
||||
|
||||
static int nf_flow_rule_match(struct nf_flow_match *match,
|
||||
const struct flow_offload_tuple *tuple)
|
||||
{
|
||||
struct nf_flow_key *mask = &match->mask;
|
||||
struct nf_flow_key *key = &match->key;
|
||||
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
|
||||
|
||||
switch (tuple->l3proto) {
|
||||
case AF_INET:
|
||||
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
|
||||
key->basic.n_proto = htons(ETH_P_IP);
|
||||
key->ipv4.src = tuple->src_v4.s_addr;
|
||||
mask->ipv4.src = 0xffffffff;
|
||||
key->ipv4.dst = tuple->dst_v4.s_addr;
|
||||
mask->ipv4.dst = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
mask->basic.n_proto = 0xffff;
|
||||
|
||||
switch (tuple->l4proto) {
|
||||
case IPPROTO_TCP:
|
||||
key->tcp.flags = 0;
|
||||
mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN;
|
||||
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
key->basic.ip_proto = tuple->l4proto;
|
||||
mask->basic.ip_proto = 0xff;
|
||||
|
||||
key->tp.src = tuple->src_port;
|
||||
mask->tp.src = 0xffff;
|
||||
key->tp.dst = tuple->dst_port;
|
||||
mask->tp.dst = 0xffff;
|
||||
|
||||
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
|
||||
BIT(FLOW_DISSECTOR_KEY_BASIC) |
|
||||
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
|
||||
BIT(FLOW_DISSECTOR_KEY_PORTS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void flow_offload_mangle(struct flow_action_entry *entry,
|
||||
enum flow_action_mangle_base htype,
|
||||
u32 offset, u8 *value, u8 *mask)
|
||||
{
|
||||
entry->id = FLOW_ACTION_MANGLE;
|
||||
entry->mangle.htype = htype;
|
||||
entry->mangle.offset = offset;
|
||||
memcpy(&entry->mangle.mask, mask, sizeof(u32));
|
||||
memcpy(&entry->mangle.val, value, sizeof(u32));
|
||||
}
|
||||
|
||||
static int flow_offload_eth_src(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry0,
|
||||
struct flow_action_entry *entry1)
|
||||
{
|
||||
const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
|
||||
struct net_device *dev;
|
||||
u32 mask, val;
|
||||
u16 val16;
|
||||
|
||||
dev = dev_get_by_index(net, tuple->iifidx);
|
||||
if (!dev)
|
||||
return -ENOENT;
|
||||
|
||||
mask = ~0xffff0000;
|
||||
memcpy(&val16, dev->dev_addr, 2);
|
||||
val = val16 << 16;
|
||||
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
(u8 *)&val, (u8 *)&mask);
|
||||
|
||||
mask = ~0xffffffff;
|
||||
memcpy(&val, dev->dev_addr + 2, 4);
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
|
||||
(u8 *)&val, (u8 *)&mask);
|
||||
dev_put(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int flow_offload_eth_dst(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry0,
|
||||
struct flow_action_entry *entry1)
|
||||
{
|
||||
const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
|
||||
struct neighbour *n;
|
||||
u32 mask, val;
|
||||
u16 val16;
|
||||
|
||||
n = dst_neigh_lookup(tuple->dst_cache, &tuple->dst_v4);
|
||||
if (!n)
|
||||
return -ENOENT;
|
||||
|
||||
mask = ~0xffffffff;
|
||||
memcpy(&val, n->ha, 4);
|
||||
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
|
||||
(u8 *)&val, (u8 *)&mask);
|
||||
|
||||
mask = ~0x0000ffff;
|
||||
memcpy(&val16, n->ha + 4, 2);
|
||||
val = val16;
|
||||
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
|
||||
(u8 *)&val, (u8 *)&mask);
|
||||
neigh_release(n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void flow_offload_ipv4_snat(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
__be32 addr;
|
||||
u32 offset;
|
||||
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
|
||||
offset = offsetof(struct iphdr, saddr);
|
||||
break;
|
||||
case FLOW_OFFLOAD_DIR_REPLY:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||
offset = offsetof(struct iphdr, daddr);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
|
||||
(u8 *)&addr, (u8 *)&mask);
|
||||
}
|
||||
|
||||
static void flow_offload_ipv4_dnat(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
__be32 addr;
|
||||
u32 offset;
|
||||
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
|
||||
offset = offsetof(struct iphdr, daddr);
|
||||
break;
|
||||
case FLOW_OFFLOAD_DIR_REPLY:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||
offset = offsetof(struct iphdr, saddr);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
|
||||
(u8 *)&addr, (u8 *)&mask);
|
||||
}
|
||||
|
||||
static int flow_offload_l4proto(const struct flow_offload *flow)
|
||||
{
|
||||
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
|
||||
u8 type = 0;
|
||||
|
||||
switch (protonum) {
|
||||
case IPPROTO_TCP:
|
||||
type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
static void flow_offload_port_snat(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
u32 mask = ~htonl(0xffff0000);
|
||||
__be16 port;
|
||||
u32 offset;
|
||||
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
|
||||
offset = 0; /* offsetof(struct tcphdr, source); */
|
||||
break;
|
||||
case FLOW_OFFLOAD_DIR_REPLY:
|
||||
port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||
offset = 0; /* offsetof(struct tcphdr, dest); */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
|
||||
(u8 *)&port, (u8 *)&mask);
|
||||
}
|
||||
|
||||
static void flow_offload_port_dnat(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
u32 mask = ~htonl(0xffff);
|
||||
__be16 port;
|
||||
u32 offset;
|
||||
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
|
||||
offset = 0; /* offsetof(struct tcphdr, source); */
|
||||
break;
|
||||
case FLOW_OFFLOAD_DIR_REPLY:
|
||||
port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||
offset = 0; /* offsetof(struct tcphdr, dest); */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
|
||||
(u8 *)&port, (u8 *)&mask);
|
||||
}
|
||||
|
||||
static void flow_offload_ipv4_checksum(struct net *net,
|
||||
const struct flow_offload *flow,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
|
||||
|
||||
entry->id = FLOW_ACTION_CSUM;
|
||||
entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
|
||||
|
||||
switch (protonum) {
|
||||
case IPPROTO_TCP:
|
||||
entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void flow_offload_redirect(const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_action_entry *entry)
|
||||
{
|
||||
struct rtable *rt;
|
||||
|
||||
rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||
entry->id = FLOW_ACTION_REDIRECT;
|
||||
entry->dev = rt->dst.dev;
|
||||
dev_hold(rt->dst.dev);
|
||||
}
|
||||
|
||||
int nf_flow_rule_route(struct net *net, const struct flow_offload *flow,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (flow_offload_eth_src(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[0],
|
||||
&flow_rule->rule->action.entries[1]) < 0)
|
||||
return -1;
|
||||
|
||||
if (flow_offload_eth_dst(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[2],
|
||||
&flow_rule->rule->action.entries[3]) < 0)
|
||||
return -1;
|
||||
|
||||
i = 4;
|
||||
if (flow->flags & FLOW_OFFLOAD_SNAT) {
|
||||
flow_offload_ipv4_snat(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[i++]);
|
||||
flow_offload_port_snat(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[i++]);
|
||||
}
|
||||
if (flow->flags & FLOW_OFFLOAD_DNAT) {
|
||||
flow_offload_ipv4_dnat(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[i++]);
|
||||
flow_offload_port_dnat(net, flow, dir,
|
||||
&flow_rule->rule->action.entries[i++]);
|
||||
}
|
||||
if (flow->flags & FLOW_OFFLOAD_SNAT ||
|
||||
flow->flags & FLOW_OFFLOAD_DNAT)
|
||||
flow_offload_ipv4_checksum(net, flow,
|
||||
&flow_rule->rule->action.entries[i++]);
|
||||
|
||||
flow_offload_redirect(flow, dir, &flow_rule->rule->action.entries[i++]);
|
||||
|
||||
return i;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_rule_route);
|
||||
|
||||
static struct nf_flow_rule *
|
||||
nf_flow_offload_rule_alloc(struct net *net,
|
||||
const struct flow_offload_work *offload,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
const struct nf_flowtable *flowtable = offload->flowtable;
|
||||
const struct flow_offload *flow = offload->flow;
|
||||
const struct flow_offload_tuple *tuple;
|
||||
struct nf_flow_rule *flow_rule;
|
||||
int err = -ENOMEM, num_actions;
|
||||
|
||||
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
|
||||
if (!flow_rule)
|
||||
goto err_flow;
|
||||
|
||||
flow_rule->rule = flow_rule_alloc(10);
|
||||
if (!flow_rule->rule)
|
||||
goto err_flow_rule;
|
||||
|
||||
flow_rule->rule->match.dissector = &flow_rule->match.dissector;
|
||||
flow_rule->rule->match.mask = &flow_rule->match.mask;
|
||||
flow_rule->rule->match.key = &flow_rule->match.key;
|
||||
|
||||
tuple = &flow->tuplehash[dir].tuple;
|
||||
err = nf_flow_rule_match(&flow_rule->match, tuple);
|
||||
if (err < 0)
|
||||
goto err_flow_match;
|
||||
|
||||
num_actions = flowtable->type->action(net, flow, dir, flow_rule);
|
||||
if (num_actions < 0)
|
||||
goto err_flow_match;
|
||||
|
||||
flow_rule->rule->action.num_entries = num_actions;
|
||||
|
||||
return flow_rule;
|
||||
|
||||
err_flow_match:
|
||||
kfree(flow_rule->rule);
|
||||
err_flow_rule:
|
||||
kfree(flow_rule);
|
||||
err_flow:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
struct flow_action_entry *entry;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
|
||||
entry = &flow_rule->rule->action.entries[i];
|
||||
if (entry->id != FLOW_ACTION_REDIRECT)
|
||||
continue;
|
||||
|
||||
dev_put(entry->dev);
|
||||
}
|
||||
kfree(flow_rule->rule);
|
||||
kfree(flow_rule);
|
||||
}
|
||||
|
||||
static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
|
||||
__nf_flow_offload_destroy(flow_rule[i]);
|
||||
}
|
||||
|
||||
static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
|
||||
struct nf_flow_rule *flow_rule[])
|
||||
{
|
||||
struct net *net = read_pnet(&offload->flowtable->net);
|
||||
|
||||
flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
|
||||
FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
if (!flow_rule[0])
|
||||
return -ENOMEM;
|
||||
|
||||
flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
|
||||
FLOW_OFFLOAD_DIR_REPLY);
|
||||
if (!flow_rule[1]) {
|
||||
__nf_flow_offload_destroy(flow_rule[0]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
|
||||
__be16 proto, int priority,
|
||||
enum flow_cls_command cmd,
|
||||
const struct flow_offload_tuple *tuple,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
cls_flow->common.protocol = proto;
|
||||
cls_flow->common.prio = priority;
|
||||
cls_flow->common.extack = extack;
|
||||
cls_flow->command = cmd;
|
||||
cls_flow->cookie = (unsigned long)tuple;
|
||||
}
|
||||
|
||||
static int flow_offload_tuple_add(struct flow_offload_work *offload,
|
||||
struct nf_flow_rule *flow_rule,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct nf_flowtable *flowtable = offload->flowtable;
|
||||
struct flow_cls_offload cls_flow = {};
|
||||
struct flow_block_cb *block_cb;
|
||||
struct netlink_ext_ack extack;
|
||||
__be16 proto = ETH_P_ALL;
|
||||
int err, i = 0;
|
||||
|
||||
nf_flow_offload_init(&cls_flow, proto, offload->priority,
|
||||
FLOW_CLS_REPLACE,
|
||||
&offload->flow->tuplehash[dir].tuple, &extack);
|
||||
cls_flow.rule = flow_rule->rule;
|
||||
|
||||
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
|
||||
err = block_cb->cb(TC_SETUP_FT, &cls_flow,
|
||||
block_cb->cb_priv);
|
||||
if (err < 0)
|
||||
continue;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static void flow_offload_tuple_del(struct flow_offload_work *offload,
|
||||
enum flow_offload_tuple_dir dir)
|
||||
{
|
||||
struct nf_flowtable *flowtable = offload->flowtable;
|
||||
struct flow_cls_offload cls_flow = {};
|
||||
struct flow_block_cb *block_cb;
|
||||
struct netlink_ext_ack extack;
|
||||
__be16 proto = ETH_P_ALL;
|
||||
|
||||
nf_flow_offload_init(&cls_flow, proto, offload->priority,
|
||||
FLOW_CLS_DESTROY,
|
||||
&offload->flow->tuplehash[dir].tuple, &extack);
|
||||
|
||||
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
|
||||
block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
|
||||
|
||||
offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
|
||||
}
|
||||
|
||||
static int flow_offload_rule_add(struct flow_offload_work *offload,
|
||||
struct nf_flow_rule *flow_rule[])
|
||||
{
|
||||
int ok_count = 0;
|
||||
|
||||
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
|
||||
FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
ok_count += flow_offload_tuple_add(offload, flow_rule[1],
|
||||
FLOW_OFFLOAD_DIR_REPLY);
|
||||
if (ok_count == 0)
|
||||
return -ENOENT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int flow_offload_work_add(struct flow_offload_work *offload)
|
||||
{
|
||||
struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
|
||||
int err;
|
||||
|
||||
err = nf_flow_offload_alloc(offload, flow_rule);
|
||||
if (err < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
err = flow_offload_rule_add(offload, flow_rule);
|
||||
|
||||
nf_flow_offload_destroy(flow_rule);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void flow_offload_work_del(struct flow_offload_work *offload)
|
||||
{
|
||||
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
|
||||
}
|
||||
|
||||
static void flow_offload_tuple_stats(struct flow_offload_work *offload,
|
||||
enum flow_offload_tuple_dir dir,
|
||||
struct flow_stats *stats)
|
||||
{
|
||||
struct nf_flowtable *flowtable = offload->flowtable;
|
||||
struct flow_cls_offload cls_flow = {};
|
||||
struct flow_block_cb *block_cb;
|
||||
struct netlink_ext_ack extack;
|
||||
__be16 proto = ETH_P_ALL;
|
||||
|
||||
nf_flow_offload_init(&cls_flow, proto, offload->priority,
|
||||
FLOW_CLS_STATS,
|
||||
&offload->flow->tuplehash[dir].tuple, &extack);
|
||||
|
||||
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
|
||||
block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
|
||||
memcpy(stats, &cls_flow.stats, sizeof(*stats));
|
||||
}
|
||||
|
||||
static void flow_offload_work_stats(struct flow_offload_work *offload)
|
||||
{
|
||||
struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
|
||||
u64 lastused;
|
||||
|
||||
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
|
||||
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
|
||||
|
||||
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
|
||||
offload->flow->timeout = max_t(u64, offload->flow->timeout,
|
||||
lastused + NF_FLOW_TIMEOUT);
|
||||
}
|
||||
|
||||
static void flow_offload_work_handler(struct work_struct *work)
|
||||
{
|
||||
struct flow_offload_work *offload, *next;
|
||||
LIST_HEAD(offload_pending_list);
|
||||
int ret;
|
||||
|
||||
spin_lock_bh(&flow_offload_pending_list_lock);
|
||||
list_replace_init(&flow_offload_pending_list, &offload_pending_list);
|
||||
spin_unlock_bh(&flow_offload_pending_list_lock);
|
||||
|
||||
list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
|
||||
switch (offload->cmd) {
|
||||
case FLOW_CLS_REPLACE:
|
||||
ret = flow_offload_work_add(offload);
|
||||
if (ret < 0)
|
||||
offload->flow->flags &= ~FLOW_OFFLOAD_HW;
|
||||
break;
|
||||
case FLOW_CLS_DESTROY:
|
||||
flow_offload_work_del(offload);
|
||||
break;
|
||||
case FLOW_CLS_STATS:
|
||||
flow_offload_work_stats(offload);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
list_del(&offload->list);
|
||||
kfree(offload);
|
||||
}
|
||||
}
|
||||
|
||||
static void flow_offload_queue_work(struct flow_offload_work *offload)
|
||||
{
|
||||
spin_lock_bh(&flow_offload_pending_list_lock);
|
||||
list_add_tail(&offload->list, &flow_offload_pending_list);
|
||||
spin_unlock_bh(&flow_offload_pending_list_lock);
|
||||
|
||||
schedule_work(&nf_flow_offload_work);
|
||||
}
|
||||
|
||||
void nf_flow_offload_add(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_work *offload;
|
||||
|
||||
offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
|
||||
if (!offload)
|
||||
return;
|
||||
|
||||
offload->cmd = FLOW_CLS_REPLACE;
|
||||
offload->flow = flow;
|
||||
offload->priority = flowtable->priority;
|
||||
offload->flowtable = flowtable;
|
||||
flow->flags |= FLOW_OFFLOAD_HW;
|
||||
|
||||
flow_offload_queue_work(offload);
|
||||
}
|
||||
|
||||
void nf_flow_offload_del(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_work *offload;
|
||||
|
||||
offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
|
||||
if (!offload)
|
||||
return;
|
||||
|
||||
offload->cmd = FLOW_CLS_DESTROY;
|
||||
offload->flow = flow;
|
||||
offload->flow->flags |= FLOW_OFFLOAD_HW_DYING;
|
||||
offload->flowtable = flowtable;
|
||||
|
||||
flow_offload_queue_work(offload);
|
||||
}
|
||||
|
||||
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
|
||||
struct flow_offload *flow)
|
||||
{
|
||||
struct flow_offload_work *offload;
|
||||
s64 delta;
|
||||
|
||||
delta = flow->timeout - jiffies;
|
||||
if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) ||
|
||||
flow->flags & FLOW_OFFLOAD_HW_DYING)
|
||||
return;
|
||||
|
||||
offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
|
||||
if (!offload)
|
||||
return;
|
||||
|
||||
offload->cmd = FLOW_CLS_STATS;
|
||||
offload->flow = flow;
|
||||
offload->flowtable = flowtable;
|
||||
|
||||
flow_offload_queue_work(offload);
|
||||
}
|
||||
|
||||
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
|
||||
{
|
||||
if (flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)
|
||||
flush_work(&nf_flow_offload_work);
|
||||
}
|
||||
|
||||
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
|
||||
struct flow_block_offload *bo,
|
||||
enum flow_block_command cmd)
|
||||
{
|
||||
struct flow_block_cb *block_cb, *next;
|
||||
int err = 0;
|
||||
|
||||
switch (cmd) {
|
||||
case FLOW_BLOCK_BIND:
|
||||
list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
|
||||
break;
|
||||
case FLOW_BLOCK_UNBIND:
|
||||
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
|
||||
list_del(&block_cb->list);
|
||||
flow_block_cb_free(block_cb);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
err = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
|
||||
struct net_device *dev,
|
||||
enum flow_block_command cmd)
|
||||
{
|
||||
struct netlink_ext_ack extack = {};
|
||||
struct flow_block_offload bo = {};
|
||||
int err;
|
||||
|
||||
if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
|
||||
return 0;
|
||||
|
||||
bo.net = dev_net(dev);
|
||||
bo.block = &flowtable->flow_block;
|
||||
bo.command = cmd;
|
||||
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
|
||||
bo.extack = &extack;
|
||||
INIT_LIST_HEAD(&bo.cb_list);
|
||||
|
||||
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
return nf_flow_table_block_setup(flowtable, &bo, cmd);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
|
||||
|
||||
int nf_flow_table_offload_init(void)
|
||||
{
|
||||
INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nf_flow_table_offload_exit(void)
|
||||
{
|
||||
struct flow_offload_work *offload, *next;
|
||||
LIST_HEAD(offload_pending_list);
|
||||
|
||||
cancel_work_sync(&nf_flow_offload_work);
|
||||
|
||||
list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
|
||||
list_del(&offload->list);
|
||||
kfree(offload);
|
||||
}
|
||||
}
|
@ -5835,6 +5835,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
|
||||
.len = NFT_NAME_MAXLEN - 1 },
|
||||
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
|
||||
[NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
|
||||
[NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
|
||||
};
|
||||
|
||||
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
|
||||
@ -5968,8 +5969,11 @@ static void nft_unregister_flowtable_net_hooks(struct net *net,
|
||||
{
|
||||
struct nft_hook *hook;
|
||||
|
||||
list_for_each_entry(hook, &flowtable->hook_list, list)
|
||||
list_for_each_entry(hook, &flowtable->hook_list, list) {
|
||||
nf_unregister_net_hook(net, &hook->ops);
|
||||
flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
|
||||
FLOW_BLOCK_UNBIND);
|
||||
}
|
||||
}
|
||||
|
||||
static int nft_register_flowtable_net_hooks(struct net *net,
|
||||
@ -5991,6 +5995,8 @@ static int nft_register_flowtable_net_hooks(struct net *net,
|
||||
}
|
||||
}
|
||||
|
||||
flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
|
||||
FLOW_BLOCK_BIND);
|
||||
err = nf_register_net_hook(net, &hook->ops);
|
||||
if (err < 0)
|
||||
goto err_unregister_net_hooks;
|
||||
@ -6006,6 +6012,8 @@ static int nft_register_flowtable_net_hooks(struct net *net,
|
||||
break;
|
||||
|
||||
nf_unregister_net_hook(net, &hook->ops);
|
||||
flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
|
||||
FLOW_BLOCK_UNBIND);
|
||||
list_del_rcu(&hook->list);
|
||||
kfree_rcu(hook, rcu);
|
||||
}
|
||||
@ -6080,6 +6088,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
|
||||
goto err2;
|
||||
}
|
||||
|
||||
if (nla[NFTA_FLOWTABLE_FLAGS]) {
|
||||
flowtable->data.flags =
|
||||
ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
|
||||
if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD)
|
||||
goto err3;
|
||||
}
|
||||
|
||||
write_pnet(&flowtable->data.net, net);
|
||||
flowtable->data.type = type;
|
||||
err = type->init(&flowtable->data);
|
||||
if (err < 0)
|
||||
@ -6191,7 +6207,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
|
||||
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
|
||||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
|
||||
nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
|
||||
NFTA_FLOWTABLE_PAD))
|
||||
NFTA_FLOWTABLE_PAD) ||
|
||||
nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
|
||||
goto nla_put_failure;
|
||||
|
||||
nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
|
||||
|
@ -115,10 +115,13 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
|
||||
if (nft_flow_route(pkt, ct, &route, dir) < 0)
|
||||
goto err_flow_route;
|
||||
|
||||
flow = flow_offload_alloc(ct, &route);
|
||||
flow = flow_offload_alloc(ct);
|
||||
if (!flow)
|
||||
goto err_flow_alloc;
|
||||
|
||||
if (flow_offload_route_init(flow, &route) < 0)
|
||||
goto err_flow_add;
|
||||
|
||||
if (tcph) {
|
||||
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
|
||||
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
|
||||
|
Loading…
Reference in New Issue
Block a user