Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter: conntrack and nf_tables bug fixes

The following patchset contains netfilter fixes for net.

Broken since 5.19:
  A few ancient connection tracking helpers assume TCP packets cannot
  exceed 64kb in size, but this isn't the case anymore with 5.19 when
  BIG TCP got merged, from myself.

Regressions since 5.19:
  1. 'conntrack -E expect' won't display anything because nfnetlink failed
     to enable events for expectations, only for normal conntrack events.

  2. partially revert change that added resched calls to a function that can
     be in atomic context.  Both broken and fixed up by myself.

Broken for several releases (up to original merge of nf_tables):
  Several fixes for nf_tables control plane, from Pablo.
  This fixes up resource leaks in error paths and adds more sanity
  checks for mutually exclusive attributes/flags.

Kconfig:
  NF_CONNTRACK_PROCFS is very old and doesn't provide all info provided
  via ctnetlink, so it should not default to y. From Geert Uytterhoeven.

Selftests:
  rework nft_flowtable.sh: it frequently indicated failure; the way it
  tried to detect an offload failure did not work reliably.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  testing: selftests: nft_flowtable.sh: rework test to detect offload failure
  testing: selftests: nft_flowtable.sh: use random netns names
  netfilter: conntrack: NF_CONNTRACK_PROCFS should no longer default to y
  netfilter: nf_tables: check NFT_SET_CONCAT flag if field_count is specified
  netfilter: nf_tables: disallow NFT_SET_ELEM_CATCHALL and NFT_SET_ELEM_INTERVAL_END
  netfilter: nf_tables: NFTA_SET_ELEM_KEY_END requires concat and interval flags
  netfilter: nf_tables: validate NFTA_SET_ELEM_OBJREF based on NFT_SET_OBJECT flag
  netfilter: nf_tables: really skip inactive sets when allocating name
  netfilter: nfnetlink: re-enable conntrack expectation events
  netfilter: nf_tables: fix scheduling-while-atomic splat
  netfilter: nf_ct_irc: cap packet search space to 4k
  netfilter: nf_ct_ftp: prefer skb_linearize
  netfilter: nf_ct_h323: cap packet size at 64k
  netfilter: nf_ct_sane: remove pseudo skb linearization
  netfilter: nf_tables: possible module reference underflow in error path
  netfilter: nf_tables: disallow NFTA_SET_ELEM_KEY_END with NFT_SET_ELEM_INTERVAL_END flag
  netfilter: nf_tables: use READ_ONCE and WRITE_ONCE for shared generation id access
====================

Link: https://lore.kernel.org/r/20220817140015.25843-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-08-17 20:17:44 -07:00
commit bec13ba9ce
9 changed files with 384 additions and 255 deletions

View File

@ -95,7 +95,7 @@ struct nf_ip_net {
struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
bool ctnetlink_has_listener;
u8 ctnetlink_has_listener;
bool ecache_dwork_pending;
#endif
u8 sysctl_log_invalid; /* Log invalid packets */

View File

@ -144,7 +144,6 @@ config NF_CONNTRACK_ZONES
config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
default y
depends on PROC_FS
help
This option enables for the list of known conntrack entries

View File

@ -34,11 +34,6 @@ MODULE_DESCRIPTION("ftp connection tracking helper");
MODULE_ALIAS("ip_conntrack_ftp");
MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
/* This is slow, but it's simple. --RR */
static char *ftp_buffer;
static DEFINE_SPINLOCK(nf_ftp_lock);
#define MAX_PORTS 8
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
@ -398,6 +393,9 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
}
if (unlikely(skb_linearize(skb)))
return NF_DROP;
th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
@ -411,12 +409,8 @@ static int help(struct sk_buff *skb,
}
datalen = skb->len - dataoff;
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
if (!fb_ptr) {
spin_unlock_bh(&nf_ftp_lock);
return NF_ACCEPT;
}
spin_lock_bh(&ct->lock);
fb_ptr = skb->data + dataoff;
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
@ -544,7 +538,7 @@ static int help(struct sk_buff *skb,
if (ends_in_nl)
update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
spin_unlock_bh(&ct->lock);
return ret;
}
@ -571,7 +565,6 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
static void __exit nf_conntrack_ftp_fini(void)
{
nf_conntrack_helpers_unregister(ftp, ports_c * 2);
kfree(ftp_buffer);
}
static int __init nf_conntrack_ftp_init(void)
@ -580,10 +573,6 @@ static int __init nf_conntrack_ftp_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
return -ENOMEM;
if (ports_c == 0)
ports[ports_c++] = FTP_PORT;
@ -603,7 +592,6 @@ static int __init nf_conntrack_ftp_init(void)
ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
kfree(ftp_buffer);
return ret;
}

View File

@ -34,6 +34,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_h323.h>
#define H323_MAX_SIZE 65535
/* Parameters */
static unsigned int default_rrq_ttl __read_mostly = 300;
module_param(default_rrq_ttl, uint, 0600);
@ -86,6 +88,9 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
if (tcpdatalen <= 0) /* No TCP data */
goto clear_out;
if (tcpdatalen > H323_MAX_SIZE)
tcpdatalen = H323_MAX_SIZE;
if (*data == NULL) { /* first TPKT */
/* Get first TPKT pointer */
tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
@ -1169,6 +1174,9 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
if (dataoff >= skb->len)
return NULL;
*datalen = skb->len - dataoff;
if (*datalen > H323_MAX_SIZE)
*datalen = H323_MAX_SIZE;
return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
@ -1770,7 +1778,7 @@ static int __init nf_conntrack_h323_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
h323_buffer = kmalloc(65536, GFP_KERNEL);
h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL);
if (!h323_buffer)
return -ENOMEM;
ret = h323_helper_init();

View File

@ -39,6 +39,7 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
#define HELPER_NAME "irc"
#define MAX_SEARCH_SIZE 4095
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
@ -121,6 +122,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
int i, ret = NF_ACCEPT;
char *addr_beg_p, *addr_end_p;
typeof(nf_nat_irc_hook) nf_nat_irc;
unsigned int datalen;
/* If packet is coming from IRC server */
if (dir == IP_CT_DIR_REPLY)
@ -140,8 +142,12 @@ static int help(struct sk_buff *skb, unsigned int protoff,
if (dataoff >= skb->len)
return NF_ACCEPT;
datalen = skb->len - dataoff;
if (datalen > MAX_SEARCH_SIZE)
datalen = MAX_SEARCH_SIZE;
spin_lock_bh(&irc_buffer_lock);
ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
ib_ptr = skb_header_pointer(skb, dataoff, datalen,
irc_buffer);
if (!ib_ptr) {
spin_unlock_bh(&irc_buffer_lock);
@ -149,7 +155,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
}
data = ib_ptr;
data_limit = ib_ptr + skb->len - dataoff;
data_limit = ib_ptr + datalen;
/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
* 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@ -251,7 +257,7 @@ static int __init nf_conntrack_irc_init(void)
irc_exp_policy.max_expected = max_dcc_channels;
irc_exp_policy.timeout = dcc_timeout;
irc_buffer = kmalloc(65536, GFP_KERNEL);
irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL);
if (!irc_buffer)
return -ENOMEM;

View File

@ -34,10 +34,6 @@ MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
MODULE_DESCRIPTION("SANE connection tracking helper");
MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
static char *sane_buffer;
static DEFINE_SPINLOCK(nf_sane_lock);
#define MAX_PORTS 8
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
@ -67,14 +63,16 @@ static int help(struct sk_buff *skb,
unsigned int dataoff, datalen;
const struct tcphdr *th;
struct tcphdr _tcph;
void *sb_ptr;
int ret = NF_ACCEPT;
int dir = CTINFO2DIR(ctinfo);
struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple *tuple;
struct sane_request *req;
struct sane_reply_net_start *reply;
union {
struct sane_request req;
struct sane_reply_net_start repl;
} buf;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED &&
@ -92,59 +90,62 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
datalen = skb->len - dataoff;
spin_lock_bh(&nf_sane_lock);
sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
if (!sb_ptr) {
spin_unlock_bh(&nf_sane_lock);
return NF_ACCEPT;
}
if (dir == IP_CT_DIR_ORIGINAL) {
if (datalen != sizeof(struct sane_request))
goto out;
const struct sane_request *req;
if (datalen != sizeof(struct sane_request))
return NF_ACCEPT;
req = skb_header_pointer(skb, dataoff, datalen, &buf.req);
if (!req)
return NF_ACCEPT;
req = sb_ptr;
if (req->RPC_code != htonl(SANE_NET_START)) {
/* Not an interesting command */
ct_sane_info->state = SANE_STATE_NORMAL;
goto out;
WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
return NF_ACCEPT;
}
/* We're interested in the next reply */
ct_sane_info->state = SANE_STATE_START_REQUESTED;
goto out;
WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED);
return NF_ACCEPT;
}
/* IP_CT_DIR_REPLY */
/* Is it a reply to an uninteresting command? */
if (ct_sane_info->state != SANE_STATE_START_REQUESTED)
goto out;
if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED)
return NF_ACCEPT;
/* It's a reply to SANE_NET_START. */
ct_sane_info->state = SANE_STATE_NORMAL;
WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
if (datalen < sizeof(struct sane_reply_net_start)) {
pr_debug("NET_START reply too short\n");
goto out;
return NF_ACCEPT;
}
reply = sb_ptr;
datalen = sizeof(struct sane_reply_net_start);
reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl);
if (!reply)
return NF_ACCEPT;
if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
/* saned refused the command */
pr_debug("unsuccessful SANE_STATUS = %u\n",
ntohl(reply->status));
goto out;
return NF_ACCEPT;
}
/* Invalid saned reply? Ignore it. */
if (reply->zero != 0)
goto out;
return NF_ACCEPT;
exp = nf_ct_expect_alloc(ct);
if (exp == NULL) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
ret = NF_DROP;
goto out;
return NF_DROP;
}
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@ -162,9 +163,6 @@ static int help(struct sk_buff *skb,
}
nf_ct_expect_put(exp);
out:
spin_unlock_bh(&nf_sane_lock);
return ret;
}
@ -178,7 +176,6 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
static void __exit nf_conntrack_sane_fini(void)
{
nf_conntrack_helpers_unregister(sane, ports_c * 2);
kfree(sane_buffer);
}
static int __init nf_conntrack_sane_init(void)
@ -187,10 +184,6 @@ static int __init nf_conntrack_sane_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
sane_buffer = kmalloc(65536, GFP_KERNEL);
if (!sane_buffer)
return -ENOMEM;
if (ports_c == 0)
ports[ports_c++] = SANE_PORT;
@ -210,7 +203,6 @@ static int __init nf_conntrack_sane_init(void)
ret = nf_conntrack_helpers_register(sane, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
kfree(sane_buffer);
return ret;
}

View File

@ -889,7 +889,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@ -1705,7 +1705,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@ -3149,7 +3149,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@ -3907,7 +3907,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
if (!nft_is_active_next(ctx->net, set))
if (!nft_is_active_next(ctx->net, i))
continue;
if (!sscanf(i->name, name, &tmp))
continue;
@ -4133,7 +4133,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (ctx->family != NFPROTO_UNSPEC &&
@ -4451,6 +4451,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
return -EINVAL;
} else if (flags & NFT_SET_CONCAT) {
return -EINVAL;
}
if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
@ -5061,6 +5066,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
dump_ctx->ctx.family != table->family)
@ -5196,6 +5203,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
if (!(set->flags & NFT_SET_INTERVAL) &&
*flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) ==
(NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
return -EINVAL;
return 0;
}
@ -5599,7 +5609,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
err = nft_expr_clone(expr, set->exprs[i]);
if (err < 0) {
nft_expr_destroy(ctx, expr);
kfree(expr);
goto err_expr;
}
expr_array[i] = expr;
@ -5842,6 +5852,24 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem);
}
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) ==
(NFT_SET_CONCAT | NFT_SET_INTERVAL)) {
if (flags & NFT_SET_ELEM_INTERVAL_END)
return false;
if (!nla[NFTA_SET_ELEM_KEY_END] &&
!(flags & NFT_SET_ELEM_CATCHALL))
return false;
} else {
if (nla[NFTA_SET_ELEM_KEY_END])
return false;
}
return true;
}
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
@ -5892,6 +5920,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return -EINVAL;
}
if (set->flags & NFT_SET_OBJECT) {
if (!nla[NFTA_SET_ELEM_OBJREF] &&
!(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_OBJREF])
return -EINVAL;
}
if (!nft_setelem_valid_key_end(set, nla, flags))
return -EINVAL;
if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
(nla[NFTA_SET_ELEM_DATA] ||
nla[NFTA_SET_ELEM_OBJREF] ||
@ -5899,6 +5939,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nla[NFTA_SET_ELEM_EXPIRATION] ||
nla[NFTA_SET_ELEM_USERDATA] ||
nla[NFTA_SET_ELEM_EXPR] ||
nla[NFTA_SET_ELEM_KEY_END] ||
nla[NFTA_SET_ELEM_EXPRESSIONS]))
return -EINVAL;
@ -6029,10 +6070,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
if (!(set->flags & NFT_SET_OBJECT)) {
err = -EINVAL;
goto err_parse_key_end;
}
obj = nft_obj_lookup(ctx->net, ctx->table,
nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
@ -6325,6 +6362,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
return -EINVAL;
if (!nft_setelem_valid_key_end(set, nla, flags))
return -EINVAL;
nft_set_ext_prepare(&tmpl);
if (flags != 0) {
@ -6941,7 +6981,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@ -7873,7 +7913,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@ -8806,6 +8846,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
unsigned int base_seq;
LIST_HEAD(adl);
int err;
@ -8855,9 +8896,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
* Bump generation counter, invalidate any dump in progress.
* Cannot fail after this point.
*/
while (++nft_net->base_seq == 0)
base_seq = READ_ONCE(nft_net->base_seq);
while (++base_seq == 0)
;
WRITE_ONCE(nft_net->base_seq, base_seq);
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@ -9419,13 +9463,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
break;
}
}
cond_resched();
}
list_for_each_entry(set, &ctx->table->sets, list) {
cond_resched();
if (!nft_is_active_next(ctx->net, set))
continue;
if (!(set->flags & NFT_SET_MAP) ||

View File

@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
static unsigned int nfnetlink_pernet_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static DEFINE_SPINLOCK(nfnl_grp_active_lock);
#endif
struct nfnl_net {
struct sock *nfnl;
};
@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}
static void nfnetlink_bind_event(struct net *net, unsigned int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int type, group_bit;
u8 v;
/* All NFNLGRP_CONNTRACK_* group bits fit into u8.
* The other groups are not relevant and can be ignored.
*/
if (group >= 8)
return;
type = nfnl_group2type[group];
switch (type) {
case NFNL_SUBSYS_CTNETLINK:
break;
case NFNL_SUBSYS_CTNETLINK_EXP:
break;
default:
return;
}
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
v = READ_ONCE(net->ct.ctnetlink_has_listener);
if ((v & group_bit) == 0) {
v |= group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
}
static int nfnetlink_bind(struct net *net, int group)
{
const struct nfnetlink_subsystem *ss;
@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group)
if (!ss)
request_module_nowait("nfnetlink-subsys-%d", type);
#ifdef CONFIG_NF_CONNTRACK_EVENTS
if (type == NFNL_SUBSYS_CTNETLINK) {
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
}
#endif
nfnetlink_bind_event(net, group);
return 0;
}
static void nfnetlink_unbind(struct net *net, int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int type, group_bit;
if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
return;
if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
if (!nfnetlink_has_listeners(net, group))
WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
type = nfnl_group2type[group];
switch (type) {
case NFNL_SUBSYS_CTNETLINK:
break;
case NFNL_SUBSYS_CTNETLINK_EXP:
break;
default:
return;
}
/* ctnetlink_has_listener is u8 */
if (group >= 8)
return;
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
if (!nfnetlink_has_listeners(net, group)) {
u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
v &= ~group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
}

View File

@ -14,13 +14,17 @@
# nft_flowtable.sh -o8000 -l1500 -r2000
#
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsr1="nsr1-$sfx"
nsr2="nsr2-$sfx"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
ns1in=""
ns2in=""
nsin=""
ns1out=""
ns2out=""
@ -36,21 +40,19 @@ checktool (){
checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
checktool "which nc" "run test without nc (netcat)"
checktool "ip netns add nsr1" "create net namespace"
checktool "ip netns add $nsr1" "create net namespace $nsr1"
ip netns add ns1
ip netns add ns2
ip netns add nsr2
ip netns add $ns1
ip netns add $ns2
ip netns add $nsr2
cleanup() {
for i in 1 2; do
ip netns del ns$i
ip netns del nsr$i
done
ip netns del $ns1
ip netns del $ns2
ip netns del $nsr1
ip netns del $nsr2
rm -f "$ns1in" "$ns1out"
rm -f "$ns2in" "$ns2out"
rm -f "$nsin" "$ns1out" "$ns2out"
[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
}
@ -59,22 +61,21 @@ trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
for dev in lo veth0 veth1; do
for i in 1 2; do
ip -net nsr$i link set $dev up
done
ip -net $nsr1 link set $dev up
ip -net $nsr2 link set $dev up
done
ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0
ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
ip -net $nsr1 addr add dead:1::1/64 dev veth0
ip -net nsr2 addr add 10.0.2.1/24 dev veth1
ip -net nsr2 addr add dead:2::1/64 dev veth1
ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
ip -net $nsr2 addr add dead:2::1/64 dev veth1
# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@ -106,85 +107,76 @@ do
esac
done
if ! ip -net nsr1 link set veth0 mtu $omtu; then
if ! ip -net $nsr1 link set veth0 mtu $omtu; then
exit 1
fi
ip -net ns1 link set eth0 mtu $omtu
ip -net $ns1 link set eth0 mtu $omtu
if ! ip -net nsr2 link set veth1 mtu $rmtu; then
if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
exit 1
fi
ip -net ns2 link set eth0 mtu $rmtu
ip -net $ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net nsr1 addr add 192.168.10.1/24 dev veth1
ip -net nsr1 addr add fee1:2::1/64 dev veth1
ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
ip -net $nsr1 addr add fee1:2::1/64 dev veth1
ip -net nsr2 addr add 192.168.10.2/24 dev veth0
ip -net nsr2 addr add fee1:2::2/64 dev veth0
ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
ip -net $nsr2 addr add fee1:2::2/64 dev veth0
for i in 1 2; do
ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
for i in 0 1; do
ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
done
ip -net ns$i link set lo up
ip -net ns$i link set eth0 up
ip -net ns$i addr add 10.0.$i.99/24 dev eth0
ip -net ns$i route add default via 10.0.$i.1
ip -net ns$i addr add dead:$i::99/64 dev eth0
ip -net ns$i route add default via dead:$i::1
if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
for ns in $ns1 $ns2;do
ip -net $ns link set lo up
ip -net $ns link set eth0 up
if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
echo "ERROR: Check Originator/Responder values (problem during address addition)"
exit 1
fi
# don't set ip DF bit for first two tests
ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
ip -net nsr1 route add default via 192.168.10.2
ip -net nsr2 route add default via 192.168.10.1
ip -net $ns1 addr add 10.0.1.99/24 dev eth0
ip -net $ns2 addr add 10.0.2.99/24 dev eth0
ip -net $ns1 route add default via 10.0.1.1
ip -net $ns2 route add default via 10.0.2.1
ip -net $ns1 addr add dead:1::99/64 dev eth0
ip -net $ns2 addr add dead:2::99/64 dev eth0
ip -net $ns1 route add default via dead:1::1
ip -net $ns2 route add default via dead:2::1
ip netns exec nsr1 nft -f - <<EOF
ip -net $nsr1 route add default via 192.168.10.2
ip -net $nsr2 route add default via 192.168.10.1
ip netns exec $nsr1 nft -f - <<EOF
table inet filter {
flowtable f1 {
hook ingress priority 0
devices = { veth0, veth1 }
}
counter routed_orig { }
counter routed_repl { }
chain forward {
type filter hook forward priority 0; policy drop;
# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
meta oif "veth1" tcp dport 12345 flow offload @f1 counter
meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
# use packet size to trigger 'should be offloaded by now'.
# otherwise, if 'flow offload' expression never offloads, the
# test will pass.
tcp dport 12345 meta length gt 200 ct mark set 1 counter
# this turns off flow offloading internally, so expect packets again
tcp flags fin,rst ct mark set 0 accept
# this allows large packets from responder, we need this as long
# as PMTUd is off.
# This rule is deleted for the last test, when we expect PMTUd
# to kick in and ensure all packets meet mtu requirements.
meta length gt $lmtu accept comment something-to-grep-for
# next line blocks connection w.o. working offload.
# we only do this for reverse dir, because we expect packets to
# enter slow path due to MTU mismatch of veth0 and veth1.
tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
# count packets supposedly offloaded as per direction.
ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
ct state established,related accept
# for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
meta length lt 200 oif "veth1" tcp dport 12345 counter accept
meta nfproto ipv4 meta l4proto icmp accept
meta nfproto ipv6 meta l4proto icmpv6 accept
}
@ -197,30 +189,30 @@ if [ $? -ne 0 ]; then
fi
# test basic connectivity
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: $ns1 cannot reach ns2" 1>&2
exit 1
fi
if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: ns2 cannot reach ns1" 1>&2
if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: $ns2 cannot reach $ns1" 1>&2
exit 1
fi
if [ $ret -eq 0 ];then
echo "PASS: netns routing/connectivity: ns1 can reach ns2"
echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi
ns1in=$(mktemp)
nsin=$(mktemp)
ns1out=$(mktemp)
ns2in=$(mktemp)
ns2out=$(mktemp)
make_file()
{
name=$1
SIZE=$((RANDOM % (1024 * 8)))
SIZE=$((RANDOM % (1024 * 128)))
SIZE=$((SIZE + (1024 * 8)))
TSIZE=$((SIZE * 1024))
dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
@ -231,6 +223,38 @@ make_file()
dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
}
check_counters()
{
local what=$1
local ok=1
local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
local orig_cnt=${orig#*bytes}
local repl_cnt=${repl#*bytes}
local fs=$(du -sb $nsin)
local max_orig=${fs%%/*}
local max_repl=$((max_orig/4))
if [ $orig_cnt -gt $max_orig ];then
echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
ret=1
ok=0
fi
if [ $repl_cnt -gt $max_repl ];then
echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
ret=1
ok=0
fi
if [ $ok -eq 1 ]; then
echo "PASS: $what"
fi
}
check_transfer()
{
in=$1
@ -255,11 +279,11 @@ test_tcp_forwarding_ip()
local dstport=$4
local lret=0
ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
lpid=$!
sleep 1
ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
cpid=$!
sleep 3
@ -274,11 +298,11 @@ test_tcp_forwarding_ip()
wait
if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
lret=1
fi
if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
lret=1
fi
@ -295,41 +319,59 @@ test_tcp_forwarding()
test_tcp_forwarding_nat()
{
local lret
local pmtu
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
lret=$?
pmtu=$3
what=$4
if [ $lret -eq 0 ] ; then
if [ $pmtu -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
lret=$?
if [ $pmtu -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
elif [ $lret -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
fi
return $lret
}
make_file "$ns1in"
make_file "$ns2in"
make_file "$nsin"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if test_tcp_forwarding ns1 ns2; then
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
if test_tcp_forwarding $ns1 $ns2; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
ip netns exec nsr1 nft list ruleset
ip netns exec $nsr1 nft list ruleset
ret=1
fi
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net ns2 route del default via 10.0.2.1
ip -net ns2 route del default via dead:2::1
ip -net ns2 route add 192.168.10.1 via 10.0.2.1
ip -net $ns2 route del default via 10.0.2.1
ip -net $ns2 route del default via dead:2::1
ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
# Second test:
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
# Same, but with NAT enabled. Same as in first test: we expect normal forward path
# to handle most packets.
ip netns exec $nsr1 nft -f - <<EOF
table ip nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@ -343,47 +385,45 @@ table ip nat {
}
EOF
if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
ip netns exec nsr1 nft list ruleset
ip netns exec $nsr1 nft list ruleset
ret=1
fi
# Third test:
# Same as second test, but with PMTU discovery enabled.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
echo "FAIL: Could not delete large-packet accept rule"
exit 1
fi
# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
ip netns exec nsr1 nft list ruleset
ip netns exec $nsr1 nft list ruleset
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
ip -net nsr1 link add name br0 type bridge
ip -net nsr1 addr flush dev veth0
ip -net nsr1 link set up dev veth0
ip -net nsr1 link set veth0 master br0
ip -net nsr1 addr add 10.0.1.1/24 dev br0
ip -net nsr1 addr add dead:1::1/64 dev br0
ip -net nsr1 link set up dev br0
ip -net $nsr1 link add name br0 type bridge
ip -net $nsr1 addr flush dev veth0
ip -net $nsr1 link set up dev veth0
ip -net $nsr1 link set veth0 master br0
ip -net $nsr1 addr add 10.0.1.1/24 dev br0
ip -net $nsr1 addr add dead:1::1/64 dev br0
ip -net $nsr1 link set up dev br0
ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
# br0 with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
ip netns exec $nsr1 nft -f - <<EOF
flush table ip nat
table ip nat {
chain prerouting {
@ -398,59 +438,56 @@ table ip nat {
}
EOF
if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
else
if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
ip netns exec nsr1 nft list ruleset
ip netns exec $nsr1 nft list ruleset
ret=1
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
ip -net nsr1 link set veth0 nomaster
ip -net nsr1 link set down dev veth0
ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
ip -net nsr1 link set up dev veth0
ip -net nsr1 link set up dev veth0.10
ip -net nsr1 link set veth0.10 master br0
ip -net $nsr1 link set veth0 nomaster
ip -net $nsr1 link set down dev veth0
ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
ip -net $nsr1 link set up dev veth0
ip -net $nsr1 link set up dev veth0.10
ip -net $nsr1 link set veth0.10 master br0
ip -net ns1 addr flush dev eth0
ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
ip -net ns1 link set eth0 up
ip -net ns1 link set eth0.10 up
ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
ip -net ns1 route add default via 10.0.1.1
ip -net ns1 addr add dead:1::99/64 dev eth0.10
ip -net $ns1 addr flush dev eth0
ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
ip -net $ns1 link set eth0 up
ip -net $ns1 link set eth0.10 up
ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
ip -net $ns1 route add default via 10.0.1.1
ip -net $ns1 addr add dead:1::99/64 dev eth0.10
if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
else
if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
ip netns exec nsr1 nft list ruleset
ip netns exec $nsr1 nft list ruleset
ret=1
fi
# restore test topology (remove bridge and VLAN)
ip -net nsr1 link set veth0 nomaster
ip -net nsr1 link set veth0 down
ip -net nsr1 link set veth0.10 down
ip -net nsr1 link delete veth0.10 type vlan
ip -net nsr1 link delete br0 type bridge
ip -net ns1 addr flush dev eth0.10
ip -net ns1 link set eth0.10 down
ip -net ns1 link set eth0 down
ip -net ns1 link delete eth0.10 type vlan
ip -net $nsr1 link set veth0 nomaster
ip -net $nsr1 link set veth0 down
ip -net $nsr1 link set veth0.10 down
ip -net $nsr1 link delete veth0.10 type vlan
ip -net $nsr1 link delete br0 type bridge
ip -net $ns1 addr flush dev eth0.10
ip -net $ns1 link set eth0.10 down
ip -net $ns1 link set eth0 down
ip -net $ns1 link delete eth0.10 type vlan
# restore address in ns1 and nsr1
ip -net ns1 link set eth0 up
ip -net ns1 addr add 10.0.1.99/24 dev eth0
ip -net ns1 route add default via 10.0.1.1
ip -net ns1 addr add dead:1::99/64 dev eth0
ip -net ns1 route add default via dead:1::1
ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0
ip -net nsr1 link set up dev veth0
ip -net $ns1 link set eth0 up
ip -net $ns1 addr add 10.0.1.99/24 dev eth0
ip -net $ns1 route add default via 10.0.1.1
ip -net $ns1 addr add dead:1::99/64 dev eth0
ip -net $ns1 route add default via dead:1::1
ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
ip -net $nsr1 addr add dead:1::1/64 dev veth0
ip -net $nsr1 link set up dev veth0
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
@ -480,23 +517,23 @@ do_esp() {
}
do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
ip netns exec nsr1 nft delete table ip nat
ip netns exec $nsr1 nft delete table ip nat
# restore default routes
ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1
ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
ip -net $ns2 route add default via 10.0.2.1
ip -net $ns2 route add default via dead:2::1
if test_tcp_forwarding ns1 ns2; then
echo "PASS: ipsec tunnel mode for ns1/ns2"
if test_tcp_forwarding $ns1 $ns2; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
ip netns exec nsr1 nft list ruleset 1>&2
ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
ip netns exec $nsr1 nft list ruleset 1>&2
ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
fi
exit $ret