linux-next/net/sched/act_ipt.c
Florian Westphal 93d75d475c net/sched: act_ipt: zero skb->cb before calling target
xtables relies on the skb being owned by the IP stack, i.e. with the IPv4
check in place, skb->cb is supposed to be IPCB.

I don't see an immediate problem (REJECT target cannot be used anymore
now that PRE/POSTROUTING hook validation has been fixed), but better be
safe than sorry.

A much better patch would be to either mark act_ipt as
"depends on BROKEN" or remove it altogether. I plan to do this
for -next in the near future.

This tc extension is broken in the sense that tc lacks an
equivalent of NF_STOLEN verdict.

With NF_STOLEN, target function takes complete ownership of skb, caller
cannot dereference it anymore.

ACT_STOLEN cannot be used for this: it has a different meaning, caller
is allowed to dereference the skb.

At this time NF_STOLEN won't be returned by any targets as far as I can
see, but this may change in the future.

It might be possible to work around this via list of allowed
target extensions known to only return DROP or ACCEPT verdicts, but this
is error prone/fragile.

Existing selftest only validates xt_LOG and act_ipt is restricted
to ipv4 so I don't think this action is used widely.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2023-06-29 12:10:37 +02:00

465 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/act_ipt.c iptables target interface
*
* TODO: Add other tables. For now we only support the IPv4 table targets.
*
* Copyright: Jamal Hadi Salim (2002-13)
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_ipt.h>
#include <net/tc_act/tc_ipt.h>
#include <net/tc_wrapper.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4/ip_tables.h>
/*
 * Forward declarations: the "ipt" and "xt" ops tables are defined further
 * down in this file but are referenced earlier by the init wrappers.
 */
static struct tc_action_ops act_ipt_ops;
static struct tc_action_ops act_xt_ops;
static int ipt_init_target(struct net *net, struct xt_entry_target *t,
char *table, unsigned int hook)
{
struct xt_tgchk_param par;
struct xt_target *target;
struct ipt_entry e = {};
int ret = 0;
target = xt_request_find_target(AF_INET, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target))
return PTR_ERR(target);
t->u.kernel.target = target;
memset(&par, 0, sizeof(par));
par.net = net;
par.table = table;
par.entryinfo = &e;
par.target = target;
par.targinfo = t->data;
par.hook_mask = 1 << hook;
par.family = NFPROTO_IPV4;
ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
if (ret < 0) {
module_put(t->u.kernel.target->me);
return ret;
}
return 0;
}
static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
{
struct xt_tgdtor_param par = {
.target = t->u.kernel.target,
.targinfo = t->data,
.family = NFPROTO_IPV4,
.net = net,
};
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
}
/*
 * .cleanup handler: destroy and free the attached target (if any) and free
 * the stored table name.
 */
static void tcf_ipt_release(struct tc_action *a)
{
	struct tcf_ipt *ipt = to_ipt(a);
	struct xt_entry_target *tgt = ipt->tcfi_t;

	if (tgt != NULL) {
		ipt_destroy_target(tgt, a->idrinfo->net);
		kfree(tgt);
	}

	kfree(ipt->tcfi_tname);
}
/*
 * Netlink attribute policy for the nested TCA_IPT_* attributes parsed in
 * __tcf_ipt_init().
 *
 * NOTE(review): the TCA_IPT_HOOK range uses NF_INET_NUMHOOKS itself as its
 * upper bound. __tcf_ipt_init() separately accepts only
 * NF_INET_PRE_ROUTING and NF_INET_POST_ROUTING, so nothing relies on the
 * wider range here.
 */
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
[TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
[TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING,
NF_INET_NUMHOOKS),
[TCA_IPT_INDEX] = { .type = NLA_U32 },
/* The target attribute must at least hold an xt_entry_target header. */
[TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
};
/*
 * Common create/replace handler behind the "ipt" and "xt" action kinds.
 *
 * @net:   network namespace the action lives in
 * @id:    per-net id used to find the tc_action_net for this kind
 * @nla:   nested TCA_IPT_* attributes (required)
 * @est:   rate estimator attribute, forwarded to tcf_idr_create()
 * @a:     in/out action pointer
 * @ops:   ops table of the calling kind
 * @tp:    unused
 * @flags: TCA_ACT_FLAGS_* (BIND and REPLACE are inspected here)
 *
 * Returns ACT_P_CREATED when a new action was created, 0 when an existing
 * action was bound or replaced, or a negative errno:
 *   -EINVAL  missing/malformed attributes, unsupported hook or table
 *   -EEXIST  action exists and TCA_ACT_FLAGS_REPLACE was not given
 *   -ENOMEM  the table name or the target copy could not be allocated
 *   other    whatever parsing, idr handling or ipt_init_target() returned
 */
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
			  struct nlattr *est, struct tc_action **a,
			  const struct tc_action_ops *ops,
			  struct tcf_proto *tp, u32 flags)
{
	struct tc_action_net *tn = net_generic(net, id);
	bool bind = flags & TCA_ACT_FLAGS_BIND;
	struct nlattr *tb[TCA_IPT_MAX + 1];
	struct tcf_ipt *ipt;
	struct xt_entry_target *td, *t;
	char *tname;
	bool exists = false;
	int ret = 0, err;
	u32 hook = 0;
	u32 index = 0;

	if (nla == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_IPT_INDEX] != NULL)
		index = nla_get_u32(tb[TCA_IPT_INDEX]);

	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;
	exists = err;

	/* Binding to an already existing action needs no further setup. */
	if (exists && bind)
		return 0;

	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
		if (exists)
			tcf_idr_release(*a, bind);
		else
			tcf_idr_cleanup(tn, index);
		return -EINVAL;
	}

	/* The attribute length must match the size the blob claims for itself. */
	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
	if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) {
		if (exists)
			tcf_idr_release(*a, bind);
		else
			tcf_idr_cleanup(tn, index);
		return -EINVAL;
	}

	if (!exists) {
		ret = tcf_idr_create(tn, index, est, a, ops, bind,
				     false, flags);
		if (ret) {
			tcf_idr_cleanup(tn, index);
			return ret;
		}
		ret = ACT_P_CREATED;
	} else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
		/*
		 * The "exists && bind" case already returned above, so bind
		 * is false here and only an explicit replace may proceed.
		 */
		tcf_idr_release(*a, bind);
		return -EEXIST;
	}

	err = -EINVAL;
	hook = nla_get_u32(tb[TCA_IPT_HOOK]);
	switch (hook) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		break;
	default:
		goto err1;
	}

	if (tb[TCA_IPT_TABLE]) {
		/* mangle only for now */
		if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle"))
			goto err1;
	}

	/* The remaining failures before target setup are allocation failures. */
	err = -ENOMEM;
	tname = kstrdup("mangle", GFP_KERNEL);
	if (unlikely(!tname))
		goto err1;

	t = kmemdup(td, td->u.target_size, GFP_KERNEL);
	if (unlikely(!t))
		goto err2;

	err = ipt_init_target(net, t, tname, hook);
	if (err < 0)
		goto err3;

	ipt = to_ipt(*a);

	spin_lock_bh(&ipt->tcf_lock);
	if (ret != ACT_P_CREATED) {
		/* Replacing: tear down the previous target and table name. */
		ipt_destroy_target(ipt->tcfi_t, net);
		kfree(ipt->tcfi_tname);
		kfree(ipt->tcfi_t);
	}
	ipt->tcfi_tname = tname;
	ipt->tcfi_t = t;
	ipt->tcfi_hook = hook;
	spin_unlock_bh(&ipt->tcf_lock);
	return ret;

err3:
	kfree(t);
err2:
	kfree(tname);
err1:
	tcf_idr_release(*a, bind);
	return err;
}
/*
 * .init handler for the "ipt" action kind: forwards to __tcf_ipt_init()
 * with the "ipt" ops table and its per-net id. @extack is not used.
 */
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
			struct nlattr *est, struct tc_action **a,
			struct tcf_proto *tp,
			u32 flags, struct netlink_ext_ack *extack)
{
	const struct tc_action_ops *ops = &act_ipt_ops;

	return __tcf_ipt_init(net, ops->net_id, nla, est, a, ops, tp, flags);
}
/*
 * .init handler for the "xt" action kind: forwards to __tcf_ipt_init()
 * with the "xt" ops table and its per-net id. @extack is not used.
 */
static int tcf_xt_init(struct net *net, struct nlattr *nla,
		       struct nlattr *est, struct tc_action **a,
		       struct tcf_proto *tp,
		       u32 flags, struct netlink_ext_ack *extack)
{
	const struct tc_action_ops *ops = &act_xt_ops;

	return __tcf_ipt_init(net, ops->net_id, nla, est, a, ops, tp, flags);
}
/*
 * Sanity-check the IPv4 header of @skb before it is handed to a target.
 *
 * Returns true only if a basic header can be pulled, the header claims
 * version 4 with ihl >= 5, the total length is at least the header length
 * and fits within the skb past the network offset, and the full header
 * (including options) can be pulled.
 */
static bool tcf_ipt_act_check(struct sk_buff *skb)
{
	unsigned int net_off, tot_len, hdr_len;
	const struct iphdr *ip4;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		return false;

	net_off = skb_network_offset(skb);
	ip4 = ip_hdr(skb);

	if (ip4->version != 4 || ip4->ihl < 5)
		return false;

	hdr_len = ip4->ihl * 4u;
	tot_len = skb_ip_totlen(skb);

	if (tot_len < hdr_len)
		return false;
	if (skb->len < net_off + tot_len)
		return false;

	return pskb_may_pull(skb, hdr_len);
}
TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
char saved_cb[sizeof_field(struct sk_buff, cb)];
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
struct xt_action_param par;
struct nf_hook_state state = {
.net = dev_net(skb->dev),
.in = skb->dev,
.hook = ipt->tcfi_hook,
.pf = NFPROTO_IPV4,
};
if (skb_protocol(skb, false) != htons(ETH_P_IP))
return TC_ACT_UNSPEC;
if (skb_unclone(skb, GFP_ATOMIC))
return TC_ACT_UNSPEC;
if (!tcf_ipt_act_check(skb))
return TC_ACT_UNSPEC;
if (state.hook == NF_INET_POST_ROUTING) {
if (!skb_dst(skb))
return TC_ACT_UNSPEC;
state.out = skb->dev;
}
memcpy(saved_cb, skb->cb, sizeof(saved_cb));
spin_lock(&ipt->tcf_lock);
tcf_lastuse_update(&ipt->tcf_tm);
bstats_update(&ipt->tcf_bstats, skb);
/* yes, we have to worry about both in and out dev
* worry later - danger - this API seems to have changed
* from earlier kernels
*/
par.state = &state;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
ret = par.target->target(skb, &par);
switch (ret) {
case NF_ACCEPT:
result = TC_ACT_OK;
break;
case NF_DROP:
result = TC_ACT_SHOT;
ipt->tcf_qstats.drops++;
break;
case XT_CONTINUE:
result = TC_ACT_PIPE;
break;
default:
net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
ret);
result = TC_ACT_OK;
break;
}
spin_unlock(&ipt->tcf_lock);
memcpy(skb->cb, saved_cb, sizeof(skb->cb));
return result;
}
/*
 * .dump handler: append this action's TCA_IPT_* attributes to @skb.
 *
 * @skb:  netlink message being built
 * @a:    action to dump
 * @bind: subtracted from the reported bind count
 * @ref:  subtracted from the reported reference count
 *
 * Emits the target blob, index, hook, counters, table name and timestamps.
 * Returns skb->len on success. On any failure, including failure to
 * allocate the temporary target copy, the message is trimmed back to where
 * it started and -1 is returned.
 */
static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
			int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_ipt *ipt = to_ipt(a);
	struct xt_entry_target *t;
	struct tcf_t tm;
	struct tc_cnt c;

	/* for simple targets kernel size == user size
	 * user name = target name
	 * for foolproof you need to not assume this
	 */

	spin_lock_bh(&ipt->tcf_lock);
	/* Work on a copy so the live target blob is never modified. */
	t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
	if (unlikely(!t))
		goto nla_put_failure;

	c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
	c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;

	/*
	 * The copy still carries the kernel-side view of the header
	 * (ipt_init_target() stored a target pointer through u.kernel, which
	 * shares storage with u.user). Clear the whole user-visible name
	 * field before writing the name, so no bytes of that pointer remain
	 * after the string terminator in what is sent to userspace.
	 */
	memset(t->u.user.name, 0, sizeof(t->u.user.name));
	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);

	if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
	    nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) ||
	    nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) ||
	    nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
	    nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
		goto nla_put_failure;

	tcf_tm_dump(&tm, &ipt->tcf_tm);
	if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
		goto nla_put_failure;

	spin_unlock_bh(&ipt->tcf_lock);
	kfree(t);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(&ipt->tcf_lock);
	nlmsg_trim(skb, b);
	/* t is NULL when the copy could not be allocated; kfree() is still called on it. */
	kfree(t);
	return -1;
}
/*
 * Ops table for the action kind "ipt". It uses the same act/dump/cleanup
 * handlers as the "xt" kind; only .kind, .id and .init differ.
 */
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.id = TCA_ID_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
.size = sizeof(struct tcf_ipt),
};
/* Per-netns init for the "ipt" kind: set up its tc_action_net instance. */
static __net_init int ipt_init_net(struct net *net)
{
	return tc_action_net_init(net,
				  net_generic(net, act_ipt_ops.net_id),
				  &act_ipt_ops);
}
/* Batched per-netns exit for the "ipt" kind. */
static void __net_exit ipt_exit_net(struct list_head *net_list)
{
	unsigned int ipt_id = act_ipt_ops.net_id;

	tc_action_net_exit(net_list, ipt_id);
}
/*
 * Per-network-namespace operations for the "ipt" kind. The per-net id is
 * stored in act_ipt_ops.net_id, and each namespace gets storage the size of
 * one struct tc_action_net.
 */
static struct pernet_operations ipt_net_ops = {
.init = ipt_init_net,
.exit_batch = ipt_exit_net,
.id = &act_ipt_ops.net_id,
.size = sizeof(struct tc_action_net),
};
/*
 * Ops table for the action kind "xt". It uses the same act/dump/cleanup
 * handlers as the "ipt" kind; only .kind, .id and .init differ.
 */
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.id = TCA_ID_XT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
.init = tcf_xt_init,
.size = sizeof(struct tcf_ipt),
};
/* Per-netns init for the "xt" kind: set up its tc_action_net instance. */
static __net_init int xt_init_net(struct net *net)
{
	return tc_action_net_init(net,
				  net_generic(net, act_xt_ops.net_id),
				  &act_xt_ops);
}
/* Batched per-netns exit for the "xt" kind. */
static void __net_exit xt_exit_net(struct list_head *net_list)
{
	unsigned int xt_id = act_xt_ops.net_id;

	tc_action_net_exit(net_list, xt_id);
}
/*
 * Per-network-namespace operations for the "xt" kind. The per-net id is
 * stored in act_xt_ops.net_id, and each namespace gets storage the size of
 * one struct tc_action_net.
 */
static struct pernet_operations xt_net_ops = {
.init = xt_init_net,
.exit_batch = xt_exit_net,
.id = &act_xt_ops.net_id,
.size = sizeof(struct tc_action_net),
};
/* Module metadata; "act_xt" is declared as an additional alias name. */
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
MODULE_DESCRIPTION("Iptables target actions");
MODULE_LICENSE("GPL");
MODULE_ALIAS("act_xt");
/*
 * Module entry point: register the "xt" and "ipt" action kinds.
 *
 * Each registration failure is logged. Loading fails, with the "xt" error
 * code, only when both registrations fail; one success is enough to
 * return 0.
 */
static int __init ipt_init_module(void)
{
	int xt_err, ipt_err;

	xt_err = tcf_register_action(&act_xt_ops, &xt_net_ops);
	if (xt_err < 0)
		pr_err("Failed to load xt action\n");

	ipt_err = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
	if (ipt_err < 0)
		pr_err("Failed to load ipt action\n");

	if (xt_err >= 0 || ipt_err >= 0)
		return 0;

	return xt_err;
}
/*
 * Module exit point: unregister both action kinds together with their
 * per-netns operations. Return values of the unregister calls are ignored.
 */
static void __exit ipt_cleanup_module(void)
{
tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
tcf_unregister_action(&act_xt_ops, &xt_net_ops);
}
/* Hook the entry and exit points into the module loader. */
module_init(ipt_init_module);
module_exit(ipt_cleanup_module);