2019-05-27 06:55:01 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
* net/sched/act_api.c Packet action API.
|
|
|
|
*
|
|
|
|
* Author: Jamal Hadi Salim
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/errno.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
|
|
|
#include <linux/slab.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/kmod.h>
|
2008-01-24 04:33:13 +00:00
|
|
|
#include <linux/err.h>
|
2011-05-27 13:12:25 +00:00
|
|
|
#include <linux/module.h>
|
2007-11-30 13:21:31 +00:00
|
|
|
#include <net/net_namespace.h>
|
|
|
|
#include <net/sock.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <net/sch_generic.h>
|
2017-01-24 12:02:41 +00:00
|
|
|
#include <net/pkt_cls.h>
|
2021-12-17 18:16:22 +00:00
|
|
|
#include <net/tc_act/tc_pedit.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <net/act_api.h>
|
2007-03-26 06:06:12 +00:00
|
|
|
#include <net/netlink.h>
|
2021-12-17 18:16:22 +00:00
|
|
|
#include <net/flow_offload.h>
|
2022-12-06 13:55:12 +00:00
|
|
|
#include <net/tc_wrapper.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2020-11-25 04:01:23 +00:00
|
|
|
#ifdef CONFIG_INET
/* Static key tested by tcf_dev_queue_xmit(); defined false by default. */
DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
EXPORT_SYMBOL_GPL(tcf_frag_xmit_count);
#endif

/* Transmit @skb using @xmit.
 *
 * When built with CONFIG_INET and the tcf_frag_xmit_count static key is
 * enabled, @skb and @xmit are handed to sch_frag_xmit_hook() and its result
 * is returned; otherwise @xmit is called directly.
 */
int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
{
#ifdef CONFIG_INET
	if (static_branch_unlikely(&tcf_frag_xmit_count))
		return sch_frag_xmit_hook(skb, xmit);
#endif

	return xmit(skb);
}
EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);
|
|
|
|
|
2017-05-17 09:08:03 +00:00
|
|
|
static void tcf_action_goto_chain_exec(const struct tc_action *a,
|
|
|
|
struct tcf_result *res)
|
|
|
|
{
|
2019-03-20 14:00:16 +00:00
|
|
|
const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
|
2017-05-17 09:08:03 +00:00
|
|
|
|
|
|
|
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
|
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:23 +00:00
|
|
|
static void tcf_free_cookie_rcu(struct rcu_head *p)
|
|
|
|
{
|
|
|
|
struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
|
|
|
|
|
|
|
|
kfree(cookie->data);
|
|
|
|
kfree(cookie);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Atomically install @new_cookie (may be NULL) in *@old_cookie.
 *
 * If a cookie was previously installed, it is released through
 * tcf_free_cookie_rcu() via call_rcu().
 */
static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
				  struct tc_cookie *new_cookie)
{
	struct tc_cookie *prev;

	prev = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie)));
	if (!prev)
		return;

	call_rcu(&prev->rcu, tcf_free_cookie_rcu);
}
|
|
|
|
|
net/sched: prepare TC actions to properly validate the control action
- pass a pointer to struct tcf_proto in each action's init() handler,
to allow validating the control action, checking whether the chain
exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
to the action's init() handler, and replace it with a test that forbids
addition of actions having 'goto_chain' and NULL goto_chain pointer at
the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
and eventually allocate the action 'goto_chain' within the init()
handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
swap the current 'goto_chain' pointer with the new given one.
This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.
By doing this, the kernel no longer leaks refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:
# tc chain add dev dd0 chain 42 ingress protocol ip flower \
> ip_proto tcp action drop
# tc chain add dev dd0 chain 43 ingress protocol ip flower \
> ip_proto udp action drop
# tc filter add dev dd0 ingress matchall \
> action gact goto chain 42 index 66
# tc filter replace dev dd0 ingress matchall \
> action gact goto chain 43 index 66
# echo scan >/sys/kernel/debug/kmemleak
<...>
unreferenced object 0xffff93c0ee09f000 (size 1024):
comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
[<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
[<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
[<000000006126a348>] netlink_unicast+0x1a0/0x250
[<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
[<00000000a25a2171>] sock_sendmsg+0x36/0x40
[<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
[<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
[<000000007a6c61f9>] do_syscall_64+0x5b/0x180
[<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0000000013eaa334>] 0xffffffffffffffff
Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-20 13:59:59 +00:00
|
|
|
int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
|
|
|
|
struct tcf_chain **newchain,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
|
|
|
|
u32 chain_index;
|
|
|
|
|
|
|
|
if (!opcode)
|
|
|
|
ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
|
|
|
|
else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
|
|
|
|
ret = 0;
|
|
|
|
if (ret) {
|
|
|
|
NL_SET_ERR_MSG(extack, "invalid control action");
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
|
|
|
|
chain_index = action & TC_ACT_EXT_VAL_MASK;
|
|
|
|
if (!tp || !newchain) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"can't goto NULL proto/chain");
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
*newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
|
|
|
|
if (!*newchain) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"can't allocate goto_chain");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(tcf_action_check_ctrlact);
|
|
|
|
|
|
|
|
struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
|
2019-03-20 14:00:16 +00:00
|
|
|
struct tcf_chain *goto_chain)
|
net/sched: prepare TC actions to properly validate the control action
- pass a pointer to struct tcf_proto in each actions's init() handler,
to allow validating the control action, checking whether the chain
exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
to the action's init() handler, and replace it with a test that forbids
addition of actions having 'goto_chain' and NULL goto_chain pointer at
the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
and eventually allocate the action 'goto_chain' within the init()
handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
swap the current 'goto_chain' pointer with the new given one.
This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.
By doing this, the kernel does not leak anymore refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:
# tc chain add dev dd0 chain 42 ingress protocol ip flower \
> ip_proto tcp action drop
# tc chain add dev dd0 chain 43 ingress protocol ip flower \
> ip_proto udp action drop
# tc filter add dev dd0 ingress matchall \
> action gact goto chain 42 index 66
# tc filter replace dev dd0 ingress matchall \
> action gact goto chain 43 index 66
# echo scan >/sys/kernel/debug/kmemleak
<...>
unreferenced object 0xffff93c0ee09f000 (size 1024):
comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
[<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
[<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
[<000000006126a348>] netlink_unicast+0x1a0/0x250
[<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
[<00000000a25a2171>] sock_sendmsg+0x36/0x40
[<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
[<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
[<000000007a6c61f9>] do_syscall_64+0x5b/0x180
[<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0000000013eaa334>] 0xffffffffffffffff
Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-20 13:59:59 +00:00
|
|
|
{
|
|
|
|
a->tcfa_action = action;
|
2019-09-23 23:09:18 +00:00
|
|
|
goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1);
|
2019-03-20 14:00:16 +00:00
|
|
|
return goto_chain;
|
net/sched: prepare TC actions to properly validate the control action
- pass a pointer to struct tcf_proto in each actions's init() handler,
to allow validating the control action, checking whether the chain
exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
to the action's init() handler, and replace it with a test that forbids
addition of actions having 'goto_chain' and NULL goto_chain pointer at
the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
and eventually allocate the action 'goto_chain' within the init()
handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
swap the current 'goto_chain' pointer with the new given one.
This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.
By doing this, the kernel does not leak anymore refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:
# tc chain add dev dd0 chain 42 ingress protocol ip flower \
> ip_proto tcp action drop
# tc chain add dev dd0 chain 43 ingress protocol ip flower \
> ip_proto udp action drop
# tc filter add dev dd0 ingress matchall \
> action gact goto chain 42 index 66
# tc filter replace dev dd0 ingress matchall \
> action gact goto chain 43 index 66
# echo scan >/sys/kernel/debug/kmemleak
<...>
unreferenced object 0xffff93c0ee09f000 (size 1024):
comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
[<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
[<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
[<000000006126a348>] netlink_unicast+0x1a0/0x250
[<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
[<00000000a25a2171>] sock_sendmsg+0x36/0x40
[<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
[<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
[<000000007a6c61f9>] do_syscall_64+0x5b/0x180
[<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0000000013eaa334>] 0xffffffffffffffff
Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-20 13:59:59 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(tcf_action_set_ctrlact);
|
|
|
|
|
2017-09-11 23:33:30 +00:00
|
|
|
/* XXX: For standalone actions, we don't need a RCU grace period either, because
|
|
|
|
* actions are always connected to filters and filters are already destroyed in
|
|
|
|
* RCU callbacks, so after a RCU grace period actions are already disconnected
|
|
|
|
* from filters. Readers later can not find us.
|
|
|
|
*/
|
|
|
|
static void free_tcf(struct tc_action *p)
|
2015-07-06 12:18:04 +00:00
|
|
|
{
|
2019-03-20 14:00:16 +00:00
|
|
|
struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
|
net/sched: prepare TC actions to properly validate the control action
- pass a pointer to struct tcf_proto in each actions's init() handler,
to allow validating the control action, checking whether the chain
exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
to the action's init() handler, and replace it with a test that forbids
addition of actions having 'goto_chain' and NULL goto_chain pointer at
the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
and eventually allocate the action 'goto_chain' within the init()
handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
swap the current 'goto_chain' pointer with the new given one.
This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.
By doing this, the kernel does not leak anymore refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:
# tc chain add dev dd0 chain 42 ingress protocol ip flower \
> ip_proto tcp action drop
# tc chain add dev dd0 chain 43 ingress protocol ip flower \
> ip_proto udp action drop
# tc filter add dev dd0 ingress matchall \
> action gact goto chain 42 index 66
# tc filter replace dev dd0 ingress matchall \
> action gact goto chain 43 index 66
# echo scan >/sys/kernel/debug/kmemleak
<...>
unreferenced object 0xffff93c0ee09f000 (size 1024):
comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
[<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
[<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
[<000000006126a348>] netlink_unicast+0x1a0/0x250
[<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
[<00000000a25a2171>] sock_sendmsg+0x36/0x40
[<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
[<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
[<000000007a6c61f9>] do_syscall_64+0x5b/0x180
[<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0000000013eaa334>] 0xffffffffffffffff
Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-20 13:59:59 +00:00
|
|
|
|
2015-07-06 12:18:04 +00:00
|
|
|
free_percpu(p->cpu_bstats);
|
2018-09-21 11:14:02 +00:00
|
|
|
free_percpu(p->cpu_bstats_hw);
|
2015-07-06 12:18:04 +00:00
|
|
|
free_percpu(p->cpu_qstats);
|
2017-01-24 12:02:41 +00:00
|
|
|
|
2023-02-17 22:36:13 +00:00
|
|
|
tcf_set_action_cookie(&p->user_cookie, NULL);
|
net/sched: prepare TC actions to properly validate the control action
- pass a pointer to struct tcf_proto in each actions's init() handler,
to allow validating the control action, checking whether the chain
exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
to the action's init() handler, and replace it with a test that forbids
addition of actions having 'goto_chain' and NULL goto_chain pointer at
the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
and eventually allocate the action 'goto_chain' within the init()
handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
swap the current 'goto_chain' pointer with the new given one.
This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.
By doing this, the kernel does not leak anymore refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:
# tc chain add dev dd0 chain 42 ingress protocol ip flower \
> ip_proto tcp action drop
# tc chain add dev dd0 chain 43 ingress protocol ip flower \
> ip_proto udp action drop
# tc filter add dev dd0 ingress matchall \
> action gact goto chain 42 index 66
# tc filter replace dev dd0 ingress matchall \
> action gact goto chain 43 index 66
# echo scan >/sys/kernel/debug/kmemleak
<...>
unreferenced object 0xffff93c0ee09f000 (size 1024):
comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
[<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
[<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
[<000000006126a348>] netlink_unicast+0x1a0/0x250
[<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
[<00000000a25a2171>] sock_sendmsg+0x36/0x40
[<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
[<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
[<000000007a6c61f9>] do_syscall_64+0x5b/0x180
[<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0000000013eaa334>] 0xffffffffffffffff
Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-20 13:59:59 +00:00
|
|
|
if (chain)
|
|
|
|
tcf_chain_put_by_act(chain);
|
2017-01-24 12:02:41 +00:00
|
|
|
|
2015-07-06 12:18:04 +00:00
|
|
|
kfree(p);
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
static void offload_action_hw_count_set(struct tc_action *act,
|
|
|
|
u32 hw_count)
|
|
|
|
{
|
|
|
|
act->in_hw_count = hw_count;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
static void offload_action_hw_count_inc(struct tc_action *act,
|
|
|
|
u32 hw_count)
|
|
|
|
{
|
|
|
|
act->in_hw_count += hw_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void offload_action_hw_count_dec(struct tc_action *act,
|
|
|
|
u32 hw_count)
|
|
|
|
{
|
|
|
|
act->in_hw_count = act->in_hw_count > hw_count ?
|
|
|
|
act->in_hw_count - hw_count : 0;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
/* Number of offload entries needed for @act: tcf_pedit_nkeys() for a pedit
 * action, 1 for anything else.
 */
static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act)
{
	if (!is_tcf_pedit(act))
		return 1;

	return tcf_pedit_nkeys(act);
}
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
/* True when TCA_ACT_FLAGS_SKIP_HW is set in @flags. */
static bool tc_act_skip_hw(u32 flags)
{
	return !!(flags & TCA_ACT_FLAGS_SKIP_HW);
}
|
|
|
|
|
|
|
|
/* True when TCA_ACT_FLAGS_SKIP_SW is set in @flags. */
static bool tc_act_skip_sw(u32 flags)
{
	return !!(flags & TCA_ACT_FLAGS_SKIP_SW);
}
|
|
|
|
|
|
|
|
/* SKIP_HW and SKIP_SW are mutually exclusive flags. */
|
|
|
|
static bool tc_act_flags_valid(u32 flags)
|
|
|
|
{
|
|
|
|
flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW;
|
|
|
|
|
|
|
|
return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW);
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
static int offload_action_init(struct flow_offload_action *fl_action,
|
|
|
|
struct tc_action *act,
|
|
|
|
enum offload_act_command cmd,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
2021-12-22 04:25:46 +00:00
|
|
|
int err;
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
fl_action->extack = extack;
|
|
|
|
fl_action->command = cmd;
|
|
|
|
fl_action->index = act->tcfa_index;
|
2023-02-12 13:25:15 +00:00
|
|
|
fl_action->cookie = (unsigned long)act;
|
2021-12-17 18:16:22 +00:00
|
|
|
|
2021-12-22 04:25:46 +00:00
|
|
|
if (act->ops->offload_act_setup) {
|
|
|
|
spin_lock_bh(&act->tcfa_lock);
|
|
|
|
err = act->ops->offload_act_setup(act, fl_action, NULL,
|
2022-04-07 07:35:22 +00:00
|
|
|
false, extack);
|
2021-12-22 04:25:46 +00:00
|
|
|
spin_unlock_bh(&act->tcfa_lock);
|
|
|
|
return err;
|
|
|
|
}
|
2021-12-17 18:16:22 +00:00
|
|
|
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act,
|
|
|
|
u32 *hw_count)
|
2021-12-17 18:16:22 +00:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT,
|
|
|
|
fl_act, NULL, NULL);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
if (hw_count)
|
|
|
|
*hw_count = err;
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act,
|
|
|
|
u32 *hw_count,
|
|
|
|
flow_indr_block_bind_cb_t *cb,
|
|
|
|
void *cb_priv)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (hw_count)
|
|
|
|
*hw_count = 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_action_offload_cmd(struct flow_offload_action *fl_act,
|
|
|
|
u32 *hw_count,
|
|
|
|
flow_indr_block_bind_cb_t *cb,
|
|
|
|
void *cb_priv)
|
|
|
|
{
|
|
|
|
return cb ? tcf_action_offload_cmd_cb_ex(fl_act, hw_count,
|
|
|
|
cb, cb_priv) :
|
|
|
|
tcf_action_offload_cmd_ex(fl_act, hw_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_action_offload_add_ex(struct tc_action *action,
|
|
|
|
struct netlink_ext_ack *extack,
|
|
|
|
flow_indr_block_bind_cb_t *cb,
|
|
|
|
void *cb_priv)
|
2021-12-17 18:16:22 +00:00
|
|
|
{
|
2021-12-17 18:16:23 +00:00
|
|
|
bool skip_sw = tc_act_skip_sw(action->tcfa_flags);
|
2021-12-17 18:16:22 +00:00
|
|
|
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
|
|
|
|
[0] = action,
|
|
|
|
};
|
|
|
|
struct flow_offload_action *fl_action;
|
2021-12-17 18:16:23 +00:00
|
|
|
u32 in_hw_count = 0;
|
2021-12-17 18:16:22 +00:00
|
|
|
int num, err = 0;
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
if (tc_act_skip_hw(action->tcfa_flags))
|
|
|
|
return 0;
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
num = tcf_offload_act_num_actions_single(action);
|
|
|
|
fl_action = offload_action_alloc(num);
|
|
|
|
if (!fl_action)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack);
|
|
|
|
if (err)
|
|
|
|
goto fl_err;
|
|
|
|
|
2023-02-17 22:36:14 +00:00
|
|
|
err = tc_setup_action(&fl_action->action, actions, 0, extack);
|
2021-12-17 18:16:22 +00:00
|
|
|
if (err) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
2022-02-23 02:34:19 +00:00
|
|
|
"Failed to setup tc actions for offload");
|
2021-12-17 18:16:22 +00:00
|
|
|
goto fl_err;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv);
|
2021-12-17 18:16:23 +00:00
|
|
|
if (!err)
|
2021-12-17 18:16:27 +00:00
|
|
|
cb ? offload_action_hw_count_inc(action, in_hw_count) :
|
|
|
|
offload_action_hw_count_set(action, in_hw_count);
|
2021-12-17 18:16:23 +00:00
|
|
|
|
|
|
|
if (skip_sw && !tc_act_in_hw(action))
|
|
|
|
err = -EINVAL;
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
tc_cleanup_offload_action(&fl_action->action);
|
|
|
|
|
|
|
|
fl_err:
|
|
|
|
kfree(fl_action);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
/* offload the tc action after it is inserted */
|
|
|
|
static int tcf_action_offload_add(struct tc_action *action,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
return tcf_action_offload_add_ex(action, extack, NULL, NULL);
|
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:25 +00:00
|
|
|
int tcf_action_update_hw_stats(struct tc_action *action)
|
|
|
|
{
|
|
|
|
struct flow_offload_action fl_act = {};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL);
|
2021-12-17 18:16:25 +00:00
|
|
|
if (!err) {
|
|
|
|
preempt_disable();
|
|
|
|
tcf_action_stats_update(action, fl_act.stats.bytes,
|
|
|
|
fl_act.stats.pkts,
|
|
|
|
fl_act.stats.drops,
|
|
|
|
fl_act.stats.lastused,
|
|
|
|
true);
|
|
|
|
preempt_enable();
|
|
|
|
action->used_hw_stats = fl_act.stats.used_hw_stats;
|
|
|
|
action->used_hw_stats_valid = true;
|
|
|
|
} else {
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(tcf_action_update_hw_stats);
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
static int tcf_action_offload_del_ex(struct tc_action *action,
|
|
|
|
flow_indr_block_bind_cb_t *cb,
|
|
|
|
void *cb_priv)
|
2021-12-17 18:16:22 +00:00
|
|
|
{
|
|
|
|
struct flow_offload_action fl_act = {};
|
2021-12-17 18:16:23 +00:00
|
|
|
u32 in_hw_count = 0;
|
2021-12-17 18:16:22 +00:00
|
|
|
int err = 0;
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
if (!tc_act_in_hw(action))
|
|
|
|
return 0;
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv);
|
|
|
|
if (err < 0)
|
2021-12-17 18:16:23 +00:00
|
|
|
return err;
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
if (!cb && action->in_hw_count != in_hw_count)
|
2021-12-17 18:16:23 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
/* do not need to update hw state when deleting action */
|
|
|
|
if (cb && in_hw_count)
|
|
|
|
offload_action_hw_count_dec(action, in_hw_count);
|
|
|
|
|
2021-12-17 18:16:23 +00:00
|
|
|
return 0;
|
2021-12-17 18:16:22 +00:00
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
static int tcf_action_offload_del(struct tc_action *action)
|
|
|
|
{
|
|
|
|
return tcf_action_offload_del_ex(action, NULL, NULL);
|
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:29 +00:00
|
|
|
static void tcf_action_cleanup(struct tc_action *p)
|
2006-08-22 06:54:55 +00:00
|
|
|
{
|
2021-12-17 18:16:22 +00:00
|
|
|
tcf_action_offload_del(p);
|
2018-07-05 14:24:29 +00:00
|
|
|
if (p->ops->cleanup)
|
|
|
|
p->ops->cleanup(p);
|
|
|
|
|
2016-12-04 17:48:16 +00:00
|
|
|
gen_kill_estimator(&p->tcfa_rate_est);
|
2017-09-11 23:33:30 +00:00
|
|
|
free_tcf(p);
|
2006-08-22 06:54:55 +00:00
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:29 +00:00
|
|
|
/* Drop one reference on @p, and one bind count as well when @bind is set.
 *
 * If this was the last reference, the action is removed from its IDR under
 * idrinfo->lock, cleaned up, and 1 is returned. Otherwise 0 is returned.
 */
static int __tcf_action_put(struct tc_action *p, bool bind)
{
	struct tcf_idrinfo *info = p->idrinfo;

	if (!refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &info->lock)) {
		/* still referenced elsewhere; lock was not taken */
		if (bind)
			atomic_dec(&p->tcfa_bindcnt);
		return 0;
	}

	/* last reference gone; info->lock is held here */
	if (bind)
		atomic_dec(&p->tcfa_bindcnt);
	idr_remove(&info->action_idr, p->tcfa_index);
	mutex_unlock(&info->lock);

	tcf_action_cleanup(p);

	return 1;
}
|
|
|
|
|
2021-04-07 15:36:04 +00:00
|
|
|
/* Release a reference on action @p; a NULL @p is accepted and is a no-op.
 *
 * Returns ACT_P_DELETED if the action was destroyed, -EPERM if a strict
 * non-bind release was attempted on an action that is still bound, and 0
 * otherwise.
 */
static int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
	if (!p)
		return 0;

	/* Release with strict==1 and bind==0 is only called through the act
	 * API interface (classifiers always bind). The only case in which an
	 * action with a positive reference count and a zero bind count can
	 * exist is when it was also created with the act API (unbinding the
	 * last classifier destroys the action if it was created by a
	 * classifier). So the only case in which the bind count can change
	 * after the initial check is when an unbound action is destroyed by
	 * the act API while a classifier concurrently binds to an action with
	 * the same id. This results either in the creation of a new action
	 * (same behavior as before), or in reuse of the existing action if the
	 * concurrent process increments the reference count before the action
	 * is deleted. Both scenarios are acceptable.
	 */
	if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
		return -EPERM;

	return __tcf_action_put(p, bind) ? ACT_P_DELETED : 0;
}
|
2021-04-07 15:36:04 +00:00
|
|
|
|
|
|
|
/* Non-strict release of action @a, dropping a bind count as well when @bind
 * is set.
 *
 * If the release destroyed the action, the reference on the module that owns
 * the action's ops is dropped too. Returns the result of
 * __tcf_idr_release().
 */
int tcf_idr_release(struct tc_action *a, bool bind)
{
	/* Cache the ops first: @a may already be freed once the release
	 * below returns ACT_P_DELETED.
	 */
	const struct tc_action_ops *act_ops = a->ops;
	int rc = __tcf_idr_release(a, bind, false);

	if (rc == ACT_P_DELETED)
		module_put(act_ops->owner);

	return rc;
}
EXPORT_SYMBOL(tcf_idr_release);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2018-03-08 21:59:19 +00:00
|
|
|
static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
|
|
|
|
{
|
2023-02-17 22:36:13 +00:00
|
|
|
struct tc_cookie *user_cookie;
|
2018-03-08 21:59:19 +00:00
|
|
|
u32 cookie_len = 0;
|
|
|
|
|
2018-07-09 17:26:47 +00:00
|
|
|
rcu_read_lock();
|
2023-02-17 22:36:13 +00:00
|
|
|
user_cookie = rcu_dereference(act->user_cookie);
|
2018-07-09 17:26:47 +00:00
|
|
|
|
2023-02-17 22:36:13 +00:00
|
|
|
if (user_cookie)
|
|
|
|
cookie_len = nla_total_size(user_cookie->len);
|
2018-07-09 17:26:47 +00:00
|
|
|
rcu_read_unlock();
|
2018-03-08 21:59:19 +00:00
|
|
|
|
|
|
|
return nla_total_size(0) /* action number nested */
|
|
|
|
+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
|
|
|
|
+ cookie_len /* TCA_ACT_COOKIE */
|
2020-03-19 23:26:23 +00:00
|
|
|
+ nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */
|
2018-03-08 21:59:19 +00:00
|
|
|
+ nla_total_size(0) /* TCA_ACT_STATS nested */
|
2020-02-25 12:54:12 +00:00
|
|
|
+ nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */
|
2018-03-08 21:59:19 +00:00
|
|
|
/* TCA_STATS_BASIC */
|
|
|
|
+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
|
2019-11-05 03:13:15 +00:00
|
|
|
/* TCA_STATS_PKT64 */
|
|
|
|
+ nla_total_size_64bit(sizeof(u64))
|
2018-03-08 21:59:19 +00:00
|
|
|
/* TCA_STATS_QUEUE */
|
|
|
|
+ nla_total_size_64bit(sizeof(struct gnet_stats_queue))
|
2023-03-21 22:33:45 +00:00
|
|
|
+ nla_total_size(0) /* TCA_ACT_OPTIONS nested */
|
2018-03-08 21:59:19 +00:00
|
|
|
+ nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Total message size for a payload of @sz bytes of action attributes: adds
 * the netlink header, the tcamsg header and the TCA_ACT_TAB nest on top.
 */
static size_t tcf_action_full_attrs_size(size_t sz)
{
	size_t total = NLMSG_HDRLEN;		/* struct nlmsghdr */

	total += sizeof(struct tcamsg);
	total += nla_total_size(0);		/* TCA_ACT_TAB nested */
	total += sz;

	return total;
}
|
|
|
|
|
|
|
|
static size_t tcf_action_fill_size(const struct tc_action *act)
|
|
|
|
{
|
|
|
|
size_t sz = tcf_action_shared_attrs_size(act);
|
|
|
|
|
|
|
|
if (act->ops->get_fill_size)
|
|
|
|
return act->ops->get_fill_size(act) + sz;
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
2020-11-02 20:12:43 +00:00
|
|
|
static int
|
|
|
|
tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act)
|
|
|
|
{
|
|
|
|
unsigned char *b = skb_tail_pointer(skb);
|
|
|
|
struct tc_cookie *cookie;
|
|
|
|
|
2023-03-21 22:33:45 +00:00
|
|
|
if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind))
|
2020-11-02 20:12:43 +00:00
|
|
|
goto nla_put_failure;
|
|
|
|
if (tcf_action_copy_stats(skb, a, 0))
|
|
|
|
goto nla_put_failure;
|
|
|
|
if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
2023-02-17 22:36:13 +00:00
|
|
|
cookie = rcu_dereference(a->user_cookie);
|
2020-11-02 20:12:43 +00:00
|
|
|
if (cookie) {
|
|
|
|
if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
|
|
|
|
rcu_read_unlock();
|
|
|
|
goto nla_put_failure;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nla_put_failure:
|
|
|
|
nlmsg_trim(skb, b);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2024-10-17 16:19:34 +00:00
|
|
|
static int
|
|
|
|
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
|
|
|
|
{
|
|
|
|
unsigned char *b = skb_tail_pointer(skb);
|
|
|
|
struct nlattr *nest;
|
|
|
|
int err = -EINVAL;
|
|
|
|
u32 flags;
|
|
|
|
|
|
|
|
if (tcf_action_dump_terse(skb, a, false))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
|
|
|
|
nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
|
|
|
|
a->hw_stats, TCA_ACT_HW_STATS_ANY))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
if (a->used_hw_stats_valid &&
|
|
|
|
nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
|
|
|
|
a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK;
|
|
|
|
if (flags &&
|
|
|
|
nla_put_bitfield32(skb, TCA_ACT_FLAGS,
|
|
|
|
flags, flags))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS);
|
|
|
|
if (nest == NULL)
|
|
|
|
goto nla_put_failure;
|
|
|
|
err = tcf_action_dump_old(skb, a, bind, ref);
|
|
|
|
if (err > 0) {
|
|
|
|
nla_nest_end(skb, nest);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
nla_put_failure:
|
|
|
|
nlmsg_trim(skb, b);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-08-30 06:31:59 +00:00
|
|
|
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
|
2016-07-25 23:09:41 +00:00
|
|
|
struct netlink_callback *cb)
|
2006-08-22 06:54:55 +00:00
|
|
|
{
|
2017-08-30 06:31:59 +00:00
|
|
|
int err = 0, index = -1, s_i = 0, n_i = 0;
|
2017-07-30 17:24:51 +00:00
|
|
|
u32 act_flags = cb->args[2];
|
2017-07-30 17:24:52 +00:00
|
|
|
unsigned long jiffy_since = cb->args[3];
|
2008-01-24 04:34:11 +00:00
|
|
|
struct nlattr *nest;
|
2017-08-30 06:31:59 +00:00
|
|
|
struct idr *idr = &idrinfo->action_idr;
|
|
|
|
struct tc_action *p;
|
|
|
|
unsigned long id = 1;
|
2019-06-28 18:03:41 +00:00
|
|
|
unsigned long tmp;
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_lock(&idrinfo->lock);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
|
|
|
s_i = cb->args[0];
|
|
|
|
|
2019-06-28 18:03:41 +00:00
|
|
|
idr_for_each_entry_ul(idr, p, tmp, id) {
|
2017-08-30 06:31:59 +00:00
|
|
|
index++;
|
|
|
|
if (index < s_i)
|
|
|
|
continue;
|
2020-10-02 19:13:34 +00:00
|
|
|
if (IS_ERR(p))
|
|
|
|
continue;
|
2017-08-30 06:31:59 +00:00
|
|
|
|
|
|
|
if (jiffy_since &&
|
|
|
|
time_after(jiffy_since,
|
|
|
|
(unsigned long)p->tcfa_tm.lastuse))
|
|
|
|
continue;
|
|
|
|
|
2023-02-12 13:25:12 +00:00
|
|
|
tcf_action_update_hw_stats(p);
|
|
|
|
|
2019-04-26 09:13:06 +00:00
|
|
|
nest = nla_nest_start_noflag(skb, n_i);
|
2018-03-26 18:58:32 +00:00
|
|
|
if (!nest) {
|
|
|
|
index--;
|
2017-08-30 06:31:59 +00:00
|
|
|
goto nla_put_failure;
|
2018-03-26 18:58:32 +00:00
|
|
|
}
|
2020-11-24 16:40:54 +00:00
|
|
|
err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ?
|
2020-11-02 20:12:43 +00:00
|
|
|
tcf_action_dump_terse(skb, p, true) :
|
|
|
|
tcf_action_dump_1(skb, p, 0, 0);
|
2017-08-30 06:31:59 +00:00
|
|
|
if (err < 0) {
|
|
|
|
index--;
|
|
|
|
nlmsg_trim(skb, nest);
|
|
|
|
goto done;
|
2006-08-22 06:54:55 +00:00
|
|
|
}
|
2017-08-30 06:31:59 +00:00
|
|
|
nla_nest_end(skb, nest);
|
|
|
|
n_i++;
|
2020-11-24 16:40:54 +00:00
|
|
|
if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) &&
|
2017-08-30 06:31:59 +00:00
|
|
|
n_i >= TCA_ACT_MAX_PRIO)
|
|
|
|
goto done;
|
2006-08-22 06:54:55 +00:00
|
|
|
}
|
|
|
|
done:
|
2017-07-30 17:24:52 +00:00
|
|
|
if (index >= 0)
|
|
|
|
cb->args[0] = index + 1;
|
|
|
|
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_unlock(&idrinfo->lock);
|
2017-07-30 17:24:51 +00:00
|
|
|
if (n_i) {
|
2020-11-24 16:40:54 +00:00
|
|
|
if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON)
|
2017-07-30 17:24:51 +00:00
|
|
|
cb->args[1] = n_i;
|
|
|
|
}
|
2006-08-22 06:54:55 +00:00
|
|
|
return n_i;
|
|
|
|
|
2008-01-23 06:11:50 +00:00
|
|
|
nla_put_failure:
|
2008-01-24 04:34:11 +00:00
|
|
|
nla_nest_cancel(skb, nest);
|
2006-08-22 06:54:55 +00:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2018-09-19 23:37:29 +00:00
|
|
|
static int tcf_idr_release_unsafe(struct tc_action *p)
|
|
|
|
{
|
|
|
|
if (atomic_read(&p->tcfa_bindcnt) > 0)
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (refcount_dec_and_test(&p->tcfa_refcnt)) {
|
|
|
|
idr_remove(&p->idrinfo->action_idr, p->tcfa_index);
|
|
|
|
tcf_action_cleanup(p);
|
|
|
|
return ACT_P_DELETED;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-30 06:31:59 +00:00
|
|
|
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
|
2022-06-23 14:07:41 +00:00
|
|
|
const struct tc_action_ops *ops,
|
|
|
|
struct netlink_ext_ack *extack)
|
2006-08-22 06:54:55 +00:00
|
|
|
{
|
2008-01-24 04:34:11 +00:00
|
|
|
struct nlattr *nest;
|
2017-08-30 06:31:59 +00:00
|
|
|
int n_i = 0;
|
2014-02-12 01:07:34 +00:00
|
|
|
int ret = -EINVAL;
|
2017-08-30 06:31:59 +00:00
|
|
|
struct idr *idr = &idrinfo->action_idr;
|
|
|
|
struct tc_action *p;
|
|
|
|
unsigned long id = 1;
|
2019-06-28 18:03:41 +00:00
|
|
|
unsigned long tmp;
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2019-04-26 09:13:06 +00:00
|
|
|
nest = nla_nest_start_noflag(skb, 0);
|
2008-01-24 04:34:11 +00:00
|
|
|
if (nest == NULL)
|
|
|
|
goto nla_put_failure;
|
2023-03-21 22:33:45 +00:00
|
|
|
if (nla_put_string(skb, TCA_ACT_KIND, ops->kind))
|
2012-03-29 09:11:39 +00:00
|
|
|
goto nla_put_failure;
|
2017-08-30 06:31:59 +00:00
|
|
|
|
2022-06-23 14:07:41 +00:00
|
|
|
ret = 0;
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_lock(&idrinfo->lock);
|
2019-06-28 18:03:41 +00:00
|
|
|
idr_for_each_entry_ul(idr, p, tmp, id) {
|
2020-09-23 03:56:24 +00:00
|
|
|
if (IS_ERR(p))
|
|
|
|
continue;
|
2018-09-19 23:37:29 +00:00
|
|
|
ret = tcf_idr_release_unsafe(p);
|
2022-06-23 14:07:41 +00:00
|
|
|
if (ret == ACT_P_DELETED)
|
2017-09-13 15:32:37 +00:00
|
|
|
module_put(ops->owner);
|
2022-06-23 14:07:41 +00:00
|
|
|
else if (ret < 0)
|
|
|
|
break;
|
|
|
|
n_i++;
|
2006-08-22 06:54:55 +00:00
|
|
|
}
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_unlock(&idrinfo->lock);
|
2022-06-23 14:07:41 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
if (n_i)
|
|
|
|
NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
|
|
|
|
else
|
|
|
|
goto nla_put_failure;
|
|
|
|
}
|
2018-09-19 23:37:29 +00:00
|
|
|
|
2021-06-17 08:02:07 +00:00
|
|
|
ret = nla_put_u32(skb, TCA_FCNT, n_i);
|
|
|
|
if (ret)
|
2012-03-29 09:11:39 +00:00
|
|
|
goto nla_put_failure;
|
2008-01-24 04:34:11 +00:00
|
|
|
nla_nest_end(skb, nest);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
|
|
|
return n_i;
|
2008-01-23 06:11:50 +00:00
|
|
|
nla_put_failure:
|
2008-01-24 04:34:11 +00:00
|
|
|
nla_nest_cancel(skb, nest);
|
2014-02-12 01:07:34 +00:00
|
|
|
return ret;
|
2006-08-22 06:54:55 +00:00
|
|
|
}
|
|
|
|
|
2016-02-22 23:57:53 +00:00
|
|
|
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb, int type,
|
2018-02-15 15:54:59 +00:00
|
|
|
const struct tc_action_ops *ops,
|
|
|
|
struct netlink_ext_ack *extack)
|
2006-08-22 06:54:55 +00:00
|
|
|
{
|
2017-08-30 06:31:59 +00:00
|
|
|
struct tcf_idrinfo *idrinfo = tn->idrinfo;
|
2016-02-22 23:57:53 +00:00
|
|
|
|
2006-08-22 06:54:55 +00:00
|
|
|
if (type == RTM_DELACTION) {
|
2022-06-23 14:07:41 +00:00
|
|
|
return tcf_del_walker(idrinfo, skb, ops, extack);
|
2006-08-22 06:54:55 +00:00
|
|
|
} else if (type == RTM_GETACTION) {
|
2017-08-30 06:31:59 +00:00
|
|
|
return tcf_dump_walker(idrinfo, skb, cb);
|
2006-08-22 06:54:55 +00:00
|
|
|
} else {
|
2018-02-15 15:54:59 +00:00
|
|
|
WARN(1, "tcf_generic_walker: unknown command %d\n", type);
|
|
|
|
NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
|
2006-08-22 06:54:55 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2016-02-22 23:57:53 +00:00
|
|
|
EXPORT_SYMBOL(tcf_generic_walker);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2018-08-19 19:22:08 +00:00
|
|
|
/* Look up the action stored at @index in @tn.
 *
 * When a real action is found, a reference is taken on it under
 * idrinfo->lock, it is stored in *@a and true is returned. An empty slot or
 * an error-pointer placeholder counts as "not found": false is returned and
 * *@a is left untouched.
 */
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *found;

	mutex_lock(&idrinfo->lock);
	found = idr_find(&idrinfo->action_idr, index);
	if (found && !IS_ERR(found))
		refcount_inc(&found->tcfa_refcnt);
	else
		found = NULL;
	mutex_unlock(&idrinfo->lock);

	if (!found)
		return false;

	*a = found;
	return true;
}
EXPORT_SYMBOL(tcf_idr_search);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2022-09-08 04:14:34 +00:00
|
|
|
static int __tcf_generic_walker(struct net *net, struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb, int type,
|
|
|
|
const struct tc_action_ops *ops,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct tc_action_net *tn = net_generic(net, ops->net_id);
|
|
|
|
|
|
|
|
if (unlikely(ops->walk))
|
|
|
|
return ops->walk(net, skb, cb, type, ops, extack);
|
|
|
|
|
|
|
|
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Dispatch an index lookup: use the action type's own ->lookup() when it
 * provides one, otherwise fall back to tcf_idr_search() on the type's
 * per-netns state.
 */
static int __tcf_idr_search(struct net *net,
			    const struct tc_action_ops *ops,
			    struct tc_action **a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, ops->net_id);

	/* A type-specific lookup is the uncommon case. */
	if (likely(!ops->lookup))
		return tcf_idr_search(tn, a, index);

	return ops->lookup(net, a, index);
}
|
|
|
|
|
2018-08-19 19:22:06 +00:00
|
|
|
/* Drop one reference on the action stored at @index in @idrinfo, destroying
 * the action if that was the last reference.
 *
 * Returns:
 *   0        a reference was dropped (the action may or may not have been
 *            destroyed)
 *   -ENOENT  no action exists at @index
 *   -EPERM   the action is still bound and must not be released
 */
static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
{
	struct tc_action *p;
	int ret = 0;

	mutex_lock(&idrinfo->lock);
	p = idr_find(&idrinfo->action_idr, index);
	/* The slot can be empty, or it can hold the temporary ERR_PTR()
	 * placeholder that reserves an index for an action still being
	 * created. Neither is a real action, and the placeholder must never
	 * be dereferenced.
	 */
	if (!p || IS_ERR(p)) {
		ret = -ENOENT;
		goto unlock;
	}

	if (atomic_read(&p->tcfa_bindcnt)) {
		ret = -EPERM;
		goto unlock;
	}

	if (refcount_dec_and_test(&p->tcfa_refcnt)) {
		/* Read the owner now: @p is gone after the cleanup below. */
		struct module *owner = p->ops->owner;

		WARN_ON(p != idr_remove(&idrinfo->action_idr,
					p->tcfa_index));
		mutex_unlock(&idrinfo->lock);

		tcf_action_cleanup(p);
		module_put(owner);
		return 0;
	}

unlock:
	mutex_unlock(&idrinfo->lock);
	return ret;
}
|
|
|
|
|
2017-08-30 06:31:59 +00:00
|
|
|
/* Allocate and initialise a new action of type @ops for index @index.
 *
 * The action starts with a reference count of 1 and, when @bind is set, a
 * bind count of 1. Per-CPU stats are allocated only when @cpustats is set,
 * and a rate estimator is created only when @est is given. On success a
 * reference is taken on the module owning @ops, the action is stored in *@a
 * and 0 is returned. This function does not itself insert anything into the
 * IDR.
 *
 * Returns -ENOMEM on allocation failure, or the error from
 * gen_new_estimator(); everything allocated so far is released on failure.
 */
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
		   struct tc_action **a, const struct tc_action_ops *ops,
		   int bind, bool cpustats, u32 flags)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *act;
	int err = -ENOMEM;

	act = kzalloc(ops->size, GFP_KERNEL);
	if (unlikely(!act))
		return -ENOMEM;

	refcount_set(&act->tcfa_refcnt, 1);
	if (bind)
		atomic_set(&act->tcfa_bindcnt, 1);

	if (cpustats) {
		act->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
		if (!act->cpu_bstats)
			goto err_free_act;

		act->cpu_bstats_hw =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
		if (!act->cpu_bstats_hw)
			goto err_free_bstats;

		act->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!act->cpu_qstats)
			goto err_free_bstats_hw;
	}

	gnet_stats_basic_sync_init(&act->tcfa_bstats);
	gnet_stats_basic_sync_init(&act->tcfa_bstats_hw);
	spin_lock_init(&act->tcfa_lock);

	act->tcfa_index = index;
	act->tcfa_tm.install = jiffies;
	act->tcfa_tm.lastuse = jiffies;
	act->tcfa_tm.firstuse = 0;
	act->tcfa_flags = flags;

	if (est) {
		err = gen_new_estimator(&act->tcfa_bstats, act->cpu_bstats,
					&act->tcfa_rate_est,
					&act->tcfa_lock, false, est);
		if (err)
			goto err_free_qstats;
	}

	act->idrinfo = idrinfo;
	__module_get(ops->owner);
	act->ops = ops;
	*a = act;

	return 0;

	/* Unwind in reverse order of allocation. Without @cpustats the
	 * per-CPU pointers are still NULL from kzalloc().
	 */
err_free_qstats:
	free_percpu(act->cpu_qstats);
err_free_bstats_hw:
	free_percpu(act->cpu_bstats_hw);
err_free_bstats:
	free_percpu(act->cpu_bstats);
err_free_act:
	kfree(act);
	return err;
}
EXPORT_SYMBOL(tcf_idr_create);
|
2006-08-22 06:54:55 +00:00
|
|
|
|
2019-10-30 14:09:06 +00:00
|
|
|
/* Same as tcf_idr_create(), with the cpustats choice derived from @flags:
 * per-CPU stats are used unless TCA_ACT_FLAGS_NO_PERCPU_STATS is set.
 */
int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
			      struct nlattr *est, struct tc_action **a,
			      const struct tc_action_ops *ops, int bind,
			      u32 flags)
{
	bool want_cpustats = !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS);

	return tcf_idr_create(tn, index, est, a, ops, bind,
			      want_cpustats, flags);
}
EXPORT_SYMBOL(tcf_idr_create_from_flags);
|
|
|
|
|
2018-07-05 14:24:32 +00:00
|
|
|
/* Cleanup idr index that was allocated but not initialized. */
|
|
|
|
|
|
|
|
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
|
|
|
|
{
|
|
|
|
struct tcf_idrinfo *idrinfo = tn->idrinfo;
|
|
|
|
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_lock(&idrinfo->lock);
|
2018-07-05 14:24:32 +00:00
|
|
|
/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
|
|
|
|
WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
|
2018-10-02 19:50:19 +00:00
|
|
|
mutex_unlock(&idrinfo->lock);
|
2018-07-05 14:24:32 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(tcf_idr_cleanup);
|
|
|
|
|
|
|
|
/* Check if action with specified index exists. If the action is found, increments
|
|
|
|
* its reference and bind counters, and return 1. Otherwise insert temporary
|
|
|
|
* error pointer (to prevent concurrent users from inserting actions with same
|
|
|
|
* index) and return 0.
|
net/sched: act_api: rely on rcu in tcf_idr_check_alloc
Instead of relying only on the idrinfo->lock mutex for
bind/alloc logic, rely on a combination of rcu + mutex + atomics
to better scale the case where multiple rtnl-less filters are
binding to the same action object.
Action binding happens when an action index is specified explicitly and
an action exists which such index exists. Example:
tc actions add action drop index 1
tc filter add ... matchall action drop index 1
tc filter add ... matchall action drop index 1
tc filter add ... matchall action drop index 1
tc filter ls ...
filter protocol all pref 49150 matchall chain 0 filter protocol all pref 49150 matchall chain 0 handle 0x1
not_in_hw
action order 1: gact action drop
random type none pass val 0
index 1 ref 4 bind 3
filter protocol all pref 49151 matchall chain 0 filter protocol all pref 49151 matchall chain 0 handle 0x1
not_in_hw
action order 1: gact action drop
random type none pass val 0
index 1 ref 4 bind 3
filter protocol all pref 49152 matchall chain 0 filter protocol all pref 49152 matchall chain 0 handle 0x1
not_in_hw
action order 1: gact action drop
random type none pass val 0
index 1 ref 4 bind 3
When no index is specified, as before, grab the mutex and allocate
in the idr the next available id. In this version, as opposed to before,
it's simplified to store the -EBUSY pointer instead of the previous
alloc + replace combination.
When an index is specified, rely on rcu to find if there's an object in
such index. If there's none, fallback to the above, serializing on the
mutex and reserving the specified id. If there's one, it can be an -EBUSY
pointer, in which case we just try again until it's an action, or an action.
Given the rcu guarantees, the action found could be dead and therefore
we need to bump the refcount if it's not 0, handling the case it's
in fact 0.
As bind and the action refcount are already atomics, these increments can
happen without the mutex protection while many tcf_idr_check_alloc race
to bind to the same action instance.
In case binding encounters a parallel delete or add, it will return
-EAGAIN in order to try again. Both filter and action apis already
have the retry machinery in-place. In case it's an unlocked filter it
retries under the rtnl lock.
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Link: https://lore.kernel.org/r/20231211181807.96028-2-pctammela@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-12-11 18:18:06 +00:00
|
|
|
*
|
|
|
|
* May return -EAGAIN for binding actions in case of a parallel add/delete on
|
|
|
|
* the requested index.
|
2018-07-05 14:24:32 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
			struct tc_action **a, int bind)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *act;
	u32 limit;
	int err;

	if (!*index) {
		/* No index requested: search for a free slot from 1 upwards */
		*index = 1;
		limit = UINT_MAX;
		goto reserve;
	}

	rcu_read_lock();
	act = idr_find(&idrinfo->action_idr, *index);
	if (!act) {
		/* Empty slot, try to allocate exactly the requested index */
		rcu_read_unlock();
		limit = *index;
		goto reserve;
	}

	/* An error pointer means that another process allocated the index
	 * but did not assign the action pointer yet; a refcount that is
	 * already zero means the action was deleted in parallel. Either
	 * way the caller has to try again. The error pointer test comes
	 * first so that it is never dereferenced.
	 */
	if (IS_ERR(act) || !refcount_inc_not_zero(&act->tcfa_refcnt)) {
		rcu_read_unlock();
		return -EAGAIN;
	}

	/* Existing action: the reference was taken above, account the bind */
	if (bind)
		atomic_inc(&act->tcfa_bindcnt);
	*a = act;
	rcu_read_unlock();

	return 1;

reserve:
	*a = NULL;

	/* Reserve the slot with an -EBUSY error pointer as a placeholder */
	mutex_lock(&idrinfo->lock);
	err = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index,
			    limit, GFP_KERNEL);
	mutex_unlock(&idrinfo->lock);

	/* N binds raced for action allocation,
	 * retry for all the ones that failed.
	 */
	return (err == -ENOSPC && *index == limit) ? -EAGAIN : err;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);
|
|
|
|
|
2017-08-30 06:31:59 +00:00
|
|
|
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
|
|
|
|
struct tcf_idrinfo *idrinfo)
|
2016-02-22 23:57:52 +00:00
|
|
|
{
|
2017-08-30 06:31:59 +00:00
|
|
|
struct idr *idr = &idrinfo->action_idr;
|
|
|
|
struct tc_action *p;
|
|
|
|
int ret;
|
|
|
|
unsigned long id = 1;
|
2019-06-28 18:03:41 +00:00
|
|
|
unsigned long tmp;
|
2016-02-22 23:57:52 +00:00
|
|
|
|
2019-06-28 18:03:41 +00:00
|
|
|
idr_for_each_entry_ul(idr, p, tmp, id) {
|
2017-08-30 06:31:59 +00:00
|
|
|
ret = __tcf_idr_release(p, false, true);
|
|
|
|
if (ret == ACT_P_DELETED)
|
|
|
|
module_put(ops->owner);
|
|
|
|
else if (ret < 0)
|
|
|
|
return;
|
2016-02-22 23:57:52 +00:00
|
|
|
}
|
2017-08-30 06:31:59 +00:00
|
|
|
idr_destroy(&idrinfo->action_idr);
|
2016-02-22 23:57:52 +00:00
|
|
|
}
|
2017-08-30 06:31:59 +00:00
|
|
|
EXPORT_SYMBOL(tcf_idrinfo_destroy);
|
2016-02-22 23:57:52 +00:00
|
|
|
|
2013-12-16 04:15:10 +00:00
|
|
|
static LIST_HEAD(act_base);
|
2005-04-16 22:20:36 +00:00
|
|
|
static DEFINE_RWLOCK(act_mod_lock);
|
2021-12-17 18:16:27 +00:00
|
|
|
/* since act ops id is stored in pernet subsystem list,
|
|
|
|
* then there is no way to walk through only all the action
|
|
|
|
* subsystem, so we keep tc action pernet ops id for
|
|
|
|
* reoffload to walk through.
|
|
|
|
*/
|
|
|
|
static LIST_HEAD(act_pernet_id_list);
|
|
|
|
static DEFINE_MUTEX(act_id_mutex);
|
|
|
|
struct tc_act_pernet_id {
|
|
|
|
struct list_head list;
|
|
|
|
unsigned int id;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int tcf_pernet_add_id_list(unsigned int id)
|
|
|
|
{
|
|
|
|
struct tc_act_pernet_id *id_ptr;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
mutex_lock(&act_id_mutex);
|
|
|
|
list_for_each_entry(id_ptr, &act_pernet_id_list, list) {
|
|
|
|
if (id_ptr->id == id) {
|
|
|
|
ret = -EEXIST;
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL);
|
|
|
|
if (!id_ptr) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
id_ptr->id = id;
|
|
|
|
|
|
|
|
list_add_tail(&id_ptr->list, &act_pernet_id_list);
|
|
|
|
|
|
|
|
err_out:
|
|
|
|
mutex_unlock(&act_id_mutex);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void tcf_pernet_del_id_list(unsigned int id)
|
|
|
|
{
|
|
|
|
struct tc_act_pernet_id *id_ptr;
|
|
|
|
|
|
|
|
mutex_lock(&act_id_mutex);
|
|
|
|
list_for_each_entry(id_ptr, &act_pernet_id_list, list) {
|
|
|
|
if (id_ptr->id == id) {
|
|
|
|
list_del(&id_ptr->list);
|
|
|
|
kfree(id_ptr);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mutex_unlock(&act_id_mutex);
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2016-02-22 23:57:53 +00:00
|
|
|
int tcf_register_action(struct tc_action_ops *act,
|
|
|
|
struct pernet_operations *ops)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2013-12-16 04:15:10 +00:00
|
|
|
struct tc_action_ops *a;
|
2016-02-22 23:57:53 +00:00
|
|
|
int ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2022-09-08 04:14:34 +00:00
|
|
|
if (!act->act || !act->dump || !act->init)
|
2013-12-04 14:26:52 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-10-11 17:56:45 +00:00
|
|
|
/* We have to register pernet ops before making the action ops visible,
|
|
|
|
* otherwise tcf_action_init_1() could get a partially initialized
|
|
|
|
* netns.
|
|
|
|
*/
|
|
|
|
ret = register_pernet_subsys(ops);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2021-12-17 18:16:27 +00:00
|
|
|
if (ops->id) {
|
|
|
|
ret = tcf_pernet_add_id_list(*ops->id);
|
|
|
|
if (ret)
|
|
|
|
goto err_id;
|
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
write_lock(&act_mod_lock);
|
2013-12-16 04:15:10 +00:00
|
|
|
list_for_each_entry(a, &act_base, head) {
|
2019-02-10 12:25:00 +00:00
|
|
|
if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
|
2021-12-17 18:16:27 +00:00
|
|
|
ret = -EEXIST;
|
|
|
|
goto err_out;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
}
|
2013-12-16 04:15:10 +00:00
|
|
|
list_add_tail(&act->head, &act_base);
|
2005-04-16 22:20:36 +00:00
|
|
|
write_unlock(&act_mod_lock);
|
2016-02-22 23:57:53 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
2021-12-17 18:16:27 +00:00
|
|
|
|
|
|
|
err_out:
|
|
|
|
write_unlock(&act_mod_lock);
|
|
|
|
if (ops->id)
|
|
|
|
tcf_pernet_del_id_list(*ops->id);
|
|
|
|
err_id:
|
|
|
|
unregister_pernet_subsys(ops);
|
|
|
|
return ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2008-01-23 06:10:23 +00:00
|
|
|
EXPORT_SYMBOL(tcf_register_action);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2016-02-22 23:57:53 +00:00
|
|
|
int tcf_unregister_action(struct tc_action_ops *act,
|
|
|
|
struct pernet_operations *ops)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2013-12-16 04:15:10 +00:00
|
|
|
struct tc_action_ops *a;
|
2005-04-16 22:20:36 +00:00
|
|
|
int err = -ENOENT;
|
|
|
|
|
|
|
|
write_lock(&act_mod_lock);
|
2013-12-20 20:32:32 +00:00
|
|
|
list_for_each_entry(a, &act_base, head) {
|
|
|
|
if (a == act) {
|
|
|
|
list_del(&act->head);
|
|
|
|
err = 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
2013-12-20 20:32:32 +00:00
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
write_unlock(&act_mod_lock);
|
2021-12-17 18:16:27 +00:00
|
|
|
if (!err) {
|
2016-10-11 17:56:45 +00:00
|
|
|
unregister_pernet_subsys(ops);
|
2021-12-17 18:16:27 +00:00
|
|
|
if (ops->id)
|
|
|
|
tcf_pernet_del_id_list(*ops->id);
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
return err;
|
|
|
|
}
|
2008-01-23 06:10:23 +00:00
|
|
|
EXPORT_SYMBOL(tcf_unregister_action);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/* lookup by name */
|
|
|
|
static struct tc_action_ops *tc_lookup_action_n(char *kind)
|
|
|
|
{
|
2013-12-20 20:32:32 +00:00
|
|
|
struct tc_action_ops *a, *res = NULL;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (kind) {
|
|
|
|
read_lock(&act_mod_lock);
|
2013-12-16 04:15:10 +00:00
|
|
|
list_for_each_entry(a, &act_base, head) {
|
2005-04-16 22:20:36 +00:00
|
|
|
if (strcmp(kind, a->kind) == 0) {
|
2013-12-20 20:32:32 +00:00
|
|
|
if (try_module_get(a->owner))
|
|
|
|
res = a;
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
read_unlock(&act_mod_lock);
|
|
|
|
}
|
2013-12-20 20:32:32 +00:00
|
|
|
return res;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2008-01-23 06:11:50 +00:00
|
|
|
/* lookup by nlattr */
|
|
|
|
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2013-12-20 20:32:32 +00:00
|
|
|
struct tc_action_ops *a, *res = NULL;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (kind) {
|
|
|
|
read_lock(&act_mod_lock);
|
2013-12-16 04:15:10 +00:00
|
|
|
list_for_each_entry(a, &act_base, head) {
|
2008-01-23 06:11:50 +00:00
|
|
|
if (nla_strcmp(kind, a->kind) == 0) {
|
2013-12-20 20:32:32 +00:00
|
|
|
if (try_module_get(a->owner))
|
|
|
|
res = a;
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
read_unlock(&act_mod_lock);
|
|
|
|
}
|
2013-12-20 20:32:32 +00:00
|
|
|
return res;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2020-11-09 07:02:17 +00:00
|
|
|
/* TCA_ACT_MAX_PRIO is 32, so the count goes up to 32 */
|
2017-04-23 17:17:28 +00:00
|
|
|
#define TCA_ACT_MAX_PRIO_MASK 0x1FF
|
2016-08-14 05:35:00 +00:00
|
|
|
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
|
|
|
|
int nr_actions, struct tcf_result *res)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2017-04-23 17:17:28 +00:00
|
|
|
u32 jmp_prgcnt = 0;
|
|
|
|
u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
|
2017-08-04 12:29:02 +00:00
|
|
|
int i;
|
|
|
|
int ret = TC_ACT_OK;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2017-01-07 22:06:35 +00:00
|
|
|
if (skb_skip_tc_classify(skb))
|
|
|
|
return TC_ACT_OK;
|
|
|
|
|
2017-04-23 17:17:28 +00:00
|
|
|
restart_act_graph:
|
2016-08-14 05:35:00 +00:00
|
|
|
for (i = 0; i < nr_actions; i++) {
|
|
|
|
const struct tc_action *a = actions[i];
|
net: sched: limit TC_ACT_REPEAT loops
We have been living dangerously, at the mercy of malicious users,
abusing TC_ACT_REPEAT, as shown by this syzbot report [1].
Add an arbitrary limit (32) to the number of times an action can
return TC_ACT_REPEAT.
v2: switch the limit to 32 instead of 10.
Use net_warn_ratelimited() instead of pr_err_once().
[1] (C repro available on demand)
rcu: INFO: rcu_preempt self-detected stall on CPU
rcu: 1-...!: (10500 ticks this GP) idle=021/1/0x4000000000000000 softirq=5592/5592 fqs=0
(t=10502 jiffies g=5305 q=190)
rcu: rcu_preempt kthread timer wakeup didn't happen for 10502 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402
rcu: Possible timer handling issue on cpu=0 timer-softirq=3527
rcu: rcu_preempt kthread starved for 10505 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0
rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior.
rcu: RCU grace-period kthread stack dump:
task:rcu_preempt state:I stack:29344 pid: 14 ppid: 2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:4986 [inline]
__schedule+0xab2/0x4db0 kernel/sched/core.c:6295
schedule+0xd2/0x260 kernel/sched/core.c:6368
schedule_timeout+0x14a/0x2a0 kernel/time/timer.c:1881
rcu_gp_fqs_loop+0x186/0x810 kernel/rcu/tree.c:1963
rcu_gp_kthread+0x1de/0x320 kernel/rcu/tree.c:2136
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
rcu: Stack dump where RCU GP kthread last ran:
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 3646 Comm: syz-executor358 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:rep_nop arch/x86/include/asm/vdso/processor.h:13 [inline]
RIP: 0010:cpu_relax arch/x86/include/asm/vdso/processor.h:18 [inline]
RIP: 0010:pv_wait_head_or_lock kernel/locking/qspinlock_paravirt.h:437 [inline]
RIP: 0010:__pv_queued_spin_lock_slowpath+0x3b8/0xb40 kernel/locking/qspinlock.c:508
Code: 48 89 eb c6 45 01 01 41 bc 00 80 00 00 48 c1 e9 03 83 e3 07 41 be 01 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8d 2c 01 eb 0c <f3> 90 41 83 ec 01 0f 84 72 04 00 00 41 0f b6 45 00 38 d8 7f 08 84
RSP: 0018:ffffc9000283f1b0 EFLAGS: 00000206
RAX: 0000000000000003 RBX: 0000000000000000 RCX: 1ffff1100fc0071e
RDX: 0000000000000001 RSI: 0000000000000201 RDI: 0000000000000000
RBP: ffff88807e0038f0 R08: 0000000000000001 R09: ffffffff8ffbf9ff
R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000004c1e
R13: ffffed100fc0071e R14: 0000000000000001 R15: ffff8880b9c3aa80
FS: 00005555562bf300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffdbfef12b8 CR3: 00000000723c2000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
pv_queued_spin_lock_slowpath arch/x86/include/asm/paravirt.h:591 [inline]
queued_spin_lock_slowpath arch/x86/include/asm/qspinlock.h:51 [inline]
queued_spin_lock include/asm-generic/qspinlock.h:85 [inline]
do_raw_spin_lock+0x200/0x2b0 kernel/locking/spinlock_debug.c:115
spin_lock_bh include/linux/spinlock.h:354 [inline]
sch_tree_lock include/net/sch_generic.h:610 [inline]
sch_tree_lock include/net/sch_generic.h:605 [inline]
prio_tune+0x3b9/0xb50 net/sched/sch_prio.c:211
prio_init+0x5c/0x80 net/sched/sch_prio.c:244
qdisc_create.constprop.0+0x44a/0x10f0 net/sched/sch_api.c:1253
tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5594
netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
sock_sendmsg_nosec net/socket.c:705 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:725
____sys_sendmsg+0x6e8/0x810 net/socket.c:2413
___sys_sendmsg+0xf3/0x170 net/socket.c:2467
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2496
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f7ee98aae99
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffdbfef12d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007ffdbfef1300 RCX: 00007f7ee98aae99
RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003
RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d
R10: 000000000000000d R11: 0000000000000246 R12: 00007ffdbfef12f0
R13: 00000000000f4240 R14: 000000000004ca47 R15: 00007ffdbfef12e4
</TASK>
INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 2.293 msecs
NMI backtrace for cpu 1
CPU: 1 PID: 3260 Comm: kworker/1:3 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: mld mld_ifc_work
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111
nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62
trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343
print_cpu_stall kernel/rcu/tree_stall.h:604 [inline]
check_cpu_stall kernel/rcu/tree_stall.h:688 [inline]
rcu_pending kernel/rcu/tree.c:3919 [inline]
rcu_sched_clock_irq.cold+0x5c/0x759 kernel/rcu/tree.c:2617
update_process_times+0x16d/0x200 kernel/time/timer.c:1785
tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226
tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428
__run_hrtimer kernel/time/hrtimer.c:1685 [inline]
__hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749
hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811
local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline]
__sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103
sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638
RIP: 0010:__sanitizer_cov_trace_const_cmp4+0xc/0x70 kernel/kcov.c:286
Code: 00 00 00 48 89 7c 30 e8 48 89 4c 30 f0 4c 89 54 d8 20 48 89 10 5b c3 0f 1f 80 00 00 00 00 41 89 f8 bf 03 00 00 00 4c 8b 14 24 <89> f1 65 48 8b 34 25 00 70 02 00 e8 14 f9 ff ff 84 c0 74 4b 48 8b
RSP: 0018:ffffc90002c5eea8 EFLAGS: 00000246
RAX: 0000000000000007 RBX: ffff88801c625800 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: ffff8880137d3100 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff874fcd88 R11: 0000000000000000 R12: ffff88801d692dc0
R13: ffff8880137d3104 R14: 0000000000000000 R15: ffff88801d692de8
tcf_police_act+0x358/0x11d0 net/sched/act_police.c:256
tcf_action_exec net/sched/act_api.c:1049 [inline]
tcf_action_exec+0x1a6/0x530 net/sched/act_api.c:1026
tcf_exts_exec include/net/pkt_cls.h:326 [inline]
route4_classify+0xef0/0x1400 net/sched/cls_route.c:179
__tcf_classify net/sched/cls_api.c:1549 [inline]
tcf_classify+0x3e8/0x9d0 net/sched/cls_api.c:1615
prio_classify net/sched/sch_prio.c:42 [inline]
prio_enqueue+0x3a7/0x790 net/sched/sch_prio.c:75
dev_qdisc_enqueue+0x40/0x300 net/core/dev.c:3668
__dev_xmit_skb net/core/dev.c:3756 [inline]
__dev_queue_xmit+0x1f61/0x3660 net/core/dev.c:4081
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip_finish_output2+0x14dc/0x2170 net/ipv4/ip_output.c:228
__ip_finish_output net/ipv4/ip_output.c:306 [inline]
__ip_finish_output+0x396/0x650 net/ipv4/ip_output.c:288
ip_finish_output+0x32/0x200 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip_output+0x196/0x310 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:451 [inline]
ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:126
iptunnel_xmit+0x628/0xa50 net/ipv4/ip_tunnel_core.c:82
geneve_xmit_skb drivers/net/geneve.c:966 [inline]
geneve_xmit+0x10c8/0x3530 drivers/net/geneve.c:1077
__netdev_start_xmit include/linux/netdevice.h:4683 [inline]
netdev_start_xmit include/linux/netdevice.h:4697 [inline]
xmit_one net/core/dev.c:3473 [inline]
dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3489
__dev_queue_xmit+0x2985/0x3660 net/core/dev.c:4116
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip6_finish_output2+0xf7a/0x14f0 net/ipv6/ip6_output.c:126
__ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
__ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170
ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224
dst_output include/net/dst.h:451 [inline]
NF_HOOK include/linux/netfilter.h:307 [inline]
NF_HOOK include/linux/netfilter.h:301 [inline]
mld_sendpack+0x9a3/0xe40 net/ipv6/mcast.c:1826
mld_send_cr net/ipv6/mcast.c:2127 [inline]
mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2659
process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307
worker_thread+0x657/0x1110 kernel/workqueue.c:2454
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
----------------
Code disassembly (best guess):
0: 48 89 eb mov %rbp,%rbx
3: c6 45 01 01 movb $0x1,0x1(%rbp)
7: 41 bc 00 80 00 00 mov $0x8000,%r12d
d: 48 c1 e9 03 shr $0x3,%rcx
11: 83 e3 07 and $0x7,%ebx
14: 41 be 01 00 00 00 mov $0x1,%r14d
1a: 48 b8 00 00 00 00 00 movabs $0xdffffc0000000000,%rax
21: fc ff df
24: 4c 8d 2c 01 lea (%rcx,%rax,1),%r13
28: eb 0c jmp 0x36
* 2a: f3 90 pause <-- trapping instruction
2c: 41 83 ec 01 sub $0x1,%r12d
30: 0f 84 72 04 00 00 je 0x4a8
36: 41 0f b6 45 00 movzbl 0x0(%r13),%eax
3b: 38 d8 cmp %bl,%al
3d: 7f 08 jg 0x47
3f: 84 .byte 0x84
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220215235305.3272331-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-02-15 23:53:05 +00:00
|
|
|
int repeat_ttl;
|
2016-08-14 05:35:00 +00:00
|
|
|
|
2017-04-23 17:17:28 +00:00
|
|
|
if (jmp_prgcnt > 0) {
|
|
|
|
jmp_prgcnt -= 1;
|
|
|
|
continue;
|
|
|
|
}
|
2021-12-17 18:16:23 +00:00
|
|
|
|
|
|
|
if (tc_act_skip_sw(a->tcfa_flags))
|
|
|
|
continue;
|
net: sched: limit TC_ACT_REPEAT loops
We have been living dangerously, at the mercy of malicious users,
abusing TC_ACT_REPEAT, as shown by this syzbot report [1].
Add an arbitrary limit (32) to the number of times an action can
return TC_ACT_REPEAT.
v2: switch the limit to 32 instead of 10.
Use net_warn_ratelimited() instead of pr_err_once().
[1] (C repro available on demand)
rcu: INFO: rcu_preempt self-detected stall on CPU
rcu: 1-...!: (10500 ticks this GP) idle=021/1/0x4000000000000000 softirq=5592/5592 fqs=0
(t=10502 jiffies g=5305 q=190)
rcu: rcu_preempt kthread timer wakeup didn't happen for 10502 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402
rcu: Possible timer handling issue on cpu=0 timer-softirq=3527
rcu: rcu_preempt kthread starved for 10505 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0
rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior.
rcu: RCU grace-period kthread stack dump:
task:rcu_preempt state:I stack:29344 pid: 14 ppid: 2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:4986 [inline]
__schedule+0xab2/0x4db0 kernel/sched/core.c:6295
schedule+0xd2/0x260 kernel/sched/core.c:6368
schedule_timeout+0x14a/0x2a0 kernel/time/timer.c:1881
rcu_gp_fqs_loop+0x186/0x810 kernel/rcu/tree.c:1963
rcu_gp_kthread+0x1de/0x320 kernel/rcu/tree.c:2136
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
rcu: Stack dump where RCU GP kthread last ran:
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 3646 Comm: syz-executor358 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:rep_nop arch/x86/include/asm/vdso/processor.h:13 [inline]
RIP: 0010:cpu_relax arch/x86/include/asm/vdso/processor.h:18 [inline]
RIP: 0010:pv_wait_head_or_lock kernel/locking/qspinlock_paravirt.h:437 [inline]
RIP: 0010:__pv_queued_spin_lock_slowpath+0x3b8/0xb40 kernel/locking/qspinlock.c:508
Code: 48 89 eb c6 45 01 01 41 bc 00 80 00 00 48 c1 e9 03 83 e3 07 41 be 01 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8d 2c 01 eb 0c <f3> 90 41 83 ec 01 0f 84 72 04 00 00 41 0f b6 45 00 38 d8 7f 08 84
RSP: 0018:ffffc9000283f1b0 EFLAGS: 00000206
RAX: 0000000000000003 RBX: 0000000000000000 RCX: 1ffff1100fc0071e
RDX: 0000000000000001 RSI: 0000000000000201 RDI: 0000000000000000
RBP: ffff88807e0038f0 R08: 0000000000000001 R09: ffffffff8ffbf9ff
R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000004c1e
R13: ffffed100fc0071e R14: 0000000000000001 R15: ffff8880b9c3aa80
FS: 00005555562bf300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffdbfef12b8 CR3: 00000000723c2000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
pv_queued_spin_lock_slowpath arch/x86/include/asm/paravirt.h:591 [inline]
queued_spin_lock_slowpath arch/x86/include/asm/qspinlock.h:51 [inline]
queued_spin_lock include/asm-generic/qspinlock.h:85 [inline]
do_raw_spin_lock+0x200/0x2b0 kernel/locking/spinlock_debug.c:115
spin_lock_bh include/linux/spinlock.h:354 [inline]
sch_tree_lock include/net/sch_generic.h:610 [inline]
sch_tree_lock include/net/sch_generic.h:605 [inline]
prio_tune+0x3b9/0xb50 net/sched/sch_prio.c:211
prio_init+0x5c/0x80 net/sched/sch_prio.c:244
qdisc_create.constprop.0+0x44a/0x10f0 net/sched/sch_api.c:1253
tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5594
netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
sock_sendmsg_nosec net/socket.c:705 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:725
____sys_sendmsg+0x6e8/0x810 net/socket.c:2413
___sys_sendmsg+0xf3/0x170 net/socket.c:2467
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2496
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f7ee98aae99
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffdbfef12d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007ffdbfef1300 RCX: 00007f7ee98aae99
RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003
RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d
R10: 000000000000000d R11: 0000000000000246 R12: 00007ffdbfef12f0
R13: 00000000000f4240 R14: 000000000004ca47 R15: 00007ffdbfef12e4
</TASK>
INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 2.293 msecs
NMI backtrace for cpu 1
CPU: 1 PID: 3260 Comm: kworker/1:3 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: mld mld_ifc_work
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111
nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62
trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343
print_cpu_stall kernel/rcu/tree_stall.h:604 [inline]
check_cpu_stall kernel/rcu/tree_stall.h:688 [inline]
rcu_pending kernel/rcu/tree.c:3919 [inline]
rcu_sched_clock_irq.cold+0x5c/0x759 kernel/rcu/tree.c:2617
update_process_times+0x16d/0x200 kernel/time/timer.c:1785
tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226
tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428
__run_hrtimer kernel/time/hrtimer.c:1685 [inline]
__hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749
hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811
local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline]
__sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103
sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638
RIP: 0010:__sanitizer_cov_trace_const_cmp4+0xc/0x70 kernel/kcov.c:286
Code: 00 00 00 48 89 7c 30 e8 48 89 4c 30 f0 4c 89 54 d8 20 48 89 10 5b c3 0f 1f 80 00 00 00 00 41 89 f8 bf 03 00 00 00 4c 8b 14 24 <89> f1 65 48 8b 34 25 00 70 02 00 e8 14 f9 ff ff 84 c0 74 4b 48 8b
RSP: 0018:ffffc90002c5eea8 EFLAGS: 00000246
RAX: 0000000000000007 RBX: ffff88801c625800 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: ffff8880137d3100 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff874fcd88 R11: 0000000000000000 R12: ffff88801d692dc0
R13: ffff8880137d3104 R14: 0000000000000000 R15: ffff88801d692de8
tcf_police_act+0x358/0x11d0 net/sched/act_police.c:256
tcf_action_exec net/sched/act_api.c:1049 [inline]
tcf_action_exec+0x1a6/0x530 net/sched/act_api.c:1026
tcf_exts_exec include/net/pkt_cls.h:326 [inline]
route4_classify+0xef0/0x1400 net/sched/cls_route.c:179
__tcf_classify net/sched/cls_api.c:1549 [inline]
tcf_classify+0x3e8/0x9d0 net/sched/cls_api.c:1615
prio_classify net/sched/sch_prio.c:42 [inline]
prio_enqueue+0x3a7/0x790 net/sched/sch_prio.c:75
dev_qdisc_enqueue+0x40/0x300 net/core/dev.c:3668
__dev_xmit_skb net/core/dev.c:3756 [inline]
__dev_queue_xmit+0x1f61/0x3660 net/core/dev.c:4081
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip_finish_output2+0x14dc/0x2170 net/ipv4/ip_output.c:228
__ip_finish_output net/ipv4/ip_output.c:306 [inline]
__ip_finish_output+0x396/0x650 net/ipv4/ip_output.c:288
ip_finish_output+0x32/0x200 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip_output+0x196/0x310 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:451 [inline]
ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:126
iptunnel_xmit+0x628/0xa50 net/ipv4/ip_tunnel_core.c:82
geneve_xmit_skb drivers/net/geneve.c:966 [inline]
geneve_xmit+0x10c8/0x3530 drivers/net/geneve.c:1077
__netdev_start_xmit include/linux/netdevice.h:4683 [inline]
netdev_start_xmit include/linux/netdevice.h:4697 [inline]
xmit_one net/core/dev.c:3473 [inline]
dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3489
__dev_queue_xmit+0x2985/0x3660 net/core/dev.c:4116
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip6_finish_output2+0xf7a/0x14f0 net/ipv6/ip6_output.c:126
__ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
__ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170
ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224
dst_output include/net/dst.h:451 [inline]
NF_HOOK include/linux/netfilter.h:307 [inline]
NF_HOOK include/linux/netfilter.h:301 [inline]
mld_sendpack+0x9a3/0xe40 net/ipv6/mcast.c:1826
mld_send_cr net/ipv6/mcast.c:2127 [inline]
mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2659
process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307
worker_thread+0x657/0x1110 kernel/workqueue.c:2454
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
----------------
Code disassembly (best guess):
0: 48 89 eb mov %rbp,%rbx
3: c6 45 01 01 movb $0x1,0x1(%rbp)
7: 41 bc 00 80 00 00 mov $0x8000,%r12d
d: 48 c1 e9 03 shr $0x3,%rcx
11: 83 e3 07 and $0x7,%ebx
14: 41 be 01 00 00 00 mov $0x1,%r14d
1a: 48 b8 00 00 00 00 00 movabs $0xdffffc0000000000,%rax
21: fc ff df
24: 4c 8d 2c 01 lea (%rcx,%rax,1),%r13
28: eb 0c jmp 0x36
* 2a: f3 90 pause <-- trapping instruction
2c: 41 83 ec 01 sub $0x1,%r12d
30: 0f 84 72 04 00 00 je 0x4a8
36: 41 0f b6 45 00 movzbl 0x0(%r13),%eax
3b: 38 d8 cmp %bl,%al
3d: 7f 08 jg 0x47
3f: 84 .byte 0x84
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220215235305.3272331-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-02-15 23:53:05 +00:00
|
|
|
|
|
|
|
repeat_ttl = 32;
|
2005-04-16 22:20:36 +00:00
|
|
|
repeat:
|
2022-12-06 13:55:12 +00:00
|
|
|
ret = tc_act(skb, a, res);
|
net: sched: limit TC_ACT_REPEAT loops
We have been living dangerously, at the mercy of malicious users,
abusing TC_ACT_REPEAT, as shown by this syzpot report [1].
Add an arbitrary limit (32) to the number of times an action can
return TC_ACT_REPEAT.
v2: switch the limit to 32 instead of 10.
Use net_warn_ratelimited() instead of pr_err_once().
[1] (C repro available on demand)
rcu: INFO: rcu_preempt self-detected stall on CPU
rcu: 1-...!: (10500 ticks this GP) idle=021/1/0x4000000000000000 softirq=5592/5592 fqs=0
(t=10502 jiffies g=5305 q=190)
rcu: rcu_preempt kthread timer wakeup didn't happen for 10502 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402
rcu: Possible timer handling issue on cpu=0 timer-softirq=3527
rcu: rcu_preempt kthread starved for 10505 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0
rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior.
rcu: RCU grace-period kthread stack dump:
task:rcu_preempt state:I stack:29344 pid: 14 ppid: 2 flags:0x00004000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:4986 [inline]
__schedule+0xab2/0x4db0 kernel/sched/core.c:6295
schedule+0xd2/0x260 kernel/sched/core.c:6368
schedule_timeout+0x14a/0x2a0 kernel/time/timer.c:1881
rcu_gp_fqs_loop+0x186/0x810 kernel/rcu/tree.c:1963
rcu_gp_kthread+0x1de/0x320 kernel/rcu/tree.c:2136
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
rcu: Stack dump where RCU GP kthread last ran:
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0
CPU: 0 PID: 3646 Comm: syz-executor358 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:rep_nop arch/x86/include/asm/vdso/processor.h:13 [inline]
RIP: 0010:cpu_relax arch/x86/include/asm/vdso/processor.h:18 [inline]
RIP: 0010:pv_wait_head_or_lock kernel/locking/qspinlock_paravirt.h:437 [inline]
RIP: 0010:__pv_queued_spin_lock_slowpath+0x3b8/0xb40 kernel/locking/qspinlock.c:508
Code: 48 89 eb c6 45 01 01 41 bc 00 80 00 00 48 c1 e9 03 83 e3 07 41 be 01 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8d 2c 01 eb 0c <f3> 90 41 83 ec 01 0f 84 72 04 00 00 41 0f b6 45 00 38 d8 7f 08 84
RSP: 0018:ffffc9000283f1b0 EFLAGS: 00000206
RAX: 0000000000000003 RBX: 0000000000000000 RCX: 1ffff1100fc0071e
RDX: 0000000000000001 RSI: 0000000000000201 RDI: 0000000000000000
RBP: ffff88807e0038f0 R08: 0000000000000001 R09: ffffffff8ffbf9ff
R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000004c1e
R13: ffffed100fc0071e R14: 0000000000000001 R15: ffff8880b9c3aa80
FS: 00005555562bf300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffdbfef12b8 CR3: 00000000723c2000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
pv_queued_spin_lock_slowpath arch/x86/include/asm/paravirt.h:591 [inline]
queued_spin_lock_slowpath arch/x86/include/asm/qspinlock.h:51 [inline]
queued_spin_lock include/asm-generic/qspinlock.h:85 [inline]
do_raw_spin_lock+0x200/0x2b0 kernel/locking/spinlock_debug.c:115
spin_lock_bh include/linux/spinlock.h:354 [inline]
sch_tree_lock include/net/sch_generic.h:610 [inline]
sch_tree_lock include/net/sch_generic.h:605 [inline]
prio_tune+0x3b9/0xb50 net/sched/sch_prio.c:211
prio_init+0x5c/0x80 net/sched/sch_prio.c:244
qdisc_create.constprop.0+0x44a/0x10f0 net/sched/sch_api.c:1253
tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5594
netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
sock_sendmsg_nosec net/socket.c:705 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:725
____sys_sendmsg+0x6e8/0x810 net/socket.c:2413
___sys_sendmsg+0xf3/0x170 net/socket.c:2467
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2496
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f7ee98aae99
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffdbfef12d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007ffdbfef1300 RCX: 00007f7ee98aae99
RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003
RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d
R10: 000000000000000d R11: 0000000000000246 R12: 00007ffdbfef12f0
R13: 00000000000f4240 R14: 000000000004ca47 R15: 00007ffdbfef12e4
</TASK>
INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 2.293 msecs
NMI backtrace for cpu 1
CPU: 1 PID: 3260 Comm: kworker/1:3 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: mld mld_ifc_work
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111
nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62
trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343
print_cpu_stall kernel/rcu/tree_stall.h:604 [inline]
check_cpu_stall kernel/rcu/tree_stall.h:688 [inline]
rcu_pending kernel/rcu/tree.c:3919 [inline]
rcu_sched_clock_irq.cold+0x5c/0x759 kernel/rcu/tree.c:2617
update_process_times+0x16d/0x200 kernel/time/timer.c:1785
tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226
tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428
__run_hrtimer kernel/time/hrtimer.c:1685 [inline]
__hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749
hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811
local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline]
__sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103
sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638
RIP: 0010:__sanitizer_cov_trace_const_cmp4+0xc/0x70 kernel/kcov.c:286
Code: 00 00 00 48 89 7c 30 e8 48 89 4c 30 f0 4c 89 54 d8 20 48 89 10 5b c3 0f 1f 80 00 00 00 00 41 89 f8 bf 03 00 00 00 4c 8b 14 24 <89> f1 65 48 8b 34 25 00 70 02 00 e8 14 f9 ff ff 84 c0 74 4b 48 8b
RSP: 0018:ffffc90002c5eea8 EFLAGS: 00000246
RAX: 0000000000000007 RBX: ffff88801c625800 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: ffff8880137d3100 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff874fcd88 R11: 0000000000000000 R12: ffff88801d692dc0
R13: ffff8880137d3104 R14: 0000000000000000 R15: ffff88801d692de8
tcf_police_act+0x358/0x11d0 net/sched/act_police.c:256
tcf_action_exec net/sched/act_api.c:1049 [inline]
tcf_action_exec+0x1a6/0x530 net/sched/act_api.c:1026
tcf_exts_exec include/net/pkt_cls.h:326 [inline]
route4_classify+0xef0/0x1400 net/sched/cls_route.c:179
__tcf_classify net/sched/cls_api.c:1549 [inline]
tcf_classify+0x3e8/0x9d0 net/sched/cls_api.c:1615
prio_classify net/sched/sch_prio.c:42 [inline]
prio_enqueue+0x3a7/0x790 net/sched/sch_prio.c:75
dev_qdisc_enqueue+0x40/0x300 net/core/dev.c:3668
__dev_xmit_skb net/core/dev.c:3756 [inline]
__dev_queue_xmit+0x1f61/0x3660 net/core/dev.c:4081
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip_finish_output2+0x14dc/0x2170 net/ipv4/ip_output.c:228
__ip_finish_output net/ipv4/ip_output.c:306 [inline]
__ip_finish_output+0x396/0x650 net/ipv4/ip_output.c:288
ip_finish_output+0x32/0x200 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip_output+0x196/0x310 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:451 [inline]
ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:126
iptunnel_xmit+0x628/0xa50 net/ipv4/ip_tunnel_core.c:82
geneve_xmit_skb drivers/net/geneve.c:966 [inline]
geneve_xmit+0x10c8/0x3530 drivers/net/geneve.c:1077
__netdev_start_xmit include/linux/netdevice.h:4683 [inline]
netdev_start_xmit include/linux/netdevice.h:4697 [inline]
xmit_one net/core/dev.c:3473 [inline]
dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3489
__dev_queue_xmit+0x2985/0x3660 net/core/dev.c:4116
neigh_hh_output include/net/neighbour.h:533 [inline]
neigh_output include/net/neighbour.h:547 [inline]
ip6_finish_output2+0xf7a/0x14f0 net/ipv6/ip6_output.c:126
__ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
__ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170
ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201
NF_HOOK_COND include/linux/netfilter.h:296 [inline]
ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224
dst_output include/net/dst.h:451 [inline]
NF_HOOK include/linux/netfilter.h:307 [inline]
NF_HOOK include/linux/netfilter.h:301 [inline]
mld_sendpack+0x9a3/0xe40 net/ipv6/mcast.c:1826
mld_send_cr net/ipv6/mcast.c:2127 [inline]
mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2659
process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307
worker_thread+0x657/0x1110 kernel/workqueue.c:2454
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>
----------------
Code disassembly (best guess):
0: 48 89 eb mov %rbp,%rbx
3: c6 45 01 01 movb $0x1,0x1(%rbp)
7: 41 bc 00 80 00 00 mov $0x8000,%r12d
d: 48 c1 e9 03 shr $0x3,%rcx
11: 83 e3 07 and $0x7,%ebx
14: 41 be 01 00 00 00 mov $0x1,%r14d
1a: 48 b8 00 00 00 00 00 movabs $0xdffffc0000000000,%rax
21: fc ff df
24: 4c 8d 2c 01 lea (%rcx,%rax,1),%r13
28: eb 0c jmp 0x36
* 2a: f3 90 pause <-- trapping instruction
2c: 41 83 ec 01 sub $0x1,%r12d
30: 0f 84 72 04 00 00 je 0x4a8
36: 41 0f b6 45 00 movzbl 0x0(%r13),%eax
3b: 38 d8 cmp %bl,%al
3d: 7f 08 jg 0x47
3f: 84 .byte 0x84
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220215235305.3272331-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-02-15 23:53:05 +00:00
|
|
|
if (unlikely(ret == TC_ACT_REPEAT)) {
|
|
|
|
if (--repeat_ttl != 0)
|
|
|
|
goto repeat;
|
|
|
|
/* suspicious opcode, stop pipeline */
|
|
|
|
net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
|
|
|
|
return TC_ACT_OK;
|
|
|
|
}
|
2017-05-02 08:12:00 +00:00
|
|
|
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
|
2017-04-23 17:17:28 +00:00
|
|
|
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
|
|
|
|
if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
|
|
|
|
/* faulty opcode, stop pipeline */
|
|
|
|
return TC_ACT_OK;
|
|
|
|
} else {
|
|
|
|
jmp_ttl -= 1;
|
|
|
|
if (jmp_ttl > 0)
|
|
|
|
goto restart_act_graph;
|
|
|
|
else /* faulty graph, stop pipeline */
|
|
|
|
return TC_ACT_OK;
|
|
|
|
}
|
2017-05-17 09:08:03 +00:00
|
|
|
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
|
2019-03-20 14:00:16 +00:00
|
|
|
if (unlikely(!rcu_access_pointer(a->goto_chain))) {
|
2023-12-16 20:44:36 +00:00
|
|
|
tcf_set_drop_reason(skb,
|
|
|
|
SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
|
2019-03-20 14:00:16 +00:00
|
|
|
return TC_ACT_SHOT;
|
|
|
|
}
|
2017-05-17 09:08:03 +00:00
|
|
|
tcf_action_goto_chain_exec(a, res);
|
2017-04-23 17:17:28 +00:00
|
|
|
}
|
|
|
|
|
2013-12-23 13:02:12 +00:00
|
|
|
if (ret != TC_ACT_PIPE)
|
2017-01-07 22:06:35 +00:00
|
|
|
break;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2017-04-23 17:17:28 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
return ret;
|
|
|
|
}
|
2008-01-23 06:10:23 +00:00
|
|
|
EXPORT_SYMBOL(tcf_action_exec);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2018-07-05 14:24:33 +00:00
|
|
|
int tcf_action_destroy(struct tc_action *actions[], int bind)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2017-09-13 15:32:37 +00:00
|
|
|
const struct tc_action_ops *ops;
|
2018-07-05 14:24:33 +00:00
|
|
|
struct tc_action *a;
|
|
|
|
int ret = 0, i;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2023-12-01 17:50:12 +00:00
|
|
|
tcf_act_for_each_action(i, a, actions) {
|
2018-07-05 14:24:33 +00:00
|
|
|
actions[i] = NULL;
|
2017-09-13 15:32:37 +00:00
|
|
|
ops = a->ops;
|
2017-08-30 06:31:59 +00:00
|
|
|
ret = __tcf_idr_release(a, bind, true);
|
2014-02-12 01:07:34 +00:00
|
|
|
if (ret == ACT_P_DELETED)
|
2017-09-13 15:32:37 +00:00
|
|
|
module_put(ops->owner);
|
2014-02-12 01:07:34 +00:00
|
|
|
else if (ret < 0)
|
|
|
|
return ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2014-02-12 01:07:34 +00:00
|
|
|
return ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:29 +00:00
|
|
|
static int tcf_action_put(struct tc_action *p)
|
|
|
|
{
|
|
|
|
return __tcf_action_put(p, false);
|
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:33 +00:00
|
|
|
static void tcf_action_put_many(struct tc_action *actions[])
|
2018-07-05 14:24:31 +00:00
|
|
|
{
|
2023-12-01 17:50:13 +00:00
|
|
|
struct tc_action *a;
|
2018-07-05 14:24:33 +00:00
|
|
|
int i;
|
2018-07-05 14:24:31 +00:00
|
|
|
|
2023-12-01 17:50:13 +00:00
|
|
|
tcf_act_for_each_action(i, a, actions) {
|
|
|
|
const struct tc_action_ops *ops = a->ops;
|
|
|
|
if (tcf_action_put(a))
|
|
|
|
module_put(ops->owner);
|
|
|
|
}
|
|
|
|
}
|
2018-07-05 14:24:31 +00:00
|
|
|
|
2023-12-01 17:50:13 +00:00
|
|
|
static void tca_put_bound_many(struct tc_action *actions[], int init_res[])
|
|
|
|
{
|
|
|
|
struct tc_action *a;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
tcf_act_for_each_action(i, a, actions) {
|
|
|
|
const struct tc_action_ops *ops = a->ops;
|
|
|
|
|
|
|
|
if (init_res[i] == ACT_P_CREATED)
|
2018-08-19 19:22:05 +00:00
|
|
|
continue;
|
2023-12-01 17:50:13 +00:00
|
|
|
|
2018-07-05 14:24:31 +00:00
|
|
|
if (tcf_action_put(a))
|
|
|
|
module_put(ops->owner);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
int
|
|
|
|
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
|
|
|
|
{
|
|
|
|
return a->ops->dump(skb, a, bind, ref);
|
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:33 +00:00
|
|
|
/*
 * Dump every action in @actions into @skb, each inside its own nested
 * attribute whose type is the action's 1-based position in the array.
 *
 * @skb:     message buffer being filled
 * @actions: array of action pointers
 * @bind:    forwarded to tcf_action_dump_1() (non-terse mode only)
 * @ref:     forwarded to tcf_action_dump_1() (non-terse mode only)
 * @terse:   when true use tcf_action_dump_terse() instead of
 *           tcf_action_dump_1()
 *
 * Returns 0 on success. If opening a nest fails, returns -EINVAL; if a
 * per-action dump fails, returns its negative error. In both failure cases
 * nla_nest_cancel() is called on the current nest before returning.
 */
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
		    int bind, int ref, bool terse)
{
	struct nlattr *nest;
	struct tc_action *act;
	int idx;
	int err;

	tcf_act_for_each_action(idx, act, actions) {
		nest = nla_nest_start_noflag(skb, idx + 1);
		if (!nest) {
			err = -EINVAL;
			goto cancel;
		}

		if (terse)
			err = tcf_action_dump_terse(skb, act, false);
		else
			err = tcf_action_dump_1(skb, act, bind, ref);
		if (err < 0)
			goto cancel;

		nla_nest_end(skb, nest);
	}

	return 0;

cancel:
	nla_nest_cancel(skb, nest);
	return err;
}
|
|
|
|
|
2017-04-20 12:08:26 +00:00
|
|
|
static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
|
2017-01-24 12:02:41 +00:00
|
|
|
{
|
2017-04-20 12:08:26 +00:00
|
|
|
struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL);
|
|
|
|
if (!c)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
|
|
|
|
if (!c->data) {
|
|
|
|
kfree(c);
|
|
|
|
return NULL;
|
2017-01-24 12:02:41 +00:00
|
|
|
}
|
2017-04-20 12:08:26 +00:00
|
|
|
c->len = nla_len(tb[TCA_ACT_COOKIE]);
|
2017-01-24 12:02:41 +00:00
|
|
|
|
2017-04-20 12:08:26 +00:00
|
|
|
return c;
|
2017-01-24 12:02:41 +00:00
|
|
|
}
|
|
|
|
|
2020-03-19 23:26:23 +00:00
|
|
|
static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
|
2020-03-07 11:40:20 +00:00
|
|
|
{
|
2020-03-19 23:26:23 +00:00
|
|
|
struct nla_bitfield32 hw_stats_bf;
|
2020-03-07 11:40:20 +00:00
|
|
|
|
|
|
|
/* If the user did not pass the attr, that means he does
|
|
|
|
* not care about the type. Return "any" in that case
|
|
|
|
* which is setting on all supported types.
|
|
|
|
*/
|
2020-03-19 23:26:23 +00:00
|
|
|
if (!hw_stats_attr)
|
|
|
|
return TCA_ACT_HW_STATS_ANY;
|
|
|
|
hw_stats_bf = nla_get_bitfield32(hw_stats_attr);
|
|
|
|
return hw_stats_bf.value;
|
2020-03-07 11:40:20 +00:00
|
|
|
}
|
|
|
|
|
2019-09-19 01:44:43 +00:00
|
|
|
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
|
2019-10-07 20:26:29 +00:00
|
|
|
[TCA_ACT_KIND] = { .type = NLA_STRING },
|
2019-09-19 01:44:43 +00:00
|
|
|
[TCA_ACT_INDEX] = { .type = NLA_U32 },
|
|
|
|
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
|
|
|
|
.len = TC_COOKIE_MAX_SIZE },
|
|
|
|
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
|
2021-12-17 18:16:23 +00:00
|
|
|
[TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS |
|
|
|
|
TCA_ACT_FLAGS_SKIP_HW |
|
|
|
|
TCA_ACT_FLAGS_SKIP_SW),
|
2020-04-30 20:13:05 +00:00
|
|
|
[TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
|
2019-09-19 01:44:43 +00:00
|
|
|
};
|
|
|
|
|
2023-12-11 18:18:07 +00:00
|
|
|
void tcf_idr_insert_many(struct tc_action *actions[], int init_res[])
|
2020-09-23 03:56:23 +00:00
|
|
|
{
|
2023-12-01 17:50:15 +00:00
|
|
|
struct tc_action *a;
|
2020-09-23 03:56:24 +00:00
|
|
|
int i;
|
2020-09-23 03:56:23 +00:00
|
|
|
|
2023-12-01 17:50:15 +00:00
|
|
|
tcf_act_for_each_action(i, a, actions) {
|
2020-09-23 03:56:24 +00:00
|
|
|
struct tcf_idrinfo *idrinfo;
|
|
|
|
|
2023-12-29 13:26:41 +00:00
|
|
|
if (init_res[i] == ACT_P_BOUND)
|
2023-12-11 18:18:07 +00:00
|
|
|
continue;
|
|
|
|
|
2020-09-23 03:56:24 +00:00
|
|
|
idrinfo = a->idrinfo;
|
|
|
|
mutex_lock(&idrinfo->lock);
|
2023-12-11 18:18:07 +00:00
|
|
|
/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
|
2020-09-23 03:56:24 +00:00
|
|
|
idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
|
|
|
|
mutex_unlock(&idrinfo->lock);
|
|
|
|
}
|
2020-09-23 03:56:23 +00:00
|
|
|
}
|
|
|
|
|
2024-01-05 00:38:10 +00:00
|
|
|
struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2024-01-05 00:38:10 +00:00
|
|
|
bool police = flags & TCA_ACT_FLAGS_POLICE;
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
struct nlattr *tb[TCA_ACT_MAX + 1];
|
2005-04-16 22:20:36 +00:00
|
|
|
struct tc_action_ops *a_o;
|
|
|
|
char act_name[IFNAMSIZ];
|
2008-01-23 06:11:50 +00:00
|
|
|
struct nlattr *kind;
|
2008-01-24 04:33:13 +00:00
|
|
|
int err;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2021-07-29 23:12:14 +00:00
|
|
|
if (!police) {
|
2019-09-19 01:44:43 +00:00
|
|
|
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
|
|
|
|
tcf_action_policy, extack);
|
2008-01-24 04:33:32 +00:00
|
|
|
if (err < 0)
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return ERR_PTR(err);
|
2008-01-24 04:33:32 +00:00
|
|
|
err = -EINVAL;
|
2008-01-23 06:11:50 +00:00
|
|
|
kind = tb[TCA_ACT_KIND];
|
2018-02-15 15:54:55 +00:00
|
|
|
if (!kind) {
|
|
|
|
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return ERR_PTR(err);
|
2018-02-15 15:54:55 +00:00
|
|
|
}
|
2020-11-15 17:08:06 +00:00
|
|
|
if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
|
2019-10-07 20:26:29 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "TC action name too long");
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return ERR_PTR(err);
|
2019-10-07 20:26:29 +00:00
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
} else {
|
2023-07-10 03:07:11 +00:00
|
|
|
if (strscpy(act_name, "police", IFNAMSIZ) < 0) {
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "TC action name too long");
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
2018-02-15 15:54:55 +00:00
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
a_o = tc_lookup_action_n(act_name);
|
|
|
|
if (a_o == NULL) {
|
2008-10-16 22:24:51 +00:00
|
|
|
#ifdef CONFIG_MODULES
|
2024-01-05 00:38:10 +00:00
|
|
|
bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
|
|
|
|
|
2018-07-05 14:24:25 +00:00
|
|
|
if (rtnl_held)
|
|
|
|
rtnl_unlock();
|
2024-02-01 13:09:42 +00:00
|
|
|
request_module(NET_ACT_ALIAS_PREFIX "%s", act_name);
|
2018-07-05 14:24:25 +00:00
|
|
|
if (rtnl_held)
|
|
|
|
rtnl_lock();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
a_o = tc_lookup_action_n(act_name);
|
|
|
|
|
|
|
|
/* We dropped the RTNL semaphore in order to
|
|
|
|
* perform the module load. So, even if we
|
|
|
|
* succeeded in loading the module we have to
|
|
|
|
* tell the caller to replay the request. We
|
|
|
|
* indicate this using -EAGAIN.
|
|
|
|
*/
|
|
|
|
if (a_o != NULL) {
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
module_put(a_o->owner);
|
|
|
|
return ERR_PTR(-EAGAIN);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
#endif
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Failed to load TC action module");
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return ERR_PTR(-ENOENT);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
return a_o;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
|
|
|
|
struct nlattr *nla, struct nlattr *est,
|
2021-04-07 15:36:03 +00:00
|
|
|
struct tc_action_ops *a_o, int *init_res,
|
2021-07-29 23:12:14 +00:00
|
|
|
u32 flags, struct netlink_ext_ack *extack)
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
{
|
2021-07-29 23:12:14 +00:00
|
|
|
bool police = flags & TCA_ACT_FLAGS_POLICE;
|
|
|
|
struct nla_bitfield32 userflags = { 0, 0 };
|
2023-02-17 22:36:13 +00:00
|
|
|
struct tc_cookie *user_cookie = NULL;
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
|
|
|
|
struct nlattr *tb[TCA_ACT_MAX + 1];
|
|
|
|
struct tc_action *a;
|
|
|
|
int err;
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/* backward compatibility for policer */
|
2021-07-29 23:12:14 +00:00
|
|
|
if (!police) {
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
|
|
|
|
tcf_action_policy, extack);
|
|
|
|
if (err < 0)
|
|
|
|
return ERR_PTR(err);
|
|
|
|
if (tb[TCA_ACT_COOKIE]) {
|
2023-02-17 22:36:13 +00:00
|
|
|
user_cookie = nla_memdup_cookie(tb);
|
|
|
|
if (!user_cookie) {
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
|
2021-12-17 18:16:23 +00:00
|
|
|
if (tb[TCA_ACT_FLAGS]) {
|
2021-07-29 23:12:14 +00:00
|
|
|
userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
|
2021-12-17 18:16:23 +00:00
|
|
|
if (!tc_act_flags_valid(userflags.value)) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
}
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
|
2021-07-29 23:12:14 +00:00
|
|
|
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
|
|
|
|
userflags.value | flags, extack);
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
} else {
|
2021-07-29 23:12:14 +00:00
|
|
|
err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
|
|
|
|
extack);
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
}
|
2008-01-24 04:33:13 +00:00
|
|
|
if (err < 0)
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
goto err_out;
|
2021-04-07 15:36:03 +00:00
|
|
|
*init_res = err;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2021-07-29 23:12:14 +00:00
|
|
|
if (!police && tb[TCA_ACT_COOKIE])
|
2023-02-17 22:36:13 +00:00
|
|
|
tcf_set_action_cookie(&a->user_cookie, user_cookie);
|
2017-01-24 12:02:41 +00:00
|
|
|
|
2021-07-29 23:12:14 +00:00
|
|
|
if (!police)
|
2020-03-19 23:26:23 +00:00
|
|
|
a->hw_stats = hw_stats;
|
2020-03-07 11:40:20 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
return a;
|
|
|
|
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
err_out:
|
2023-02-17 22:36:13 +00:00
|
|
|
if (user_cookie) {
|
|
|
|
kfree(user_cookie->data);
|
|
|
|
kfree(user_cookie);
|
2017-04-20 12:08:26 +00:00
|
|
|
}
|
2008-01-24 04:33:13 +00:00
|
|
|
return ERR_PTR(err);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2021-12-17 18:16:22 +00:00
|
|
|
/* Report whether the TCA_ACT_FLAGS_BIND bit is set in @flags. */
static bool tc_act_bind(u32 flags)
{
	return (flags & TCA_ACT_FLAGS_BIND) != 0;
}
|
|
|
|
|
2018-07-05 14:24:33 +00:00
|
|
|
/* Returns the number of initialized actions or a negative error. */
|
|
|
|
|
2017-05-17 09:08:02 +00:00
|
|
|
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
|
2021-07-29 23:12:14 +00:00
|
|
|
struct nlattr *est, struct tc_action *actions[],
|
2021-12-17 18:16:28 +00:00
|
|
|
int init_res[], size_t *attr_size,
|
|
|
|
u32 flags, u32 fl_flags,
|
2021-07-29 23:12:14 +00:00
|
|
|
struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problem is deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
|
2011-01-19 19:26:56 +00:00
|
|
|
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
|
2013-12-16 04:15:05 +00:00
|
|
|
struct tc_action *act;
|
2018-03-08 21:59:19 +00:00
|
|
|
size_t sz = 0;
|
2008-01-24 04:33:32 +00:00
|
|
|
int err;
|
2005-04-16 22:20:36 +00:00
|
|
|
int i;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
|
|
|
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
|
|
|
|
extack);
|
2008-01-24 04:33:32 +00:00
|
|
|
if (err < 0)
|
2013-12-16 04:15:05 +00:00
|
|
|
return err;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problems is a deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
|
|
|
|
struct tc_action_ops *a_o;
|
|
|
|
|
2024-01-05 00:38:10 +00:00
|
|
|
a_o = tc_action_load_ops(tb[i], flags, extack);
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problems is a deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
if (IS_ERR(a_o)) {
|
|
|
|
err = PTR_ERR(a_o);
|
|
|
|
goto err_mod;
|
|
|
|
}
|
|
|
|
ops[i - 1] = a_o;
|
|
|
|
}
|
|
|
|
|
2008-01-23 06:11:50 +00:00
|
|
|
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
|
2021-07-29 23:12:14 +00:00
|
|
|
act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
|
|
|
|
&init_res[i - 1], flags, extack);
|
2013-12-16 04:15:05 +00:00
|
|
|
if (IS_ERR(act)) {
|
|
|
|
err = PTR_ERR(act);
|
2005-04-16 22:20:36 +00:00
|
|
|
goto err;
|
2013-12-16 04:15:05 +00:00
|
|
|
}
|
2018-03-08 21:59:19 +00:00
|
|
|
sz += tcf_action_fill_size(act);
|
2018-07-05 14:24:33 +00:00
|
|
|
/* Start from index 0 */
|
|
|
|
actions[i - 1] = act;
|
2021-12-17 18:16:28 +00:00
|
|
|
if (tc_act_bind(flags)) {
|
|
|
|
bool skip_sw = tc_skip_sw(fl_flags);
|
|
|
|
bool skip_hw = tc_skip_hw(fl_flags);
|
|
|
|
|
net/sched: act_api: deny mismatched skip_sw/skip_hw flags for actions created by classifiers
tcf_action_init() has logic for checking mismatches between action and
filter offload flags (skip_sw/skip_hw). AFAIU, this is intended to run
on the transition between the new tc_act_bind(flags) returning true (aka
now gets bound to classifier) and tc_act_bind(act->tcfa_flags) returning
false (aka action was not bound to classifier before). Otherwise, the
check is skipped.
For the case where an action is not standalone, but rather it was
created by a classifier and is bound to it, tcf_action_init() skips the
check entirely, and this means it allows mismatched flags to occur.
Taking the matchall classifier code path as an example (with mirred as
an action), the reason is the following:
1 | mall_change()
2 | -> mall_replace_hw_filter()
3 | -> tcf_exts_validate_ex()
4 | -> flags |= TCA_ACT_FLAGS_BIND;
5 | -> tcf_action_init()
6 | -> tcf_action_init_1()
7 | -> a_o->init()
8 | -> tcf_mirred_init()
9 | -> tcf_idr_create_from_flags()
10 | -> tcf_idr_create()
11 | -> p->tcfa_flags = flags;
12 | -> tc_act_bind(flags))
13 | -> tc_act_bind(act->tcfa_flags)
When invoked from tcf_exts_validate_ex() like matchall does (but other
classifiers validate their extensions as well), tcf_action_init() runs
in a call path where "flags" always contains TCA_ACT_FLAGS_BIND (set by
line 4). So line 12 is always true, and line 13 is always true as well.
No transition ever takes place, and the check is skipped.
The code was added in this form in commit c86e0209dc77 ("flow_offload:
validate flags of filter and actions"), but I'm attributing the blame
even earlier in that series, to when TCA_ACT_FLAGS_SKIP_HW and
TCA_ACT_FLAGS_SKIP_SW were added to the UAPI.
Following the development process of this change, the check did not
always exist in this form. A change took place between v3 [1] and v4 [2],
AFAIU due to review feedback that it doesn't make sense for action flags
to be different than classifier flags. I think I agree with that
feedback, but it was translated into code that omits enforcing this for
"classic" actions created at the same time with the filters themselves.
There are 3 more important cases to discuss. First there is this command:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1
which should be allowed, because prior to the concept of dedicated
action flags, it used to work and it used to mean the action inherited
the skip_sw/skip_hw flags from the classifier. It's not a mismatch.
Then we have this command:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1 skip_hw
where there is a mismatch and it should be rejected.
Finally, we have:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1 skip_sw
where the offload flags coincide, and this should be treated the same as
the first command based on inheritance, and accepted.
[1]: https://lore.kernel.org/netdev/20211028110646.13791-9-simon.horman@corigine.com/
[2]: https://lore.kernel.org/netdev/20211118130805.23897-10-simon.horman@corigine.com/
Fixes: 7adc57651211 ("flow_offload: add skip_hw and skip_sw to control if offload the action")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20241017161049.3570037-1-vladimir.oltean@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-10-17 16:10:48 +00:00
|
|
|
if (tc_act_bind(act->tcfa_flags)) {
|
|
|
|
/* Action is created by classifier and is not
|
|
|
|
* standalone. Check that the user did not set
|
|
|
|
* any action flags different than the
|
|
|
|
* classifier flags, and inherit the flags from
|
|
|
|
* the classifier for the compatibility case
|
|
|
|
* where no flags were specified at all.
|
|
|
|
*/
|
|
|
|
if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) ||
|
|
|
|
(tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) {
|
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"Mismatch between action and filter offload flags");
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
if (skip_sw)
|
|
|
|
act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW;
|
|
|
|
if (skip_hw)
|
|
|
|
act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW;
|
2021-12-17 18:16:28 +00:00
|
|
|
continue;
|
net/sched: act_api: deny mismatched skip_sw/skip_hw flags for actions created by classifiers
tcf_action_init() has logic for checking mismatches between action and
filter offload flags (skip_sw/skip_hw). AFAIU, this is intended to run
on the transition between the new tc_act_bind(flags) returning true (aka
now gets bound to classifier) and tc_act_bind(act->tcfa_flags) returning
false (aka action was not bound to classifier before). Otherwise, the
check is skipped.
For the case where an action is not standalone, but rather it was
created by a classifier and is bound to it, tcf_action_init() skips the
check entirely, and this means it allows mismatched flags to occur.
Taking the matchall classifier code path as an example (with mirred as
an action), the reason is the following:
1 | mall_change()
2 | -> mall_replace_hw_filter()
3 | -> tcf_exts_validate_ex()
4 | -> flags |= TCA_ACT_FLAGS_BIND;
5 | -> tcf_action_init()
6 | -> tcf_action_init_1()
7 | -> a_o->init()
8 | -> tcf_mirred_init()
9 | -> tcf_idr_create_from_flags()
10 | -> tcf_idr_create()
11 | -> p->tcfa_flags = flags;
12 | -> tc_act_bind(flags))
13 | -> tc_act_bind(act->tcfa_flags)
When invoked from tcf_exts_validate_ex() like matchall does (but other
classifiers validate their extensions as well), tcf_action_init() runs
in a call path where "flags" always contains TCA_ACT_FLAGS_BIND (set by
line 4). So line 12 is always true, and line 13 is always true as well.
No transition ever takes place, and the check is skipped.
The code was added in this form in commit c86e0209dc77 ("flow_offload:
validate flags of filter and actions"), but I'm attributing the blame
even earlier in that series, to when TCA_ACT_FLAGS_SKIP_HW and
TCA_ACT_FLAGS_SKIP_SW were added to the UAPI.
Following the development process of this change, the check did not
always exist in this form. A change took place between v3 [1] and v4 [2],
AFAIU due to review feedback that it doesn't make sense for action flags
to be different than classifier flags. I think I agree with that
feedback, but it was translated into code that omits enforcing this for
"classic" actions created at the same time with the filters themselves.
There are 3 more important cases to discuss. First there is this command:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1
which should be allowed, because prior to the concept of dedicated
action flags, it used to work and it used to mean the action inherited
the skip_sw/skip_hw flags from the classifier. It's not a mismatch.
Then we have this command:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1 skip_hw
where there is a mismatch and it should be rejected.
Finally, we have:
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
action mirred ingress mirror dev eth1 skip_sw
where the offload flags coincide, and this should be treated the same as
the first command based on inheritance, and accepted.
[1]: https://lore.kernel.org/netdev/20211028110646.13791-9-simon.horman@corigine.com/
[2]: https://lore.kernel.org/netdev/20211118130805.23897-10-simon.horman@corigine.com/
Fixes: 7adc57651211 ("flow_offload: add skip_hw and skip_sw to control if offload the action")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20241017161049.3570037-1-vladimir.oltean@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-10-17 16:10:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Action is standalone */
|
2021-12-17 18:16:28 +00:00
|
|
|
if (skip_sw != tc_act_skip_sw(act->tcfa_flags) ||
|
|
|
|
skip_hw != tc_act_skip_hw(act->tcfa_flags)) {
|
2022-03-02 03:29:29 +00:00
|
|
|
NL_SET_ERR_MSG(extack,
|
|
|
|
"Mismatch between action and filter offload flags");
|
2021-12-17 18:16:28 +00:00
|
|
|
err = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
} else {
|
2021-12-17 18:16:23 +00:00
|
|
|
err = tcf_action_offload_add(act, extack);
|
|
|
|
if (tc_act_skip_sw(act->tcfa_flags) && err)
|
|
|
|
goto err;
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2016-09-19 23:02:51 +00:00
|
|
|
|
2020-09-23 03:56:24 +00:00
|
|
|
/* We have to commit them all together, because if any error happened in
|
|
|
|
* between, we could not handle the failure gracefully.
|
|
|
|
*/
|
2023-12-11 18:18:07 +00:00
|
|
|
tcf_idr_insert_many(actions, init_res);
|
2020-09-23 03:56:24 +00:00
|
|
|
|
2018-03-08 21:59:19 +00:00
|
|
|
*attr_size = tcf_action_full_attrs_size(sz);
|
2021-04-07 15:36:04 +00:00
|
|
|
err = i - 1;
|
|
|
|
goto err_mod;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
err:
|
2021-07-29 23:12:14 +00:00
|
|
|
tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
|
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.
One of the problems is a deadlock:
CPU 0 CPU 1
rtnl_lock();
for (...) {
tcf_action_init_1();
-> rtnl_unlock();
-> request_module();
rtnl_lock();
for (...) {
tcf_action_init_1();
-> tcf_idr_check_alloc();
// Insert one action into idr,
// but it is not committed until
// tcf_idr_insert_many(), then drop
// the RTNL lock in the _next_
// iteration
-> rtnl_unlock();
-> rtnl_lock();
-> a_o->init();
-> tcf_idr_check_alloc();
// Now waiting for the same index
// to be committed
-> request_module();
-> rtnl_lock()
// Now waiting for RTNL lock
}
rtnl_unlock();
}
rtnl_unlock();
This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
a_o = tc_action_load_ops(name, tb[i]...);
ops[i - 1] = a_o;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(ops[i - 1]...);
}
Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.
This patch has been tested by syzbot and tested with tdc.py by me.
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-17 00:56:57 +00:00
|
|
|
err_mod:
|
2023-12-01 17:50:14 +00:00
|
|
|
for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++)
|
|
|
|
module_put(ops[i]->owner);
|
2013-12-16 04:15:05 +00:00
|
|
|
return err;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2020-06-19 06:01:07 +00:00
|
|
|
/*
 * Add @bytes, @packets and @drops to the statistics of action @a.
 *
 * If the action has per-CPU counters (a->cpu_bstats is non-NULL) the
 * counters of the current CPU are updated; otherwise the counters embedded
 * in the action itself are used.  When @hw is true the matching *_bstats_hw
 * counter receives the same byte/packet update.
 */
void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
			     u64 drops, bool hw)
{
	if (!a->cpu_bstats) {
		/* No per-CPU storage: fall back to the embedded counters. */
		_bstats_update(&a->tcfa_bstats, bytes, packets);
		a->tcfa_qstats.drops += drops;
		if (hw)
			_bstats_update(&a->tcfa_bstats_hw, bytes, packets);
		return;
	}

	_bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
	this_cpu_ptr(a->cpu_qstats)->drops += drops;
	if (hw)
		_bstats_update(this_cpu_ptr(a->cpu_bstats_hw), bytes, packets);
}
EXPORT_SYMBOL(tcf_action_update_stats);
|
|
|
|
|
2016-07-25 23:09:42 +00:00
|
|
|
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
|
2005-04-16 22:20:36 +00:00
|
|
|
int compat_mode)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
struct gnet_dump d;
|
2007-02-09 14:25:16 +00:00
|
|
|
|
2014-01-10 00:14:05 +00:00
|
|
|
if (p == NULL)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto errout;
|
|
|
|
|
|
|
|
/* compat_mode being true specifies a call that is supposed
|
2009-09-29 01:43:57 +00:00
|
|
|
* to add additional backward compatibility statistic TLVs.
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
|
|
|
if (compat_mode) {
|
2016-07-25 23:09:42 +00:00
|
|
|
if (p->type == TCA_OLD_COMPAT)
|
2005-04-16 22:20:36 +00:00
|
|
|
err = gnet_stats_start_copy_compat(skb, 0,
|
2016-04-26 08:06:18 +00:00
|
|
|
TCA_STATS,
|
|
|
|
TCA_XSTATS,
|
2016-07-25 23:09:42 +00:00
|
|
|
&p->tcfa_lock, &d,
|
2016-04-26 08:06:18 +00:00
|
|
|
TCA_PAD);
|
2005-04-16 22:20:36 +00:00
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
} else
|
|
|
|
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
|
2016-07-25 23:09:42 +00:00
|
|
|
&p->tcfa_lock, &d, TCA_ACT_PAD);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (err < 0)
|
|
|
|
goto errout;
|
|
|
|
|
net: sched: Remove Qdisc::running sequence counter
The Qdisc::running sequence counter has two uses:
1. Reliably reading qdisc's tc statistics while the qdisc is running
(a seqcount read/retry loop at gnet_stats_add_basic()).
2. As a flag, indicating whether the qdisc in question is running
(without any retry loops).
For the first usage, the Qdisc::running sequence counter write section,
qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what
is actually needed: the raw qdisc's bstats update. A u64_stats sync
point was thus introduced (in previous commits) inside the bstats
structure itself. A local u64_stats write section is then started and
stopped for the bstats updates.
Use that u64_stats sync point mechanism for the bstats read/retry loop
at gnet_stats_add_basic().
For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
accessed with atomic bitops, is sufficient. Using a bit flag instead of
a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads
to the SMP barriers implicitly added through raw_read_seqcount() and
write_seqcount_begin/end() getting removed. All call sites have been
surveyed though, and no required ordering was identified.
Now that the qdisc->running sequence counter is no longer used, remove
it.
Note, using u64_stats implies no sequence counter protection for 64-bit
architectures. This can lead to the qdisc tc statistics "packets" vs.
"bytes" values getting out of sync on rare occasions. The individual
values will still be valid.
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-16 08:49:10 +00:00
|
|
|
if (gnet_stats_copy_basic(&d, p->cpu_bstats,
|
|
|
|
&p->tcfa_bstats, false) < 0 ||
|
|
|
|
gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
|
|
|
|
&p->tcfa_bstats_hw, false) < 0 ||
|
2016-12-04 17:48:16 +00:00
|
|
|
gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
|
2015-07-06 12:18:04 +00:00
|
|
|
gnet_stats_copy_queue(&d, p->cpu_qstats,
|
2016-07-25 23:09:42 +00:00
|
|
|
&p->tcfa_qstats,
|
|
|
|
p->tcfa_qstats.qlen) < 0)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto errout;
|
|
|
|
|
|
|
|
if (gnet_stats_finish_copy(&d) < 0)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
errout:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-07-05 14:24:33 +00:00
|
|
|
/*
 * Build a netlink message of type @event in @skb that carries @actions.
 *
 * The message consists of a struct tcamsg header, an optional
 * TCA_ROOT_EXT_WARN_MSG string (emitted when @extack holds a message) and a
 * TCA_ACT_TAB nest filled by tcf_action_dump() using @bind and @ref.
 *
 * Returns skb->len on success.  On any failure the skb is trimmed back to
 * the tail position it had on entry and -1 is returned.
 */
static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
			u32 portid, u32 seq, u16 flags, int event, int bind,
			int ref, struct netlink_ext_ack *extack)
{
	/* Tail on entry: start of our message and the rollback point. */
	unsigned char *start = skb_tail_pointer(skb);
	struct nlmsghdr *nlh;
	struct nlattr *tab;
	struct tcamsg *hdr;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		goto trim;

	hdr = nlmsg_data(nlh);
	hdr->tca_family = AF_UNSPEC;
	hdr->tca__pad1 = 0;
	hdr->tca__pad2 = 0;

	if (extack && extack->_msg) {
		if (nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg))
			goto trim;
	}

	tab = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (!tab)
		goto trim;

	if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
		goto trim;

	nla_nest_end(skb, tab);

	/* Message length is everything appended since entry. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - start;

	return skb->len;

trim:
	nlmsg_trim(skb, start);
	return -1;
}
|
|
|
|
|
|
|
|
static int
|
2017-07-13 17:12:18 +00:00
|
|
|
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
|
2018-07-05 14:24:33 +00:00
|
|
|
struct tc_action *actions[], int event,
|
2018-02-15 15:54:55 +00:00
|
|
|
struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
|
|
|
|
if (!skb)
|
|
|
|
return -ENOBUFS;
|
2016-06-05 14:41:32 +00:00
|
|
|
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
|
2023-01-13 03:43:53 +00:00
|
|
|
0, 1, NULL) <= 0) {
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
|
2005-04-16 22:20:36 +00:00
|
|
|
kfree_skb(skb);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2006-08-15 07:30:25 +00:00
|
|
|
|
2012-09-07 20:12:54 +00:00
|
|
|
return rtnl_unicast(skb, net, portid);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2016-02-22 23:57:53 +00:00
|
|
|
/*
 * Look up a single action described by the nested attribute @nla.
 *
 * The attribute must carry a TCA_ACT_INDEX of at least sizeof(int) bytes;
 * the action ops are looked up from TCA_ACT_KIND and the action itself from
 * the index.
 *
 * Returns the action on success, or an ERR_PTR():
 *   - the nla_parse_nested_deprecated() error if parsing fails,
 *   - -EINVAL if the index attribute is missing/too short or the kind
 *     lookup yields no ops,
 *   - -ENOENT if no action is found for the index.
 */
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
					  struct nlmsghdr *n, u32 portid,
					  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	struct nlattr *idx_attr;
	struct tc_action *a;
	int index;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		return ERR_PTR(err);

	idx_attr = tb[TCA_ACT_INDEX];
	if (!idx_attr || nla_len(idx_attr) < sizeof(index)) {
		NL_SET_ERR_MSG(extack, "Invalid TC action index value");
		return ERR_PTR(-EINVAL);
	}
	index = nla_get_u32(idx_attr);

	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
	if (!ops) {
		/* could happen in batch of actions */
		NL_SET_ERR_MSG(extack, "Specified TC action kind not found");
		return ERR_PTR(-EINVAL);
	}

	if (__tcf_idr_search(net, ops, &a, index) == 0) {
		NL_SET_ERR_MSG(extack, "TC action with specified index not found");
		a = ERR_PTR(-ENOENT);
	}

	/* The module reference is dropped whether or not the search hit
	 * (presumably it was taken by tc_lookup_action() - confirm there).
	 */
	module_put(ops->owner);
	return a;
}
|
|
|
|
|
2010-03-19 15:40:13 +00:00
|
|
|
/*
 * Flush all actions of the kind named in the nested attribute @nla and
 * send an RTM_DELACTION notification for it.
 *
 * A notification skb is built up front (tcamsg header + TCA_ACT_TAB nest);
 * __tcf_generic_walker() is then run with RTM_DELACTION on the ops found
 * for TCA_ACT_KIND.  If the walker returns a positive value the message is
 * finalised, flagged NLM_F_ROOT and sent to RTNLGRP_TC.
 *
 * Returns:
 *   - -ENOMEM if the skb cannot be allocated,
 *   - the parse error if @nla cannot be parsed,
 *   - -EINVAL if the kind lookup fails or the message header / nest cannot
 *     be added,
 *   - the walker's return value if it is <= 0,
 *   - otherwise the result of rtnetlink_send().
 */
static int tca_action_flush(struct net *net, struct nlattr *nla,
			    struct nlmsghdr *n, u32 portid,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	struct netlink_callback dcb;
	unsigned char *start;
	struct nlmsghdr *nlh;
	struct nlattr *tab;
	struct sk_buff *skb;
	struct tcamsg *hdr;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	/* Remember where the message begins to compute its length later. */
	start = skb_tail_pointer(skb);

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		goto free_skb;

	/* Every failure between here and the walker reports -EINVAL. */
	err = -EINVAL;

	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
	if (!ops) {
		/* the kind lookup returned no ops to flush with */
		NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
		goto free_skb;
	}

	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
			sizeof(*hdr), 0);
	if (!nlh) {
		NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
		goto put_module;
	}

	hdr = nlmsg_data(nlh);
	hdr->tca_family = AF_UNSPEC;
	hdr->tca__pad1 = 0;
	hdr->tca__pad2 = 0;

	tab = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (!tab) {
		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
		goto put_module;
	}

	err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack);
	if (err <= 0) {
		nla_nest_cancel(skb, tab);
		goto put_module;
	}

	nla_nest_end(skb, tab);

	nlh->nlmsg_len = skb_tail_pointer(skb) - start;
	nlh->nlmsg_flags |= NLM_F_ROOT;

	/* Done with the ops; release the module before sending. */
	module_put(ops->owner);

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");

	return err;

put_module:
	module_put(ops->owner);
free_skb:
	kfree_skb(skb);
	return err;
}
|
|
|
|
|
2018-08-19 19:22:07 +00:00
|
|
|
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
|
2018-07-05 14:24:29 +00:00
|
|
|
{
|
2023-12-01 17:50:12 +00:00
|
|
|
struct tc_action *a;
|
2018-08-19 19:22:06 +00:00
|
|
|
int i;
|
2018-07-05 14:24:29 +00:00
|
|
|
|
2023-12-01 17:50:12 +00:00
|
|
|
tcf_act_for_each_action(i, a, actions) {
|
2018-07-05 14:24:29 +00:00
|
|
|
const struct tc_action_ops *ops = a->ops;
|
|
|
|
/* Actions can be deleted concurrently so we must save their
|
|
|
|
* type and id to search again after reference is released.
|
|
|
|
*/
|
2018-08-19 19:22:06 +00:00
|
|
|
struct tcf_idrinfo *idrinfo = a->idrinfo;
|
|
|
|
u32 act_index = a->tcfa_index;
|
2018-07-05 14:24:29 +00:00
|
|
|
|
2018-09-03 07:04:55 +00:00
|
|
|
actions[i] = NULL;
|
2018-07-05 14:24:29 +00:00
|
|
|
if (tcf_action_put(a)) {
|
|
|
|
/* last reference, action was deleted concurrently */
|
|
|
|
module_put(ops->owner);
|
2023-12-01 17:50:12 +00:00
|
|
|
} else {
|
2018-08-19 19:22:06 +00:00
|
|
|
int ret;
|
|
|
|
|
2018-07-05 14:24:29 +00:00
|
|
|
/* now do the delete */
|
2018-08-19 19:22:06 +00:00
|
|
|
ret = tcf_idr_delete_index(idrinfo, act_index);
|
2018-08-19 19:22:05 +00:00
|
|
|
if (ret < 0)
|
2018-07-05 14:24:29 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-12-08 19:28:45 +00:00
|
|
|
static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net,
|
|
|
|
struct tc_action *action)
|
2021-12-17 18:16:27 +00:00
|
|
|
{
|
|
|
|
size_t attr_size = tcf_action_fill_size(action);
|
|
|
|
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
|
|
|
|
[0] = action,
|
|
|
|
};
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2023-12-08 19:28:44 +00:00
|
|
|
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
|
2021-12-17 18:16:27 +00:00
|
|
|
if (!skb)
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-ENOBUFS);
|
2021-12-17 18:16:27 +00:00
|
|
|
|
2023-01-13 03:43:53 +00:00
|
|
|
if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
|
2021-12-17 18:16:27 +00:00
|
|
|
kfree_skb(skb);
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
|
|
|
|
{
|
|
|
|
const struct tc_action_ops *ops = action->ops;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) {
|
|
|
|
skb = NULL;
|
|
|
|
} else {
|
|
|
|
skb = tcf_reoffload_del_notify_msg(net, action);
|
|
|
|
if (IS_ERR(skb))
|
|
|
|
return PTR_ERR(skb);
|
2021-12-17 18:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = tcf_idr_release_unsafe(action);
|
|
|
|
if (ret == ACT_P_DELETED) {
|
|
|
|
module_put(ops->owner);
|
2023-12-08 19:28:45 +00:00
|
|
|
ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0);
|
2021-12-17 18:16:27 +00:00
|
|
|
} else {
|
|
|
|
kfree_skb(skb);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Replay (@add == true) or withdraw (@add == false) offload of every
 * non-bound action, in every network namespace, through the indirect
 * block callback @cb / @cb_priv.
 *
 * The walk runs with net_rwsem held for reading and act_id_mutex held;
 * each per-type IDR is additionally walked under its own idrinfo->lock.
 * On withdrawal, an action that is skip_sw and no longer in hardware is
 * handed to tcf_reoffload_del_notify().
 *
 * Returns -EINVAL when @cb is NULL, 0 otherwise; per-action failures are
 * not propagated.
 */
int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
			    void *cb_priv, bool add)
{
	struct tc_act_pernet_id *pernet_id;
	struct tcf_idrinfo *info;
	struct tc_action_net *act_net;
	struct tc_action *act;
	unsigned long cursor;
	unsigned long index;
	struct net *net;

	if (!cb)
		return -EINVAL;

	down_read(&net_rwsem);
	mutex_lock(&act_id_mutex);

	for_each_net(net) {
		list_for_each_entry(pernet_id, &act_pernet_id_list, list) {
			act_net = net_generic(net, pernet_id->id);
			info = act_net ? act_net->idrinfo : NULL;
			if (!info)
				continue;

			mutex_lock(&info->lock);
			idr_for_each_entry_ul(&info->action_idr, act, cursor, index) {
				/* Skip error placeholders and bound actions. */
				if (IS_ERR(act) || tc_act_bind(act->tcfa_flags))
					continue;

				if (add) {
					tcf_action_offload_add_ex(act, NULL, cb,
								  cb_priv);
					continue;
				}

				/* cb unregister to update hw count */
				if (tcf_action_offload_del_ex(act, cb, cb_priv) < 0)
					continue;

				if (tc_act_skip_sw(act->tcfa_flags) &&
				    !tc_act_in_hw(act))
					tcf_reoffload_del_notify(net, act);
			}
			mutex_unlock(&info->lock);
		}
	}

	mutex_unlock(&act_id_mutex);
	up_read(&net_rwsem);

	return 0;
}
|
|
|
|
|
2023-12-08 19:28:45 +00:00
|
|
|
static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n,
|
|
|
|
struct tc_action *actions[],
|
|
|
|
u32 portid, size_t attr_size,
|
|
|
|
struct netlink_ext_ack *extack)
|
2014-01-10 00:14:00 +00:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2023-12-08 19:28:44 +00:00
|
|
|
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
|
2014-01-10 00:14:00 +00:00
|
|
|
if (!skb)
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-ENOBUFS);
|
2014-01-10 00:14:00 +00:00
|
|
|
|
|
|
|
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
|
2023-01-13 03:43:53 +00:00
|
|
|
0, 2, extack) <= 0) {
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
|
2014-01-10 00:14:00 +00:00
|
|
|
kfree_skb(skb);
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_del_notify(struct net *net, struct nlmsghdr *n,
|
|
|
|
struct tc_action *actions[], u32 portid,
|
|
|
|
size_t attr_size, struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
|
|
|
|
skb = NULL;
|
|
|
|
} else {
|
|
|
|
skb = tcf_del_notify_msg(net, n, actions, portid, attr_size,
|
|
|
|
extack);
|
|
|
|
if (IS_ERR(skb))
|
|
|
|
return PTR_ERR(skb);
|
2014-01-10 00:14:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* now do the delete */
|
2018-08-19 19:22:07 +00:00
|
|
|
ret = tcf_action_delete(net, actions);
|
2014-02-12 01:07:34 +00:00
|
|
|
if (ret < 0) {
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Failed to delete TC action");
|
2014-02-12 01:07:34 +00:00
|
|
|
kfree_skb(skb);
|
|
|
|
return ret;
|
|
|
|
}
|
2014-01-10 00:14:00 +00:00
|
|
|
|
2023-12-08 19:28:45 +00:00
|
|
|
return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
|
|
|
|
n->nlmsg_flags & NLM_F_ECHO);
|
2014-01-10 00:14:00 +00:00
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
static int
|
2010-03-19 15:40:13 +00:00
|
|
|
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
|
2018-02-15 15:54:55 +00:00
|
|
|
u32 portid, int event, struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2008-01-24 04:33:32 +00:00
|
|
|
int i, ret;
|
2011-01-19 19:26:56 +00:00
|
|
|
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
|
2013-12-16 04:15:05 +00:00
|
|
|
struct tc_action *act;
|
2018-03-08 21:59:17 +00:00
|
|
|
size_t attr_size = 0;
|
2018-08-19 19:22:05 +00:00
|
|
|
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
|
2005-04-16 22:20:36 +00:00
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
|
|
|
ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
|
|
|
|
extack);
|
2008-01-24 04:33:32 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2011-01-19 19:26:56 +00:00
|
|
|
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
|
2018-02-15 15:54:53 +00:00
|
|
|
if (tb[1])
|
2018-02-15 15:54:55 +00:00
|
|
|
return tca_action_flush(net, tb[1], n, portid, extack);
|
2018-02-15 15:54:53 +00:00
|
|
|
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
|
2018-02-15 15:54:53 +00:00
|
|
|
return -EINVAL;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2008-01-23 06:11:50 +00:00
|
|
|
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
|
2018-02-15 15:54:55 +00:00
|
|
|
act = tcf_action_get_1(net, tb[i], n, portid, extack);
|
2008-01-24 04:33:13 +00:00
|
|
|
if (IS_ERR(act)) {
|
|
|
|
ret = PTR_ERR(act);
|
2005-04-16 22:20:36 +00:00
|
|
|
goto err;
|
2008-01-24 04:33:13 +00:00
|
|
|
}
|
2018-03-08 21:59:19 +00:00
|
|
|
attr_size += tcf_action_fill_size(act);
|
2018-07-05 14:24:33 +00:00
|
|
|
actions[i - 1] = act;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2018-03-08 21:59:19 +00:00
|
|
|
|
|
|
|
attr_size = tcf_action_full_attrs_size(attr_size);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (event == RTM_GETACTION)
|
2018-07-05 14:24:33 +00:00
|
|
|
ret = tcf_get_notify(net, portid, n, actions, event, extack);
|
2005-04-16 22:20:36 +00:00
|
|
|
else { /* delete */
|
2018-08-19 19:22:05 +00:00
|
|
|
ret = tcf_del_notify(net, n, actions, portid, attr_size, extack);
|
2014-01-10 00:14:00 +00:00
|
|
|
if (ret)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto err;
|
2018-08-19 19:22:05 +00:00
|
|
|
return 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
err:
|
2018-08-19 19:22:05 +00:00
|
|
|
tcf_action_put_many(actions);
|
2005-04-16 22:20:36 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-12-08 19:28:45 +00:00
|
|
|
static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n,
|
|
|
|
struct tc_action *actions[],
|
|
|
|
u32 portid, size_t attr_size,
|
|
|
|
struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2023-12-08 19:28:44 +00:00
|
|
|
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (!skb)
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-ENOBUFS);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2014-01-10 00:14:00 +00:00
|
|
|
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
|
2023-01-13 03:43:53 +00:00
|
|
|
RTM_NEWACTION, 0, 0, extack) <= 0) {
|
2018-03-03 01:52:01 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
|
2014-01-10 00:14:00 +00:00
|
|
|
kfree_skb(skb);
|
2023-12-08 19:28:45 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_add_notify(struct net *net, struct nlmsghdr *n,
|
|
|
|
struct tc_action *actions[], u32 portid,
|
|
|
|
size_t attr_size, struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
|
|
|
|
skb = NULL;
|
|
|
|
} else {
|
|
|
|
skb = tcf_add_notify_msg(net, n, actions, portid, attr_size,
|
|
|
|
extack);
|
|
|
|
if (IS_ERR(skb))
|
|
|
|
return PTR_ERR(skb);
|
2014-01-10 00:14:00 +00:00
|
|
|
}
|
2007-02-09 14:25:16 +00:00
|
|
|
|
2023-12-08 19:28:45 +00:00
|
|
|
return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
|
|
|
|
n->nlmsg_flags & NLM_F_ECHO);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2016-09-18 12:45:33 +00:00
|
|
|
/*
 * Create the actions described by @nla and send the add notification.
 *
 * tcf_action_init() is retried while it returns -EAGAIN, but at most 10
 * times in total, so the retry cannot loop forever. Only the result of
 * the init step is retried; the notification is sent exactly once.
 *
 * Returns the negative error from tcf_action_init(), otherwise the
 * result of tcf_add_notify().
 */
static int tcf_action_add(struct net *net, struct nlattr *nla,
			  struct nlmsghdr *n, u32 portid, u32 flags,
			  struct netlink_ext_ack *extack)
{
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
	int init_res[TCA_ACT_MAX_PRIO] = {};
	size_t attr_size = 0;
	int attempt = 0;
	int ret;

	do {
		ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
				      &attr_size, flags, 0, extack);
	} while (ret == -EAGAIN && ++attempt < 10);

	if (ret < 0)
		return ret;

	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);

	/* only put bound actions */
	tca_put_bound_many(actions, init_res);

	return ret;
}
|
|
|
|
|
2017-07-30 17:24:51 +00:00
|
|
|
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
|
2020-11-24 16:40:54 +00:00
|
|
|
[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON |
|
|
|
|
TCA_ACT_FLAG_TERSE_DUMP),
|
2017-07-30 17:24:52 +00:00
|
|
|
[TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
|
2017-07-30 17:24:51 +00:00
|
|
|
};
|
|
|
|
|
2017-04-16 16:48:24 +00:00
|
|
|
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
|
|
|
|
struct netlink_ext_ack *extack)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2008-03-25 17:26:21 +00:00
|
|
|
struct net *net = sock_net(skb->sk);
|
2017-07-30 17:24:51 +00:00
|
|
|
struct nlattr *tca[TCA_ROOT_MAX + 1];
|
2020-06-19 19:24:13 +00:00
|
|
|
u32 portid = NETLINK_CB(skb).portid;
|
2021-07-29 23:12:14 +00:00
|
|
|
u32 flags = 0;
|
|
|
|
int ret = 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2016-06-05 14:41:32 +00:00
|
|
|
if ((n->nlmsg_type != RTM_GETACTION) &&
|
|
|
|
!netlink_capable(skb, CAP_NET_ADMIN))
|
2012-11-16 03:03:00 +00:00
|
|
|
return -EPERM;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
|
|
|
ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca,
|
|
|
|
TCA_ROOT_MAX, NULL, extack);
|
2008-01-23 06:11:50 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (tca[TCA_ACT_TAB] == NULL) {
|
2018-02-15 15:54:55 +00:00
|
|
|
NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
|
2005-04-16 22:20:36 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2011-01-19 19:26:56 +00:00
|
|
|
/* n->nlmsg_flags & NLM_F_CREATE */
|
2005-04-16 22:20:36 +00:00
|
|
|
switch (n->nlmsg_type) {
|
|
|
|
case RTM_NEWACTION:
|
|
|
|
/* we are going to assume all other flags
|
2011-03-31 01:57:33 +00:00
|
|
|
* imply create only if it doesn't exist
|
2005-04-16 22:20:36 +00:00
|
|
|
* Note that CREATE | EXCL implies that
|
|
|
|
* but since we want avoid ambiguity (eg when flags
|
|
|
|
* is zero) then just set this
|
|
|
|
*/
|
2011-01-19 19:26:56 +00:00
|
|
|
if (n->nlmsg_flags & NLM_F_REPLACE)
|
2021-07-29 23:12:14 +00:00
|
|
|
flags = TCA_ACT_FLAGS_REPLACE;
|
|
|
|
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
|
2018-02-15 15:54:54 +00:00
|
|
|
extack);
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
case RTM_DELACTION:
|
2010-03-19 15:40:13 +00:00
|
|
|
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
|
2018-02-15 15:54:55 +00:00
|
|
|
portid, RTM_DELACTION, extack);
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
case RTM_GETACTION:
|
2010-03-19 15:40:13 +00:00
|
|
|
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
|
2018-02-15 15:54:55 +00:00
|
|
|
portid, RTM_GETACTION, extack);
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-07-30 17:24:51 +00:00
|
|
|
static struct nlattr *find_dump_kind(struct nlattr **nla)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2011-01-19 19:26:56 +00:00
|
|
|
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
|
2008-01-23 06:11:50 +00:00
|
|
|
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
|
|
|
|
struct nlattr *kind;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2008-01-23 06:11:50 +00:00
|
|
|
tb1 = nla[TCA_ACT_TAB];
|
2005-04-16 22:20:36 +00:00
|
|
|
if (tb1 == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
|
|
|
if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
|
2005-04-16 22:20:36 +00:00
|
|
|
return NULL;
|
|
|
|
|
2008-01-24 04:32:42 +00:00
|
|
|
if (tb[1] == NULL)
|
|
|
|
return NULL;
|
2019-09-19 01:44:43 +00:00
|
|
|
if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0)
|
2005-04-16 22:20:36 +00:00
|
|
|
return NULL;
|
2008-01-23 06:11:50 +00:00
|
|
|
kind = tb2[TCA_ACT_KIND];
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2006-07-06 03:45:06 +00:00
|
|
|
return kind;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2016-09-18 12:45:33 +00:00
|
|
|
static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2016-02-22 23:57:53 +00:00
|
|
|
struct net *net = sock_net(skb->sk);
|
2005-04-16 22:20:36 +00:00
|
|
|
struct nlmsghdr *nlh;
|
2007-04-20 03:29:13 +00:00
|
|
|
unsigned char *b = skb_tail_pointer(skb);
|
2008-01-24 04:34:11 +00:00
|
|
|
struct nlattr *nest;
|
2005-04-16 22:20:36 +00:00
|
|
|
struct tc_action_ops *a_o;
|
|
|
|
int ret = 0;
|
2012-06-27 04:39:32 +00:00
|
|
|
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
|
2017-07-30 17:24:51 +00:00
|
|
|
struct nlattr *tb[TCA_ROOT_MAX + 1];
|
|
|
|
struct nlattr *count_attr = NULL;
|
2017-07-30 17:24:52 +00:00
|
|
|
unsigned long jiffy_since = 0;
|
2017-07-30 17:24:51 +00:00
|
|
|
struct nlattr *kind = NULL;
|
|
|
|
struct nla_bitfield32 bf;
|
2017-07-30 17:24:52 +00:00
|
|
|
u32 msecs_since = 0;
|
2017-07-30 17:24:51 +00:00
|
|
|
u32 act_count = 0;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
|
|
|
ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb,
|
|
|
|
TCA_ROOT_MAX, tcaa_policy, cb->extack);
|
2017-07-30 17:24:51 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2017-07-30 17:24:51 +00:00
|
|
|
kind = find_dump_kind(tb);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (kind == NULL) {
|
2010-05-12 06:37:05 +00:00
|
|
|
pr_info("tc_dump_action: action bad kind\n");
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-07-06 03:45:06 +00:00
|
|
|
a_o = tc_lookup_action(kind);
|
2011-01-19 19:26:56 +00:00
|
|
|
if (a_o == NULL)
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
|
|
|
|
2017-07-30 17:24:51 +00:00
|
|
|
cb->args[2] = 0;
|
|
|
|
if (tb[TCA_ROOT_FLAGS]) {
|
|
|
|
bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
|
|
|
|
cb->args[2] = bf.value;
|
|
|
|
}
|
|
|
|
|
2017-07-30 17:24:52 +00:00
|
|
|
if (tb[TCA_ROOT_TIME_DELTA]) {
|
|
|
|
msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
|
|
|
|
}
|
|
|
|
|
2012-09-07 20:12:54 +00:00
|
|
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
2012-06-27 04:39:32 +00:00
|
|
|
cb->nlh->nlmsg_type, sizeof(*t), 0);
|
|
|
|
if (!nlh)
|
|
|
|
goto out_module_put;
|
2017-07-30 17:24:51 +00:00
|
|
|
|
2017-07-30 17:24:52 +00:00
|
|
|
if (msecs_since)
|
|
|
|
jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
|
|
|
|
|
2012-06-27 04:39:32 +00:00
|
|
|
t = nlmsg_data(nlh);
|
2005-04-16 22:20:36 +00:00
|
|
|
t->tca_family = AF_UNSPEC;
|
2005-06-28 19:55:30 +00:00
|
|
|
t->tca__pad1 = 0;
|
|
|
|
t->tca__pad2 = 0;
|
2017-07-30 17:24:52 +00:00
|
|
|
cb->args[3] = jiffy_since;
|
2017-07-30 17:24:51 +00:00
|
|
|
count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
|
|
|
|
if (!count_attr)
|
|
|
|
goto out_module_put;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2019-04-26 09:13:06 +00:00
|
|
|
nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
|
2008-01-24 04:34:11 +00:00
|
|
|
if (nest == NULL)
|
2012-06-27 04:39:32 +00:00
|
|
|
goto out_module_put;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2022-09-08 04:14:34 +00:00
|
|
|
ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (ret < 0)
|
2012-06-27 04:39:32 +00:00
|
|
|
goto out_module_put;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (ret > 0) {
|
2008-01-24 04:34:11 +00:00
|
|
|
nla_nest_end(skb, nest);
|
2005-04-16 22:20:36 +00:00
|
|
|
ret = skb->len;
|
2017-07-30 17:24:51 +00:00
|
|
|
act_count = cb->args[1];
|
|
|
|
memcpy(nla_data(count_attr), &act_count, sizeof(u32));
|
|
|
|
cb->args[1] = 0;
|
2005-04-16 22:20:36 +00:00
|
|
|
} else
|
2016-06-13 22:08:42 +00:00
|
|
|
nlmsg_trim(skb, b);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2007-04-20 03:29:13 +00:00
|
|
|
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
|
2012-09-07 20:12:54 +00:00
|
|
|
if (NETLINK_CB(cb->skb).portid && ret)
|
2005-04-16 22:20:36 +00:00
|
|
|
nlh->nlmsg_flags |= NLM_F_MULTI;
|
|
|
|
module_put(a_o->owner);
|
|
|
|
return skb->len;
|
|
|
|
|
2012-06-27 04:39:32 +00:00
|
|
|
out_module_put:
|
2005-04-16 22:20:36 +00:00
|
|
|
module_put(a_o->owner);
|
2007-03-26 06:06:12 +00:00
|
|
|
nlmsg_trim(skb, b);
|
2005-04-16 22:20:36 +00:00
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
2024-10-14 20:18:21 +00:00
|
|
|
/*
 * rtnetlink handlers for the action message types.  All three types use
 * tc_ctl_action() as their .doit handler; RTM_GETACTION additionally sets
 * tc_dump_action() as its .dumpit handler.
 */
static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = {
	{
		.msgtype = RTM_NEWACTION,
		.doit = tc_ctl_action,
	},
	{
		.msgtype = RTM_DELACTION,
		.doit = tc_ctl_action,
	},
	{
		.msgtype = RTM_GETACTION,
		.doit = tc_ctl_action,
		.dumpit = tc_dump_action,
	},
};
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * Init-time hook: passes the tc_action_rtnl_msg_handlers table to
 * rtnl_register_many().  Always returns 0.
 */
static int __init tc_action_init(void)
{
	rtnl_register_many(tc_action_rtnl_msg_handlers);

	return 0;
}
|
|
|
|
|
|
|
|
/* Hook tc_action_init() in through subsys_initcall(). */
subsys_initcall(tc_action_init);
|