mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-15 09:34:17 +00:00
1cb6f0bae5
Pedro Pinto and later independently also Hyunwoo Kim and Wongi Lee reported an issue that the tcx_entry can be released too early, leading to a use-after-free (UAF) when an active old-style ingress or clsact qdisc with a shared tc block is later replaced by another ingress or clsact instance.

Essentially, the sequence to trigger the UAF (one example) can be as follows:

1. A network namespace is created.
2. An ingress qdisc is created. This allocates a tcx_entry, and &tcx_entry->miniq is stored in the qdisc's miniqp->p_miniq. At the same time, a tcf block with index 1 is created.
3. chain0 is attached to the tcf block. chain0 must be connected to the block linked to the ingress qdisc to later reach the function tcf_chain0_head_change_cb_del() which triggers the UAF.
4. Create and graft a clsact qdisc. This causes the ingress qdisc created in step 2 to be removed, thus freeing the previously linked tcx_entry:

   rtnetlink_rcv_msg() => tc_modify_qdisc() => qdisc_create() => clsact_init() [a] => qdisc_graft() => qdisc_destroy() => __qdisc_destroy() => ingress_destroy() [b] => tcx_entry_free() => kfree_rcu() // tcx_entry freed

5. Finally, the network namespace is closed. This registers the cleanup_net worker, and during the process of releasing the remaining clsact qdisc, it accesses the tcx_entry that was already freed in step 4, causing the UAF to occur:

   cleanup_net() => ops_exit_list() => default_device_exit_batch() => unregister_netdevice_many() => unregister_netdevice_many_notify() => dev_shutdown() => qdisc_put() => clsact_destroy() [c] => tcf_block_put_ext() => tcf_chain0_head_change_cb_del() => tcf_chain_head_change_item() => clsact_chain_head_change() => mini_qdisc_pair_swap() // UAF

There are also other variants; the gist is to add an ingress (or clsact) qdisc with a specific shared block, then to replace that qdisc, waiting for the tcx_entry kfree_rcu() to be executed and subsequently accessing the currently active qdisc's miniq one way or another.
The correct fix is to turn the miniq_active boolean into a counter. As can be observed, at step 2 above the counter transitions from 0->1, at step [a] from 1->2 (in order for the miniq object to remain active during the replacement), then in [b] from 2->1 and finally in [c] from 1->0 with the eventual release. The reference counter in general ranges from [0,2] and it does not need to be atomic since all access to the counter is protected by the rtnl mutex. With this in place, there is no longer a UAF happening and the tcx_entry is freed at the correct time.

Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support")
Reported-by: Pedro Pinto <xten@osec.io>
Co-developed-by: Pedro Pinto <xten@osec.io>
Signed-off-by: Pedro Pinto <xten@osec.io>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hyunwoo Kim <v4bel@theori.io>
Cc: Wongi Lee <qwerty@theori.io>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240708133130.11609-1-daniel@iogearbox.net
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
208 lines
4.4 KiB
C
208 lines
4.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* Copyright (c) 2023 Isovalent */
|
|
#ifndef __NET_TCX_H
|
|
#define __NET_TCX_H
|
|
|
|
#include <linux/bpf.h>
|
|
#include <linux/bpf_mprog.h>
|
|
|
|
#include <net/sch_generic.h>
|
|
|
|
struct mini_Qdisc;
|
|
|
|
struct tcx_entry {
|
|
struct mini_Qdisc __rcu *miniq;
|
|
struct bpf_mprog_bundle bundle;
|
|
u32 miniq_active;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
struct tcx_link {
|
|
struct bpf_link link;
|
|
struct net_device *dev;
|
|
u32 location;
|
|
};
|
|
|
|
/*
 * Record in @skb whether it is being handled on the ingress side.
 *
 * The store is compiled in only when CONFIG_NET_XGRESS is defined;
 * otherwise this helper is an empty function.
 */
static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
{
#ifdef CONFIG_NET_XGRESS
	skb->tc_at_ingress = ingress;
#endif
}
|
|
|
|
#ifdef CONFIG_NET_XGRESS
|
|
static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
|
|
{
|
|
struct bpf_mprog_bundle *bundle = entry->parent;
|
|
|
|
return container_of(bundle, struct tcx_entry, bundle);
|
|
}
|
|
|
|
static inline struct tcx_link *tcx_link(const struct bpf_link *link)
|
|
{
|
|
return container_of(link, struct tcx_link, link);
|
|
}
|
|
|
|
/*
 * Implemented out of line. tcx_skeys_inc() and tcx_skeys_dec() in this
 * header call them as a matched pair.
 * NOTE(review): presumably they adjust a global "tcx in use" key -
 * confirm against the definition.
 */
void tcx_inc(void);
void tcx_dec(void);
|
|
|
|
/*
 * Wait until no reader can still be using the previous bpf_mprog_entry.
 *
 * After the a/b entries of a bundle have been swapped, the old one may
 * still have in-flight users; synchronize_rcu() waits for them.
 */
static inline void tcx_entry_sync(void)
{
	synchronize_rcu();
}
|
|
|
|
static inline void
|
|
tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry,
|
|
bool ingress)
|
|
{
|
|
ASSERT_RTNL();
|
|
if (ingress)
|
|
rcu_assign_pointer(dev->tcx_ingress, entry);
|
|
else
|
|
rcu_assign_pointer(dev->tcx_egress, entry);
|
|
}
|
|
|
|
static inline struct bpf_mprog_entry *
|
|
tcx_entry_fetch(struct net_device *dev, bool ingress)
|
|
{
|
|
ASSERT_RTNL();
|
|
if (ingress)
|
|
return rcu_dereference_rtnl(dev->tcx_ingress);
|
|
else
|
|
return rcu_dereference_rtnl(dev->tcx_egress);
|
|
}
|
|
|
|
static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void)
|
|
{
|
|
struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL);
|
|
|
|
if (tcx) {
|
|
bpf_mprog_bundle_init(&tcx->bundle);
|
|
return &tcx->bundle.a;
|
|
}
|
|
return NULL;
|
|
}
|
|
/*
 * Wraps tcx_entry_create_noprof() in alloc_hooks().
 * NOTE(review): presumably this attributes the allocation to the call
 * site for memory allocation profiling - confirm against alloc_hooks().
 */
#define tcx_entry_create(...)	alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__))
|
|
|
|
static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
|
|
{
|
|
kfree_rcu(tcx_entry(entry), rcu);
|
|
}
|
|
|
|
static inline struct bpf_mprog_entry *
|
|
tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created)
|
|
{
|
|
struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
|
|
|
|
*created = false;
|
|
if (!entry) {
|
|
entry = tcx_entry_create();
|
|
if (!entry)
|
|
return NULL;
|
|
*created = true;
|
|
}
|
|
return entry;
|
|
}
|
|
|
|
/*
 * Call tcx_inc() first, then the ingress or egress queue increment
 * helper selected by @ingress.
 */
static inline void tcx_skeys_inc(bool ingress)
{
	tcx_inc();

	if (!ingress) {
		net_inc_egress_queue();
		return;
	}

	net_inc_ingress_queue();
}
|
|
|
|
/*
 * Counterpart of tcx_skeys_inc(), in reverse order: the ingress or
 * egress queue decrement helper selected by @ingress runs first, then
 * tcx_dec().
 */
static inline void tcx_skeys_dec(bool ingress)
{
	if (!ingress)
		net_dec_egress_queue();
	else
		net_dec_ingress_queue();

	tcx_dec();
}
|
|
|
|
static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
|
|
{
|
|
ASSERT_RTNL();
|
|
tcx_entry(entry)->miniq_active++;
|
|
}
|
|
|
|
static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
|
|
{
|
|
ASSERT_RTNL();
|
|
tcx_entry(entry)->miniq_active--;
|
|
}
|
|
|
|
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
|
|
{
|
|
ASSERT_RTNL();
|
|
return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active;
|
|
}
|
|
|
|
static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb,
|
|
int code)
|
|
{
|
|
switch (code) {
|
|
case TCX_PASS:
|
|
skb->tc_index = qdisc_skb_cb(skb)->tc_classid;
|
|
fallthrough;
|
|
case TCX_DROP:
|
|
case TCX_REDIRECT:
|
|
return code;
|
|
case TCX_NEXT:
|
|
default:
|
|
return TCX_NEXT;
|
|
}
|
|
}
|
|
#endif /* CONFIG_NET_XGRESS */
|
|
|
|
#if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL)
|
|
int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
|
|
int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
|
|
int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
|
|
void tcx_uninstall(struct net_device *dev, bool ingress);
|
|
|
|
int tcx_prog_query(const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr);
|
|
|
|
static inline void dev_tcx_uninstall(struct net_device *dev)
|
|
{
|
|
ASSERT_RTNL();
|
|
tcx_uninstall(dev, true);
|
|
tcx_uninstall(dev, false);
|
|
}
|
|
#else
|
|
static inline int tcx_prog_attach(const union bpf_attr *attr,
|
|
struct bpf_prog *prog)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline int tcx_link_attach(const union bpf_attr *attr,
|
|
struct bpf_prog *prog)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline int tcx_prog_detach(const union bpf_attr *attr,
|
|
struct bpf_prog *prog)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline int tcx_prog_query(const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
 * Fallback used when CONFIG_NET_XGRESS and CONFIG_BPF_SYSCALL are not
 * both defined: does nothing.
 */
static inline void dev_tcx_uninstall(struct net_device *dev)
{
}
|
|
#endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */
|
|
#endif /* __NET_TCX_H */
|