mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 07:50:04 +00:00
84601d6ee6
Add bpf_link support skeleton. To keep this reviewable, no bpf program can be invoked yet, if a program is attached only a c-stub is called and not the actual bpf program. Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig. Uapi example usage: union bpf_attr attr = { }; attr.link_create.prog_fd = progfd; attr.link_create.attach_type = 0; /* unused */ attr.link_create.netfilter.pf = PF_INET; attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN; attr.link_create.netfilter.priority = -128; err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); ... this would attach progfd to ipv4:input hook. Such hook gets removed automatically if the calling program exits. BPF_NETFILTER program invocation is added in followup change. NF_HOOK_OP_BPF enum will eventually be read from nfnetlink_hook, it allows to tell userspace which program is attached at the given hook when user runs 'nft hook list' command rather than just the priority and not-very-helpful 'this hook runs a bpf prog but I can't tell which one'. Will also be used to disallow registration of two bpf programs with same priority in a followup patch. v4: arm32 cmpxchg only supports 32bit operand s/prio/priority/ v3: restrict prog attachment to ip/ip6 for now, lets lift restrictions if more use cases pop up (arptables, ebtables, netdev ingress/egress etc). Signed-off-by: Florian Westphal <fw@strlen.de> Link: https://lore.kernel.org/r/20230421170300.24115-2-fw@strlen.de Signed-off-by: Alexei Starovoitov <ast@kernel.org>
501 lines
14 KiB
C
501 lines
14 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_NETFILTER_H
|
|
#define __LINUX_NETFILTER_H
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/net.h>
|
|
#include <linux/if.h>
|
|
#include <linux/in.h>
|
|
#include <linux/in6.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/list.h>
|
|
#include <linux/static_key.h>
|
|
#include <linux/netfilter_defs.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/sockptr.h>
|
|
#include <net/net_namespace.h>
|
|
|
|
static inline int NF_DROP_GETERR(int verdict)
|
|
{
|
|
return -(verdict >> NF_VERDICT_QBITS);
|
|
}
|
|
|
|
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
|
|
const union nf_inet_addr *a2)
|
|
{
|
|
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
|
|
const unsigned long *ul1 = (const unsigned long *)a1;
|
|
const unsigned long *ul2 = (const unsigned long *)a2;
|
|
|
|
return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL;
|
|
#else
|
|
return a1->all[0] == a2->all[0] &&
|
|
a1->all[1] == a2->all[1] &&
|
|
a1->all[2] == a2->all[2] &&
|
|
a1->all[3] == a2->all[3];
|
|
#endif
|
|
}
|
|
|
|
static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
|
|
union nf_inet_addr *result,
|
|
const union nf_inet_addr *mask)
|
|
{
|
|
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
|
|
const unsigned long *ua = (const unsigned long *)a1;
|
|
unsigned long *ur = (unsigned long *)result;
|
|
const unsigned long *um = (const unsigned long *)mask;
|
|
|
|
ur[0] = ua[0] & um[0];
|
|
ur[1] = ua[1] & um[1];
|
|
#else
|
|
result->all[0] = a1->all[0] & mask->all[0];
|
|
result->all[1] = a1->all[1] & mask->all[1];
|
|
result->all[2] = a1->all[2] & mask->all[2];
|
|
result->all[3] = a1->all[3] & mask->all[3];
|
|
#endif
|
|
}
|
|
|
|
/* Netfilter initialisation routine; the definition lives outside this header. */
int netfilter_init(void);
|
|
|
|
struct sk_buff;
|
|
|
|
struct nf_hook_ops;
|
|
|
|
struct sock;
|
|
|
|
struct nf_hook_state {
|
|
u8 hook;
|
|
u8 pf;
|
|
struct net_device *in;
|
|
struct net_device *out;
|
|
struct sock *sk;
|
|
struct net *net;
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
|
|
};
|
|
|
|
/*
 * Signature of a netfilter hook function. It is called with a private
 * pointer, the packet and the (read-only) hook state, and yields an
 * unsigned int result.
 */
typedef unsigned int nf_hookfn(void *priv,
			       struct sk_buff *skb,
			       const struct nf_hook_state *state);
|
|
/* Kind of user behind a hook registration; stored in nf_hook_ops.hook_ops_type. */
enum nf_hook_ops_type {
	NF_HOOK_OP_UNDEFINED,
	NF_HOOK_OP_NF_TABLES,
	NF_HOOK_OP_BPF,
};
|
|
|
|
struct nf_hook_ops {
|
|
/* User fills in from here down. */
|
|
nf_hookfn *hook;
|
|
struct net_device *dev;
|
|
void *priv;
|
|
u8 pf;
|
|
enum nf_hook_ops_type hook_ops_type:8;
|
|
unsigned int hooknum;
|
|
/* Hooks are ordered in ascending priority. */
|
|
int priority;
|
|
};
|
|
|
|
struct nf_hook_entry {
|
|
nf_hookfn *hook;
|
|
void *priv;
|
|
};
|
|
|
|
struct nf_hook_entries_rcu_head {
|
|
struct rcu_head head;
|
|
void *allocation;
|
|
};
|
|
|
|
struct nf_hook_entries {
|
|
u16 num_hook_entries;
|
|
/* padding */
|
|
struct nf_hook_entry hooks[];
|
|
|
|
/* trailer: pointers to original orig_ops of each hook,
|
|
* followed by rcu_head and scratch space used for freeing
|
|
* the structure via call_rcu.
|
|
*
|
|
* This is not part of struct nf_hook_entry since its only
|
|
* needed in slow path (hook register/unregister):
|
|
* const struct nf_hook_ops *orig_ops[]
|
|
*
|
|
* For the same reason, we store this at end -- its
|
|
* only needed when a hook is deleted, not during
|
|
* packet path processing:
|
|
* struct nf_hook_entries_rcu_head head
|
|
*/
|
|
};
|
|
|
|
#ifdef CONFIG_NETFILTER
|
|
static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
|
|
{
|
|
unsigned int n = e->num_hook_entries;
|
|
const void *hook_end;
|
|
|
|
hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */
|
|
|
|
return (struct nf_hook_ops **)hook_end;
|
|
}
|
|
|
|
static inline int
|
|
nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
|
|
struct nf_hook_state *state)
|
|
{
|
|
return entry->hook(entry->priv, skb, state);
|
|
}
|
|
|
|
static inline void nf_hook_state_init(struct nf_hook_state *p,
|
|
unsigned int hook,
|
|
u_int8_t pf,
|
|
struct net_device *indev,
|
|
struct net_device *outdev,
|
|
struct sock *sk,
|
|
struct net *net,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
p->hook = hook;
|
|
p->pf = pf;
|
|
p->in = indev;
|
|
p->out = outdev;
|
|
p->sk = sk;
|
|
p->net = net;
|
|
p->okfn = okfn;
|
|
}
|
|
|
|
|
|
|
|
struct nf_sockopt_ops {
|
|
struct list_head list;
|
|
|
|
u_int8_t pf;
|
|
|
|
/* Non-inclusive ranges: use 0/0/NULL to never get called. */
|
|
int set_optmin;
|
|
int set_optmax;
|
|
int (*set)(struct sock *sk, int optval, sockptr_t arg,
|
|
unsigned int len);
|
|
int get_optmin;
|
|
int get_optmax;
|
|
int (*get)(struct sock *sk, int optval, void __user *user, int *len);
|
|
/* Use the module struct to lock set/get code in place */
|
|
struct module *owner;
|
|
};
|
|
|
|
/*
 * Functions to register/unregister hook points.
 *
 * The *_hook variants take a single nf_hook_ops; the *_hooks variants
 * take @reg together with a count @n.
 */
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops);
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops);

int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			  unsigned int n);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			     unsigned int n);
|
|
|
|
/*
 * Functions to register get/setsockopt ranges (non-inclusive).
 * You need to check permissions yourself!
 */
int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
|
|
|
|
#if defined(CONFIG_JUMP_LABEL)
/*
 * One static key per [protocol family][hook number]. nf_hook() and
 * NF_HOOK_LIST() test it to return early when the key is false.
 */
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#endif
|
|
|
|
/*
 * Out-of-line hook traversal. nf_hook() calls nf_hook_slow() with @i == 0,
 * NF_HOOK_LIST() calls nf_hook_slow_list() for a list of packets.
 */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
		 const struct nf_hook_entries *e, unsigned int i);

void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
		       const struct nf_hook_entries *e);
|
|
/**
|
|
* nf_hook - call a netfilter hook
|
|
*
|
|
* Returns 1 if the hook has allowed the packet to pass. The function
|
|
* okfn must be invoked by the caller in this case. Any other return
|
|
* value indicates the packet has been consumed by the hook.
|
|
*/
|
|
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
|
|
struct sock *sk, struct sk_buff *skb,
|
|
struct net_device *indev, struct net_device *outdev,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
struct nf_hook_entries *hook_head = NULL;
|
|
int ret = 1;
|
|
|
|
#ifdef CONFIG_JUMP_LABEL
|
|
if (__builtin_constant_p(pf) &&
|
|
__builtin_constant_p(hook) &&
|
|
!static_key_false(&nf_hooks_needed[pf][hook]))
|
|
return 1;
|
|
#endif
|
|
|
|
rcu_read_lock();
|
|
switch (pf) {
|
|
case NFPROTO_IPV4:
|
|
hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
|
|
break;
|
|
case NFPROTO_IPV6:
|
|
hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
|
|
break;
|
|
case NFPROTO_ARP:
|
|
#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
|
if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp)))
|
|
break;
|
|
hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
|
|
#endif
|
|
break;
|
|
case NFPROTO_BRIDGE:
|
|
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
|
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
|
|
#endif
|
|
break;
|
|
default:
|
|
WARN_ON_ONCE(1);
|
|
break;
|
|
}
|
|
|
|
if (hook_head) {
|
|
struct nf_hook_state state;
|
|
|
|
nf_hook_state_init(&state, hook, pf, indev, outdev,
|
|
sk, net, okfn);
|
|
|
|
ret = nf_hook_slow(skb, &state, hook_head, 0);
|
|
}
|
|
rcu_read_unlock();
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Activate hook; either okfn or kfree_skb called, unless a hook
   returns NF_STOLEN (in which case, it's up to the hook to deal with
   the consequences).

   Returns -ERRNO if packet dropped.  Zero means queued, stolen or
   accepted.
*/
|
|
|
|
/* RR:
   > I don't want nf_hook to return anything because people might forget
   > about async and trust the return value to mean "packet was ok".

   AK:
   Just document it clearly, then you can expect some sense from kernel
   coders :)
*/
|
|
|
|
static inline int
|
|
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
|
|
struct sk_buff *skb, struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
|
|
bool cond)
|
|
{
|
|
int ret;
|
|
|
|
if (!cond ||
|
|
((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1))
|
|
ret = okfn(net, sk, skb);
|
|
return ret;
|
|
}
|
|
|
|
static inline int
|
|
NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb,
|
|
struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
|
|
if (ret == 1)
|
|
ret = okfn(net, sk, skb);
|
|
return ret;
|
|
}
|
|
|
|
static inline void
|
|
NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
|
|
struct list_head *head, struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
struct nf_hook_entries *hook_head = NULL;
|
|
|
|
#ifdef CONFIG_JUMP_LABEL
|
|
if (__builtin_constant_p(pf) &&
|
|
__builtin_constant_p(hook) &&
|
|
!static_key_false(&nf_hooks_needed[pf][hook]))
|
|
return;
|
|
#endif
|
|
|
|
rcu_read_lock();
|
|
switch (pf) {
|
|
case NFPROTO_IPV4:
|
|
hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
|
|
break;
|
|
case NFPROTO_IPV6:
|
|
hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
|
|
break;
|
|
default:
|
|
WARN_ON_ONCE(1);
|
|
break;
|
|
}
|
|
|
|
if (hook_head) {
|
|
struct nf_hook_state state;
|
|
|
|
nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
|
|
|
|
nf_hook_slow_list(head, &state, hook_head);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/* Call setsockopt() */
|
|
int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt,
|
|
unsigned int len);
|
|
int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
|
|
int *len);
|
|
|
|
struct flowi;
|
|
struct nf_queue_entry;
|
|
|
|
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
|
|
unsigned int dataoff, u_int8_t protocol,
|
|
unsigned short family);
|
|
|
|
__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
|
unsigned int dataoff, unsigned int len,
|
|
u_int8_t protocol, unsigned short family);
|
|
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
|
bool strict, unsigned short family);
|
|
int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
|
|
|
|
#include <net/flow.h>
|
|
|
|
struct nf_conn;
|
|
enum nf_nat_manip_type;
|
|
struct nlattr;
|
|
enum ip_conntrack_dir;
|
|
|
|
struct nf_nat_hook {
|
|
int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip,
|
|
const struct nlattr *attr);
|
|
void (*decode_session)(struct sk_buff *skb, struct flowi *fl);
|
|
unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
|
|
enum nf_nat_manip_type mtype,
|
|
enum ip_conntrack_dir dir);
|
|
void (*remove_nat_bysrc)(struct nf_conn *ct);
|
|
};
|
|
|
|
extern const struct nf_nat_hook __rcu *nf_nat_hook;
|
|
|
|
static inline void
|
|
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
|
|
{
|
|
#if IS_ENABLED(CONFIG_NF_NAT)
|
|
const struct nf_nat_hook *nat_hook;
|
|
|
|
rcu_read_lock();
|
|
nat_hook = rcu_dereference(nf_nat_hook);
|
|
if (nat_hook && nat_hook->decode_session)
|
|
nat_hook->decode_session(skb, fl);
|
|
rcu_read_unlock();
|
|
#endif
|
|
}
|
|
|
|
#else /* !CONFIG_NETFILTER */
|
|
static inline int
|
|
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
|
|
struct sk_buff *skb, struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
|
|
bool cond)
|
|
{
|
|
return okfn(net, sk, skb);
|
|
}
|
|
|
|
static inline int
|
|
NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
|
|
struct sk_buff *skb, struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
return okfn(net, sk, skb);
|
|
}
|
|
|
|
static inline void
|
|
NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
|
|
struct list_head *head, struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
|
{
|
|
/* nothing to do */
|
|
}
|
|
|
|
/*
 * nf_hook - variant for kernels built without CONFIG_NETFILTER.
 *
 * Always returns 1, the value that tells the caller to invoke @okfn
 * itself.
 */
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
			  struct sock *sk, struct sk_buff *skb,
			  struct net_device *indev, struct net_device *outdev,
			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
	const int pass = 1;

	return pass;
}
|
|
struct flowi;
|
|
static inline void
|
|
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
|
|
{
|
|
}
|
|
#endif /*CONFIG_NETFILTER*/
|
|
|
|
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
|
#include <linux/netfilter/nf_conntrack_zones_common.h>
|
|
|
|
void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
|
|
void nf_ct_set_closing(struct nf_conntrack *nfct);
|
|
struct nf_conntrack_tuple;
|
|
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
|
|
const struct sk_buff *skb);
|
|
#else
|
|
/*
 * nf_ct_attach - no-op used when CONFIG_NF_CONNTRACK is disabled.
 *
 * @skb is const-qualified so that this stub accepts exactly the same
 * arguments as the real prototype; a caller holding a const skb must
 * build without warnings in both configurations.
 */
static inline void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) {}
|
|
/* nf_ct_set_closing - no-op used when CONFIG_NF_CONNTRACK is disabled. */
static inline void nf_ct_set_closing(struct nf_conntrack *nfct)
{
}
|
|
struct nf_conntrack_tuple;

/*
 * nf_ct_get_tuple_skb - variant used when CONFIG_NF_CONNTRACK is disabled.
 *
 * Always returns false and leaves @dst_tuple untouched.
 */
static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
				       const struct sk_buff *skb)
{
	const bool found = false;

	return found;
}
|
|
#endif
|
|
|
|
struct nf_conn;
|
|
enum ip_conntrack_info;
|
|
|
|
struct nf_ct_hook {
|
|
int (*update)(struct net *net, struct sk_buff *skb);
|
|
void (*destroy)(struct nf_conntrack *);
|
|
bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
|
|
const struct sk_buff *);
|
|
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
|
|
void (*set_closing)(struct nf_conntrack *nfct);
|
|
};
|
|
extern const struct nf_ct_hook __rcu *nf_ct_hook;
|
|
|
|
struct nlattr;
|
|
|
|
struct nfnl_ct_hook {
|
|
size_t (*build_size)(const struct nf_conn *ct);
|
|
int (*build)(struct sk_buff *skb, struct nf_conn *ct,
|
|
enum ip_conntrack_info ctinfo,
|
|
u_int16_t ct_attr, u_int16_t ct_info_attr);
|
|
int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
|
|
int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
|
|
u32 portid, u32 report);
|
|
void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
|
|
enum ip_conntrack_info ctinfo, s32 off);
|
|
};
|
|
extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
|
|
|
|
/**
 * nf_skb_duplicated - TEE target has sent a packet
 *
 * When a xtables target sends a packet, the OUTPUT and POSTROUTING
 * hooks are traversed again, i.e. nft and xtables are invoked recursively.
 *
 * This is used by xtables TEE target to prevent the duplicated skb from
 * being duplicated again.
 */
DECLARE_PER_CPU(bool, nf_skb_duplicated);
|
|
|
|
/**
|
|
* Contains bitmask of ctnetlink event subscribers, if any.
|
|
* Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
|
|
*/
|
|
extern u8 nf_ctnetlink_has_listener;
|
|
#endif /*__LINUX_NETFILTER_H*/
|