netfilter next pull request 2023-10-18

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmUvltYNHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2AGbwEADT9pI6dIyEJt0xMte+Ld41E+lfBgLXxv/cAFQo
 5NjP5SF75iMh0PoRrw1f9DJJNGo4C2OhcWcltosYLdE2xoDNjYw/J2oi/WdYHLe7
 tEFZWB0KfOBl17Gh0l9Gue8MyVKKamNDuMvfd/MQB9af7VQ6lPi/hlWyjAk4hTGu
 6Ly1asHM1kXFt6Z0mKMRQomXKZ46mEVWX8a11wtfLnd3vmCqrHlKwGKlNVi9U5ir
 aQnitw60OmCvbYLBdjDugPibSTeoha674vxVtjWuwq+1uQrkx0js/ASKPGrT2a33
 yo2/nuprdZc135YCXAlccb4TwgKTMT1R1pf9JY5fhSIPKfY49yR0JKUckXpieefy
 t/VAJWVsVvXg27XF1LCommMOhbM0c+WE/BpJ8ASTSt+79nXhLcISDirkgUcGvUKZ
 LNp1sa3QqwR7auTTzd9aKIWvZIN82MLOJJpYkhNOqYheCAm9Y2O1kClQB7t7Zi3r
 K7L9clpexYWcJQmDox2SwU7nVWeqJP6e2Y2f4W+B9aKuiyHNF/B/pEE5LO6hqZ8C
 WJ+TZZVaxDTxtVtC2W3tFFhtd/ZzuIPryXNL127XN83L0iQZ+STDe6CzTXyysD3h
 14iCr4pY4u5y5w0jqilQYdMCyCK2LUKmUoZUKGpwSlDQtEZU6BZsFnE12C6+v9N+
 BXr7aA==
 =/3kX
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-23-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter next pull request 2023-10-18

This series contains initial netfilter skb drop_reason support, from
myself.

First few patches fix up a few spots to make sure we won't trip
when followup patches embed error numbers in the upper bits
(we already do this in some places).

Then, nftables and bridge netfilter get converted to call kfree_skb_reason
directly to let tooling pinpoint exact location of packet drops,
rather than the existing NF_DROP catchall in nf_hook_slow().

I would like to eventually convert all netfilter modules, but as some
callers cannot deal with NF_STOLEN (notably act_ct), more preparation
work is needed for this.

Last patch gets rid of an ugly 'de-const' cast in nftables.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-10-18 11:05:21 +01:00
commit 37fb1c81d2
13 changed files with 100 additions and 69 deletions

View File

@ -22,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict)
return -(verdict >> NF_VERDICT_QBITS);
}
static __always_inline int
NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err)
{
BUILD_BUG_ON(err > 0xffff);
kfree_skb_reason(skb, reason);
return ((err << 16) | NF_STOLEN);
}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
{

View File

@ -462,7 +462,7 @@ struct nft_set_ops {
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
void (*commit)(const struct nft_set *set);
void (*commit)(struct nft_set *set);
void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);

View File

@ -486,11 +486,11 @@ static unsigned int br_nf_pre_routing(void *priv,
struct brnf_net *brnet;
if (unlikely(!pskb_may_pull(skb, len)))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
p = br_port_get_rcu(state->in);
if (p == NULL)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
br = p->br;
brnet = net_generic(state->net, brnf_net_id);
@ -501,7 +501,7 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_ACCEPT;
if (!ipv6_mod_enabled()) {
pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0);
}
nf_bridge_pull_encap_header_rcsum(skb);
@ -518,12 +518,12 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge_pull_encap_header_rcsum(skb);
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
if (!nf_bridge_alloc(skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
@ -590,15 +590,15 @@ static unsigned int br_nf_forward_ip(void *priv,
/* Need exclusive nf_bridge_info since we might have multiple
* different physoutdevs. */
if (!nf_bridge_unshare(skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
@ -618,13 +618,13 @@ static unsigned int br_nf_forward_ip(void *priv,
if (pf == NFPROTO_IPV4) {
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
@ -666,7 +666,7 @@ static unsigned int br_nf_forward_arp(void *priv,
}
if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
@ -831,7 +831,7 @@ static unsigned int br_nf_post_routing(void *priv,
return NF_ACCEPT;
if (!realoutdev)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))

View File

@ -161,13 +161,13 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
struct nf_bridge_info *nf_bridge;
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
nf_bridge = nf_bridge_alloc(skb);
if (!nf_bridge)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;

View File

@ -36,12 +36,12 @@ static const struct xt_table packet_mangler = {
static unsigned int
ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
unsigned int ret, verdict;
const struct iphdr *iph;
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
u32 mark;
int err;
u8 tos;
/* Save things which could affect route */
mark = skb->mark;
@ -51,8 +51,9 @@ ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
tos = iph->tos;
ret = ipt_do_table(priv, skb, state);
verdict = ret & NF_VERDICT_MASK;
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
if (verdict != NF_DROP && verdict != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||

View File

@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = {
static unsigned int
ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
unsigned int ret, verdict;
u32 flowlabel, mark;
u8 hop_limit;
int err;
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
ret = ip6t_do_table(priv, skb, state);
verdict = ret & NF_VERDICT_MASK;
if (ret != NF_DROP && ret != NF_STOLEN &&
if (verdict != NF_DROP && verdict != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||

View File

@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
if (ret == 1)
continue;
return ret;
case NF_STOLEN:
return NF_DROP_GETERR(verdict);
default:
/* Implicit handling for NF_STOLEN, as well as any other
* non conventional verdicts.
*/
WARN_ON_ONCE(1);
return 0;
}
}

View File

@ -2169,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
return -1;
return NF_DROP;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple))
return -1;
return NF_DROP;
if (ct->status & IPS_SRC_NAT) {
memcpy(tuple.src.u3.all,
@ -2193,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
if (!h)
return 0;
return NF_ACCEPT;
/* Store status bits of the conntrack that is clashing to re-do NAT
* mangling according to what it has been done already to this packet.
@ -2206,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
nat_hook = rcu_dereference(nf_nat_hook);
if (!nat_hook)
return 0;
return NF_ACCEPT;
if (status & IPS_SRC_NAT &&
nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
IP_CT_DIR_ORIGINAL) == NF_DROP)
return -1;
if (status & IPS_SRC_NAT) {
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
NF_NAT_MANIP_SRC,
IP_CT_DIR_ORIGINAL);
if (verdict != NF_ACCEPT)
return verdict;
}
if (status & IPS_DST_NAT &&
nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
IP_CT_DIR_ORIGINAL) == NF_DROP)
return -1;
if (status & IPS_DST_NAT) {
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
NF_NAT_MANIP_DST,
IP_CT_DIR_ORIGINAL);
if (verdict != NF_ACCEPT)
return verdict;
}
return 0;
return NF_ACCEPT;
}
/* This packet is coming from userspace via nf_queue, complete the packet
@ -2233,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
help = nfct_help(ct);
if (!help)
return 0;
return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
return 0;
return NF_ACCEPT;
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
return 0;
return NF_ACCEPT;
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
@ -2255,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return 0;
return NF_ACCEPT;
break;
}
#endif
default:
return 0;
return NF_ACCEPT;
}
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return -1;
return NF_DROP;
}
}
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0;
return nf_conntrack_confirm(skb);
}
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
int err;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return 0;
return NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
err = __nf_conntrack_update(net, skb, ct, ctinfo);
if (err < 0)
return err;
int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return NF_ACCEPT;
}
return nf_confirm_cthelper(skb, ct, ctinfo);

View File

@ -999,11 +999,12 @@ static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
unsigned int ret;
unsigned int ret, verdict;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_DROP && ret != NF_STOLEN &&
verdict = ret & NF_VERDICT_MASK;
if (verdict != NF_DROP && verdict != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);

View File

@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt,
{
enum nft_trace_types type;
switch (regs->verdict.code) {
switch (regs->verdict.code & NF_VERDICT_MASK) {
case NFT_CONTINUE:
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
@ -308,10 +308,11 @@ next_rule:
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
return regs.verdict.code;
case NF_DROP:
return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
}
switch (regs.verdict.code) {
@ -342,6 +343,9 @@ next_rule:
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);
if (nft_base_chain(basechain)->policy == NF_DROP)
return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);

View File

@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
case __NFT_TRACETYPE_MAX:
break;
case NFT_TRACETYPE_RETURN:
case NFT_TRACETYPE_RULE:
case NFT_TRACETYPE_RULE: {
unsigned int v;
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
goto nla_put_failure;
/* pkt->skb undefined iff NF_STOLEN, disable dump */
if (verdict->code == NF_STOLEN)
v = verdict->code & NF_VERDICT_MASK;
if (v == NF_STOLEN)
info->packet_dumped = true;
else
mark = pkt->skb->mark;
break;
}
case NFT_TRACETYPE_POLICY:
mark = pkt->skb->mark;

View File

@ -228,19 +228,22 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
int err;
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook) {
err = ct_hook->update(entry->state.net, entry->skb);
if (err < 0)
verdict = NF_DROP;
}
if (ct_hook)
verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
switch (verdict & NF_VERDICT_MASK) {
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
}
}
nf_reinject(entry, verdict);
}

View File

@ -1549,12 +1549,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @_set: nftables API set representation
* @set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
@ -1672,7 +1671,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
static void nft_pipapo_commit(const struct nft_set *set)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;