inetpeer: do not get a refcount in inet_getpeer()

All inet_getpeer() callers except ip4_frag_init() don't need
to acquire a permanent refcount on the inetpeer.

They can switch to full RCU protection.

Move the refcount_inc_not_zero() into ip4_frag_init(),
so that all the other callers no longer have to
perform a pair of expensive atomic operations on
a possibly contended cache line.

inet_putpeer() no longer needs to be exported.

After this patch, my DUT can receive 8,400,000 UDP packets
per second targeting closed ports, using 50% fewer CPU cycles
than before.

Also replace two calls to l3mdev_master_ifindex() with
l3mdev_master_ifindex_rcu() (Ido's idea)

Fixes: 8c2bd38b95f7 ("icmp: change the order of rate limits")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241215175629.1248773-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2024-12-15 17:56:29 +00:00 committed by Jakub Kicinski
parent 50b362f21d
commit a853c60950
7 changed files with 32 additions and 29 deletions

View File

@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
struct dst_entry *dst = &rt->dst; struct dst_entry *dst = &rt->dst;
struct inet_peer *peer; struct inet_peer *peer;
bool rc = true; bool rc = true;
int vif;
if (!apply_ratelimit) if (!apply_ratelimit)
return true; return true;
@ -321,12 +320,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
goto out; goto out;
vif = l3mdev_master_ifindex(dst->dev); rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
l3mdev_master_ifindex_rcu(dst->dev));
rc = inet_peer_xrlim_allow(peer, rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer) rcu_read_unlock();
inet_putpeer(peer);
out: out:
if (!rc) if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);

View File

@ -109,8 +109,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
p = rb_entry(parent, struct inet_peer, rb_node); p = rb_entry(parent, struct inet_peer, rb_node);
cmp = inetpeer_addr_cmp(daddr, &p->daddr); cmp = inetpeer_addr_cmp(daddr, &p->daddr);
if (cmp == 0) { if (cmp == 0) {
if (!refcount_inc_not_zero(&p->refcnt))
break;
now = jiffies; now = jiffies;
if (READ_ONCE(p->dtime) != now) if (READ_ONCE(p->dtime) != now)
WRITE_ONCE(p->dtime, now); WRITE_ONCE(p->dtime, now);
@ -169,6 +167,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
} }
} }
/* Must be called under RCU : No refcount change is done here. */
struct inet_peer *inet_getpeer(struct inet_peer_base *base, struct inet_peer *inet_getpeer(struct inet_peer_base *base,
const struct inetpeer_addr *daddr) const struct inetpeer_addr *daddr)
{ {
@ -179,10 +178,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
/* Attempt a lockless lookup first. /* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry. * Because of a concurrent writer, we might not find an existing entry.
*/ */
rcu_read_lock();
seq = read_seqbegin(&base->lock); seq = read_seqbegin(&base->lock);
p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
rcu_read_unlock();
if (p) if (p)
return p; return p;
@ -200,7 +197,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
if (p) { if (p) {
p->daddr = *daddr; p->daddr = *daddr;
p->dtime = (__u32)jiffies; p->dtime = (__u32)jiffies;
refcount_set(&p->refcnt, 2); refcount_set(&p->refcnt, 1);
atomic_set(&p->rid, 0); atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0; p->rate_tokens = 0;
@ -228,7 +225,6 @@ void inet_putpeer(struct inet_peer *p)
if (refcount_dec_and_test(&p->refcnt)) if (refcount_dec_and_test(&p->refcnt))
kfree_rcu(p, rcu); kfree_rcu(p, rcu);
} }
EXPORT_SYMBOL_GPL(inet_putpeer);
/* /*
* Check transmit rate limitation for given message. * Check transmit rate limitation for given message.

View File

@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
static void ip4_frag_init(struct inet_frag_queue *q, const void *a) static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{ {
struct ipq *qp = container_of(q, struct ipq, q); struct ipq *qp = container_of(q, struct ipq, q);
struct net *net = q->fqdir->net;
const struct frag_v4_compare_key *key = a; const struct frag_v4_compare_key *key = a;
struct net *net = q->fqdir->net;
struct inet_peer *p = NULL;
q->key.v4 = *key; q->key.v4 = *key;
qp->ecn = 0; qp->ecn = 0;
qp->peer = q->fqdir->max_dist ? if (q->fqdir->max_dist) {
inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) : rcu_read_lock();
NULL; p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
if (p && !refcount_inc_not_zero(&p->refcnt))
p = NULL;
rcu_read_unlock();
}
qp->peer = p;
} }
static void ip4_frag_free(struct inet_frag_queue *q) static void ip4_frag_free(struct inet_frag_queue *q)

View File

@ -870,11 +870,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
} }
log_martians = IN_DEV_LOG_MARTIANS(in_dev); log_martians = IN_DEV_LOG_MARTIANS(in_dev);
vif = l3mdev_master_ifindex_rcu(rt->dst.dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
rcu_read_unlock();
net = dev_net(rt->dst.dev); net = dev_net(rt->dst.dev);
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
if (!peer) { if (!peer) {
rcu_read_unlock();
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
rt_nexthop(rt, ip_hdr(skb)->daddr)); rt_nexthop(rt, ip_hdr(skb)->daddr));
return; return;
@ -893,7 +893,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/ */
if (peer->n_redirects >= ip_rt_redirect_number) { if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies; peer->rate_last = jiffies;
goto out_put_peer; goto out_unlock;
} }
/* Check for load limit; set rate_last to the latest sent /* Check for load limit; set rate_last to the latest sent
@ -914,8 +914,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw); &ip_hdr(skb)->daddr, &gw);
} }
out_put_peer: out_unlock:
inet_putpeer(peer); rcu_read_unlock();
} }
static int ip_error(struct sk_buff *skb) static int ip_error(struct sk_buff *skb)
@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
break; break;
} }
rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
l3mdev_master_ifindex(skb->dev)); l3mdev_master_ifindex_rcu(skb->dev));
send = true; send = true;
if (peer) { if (peer) {
now = jiffies; now = jiffies;
@ -989,8 +989,9 @@ static int ip_error(struct sk_buff *skb)
peer->rate_tokens -= ip_rt_error_cost; peer->rate_tokens -= ip_rt_error_cost;
else else
send = false; send = false;
inet_putpeer(peer);
} }
rcu_read_unlock();
if (send) if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0); icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

View File

@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128) if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5); tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo); res = inet_peer_xrlim_allow(peer, tmo);
if (peer) rcu_read_unlock();
inet_putpeer(peer);
} }
if (!res) if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst), __ICMP6_INC_STATS(net, ip6_dst_idev(dst),

View File

@ -613,6 +613,7 @@ int ip6_forward(struct sk_buff *skb)
else else
target = &hdr->daddr; target = &hdr->daddr;
rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here) /* Limit redirects both by destination (here)
@ -620,8 +621,7 @@ int ip6_forward(struct sk_buff *skb)
*/ */
if (inet_peer_xrlim_allow(peer, 1*HZ)) if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target); ndisc_send_redirect(skb, target);
if (peer) rcu_read_unlock();
inet_putpeer(peer);
} else { } else {
int addrtype = ipv6_addr_type(&hdr->saddr); int addrtype = ipv6_addr_type(&hdr->saddr);

View File

@ -1731,10 +1731,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n"); "Redirect: destination is not a neighbour\n");
goto release; goto release;
} }
rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ); ret = inet_peer_xrlim_allow(peer, 1*HZ);
if (peer) rcu_read_unlock();
inet_putpeer(peer);
if (!ret) if (!ret)
goto release; goto release;