mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
net: dst: Switch to rcuref_t reference counting
Under high contention dst_entry::__refcnt becomes a significant bottleneck.

atomic_inc_not_zero() is implemented with a cmpxchg() loop, which goes into high retry rates on contention.

Switch the reference count to rcuref_t which results in a significant performance gain. Rename the reference count member to __rcuref to reflect the change.

The gain depends on the micro-architecture and the number of concurrent operations and has been measured in the range of +25% to +130% with a localhost memtier/memcached benchmark which amplifies the problem massively.

Running the memtier/memcached benchmark over a real (1Gb) network connection, the conversion on top of the false sharing fix for struct dst_entry::__refcnt results in a total gain in the 2%-5% range over the upstream baseline.

Reported-by: Wangyang Guo <wangyang.guo@intel.com>
Reported-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230307125538.989175656@linutronix.de
Link: https://lore.kernel.org/r/20230323102800.215027837@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
d288a162dd
commit
bc9d3a9f2a
@ -16,6 +16,7 @@
|
|||||||
#include <linux/bug.h>
|
#include <linux/bug.h>
|
||||||
#include <linux/jiffies.h>
|
#include <linux/jiffies.h>
|
||||||
#include <linux/refcount.h>
|
#include <linux/refcount.h>
|
||||||
|
#include <linux/rcuref.h>
|
||||||
#include <net/neighbour.h>
|
#include <net/neighbour.h>
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <linux/indirect_call_wrapper.h>
|
#include <linux/indirect_call_wrapper.h>
|
||||||
@ -61,11 +62,11 @@ struct dst_entry {
|
|||||||
unsigned short trailer_len; /* space to reserve at tail */
|
unsigned short trailer_len; /* space to reserve at tail */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* __refcnt wants to be on a different cache line from
|
* __rcuref wants to be on a different cache line from
|
||||||
* input/output/ops or performance tanks badly
|
* input/output/ops or performance tanks badly
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
atomic_t __refcnt; /* 64-bit offset 64 */
|
rcuref_t __rcuref; /* 64-bit offset 64 */
|
||||||
#endif
|
#endif
|
||||||
int __use;
|
int __use;
|
||||||
unsigned long lastuse;
|
unsigned long lastuse;
|
||||||
@ -75,16 +76,16 @@ struct dst_entry {
|
|||||||
__u32 tclassid;
|
__u32 tclassid;
|
||||||
#ifndef CONFIG_64BIT
|
#ifndef CONFIG_64BIT
|
||||||
struct lwtunnel_state *lwtstate;
|
struct lwtunnel_state *lwtstate;
|
||||||
atomic_t __refcnt; /* 32-bit offset 64 */
|
rcuref_t __rcuref; /* 32-bit offset 64 */
|
||||||
#endif
|
#endif
|
||||||
netdevice_tracker dev_tracker;
|
netdevice_tracker dev_tracker;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Used by rtable and rt6_info. Moves lwtstate into the next cache
|
* Used by rtable and rt6_info. Moves lwtstate into the next cache
|
||||||
* line on 64bit so that lwtstate does not cause false sharing with
|
* line on 64bit so that lwtstate does not cause false sharing with
|
||||||
* __refcnt under contention of __refcnt. This also puts the
|
* __rcuref under contention of __rcuref. This also puts the
|
||||||
* frequently accessed members of rtable and rt6_info out of the
|
* frequently accessed members of rtable and rt6_info out of the
|
||||||
* __refcnt cache line.
|
* __rcuref cache line.
|
||||||
*/
|
*/
|
||||||
struct list_head rt_uncached;
|
struct list_head rt_uncached;
|
||||||
struct uncached_list *rt_uncached_list;
|
struct uncached_list *rt_uncached_list;
|
||||||
@ -238,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst)
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If your kernel compilation stops here, please check
|
* If your kernel compilation stops here, please check
|
||||||
* the placement of __refcnt in struct dst_entry
|
* the placement of __rcuref in struct dst_entry
|
||||||
*/
|
*/
|
||||||
BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
|
BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
|
||||||
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
|
WARN_ON(!rcuref_get(&dst->__rcuref));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
|
static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
|
||||||
@ -305,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
|
|||||||
*/
|
*/
|
||||||
static inline bool dst_hold_safe(struct dst_entry *dst)
|
static inline bool dst_hold_safe(struct dst_entry *dst)
|
||||||
{
|
{
|
||||||
return atomic_inc_not_zero(&dst->__refcnt);
|
return rcuref_get(&dst->__rcuref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2131,7 +2131,7 @@ sk_dst_get(struct sock *sk)
|
|||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
dst = rcu_dereference(sk->sk_dst_cache);
|
dst = rcu_dereference(sk->sk_dst_cache);
|
||||||
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
|
if (dst && !rcuref_get(&dst->__rcuref))
|
||||||
dst = NULL;
|
dst = NULL;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return dst;
|
return dst;
|
||||||
|
@ -73,7 +73,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
|
|||||||
{
|
{
|
||||||
struct rtable *rt = &br->fake_rtable;
|
struct rtable *rt = &br->fake_rtable;
|
||||||
|
|
||||||
atomic_set(&rt->dst.__refcnt, 1);
|
rcuref_init(&rt->dst.__rcuref, 1);
|
||||||
rt->dst.dev = br->dev;
|
rt->dst.dev = br->dev;
|
||||||
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
|
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
|
||||||
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
|
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
|
||||||
|
@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
|
|||||||
dst->tclassid = 0;
|
dst->tclassid = 0;
|
||||||
#endif
|
#endif
|
||||||
dst->lwtstate = NULL;
|
dst->lwtstate = NULL;
|
||||||
atomic_set(&dst->__refcnt, initial_ref);
|
rcuref_init(&dst->__rcuref, initial_ref);
|
||||||
dst->__use = 0;
|
dst->__use = 0;
|
||||||
dst->lastuse = jiffies;
|
dst->lastuse = jiffies;
|
||||||
dst->flags = flags;
|
dst->flags = flags;
|
||||||
@ -162,32 +162,16 @@ EXPORT_SYMBOL(dst_dev_put);
|
|||||||
|
|
||||||
void dst_release(struct dst_entry *dst)
|
void dst_release(struct dst_entry *dst)
|
||||||
{
|
{
|
||||||
if (dst) {
|
if (dst && rcuref_put(&dst->__rcuref))
|
||||||
int newrefcnt;
|
|
||||||
|
|
||||||
newrefcnt = atomic_dec_return(&dst->__refcnt);
|
|
||||||
if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
|
|
||||||
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
|
|
||||||
__func__, dst, newrefcnt);
|
|
||||||
if (!newrefcnt)
|
|
||||||
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
|
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(dst_release);
|
EXPORT_SYMBOL(dst_release);
|
||||||
|
|
||||||
void dst_release_immediate(struct dst_entry *dst)
|
void dst_release_immediate(struct dst_entry *dst)
|
||||||
{
|
{
|
||||||
if (dst) {
|
if (dst && rcuref_put(&dst->__rcuref))
|
||||||
int newrefcnt;
|
|
||||||
|
|
||||||
newrefcnt = atomic_dec_return(&dst->__refcnt);
|
|
||||||
if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
|
|
||||||
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
|
|
||||||
__func__, dst, newrefcnt);
|
|
||||||
if (!newrefcnt)
|
|
||||||
dst_destroy(dst);
|
dst_destroy(dst);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(dst_release_immediate);
|
EXPORT_SYMBOL(dst_release_immediate);
|
||||||
|
|
||||||
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
|
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
|
||||||
|
@ -843,7 +843,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
|
|||||||
if (dst) {
|
if (dst) {
|
||||||
ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
|
ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
|
||||||
ci.rta_used = dst->__use;
|
ci.rta_used = dst->__use;
|
||||||
ci.rta_clntref = atomic_read(&dst->__refcnt);
|
ci.rta_clntref = rcuref_read(&dst->__rcuref);
|
||||||
}
|
}
|
||||||
if (expires) {
|
if (expires) {
|
||||||
unsigned long clock;
|
unsigned long clock;
|
||||||
|
@ -293,7 +293,7 @@ static const struct fib6_info fib6_null_entry_template = {
|
|||||||
|
|
||||||
static const struct rt6_info ip6_null_entry_template = {
|
static const struct rt6_info ip6_null_entry_template = {
|
||||||
.dst = {
|
.dst = {
|
||||||
.__refcnt = ATOMIC_INIT(1),
|
.__rcuref = RCUREF_INIT(1),
|
||||||
.__use = 1,
|
.__use = 1,
|
||||||
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
||||||
.error = -ENETUNREACH,
|
.error = -ENETUNREACH,
|
||||||
@ -307,7 +307,7 @@ static const struct rt6_info ip6_null_entry_template = {
|
|||||||
|
|
||||||
static const struct rt6_info ip6_prohibit_entry_template = {
|
static const struct rt6_info ip6_prohibit_entry_template = {
|
||||||
.dst = {
|
.dst = {
|
||||||
.__refcnt = ATOMIC_INIT(1),
|
.__rcuref = RCUREF_INIT(1),
|
||||||
.__use = 1,
|
.__use = 1,
|
||||||
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
||||||
.error = -EACCES,
|
.error = -EACCES,
|
||||||
@ -319,7 +319,7 @@ static const struct rt6_info ip6_prohibit_entry_template = {
|
|||||||
|
|
||||||
static const struct rt6_info ip6_blk_hole_entry_template = {
|
static const struct rt6_info ip6_blk_hole_entry_template = {
|
||||||
.dst = {
|
.dst = {
|
||||||
.__refcnt = ATOMIC_INIT(1),
|
.__rcuref = RCUREF_INIT(1),
|
||||||
.__use = 1,
|
.__use = 1,
|
||||||
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
.obsolete = DST_OBSOLETE_FORCE_CHK,
|
||||||
.error = -EINVAL,
|
.error = -EINVAL,
|
||||||
|
@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
|
|||||||
spin_unlock_bh(&dest->dst_lock);
|
spin_unlock_bh(&dest->dst_lock);
|
||||||
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
|
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
|
||||||
&dest->addr.ip, &dest_dst->dst_saddr.ip,
|
&dest->addr.ip, &dest_dst->dst_saddr.ip,
|
||||||
atomic_read(&rt->dst.__refcnt));
|
rcuref_read(&rt->dst.__rcuref));
|
||||||
}
|
}
|
||||||
if (ret_saddr)
|
if (ret_saddr)
|
||||||
*ret_saddr = dest_dst->dst_saddr.ip;
|
*ret_saddr = dest_dst->dst_saddr.ip;
|
||||||
@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
|
|||||||
spin_unlock_bh(&dest->dst_lock);
|
spin_unlock_bh(&dest->dst_lock);
|
||||||
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
|
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
|
||||||
&dest->addr.in6, &dest_dst->dst_saddr.in6,
|
&dest->addr.in6, &dest_dst->dst_saddr.in6,
|
||||||
atomic_read(&rt->dst.__refcnt));
|
rcuref_read(&rt->dst.__rcuref));
|
||||||
}
|
}
|
||||||
if (ret_saddr)
|
if (ret_saddr)
|
||||||
*ret_saddr = dest_dst->dst_saddr.in6;
|
*ret_saddr = dest_dst->dst_saddr.in6;
|
||||||
|
Loading…
Reference in New Issue
Block a user