mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 07:50:04 +00:00
2b77bdde97
Profiles show false sharing in addr_compare() because refcnt/dtime changes dirty the first inet_peer cache line, which is where the keys used at lookup time reside.

If many CPUs are calling inet_getpeer() and inet_putpeer(), or need frag ids, addr_compare() is in 2nd position in "perf top".

Before the patch, my udpflood bench (16 threads) on my 2x4x2 machine:

5784.00 9.7% csum_partial_copy_generic [kernel]
3356.00 5.6% addr_compare [kernel]
2638.00 4.4% fib_table_lookup [kernel]
2625.00 4.4% ip_fragment [kernel]
1934.00 3.2% neigh_lookup [kernel]
1617.00 2.7% udp_sendmsg [kernel]
1608.00 2.7% __ip_route_output_key [kernel]
1480.00 2.5% __ip_append_data [kernel]
1396.00 2.3% kfree [kernel]
1195.00 2.0% kmem_cache_free [kernel]
1157.00 1.9% inet_getpeer [kernel]
1121.00 1.9% neigh_resolve_output [kernel]
1012.00 1.7% dev_queue_xmit [kernel]

# time ./udpflood.sh
real 0m44.511s
user 0m20.020s
sys 11m22.780s

# time ./udpflood.sh
real 0m44.099s
user 0m20.140s
sys 11m15.870s

After the patch, addr_compare() no longer shows up in profiles:

4171.00 10.7% csum_partial_copy_generic [kernel]
1787.00 4.6% fib_table_lookup [kernel]
1756.00 4.5% ip_fragment [kernel]
1234.00 3.2% udp_sendmsg [kernel]
1191.00 3.0% neigh_lookup [kernel]
1118.00 2.9% __ip_append_data [kernel]
1022.00 2.6% kfree [kernel]
993.00 2.5% __ip_route_output_key [kernel]
841.00 2.2% neigh_resolve_output [kernel]
816.00 2.1% kmem_cache_free [kernel]
658.00 1.7% ia32_sysenter_target [kernel]
632.00 1.6% kmem_cache_alloc_node [kernel]

# time ./udpflood.sh
real 0m41.587s
user 0m19.190s
sys 10m36.370s

# time ./udpflood.sh
real 0m41.486s
user 0m19.290s
sys 10m33.650s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
117 lines
2.9 KiB
C
117 lines
2.9 KiB
C
/*
|
|
* INETPEER - A storage for permanent information about peers
|
|
*
|
|
* Authors: Andrey V. Savochkin <saw@msu.ru>
|
|
*/
|
|
|
|
#ifndef _NET_INETPEER_H
|
|
#define _NET_INETPEER_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/init.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <net/ipv6.h>
|
|
#include <asm/atomic.h>
|
|
|
|
struct inetpeer_addr_base {
|
|
union {
|
|
__be32 a4;
|
|
__be32 a6[4];
|
|
};
|
|
};
|
|
|
|
struct inetpeer_addr {
|
|
struct inetpeer_addr_base addr;
|
|
__u16 family;
|
|
};
|
|
|
|
struct inet_peer {
|
|
/* group together avl_left,avl_right,v4daddr to speedup lookups */
|
|
struct inet_peer __rcu *avl_left, *avl_right;
|
|
struct inetpeer_addr daddr;
|
|
__u32 avl_height;
|
|
|
|
u32 metrics[RTAX_MAX];
|
|
u32 rate_tokens; /* rate limiting for ICMP */
|
|
unsigned long rate_last;
|
|
unsigned long pmtu_expires;
|
|
u32 pmtu_orig;
|
|
u32 pmtu_learned;
|
|
struct inetpeer_addr_base redirect_learned;
|
|
/*
|
|
* Once inet_peer is queued for deletion (refcnt == -1), following fields
|
|
* are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
|
|
* We can share memory with rcu_head to help keep inet_peer small.
|
|
*/
|
|
union {
|
|
struct {
|
|
atomic_t rid; /* Frag reception counter */
|
|
atomic_t ip_id_count; /* IP ID for the next packet */
|
|
__u32 tcp_ts;
|
|
__u32 tcp_ts_stamp;
|
|
};
|
|
struct rcu_head rcu;
|
|
struct inet_peer *gc_next;
|
|
};
|
|
|
|
/* following fields might be frequently dirtied */
|
|
__u32 dtime; /* the time of last use of not referenced entries */
|
|
atomic_t refcnt;
|
|
};
|
|
|
|
/*
 * Set-up routine for the inetpeer storage.  Only declared here; the
 * definition lives outside this header.  It carries the __init annotation,
 * so it is meant to run during initialisation only.
 */
void inet_initpeers(void) __init;
|
|
|
|
#define INETPEER_METRICS_NEW (~(u32) 0)
|
|
|
|
static inline bool inet_metrics_new(const struct inet_peer *p)
|
|
{
|
|
return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
|
|
}
|
|
|
|
/* can be called with or without local BH being disabled */
|
|
/*
 * Returns the inet_peer for the address key @daddr.  Only declared here; the
 * implementation lives outside this header.
 * NOTE(review): @create presumably requests allocation of a new entry when
 * none exists, and the result is presumably NULL when nothing is found or
 * allocated -- confirm against the definition.
 */
struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create);
|
|
|
|
/*
 * IPv4 convenience wrapper around inet_getpeer(): builds an AF_INET key on
 * the stack from @v4daddr and forwards @create unchanged.  Only the family
 * and the a4 word of the key are written.
 */
static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
{
	struct inetpeer_addr key;

	key.family = AF_INET;
	key.addr.a4 = v4daddr;

	return inet_getpeer(&key, create);
}
|
|
|
|
static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
|
|
{
|
|
struct inetpeer_addr daddr;
|
|
|
|
ipv6_addr_copy((struct in6_addr *)daddr.addr.a6, v6daddr);
|
|
daddr.family = AF_INET6;
|
|
return inet_getpeer(&daddr, create);
|
|
}
|
|
|
|
/* can be called from BH context or outside */
|
|
extern void inet_putpeer(struct inet_peer *p);
|
|
extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
|
|
|
|
/*
|
|
* temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
|
|
* tcp_ts_stamp if no refcount is taken on inet_peer
|
|
*/
|
|
static inline void inet_peer_refcheck(const struct inet_peer *p)
|
|
{
|
|
WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
|
|
}
|
|
|
|
|
|
/* can be called with or without local BH being disabled */
|
|
static inline __u16 inet_getid(struct inet_peer *p, int more)
|
|
{
|
|
more++;
|
|
inet_peer_refcheck(p);
|
|
return atomic_add_return(more, &p->ip_id_count) - more;
|
|
}
|
|
|
|
#endif /* _NET_INETPEER_H */
|