mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 23:39:18 +00:00
[NET]: Define infrastructure to keep 'inuse' changes in an efficient SMP/NUMA way.
"struct proto" currently uses an array stats[NR_CPUS] to track changes to 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all of this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node.

In this patch, I tried to:
- Keep a fast !CONFIG_SMP implementation
- Keep a fast CONFIG_SMP implementation for often-used protocols (tcp, udp, raw, ...)
- Introduce a NUMA-efficient implementation

Some helper macros are defined in include/net/sock.h. These macros take CONFIG_SMP into account.

If a "struct proto" is declared without using the DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of the current alloc_percpu() implementation. However, it should still be better than the previous implementation based on the stats[NR_CPUS] field.

When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs while still preserving NUMA efficiency.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
91781004b9
commit
286ab3d460
@ -560,6 +560,14 @@ struct proto {
|
|||||||
void (*unhash)(struct sock *sk);
|
void (*unhash)(struct sock *sk);
|
||||||
int (*get_port)(struct sock *sk, unsigned short snum);
|
int (*get_port)(struct sock *sk, unsigned short snum);
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
/* Keeping track of sockets in use */
|
||||||
|
void (*inuse_add)(struct proto *prot, int inc);
|
||||||
|
int (*inuse_getval)(const struct proto *prot);
|
||||||
|
int *inuse_ptr;
|
||||||
|
#else
|
||||||
|
int inuse;
|
||||||
|
#endif
|
||||||
/* Memory pressure */
|
/* Memory pressure */
|
||||||
void (*enter_memory_pressure)(void);
|
void (*enter_memory_pressure)(void);
|
||||||
atomic_t *memory_allocated; /* Current allocated memory. */
|
atomic_t *memory_allocated; /* Current allocated memory. */
|
||||||
@ -592,12 +600,38 @@ struct proto {
|
|||||||
#ifdef SOCK_REFCNT_DEBUG
|
#ifdef SOCK_REFCNT_DEBUG
|
||||||
atomic_t socks;
|
atomic_t socks;
|
||||||
#endif
|
#endif
|
||||||
struct {
|
|
||||||
int inuse;
|
|
||||||
u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
|
|
||||||
} stats[NR_CPUS];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special macros to let protos use a fast version of inuse{get|add}
|
||||||
|
* using a static percpu variable per proto instead of an allocated one,
|
||||||
|
* saving one dereference.
|
||||||
|
* This might be changed if/when dynamic percpu vars become fast.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
# define DEFINE_PROTO_INUSE(NAME) \
|
||||||
|
static DEFINE_PER_CPU(int, NAME##_inuse); \
|
||||||
|
static void NAME##_inuse_add(struct proto *prot, int inc) \
|
||||||
|
{ \
|
||||||
|
__get_cpu_var(NAME##_inuse) += inc; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static int NAME##_inuse_getval(const struct proto *prot)\
|
||||||
|
{ \
|
||||||
|
int res = 0, cpu; \
|
||||||
|
\
|
||||||
|
for_each_possible_cpu(cpu) \
|
||||||
|
res += per_cpu(NAME##_inuse, cpu); \
|
||||||
|
return res; \
|
||||||
|
}
|
||||||
|
# define REF_PROTO_INUSE(NAME) \
|
||||||
|
.inuse_add = NAME##_inuse_add, \
|
||||||
|
.inuse_getval = NAME##_inuse_getval,
|
||||||
|
#else
|
||||||
|
# define DEFINE_PROTO_INUSE(NAME)
|
||||||
|
# define REF_PROTO_INUSE(NAME)
|
||||||
|
#endif
|
||||||
|
|
||||||
extern int proto_register(struct proto *prot, int alloc_slab);
|
extern int proto_register(struct proto *prot, int alloc_slab);
|
||||||
extern void proto_unregister(struct proto *prot);
|
extern void proto_unregister(struct proto *prot);
|
||||||
|
|
||||||
@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
|
|||||||
/* Called with local bh disabled */
|
/* Called with local bh disabled */
|
||||||
static __inline__ void sock_prot_inc_use(struct proto *prot)
|
static __inline__ void sock_prot_inc_use(struct proto *prot)
|
||||||
{
|
{
|
||||||
prot->stats[smp_processor_id()].inuse++;
|
#ifdef CONFIG_SMP
|
||||||
|
prot->inuse_add(prot, 1);
|
||||||
|
#else
|
||||||
|
prot->inuse++;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ void sock_prot_dec_use(struct proto *prot)
|
static __inline__ void sock_prot_dec_use(struct proto *prot)
|
||||||
{
|
{
|
||||||
prot->stats[smp_processor_id()].inuse--;
|
#ifdef CONFIG_SMP
|
||||||
|
prot->inuse_add(prot, -1);
|
||||||
|
#else
|
||||||
|
prot->inuse--;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ int sock_prot_inuse(struct proto *proto)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
return proto->inuse_getval(proto);
|
||||||
|
#else
|
||||||
|
return proto->inuse;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* With per-bucket locks this operation is not-atomic, so that
|
/* With per-bucket locks this operation is not-atomic, so that
|
||||||
|
@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release);
|
|||||||
static DEFINE_RWLOCK(proto_list_lock);
|
static DEFINE_RWLOCK(proto_list_lock);
|
||||||
static LIST_HEAD(proto_list);
|
static LIST_HEAD(proto_list);
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
/*
|
||||||
|
* Define default functions to keep track of inuse sockets per protocol
|
||||||
|
* Note that often used protocols use dedicated functions to get a speed increase.
|
||||||
|
* (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
|
||||||
|
*/
|
||||||
|
static void inuse_add(struct proto *prot, int inc)
|
||||||
|
{
|
||||||
|
per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int inuse_get(const struct proto *prot)
|
||||||
|
{
|
||||||
|
int res = 0, cpu;
|
||||||
|
for_each_possible_cpu(cpu)
|
||||||
|
res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int proto_register(struct proto *prot, int alloc_slab)
|
int proto_register(struct proto *prot, int alloc_slab)
|
||||||
{
|
{
|
||||||
char *request_sock_slab_name = NULL;
|
char *request_sock_slab_name = NULL;
|
||||||
char *timewait_sock_slab_name;
|
char *timewait_sock_slab_name;
|
||||||
int rc = -ENOBUFS;
|
int rc = -ENOBUFS;
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
if (!prot->inuse_getval || !prot->inuse_add) {
|
||||||
|
prot->inuse_ptr = alloc_percpu(int);
|
||||||
|
if (prot->inuse_ptr == NULL)
|
||||||
|
goto out;
|
||||||
|
prot->inuse_getval = inuse_get;
|
||||||
|
prot->inuse_add = inuse_add;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (alloc_slab) {
|
if (alloc_slab) {
|
||||||
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
|
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
|
||||||
SLAB_HWCACHE_ALIGN, NULL);
|
SLAB_HWCACHE_ALIGN, NULL);
|
||||||
@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab)
|
|||||||
if (prot->slab == NULL) {
|
if (prot->slab == NULL) {
|
||||||
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
|
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
|
||||||
prot->name);
|
prot->name);
|
||||||
goto out;
|
goto out_free_inuse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prot->rsk_prot != NULL) {
|
if (prot->rsk_prot != NULL) {
|
||||||
@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name:
|
|||||||
out_free_sock_slab:
|
out_free_sock_slab:
|
||||||
kmem_cache_destroy(prot->slab);
|
kmem_cache_destroy(prot->slab);
|
||||||
prot->slab = NULL;
|
prot->slab = NULL;
|
||||||
|
out_free_inuse:
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
if (prot->inuse_ptr != NULL) {
|
||||||
|
free_percpu(prot->inuse_ptr);
|
||||||
|
prot->inuse_ptr = NULL;
|
||||||
|
prot->inuse_getval = NULL;
|
||||||
|
prot->inuse_add = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot)
|
|||||||
list_del(&prot->node);
|
list_del(&prot->node);
|
||||||
write_unlock(&proto_list_lock);
|
write_unlock(&proto_list_lock);
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
if (prot->inuse_ptr != NULL) {
|
||||||
|
free_percpu(prot->inuse_ptr);
|
||||||
|
prot->inuse_ptr = NULL;
|
||||||
|
prot->inuse_getval = NULL;
|
||||||
|
prot->inuse_add = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (prot->slab != NULL) {
|
if (prot->slab != NULL) {
|
||||||
kmem_cache_destroy(prot->slab);
|
kmem_cache_destroy(prot->slab);
|
||||||
prot->slab = NULL;
|
prot->slab = NULL;
|
||||||
|
@ -46,17 +46,6 @@
|
|||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <net/raw.h>
|
#include <net/raw.h>
|
||||||
|
|
||||||
static int fold_prot_inuse(struct proto *proto)
|
|
||||||
{
|
|
||||||
int res = 0;
|
|
||||||
int cpu;
|
|
||||||
|
|
||||||
for_each_possible_cpu(cpu)
|
|
||||||
res += proto->stats[cpu].inuse;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Report socket allocation statistics [mea@utu.fi]
|
* Report socket allocation statistics [mea@utu.fi]
|
||||||
*/
|
*/
|
||||||
@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
|
|||||||
{
|
{
|
||||||
socket_seq_show(seq);
|
socket_seq_show(seq);
|
||||||
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
|
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
|
||||||
fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
|
sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
|
||||||
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
|
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
|
||||||
atomic_read(&tcp_memory_allocated));
|
atomic_read(&tcp_memory_allocated));
|
||||||
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
|
seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
|
||||||
seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
|
seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
|
||||||
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
|
seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
|
||||||
seq_printf(seq, "FRAG: inuse %d memory %d\n",
|
seq_printf(seq, "FRAG: inuse %d memory %d\n",
|
||||||
ip_frag_nqueues(), ip_frag_mem());
|
ip_frag_nqueues(), ip_frag_mem());
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -32,27 +32,16 @@
|
|||||||
|
|
||||||
static struct proc_dir_entry *proc_net_devsnmp6;
|
static struct proc_dir_entry *proc_net_devsnmp6;
|
||||||
|
|
||||||
static int fold_prot_inuse(struct proto *proto)
|
|
||||||
{
|
|
||||||
int res = 0;
|
|
||||||
int cpu;
|
|
||||||
|
|
||||||
for_each_possible_cpu(cpu)
|
|
||||||
res += proto->stats[cpu].inuse;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sockstat6_seq_show(struct seq_file *seq, void *v)
|
static int sockstat6_seq_show(struct seq_file *seq, void *v)
|
||||||
{
|
{
|
||||||
seq_printf(seq, "TCP6: inuse %d\n",
|
seq_printf(seq, "TCP6: inuse %d\n",
|
||||||
fold_prot_inuse(&tcpv6_prot));
|
sock_prot_inuse(&tcpv6_prot));
|
||||||
seq_printf(seq, "UDP6: inuse %d\n",
|
seq_printf(seq, "UDP6: inuse %d\n",
|
||||||
fold_prot_inuse(&udpv6_prot));
|
sock_prot_inuse(&udpv6_prot));
|
||||||
seq_printf(seq, "UDPLITE6: inuse %d\n",
|
seq_printf(seq, "UDPLITE6: inuse %d\n",
|
||||||
fold_prot_inuse(&udplitev6_prot));
|
sock_prot_inuse(&udplitev6_prot));
|
||||||
seq_printf(seq, "RAW6: inuse %d\n",
|
seq_printf(seq, "RAW6: inuse %d\n",
|
||||||
fold_prot_inuse(&rawv6_prot));
|
sock_prot_inuse(&rawv6_prot));
|
||||||
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
|
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
|
||||||
ip6_frag_nqueues(), ip6_frag_mem());
|
ip6_frag_nqueues(), ip6_frag_mem());
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user