mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-13 01:08:50 +00:00
2b035b3997
It is fairly common to kill several network namespaces at once. Either because they are nested one inside the other or because they are cooperating in multiple machine networking experiments. As the network stack control logic does not parallelize easily, batch up multiple network namespaces exiting together. To get the full benefit of batching, the virtual network devices to be removed must all be removed in one batch. For that purpose I have added a loop after the last network device operations have run that batches up all remaining network devices and deletes them. An extra benefit is that the reorganization slightly shrinks the size of the per network namespace data structures, replacing a work_struct with a list_head. In a trivial test with 4K namespaces this change reduced the cost of destroying 4K namespaces from 7+ minutes (at 12% cpu) to 44 seconds (at 60% cpu). The bulk of that 44s was spent in inet_twsk_purge. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
280 lines
6.7 KiB
C
280 lines
6.7 KiB
C
/*
|
|
* Operations on the network namespace
|
|
*/
|
|
#ifndef __NET_NET_NAMESPACE_H
|
|
#define __NET_NET_NAMESPACE_H
|
|
|
|
#include <asm/atomic.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/list.h>
|
|
|
|
#include <net/netns/core.h>
|
|
#include <net/netns/mib.h>
|
|
#include <net/netns/unix.h>
|
|
#include <net/netns/packet.h>
|
|
#include <net/netns/ipv4.h>
|
|
#include <net/netns/ipv6.h>
|
|
#include <net/netns/dccp.h>
|
|
#include <net/netns/x_tables.h>
|
|
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
|
#include <net/netns/conntrack.h>
|
|
#endif
|
|
#include <net/netns/xfrm.h>
|
|
|
|
/* Opaque types that this header only refers to through pointers. */
struct proc_dir_entry;
struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
|
|
|
|
|
|
/*
 * Width, in bits, of a network device hash and the resulting number of
 * buckets (2^NETDEV_HASHBITS).
 * NOTE(review): presumably sizes the dev_name_head / dev_index_head tables
 * in struct net -- confirm against the allocation site.
 */
#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
|
|
|
|
struct net {
|
|
atomic_t count; /* To decided when the network
|
|
* namespace should be freed.
|
|
*/
|
|
#ifdef NETNS_REFCNT_DEBUG
|
|
atomic_t use_count; /* To track references we
|
|
* destroy on demand
|
|
*/
|
|
#endif
|
|
struct list_head list; /* list of network namespaces */
|
|
struct list_head cleanup_list; /* namespaces on death row */
|
|
|
|
struct proc_dir_entry *proc_net;
|
|
struct proc_dir_entry *proc_net_stat;
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
struct ctl_table_set sysctls;
|
|
#endif
|
|
|
|
struct net_device *loopback_dev; /* The loopback */
|
|
|
|
struct list_head dev_base_head;
|
|
struct hlist_head *dev_name_head;
|
|
struct hlist_head *dev_index_head;
|
|
|
|
/* core fib_rules */
|
|
struct list_head rules_ops;
|
|
spinlock_t rules_mod_lock;
|
|
|
|
struct sock *rtnl; /* rtnetlink socket */
|
|
struct sock *genl_sock;
|
|
|
|
struct netns_core core;
|
|
struct netns_mib mib;
|
|
struct netns_packet packet;
|
|
struct netns_unix unx;
|
|
struct netns_ipv4 ipv4;
|
|
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
|
struct netns_ipv6 ipv6;
|
|
#endif
|
|
#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
|
|
struct netns_dccp dccp;
|
|
#endif
|
|
#ifdef CONFIG_NETFILTER
|
|
struct netns_xt xt;
|
|
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
|
struct netns_ct ct;
|
|
#endif
|
|
#endif
|
|
#ifdef CONFIG_XFRM
|
|
struct netns_xfrm xfrm;
|
|
#endif
|
|
#ifdef CONFIG_WEXT_CORE
|
|
struct sk_buff_head wext_nlevents;
|
|
#endif
|
|
struct net_generic *gen;
|
|
};
|
|
|
|
|
|
#include <linux/seq_file_net.h>
|
|
|
|
/* Init's network namespace; defined outside this header. */
extern struct net init_net;
|
|
|
|
#ifdef CONFIG_NET
/*
 * Initializer fragment that points a "net_ns" member at init_net.  The
 * trailing comma is part of the expansion so the macro can be dropped into
 * a designated-initializer list.
 */
#define INIT_NET_NS(net_ns) .net_ns = &init_net,

/* Implemented outside this header when networking is built in. */
extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);

#else /* CONFIG_NET */

/* Without networking there is no namespace member to initialize. */
#define INIT_NET_NS(net_ns)

/*
 * copy_net_ns - stub used when CONFIG_NET is disabled.
 * @flags:  ignored.
 * @net_ns: namespace pointer supplied by the caller.
 *
 * Returns @net_ns unchanged.
 */
static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
{
	/* There is nothing to copy so this is a noop */
	return net_ns;
}
#endif /* CONFIG_NET */
|
|
|
|
|
|
/* List head walked by for_each_net() and for_each_net_rcu(). */
extern struct list_head net_namespace_list;

/*
 * Look up a network namespace from a pid.
 * NOTE(review): whether the returned namespace carries a reference the
 * caller must drop is not visible from this header -- confirm against the
 * implementation.
 */
extern struct net *get_net_ns_by_pid(pid_t pid);
|
|
|
|
#ifdef CONFIG_NET_NS
|
|
/* Invoked by put_net() once the namespace's count has dropped to zero. */
extern void __put_net(struct net *net);
|
|
|
|
static inline struct net *get_net(struct net *net)
|
|
{
|
|
atomic_inc(&net->count);
|
|
return net;
|
|
}
|
|
|
|
static inline struct net *maybe_get_net(struct net *net)
|
|
{
|
|
/* Used when we know struct net exists but we
|
|
* aren't guaranteed a previous reference count
|
|
* exists. If the reference count is zero this
|
|
* function fails and returns NULL.
|
|
*/
|
|
if (!atomic_inc_not_zero(&net->count))
|
|
net = NULL;
|
|
return net;
|
|
}
|
|
|
|
static inline void put_net(struct net *net)
|
|
{
|
|
if (atomic_dec_and_test(&net->count))
|
|
__put_net(net);
|
|
}
|
|
|
|
/*
 * net_eq - test whether two pointers name the same network namespace.
 *
 * Returns 1 when @net1 and @net2 are the same pointer, 0 otherwise.
 * Neither pointer is dereferenced.
 */
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;
}
|
|
#else
|
|
|
|
/*
 * get_net - stub for kernels built without CONFIG_NET_NS.
 *
 * No reference count is touched; @net is returned unchanged and is never
 * dereferenced.
 */
static inline struct net *get_net(struct net *net)
{
	return net;
}
|
|
|
|
/*
 * put_net - stub for kernels built without CONFIG_NET_NS.
 *
 * Does nothing; @net is never dereferenced.
 */
static inline void put_net(struct net *net)
{
}
|
|
|
|
/*
 * maybe_get_net - stub for kernels built without CONFIG_NET_NS.
 *
 * Never fails: @net is returned unchanged and no count is consulted.
 */
static inline struct net *maybe_get_net(struct net *net)
{
	return net;
}
|
|
|
|
/*
 * net_eq - stub for kernels built without CONFIG_NET_NS.
 *
 * Always returns 1, regardless of the pointers passed in; neither argument
 * is inspected.
 */
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;
}
|
|
#endif
|
|
|
|
|
|
#ifdef NETNS_REFCNT_DEBUG
|
|
static inline struct net *hold_net(struct net *net)
|
|
{
|
|
if (net)
|
|
atomic_inc(&net->use_count);
|
|
return net;
|
|
}
|
|
|
|
static inline void release_net(struct net *net)
|
|
{
|
|
if (net)
|
|
atomic_dec(&net->use_count);
|
|
}
|
|
#else
|
|
/*
 * hold_net - stub used when NETNS_REFCNT_DEBUG is not defined.
 *
 * Returns @net unchanged without dereferencing it.
 */
static inline struct net *hold_net(struct net *net)
{
	return net;
}
|
|
|
|
/*
 * release_net - stub used when NETNS_REFCNT_DEBUG is not defined.
 *
 * Does nothing; @net is never dereferenced.
 */
static inline void release_net(struct net *net)
{
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_NET_NS
|
|
|
|
/*
 * write_pnet - store a namespace pointer in a holder's slot.
 * @pnet: address of the slot to write; must not be NULL.
 * @net:  value to store.
 *
 * Plain assignment: no reference is taken on @net.
 */
static inline void write_pnet(struct net **pnet, struct net *net)
{
	*pnet = net;
}
|
|
|
|
/*
 * read_pnet - load the namespace pointer stored in a holder's slot.
 * @pnet: address of the slot to read; must not be NULL.
 *
 * Returns the stored pointer as-is; no reference is taken.
 */
static inline struct net *read_pnet(struct net * const *pnet)
{
	return *pnet;
}
|
|
|
|
#else
|
|
|
|
/*
 * Without CONFIG_NET_NS nothing is stored per holder: write_pnet() only
 * evaluates @net (and never @pnet), and read_pnet() always yields
 * &init_net without evaluating its argument.
 */
#define write_pnet(pnet, net)	do { (void)(net);} while (0)
#define read_pnet(pnet)		(&init_net)
|
|
|
|
#endif
|
|
|
|
/*
 * Iterate VAR (a struct net *) over every namespace linked on
 * net_namespace_list through its "list" member.
 * NOTE(review): the locking the caller must hold is not visible here --
 * confirm against the users of net_namespace_list.
 */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)

/* Same walk using the RCU list iterator. */
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)
|
|
|
|
/*
 * Section annotations for per-namespace init/exit code and data.
 *
 * With CONFIG_NET_NS they expand to nothing; without it they map onto
 * __init, __exit_refok and __initdata.
 * NOTE(review): presumably this is because namespace init/exit hooks can
 * run at any time once namespaces exist, so they may not live in
 * discardable sections -- confirm.
 */
#ifdef CONFIG_NET_NS
#define __net_init
#define __net_exit
#define __net_initdata
#else
#define __net_init	__init
#define __net_exit	__exit_refok
#define __net_initdata	__initdata
#endif
|
|
|
|
struct pernet_operations {
|
|
struct list_head list;
|
|
int (*init)(struct net *net);
|
|
void (*exit)(struct net *net);
|
|
};
|
|
|
|
/*
 * Use these carefully.  If you implement a network device and it
 * needs per network namespace operations use device pernet operations,
 * otherwise use pernet subsys operations.
 *
 * Network interfaces need to be removed from a dying netns _before_
 * subsys notifiers can be called, as most of the network code cleanup
 * (which is done from subsys notifiers) runs with the assumption that
 * dev_remove_pack has been called so no new packets will arrive during
 * and after the cleanup functions have been called.  dev_remove_pack
 * is not per namespace so instead the guarantee of no more packets
 * arriving in a network namespace is provided by ensuring that all
 * network devices and all sockets have left the network namespace
 * before the cleanup methods are called.
 *
 * For the longest time the ipv4 icmp code was registered as a pernet
 * device which caused kernel oops, and panics during network
 * namespace cleanup.  So please don't get this wrong.
 *
 * NOTE(review): the *_gen_* variants additionally exchange an integer id;
 * presumably it identifies the caller's slot in net->gen -- confirm
 * against the implementation.
 */
extern int register_pernet_subsys(struct pernet_operations *ops);
extern void unregister_pernet_subsys(struct pernet_operations *ops);
extern int register_pernet_gen_subsys(int *id, struct pernet_operations *ops);
extern void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops);
extern int register_pernet_device(struct pernet_operations *ops);
extern void unregister_pernet_device(struct pernet_operations *ops);
extern int register_pernet_gen_device(int *id, struct pernet_operations *ops);
extern void unregister_pernet_gen_device(int id, struct pernet_operations *ops);
|
|
|
|
/* Opaque sysctl types; only pointers to them appear below. */
struct ctl_path;
struct ctl_table;
struct ctl_table_header;

/*
 * Sysctl table registration for the networking code.
 *
 * register_net_sysctl_table() takes the namespace the table belongs to;
 * register_net_sysctl_rotable() takes no namespace argument.  Both return
 * a header that is later passed to unregister_net_sysctl_table().
 * NOTE(review): failure is presumably reported as a NULL header -- confirm
 * against the implementation.
 */
extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
	const struct ctl_path *path, struct ctl_table *table);
extern struct ctl_table_header *register_net_sysctl_rotable(
	const struct ctl_path *path, struct ctl_table *table);
extern void unregister_net_sysctl_table(struct ctl_table_header *header);
|
|
|
|
#endif /* __NET_NET_NAMESPACE_H */
|