Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
rtnetlink: Add per-netns RTNL.
The goal is to break RTNL down into per-netns mutexes.

This patch adds a per-netns mutex and its helper functions, rtnl_net_lock() and rtnl_net_unlock(). rtnl_net_lock() acquires the global RTNL and the per-netns RTNL mutex, and rtnl_net_unlock() releases them.

We will replace 800+ rtnl_lock() call sites with rtnl_net_lock() and finally remove the rtnl_lock() call inside rtnl_net_lock().

When we need to nest per-netns RTNL mutexes, we will use __rtnl_net_lock(), and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:

  1. init_net is first
  2. netns address ascending order

Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL with LOCKDEP so that we can carefully add the extra mutex without slowing down RTNL operations during the conversion.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Commit 76aed95319 (parent ec763c234d)
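As a rough sketch of how the new helpers are meant to be used once the conversion lands (this caller is hypothetical and not part of the patch; example_set_flag() and the example_flag field are made up for illustration), a netns-scoped path that today wraps its work in rtnl_lock()/rtnl_unlock() would take the per-netns lock like this:

/* Hypothetical caller, for illustration only: a netns-scoped
 * configuration path converted from rtnl_lock()/rtnl_unlock() to the
 * new per-netns helpers.
 */
static int example_set_flag(struct net *net, bool on)
{
	rtnl_net_lock(net);	/* acquires global RTNL, then net->rtnl_mutex */
	net->example_flag = on;	/* hypothetical member of struct net */
	rtnl_net_unlock(net);	/* releases net->rtnl_mutex, then global RTNL */

	return 0;
}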
@@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void)
 #define rcu_replace_pointer_rtnl(rp, p)			\
 	rcu_replace_pointer(rp, p, lockdep_rtnl_is_held())
 
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net);
+void __rtnl_net_unlock(struct net *net);
+void rtnl_net_lock(struct net *net);
+void rtnl_net_unlock(struct net *net);
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
+#else
+static inline void __rtnl_net_lock(struct net *net) {}
+static inline void __rtnl_net_unlock(struct net *net) {}
+
+static inline void rtnl_net_lock(struct net *net)
+{
+	rtnl_lock();
+}
+
+static inline void rtnl_net_unlock(struct net *net)
+{
+	rtnl_unlock();
+}
+#endif
+
 static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 {
 	return rtnl_dereference(dev->ingress_queue);
@@ -188,6 +188,10 @@ struct net {
 #if IS_ENABLED(CONFIG_SMC)
 	struct netns_smc	smc;
 #endif
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+	/* Move to a better place when the config guard is removed. */
+	struct mutex		rtnl_mutex;
+#endif
 } __randomize_layout;
 
 #include <linux/seq_file_net.h>
@@ -24,3 +24,18 @@ config DEBUG_NET
 	help
 	  Enable extra sanity checks in networking.
 	  This is mostly used by fuzzers, but is safe to select.
+
+config DEBUG_NET_SMALL_RTNL
+	bool "Add extra per-netns mutex inside RTNL"
+	depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
+	select PROVE_LOCKING
+	default n
+	help
+	  rtnl_lock() is being replaced with rtnl_net_lock() that
+	  acquires the global RTNL and a small per-netns RTNL mutex.
+
+	  During the conversion, rtnl_net_lock() just adds an extra
+	  mutex in every RTNL scope and slows down the operations.
+
+	  Once the conversion completes, rtnl_lock() will be removed
+	  and rtnetlink will gain per-netns scalability.
@@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
 	idr_init(&net->netns_ids);
 	spin_lock_init(&net->nsid_lock);
 	mutex_init(&net->ipv4.ra_mutex);
+
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+	mutex_init(&net->rtnl_mutex);
+	lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
+#endif
+
 	preinit_net_sysctl(net);
 }
 
@@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void)
 EXPORT_SYMBOL(lockdep_rtnl_is_held);
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net)
+{
+	ASSERT_RTNL();
+
+	mutex_lock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_lock);
+
+void __rtnl_net_unlock(struct net *net)
+{
+	ASSERT_RTNL();
+
+	mutex_unlock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_unlock);
+
+void rtnl_net_lock(struct net *net)
+{
+	rtnl_lock();
+	__rtnl_net_lock(net);
+}
+EXPORT_SYMBOL(rtnl_net_lock);
+
+void rtnl_net_unlock(struct net *net)
+{
+	__rtnl_net_unlock(net);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(rtnl_net_unlock);
+
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+	if (net_eq(net_a, net_b))
+		return 0;
+
+	/* always init_net first */
+	if (net_eq(net_a, &init_net))
+		return -1;
+
+	if (net_eq(net_b, &init_net))
+		return 1;
+
+	/* otherwise lock in ascending order */
+	return net_a < net_b ? -1 : 1;
+}
+
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
+{
+	const struct net *net_a, *net_b;
+
+	net_a = container_of(a, struct net, rtnl_mutex.dep_map);
+	net_b = container_of(b, struct net, rtnl_mutex.dep_map);
+
+	return rtnl_net_cmp_locks(net_a, net_b);
+}
+#endif
+
 static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
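rtnl_net_lock_cmp_fn() above only teaches lockdep the expected ordering; a caller that really needs to hold two per-netns mutexes at once must order its __rtnl_net_lock() calls itself. A minimal sketch of such a caller, assuming RTNL is already held (the helper name example_net_lock_pair() is made up for illustration and not part of this patch):

/* Illustrative only: take the per-netns mutexes of two namespaces in
 * the order rtnl_net_lock_cmp_fn() expects (init_net first, otherwise
 * ascending netns address).  Caller must already hold RTNL.
 */
static void example_net_lock_pair(struct net *net_a, struct net *net_b)
{
	/* Put the lock that must be taken first into net_a, mirroring
	 * the decision made by rtnl_net_cmp_locks().
	 */
	if (net_eq(net_b, &init_net) ||
	    (!net_eq(net_a, &init_net) && net_b < net_a))
		swap(net_a, net_b);

	__rtnl_net_lock(net_a);
	if (!net_eq(net_a, net_b))
		__rtnl_net_lock(net_b);
}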