mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 23:00:21 +00:00
netlink: Lockless lookup with RCU grace period in socket release
Defers the release of the socket reference using call_rcu() to allow using an RCU read-side protected call to rhashtable_lookup(). This restores behaviour and performance gains as previously introduced by e341694 ("netlink: Convert netlink_lookup() to use RCU protected hash table") without the side effect of severely delayed socket destruction. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
f89bd6f87a
commit
21e4902aea
@ -97,12 +97,12 @@ static int netlink_dump(struct sock *sk);
|
|||||||
static void netlink_skb_destructor(struct sk_buff *skb);
|
static void netlink_skb_destructor(struct sk_buff *skb);
|
||||||
|
|
||||||
/* nl_table locking explained:
|
/* nl_table locking explained:
|
||||||
* Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
|
* Lookup and traversal are protected with an RCU read-side lock. Insertion
|
||||||
* combined with an RCU read-side lock. Insertion and removal are protected
|
* and removal are protected with nl_sk_hash_lock while using RCU list
|
||||||
* with nl_sk_hash_lock while using RCU list modification primitives and may
|
* modification primitives and may run in parallel to RCU protected lookups.
|
||||||
* run in parallel to nl_table_lock protected lookups. Destruction of the
|
* Destruction of the Netlink socket may only occur *after* nl_table_lock has
|
||||||
* Netlink socket may only occur *after* nl_table_lock has been acquired
|
* been acquired either during or after the socket has been removed from
|
||||||
* either during or after the socket has been removed from the list.
|
* the list and after an RCU grace period.
|
||||||
*/
|
*/
|
||||||
DEFINE_RWLOCK(nl_table_lock);
|
DEFINE_RWLOCK(nl_table_lock);
|
||||||
EXPORT_SYMBOL_GPL(nl_table_lock);
|
EXPORT_SYMBOL_GPL(nl_table_lock);
|
||||||
@ -1003,13 +1003,11 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
|
|||||||
struct netlink_table *table = &nl_table[protocol];
|
struct netlink_table *table = &nl_table[protocol];
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
|
||||||
read_lock(&nl_table_lock);
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
sk = __netlink_lookup(table, portid, net);
|
sk = __netlink_lookup(table, portid, net);
|
||||||
if (sk)
|
if (sk)
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
read_unlock(&nl_table_lock);
|
|
||||||
|
|
||||||
return sk;
|
return sk;
|
||||||
}
|
}
|
||||||
@ -1183,6 +1181,13 @@ out_module:
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void deferred_put_nlk_sk(struct rcu_head *head)
|
||||||
|
{
|
||||||
|
struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
|
||||||
|
|
||||||
|
sock_put(&nlk->sk);
|
||||||
|
}
|
||||||
|
|
||||||
static int netlink_release(struct socket *sock)
|
static int netlink_release(struct socket *sock)
|
||||||
{
|
{
|
||||||
struct sock *sk = sock->sk;
|
struct sock *sk = sock->sk;
|
||||||
@ -1248,7 +1253,7 @@ static int netlink_release(struct socket *sock)
|
|||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
|
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
sock_put(sk);
|
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1263,7 +1268,6 @@ static int netlink_autobind(struct socket *sock)
|
|||||||
|
|
||||||
retry:
|
retry:
|
||||||
cond_resched();
|
cond_resched();
|
||||||
netlink_table_grab();
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
if (__netlink_lookup(table, portid, net)) {
|
if (__netlink_lookup(table, portid, net)) {
|
||||||
/* Bind collision, search negative portid values. */
|
/* Bind collision, search negative portid values. */
|
||||||
@ -1271,11 +1275,9 @@ retry:
|
|||||||
if (rover > -4097)
|
if (rover > -4097)
|
||||||
rover = -4097;
|
rover = -4097;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
netlink_table_ungrab();
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
netlink_table_ungrab();
|
|
||||||
|
|
||||||
err = netlink_insert(sk, net, portid);
|
err = netlink_insert(sk, net, portid);
|
||||||
if (err == -EADDRINUSE)
|
if (err == -EADDRINUSE)
|
||||||
@ -2910,9 +2912,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
|
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
|
||||||
__acquires(nl_table_lock) __acquires(RCU)
|
__acquires(RCU)
|
||||||
{
|
{
|
||||||
read_lock(&nl_table_lock);
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
|
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
|
||||||
}
|
}
|
||||||
@ -2964,10 +2965,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void netlink_seq_stop(struct seq_file *seq, void *v)
|
static void netlink_seq_stop(struct seq_file *seq, void *v)
|
||||||
__releases(RCU) __releases(nl_table_lock)
|
__releases(RCU)
|
||||||
{
|
{
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
read_unlock(&nl_table_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ struct netlink_sock {
|
|||||||
#endif /* CONFIG_NETLINK_MMAP */
|
#endif /* CONFIG_NETLINK_MMAP */
|
||||||
|
|
||||||
struct rhash_head node;
|
struct rhash_head node;
|
||||||
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct netlink_sock *nlk_sk(struct sock *sk)
|
static inline struct netlink_sock *nlk_sk(struct sock *sk)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user