neighbour: Create netdev->neighbour association

Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Gilad Naaman 2024-11-07 16:04:43 +00:00 committed by Jakub Kicinski
parent a01a67ab2f
commit f7f5273863
5 changed files with 80 additions and 45 deletions

View File

@ -188,4 +188,5 @@ u64 max_pacing_offload_horizon
struct_napi_config* napi_config struct_napi_config* napi_config
unsigned_long gro_flush_timeout unsigned_long gro_flush_timeout
u32 napi_defer_hard_irqs u32 napi_defer_hard_irqs
struct hlist_head neighbours[2]
=================================== =========================== =================== =================== =================================================================================== =================================== =========================== =================== =================== ===================================================================================

View File

@ -52,6 +52,7 @@
#include <net/net_trackers.h> #include <net/net_trackers.h>
#include <net/net_debug.h> #include <net/net_debug.h>
#include <net/dropreason-core.h> #include <net/dropreason-core.h>
#include <net/neighbour_tables.h>
struct netpoll_info; struct netpoll_info;
struct device; struct device;
@ -2032,6 +2033,9 @@ enum netdev_reg_state {
* @napi_defer_hard_irqs: If not zero, provides a counter that would * @napi_defer_hard_irqs: If not zero, provides a counter that would
* allow to avoid NIC hard IRQ, on busy queues. * allow to avoid NIC hard IRQ, on busy queues.
* *
* @neighbours: List heads pointing to this device's neighbours'
* dev_list, one per address-family.
*
* FIXME: cleanup struct net_device such that network protocol info * FIXME: cleanup struct net_device such that network protocol info
* moves out. * moves out.
*/ */
@ -2440,6 +2444,9 @@ struct net_device {
*/ */
struct net_shaper_hierarchy *net_shaper_hierarchy; struct net_shaper_hierarchy *net_shaper_hierarchy;
#endif #endif
struct hlist_head neighbours[NEIGH_NR_TABLES];
u8 priv[] ____cacheline_aligned u8 priv[] ____cacheline_aligned
__counted_by(priv_len); __counted_by(priv_len);
} ____cacheline_aligned; } ____cacheline_aligned;

View File

@ -29,6 +29,7 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/neighbour_tables.h>
/* /*
* NUD stands for "neighbor unreachability detection" * NUD stands for "neighbor unreachability detection"
@ -136,6 +137,7 @@ struct neigh_statistics {
struct neighbour { struct neighbour {
struct hlist_node hash; struct hlist_node hash;
struct hlist_node dev_list;
struct neigh_table *tbl; struct neigh_table *tbl;
struct neigh_parms *parms; struct neigh_parms *parms;
unsigned long confirmed; unsigned long confirmed;
@ -236,13 +238,6 @@ struct neigh_table {
struct pneigh_entry **phash_buckets; struct pneigh_entry **phash_buckets;
}; };
enum {
NEIGH_ARP_TABLE = 0,
NEIGH_ND_TABLE = 1,
NEIGH_NR_TABLES,
NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
};
static inline int neigh_parms_family(struct neigh_parms *p) static inline int neigh_parms_family(struct neigh_parms *p)
{ {
return p->tbl->family; return p->tbl->family;

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Indices of the per-family neighbour tables, shared between
 * the neighbour core and struct net_device (dev->neighbours[]).
 * Split into its own header so netdevice.h can use NEIGH_NR_TABLES
 * without pulling in all of net/neighbour.h.
 */
#ifndef _NET_NEIGHBOUR_TABLES_H
#define _NET_NEIGHBOUR_TABLES_H
enum {
NEIGH_ARP_TABLE = 0,	/* IPv4 / ARP */
NEIGH_ND_TABLE = 1,	/* IPv6 / NDISC */
NEIGH_NR_TABLES,	/* number of real tables; sizes dev->neighbours[] */
NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
};
#endif

View File

@ -60,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops; static const struct seq_operations neigh_stat_seq_ops;
#endif #endif
/* Map an address family to the matching per-device neighbour list head
 * (dev->neighbours[]).  An unexpected family triggers a one-shot warning
 * and is treated as AF_INET so we never return a pointer outside the
 * array (avoids a null-ptr-deref / out-of-bounds access).
 */
static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int idx;

	switch (family) {
	case AF_INET:
		idx = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		idx = NEIGH_ND_TABLE;
		break;
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		/* fall back to the ARP slot rather than crash */
		idx = NEIGH_ARP_TABLE;
		break;
	}

	return &dev->neighbours[idx];
}
/* /*
Neighbour hash table buckets are protected with rwlock tbl->lock. Neighbour hash table buckets are protected with rwlock tbl->lock.
@ -211,6 +230,7 @@ bool neigh_remove_one(struct neighbour *n)
write_lock(&n->lock); write_lock(&n->lock);
if (refcount_read(&n->refcnt) == 1) { if (refcount_read(&n->refcnt) == 1) {
hlist_del_rcu(&n->hash); hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n); neigh_mark_dead(n);
retval = true; retval = true;
} }
@ -351,48 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm) bool skip_perm)
{ {
int i; struct hlist_head *dev_head;
struct neigh_hash_table *nht; struct hlist_node *tmp;
struct neighbour *n;
nht = rcu_dereference_protected(tbl->nht, dev_head = neigh_get_dev_table(dev, tbl->family);
lockdep_is_held(&tbl->lock));
for (i = 0; i < (1 << nht->hash_shift); i++) { hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
struct hlist_node *tmp; if (skip_perm && n->nud_state & NUD_PERMANENT)
struct neighbour *n; continue;
neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { hlist_del_rcu(&n->hash);
if (dev && n->dev != dev) hlist_del_rcu(&n->dev_list);
continue; write_lock(&n->lock);
if (skip_perm && n->nud_state & NUD_PERMANENT) neigh_del_timer(n);
continue; neigh_mark_dead(n);
if (refcount_read(&n->refcnt) != 1) {
hlist_del_rcu(&n->hash); /* The most unpleasant situation.
write_lock(&n->lock); * We must destroy neighbour entry,
neigh_del_timer(n); * but someone still uses it.
neigh_mark_dead(n); *
if (refcount_read(&n->refcnt) != 1) { * The destroy will be delayed until
/* The most unpleasant situation. * the last user releases us, but
We must destroy neighbour entry, * we must kill timers etc. and move
but someone still uses it. * it to safe state.
*/
The destroy will be delayed until __skb_queue_purge(&n->arp_queue);
the last user releases us, but n->arp_queue_len_bytes = 0;
we must kill timers etc. and move WRITE_ONCE(n->output, neigh_blackhole);
it to safe state. if (n->nud_state & NUD_VALID)
*/ n->nud_state = NUD_NOARP;
__skb_queue_purge(&n->arp_queue); else
n->arp_queue_len_bytes = 0; n->nud_state = NUD_NONE;
WRITE_ONCE(n->output, neigh_blackhole); neigh_dbg(2, "neigh %p is stray\n", n);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
neigh_dbg(2, "neigh %p is stray\n", n);
}
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
} }
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
} }
} }
@ -655,6 +669,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
if (want_ref) if (want_ref)
neigh_hold(n); neigh_hold(n);
hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]); hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
hlist_add_head_rcu(&n->dev_list,
neigh_get_dev_table(dev, tbl->family));
write_unlock_bh(&tbl->lock); write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n); neigh_dbg(2, "neigh %p is created\n", n);
rc = n; rc = n;
@ -935,6 +953,7 @@ static void neigh_periodic_work(struct work_struct *work)
!time_in_range_open(jiffies, n->used, !time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
hlist_del_rcu(&n->hash); hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n); neigh_mark_dead(n);
write_unlock(&n->lock); write_unlock(&n->lock);
neigh_cleanup_and_release(n); neigh_cleanup_and_release(n);
@ -3054,6 +3073,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
release = cb(n); release = cb(n);
if (release) { if (release) {
hlist_del_rcu(&n->hash); hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n); neigh_mark_dead(n);
} }
write_unlock(&n->lock); write_unlock(&n->lock);