bpf: Use raw_spinlock_t for LPM trie

After switching from kmalloc() to the bpf memory allocator, there are no
blocking operations during an update of the LPM trie. Therefore, change
trie->lock from spinlock_t to raw_spinlock_t to make the LPM trie usable in
atomic context, even on RT kernels.

The max value of prefixlen is 2048. Therefore, update or deletion
operations will find the target after at most 2048 comparisons.
A test case that updates an element after 2048 comparisons was
constructed on an 8-CPU VM; the average and maximal times for such an
update operation are about 210us and 900us, respectively.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20241206110622.1161752-8-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Hou Tao 2024-12-06 19:06:20 +08:00 committed by Alexei Starovoitov
parent 3d8dc43eb2
commit 6a5c63d43c

View File

@ -36,7 +36,7 @@ struct lpm_trie {
size_t n_entries; size_t n_entries;
size_t max_prefixlen; size_t max_prefixlen;
size_t data_size; size_t data_size;
spinlock_t lock; raw_spinlock_t lock;
}; };
/* This trie implements a longest prefix match algorithm that can be used to /* This trie implements a longest prefix match algorithm that can be used to
@ -349,7 +349,7 @@ static long trie_update_elem(struct bpf_map *map,
if (!new_node) if (!new_node)
return -ENOMEM; return -ENOMEM;
spin_lock_irqsave(&trie->lock, irq_flags); raw_spin_lock_irqsave(&trie->lock, irq_flags);
new_node->prefixlen = key->prefixlen; new_node->prefixlen = key->prefixlen;
RCU_INIT_POINTER(new_node->child[0], NULL); RCU_INIT_POINTER(new_node->child[0], NULL);
@ -450,7 +450,7 @@ static long trie_update_elem(struct bpf_map *map,
rcu_assign_pointer(*slot, im_node); rcu_assign_pointer(*slot, im_node);
out: out:
spin_unlock_irqrestore(&trie->lock, irq_flags); raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
migrate_disable(); migrate_disable();
if (ret) if (ret)
@ -477,7 +477,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
if (key->prefixlen > trie->max_prefixlen) if (key->prefixlen > trie->max_prefixlen)
return -EINVAL; return -EINVAL;
spin_lock_irqsave(&trie->lock, irq_flags); raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping /* Walk the tree looking for an exact key/length match and keeping
* track of the path we traverse. We will need to know the node * track of the path we traverse. We will need to know the node
@ -553,7 +553,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
free_node = node; free_node = node;
out: out:
spin_unlock_irqrestore(&trie->lock, irq_flags); raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
migrate_disable(); migrate_disable();
bpf_mem_cache_free_rcu(&trie->ma, free_parent); bpf_mem_cache_free_rcu(&trie->ma, free_parent);
@ -604,7 +604,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key_u8, data); offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8; trie->max_prefixlen = trie->data_size * 8;
spin_lock_init(&trie->lock); raw_spin_lock_init(&trie->lock);
/* Allocate intermediate and leaf nodes from the same allocator */ /* Allocate intermediate and leaf nodes from the same allocator */
leaf_size = sizeof(struct lpm_trie_node) + trie->data_size + leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +