mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 11:57:46 +00:00
bpf/lpm_trie: Inline longest_prefix_match for fastpath
The BPF map type LPM (Longest Prefix Match) is used heavily in production by multiple products that have BPF components. Perf data shows trie_lookup_elem() and longest_prefix_match() being part of the kernel's perf top output. For every level in the LPM trie, trie_lookup_elem() calls out to longest_prefix_match(). The compiler is free to inline this call, but chooses not to, because other slowpath callers (that can be invoked via syscall) exist, such as trie_update_elem(), trie_delete_elem() and trie_get_next_key(). bcc/tools/funccount -Ti 1 'trie_lookup_elem|longest_prefix_match.isra.0' FUNC COUNT trie_lookup_elem 664945 longest_prefix_match.isra.0 8101507 Observation on a single random machine shows a factor of 12 between the two functions, implying an average of 12 levels in the trie being searched. This patch forces inlining of longest_prefix_match(), but only for the lookup fastpath, to balance object instruction size. In production with AMD CPUs, measuring the function latency of 'trie_lookup_elem' (bcc/tools/funclatency), we are seeing a function latency reduction of 7-8% with this patch applied (to production kernels 6.6 and 6.1). Analyzing perf data, we can explain this rather large improvement by the reduced overhead of the AMD side-channel mitigation SRSO (Speculative Return Stack Overflow). Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Yonghong Song <yonghong.song@linux.dev> Link: https://lore.kernel.org/bpf/171076828575.2141737.18370644069389889027.stgit@firesoul
This commit is contained in:
parent
c733239f8f
commit
1a4a0cb798
@ -155,16 +155,17 @@ static inline int extract_bit(const u8 *data, size_t index)
|
||||
}
|
||||
|
||||
/**
|
||||
* longest_prefix_match() - determine the longest prefix
|
||||
* __longest_prefix_match() - determine the longest prefix
|
||||
* @trie: The trie to get internal sizes from
|
||||
* @node: The node to operate on
|
||||
* @key: The key to compare to @node
|
||||
*
|
||||
* Determine the longest prefix of @node that matches the bits in @key.
|
||||
*/
|
||||
static size_t longest_prefix_match(const struct lpm_trie *trie,
|
||||
const struct lpm_trie_node *node,
|
||||
const struct bpf_lpm_trie_key_u8 *key)
|
||||
static __always_inline
|
||||
size_t __longest_prefix_match(const struct lpm_trie *trie,
|
||||
const struct lpm_trie_node *node,
|
||||
const struct bpf_lpm_trie_key_u8 *key)
|
||||
{
|
||||
u32 limit = min(node->prefixlen, key->prefixlen);
|
||||
u32 prefixlen = 0, i = 0;
|
||||
@ -224,6 +225,13 @@ static size_t longest_prefix_match(const struct lpm_trie *trie,
|
||||
return prefixlen;
|
||||
}
|
||||
|
||||
static size_t longest_prefix_match(const struct lpm_trie *trie,
|
||||
const struct lpm_trie_node *node,
|
||||
const struct bpf_lpm_trie_key_u8 *key)
|
||||
{
|
||||
return __longest_prefix_match(trie, node, key);
|
||||
}
|
||||
|
||||
/* Called from syscall or from eBPF program */
|
||||
static void *trie_lookup_elem(struct bpf_map *map, void *_key)
|
||||
{
|
||||
@ -245,7 +253,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
|
||||
* If it's the maximum possible prefix for this trie, we have
|
||||
* an exact match and can return it directly.
|
||||
*/
|
||||
matchlen = longest_prefix_match(trie, node, key);
|
||||
matchlen = __longest_prefix_match(trie, node, key);
|
||||
if (matchlen == trie->max_prefixlen) {
|
||||
found = node;
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user