Merge branch 'fixes-for-lpm-trie'

Hou Tao says:

====================
This patch set fixes several issues for LPM trie. These issues were
found while adding new test cases or were reported by syzbot.

The patch set is structured as follows:

Patch #1~#2 are clean-ups for lpm_trie_update_elem().
Patch #3 handles BPF_EXIST and BPF_NOEXIST correctly for LPM trie.
Patch #4 fixes the accounting of n_entries when doing in-place update.
Patch #5 fixes the exact match condition in trie_get_next_key(); without
the fix it may skip keys when the passed key is not found in the map.
Patch #6~#7 switch from kmalloc() to bpf memory allocator for LPM trie
to fix several lock order warnings reported by syzbot. It also enables
raw_spinlock_t for LPM trie again. After these changes, the LPM trie will
be closer to being usable in any context (though the reentrance check of
trie->lock is still missing, but it is on my todo list).
Patch #8: move test_lpm_map to map_tests to make it run regularly.
Patch #9: add test cases for the issues fixed by patch #3~#5.

Please see individual patches for more details. Comments are always
welcome.

Change Log:
v3:
  * patch #2: remove the unnecessary NULL-init for im_node
  * patch #6: alloc the leaf node before disabling IRQ to lower
    the possibility of -ENOMEM when leaf_size is large; free
    these nodes outside the trie lock (Suggested by Alexei)
  * collect review and ack tags (Thanks to Toke & Daniel)

v2: https://lore.kernel.org/bpf/20241127004641.1118269-1-houtao@huaweicloud.com/
  * collect review tags (Thanks to Toke)
  * drop "Add bpf_mem_cache_is_mergeable() helper" patch
  * patch #3~#4: add fix tag
  * patch #4: rename the helper to trie_check_add_elem() and increase
    n_entries in it.
  * patch #6: use one bpf mem allocator and update commit message to
    clarify that using bpf mem allocator is more appropriate.
  * patch #7: update commit message to add the possible max running time
    for update operation.
  * patch #9: update commit message to specify the purpose of these test
    cases.

v1: https://lore.kernel.org/bpf/20241118010808.2243555-1-houtao@huaweicloud.com/
====================

Link: https://lore.kernel.org/all/20241206110622.1161752-1-houtao@huaweicloud.com/
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2024-12-06 09:14:26 -08:00
commit 509df676c2
4 changed files with 484 additions and 57 deletions

View File

@ -15,6 +15,7 @@
#include <net/ipv6.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
/* Intermediate node */
#define LPM_TREE_NODE_FLAG_IM BIT(0)
@ -22,7 +23,6 @@
struct lpm_trie_node;
struct lpm_trie_node {
struct rcu_head rcu;
struct lpm_trie_node __rcu *child[2];
u32 prefixlen;
u32 flags;
@ -32,10 +32,11 @@ struct lpm_trie_node {
struct lpm_trie {
struct bpf_map map;
struct lpm_trie_node __rcu *root;
struct bpf_mem_alloc ma;
size_t n_entries;
size_t max_prefixlen;
size_t data_size;
spinlock_t lock;
raw_spinlock_t lock;
};
/* This trie implements a longest prefix match algorithm that can be used to
@ -287,17 +288,18 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
return found->data + trie->data_size;
}
static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
const void *value)
static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie,
const void *value,
bool disable_migration)
{
struct lpm_trie_node *node;
size_t size = sizeof(struct lpm_trie_node) + trie->data_size;
if (value)
size += trie->map.value_size;
if (disable_migration)
migrate_disable();
node = bpf_mem_cache_alloc(&trie->ma);
if (disable_migration)
migrate_enable();
node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN,
trie->map.numa_node);
if (!node)
return NULL;
@ -310,12 +312,22 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
return node;
}
/* Decide whether one more element may be added under the given update
 * @flags and, if so, account for it.
 *
 * Returns -ENOENT when @flags is BPF_EXIST (the caller required an existing
 * element), -ENOSPC when n_entries already equals map.max_entries, and 0
 * after incrementing n_entries otherwise.
 */
static int trie_check_add_elem(struct lpm_trie *trie, u64 flags)
{
	int err = 0;

	if (flags == BPF_EXIST)
		err = -ENOENT;
	else if (trie->n_entries == trie->map.max_entries)
		err = -ENOSPC;
	else
		trie->n_entries++;

	return err;
}
/* Called from syscall or from eBPF program */
static long trie_update_elem(struct bpf_map *map,
void *_key, void *value, u64 flags)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
struct lpm_trie_node *node, *im_node, *new_node;
struct lpm_trie_node *free_node = NULL;
struct lpm_trie_node __rcu **slot;
struct bpf_lpm_trie_key_u8 *key = _key;
@ -330,22 +342,14 @@ static long trie_update_elem(struct bpf_map *map,
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
spin_lock_irqsave(&trie->lock, irq_flags);
/* Allocate and fill a new node. Need to disable migration before
* invoking bpf_mem_cache_alloc().
*/
new_node = lpm_trie_node_alloc(trie, value, true);
if (!new_node)
return -ENOMEM;
/* Allocate and fill a new node */
if (trie->n_entries == trie->map.max_entries) {
ret = -ENOSPC;
goto out;
}
new_node = lpm_trie_node_alloc(trie, value);
if (!new_node) {
ret = -ENOMEM;
goto out;
}
trie->n_entries++;
raw_spin_lock_irqsave(&trie->lock, irq_flags);
new_node->prefixlen = key->prefixlen;
RCU_INIT_POINTER(new_node->child[0], NULL);
@ -364,8 +368,7 @@ static long trie_update_elem(struct bpf_map *map,
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
node->prefixlen == key->prefixlen ||
node->prefixlen == trie->max_prefixlen)
node->prefixlen == key->prefixlen)
break;
next_bit = extract_bit(key->data, node->prefixlen);
@ -376,6 +379,10 @@ static long trie_update_elem(struct bpf_map *map,
* simply assign the @new_node to that slot and be done.
*/
if (!node) {
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
rcu_assign_pointer(*slot, new_node);
goto out;
}
@ -384,18 +391,30 @@ static long trie_update_elem(struct bpf_map *map,
* which already has the correct data array set.
*/
if (node->prefixlen == matchlen) {
if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) {
if (flags == BPF_NOEXIST) {
ret = -EEXIST;
goto out;
}
} else {
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
}
new_node->child[0] = node->child[0];
new_node->child[1] = node->child[1];
if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
trie->n_entries--;
rcu_assign_pointer(*slot, new_node);
free_node = node;
goto out;
}
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
/* If the new node matches the prefix completely, it must be inserted
* as an ancestor. Simply insert it between @node and *@slot.
*/
@ -406,8 +425,10 @@ static long trie_update_elem(struct bpf_map *map,
goto out;
}
im_node = lpm_trie_node_alloc(trie, NULL);
/* migration is disabled within the locked scope */
im_node = lpm_trie_node_alloc(trie, NULL, false);
if (!im_node) {
trie->n_entries--;
ret = -ENOMEM;
goto out;
}
@ -429,16 +450,13 @@ static long trie_update_elem(struct bpf_map *map,
rcu_assign_pointer(*slot, im_node);
out:
if (ret) {
if (new_node)
trie->n_entries--;
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree(new_node);
kfree(im_node);
}
spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_node, rcu);
migrate_disable();
if (ret)
bpf_mem_cache_free(&trie->ma, new_node);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
migrate_enable();
return ret;
}
@ -459,7 +477,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
spin_lock_irqsave(&trie->lock, irq_flags);
raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping
* track of the path we traverse. We will need to know the node
@ -535,9 +553,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
free_node = node;
out:
spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_parent, rcu);
kfree_rcu(free_node, rcu);
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
migrate_disable();
bpf_mem_cache_free_rcu(&trie->ma, free_parent);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
migrate_enable();
return ret;
}
@ -559,6 +580,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
size_t leaf_size;
int err;
/* check sanity of attributes */
if (attr->max_entries == 0 ||
@ -581,9 +604,19 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8;
spin_lock_init(&trie->lock);
raw_spin_lock_init(&trie->lock);
/* Allocate intermediate and leaf nodes from the same allocator */
leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +
trie->map.value_size;
err = bpf_mem_alloc_init(&trie->ma, leaf_size, false);
if (err)
goto free_out;
return &trie->map;
free_out:
bpf_map_area_free(trie);
return ERR_PTR(err);
}
static void trie_free(struct bpf_map *map)
@ -615,13 +648,17 @@ static void trie_free(struct bpf_map *map)
continue;
}
kfree(node);
/* No bpf program may access the map, so freeing the
* node without waiting for the extra RCU GP.
*/
bpf_mem_cache_raw_free(node);
RCU_INIT_POINTER(*slot, NULL);
break;
}
}
out:
bpf_mem_alloc_destroy(&trie->ma);
bpf_map_area_free(trie);
}
@ -633,7 +670,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
struct lpm_trie_node **node_stack = NULL;
int err = 0, stack_ptr = -1;
unsigned int next_bit;
size_t matchlen;
size_t matchlen = 0;
/* The get_next_key follows postorder. For the 4 node example in
* the top of this file, the trie_get_next_key() returns the following
@ -672,7 +709,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
next_bit = extract_bit(key->data, node->prefixlen);
node = rcu_dereference(node->child[next_bit]);
}
if (!node || node->prefixlen != key->prefixlen ||
if (!node || node->prefixlen != matchlen ||
(node->flags & LPM_TREE_NODE_FLAG_IM))
goto find_leftmost;

View File

@ -5,7 +5,6 @@ bpf-syscall*
test_verifier
test_maps
test_lru_map
test_lpm_map
test_tag
FEATURE-DUMP.libbpf
FEATURE-DUMP.selftests

View File

@ -83,7 +83,7 @@ CLANG_CPUV4 := 1
endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32

View File

@ -20,10 +20,12 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <endian.h>
#include <arpa/inet.h>
#include <sys/time.h>
#include <bpf/bpf.h>
#include <test_maps.h>
#include "bpf_util.h"
@ -33,6 +35,22 @@ struct tlpm_node {
uint8_t key[];
};
/* LPM trie key with a byte-array payload. The anonymous union lets the
 * tests access the prefix length either through the bpf_lpm_trie_key_hdr
 * header or directly as prefixlen.
 */
struct lpm_trie_bytes_key {
union {
struct bpf_lpm_trie_key_hdr hdr;
__u32 prefixlen;
};
/* key bytes; at most 8 bytes of data can be stored */
unsigned char data[8];
};
/* LPM trie key with a single unsigned int payload. The anonymous union lets
 * the tests access the prefix length either through the bpf_lpm_trie_key_hdr
 * header or directly as prefixlen.
 */
struct lpm_trie_int_key {
union {
struct bpf_lpm_trie_key_hdr hdr;
__u32 prefixlen;
};
/* key payload; the tests below store multi-bit values via htobe32() */
unsigned int data;
};
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits);
@ -223,7 +241,7 @@ static void test_lpm_map(int keysize)
n_matches = 0;
n_matches_after_delete = 0;
n_nodes = 1 << 8;
n_lookups = 1 << 16;
n_lookups = 1 << 9;
data = alloca(keysize);
memset(data, 0, keysize);
@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void)
close(map_fd);
}
int main(void)
static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
int fd;
opts.map_flags = BPF_F_NO_PREALLOC;
fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries,
&opts);
CHECK(fd < 0, "bpf_map_create", "error %d\n", errno);
return fd;
}
/* Exercise the update flags of an LPM trie that has room for three entries:
 * invalid flag combinations must fail with -EINVAL, BPF_EXIST on a missing
 * key (including an intermediate node) must fail with -ENOENT, BPF_NOEXIST
 * on an existing key must fail with -EEXIST, and every successful update
 * must be visible through a subsequent lookup.
 */
static void test_lpm_trie_update_flags(void)
{
	struct lpm_trie_int_key key;
	unsigned int value, got;
	int fd, err;

	fd = lpm_trie_create(sizeof(key), sizeof(value), 3);

	/* invalid flags (Error) */
	key.prefixlen = 32;
	key.data = 0;
	value = 0;
	err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK);
	CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);

	/* invalid flags (Error) */
	key.prefixlen = 32;
	key.data = 0;
	value = 0;
	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST);
	CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);

	/* overwrite an empty LPM trie (Error) */
	key.prefixlen = 32;
	key.data = 0;
	value = 2;
	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
	CHECK(err != -ENOENT, "overwrite empty LPM trie", "error %d\n", err);

	/* add a new node */
	key.prefixlen = 16;
	key.data = 0;
	value = 1;
	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
	CHECK(err, "add new elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup elem", "error %d\n", err);
	/* value and got are unsigned int, so print them with %u */
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* add the same node as new node (Error) */
	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
	CHECK(err != -EEXIST, "add new elem again", "error %d\n", err);

	/* overwrite the existing node */
	value = 4;
	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
	CHECK(err, "overwrite elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup elem", "error %d\n", err);
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* overwrite the node */
	value = 1;
	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
	CHECK(err, "update elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup elem", "error %d\n", err);
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* overwrite a non-existent node which is the prefix of the first
	 * node (Error).
	 */
	key.prefixlen = 8;
	key.data = 0;
	value = 2;
	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
	CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);

	/* add a new node which is the prefix of the first node */
	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
	CHECK(err, "add new elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup key", "error %d\n", err);
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* add another new node which will be the sibling of the first node */
	key.prefixlen = 9;
	key.data = htobe32(1 << 23);
	value = 5;
	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
	CHECK(err, "add new elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup key", "error %d\n", err);
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* overwrite the third node */
	value = 3;
	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
	CHECK(err, "overwrite elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup key", "error %d\n", err);
	CHECK(got != value, "check value", "got %u exp %u\n", got, value);

	/* delete the second node to make it an intermediate node */
	key.prefixlen = 8;
	key.data = 0;
	err = bpf_map_delete_elem(fd, &key);
	CHECK(err, "del elem", "error %d\n", err);

	/* overwrite the intermediate node (Error) */
	value = 2;
	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
	CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);

	close(fd);
}
/* Fill a three-entry LPM trie, verify that adding a fourth key fails with
 * -ENOSPC, and verify that existing keys can still be overwritten with both
 * BPF_EXIST and BPF_ANY while the map is full.
 */
static void test_lpm_trie_update_full_map(void)
{
	/* The three elements that fill the map, in insertion order */
	const struct {
		unsigned int prefixlen;
		unsigned int data;
		int value;
	} fill[] = {
		{ 16, 0, 0 },
		{ 8, 0, 1 },
		{ 9, htobe32(1 << 23), 2 },
	};
	struct lpm_trie_int_key key;
	unsigned int idx;
	int value, got;
	int fd, err;

	fd = lpm_trie_create(sizeof(key), sizeof(value), 3);

	/* add each new node and read it back */
	for (idx = 0; idx < ARRAY_SIZE(fill); idx++) {
		key.prefixlen = fill[idx].prefixlen;
		key.data = fill[idx].data;
		value = fill[idx].value;
		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
		CHECK(err, "add new elem", "error %d\n", err);
		got = 0;
		err = bpf_map_lookup_elem(fd, &key, &got);
		CHECK(err, "lookup elem", "error %d\n", err);
		CHECK(got != value, "check value", "got %d exp %d\n", got, value);
	}

	/* a fourth, distinct key must be rejected (Error) */
	key.prefixlen = 32;
	key.data = 0;
	value = 3;
	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
	CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err);

	/* an existing node can still be updated with BPF_EXIST */
	key.prefixlen = 16;
	key.data = 0;
	value = 4;
	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
	CHECK(err, "overwrite elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup elem", "error %d\n", err);
	CHECK(got != value, "check value", "got %d exp %d\n", got, value);

	/* an existing node can still be updated with BPF_ANY */
	key.prefixlen = 9;
	key.data = htobe32(1 << 23);
	value = 5;
	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
	CHECK(err, "overwrite elem", "error %d\n", err);
	got = 0;
	err = bpf_map_lookup_elem(fd, &key, &got);
	CHECK(err, "lookup elem", "error %d\n", err);
	CHECK(got != value, "check value", "got %d exp %d\n", got, value);

	close(fd);
}
/* qsort() comparator for an array of C-string pointers: each argument points
 * at a `const char *` element, and the strings are ordered with strcmp().
 */
static int cmp_str(const void *a, const void *b)
{
	const char * const *lhs = a;
	const char * const *rhs = b;

	return strcmp(*lhs, *rhs);
}
/* Save strings in LPM trie. The trailing '\0' for each string will be
 * accounted in the prefixlen. The strings returned during the iteration
 * should be sorted as expected.
 *
 * The test first inserts and overwrites every key (alternating the update
 * flags), checks that a full trie rejects a new key with -ENOSPC, and then
 * iterates twice: once without deletion and once deleting each returned key.
 */
static void test_lpm_trie_iterate_strs(void)
{
	static const char * const keys[] = {
		"ab", "abO", "abc", "abo", "abS", "abcd",
	};
	const char *sorted_keys[ARRAY_SIZE(keys)];
	struct lpm_trie_bytes_key key, next_key;
	unsigned int value, got, i, j, len;
	struct lpm_trie_bytes_key *cur;
	int fd, err;

	fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys));

	for (i = 0; i < ARRAY_SIZE(keys); i++) {
		unsigned int flags;

		/* add i-th element */
		flags = i % 2 ? BPF_NOEXIST : 0;
		len = strlen(keys[i]);
		/* include the trailing '\0' */
		key.prefixlen = (len + 1) * 8;
		memset(key.data, 0, sizeof(key.data));
		memcpy(key.data, keys[i], len);
		value = i + 100;
		err = bpf_map_update_elem(fd, &key, &value, flags);
		CHECK(err, "add elem", "#%u error %d\n", i, err);

		err = bpf_map_lookup_elem(fd, &key, &got);
		CHECK(err, "lookup elem", "#%u error %d\n", i, err);
		CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got);

		/* re-add i-th element (Error) */
		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
		CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err);

		/* Overwrite i-th element */
		flags = i % 2 ? 0 : BPF_EXIST;
		value = i;
		err = bpf_map_update_elem(fd, &key, &value, flags);
		CHECK(err, "update elem", "error %d\n", err);

		/* Lookup #[0~i] elements */
		for (j = 0; j <= i; j++) {
			len = strlen(keys[j]);
			key.prefixlen = (len + 1) * 8;
			memset(key.data, 0, sizeof(key.data));
			memcpy(key.data, keys[j], len);
			err = bpf_map_lookup_elem(fd, &key, &got);
			CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err);
			/* the j-th key was overwritten with value j, so j
			 * (not the current value, which equals i) is the
			 * expected value to report
			 */
			CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n",
			      i, j, j, got);
		}
	}

	/* Add element to a full LPM trie (Error) */
	key.prefixlen = sizeof(key.data) * 8;
	memset(key.data, 0, sizeof(key.data));
	value = 0;
	err = bpf_map_update_elem(fd, &key, &value, 0);
	CHECK(err != -ENOSPC, "add to full LPM trie", "error %d\n", err);

	/* Iterate sorted elements: no deletion */
	memcpy(sorted_keys, keys, sizeof(keys));
	qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str);
	cur = NULL;
	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
		len = strlen(sorted_keys[i]);
		err = bpf_map_get_next_key(fd, cur, &next_key);
		CHECK(err, "iterate", "#%u error %d\n", i, err);
		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
		      "#%u invalid len %u expect %u\n",
		      i, next_key.prefixlen, (len + 1) * 8);
		/* the '*' precision of %.*s takes an int argument */
		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
		      "#%u got %.*s exp %.*s\n", i, (int)len, (const char *)next_key.data,
		      (int)len, sorted_keys[i]);

		cur = &next_key;
	}
	err = bpf_map_get_next_key(fd, cur, &next_key);
	CHECK(err != -ENOENT, "more element", "error %d\n", err);

	/* Iterate sorted elements: delete the found key after each iteration */
	cur = NULL;
	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
		len = strlen(sorted_keys[i]);
		err = bpf_map_get_next_key(fd, cur, &next_key);
		CHECK(err, "iterate", "#%u error %d\n", i, err);
		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
		      "#%u invalid len %u expect %u\n",
		      i, next_key.prefixlen, (len + 1) * 8);
		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
		      "#%u got %.*s exp %.*s\n", i, (int)len, (const char *)next_key.data,
		      (int)len, sorted_keys[i]);

		cur = &next_key;

		err = bpf_map_delete_elem(fd, cur);
		CHECK(err, "delete", "#%u error %d\n", i, err);
	}
	err = bpf_map_get_next_key(fd, cur, &next_key);
	CHECK(err != -ENOENT, "non-empty LPM trie", "error %d\n", err);

	close(fd);
}
/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of
* LPM trie will return these integers in big-endian order, therefore, convert
* these integers to big-endian before update. After each iteration, delete the
* found key (the smallest integer) and expect the next iteration will return
* the second smallest number.
*/
static void test_lpm_trie_iterate_ints(void)
{
struct lpm_trie_int_key key, next_key;
unsigned int i, max_entries;
struct lpm_trie_int_key *cur;
unsigned int *data_set;
int fd, err;
bool value;
max_entries = 4096;
data_set = calloc(max_entries, sizeof(*data_set));
CHECK(!data_set, "malloc", "no mem\n");
for (i = 0; i < max_entries; i++)
data_set[i] = i;
fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries);
value = true;
for (i = 0; i < max_entries; i++) {
key.prefixlen = 32;
key.data = htobe32(data_set[i]);
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add elem", "#%u error %d\n", i, err);
}
cur = NULL;
for (i = 0; i < max_entries; i++) {
err = bpf_map_get_next_key(fd, cur, &next_key);
CHECK(err, "iterate", "#%u error %d\n", i, err);
CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n",
i, next_key.prefixlen);
CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n",
i, be32toh(next_key.data), data_set[i]);
cur = &next_key;
/*
* Delete the minimal key, the next call of bpf_get_next_key()
* will return the second minimal key.
*/
err = bpf_map_delete_elem(fd, &next_key);
CHECK(err, "del elem", "#%u elem error %d\n", i, err);
}
err = bpf_map_get_next_key(fd, cur, &next_key);
CHECK(err != -ENOENT, "more element", "error %d\n", err);
err = bpf_map_get_next_key(fd, NULL, &next_key);
CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err);
free(data_set);
close(fd);
}
void test_lpm_trie_map_basic_ops(void)
{
int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
test_lpm_basic();
test_lpm_order();
@ -792,6 +1179,10 @@ int main(void)
test_lpm_get_next_key();
test_lpm_multi_thread();
printf("test_lpm: OK\n");
return 0;
test_lpm_trie_update_flags();
test_lpm_trie_update_full_map();
test_lpm_trie_iterate_strs();
test_lpm_trie_iterate_ints();
printf("%s: PASS\n", __func__);
}