Merge branch 'fix-lockdep-warning-for-htab-of-map'

Hou Tao says:

====================
The patch set fixes a lockdep warning for htab of map. The
warning is found when running test_maps. The warning occurs when
htab_put_fd_value() attempts to acquire map_idr_lock to free the map id
of the inner map while already holding the bucket lock (raw_spinlock_t).

The fix moves the invocation of free_htab_elem() after
htab_unlock_bucket() and adds a test case to verify the solution.
====================

Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
Alexei Starovoitov 2024-11-08 11:40:16 -08:00 committed by Andrii Nakryiko
commit 266a557981
9 changed files with 203 additions and 38 deletions

View File

@ -896,9 +896,12 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
check_and_free_fields(htab, l);
migrate_disable();
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
bpf_mem_cache_free(&htab->ma, l);
migrate_enable();
}
static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
@ -948,7 +951,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
if (htab_is_prealloc(htab)) {
bpf_map_dec_elem_count(&htab->map);
check_and_free_fields(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
dec_elem_count(htab);
htab_elem_free(htab, l);
@ -1018,7 +1021,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
*/
pl_new = this_cpu_ptr(htab->extra_elems);
l_new = *pl_new;
htab_put_fd_value(htab, old_elem);
*pl_new = old_elem;
} else {
struct pcpu_freelist_node *l;
@ -1105,6 +1107,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
struct htab_elem *l_new = NULL, *l_old;
struct hlist_nulls_head *head;
unsigned long flags;
void *old_map_ptr;
struct bucket *b;
u32 key_size, hash;
int ret;
@ -1183,12 +1186,27 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
hlist_nulls_del_rcu(&l_old->hash_node);
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
else
/* l_old has already been stashed in htab->extra_elems, free
* its special fields before it is available for reuse. Also
* save the old map pointer in htab of maps before unlock
* and release it after unlock.
*/
old_map_ptr = NULL;
if (htab_is_prealloc(htab)) {
if (map->ops->map_fd_put_ptr)
old_map_ptr = fd_htab_map_get_ptr(map, l_old);
check_and_free_fields(htab, l_old);
}
ret = 0;
}
htab_unlock_bucket(htab, b, hash, flags);
if (l_old) {
if (old_map_ptr)
map->ops->map_fd_put_ptr(map, old_map_ptr, true);
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
}
return 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
return ret;
@ -1432,15 +1450,15 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key)
return ret;
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
if (l)
hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
} else {
else
ret = -ENOENT;
}
htab_unlock_bucket(htab, b, hash, flags);
if (l)
free_htab_elem(htab, l);
return ret;
}
@ -1853,13 +1871,14 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
* may cause deadlock. See comments in function
* prealloc_lru_pop(). Let us do bpf_lru_push_free()
* after releasing the bucket lock.
*
* For htab of maps, htab_put_fd_value() in
* free_htab_elem() may acquire a spinlock with bucket
* lock being held and it violates the lock rule, so
* invoke free_htab_elem() after unlock as well.
*/
if (is_lru_map) {
l->batch_flink = node_to_free;
node_to_free = l;
} else {
free_htab_elem(htab, l);
}
}
dst_key += key_size;
dst_val += value_size;
@ -1871,7 +1890,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
while (node_to_free) {
l = node_to_free;
node_to_free = node_to_free->batch_flink;
if (is_lru_map)
htab_lru_push_free(htab, l);
else
free_htab_elem(htab, l);
}
next_batch:

View File

@ -67,5 +67,8 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
#endif /* __BPF_UTIL__ */

View File

@ -16,10 +16,6 @@
#include "tcp_ca_kfunc.skel.h"
#include "bpf_cc_cubic.skel.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;

View File

@ -10,10 +10,6 @@
#include "cgroup_helpers.h"
#include "network_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
static struct btf *btf;
static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)

View File

@ -5,7 +5,9 @@
#include <sys/syscall.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "access_map_in_map.skel.h"
#include "update_map_in_htab.skel.h"
struct thread_ctx {
pthread_barrier_t barrier;
@ -127,6 +129,131 @@ static void test_map_in_map_access(const char *prog_name, const char *map_name)
access_map_in_map__destroy(skel);
}
static void add_del_fd_htab(int outer_fd)
{
int inner_fd, err;
int key = 1;
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner1"))
return;
err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
close(inner_fd);
if (!ASSERT_OK(err, "add"))
return;
/* Delete */
err = bpf_map_delete_elem(outer_fd, &key);
ASSERT_OK(err, "del");
}
static void overwrite_fd_htab(int outer_fd)
{
int inner_fd, err;
int key = 1;
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner1"))
return;
err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
close(inner_fd);
if (!ASSERT_OK(err, "add"))
return;
/* Overwrite */
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner2"))
goto out;
err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_EXIST);
close(inner_fd);
if (!ASSERT_OK(err, "overwrite"))
goto out;
err = bpf_map_delete_elem(outer_fd, &key);
ASSERT_OK(err, "del");
return;
out:
bpf_map_delete_elem(outer_fd, &key);
}
static void lookup_delete_fd_htab(int outer_fd)
{
int key = 1, value;
int inner_fd, err;
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner1"))
return;
err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
close(inner_fd);
if (!ASSERT_OK(err, "add"))
return;
/* lookup_and_delete is not supported for htab of maps */
err = bpf_map_lookup_and_delete_elem(outer_fd, &key, &value);
ASSERT_EQ(err, -ENOTSUPP, "lookup_del");
err = bpf_map_delete_elem(outer_fd, &key);
ASSERT_OK(err, "del");
}
static void batched_lookup_delete_fd_htab(int outer_fd)
{
int keys[2] = {1, 2}, values[2];
unsigned int cnt, batch;
int inner_fd, err;
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner1"))
return;
err = bpf_map_update_elem(outer_fd, &keys[0], &inner_fd, BPF_NOEXIST);
close(inner_fd);
if (!ASSERT_OK(err, "add1"))
return;
inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL);
if (!ASSERT_OK_FD(inner_fd, "inner2"))
goto out;
err = bpf_map_update_elem(outer_fd, &keys[1], &inner_fd, BPF_NOEXIST);
close(inner_fd);
if (!ASSERT_OK(err, "add2"))
goto out;
/* batched lookup_and_delete */
cnt = ARRAY_SIZE(keys);
err = bpf_map_lookup_and_delete_batch(outer_fd, NULL, &batch, keys, values, &cnt, NULL);
ASSERT_TRUE((!err || err == -ENOENT), "delete_batch ret");
ASSERT_EQ(cnt, ARRAY_SIZE(keys), "delete_batch cnt");
out:
bpf_map_delete_elem(outer_fd, &keys[0]);
}
static void test_update_map_in_htab(bool preallocate)
{
struct update_map_in_htab *skel;
int err, fd;
skel = update_map_in_htab__open();
if (!ASSERT_OK_PTR(skel, "open"))
return;
err = update_map_in_htab__load(skel);
if (!ASSERT_OK(err, "load"))
goto out;
fd = preallocate ? bpf_map__fd(skel->maps.outer_htab_map) :
bpf_map__fd(skel->maps.outer_alloc_htab_map);
add_del_fd_htab(fd);
overwrite_fd_htab(fd);
lookup_delete_fd_htab(fd);
batched_lookup_delete_fd_htab(fd);
out:
update_map_in_htab__destroy(skel);
}
void test_map_in_map(void)
{
if (test__start_subtest("acc_map_in_array"))
@ -137,5 +264,8 @@ void test_map_in_map(void)
test_map_in_map_access("access_map_in_htab", "outer_htab_map");
if (test__start_subtest("sleepable_acc_map_in_htab"))
test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map");
if (test__start_subtest("update_map_in_htab"))
test_update_map_in_htab(true);
if (test__start_subtest("update_map_in_alloc_htab"))
test_update_map_in_htab(false);
}

View File

@ -23,10 +23,6 @@
#include "getpeername_unix_prog.skel.h"
#include "network_helpers.h"
#ifndef ENOTSUPP
# define ENOTSUPP 524
#endif
#define TEST_NS "sock_addr"
#define TEST_IF_PREFIX "test_sock_addr"
#define TEST_IPV4 "127.0.0.4"

View File

@ -0,0 +1,30 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct inner_map_type {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(key_size, 4);
__uint(value_size, 4);
__uint(max_entries, 1);
} inner_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__type(key, int);
__type(value, int);
__uint(max_entries, 2);
__array(values, struct inner_map_type);
} outer_htab_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, int);
__uint(max_entries, 2);
__array(values, struct inner_map_type);
} outer_alloc_htab_map SEC(".maps");
char _license[] SEC("license") = "GPL";

View File

@ -26,10 +26,6 @@
#include "test_maps.h"
#include "testing_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
int skips;
static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) };

View File

@ -42,10 +42,6 @@
#include "../../../include/linux/filter.h"
#include "testing_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
#define MAX_INSNS BPF_MAXINSNS
#define MAX_EXPECTED_INSNS 32
#define MAX_UNEXPECTED_INSNS 32