BPF fixes:

- Fix several issues for BPF LPM trie map which were found by
   syzbot and during addition of new test cases (Hou Tao)
 
 - Fix a missing process_iter_arg register type check in the
   BPF verifier (Kumar Kartikeya Dwivedi, Tao Lyu)
 
 - Fix several correctness gaps in the BPF verifier when
   interacting with the BPF stack without CAP_PERFMON
   (Kumar Kartikeya Dwivedi, Eduard Zingerman, Tao Lyu)
 
 - Fix OOB BPF map writes when deleting elements for the case of
   xsk map as well as devmap (Maciej Fijalkowski)
 
 - Fix xsk sockets to always clear DMA mapping information when
   unmapping the pool (Larysa Zaremba)
 
 - Fix sk_mem_uncharge logic in tcp_bpf_sendmsg to only uncharge
   after sent bytes have been finalized (Zijian Zhang)
 
 - Fix BPF sockmap with vsocks which was missing a queue check
   in poll and sockmap cleanup on close (Michal Luczaj)
 
 - Fix tools infra to override makefile ARCH variable if defined
   but empty, which addresses cross-building tools (Björn Töpel)
 
 - Fix two resolve_btfids build warnings on unresolved bpf_lsm
   symbols (Thomas Weißschuh)
 
 - Fix a NULL pointer dereference in bpftool (Amir Mohammadi)
 
 - Fix BPF selftests to check for CONFIG_PREEMPTION instead of
   CONFIG_PREEMPT (Sebastian Andrzej Siewior)
 
 Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
 -----BEGIN PGP SIGNATURE-----
 
 iIsEABYKADMWIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZ1N8bhUcZGFuaWVsQGlv
 Z2VhcmJveC5uZXQACgkQ2yufC7HISIO6ZAD+ITpujJgxvFGC0R7E9o3XJ7V1SpmR
 SlW0lGpj6vOHTUAA/2MRoZurJSTbdT3fbWiCUgU1rMcwkoErkyxUaPuBci0D
 =kgXL
 -----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Daniel Borkmann:

 - Fix several issues for BPF LPM trie map which were found by syzbot
   and during addition of new test cases (Hou Tao)

 - Fix a missing process_iter_arg register type check in the BPF
   verifier (Kumar Kartikeya Dwivedi, Tao Lyu)

 - Fix several correctness gaps in the BPF verifier when interacting
   with the BPF stack without CAP_PERFMON (Kumar Kartikeya Dwivedi,
   Eduard Zingerman, Tao Lyu)

 - Fix OOB BPF map writes when deleting elements for the case of xsk map
   as well as devmap (Maciej Fijalkowski)

 - Fix xsk sockets to always clear DMA mapping information when
   unmapping the pool (Larysa Zaremba)

 - Fix sk_mem_uncharge logic in tcp_bpf_sendmsg to only uncharge after
   sent bytes have been finalized (Zijian Zhang)

 - Fix BPF sockmap with vsocks which was missing a queue check in poll
   and sockmap cleanup on close (Michal Luczaj)

 - Fix tools infra to override makefile ARCH variable if defined but
   empty, which addresses cross-building tools (Björn Töpel)

 - Fix two resolve_btfids build warnings on unresolved bpf_lsm symbols
   (Thomas Weißschuh)

 - Fix a NULL pointer dereference in bpftool (Amir Mohammadi)

 - Fix BPF selftests to check for CONFIG_PREEMPTION instead of
   CONFIG_PREEMPT (Sebastian Andrzej Siewior)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (31 commits)
  selftests/bpf: Add more test cases for LPM trie
  selftests/bpf: Move test_lpm_map.c to map_tests
  bpf: Use raw_spinlock_t for LPM trie
  bpf: Switch to bpf mem allocator for LPM trie
  bpf: Fix exact match conditions in trie_get_next_key()
  bpf: Handle in-place update for full LPM trie correctly
  bpf: Handle BPF_EXIST and BPF_NOEXIST for LPM trie
  bpf: Remove unnecessary kfree(im_node) in lpm_trie_update_elem
  bpf: Remove unnecessary check when updating LPM trie
  selftests/bpf: Add test for narrow spill into 64-bit spilled scalar
  selftests/bpf: Add test for reading from STACK_INVALID slots
  selftests/bpf: Introduce __caps_unpriv annotation for tests
  bpf: Fix narrow scalar spill onto 64-bit spilled scalar slots
  bpf: Don't mark STACK_INVALID as STACK_MISC in mark_stack_slot_misc
  samples/bpf: Remove unnecessary -I flags from libbpf EXTRA_CFLAGS
  bpf: Zero index arg error string for dynptr and iter
  selftests/bpf: Add tests for iter arg check
  bpf: Ensure reg is PTR_TO_STACK in process_iter_arg
  tools: Override makefile ARCH variable if defined, but empty
  selftests/bpf: Add apply_bytes test to test_txmsg_redir_wait_sndmem in test_sockmap
  ...
This commit is contained in:
Linus Torvalds 2024-12-06 15:07:48 -08:00
commit b5f217084a
31 changed files with 813 additions and 174 deletions

View File

@ -375,8 +375,6 @@ BTF_ID(func, bpf_lsm_socket_socketpair)
BTF_ID(func, bpf_lsm_syslog)
BTF_ID(func, bpf_lsm_task_alloc)
BTF_ID(func, bpf_lsm_current_getsecid_subj)
BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
BTF_ID(func, bpf_lsm_task_to_inode)

View File

@ -184,7 +184,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
static void dev_map_free(struct bpf_map *map)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
int i;
u32 i;
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
@ -821,7 +821,7 @@ static long dev_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *old_dev;
int k = *(u32 *)key;
u32 k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;
@ -838,7 +838,7 @@ static long dev_map_hash_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *old_dev;
int k = *(u32 *)key;
u32 k = *(u32 *)key;
unsigned long flags;
int ret = -ENOENT;

View File

@ -15,6 +15,7 @@
#include <net/ipv6.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
/* Intermediate node */
#define LPM_TREE_NODE_FLAG_IM BIT(0)
@ -22,7 +23,6 @@
struct lpm_trie_node;
struct lpm_trie_node {
struct rcu_head rcu;
struct lpm_trie_node __rcu *child[2];
u32 prefixlen;
u32 flags;
@ -32,10 +32,11 @@ struct lpm_trie_node {
struct lpm_trie {
struct bpf_map map;
struct lpm_trie_node __rcu *root;
struct bpf_mem_alloc ma;
size_t n_entries;
size_t max_prefixlen;
size_t data_size;
spinlock_t lock;
raw_spinlock_t lock;
};
/* This trie implements a longest prefix match algorithm that can be used to
@ -287,17 +288,18 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
return found->data + trie->data_size;
}
static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
const void *value)
static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie,
const void *value,
bool disable_migration)
{
struct lpm_trie_node *node;
size_t size = sizeof(struct lpm_trie_node) + trie->data_size;
if (value)
size += trie->map.value_size;
if (disable_migration)
migrate_disable();
node = bpf_mem_cache_alloc(&trie->ma);
if (disable_migration)
migrate_enable();
node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN,
trie->map.numa_node);
if (!node)
return NULL;
@ -310,12 +312,22 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
return node;
}
/* Account for one additional element in @trie.
 *
 * Returns -ENOENT when @flags is BPF_EXIST, -ENOSPC when n_entries already
 * equals map.max_entries, and 0 after incrementing n_entries otherwise.
 * The BPF_EXIST test is done first, so it takes precedence on a full trie.
 *
 * NOTE(review): the calls visible in trie_update_elem() are made after
 * trie->lock is taken; presumably the lock is required here - confirm.
 */
static int trie_check_add_elem(struct lpm_trie *trie, u64 flags)
{
	int err = 0;

	if (flags == BPF_EXIST)
		err = -ENOENT;
	else if (trie->n_entries == trie->map.max_entries)
		err = -ENOSPC;
	else
		trie->n_entries++;

	return err;
}
/* Called from syscall or from eBPF program */
static long trie_update_elem(struct bpf_map *map,
void *_key, void *value, u64 flags)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
struct lpm_trie_node *node, *im_node, *new_node;
struct lpm_trie_node *free_node = NULL;
struct lpm_trie_node __rcu **slot;
struct bpf_lpm_trie_key_u8 *key = _key;
@ -330,22 +342,14 @@ static long trie_update_elem(struct bpf_map *map,
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
spin_lock_irqsave(&trie->lock, irq_flags);
/* Allocate and fill a new node. Need to disable migration before
* invoking bpf_mem_cache_alloc().
*/
new_node = lpm_trie_node_alloc(trie, value, true);
if (!new_node)
return -ENOMEM;
/* Allocate and fill a new node */
if (trie->n_entries == trie->map.max_entries) {
ret = -ENOSPC;
goto out;
}
new_node = lpm_trie_node_alloc(trie, value);
if (!new_node) {
ret = -ENOMEM;
goto out;
}
trie->n_entries++;
raw_spin_lock_irqsave(&trie->lock, irq_flags);
new_node->prefixlen = key->prefixlen;
RCU_INIT_POINTER(new_node->child[0], NULL);
@ -364,8 +368,7 @@ static long trie_update_elem(struct bpf_map *map,
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
node->prefixlen == key->prefixlen ||
node->prefixlen == trie->max_prefixlen)
node->prefixlen == key->prefixlen)
break;
next_bit = extract_bit(key->data, node->prefixlen);
@ -376,6 +379,10 @@ static long trie_update_elem(struct bpf_map *map,
* simply assign the @new_node to that slot and be done.
*/
if (!node) {
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
rcu_assign_pointer(*slot, new_node);
goto out;
}
@ -384,18 +391,30 @@ static long trie_update_elem(struct bpf_map *map,
* which already has the correct data array set.
*/
if (node->prefixlen == matchlen) {
if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) {
if (flags == BPF_NOEXIST) {
ret = -EEXIST;
goto out;
}
} else {
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
}
new_node->child[0] = node->child[0];
new_node->child[1] = node->child[1];
if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
trie->n_entries--;
rcu_assign_pointer(*slot, new_node);
free_node = node;
goto out;
}
ret = trie_check_add_elem(trie, flags);
if (ret)
goto out;
/* If the new node matches the prefix completely, it must be inserted
* as an ancestor. Simply insert it between @node and *@slot.
*/
@ -406,8 +425,10 @@ static long trie_update_elem(struct bpf_map *map,
goto out;
}
im_node = lpm_trie_node_alloc(trie, NULL);
/* migration is disabled within the locked scope */
im_node = lpm_trie_node_alloc(trie, NULL, false);
if (!im_node) {
trie->n_entries--;
ret = -ENOMEM;
goto out;
}
@ -429,16 +450,13 @@ static long trie_update_elem(struct bpf_map *map,
rcu_assign_pointer(*slot, im_node);
out:
if (ret) {
if (new_node)
trie->n_entries--;
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree(new_node);
kfree(im_node);
}
spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_node, rcu);
migrate_disable();
if (ret)
bpf_mem_cache_free(&trie->ma, new_node);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
migrate_enable();
return ret;
}
@ -459,7 +477,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
spin_lock_irqsave(&trie->lock, irq_flags);
raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping
* track of the path we traverse. We will need to know the node
@ -535,9 +553,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
free_node = node;
out:
spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_parent, rcu);
kfree_rcu(free_node, rcu);
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
migrate_disable();
bpf_mem_cache_free_rcu(&trie->ma, free_parent);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
migrate_enable();
return ret;
}
@ -559,6 +580,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
size_t leaf_size;
int err;
/* check sanity of attributes */
if (attr->max_entries == 0 ||
@ -581,9 +604,19 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8;
spin_lock_init(&trie->lock);
raw_spin_lock_init(&trie->lock);
/* Allocate intermediate and leaf nodes from the same allocator */
leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +
trie->map.value_size;
err = bpf_mem_alloc_init(&trie->ma, leaf_size, false);
if (err)
goto free_out;
return &trie->map;
free_out:
bpf_map_area_free(trie);
return ERR_PTR(err);
}
static void trie_free(struct bpf_map *map)
@ -615,13 +648,17 @@ static void trie_free(struct bpf_map *map)
continue;
}
kfree(node);
/* No bpf program may access the map, so freeing the
* node without waiting for the extra RCU GP.
*/
bpf_mem_cache_raw_free(node);
RCU_INIT_POINTER(*slot, NULL);
break;
}
}
out:
bpf_mem_alloc_destroy(&trie->ma);
bpf_map_area_free(trie);
}
@ -633,7 +670,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
struct lpm_trie_node **node_stack = NULL;
int err = 0, stack_ptr = -1;
unsigned int next_bit;
size_t matchlen;
size_t matchlen = 0;
/* The get_next_key follows postorder. For the 4 node example in
* the top of this file, the trie_get_next_key() returns the following
@ -672,7 +709,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
next_bit = extract_bit(key->data, node->prefixlen);
node = rcu_dereference(node->child[next_bit]);
}
if (!node || node->prefixlen != key->prefixlen ||
if (!node || node->prefixlen != matchlen ||
(node->flags & LPM_TREE_NODE_FLAG_IM))
goto find_leftmost;

View File

@ -1202,14 +1202,17 @@ static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
* case they are equivalent, or it's STACK_ZERO, in which case we preserve
* more precise STACK_ZERO.
* Note, in unprivileged mode leaving STACK_INVALID is wrong, so we take
* env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
* Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
* mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
* unnecessary as both are considered equivalent when loading data and pruning,
* in case of unprivileged mode it will be incorrect to allow reads of invalid
* slots.
*/
static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
{
if (*stype == STACK_ZERO)
return;
if (env->allow_ptr_leaks && *stype == STACK_INVALID)
if (*stype == STACK_INVALID)
return;
*stype = STACK_MISC;
}
@ -4700,6 +4703,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
*/
if (!env->allow_ptr_leaks &&
is_spilled_reg(&state->stack[spi]) &&
!is_spilled_scalar_reg(&state->stack[spi]) &&
size != BPF_REG_SIZE) {
verbose(env, "attempt to corrupt spilled pointer on stack\n");
return -EACCES;
@ -8071,7 +8075,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
verbose(env,
"arg#%d expected pointer to stack or const struct bpf_dynptr\n",
regno);
regno - 1);
return -EINVAL;
}
@ -8125,7 +8129,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
if (!is_dynptr_reg_valid_init(env, reg)) {
verbose(env,
"Expected an initialized dynptr as arg #%d\n",
regno);
regno - 1);
return -EINVAL;
}
@ -8133,7 +8137,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
verbose(env,
"Expected a dynptr of type %s as arg #%d\n",
dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
return -EINVAL;
}
@ -8189,6 +8193,11 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
const struct btf_type *t;
int spi, err, i, nr_slots, btf_id;
if (reg->type != PTR_TO_STACK) {
verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
return -EINVAL;
}
/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
* ensures struct convention, so we wouldn't need to do any BTF
* validation here. But given iter state can be passed as a parameter
@ -8197,7 +8206,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
*/
btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
if (btf_id < 0) {
verbose(env, "expected valid iter pointer as arg #%d\n", regno);
verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
return -EINVAL;
}
t = btf_type_by_id(meta->btf, btf_id);
@ -8207,7 +8216,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
/* bpf_iter_<type>_new() expects pointer to uninit iter state */
if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
verbose(env, "expected uninitialized iter_%s as arg #%d\n",
iter_type_str(meta->btf, btf_id), regno);
iter_type_str(meta->btf, btf_id), regno - 1);
return -EINVAL;
}
@ -8231,7 +8240,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
break;
case -EINVAL:
verbose(env, "expected an initialized iter_%s as arg #%d\n",
iter_type_str(meta->btf, btf_id), regno);
iter_type_str(meta->btf, btf_id), regno - 1);
return err;
case -EPROTO:
verbose(env, "expected an RCU CS when using %s\n", meta->func_name);

View File

@ -441,7 +441,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
cork = true;
psock->cork = NULL;
}
sk_msg_return(sk, msg, tosend);
release_sock(sk);
origsize = msg->sg.size;
@ -453,8 +452,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
sock_put(sk_redir);
lock_sock(sk);
sk_mem_uncharge(sk, sent);
if (unlikely(ret < 0)) {
int free = sk_msg_free_nocharge(sk, msg);
int free = sk_msg_free(sk, msg);
if (!cork)
*copied -= free;
@ -468,7 +468,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
break;
case __SK_DROP:
default:
sk_msg_free_partial(sk, msg, tosend);
sk_msg_free(sk, msg);
sk_msg_apply_bytes(psock, tosend);
*copied -= (tosend + delta);
return -EACCES;
@ -484,12 +484,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
msg->sg.data[msg->sg.start].length) {
if (eval == __SK_REDIRECT)
sk_mem_charge(sk, tosend - sent);
msg->sg.data[msg->sg.start].length)
goto more_data;
}
}
return ret;
}

View File

@ -117,12 +117,14 @@
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
static void vsock_close(struct sock *sk, long timeout);
/* Protocol family. */
struct proto vsock_proto = {
.name = "AF_VSOCK",
.owner = THIS_MODULE,
.obj_size = sizeof(struct vsock_sock),
.close = vsock_close,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = vsock_bpf_update_proto,
#endif
@ -797,9 +799,8 @@ static bool sock_type_connectible(u16 type)
static void __vsock_release(struct sock *sk, int level)
{
if (sk) {
struct sock *pending;
struct vsock_sock *vsk;
struct sock *pending;
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
@ -829,7 +830,6 @@ static void __vsock_release(struct sock *sk, int level)
release_sock(sk);
sock_put(sk);
}
}
static void vsock_sk_destruct(struct sock *sk)
@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
}
EXPORT_SYMBOL_GPL(vsock_data_ready);
/* Dummy callback required by sockmap.
* See unconditional call of saved_close() in sock_map_close().
*
* Intentionally empty: neither @sk nor @timeout is used. It is installed as
* vsock_proto.close and invoked through sk->sk_prot->close() from
* vsock_release().
*/
static void vsock_close(struct sock *sk, long timeout)
{
}
static int vsock_release(struct socket *sock)
{
__vsock_release(sock->sk, 0);
struct sock *sk = sock->sk;
if (!sk)
return 0;
sk->sk_prot->close(sk, 0);
__vsock_release(sk, 0);
sock->sk = NULL;
sock->state = SS_FREE;
@ -1054,6 +1067,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLRDHUP;
}
if (sk_is_readable(sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (sock->type == SOCK_DGRAM) {
/* For datagram sockets we can read if there is something in
* the queue and write as long as the socket isn't shutdown for

View File

@ -387,10 +387,9 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
return;
}
if (!refcount_dec_and_test(&dma_map->users))
return;
if (refcount_dec_and_test(&dma_map->users))
__xp_dma_unmap(dma_map, attrs);
kvfree(pool->dma_pages);
pool->dma_pages = NULL;
pool->dma_pages_cnt = 0;

View File

@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct bpf_map *map, void *key)
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock __rcu **map_entry;
struct xdp_sock *old_xs;
int k = *(u32 *)key;
u32 k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;

View File

@ -146,13 +146,14 @@ ifeq ($(ARCH), x86)
BPF_EXTRA_CFLAGS += -fcf-protection
endif
TPROGS_CFLAGS += -Wall -O2
TPROGS_CFLAGS += -Wmissing-prototypes
TPROGS_CFLAGS += -Wstrict-prototypes
TPROGS_CFLAGS += $(call try-run,\
COMMON_CFLAGS += -Wall -O2
COMMON_CFLAGS += -Wmissing-prototypes
COMMON_CFLAGS += -Wstrict-prototypes
COMMON_CFLAGS += $(call try-run,\
printf "int main() { return 0; }" |\
$(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)
TPROGS_CFLAGS += $(COMMON_CFLAGS)
TPROGS_CFLAGS += -I$(objtree)/usr/include
TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
@ -162,7 +163,7 @@ TPROGS_CFLAGS += -I$(srctree)/tools/lib
TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
ifdef SYSROOT
TPROGS_CFLAGS += --sysroot=$(SYSROOT)
COMMON_CFLAGS += --sysroot=$(SYSROOT)
TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
endif
@ -229,7 +230,7 @@ clean:
$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
# Fix up variables inherited from Kbuild that tools/ build system won't like
$(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
$(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \
O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
$@ install_headers

View File

@ -822,11 +822,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
printf("%s:\n", sym_name);
}
if (ksyms) {
if (disasm_print_insn(img, lens[i], opcodes,
name, disasm_opt, btf,
prog_linfo, ksyms[i], i,
linum))
goto exit_free;
} else {
if (disasm_print_insn(img, lens[i], opcodes,
name, disasm_opt, btf,
NULL, 0, 0, false))
goto exit_free;
}
img += lens[i];

View File

@ -7,8 +7,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
-e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
-e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/)
ifndef ARCH
ARCH := $(HOSTARCH)
ifeq ($(strip $(ARCH)),)
override ARCH := $(HOSTARCH)
endif
SRCARCH := $(ARCH)

View File

@ -5,7 +5,6 @@ bpf-syscall*
test_verifier
test_maps
test_lru_map
test_lpm_map
test_tag
FEATURE-DUMP.libbpf
FEATURE-DUMP.selftests

View File

@ -83,7 +83,7 @@ CLANG_CPUV4 := 1
endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32

View File

@ -20,10 +20,12 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <endian.h>
#include <arpa/inet.h>
#include <sys/time.h>
#include <bpf/bpf.h>
#include <test_maps.h>
#include "bpf_util.h"
@ -33,6 +35,22 @@ struct tlpm_node {
uint8_t key[];
};
/* LPM trie key used by the string tests: a header followed by up to 8 bytes
* of key data.
*/
struct lpm_trie_bytes_key {
/* prefixlen shares storage with hdr through the anonymous union */
union {
struct bpf_lpm_trie_key_hdr hdr;
/* prefix length in bits (tests set it to a byte count * 8) */
__u32 prefixlen;
};
/* raw key bytes */
unsigned char data[8];
};
/* LPM trie key whose data is a single unsigned int. */
struct lpm_trie_int_key {
/* prefixlen shares storage with hdr through the anonymous union */
union {
struct bpf_lpm_trie_key_hdr hdr;
/* prefix length; the tests use values between 8 and 32 */
__u32 prefixlen;
};
/* key data; the tests store non-zero values via htobe32() */
unsigned int data;
};
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits);
@ -223,7 +241,7 @@ static void test_lpm_map(int keysize)
n_matches = 0;
n_matches_after_delete = 0;
n_nodes = 1 << 8;
n_lookups = 1 << 16;
n_lookups = 1 << 9;
data = alloca(keysize);
memset(data, 0, keysize);
@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void)
close(map_fd);
}
int main(void)
static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
int fd;
opts.map_flags = BPF_F_NO_PREALLOC;
fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries,
&opts);
CHECK(fd < 0, "bpf_map_create", "error %d\n", errno);
return fd;
}
/* Check how bpf_map_update_elem() flags behave on an LPM trie created with
* max_entries == 3: invalid flag values, BPF_EXIST on missing keys,
* BPF_NOEXIST on present keys, and overwrites with BPF_EXIST / BPF_ANY.
* Every step is verified with CHECK(); successful updates are read back
* with bpf_map_lookup_elem().
*/
static void test_lpm_trie_update_flags(void)
{
struct lpm_trie_int_key key;
unsigned int value, got;
int fd, err;
fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
/* invalid flags (Error) */
key.prefixlen = 32;
key.data = 0;
value = 0;
err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK);
CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
/* invalid flags (Error) */
key.prefixlen = 32;
key.data = 0;
value = 0;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST);
CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
/* overwrite an element in an empty LPM trie (Error) */
key.prefixlen = 32;
key.data = 0;
value = 2;
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err);
/* add a new node */
key.prefixlen = 16;
key.data = 0;
value = 1;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* add the same node as new node (Error) */
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err != -EEXIST, "add new elem again", "error %d\n", err);
/* overwrite the existing node */
value = 4;
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
CHECK(err, "overwrite elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* overwrite the node */
value = 1;
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
CHECK(err, "update elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* overwrite a non-existent node which is the prefix of the first
* node (Error).
*/
key.prefixlen = 8;
key.data = 0;
value = 2;
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
/* add a new node which is the prefix of the first node */
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup key", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* add another new node which will be the sibling of the first node */
key.prefixlen = 9;
key.data = htobe32(1 << 23);
value = 5;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup key", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* overwrite the third node */
value = 3;
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
CHECK(err, "overwrite elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup key", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* delete the second node to make it an intermediate node */
key.prefixlen = 8;
key.data = 0;
err = bpf_map_delete_elem(fd, &key);
CHECK(err, "del elem", "error %d\n", err);
/* overwrite the intermediate node (Error): BPF_EXIST must not treat it
* as an existing element
*/
value = 2;
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
close(fd);
}
/* Fill an LPM trie created with max_entries == 3, then check that adding a
* fourth key fails with -ENOSPC while updating keys that are already present
* still succeeds with both BPF_EXIST and BPF_ANY.
*/
static void test_lpm_trie_update_full_map(void)
{
struct lpm_trie_int_key key;
int value, got;
int fd, err;
fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
/* add a new node */
key.prefixlen = 16;
key.data = 0;
value = 0;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* add a second node */
key.prefixlen = 8;
key.data = 0;
value = 1;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* add a third node; the trie is now full */
key.prefixlen = 9;
key.data = htobe32(1 << 23);
value = 2;
err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
CHECK(err, "add new elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* try to add one more node to the full trie (Error) */
key.prefixlen = 32;
key.data = 0;
value = 3;
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err);
/* update the value of an existing node with BPF_EXIST */
key.prefixlen = 16;
key.data = 0;
value = 4;
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
CHECK(err, "overwrite elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
/* update the value of an existing node with BPF_ANY */
key.prefixlen = 9;
key.data = htobe32(1 << 23);
value = 5;
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
CHECK(err, "overwrite elem", "error %d\n", err);
got = 0;
err = bpf_map_lookup_elem(fd, &key, &got);
CHECK(err, "lookup elem", "error %d\n", err);
CHECK(got != value, "check value", "got %d exp %d\n", got, value);
close(fd);
}
/* qsort()-style comparator for an array of C-string pointers: @a and @b each
 * point at a "const char *" element, and the pointed-to strings are ordered
 * with strcmp().
 */
static int cmp_str(const void *a, const void *b)
{
	const char * const *lhs = a;
	const char * const *rhs = b;

	return strcmp(*lhs, *rhs);
}
/* Save strings in LPM trie. The trailing '\0' for each string will be
 * accounted in the prefixlen. The strings returned during the iteration
 * should be sorted as expected.
 *
 * The test runs in four phases:
 *  1. insert every key, verify re-adding with BPF_NOEXIST fails with -EEXIST,
 *     overwrite the value and look up all keys inserted so far;
 *  2. verify that one more insertion into the now-full map fails with -ENOSPC;
 *  3. iterate without deleting and compare against the strcmp()-sorted keys;
 *  4. iterate again, deleting each returned key, and expect the same order.
 */
static void test_lpm_trie_iterate_strs(void)
{
	static const char * const keys[] = {
		"ab", "abO", "abc", "abo", "abS", "abcd",
	};
	const char *sorted_keys[ARRAY_SIZE(keys)];
	struct lpm_trie_bytes_key key, next_key;
	unsigned int value, got, i, j, len;
	struct lpm_trie_bytes_key *cur;
	int fd, err;

	fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys));

	for (i = 0; i < ARRAY_SIZE(keys); i++) {
		unsigned int flags;

		/* add i-th element; alternate between BPF_NOEXIST and 0 */
		flags = i % 2 ? BPF_NOEXIST : 0;
		len = strlen(keys[i]);
		/* include the trailing '\0' */
		key.prefixlen = (len + 1) * 8;
		memset(key.data, 0, sizeof(key.data));
		memcpy(key.data, keys[i], len);
		value = i + 100;
		err = bpf_map_update_elem(fd, &key, &value, flags);
		CHECK(err, "add elem", "#%u error %d\n", i, err);

		err = bpf_map_lookup_elem(fd, &key, &got);
		CHECK(err, "lookup elem", "#%u error %d\n", i, err);
		CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got);

		/* re-add i-th element (Error) */
		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
		CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err);

		/* Overwrite i-th element; alternate between 0 and BPF_EXIST */
		flags = i % 2 ? 0 : BPF_EXIST;
		value = i;
		err = bpf_map_update_elem(fd, &key, &value, flags);
		CHECK(err, "update elem", "error %d\n", err);

		/* Lookup #[0~i] elements */
		for (j = 0; j <= i; j++) {
			len = strlen(keys[j]);
			key.prefixlen = (len + 1) * 8;
			memset(key.data, 0, sizeof(key.data));
			memcpy(key.data, keys[j], len);
			err = bpf_map_lookup_elem(fd, &key, &got);
			CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err);
			/* The j-th key was overwritten with value j, so j (not
			 * 'value', which holds i here) is the expected result.
			 */
			CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n",
			      i, j, j, got);
		}
	}

	/* Add element to a full qp-trie (Error) */
	key.prefixlen = sizeof(key.data) * 8;
	memset(key.data, 0, sizeof(key.data));
	value = 0;
	err = bpf_map_update_elem(fd, &key, &value, 0);
	CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err);

	/* Iterate sorted elements: no deletion */
	memcpy(sorted_keys, keys, sizeof(keys));
	qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str);
	cur = NULL;
	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
		len = strlen(sorted_keys[i]);
		err = bpf_map_get_next_key(fd, cur, &next_key);
		CHECK(err, "iterate", "#%u error %d\n", i, err);
		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
		      "#%u invalid len %u expect %u\n",
		      i, next_key.prefixlen, (len + 1) * 8);
		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
		      "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);

		cur = &next_key;
	}
	err = bpf_map_get_next_key(fd, cur, &next_key);
	CHECK(err != -ENOENT, "more element", "error %d\n", err);

	/* Iterate sorted elements: delete the found key after each iteration */
	cur = NULL;
	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
		len = strlen(sorted_keys[i]);
		err = bpf_map_get_next_key(fd, cur, &next_key);
		CHECK(err, "iterate", "#%u error %d\n", i, err);
		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
		      "#%u invalid len %u expect %u\n",
		      i, next_key.prefixlen, (len + 1) * 8);
		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
		      "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);

		cur = &next_key;

		err = bpf_map_delete_elem(fd, cur);
		CHECK(err, "delete", "#%u error %d\n", i, err);
	}
	/* Every key has been deleted, so no further key may be returned */
	err = bpf_map_get_next_key(fd, cur, &next_key);
	CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err);

	close(fd);
}
/* Store 4096 consecutive integers in an LPM trie using a fixed prefixlen of
 * 32. Keys are converted to big-endian before insertion so that iteration
 * returns them in ascending numeric order. Each iteration step deletes the key
 * it just received (the current minimum) and the following step is expected to
 * return the next smallest integer.
 */
static void test_lpm_trie_iterate_ints(void)
{
	struct lpm_trie_int_key key, next_key;
	struct lpm_trie_int_key *cur = NULL;
	unsigned int max_entries = 4096;
	unsigned int *data_set;
	unsigned int i;
	bool value = true;
	int fd, err;

	data_set = calloc(max_entries, sizeof(*data_set));
	CHECK(!data_set, "malloc", "no mem\n");
	for (i = 0; i < max_entries; i++)
		data_set[i] = i;

	fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries);

	/* Populate the trie with every integer of the data set */
	key.prefixlen = 32;
	for (i = 0; i < max_entries; i++) {
		key.data = htobe32(data_set[i]);
		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
		CHECK(err, "add elem", "#%u error %d\n", i, err);
	}

	for (i = 0; i < max_entries; i++) {
		err = bpf_map_get_next_key(fd, cur, &next_key);
		CHECK(err, "iterate", "#%u error %d\n", i, err);
		CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n",
		      i, next_key.prefixlen);
		CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n",
		      i, be32toh(next_key.data), data_set[i]);

		cur = &next_key;

		/*
		 * Remove the minimal key so that the next call of
		 * bpf_map_get_next_key() returns the second minimal key.
		 */
		err = bpf_map_delete_elem(fd, cur);
		CHECK(err, "del elem", "#%u elem error %d\n", i, err);
	}

	/* The trie is empty now: neither continuing nor restarting finds a key */
	err = bpf_map_get_next_key(fd, cur, &next_key);
	CHECK(err != -ENOENT, "more element", "error %d\n", err);

	err = bpf_map_get_next_key(fd, NULL, &next_key);
	CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err);

	free(data_set);
	close(fd);
}
void test_lpm_trie_map_basic_ops(void)
{
int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
test_lpm_basic();
test_lpm_order();
@ -792,6 +1179,10 @@ int main(void)
test_lpm_get_next_key();
test_lpm_multi_thread();
printf("test_lpm: OK\n");
return 0;
test_lpm_trie_update_flags();
test_lpm_trie_update_full_map();
test_lpm_trie_iterate_strs();
test_lpm_trie_iterate_ints();
printf("%s: PASS\n", __func__);
}

View File

@ -78,8 +78,8 @@ void test_task_storage_map_stress_lookup(void)
CHECK(err, "open_and_load", "error %d\n", err);
/* Only for a fully preemptible kernel */
if (!skel->kconfig->CONFIG_PREEMPT) {
printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
if (!skel->kconfig->CONFIG_PREEMPTION) {
printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
read_bpf_task_storage_busy__destroy(skel);
skips++;
return;

View File

@ -108,6 +108,35 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
close(s);
}
/* Insert one end of a connected AF_VSOCK pair into a single-entry sockmap,
 * close that socket, and then assert that a BPF_NOEXIST update of the same
 * slot with the peer socket succeeds.
 */
static void test_sockmap_vsock_delete_on_close(void)
{
	const int slot = 0;
	int conn, peer, map_fd, ret;

	ret = create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer);
	if (!ASSERT_OK(ret, "create_pair(AF_VSOCK)"))
		return;

	map_fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
				sizeof(int), 1, NULL);
	if (!ASSERT_GE(map_fd, 0, "bpf_map_create")) {
		close(conn);
		goto cleanup;
	}

	/* The first socket is closed right after it has been added */
	ret = bpf_map_update_elem(map_fd, &slot, &conn, BPF_NOEXIST);
	close(conn);
	if (!ASSERT_OK(ret, "bpf_map_update"))
		goto cleanup;

	/* BPF_NOEXIST only succeeds if the slot is free again */
	ret = bpf_map_update_elem(map_fd, &slot, &peer, BPF_NOEXIST);
	ASSERT_OK(ret, "after close(), bpf_map_update");

cleanup:
	close(peer);
	close(map_fd);
}
static void test_skmsg_helpers(enum bpf_map_type map_type)
{
struct test_skmsg_load_helpers *skel;
@ -937,12 +966,58 @@ static void test_sockmap_same_sock(void)
test_sockmap_pass_prog__destroy(skel);
}
/* Attach the skb verdict program to sock_map_rx, add one end of a connected
 * AF_VSOCK pair to the map, send a single byte from the peer, and check that
 * poll_read() on the mapped socket succeeds and that the byte can then be
 * received.
 */
static void test_sockmap_skb_verdict_vsock_poll(void)
{
	struct test_sockmap_pass_prog *skel;
	struct bpf_program *verdict;
	struct bpf_link *link;
	int map_fd, conn, peer, ret;
	int slot = 0;
	char byte = 'x';

	skel = test_sockmap_pass_prog__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer))
		goto out_destroy;

	verdict = skel->progs.prog_skb_verdict;
	map_fd = bpf_map__fd(skel->maps.sock_map_rx);
	link = bpf_program__attach_sockmap(verdict, map_fd);
	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
		goto out_close;

	ret = bpf_map_update_elem(map_fd, &slot, &conn, BPF_ANY);
	if (!ASSERT_OK(ret, "bpf_map_update_elem"))
		goto out_detach;

	if (xsend(peer, &byte, 1, 0) != 1)
		goto out_detach;

	/* The mapped socket must be reported readable once data arrived */
	ret = poll_read(conn, IO_TIMEOUT_SEC);
	if (!ASSERT_OK(ret, "poll"))
		goto out_detach;

	if (xrecv_nonblock(conn, &byte, 1, 0) != 1)
		FAIL("xrecv_nonblock");

out_detach:
	bpf_link__detach(link);
out_close:
	xclose(conn);
	xclose(peer);
out_destroy:
	test_sockmap_pass_prog__destroy(skel);
}
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap vsock delete on close"))
test_sockmap_vsock_delete_on_close();
if (test__start_subtest("sockmap sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
@ -997,4 +1072,6 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap skb_verdict vsock poll"))
test_sockmap_skb_verdict_vsock_poll();
}

View File

@ -197,7 +197,7 @@ static void test_nodeadlock(void)
/* Unnecessary recursion and deadlock detection are reproducible
* in the preemptible kernel.
*/
if (!skel->kconfig->CONFIG_PREEMPT) {
if (!skel->kconfig->CONFIG_PREEMPTION) {
test__skip();
goto done;
}

View File

@ -225,24 +225,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); }
void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
void test_verifier_lsm(void) { RUN(verifier_lsm); }
void test_verifier_mtu(void)
{
__u64 caps = 0;
int ret;
/* In case CAP_BPF and CAP_PERFMON is not set */
ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps);
if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin"))
return;
ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL);
if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
goto restore_cap;
RUN(verifier_mtu);
restore_cap:
if (caps)
cap_enable_effective(caps, NULL);
}
void test_verifier_mtu(void) { RUN(verifier_mtu); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{

View File

@ -5,6 +5,10 @@
#define XSTR(s) STR(s)
#define STR(s) #s
/* Expand a macro and then stringize the expansion */
#define QUOTE(str) #str
#define EXPAND_QUOTE(str) QUOTE(str)
/* This set of attributes controls behavior of the
* test_loader.c:test_loader__run_subtests().
*
@ -106,6 +110,7 @@
* __arch_* Specify on which architecture the test case should be tested.
* Several __arch_* annotations could be specified at once.
* When test case is not run on current arch it is marked as skipped.
* __caps_unpriv Specify the capabilities that should be set when running the test.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
@ -129,6 +134,13 @@
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
#define CAP_SYS_ADMIN 21
#define CAP_PERFMON 38
#define CAP_BPF 39
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))

View File

@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
/* A dynptr can't be used after it has been invalidated */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #3")
__failure __msg("Expected an initialized dynptr as arg #2")
int use_after_invalid(void *ctx)
{
struct bpf_dynptr ptr;
@ -428,7 +428,7 @@ int invalid_helper2(void *ctx)
/* A bpf_dynptr is invalidated if it's been written into */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int invalid_write1(void *ctx)
{
struct bpf_dynptr ptr;
@ -1407,7 +1407,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
/* bpf_dynptr_adjust can only be called on initialized dynptrs */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_adjust_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@ -1420,7 +1420,7 @@ int dynptr_adjust_invalid(void *ctx)
/* bpf_dynptr_is_null can only be called on initialized dynptrs */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_is_null_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@ -1433,7 +1433,7 @@ int dynptr_is_null_invalid(void *ctx)
/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_is_rdonly_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@ -1446,7 +1446,7 @@ int dynptr_is_rdonly_invalid(void *ctx)
/* bpf_dynptr_size can only be called on initialized dynptrs */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_size_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@ -1459,7 +1459,7 @@ int dynptr_size_invalid(void *ctx)
/* Only initialized dynptrs can be cloned */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
__failure __msg("Expected an initialized dynptr as arg #0")
int clone_invalid1(void *ctx)
{
struct bpf_dynptr ptr1 = {};
@ -1493,7 +1493,7 @@ int clone_invalid2(struct xdp_md *xdp)
/* Invalidating a dynptr should invalidate its clones */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #3")
__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate1(void *ctx)
{
struct bpf_dynptr clone;
@ -1514,7 +1514,7 @@ int clone_invalidate1(void *ctx)
/* Invalidating a dynptr should invalidate its parent */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #3")
__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate2(void *ctx)
{
struct bpf_dynptr ptr;
@ -1535,7 +1535,7 @@ int clone_invalidate2(void *ctx)
/* Invalidating a dynptr should invalidate its siblings */
SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #3")
__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate3(void *ctx)
{
struct bpf_dynptr ptr;
@ -1723,7 +1723,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
}
SEC("?raw_tp")
__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
int test_dynptr_reg_type(void *ctx)
{
struct task_struct *current = NULL;

View File

@ -1486,4 +1486,30 @@ int iter_subprog_check_stacksafe(const void *ctx)
return 0;
}
struct bpf_iter_num global_it;
/* Negative test: hands the address of the file-scope global_it, rather than a
 * stack variable, to bpf_iter_num_new(). The annotations expect the load to
 * fail with the "expected pointer to an iterator on stack" message for arg#0.
 */
SEC("raw_tp")
__failure __msg("arg#0 expected pointer to an iterator on stack")
int iter_new_bad_arg(const void *ctx)
{
bpf_iter_num_new(&global_it, 0, 1);
return 0;
}
/* Negative test: hands the address of the file-scope global_it, rather than a
 * stack variable, to bpf_iter_num_next(). The annotations expect the load to
 * fail with the "expected pointer to an iterator on stack" message for arg#0.
 */
SEC("raw_tp")
__failure __msg("arg#0 expected pointer to an iterator on stack")
int iter_next_bad_arg(const void *ctx)
{
bpf_iter_num_next(&global_it);
return 0;
}
/* Negative test: hands the address of the file-scope global_it, rather than a
 * stack variable, to bpf_iter_num_destroy(). The annotations expect the load
 * to fail with the "expected pointer to an iterator on stack" message for
 * arg#0.
 */
SEC("raw_tp")
__failure __msg("arg#0 expected pointer to an iterator on stack")
int iter_destroy_bad_arg(const void *ctx)
{
bpf_iter_num_destroy(&global_it);
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int destroy_without_creating_fail(void *ctx)
{
/* init with zeros to stop verifier complaining about uninit stack */
@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int compromise_iter_w_direct_write_fail(void *ctx)
{
struct bpf_iter_num iter;
@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int compromise_iter_w_helper_write_fail(void *ctx)
{
struct bpf_iter_num iter;
@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected uninitialized iter_num as arg #1")
__failure __msg("expected uninitialized iter_num as arg #0")
int double_create_fail(void *ctx)
{
struct bpf_iter_num iter;
@ -258,7 +258,7 @@ int double_create_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int double_destroy_fail(void *ctx)
{
struct bpf_iter_num iter;
@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int next_without_new_fail(void *ctx)
{
struct bpf_iter_num iter;
@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx)
}
SEC("?raw_tp")
__failure __msg("expected an initialized iter_num as arg #1")
__failure __msg("expected an initialized iter_num as arg #0")
int next_after_destroy_fail(void *ctx)
{
struct bpf_iter_num iter;

View File

@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx)
SEC("?raw_tp")
__failure
__msg("expected an initialized iter_testmod_seq as arg #2")
__msg("expected an initialized iter_testmod_seq as arg #1")
int testmod_seq_getter_before_bad(const void *ctx)
{
struct bpf_iter_testmod_seq it;
@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx)
SEC("?raw_tp")
__failure
__msg("expected an initialized iter_testmod_seq as arg #2")
__msg("expected an initialized iter_testmod_seq as arg #1")
int testmod_seq_getter_after_bad(const void *ctx)
{
struct bpf_iter_testmod_seq it;

View File

@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
extern bool CONFIG_PREEMPT __kconfig __weak;
extern bool CONFIG_PREEMPTION __kconfig __weak;
extern const int bpf_task_storage_busy __ksym;
char _license[] SEC("license") = "GPL";
@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy)
{
int *value;
if (!CONFIG_PREEMPT)
if (!CONFIG_PREEMPTION)
return 0;
if (bpf_get_current_pid_tgid() >> 32 != pid)

View File

@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
#define EBUSY 16
#endif
extern bool CONFIG_PREEMPT __kconfig __weak;
extern bool CONFIG_PREEMPTION __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int ret, zero = 0;
int *value;
if (!CONFIG_PREEMPT)
if (!CONFIG_PREEMPTION)
return 0;
task = bpf_get_current_task_btf();

View File

@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("?lsm.s/bpf")
__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
{
unsigned long val = 0;

View File

@ -32,18 +32,18 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
SEC("iter/cgroup")
__description("uninitialized iter in ->next()")
__failure __msg("expected an initialized iter_bits as arg #1")
__failure __msg("expected an initialized iter_bits as arg #0")
int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits *it = NULL;
struct bpf_iter_bits it = {};
bpf_iter_bits_next(it);
bpf_iter_bits_next(&it);
return 0;
}
SEC("iter/cgroup")
__description("uninitialized iter in ->destroy()")
__failure __msg("expected an initialized iter_bits as arg #1")
__failure __msg("expected an initialized iter_bits as arg #0")
int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits it = {};

View File

@ -6,7 +6,9 @@
SEC("tc/ingress")
__description("uninit/mtu: write rejected")
__failure __msg("invalid indirect read from stack")
__success
__caps_unpriv(CAP_BPF|CAP_NET_ADMIN)
__failure_unpriv __msg_unpriv("invalid indirect read from stack")
int tc_uninit_mtu(struct __sk_buff *ctx)
{
__u32 mtu;

View File

@ -1244,4 +1244,39 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void)
: __clobber_all);
}
/* Stores a single byte at r10-8 and then loads the full 8 bytes from the same
 * address, so seven bytes of the loaded slot were never written by this
 * program. Annotated as __success for the default run; for the unprivileged
 * run, which is given only CAP_BPF via __caps_unpriv, the load is expected to
 * fail with the "invalid read from stack" message below.
 */
SEC("socket")
__description("stack_noperfmon: reject read of invalid slots")
__success
__caps_unpriv(CAP_BPF)
__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8")
__naked void stack_noperfmon_reject_invalid_read(void)
{
asm volatile (" \
r2 = 1; \
r6 = r10; \
r6 += -8; \
*(u8 *)(r6 + 0) = r2; \
r2 = *(u64 *)(r6 + 0); \
r0 = 0; \
exit; \
" ::: __clobber_all);
}
/* Stores r0 (zero) as a 64-bit value at r10-8 and then stores it again as a
 * 32-bit value at the same address. Annotated to load successfully both in the
 * default run and in the unprivileged run that is given only CAP_BPF via
 * __caps_unpriv.
 */
SEC("socket")
__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots")
__success
__caps_unpriv(CAP_BPF)
__success_unpriv
__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void)
{
asm volatile(" \
r0 = 0; \
*(u64 *)(r10 - 8) = r0; \
*(u32 *)(r10 - 8) = r0; \
exit; \
" :
:
: __clobber_all);
}
char _license[] SEC("license") = "GPL";

View File

@ -36,6 +36,7 @@
#define TEST_TAG_ARCH "comment:test_arch="
#define TEST_TAG_JITED_PFX "comment:test_jited="
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xcafe4all
@ -74,6 +75,7 @@ struct test_subspec {
struct expected_msgs jited;
int retval;
bool execute;
__u64 caps;
};
struct test_spec {
@ -276,6 +278,37 @@ static int parse_int(const char *str, int *val, const char *name)
return 0;
}
/* Parse a '|'-separated list of decimal capability numbers and OR the
 * corresponding bits into *val.
 *
 * @str:  list to parse, e.g. "39|12"; it is duplicated, the caller's string is
 *        not modified
 * @val:  bit mask to update; bit N is set for every capability number N found.
 *        Bits set before a failing token are not rolled back.
 * @name: label used in the failure message
 *
 * A token that still starts with "CAP_" means the constant was not expanded to
 * a number by the preprocessor and is reported separately. A token that parses
 * to 0, fails to convert, or falls outside the bit range of *val is rejected.
 *
 * Returns 0 on success, -EINVAL on any failure (including allocation failure).
 */
static int parse_caps(const char *str, __u64 *val, const char *name)
{
	const long nr_bits = sizeof(*val) * 8;
	char *token, *saveptr = NULL;
	int err = -EINVAL;
	long cap_flag;
	char *str_cpy;

	str_cpy = strdup(str);
	if (str_cpy == NULL) {
		PRINT_FAIL("Memory allocation failed\n");
		return -EINVAL;
	}

	for (token = strtok_r(str_cpy, "|", &saveptr); token != NULL;
	     token = strtok_r(NULL, "|", &saveptr)) {
		if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) {
			PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token);
			goto out;
		}

		errno = 0;
		cap_flag = strtol(token, NULL, 10);
		/* Keep the result as long so that out-of-range input is not
		 * silently truncated, and refuse anything that would make the
		 * shift below undefined.
		 */
		if (errno || cap_flag <= 0 || cap_flag >= nr_bits) {
			PRINT_FAIL("failed to parse caps %s\n", name);
			goto out;
		}

		*val |= (1ULL << cap_flag);
	}

	err = 0;
out:
	/* Single exit so the duplicated string is released on every path */
	free(str_cpy);
	return err;
}
static int parse_retval(const char *str, int *val, const char *name)
{
struct {
@ -541,6 +574,12 @@ static int parse_test_spec(struct test_loader *tester,
jit_on_next_line = true;
} else if (str_has_pfx(s, TEST_BTF_PATH)) {
spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
} else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) {
val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1;
err = parse_caps(val, &spec->unpriv.caps, "test caps");
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
}
}
@ -917,6 +956,13 @@ void run_subtest(struct test_loader *tester,
test__end_subtest();
return;
}
if (subspec->caps) {
err = cap_enable_effective(subspec->caps, NULL);
if (err) {
PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err));
goto subtest_cleanup;
}
}
}
/* Implicitly reset to NULL if next test case doesn't specify */

View File

@ -1579,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
/* Runs test_send_large() twice with opt->tx_wait_mem enabled: first with only
 * txmsg_redir set, then with txmsg_redir set and txmsg_apply = 4097.
 * opt->tx_wait_mem is switched back to false before returning.
 */
static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
{
/* NOTE(review): txmsg_redir is assigned again before each test_send_large()
 * call below, so this first assignment looks redundant - confirm.
 */
txmsg_redir = 1;
opt->tx_wait_mem = true;
txmsg_redir = 1;
test_send_large(opt, cgrp);
/* Second pass: same redirect setting plus an apply size of 4097 bytes */
txmsg_redir = 1;
txmsg_apply = 4097;
test_send_large(opt, cgrp);
opt->tx_wait_mem = false;
}