BPF fixes:

- Fix several issues for BPF LPM trie map which were found by syzbot and during addition of new test cases (Hou Tao) - Fix a missing process_iter_arg register type check in the BPF verifier (Kumar Kartikeya Dwivedi, Tao Lyu) - Fix several correctness gaps in the BPF verifier when interacting with the BPF stack without CAP_PERFMON (Kumar Kartikeya Dwivedi, Eduard Zingerman, Tao Lyu) - Fix OOB BPF map writes when deleting elements for the case of xsk map as well as devmap (Maciej Fijalkowski) - Fix xsk sockets to always clear DMA mapping information when unmapping the pool (Larysa Zaremba) - Fix sk_mem_uncharge logic in tcp_bpf_sendmsg to only uncharge after sent bytes have been finalized (Zijian Zhang) - Fix BPF sockmap with vsocks which was missing a queue check in poll and sockmap cleanup on close (Michal Luczaj) - Fix tools infra to override makefile ARCH variable if defined but empty, which addresses cross-building tools. (Björn Töpel) - Fix two resolve_btfids build warnings on unresolved bpf_lsm symbols (Thomas Weißschuh) - Fix a NULL pointer dereference in bpftool (Amir Mohammadi) - Fix BPF selftests to check for CONFIG_PREEMPTION instead of CONFIG_PREEMPT (Sebastian Andrzej Siewior) Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -----BEGIN PGP SIGNATURE----- iIsEABYKADMWIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZ1N8bhUcZGFuaWVsQGlv Z2VhcmJveC5uZXQACgkQ2yufC7HISIO6ZAD+ITpujJgxvFGC0R7E9o3XJ7V1SpmR SlW0lGpj6vOHTUAA/2MRoZurJSTbdT3fbWiCUgU1rMcwkoErkyxUaPuBci0D =kgXL -----END PGP SIGNATURE----- Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf Pull bpf fixes from Daniel Borkmann:: - Fix several issues for BPF LPM trie map which were found by syzbot and during addition of new test cases (Hou Tao) - Fix a missing process_iter_arg register type check in the BPF verifier (Kumar Kartikeya Dwivedi, Tao Lyu) - Fix several correctness gaps in the BPF verifier when interacting with the BPF stack without CAP_PERFMON (Kumar Kartikeya Dwivedi, Eduard Zingerman, Tao Lyu) - Fix OOB BPF map writes when deleting elements for the case of xsk map as well as devmap (Maciej Fijalkowski) - Fix xsk sockets to always clear DMA mapping information when unmapping the pool (Larysa Zaremba) - Fix sk_mem_uncharge logic in tcp_bpf_sendmsg to only uncharge after sent bytes have been finalized (Zijian Zhang) - Fix BPF sockmap with vsocks which was missing a queue check in poll and sockmap cleanup on close (Michal Luczaj) - Fix tools infra to override makefile ARCH variable if defined but empty, which addresses cross-building tools. (Björn Töpel) - Fix two resolve_btfids build warnings on unresolved bpf_lsm symbols (Thomas Weißschuh) - Fix a NULL pointer dereference in bpftool (Amir Mohammadi) - Fix BPF selftests to check for CONFIG_PREEMPTION instead of CONFIG_PREEMPT (Sebastian Andrzej Siewior) * tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (31 commits) selftests/bpf: Add more test cases for LPM trie selftests/bpf: Move test_lpm_map.c to map_tests bpf: Use raw_spinlock_t for LPM trie bpf: Switch to bpf mem allocator for LPM trie bpf: Fix exact match conditions in trie_get_next_key() bpf: Handle in-place update for full LPM trie correctly bpf: Handle BPF_EXIST and BPF_NOEXIST for LPM trie bpf: Remove unnecessary kfree(im_node) in lpm_trie_update_elem bpf: Remove unnecessary check when updating LPM trie selftests/bpf: Add test for narrow spill into 64-bit spilled scalar selftests/bpf: Add test for reading from STACK_INVALID slots selftests/bpf: Introduce __caps_unpriv annotation for tests bpf: Fix narrow scalar spill onto 64-bit spilled scalar slots bpf: Don't mark STACK_INVALID as STACK_MISC in mark_stack_slot_misc samples/bpf: Remove unnecessary -I flags from libbpf EXTRA_CFLAGS bpf: Zero index arg error string for dynptr and iter selftests/bpf: Add tests for iter arg check bpf: Ensure reg is PTR_TO_STACK in process_iter_arg tools: Override makefile ARCH variable if defined, but empty selftests/bpf: Add apply_bytes test to test_txmsg_redir_wait_sndmem in test_sockmap ...
2024-12-28 00:35:01 +00:00 · 2024-12-06 15:07:48 -08:00 · 2024-12-06 15:07:48 -08:00 · b5f217084a
commit b5f217084a
parent f3ddc438a2 509df676c2
31 changed files with 813 additions and 174 deletions
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@ -375,8 +375,6 @@ BTF_ID(func, bpf_lsm_socket_socketpair)

 BTF_ID(func, bpf_lsm_syslog)
 BTF_ID(func, bpf_lsm_task_alloc)
-BTF_ID(func, bpf_lsm_current_getsecid_subj)
-BTF_ID(func, bpf_lsm_task_getsecid_obj)
 BTF_ID(func, bpf_lsm_task_prctl)
 BTF_ID(func, bpf_lsm_task_setscheduler)
 BTF_ID(func, bpf_lsm_task_to_inode)
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@ -184,7 +184,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 static void dev_map_free(struct bpf_map *map)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i;
+	u32 i;

 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 	 * so the programs (can be more than one that used this map) were
@ -821,7 +821,7 @@ static long dev_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *old_dev;
-	int k = *(u32 *)key;
+	u32 k = *(u32 *)key;

 	if (k >= map->max_entries)
 		return -EINVAL;
@ -838,7 +838,7 @@ static long dev_map_hash_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *old_dev;
-	int k = *(u32 *)key;
+	u32 k = *(u32 *)key;
 	unsigned long flags;
 	int ret = -ENOENT;

--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@ -15,6 +15,7 @@
 #include <net/ipv6.h>
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
+#include <linux/bpf_mem_alloc.h>

 /* Intermediate node */
 #define LPM_TREE_NODE_FLAG_IM BIT(0)
@ -22,7 +23,6 @@
 struct lpm_trie_node;

 struct lpm_trie_node {
-	struct rcu_head rcu;
 	struct lpm_trie_node __rcu	*child[2];
 	u32				prefixlen;
 	u32				flags;
@ -32,10 +32,11 @@ struct lpm_trie_node {
 struct lpm_trie {
 	struct bpf_map			map;
 	struct lpm_trie_node __rcu	*root;
+	struct bpf_mem_alloc		ma;
 	size_t				n_entries;
 	size_t				max_prefixlen;
 	size_t				data_size;
-	spinlock_t			lock;
+	raw_spinlock_t			lock;
 };

 /* This trie implements a longest prefix match algorithm that can be used to
@ -287,17 +288,18 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 	return found->data + trie->data_size;
 }

-static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
-						 const void *value)
+static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie,
+						 const void *value,
+						 bool disable_migration)
 {
 	struct lpm_trie_node *node;
-	size_t size = sizeof(struct lpm_trie_node) + trie->data_size;

-	if (value)
-		size += trie->map.value_size;
+	if (disable_migration)
+		migrate_disable();
+	node = bpf_mem_cache_alloc(&trie->ma);
+	if (disable_migration)
+		migrate_enable();

-	node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN,
-				    trie->map.numa_node);
 	if (!node)
 		return NULL;

@ -310,12 +312,22 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
 	return node;
 }

+static int trie_check_add_elem(struct lpm_trie *trie, u64 flags)
+{
+	if (flags == BPF_EXIST)
+		return -ENOENT;
+	if (trie->n_entries == trie->map.max_entries)
+		return -ENOSPC;
+	trie->n_entries++;
+	return 0;
+}
+
 /* Called from syscall or from eBPF program */
 static long trie_update_elem(struct bpf_map *map,
 			     void *_key, void *value, u64 flags)
 {
 	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
-	struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+	struct lpm_trie_node *node, *im_node, *new_node;
 	struct lpm_trie_node *free_node = NULL;
 	struct lpm_trie_node __rcu **slot;
 	struct bpf_lpm_trie_key_u8 *key = _key;
@ -330,22 +342,14 @@ static long trie_update_elem(struct bpf_map *map,
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;

-	spin_lock_irqsave(&trie->lock, irq_flags);
+	/* Allocate and fill a new node. Need to disable migration before
+	 * invoking bpf_mem_cache_alloc().
+	 */
+	new_node = lpm_trie_node_alloc(trie, value, true);
+	if (!new_node)
+		return -ENOMEM;

-	/* Allocate and fill a new node */
-
-	if (trie->n_entries == trie->map.max_entries) {
-		ret = -ENOSPC;
-		goto out;
-	}
-
-	new_node = lpm_trie_node_alloc(trie, value);
-	if (!new_node) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	trie->n_entries++;
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);

 	new_node->prefixlen = key->prefixlen;
 	RCU_INIT_POINTER(new_node->child[0], NULL);
@ -364,8 +368,7 @@ static long trie_update_elem(struct bpf_map *map,
 		matchlen = longest_prefix_match(trie, node, key);

 		if (node->prefixlen != matchlen ||
-		    node->prefixlen == key->prefixlen ||
-		    node->prefixlen == trie->max_prefixlen)
+		    node->prefixlen == key->prefixlen)
 			break;

 		next_bit = extract_bit(key->data, node->prefixlen);
@ -376,6 +379,10 @@ static long trie_update_elem(struct bpf_map *map,
 	 * simply assign the @new_node to that slot and be done.
 	 */
 	if (!node) {
+		ret = trie_check_add_elem(trie, flags);
+		if (ret)
+			goto out;
+
 		rcu_assign_pointer(*slot, new_node);
 		goto out;
 	}
@ -384,18 +391,30 @@ static long trie_update_elem(struct bpf_map *map,
 	 * which already has the correct data array set.
 	 */
 	if (node->prefixlen == matchlen) {
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) {
+			if (flags == BPF_NOEXIST) {
+				ret = -EEXIST;
+				goto out;
+			}
+		} else {
+			ret = trie_check_add_elem(trie, flags);
+			if (ret)
+				goto out;
+		}
+
 		new_node->child[0] = node->child[0];
 		new_node->child[1] = node->child[1];

-		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
-			trie->n_entries--;
-
 		rcu_assign_pointer(*slot, new_node);
 		free_node = node;

 		goto out;
 	}

+	ret = trie_check_add_elem(trie, flags);
+	if (ret)
+		goto out;
+
 	/* If the new node matches the prefix completely, it must be inserted
 	 * as an ancestor. Simply insert it between @node and *@slot.
 	 */
@ -406,8 +425,10 @@ static long trie_update_elem(struct bpf_map *map,
 		goto out;
 	}

-	im_node = lpm_trie_node_alloc(trie, NULL);
+	/* migration is disabled within the locked scope */
+	im_node = lpm_trie_node_alloc(trie, NULL, false);
 	if (!im_node) {
+		trie->n_entries--;
 		ret = -ENOMEM;
 		goto out;
 	}
@ -429,16 +450,13 @@ static long trie_update_elem(struct bpf_map *map,
 	rcu_assign_pointer(*slot, im_node);

 out:
-	if (ret) {
-		if (new_node)
-			trie->n_entries--;
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);

-		kfree(new_node);
-		kfree(im_node);
-	}
-
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
-	kfree_rcu(free_node, rcu);
+	migrate_disable();
+	if (ret)
+		bpf_mem_cache_free(&trie->ma, new_node);
+	bpf_mem_cache_free_rcu(&trie->ma, free_node);
+	migrate_enable();

 	return ret;
 }
@ -459,7 +477,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;

-	spin_lock_irqsave(&trie->lock, irq_flags);
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);

 	/* Walk the tree looking for an exact key/length match and keeping
 	 * track of the path we traverse.  We will need to know the node
@ -535,9 +553,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	free_node = node;

 out:
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
-	kfree_rcu(free_parent, rcu);
-	kfree_rcu(free_node, rcu);
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+	migrate_disable();
+	bpf_mem_cache_free_rcu(&trie->ma, free_parent);
+	bpf_mem_cache_free_rcu(&trie->ma, free_node);
+	migrate_enable();

 	return ret;
 }
@ -559,6 +580,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 static struct bpf_map *trie_alloc(union bpf_attr *attr)
 {
 	struct lpm_trie *trie;
+	size_t leaf_size;
+	int err;

 	/* check sanity of attributes */
 	if (attr->max_entries == 0 ||
@ -581,9 +604,19 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 			  offsetof(struct bpf_lpm_trie_key_u8, data);
 	trie->max_prefixlen = trie->data_size * 8;

-	spin_lock_init(&trie->lock);
+	raw_spin_lock_init(&trie->lock);

+	/* Allocate intermediate and leaf nodes from the same allocator */
+	leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +
+		    trie->map.value_size;
+	err = bpf_mem_alloc_init(&trie->ma, leaf_size, false);
+	if (err)
+		goto free_out;
 	return &trie->map;
+
+free_out:
+	bpf_map_area_free(trie);
+	return ERR_PTR(err);
 }

 static void trie_free(struct bpf_map *map)
@ -615,13 +648,17 @@ static void trie_free(struct bpf_map *map)
 				continue;
 			}

-			kfree(node);
+			/* No bpf program may access the map, so freeing the
+			 * node without waiting for the extra RCU GP.
+			 */
+			bpf_mem_cache_raw_free(node);
 			RCU_INIT_POINTER(*slot, NULL);
 			break;
 		}
 	}

 out:
+	bpf_mem_alloc_destroy(&trie->ma);
 	bpf_map_area_free(trie);
 }

@ -633,7 +670,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
 	struct lpm_trie_node **node_stack = NULL;
 	int err = 0, stack_ptr = -1;
 	unsigned int next_bit;
-	size_t matchlen;
+	size_t matchlen = 0;

 	/* The get_next_key follows postorder. For the 4 node example in
 	 * the top of this file, the trie_get_next_key() returns the following
@ -672,7 +709,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
 		next_bit = extract_bit(key->data, node->prefixlen);
 		node = rcu_dereference(node->child[next_bit]);
 	}
-	if (!node || node->prefixlen != key->prefixlen ||
+	if (!node || node->prefixlen != matchlen ||
 	    (node->flags & LPM_TREE_NODE_FLAG_IM))
 		goto find_leftmost;

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@ -1202,14 +1202,17 @@ static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
 * case they are equivalent, or it's STACK_ZERO, in which case we preserve
 * more precise STACK_ZERO.
- * Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take
- * env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
+ * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
+ * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
+ * unnecessary as both are considered equivalent when loading data and pruning,
+ * in case of unprivileged mode it will be incorrect to allow reads of invalid
+ * slots.
 */
 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
 {
 	if (*stype == STACK_ZERO)
 		return;
-	if (env->allow_ptr_leaks && *stype == STACK_INVALID)
+	if (*stype == STACK_INVALID)
 		return;
 	*stype = STACK_MISC;
 }
@ -4700,6 +4703,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	 */
 	if (!env->allow_ptr_leaks &&
 	    is_spilled_reg(&state->stack[spi]) &&
+	    !is_spilled_scalar_reg(&state->stack[spi]) &&
 	    size != BPF_REG_SIZE) {
 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
 		return -EACCES;
@ -8071,7 +8075,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
 	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
 		verbose(env,
 			"arg#%d expected pointer to stack or const struct bpf_dynptr\n",
-			regno);
+			regno - 1);
 		return -EINVAL;
 	}

@ -8125,7 +8129,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
 		if (!is_dynptr_reg_valid_init(env, reg)) {
 			verbose(env,
 				"Expected an initialized dynptr as arg #%d\n",
-				regno);
+				regno - 1);
 			return -EINVAL;
 		}

@ -8133,7 +8137,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
 		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
 			verbose(env,
 				"Expected a dynptr of type %s as arg #%d\n",
-				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
+				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
 			return -EINVAL;
 		}

@ -8189,6 +8193,11 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 	const struct btf_type *t;
 	int spi, err, i, nr_slots, btf_id;

+	if (reg->type != PTR_TO_STACK) {
+		verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
+		return -EINVAL;
+	}
+
 	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
 	 * ensures struct convention, so we wouldn't need to do any BTF
 	 * validation here. But given iter state can be passed as a parameter
@ -8197,7 +8206,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 	 */
 	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
 	if (btf_id < 0) {
-		verbose(env, "expected valid iter pointer as arg #%d\n", regno);
+		verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
 		return -EINVAL;
 	}
 	t = btf_type_by_id(meta->btf, btf_id);
@ -8207,7 +8216,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
 			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
-				iter_type_str(meta->btf, btf_id), regno);
+				iter_type_str(meta->btf, btf_id), regno - 1);
 			return -EINVAL;
 		}

@ -8231,7 +8240,7 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 			break;
 		case -EINVAL:
 			verbose(env, "expected an initialized iter_%s as arg #%d\n",
-				iter_type_str(meta->btf, btf_id), regno);
+				iter_type_str(meta->btf, btf_id), regno - 1);
 			return err;
 		case -EPROTO:
 			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@ -441,7 +441,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 			cork = true;
 			psock->cork = NULL;
 		}
-		sk_msg_return(sk, msg, tosend);
 		release_sock(sk);

 		origsize = msg->sg.size;
@ -453,8 +452,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 			sock_put(sk_redir);

 		lock_sock(sk);
+		sk_mem_uncharge(sk, sent);
 		if (unlikely(ret < 0)) {
-			int free = sk_msg_free_nocharge(sk, msg);
+			int free = sk_msg_free(sk, msg);

 			if (!cork)
 				*copied -= free;
@ -468,7 +468,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 		break;
 	case __SK_DROP:
 	default:
-		sk_msg_free_partial(sk, msg, tosend);
+		sk_msg_free(sk, msg);
 		sk_msg_apply_bytes(psock, tosend);
 		*copied -= (tosend + delta);
 		return -EACCES;
@ -484,12 +484,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 		}
 		if (msg &&
 		    msg->sg.data[msg->sg.start].page_link &&
-		    msg->sg.data[msg->sg.start].length) {
-			if (eval == __SK_REDIRECT)
-				sk_mem_charge(sk, tosend - sent);
+		    msg->sg.data[msg->sg.start].length)
 			goto more_data;
 	}
-	}
 	return ret;
 }

--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@ -117,12 +117,14 @@
 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
 static void vsock_sk_destruct(struct sock *sk);
 static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+static void vsock_close(struct sock *sk, long timeout);

 /* Protocol family. */
 struct proto vsock_proto = {
 	.name = "AF_VSOCK",
 	.owner = THIS_MODULE,
 	.obj_size = sizeof(struct vsock_sock),
+	.close = vsock_close,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot = vsock_bpf_update_proto,
 #endif
@ -797,9 +799,8 @@ static bool sock_type_connectible(u16 type)

 static void __vsock_release(struct sock *sk, int level)
 {
-	if (sk) {
-		struct sock *pending;
 	struct vsock_sock *vsk;
+	struct sock *pending;

 	vsk = vsock_sk(sk);
 	pending = NULL;	/* Compiler warning. */
@ -829,7 +830,6 @@ static void __vsock_release(struct sock *sk, int level)

 	release_sock(sk);
 	sock_put(sk);
-	}
 }

 static void vsock_sk_destruct(struct sock *sk)
@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(vsock_data_ready);

+/* Dummy callback required by sockmap.
+ * See unconditional call of saved_close() in sock_map_close().
+ */
+static void vsock_close(struct sock *sk, long timeout)
+{
+}
+
 static int vsock_release(struct socket *sock)
 {
-	__vsock_release(sock->sk, 0);
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	sk->sk_prot->close(sk, 0);
+	__vsock_release(sk, 0);
 	sock->sk = NULL;
 	sock->state = SS_FREE;

@ -1054,6 +1067,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
 		mask |= EPOLLRDHUP;
 	}

+	if (sk_is_readable(sk))
+		mask |= EPOLLIN | EPOLLRDNORM;
+
 	if (sock->type == SOCK_DGRAM) {
 		/* For datagram sockets we can read if there is something in
 		 * the queue and write as long as the socket isn't shutdown for
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@ -387,10 +387,9 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
 		return;
 	}

-	if (!refcount_dec_and_test(&dma_map->users))
-		return;
-
+	if (refcount_dec_and_test(&dma_map->users))
 		__xp_dma_unmap(dma_map, attrs);
+
 	kvfree(pool->dma_pages);
 	pool->dma_pages = NULL;
 	pool->dma_pages_cnt = 0;
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct bpf_map *map, void *key)
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 	struct xdp_sock __rcu **map_entry;
 	struct xdp_sock *old_xs;
-	int k = *(u32 *)key;
+	u32 k = *(u32 *)key;

 	if (k >= map->max_entries)
 		return -EINVAL;
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@ -146,13 +146,14 @@ ifeq ($(ARCH), x86)
 BPF_EXTRA_CFLAGS += -fcf-protection
 endif

-TPROGS_CFLAGS += -Wall -O2
-TPROGS_CFLAGS += -Wmissing-prototypes
-TPROGS_CFLAGS += -Wstrict-prototypes
-TPROGS_CFLAGS += $(call try-run,\
+COMMON_CFLAGS += -Wall -O2
+COMMON_CFLAGS += -Wmissing-prototypes
+COMMON_CFLAGS += -Wstrict-prototypes
+COMMON_CFLAGS += $(call try-run,\
 	printf "int main() { return 0; }" |\
 	$(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)

+TPROGS_CFLAGS += $(COMMON_CFLAGS)
 TPROGS_CFLAGS += -I$(objtree)/usr/include
 TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
@ -162,7 +163,7 @@ TPROGS_CFLAGS += -I$(srctree)/tools/lib
 TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0

 ifdef SYSROOT
-TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+COMMON_CFLAGS += --sysroot=$(SYSROOT)
 TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
 endif

@ -229,7 +230,7 @@ clean:

 $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
 # Fix up variables inherited from Kbuild that tools/ build system won't like
-	$(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+	$(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
 		LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \
 		O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
 		$@ install_headers
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@ -822,11 +822,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 					printf("%s:\n", sym_name);
 				}

+				if (ksyms) {
 					if (disasm_print_insn(img, lens[i], opcodes,
 							      name, disasm_opt, btf,
 							      prog_linfo, ksyms[i], i,
 							      linum))
 						goto exit_free;
+				} else {
+					if (disasm_print_insn(img, lens[i], opcodes,
+							      name, disasm_opt, btf,
+							      NULL, 0, 0, false))
+						goto exit_free;
+				}

 				img += lens[i];

--- a/tools/scripts/Makefile.arch
+++ b/tools/scripts/Makefile.arch
@ -7,8 +7,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
                                  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
                                  -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/)

-ifndef ARCH
-ARCH := $(HOSTARCH)
+ifeq ($(strip $(ARCH)),)
+override ARCH := $(HOSTARCH)
 endif

 SRCARCH := $(ARCH)
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@ -5,7 +5,6 @@ bpf-syscall*
 test_verifier
 test_maps
 test_lru_map
-test_lpm_map
 test_tag
 FEATURE-DUMP.libbpf
 FEATURE-DUMP.selftests
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@ -83,7 +83,7 @@ CLANG_CPUV4 := 1
 endif

 # Order correspond to 'make run_tests' order
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
 	test_sockmap \
 	test_tcpnotify_user test_sysctl \
 	test_progs-no_alu32
--- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
@ -20,10 +20,12 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <endian.h>
 #include <arpa/inet.h>
 #include <sys/time.h>

 #include <bpf/bpf.h>
+#include <test_maps.h>

 #include "bpf_util.h"

@ -33,6 +35,22 @@ struct tlpm_node {
 	uint8_t key[];
 };

+struct lpm_trie_bytes_key {
+	union {
+		struct bpf_lpm_trie_key_hdr hdr;
+		__u32 prefixlen;
+	};
+	unsigned char data[8];
+};
+
+struct lpm_trie_int_key {
+	union {
+		struct bpf_lpm_trie_key_hdr hdr;
+		__u32 prefixlen;
+	};
+	unsigned int data;
+};
+
 static struct tlpm_node *tlpm_match(struct tlpm_node *list,
 				    const uint8_t *key,
 				    size_t n_bits);
@ -223,7 +241,7 @@ static void test_lpm_map(int keysize)
 	n_matches = 0;
 	n_matches_after_delete = 0;
 	n_nodes = 1 << 8;
-	n_lookups = 1 << 16;
+	n_lookups = 1 << 9;

 	data = alloca(keysize);
 	memset(data, 0, keysize);
@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void)
 	close(map_fd);
 }

-int main(void)
+static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+	int fd;
+
+	opts.map_flags = BPF_F_NO_PREALLOC;
+	fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries,
+			    &opts);
+	CHECK(fd < 0, "bpf_map_create", "error %d\n", errno);
+
+	return fd;
+}
+
+static void test_lpm_trie_update_flags(void)
+{
+	struct lpm_trie_int_key key;
+	unsigned int value, got;
+	int fd, err;
+
+	fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+	/* invalid flags (Error) */
+	key.prefixlen = 32;
+	key.data = 0;
+	value = 0;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK);
+	CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+	/* invalid flags (Error) */
+	key.prefixlen = 32;
+	key.data = 0;
+	value = 0;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST);
+	CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+	/* overwrite an empty qp-trie (Error) */
+	key.prefixlen = 32;
+	key.data = 0;
+	value = 2;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+	CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err);
+
+	/* add a new node */
+	key.prefixlen = 16;
+	key.data = 0;
+	value = 1;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* add the same node as new node (Error) */
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err != -EEXIST, "add new elem again", "error %d\n", err);
+
+	/* overwrite the existed node */
+	value = 4;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+	CHECK(err, "overwrite elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* overwrite the node */
+	value = 1;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+	CHECK(err, "update elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* overwrite a non-existent node which is the prefix of the first
+	 * node (Error).
+	 */
+	key.prefixlen = 8;
+	key.data = 0;
+	value = 2;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+	CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+	/* add a new node which is the prefix of the first node */
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup key", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* add another new node which will be the sibling of the first node */
+	key.prefixlen = 9;
+	key.data = htobe32(1 << 23);
+	value = 5;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup key", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* overwrite the third node */
+	value = 3;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+	CHECK(err, "overwrite elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup key", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* delete the second node to make it an intermediate node */
+	key.prefixlen = 8;
+	key.data = 0;
+	err = bpf_map_delete_elem(fd, &key);
+	CHECK(err, "del elem", "error %d\n", err);
+
+	/* overwrite the intermediate node (Error) */
+	value = 2;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+	CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+	close(fd);
+}
+
+static void test_lpm_trie_update_full_map(void)
+{
+	struct lpm_trie_int_key key;
+	int value, got;
+	int fd, err;
+
+	fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+	/* add a new node */
+	key.prefixlen = 16;
+	key.data = 0;
+	value = 0;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* add new node */
+	key.prefixlen = 8;
+	key.data = 0;
+	value = 1;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* add new node */
+	key.prefixlen = 9;
+	key.data = htobe32(1 << 23);
+	value = 2;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+	CHECK(err, "add new elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* try to add more node (Error) */
+	key.prefixlen = 32;
+	key.data = 0;
+	value = 3;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+	CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err);
+
+	/* update the value of an existed node with BPF_EXIST */
+	key.prefixlen = 16;
+	key.data = 0;
+	value = 4;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+	CHECK(err, "overwrite elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	/* update the value of an existed node with BPF_ANY */
+	key.prefixlen = 9;
+	key.data = htobe32(1 << 23);
+	value = 5;
+	err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+	CHECK(err, "overwrite elem", "error %d\n", err);
+	got = 0;
+	err = bpf_map_lookup_elem(fd, &key, &got);
+	CHECK(err, "lookup elem", "error %d\n", err);
+	CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+	close(fd);
+}
+
+static int cmp_str(const void *a, const void *b)
+{
+	const char *str_a = *(const char **)a, *str_b = *(const char **)b;
+
+	return strcmp(str_a, str_b);
+}
+
+/* Save strings in LPM trie. The trailing '\0' for each string will be
+ * accounted in the prefixlen. The strings returned during the iteration
+ * should be sorted as expected.
+ */
+static void test_lpm_trie_iterate_strs(void)
+{
+	static const char * const keys[] = {
+		"ab", "abO", "abc", "abo", "abS", "abcd",
+	};
+	const char *sorted_keys[ARRAY_SIZE(keys)];
+	struct lpm_trie_bytes_key key, next_key;
+	unsigned int value, got, i, j, len;
+	struct lpm_trie_bytes_key *cur;
+	int fd, err;
+
+	fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys));
+
+	for (i = 0; i < ARRAY_SIZE(keys); i++) {
+		unsigned int flags;
+
+		/* add i-th element */
+		flags = i % 2 ? BPF_NOEXIST : 0;
+		len = strlen(keys[i]);
+		/* include the trailing '\0' */
+		key.prefixlen = (len + 1) * 8;
+		memset(key.data, 0, sizeof(key.data));
+		memcpy(key.data, keys[i], len);
+		value = i + 100;
+		err = bpf_map_update_elem(fd, &key, &value, flags);
+		CHECK(err, "add elem", "#%u error %d\n", i, err);
+
+		err = bpf_map_lookup_elem(fd, &key, &got);
+		CHECK(err, "lookup elem", "#%u error %d\n", i, err);
+		CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got);
+
+		/* re-add i-th element (Error) */
+		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+		CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err);
+
+		/* Overwrite i-th element */
+		flags = i % 2 ? 0 : BPF_EXIST;
+		value = i;
+		err = bpf_map_update_elem(fd, &key, &value, flags);
+		CHECK(err, "update elem", "error %d\n", err);
+
+		/* Lookup #[0~i] elements */
+		for (j = 0; j <= i; j++) {
+			len = strlen(keys[j]);
+			key.prefixlen = (len + 1) * 8;
+			memset(key.data, 0, sizeof(key.data));
+			memcpy(key.data, keys[j], len);
+			err = bpf_map_lookup_elem(fd, &key, &got);
+			CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err);
+			CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n",
+			      i, j, value, got);
+		}
+	}
+
+	/* Add element to a full qp-trie (Error) */
+	key.prefixlen = sizeof(key.data) * 8;
+	memset(key.data, 0, sizeof(key.data));
+	value = 0;
+	err = bpf_map_update_elem(fd, &key, &value, 0);
+	CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err);
+
+	/* Iterate sorted elements: no deletion */
+	memcpy(sorted_keys, keys, sizeof(keys));
+	qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str);
+	cur = NULL;
+	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+		len = strlen(sorted_keys[i]);
+		err = bpf_map_get_next_key(fd, cur, &next_key);
+		CHECK(err, "iterate", "#%u error %d\n", i, err);
+		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+		      "#%u invalid len %u expect %u\n",
+		      i, next_key.prefixlen, (len + 1) * 8);
+		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+		      "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+		cur = &next_key;
+	}
+	err = bpf_map_get_next_key(fd, cur, &next_key);
+	CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+	/* Iterate sorted elements: delete the found key after each iteration */
+	cur = NULL;
+	for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+		len = strlen(sorted_keys[i]);
+		err = bpf_map_get_next_key(fd, cur, &next_key);
+		CHECK(err, "iterate", "#%u error %d\n", i, err);
+		CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+		      "#%u invalid len %u expect %u\n",
+		      i, next_key.prefixlen, (len + 1) * 8);
+		CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+		      "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+		cur = &next_key;
+
+		err = bpf_map_delete_elem(fd, cur);
+		CHECK(err, "delete", "#%u error %d\n", i, err);
+	}
+	err = bpf_map_get_next_key(fd, cur, &next_key);
+	CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err);
+
+	close(fd);
+}
+
+/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of
+ * LPM trie will return these integers in big-endian order, therefore, convert
+ * these integers to big-endian before update. After each iteration, delete the
+ * found key (the smallest integer) and expect the next iteration will return
+ * the second smallest number.
+ */
+static void test_lpm_trie_iterate_ints(void)
+{
+	struct lpm_trie_int_key key, next_key;
+	unsigned int i, max_entries;
+	struct lpm_trie_int_key *cur;
+	unsigned int *data_set;
+	int fd, err;
+	bool value;
+
+	max_entries = 4096;
+	data_set = calloc(max_entries, sizeof(*data_set));
+	CHECK(!data_set, "malloc", "no mem\n");
+	for (i = 0; i < max_entries; i++)
+		data_set[i] = i;
+
+	fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries);
+	value = true;
+	for (i = 0; i < max_entries; i++) {
+		key.prefixlen = 32;
+		key.data = htobe32(data_set[i]);
+
+		err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+		CHECK(err, "add elem", "#%u error %d\n", i, err);
+	}
+
+	cur = NULL;
+	for (i = 0; i < max_entries; i++) {
+		err = bpf_map_get_next_key(fd, cur, &next_key);
+		CHECK(err, "iterate", "#%u error %d\n", i, err);
+		CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n",
+		      i, next_key.prefixlen);
+		CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n",
+		      i, be32toh(next_key.data), data_set[i]);
+		cur = &next_key;
+
+		/*
+		 * Delete the minimal key, the next call of bpf_get_next_key()
+		 * will return the second minimal key.
+		 */
+		err = bpf_map_delete_elem(fd, &next_key);
+		CHECK(err, "del elem", "#%u elem error %d\n", i, err);
+	}
+	err = bpf_map_get_next_key(fd, cur, &next_key);
+	CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+	err = bpf_map_get_next_key(fd, NULL, &next_key);
+	CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err);
+
+	free(data_set);
+
+	close(fd);
+}
+
+void test_lpm_trie_map_basic_ops(void)
 {
 	int i;

 	/* we want predictable, pseudo random tests */
 	srand(0xf00ba1);

-	/* Use libbpf 1.0 API mode */
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
 	test_lpm_basic();
 	test_lpm_order();

@ -792,6 +1179,10 @@ int main(void)
 	test_lpm_get_next_key();
 	test_lpm_multi_thread();

-	printf("test_lpm: OK\n");
-	return 0;
+	test_lpm_trie_update_flags();
+	test_lpm_trie_update_full_map();
+	test_lpm_trie_iterate_strs();
+	test_lpm_trie_iterate_ints();
+
+	printf("%s: PASS\n", __func__);
 }
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@ -78,8 +78,8 @@ void test_task_storage_map_stress_lookup(void)
 	CHECK(err, "open_and_load", "error %d\n", err);

 	/* Only for a fully preemptible kernel */
-	if (!skel->kconfig->CONFIG_PREEMPT) {
-		printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
+	if (!skel->kconfig->CONFIG_PREEMPTION) {
+		printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
 		read_bpf_task_storage_busy__destroy(skel);
 		skips++;
 		return;
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@ -108,6 +108,35 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 	close(s);
 }

+static void test_sockmap_vsock_delete_on_close(void)
+{
+	int err, c, p, map;
+	const int zero = 0;
+
+	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+	if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
+		return;
+
+	map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+			     sizeof(int), 1, NULL);
+	if (!ASSERT_GE(map, 0, "bpf_map_create")) {
+		close(c);
+		goto out;
+	}
+
+	err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
+	close(c);
+	if (!ASSERT_OK(err, "bpf_map_update"))
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+	ASSERT_OK(err, "after close(), bpf_map_update");
+
+out:
+	close(p);
+	close(map);
+}
+
 static void test_skmsg_helpers(enum bpf_map_type map_type)
 {
 	struct test_skmsg_load_helpers *skel;
@ -937,12 +966,58 @@ static void test_sockmap_same_sock(void)
 	test_sockmap_pass_prog__destroy(skel);
 }

+static void test_sockmap_skb_verdict_vsock_poll(void)
+{
+	struct test_sockmap_pass_prog *skel;
+	int err, map, conn, peer;
+	struct bpf_program *prog;
+	struct bpf_link *link;
+	char buf = 'x';
+	int zero = 0;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer))
+		goto destroy;
+
+	prog = skel->progs.prog_skb_verdict;
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+	link = bpf_program__attach_sockmap(prog, map);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+		goto close;
+
+	err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto detach;
+
+	if (xsend(peer, &buf, 1, 0) != 1)
+		goto detach;
+
+	err = poll_read(conn, IO_TIMEOUT_SEC);
+	if (!ASSERT_OK(err, "poll"))
+		goto detach;
+
+	if (xrecv_nonblock(conn, &buf, 1, 0) != 1)
+		FAIL("xrecv_nonblock");
+detach:
+	bpf_link__detach(link);
+close:
+	xclose(conn);
+	xclose(peer);
+destroy:
+	test_sockmap_pass_prog__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap vsock delete on close"))
+		test_sockmap_vsock_delete_on_close();
 	if (test__start_subtest("sockmap sk_msg load helpers"))
 		test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash sk_msg load helpers"))
@ -997,4 +1072,6 @@ void test_sockmap_basic(void)
 		test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
 		test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap skb_verdict vsock poll"))
+		test_sockmap_skb_verdict_vsock_poll();
 }
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@ -197,7 +197,7 @@ static void test_nodeadlock(void)
 	/* Unnecessary recursion and deadlock detection are reproducible
 	 * in the preemptible kernel.
 	 */
-	if (!skel->kconfig->CONFIG_PREEMPT) {
+	if (!skel->kconfig->CONFIG_PREEMPTION) {
 		test__skip();
 		goto done;
 	}
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@ -225,24 +225,7 @@ void test_verifier_xdp(void)                  { RUN(verifier_xdp); }
 void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
 void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
 void test_verifier_lsm(void)                  { RUN(verifier_lsm); }
-
-void test_verifier_mtu(void)
-{
-	__u64 caps = 0;
-	int ret;
-
-	/* In case CAP_BPF and CAP_PERFMON is not set */
-	ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps);
-	if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin"))
-		return;
-	ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL);
-	if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
-		goto restore_cap;
-	RUN(verifier_mtu);
-restore_cap:
-	if (caps)
-		cap_enable_effective(caps, NULL);
-}
+void test_verifier_mtu(void)		      { RUN(verifier_mtu); }

 static int init_test_val_map(struct bpf_object *obj, char *map_name)
 {
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@ -5,6 +5,10 @@
 #define XSTR(s) STR(s)
 #define STR(s) #s

+/* Expand a macro and then stringize the expansion */
+#define QUOTE(str) #str
+#define EXPAND_QUOTE(str) QUOTE(str)
+
 /* This set of attributes controls behavior of the
 * test_loader.c:test_loader__run_subtests().
 *
@ -106,6 +110,7 @@
 * __arch_*          Specify on which architecture the test case should be tested.
 *                   Several __arch_* annotations could be specified at once.
 *                   When test case is not run on current arch it is marked as skipped.
+ * __caps_unpriv     Specify the capabilities that should be set when running the test.
 */
 #define __msg(msg)		__attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
 #define __xlated(msg)		__attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
@ -129,6 +134,13 @@
 #define __arch_x86_64		__arch("X86_64")
 #define __arch_arm64		__arch("ARM64")
 #define __arch_riscv64		__arch("RISCV64")
+#define __caps_unpriv(caps)	__attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+
+/* Define common capabilities tested using __caps_unpriv */
+#define CAP_NET_ADMIN		12
+#define CAP_SYS_ADMIN		21
+#define CAP_PERFMON		38
+#define CAP_BPF			39

 /* Convenience macro for use with 'asm volatile' blocks */
 #define __naked __attribute__((naked))
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)

 /* A dynptr can't be used after it has been invalidated */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
 int use_after_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr;
@ -428,7 +428,7 @@ int invalid_helper2(void *ctx)

 /* A bpf_dynptr is invalidated if it's been written into */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int invalid_write1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@ -1407,7 +1407,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)

 /* bpf_dynptr_adjust can only be called on initialized dynptrs */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int dynptr_adjust_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr = {};
@ -1420,7 +1420,7 @@ int dynptr_adjust_invalid(void *ctx)

 /* bpf_dynptr_is_null can only be called on initialized dynptrs */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int dynptr_is_null_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr = {};
@ -1433,7 +1433,7 @@ int dynptr_is_null_invalid(void *ctx)

 /* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int dynptr_is_rdonly_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr = {};
@ -1446,7 +1446,7 @@ int dynptr_is_rdonly_invalid(void *ctx)

 /* bpf_dynptr_size can only be called on initialized dynptrs */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int dynptr_size_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr = {};
@ -1459,7 +1459,7 @@ int dynptr_size_invalid(void *ctx)

 /* Only initialized dynptrs can be cloned */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
 int clone_invalid1(void *ctx)
 {
 	struct bpf_dynptr ptr1 = {};
@ -1493,7 +1493,7 @@ int clone_invalid2(struct xdp_md *xdp)

 /* Invalidating a dynptr should invalidate its clones */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
 int clone_invalidate1(void *ctx)
 {
 	struct bpf_dynptr clone;
@ -1514,7 +1514,7 @@ int clone_invalidate1(void *ctx)

 /* Invalidating a dynptr should invalidate its parent */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
 int clone_invalidate2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@ -1535,7 +1535,7 @@ int clone_invalidate2(void *ctx)

 /* Invalidating a dynptr should invalidate its siblings */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
 int clone_invalidate3(void *ctx)
 {
 	struct bpf_dynptr ptr;
@ -1723,7 +1723,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
 }

 SEC("?raw_tp")
-__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
 int test_dynptr_reg_type(void *ctx)
 {
 	struct task_struct *current = NULL;
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@ -1486,4 +1486,30 @@ int iter_subprog_check_stacksafe(const void *ctx)
 	return 0;
 }

+struct bpf_iter_num global_it;
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_new_bad_arg(const void *ctx)
+{
+	bpf_iter_num_new(&global_it, 0, 1);
+	return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_next_bad_arg(const void *ctx)
+{
+	bpf_iter_num_next(&global_it);
+	return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_destroy_bad_arg(const void *ctx)
+{
+	bpf_iter_num_destroy(&global_it);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/progs/iters_state_safety.c
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int destroy_without_creating_fail(void *ctx)
 {
 	/* init with zeros to stop verifier complaining about uninit stack */
@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int compromise_iter_w_direct_write_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int compromise_iter_w_helper_write_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected uninitialized iter_num as arg #1")
+__failure __msg("expected uninitialized iter_num as arg #0")
 int double_create_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
@ -258,7 +258,7 @@ int double_create_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int double_destroy_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int next_without_new_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx)
 }

 SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
 int next_after_destroy_fail(void *ctx)
 {
 	struct bpf_iter_num iter;
--- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx)

 SEC("?raw_tp")
 __failure
-__msg("expected an initialized iter_testmod_seq as arg #2")
+__msg("expected an initialized iter_testmod_seq as arg #1")
 int testmod_seq_getter_before_bad(const void *ctx)
 {
 	struct bpf_iter_testmod_seq it;
@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx)

 SEC("?raw_tp")
 __failure
-__msg("expected an initialized iter_testmod_seq as arg #2")
+__msg("expected an initialized iter_testmod_seq as arg #1")
 int testmod_seq_getter_after_bad(const void *ctx)
 {
 	struct bpf_iter_testmod_seq it;
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>

-extern bool CONFIG_PREEMPT __kconfig __weak;
+extern bool CONFIG_PREEMPTION __kconfig __weak;
 extern const int bpf_task_storage_busy __ksym;

 char _license[] SEC("license") = "GPL";
@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy)
 {
 	int *value;

-	if (!CONFIG_PREEMPT)
+	if (!CONFIG_PREEMPTION)
 		return 0;

 	if (bpf_get_current_pid_tgid() >> 32 != pid)
--- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
 #define EBUSY 16
 #endif

-extern bool CONFIG_PREEMPT __kconfig __weak;
+extern bool CONFIG_PREEMPTION __kconfig __weak;
 int nr_get_errs = 0;
 int nr_del_errs = 0;

@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	int ret, zero = 0;
 	int *value;

-	if (!CONFIG_PREEMPT)
+	if (!CONFIG_PREEMPTION)
 		return 0;

 	task = bpf_get_current_task_btf();
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
 }

 SEC("?lsm.s/bpf")
-__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
 int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
 {
 	unsigned long val = 0;
--- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@ -32,18 +32,18 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)

 SEC("iter/cgroup")
 __description("uninitialized iter in ->next()")
-__failure __msg("expected an initialized iter_bits as arg #1")
+__failure __msg("expected an initialized iter_bits as arg #0")
 int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
 {
-	struct bpf_iter_bits *it = NULL;
+	struct bpf_iter_bits it = {};

-	bpf_iter_bits_next(it);
+	bpf_iter_bits_next(&it);
 	return 0;
 }

 SEC("iter/cgroup")
 __description("uninitialized iter in ->destroy()")
-__failure __msg("expected an initialized iter_bits as arg #1")
+__failure __msg("expected an initialized iter_bits as arg #0")
 int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
 {
 	struct bpf_iter_bits it = {};
--- a/tools/testing/selftests/bpf/progs/verifier_mtu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c
@ -6,7 +6,9 @@

 SEC("tc/ingress")
 __description("uninit/mtu: write rejected")
-__failure __msg("invalid indirect read from stack")
+__success
+__caps_unpriv(CAP_BPF|CAP_NET_ADMIN)
+__failure_unpriv __msg_unpriv("invalid indirect read from stack")
 int tc_uninit_mtu(struct __sk_buff *ctx)
 {
 	__u32 mtu;
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@ -1244,4 +1244,39 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void)
 	: __clobber_all);
 }

+SEC("socket")
+__description("stack_noperfmon: reject read of invalid slots")
+__success
+__caps_unpriv(CAP_BPF)
+__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8")
+__naked void stack_noperfmon_reject_invalid_read(void)
+{
+	asm volatile ("					\
+	r2 = 1;						\
+	r6 = r10;					\
+	r6 += -8;					\
+	*(u8 *)(r6 + 0) = r2;				\
+	r2 = *(u64 *)(r6 + 0);				\
+	r0 = 0;						\
+	exit;						\
+"	::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots")
+__success
+__caps_unpriv(CAP_BPF)
+__success_unpriv
+__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void)
+{
+	asm volatile("					\
+	r0 = 0;						\
+	*(u64 *)(r10 - 8) = r0;				\
+	*(u32 *)(r10 - 8) = r0;				\
+	exit;						\
+"	:
+	:
+	: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@ -36,6 +36,7 @@
 #define TEST_TAG_ARCH "comment:test_arch="
 #define TEST_TAG_JITED_PFX "comment:test_jited="
 #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
+#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="

 /* Warning: duplicated in bpf_misc.h */
 #define POINTER_VALUE	0xcafe4all
@ -74,6 +75,7 @@ struct test_subspec {
 	struct expected_msgs jited;
 	int retval;
 	bool execute;
+	__u64 caps;
 };

 struct test_spec {
@ -276,6 +278,37 @@ static int parse_int(const char *str, int *val, const char *name)
 	return 0;
 }

+static int parse_caps(const char *str, __u64 *val, const char *name)
+{
+	int cap_flag = 0;
+	char *token = NULL, *saveptr = NULL;
+
+	char *str_cpy = strdup(str);
+	if (str_cpy == NULL) {
+		PRINT_FAIL("Memory allocation failed\n");
+		return -EINVAL;
+	}
+
+	token = strtok_r(str_cpy, "|", &saveptr);
+	while (token != NULL) {
+		errno = 0;
+		if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) {
+			PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token);
+			return -EINVAL;
+		}
+		cap_flag = strtol(token, NULL, 10);
+		if (!cap_flag || errno) {
+			PRINT_FAIL("failed to parse caps %s\n", name);
+			return -EINVAL;
+		}
+		*val |= (1ULL << cap_flag);
+		token = strtok_r(NULL, "|", &saveptr);
+	}
+
+	free(str_cpy);
+	return 0;
+}
+
 static int parse_retval(const char *str, int *val, const char *name)
 {
 	struct {
@ -541,6 +574,12 @@ static int parse_test_spec(struct test_loader *tester,
 			jit_on_next_line = true;
 		} else if (str_has_pfx(s, TEST_BTF_PATH)) {
 			spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
+		} else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) {
+			val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1;
+			err = parse_caps(val, &spec->unpriv.caps, "test caps");
+			if (err)
+				goto cleanup;
+			spec->mode_mask |= UNPRIV;
 		}
 	}

@ -917,6 +956,13 @@ void run_subtest(struct test_loader *tester,
 			test__end_subtest();
 			return;
 		}
+		if (subspec->caps) {
+			err = cap_enable_effective(subspec->caps, NULL);
+			if (err) {
+				PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err));
+				goto subtest_cleanup;
+			}
+		}
 	}

 	/* Implicitly reset to NULL if next test case doesn't specify */
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@ -1579,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)

 static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
 {
-	txmsg_redir = 1;
 	opt->tx_wait_mem = true;
+	txmsg_redir = 1;
+	test_send_large(opt, cgrp);
+
+	txmsg_redir = 1;
+	txmsg_apply = 4097;
 	test_send_large(opt, cgrp);
 	opt->tx_wait_mem = false;
 }