bpf-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZdaBCwAKCRDbK58LschI
 g3EhAP0d+S18mNabiEGz8efnE2yz3XcFchJgjiRS8WjOv75GvQEA6/sWncFjbc8k
 EqxPHmeJa19rWhQlFrmlyNQfLYGe4gY=
 =VkOs
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-02-22

The following pull-request contains BPF updates for your *net* tree.

We've added 11 non-merge commits during the last 24 day(s) which contain
a total of 15 files changed, 217 insertions(+), 17 deletions(-).

The main changes are:

1) Fix a syzkaller-triggered oops when attempting to read the vsyscall
   page through bpf_probe_read_kernel and friends, from Hou Tao.

2) Fix a kernel panic due to uninitialized iter position pointer in
   bpf_iter_task, from Yafang Shao.

3) Fix a race between bpf_timer_cancel_and_free and bpf_timer_cancel,
   from Martin KaFai Lau.

4) Fix an xsk warning in skb_add_rx_frag() (under CONFIG_DEBUG_NET)
   due to incorrect truesize accounting, from Sebastian Andrzej Siewior.

5) Fix a NULL pointer dereference in sk_psock_verdict_data_ready,
   from Shigeru Yoshida.

6) Fix a resolve_btfids warning when bpf_cpumask symbol cannot be
   resolved, from Hari Bathini.

bpf-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()
  selftests/bpf: Add negtive test cases for task iter
  bpf: Fix an issue due to uninitialized bpf_iter_task
  selftests/bpf: Test racing between bpf_timer_cancel_and_free and bpf_timer_cancel
  bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel
  selftest/bpf: Test the read of vsyscall page under x86-64
  x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault()
  x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h
  bpf, scripts: Correct GPL license name
  xsk: Add truesize to skb_add_rx_frag().
  bpf: Fix warning for bpf_cpumask in verifier
====================

Link: https://lore.kernel.org/r/20240221231826.1404-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2024-02-22 10:04:46 +01:00
commit fdcd4467ba
15 changed files with 217 additions and 17 deletions

View File

@ -4,6 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
}
#endif
/*
 * Tell whether @vaddr lies inside the (legacy) vsyscall page, i.e. the page
 * whose base address is VSYSCALL_ADDR.  That page sits in the kernel portion
 * of the address space but has user-accessible permissions.
 *
 * Returns true when the page-aligned base of @vaddr equals VSYSCALL_ADDR.
 * The match is annotated as the unlikely outcome.
 */
static inline bool is_vsyscall_vaddr(unsigned long vaddr)
{
	unsigned long page_base = vaddr & PAGE_MASK;

	return unlikely(page_base == VSYSCALL_ADDR);
}
#endif /* _ASM_X86_VSYSCALL_H */

View File

@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
/*
 * The (legacy) vsyscall page is the lone page in the kernel portion
 * of the address space that has user-accessible permissions.
 *
 * Returns true when the page containing @vaddr starts at VSYSCALL_ADDR;
 * the match is hinted as the unlikely case.
 */
static bool is_vsyscall_vaddr(unsigned long vaddr)
{
return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
}
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)

View File

@ -3,6 +3,8 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/vsyscall.h>
#ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
return false;
/*
* Reading from the vsyscall page may cause an unhandled fault in
* certain cases. Though it is at an address above TASK_SIZE_MAX, it is
* usually considered as a user space address.
*/
if (is_vsyscall_vaddr(vaddr))
return false;
/*
* Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early

View File

@ -1101,6 +1101,7 @@ struct bpf_hrtimer {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
struct rcu_head rcu;
};
/* the actual struct hidden inside uapi struct bpf_timer */
@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
if (in_nmi())
return -EOPNOTSUPP;
rcu_read_lock();
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
if (!t) {
@ -1353,6 +1355,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
rcu_read_unlock();
return ret;
}
@ -1407,7 +1410,7 @@ void bpf_timer_cancel_and_free(void *val)
*/
if (this_cpu_read(hrtimer_running) != t)
hrtimer_cancel(&t->timer);
kfree(t);
kfree_rcu(t, rcu);
}
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)

View File

@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
__alignof__(struct bpf_iter_task));
kit->pos = NULL;
switch (flags) {
case BPF_TASK_ITER_ALL_THREADS:
case BPF_TASK_ITER_ALL_PROCS:

View File

@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc)
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
BTF_SET_END(rcu_protected_types)

View File

@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
if (psock)
psock->saved_data_ready(sk);
if (psock) {
read_lock_bh(&sk->sk_callback_lock);
sk_psock_data_ready(sk, psock);
read_unlock_bh(&sk->sk_callback_lock);
}
rcu_read_unlock();
}
}

View File

@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
}
if (first_frag && desc->options & XDP_TX_METADATA) {

View File

@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode
instructions to the kernel when the programs are loaded. The format for that
string is identical to the one in use for kernel modules (Dual licenses, such
as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
programs that are compatible with the GNU Privacy License (GPL).
programs that are compatible with the GNU General Public License (GNU GPL).
In order to use such helpers, the eBPF program must be loaded with the correct
license string passed (via **attr**) to the **bpf**\\ () system call, and this

View File

@ -193,6 +193,7 @@ static void subtest_task_iters(void)
ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
pthread_mutex_unlock(&do_nothing_mutex);
for (int i = 0; i < thread_num; i++)
ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");

View File

@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
#include "test_progs.h"
#include "read_vsyscall.skel.h"
#if defined(__x86_64__)
/* For VSYSCALL_ADDR */
#include <asm/vsyscall.h>
#else
/* To prevent build failure on non-x86 arch */
#define VSYSCALL_ADDR 0UL
#endif
/*
 * Expected result of every read helper exercised against the vsyscall
 * address.  Entry i is compared with skel->bss->read_ret[i], so the order
 * here must stay in step with the slots the BPF programs fill in.
 *
 * @name: label used when reporting the assertion for this helper
 * @ret:  error code the helper is expected to return
 *
 * The table is only consulted from this file and never modified, so it is
 * kept file-local and read-only to avoid leaking a generic symbol name into
 * the combined test binary.
 */
static const struct read_ret_desc {
	const char *name;
	int ret;
} all_read[] = {
	/* Kernel-side (and legacy) probe reads are expected to be refused. */
	{ .name = "probe_read_kernel", .ret = -ERANGE },
	{ .name = "probe_read_kernel_str", .ret = -ERANGE },
	{ .name = "probe_read", .ret = -ERANGE },
	{ .name = "probe_read_str", .ret = -ERANGE },
	/* User-side reads are expected to fault. */
	{ .name = "probe_read_user", .ret = -EFAULT },
	{ .name = "probe_read_user_str", .ret = -EFAULT },
	{ .name = "copy_from_user", .ret = -EFAULT },
	{ .name = "copy_from_user_task", .ret = -EFAULT },
};
/*
 * Verify the error code each read helper reports when it is pointed at the
 * vsyscall address.
 *
 * The skeleton is loaded and attached, the BPF side is told which pid to
 * react to and which address to read, and a short sleep gives the attached
 * programs a chance to run.  Every recorded return value is then compared
 * with the matching all_read[] entry.  Skipped on anything but x86-64.
 */
void test_read_vsyscall(void)
{
	struct read_vsyscall *skel;
	unsigned int idx;
	int err;

#if !defined(__x86_64__)
	/* VSYSCALL_ADDR is only a 0UL placeholder here; nothing to test. */
	test__skip();
	return;
#endif

	skel = read_vsyscall__open_and_load();
	if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
		return;

	skel->bss->target_pid = getpid();
	err = read_vsyscall__attach(skel);
	if (ASSERT_EQ(err, 0, "read_vsyscall attach")) {
		/*
		 * User space may not have a vsyscall page at all (e.g. with
		 * LEGACY_VSYSCALL_NONE), but that does not affect the
		 * returned error codes.
		 */
		skel->bss->user_ptr = (void *)VSYSCALL_ADDR;

		/* Presumably enters the nanosleep path the programs hook. */
		usleep(1);

		idx = 0;
		while (idx < ARRAY_SIZE(all_read)) {
			ASSERT_EQ(skel->bss->read_ret[idx], all_read[idx].ret,
				  all_read[idx].name);
			idx++;
		}
	}

	/* Reached on both the success and the attach-failure path. */
	read_vsyscall__destroy(skel);
}

View File

@ -4,10 +4,29 @@
#include "timer.skel.h"
#include "timer_failure.skel.h"
#define NUM_THR 8
/*
 * Thread body: repeatedly run the BPF program whose fd @arg points at.
 *
 * Performs up to 10000 bpf_prog_test_run_opts() invocations and stops early
 * at the first run that fails or whose program retval is non-zero.  The
 * thread always exits with @arg itself as its exit value.
 */
static void *spin_lock_thread(void *arg)
{
	LIBBPF_OPTS(bpf_test_run_opts, topts);
	int prog_fd = *(int *)arg;
	int round = 0;

	while (round < 10000) {
		int err = bpf_prog_test_run_opts(prog_fd, &topts);

		/* The retval check only happens when the run itself succeeded. */
		if (!ASSERT_OK(err, "test_run_opts err"))
			break;
		if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
			break;

		round++;
	}

	pthread_exit(arg);
}
static int timer(struct timer *timer_skel)
{
int err, prog_fd;
int i, err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts);
pthread_t thread_id[NUM_THR];
void *ret;
err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach"))
@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel)
/* check that code paths completed */
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
prog_fd = bpf_program__fd(timer_skel->progs.race);
for (i = 0; i < NUM_THR; i++) {
err = pthread_create(&thread_id[i], NULL,
&spin_lock_thread, &prog_fd);
if (!ASSERT_OK(err, "pthread_create"))
break;
}
while (i) {
err = pthread_join(thread_id[--i], &ret);
if (ASSERT_OK(err, "pthread_join"))
ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
}
return 0;
}

View File

@ -10,7 +10,7 @@
char _license[] SEC("license") = "GPL";
pid_t target_pid;
int procs_cnt, threads_cnt, proc_threads_cnt;
int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx)
procs_cnt = threads_cnt = proc_threads_cnt = 0;
bpf_rcu_read_lock();
bpf_for_each(task, pos, NULL, ~0U) {
/* Below instructions shouldn't be executed for invalid flags */
invalid_cnt++;
}
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
/* Below instructions shouldn't be executed for invalid task__nullable */
invalid_cnt++;
}
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
if (pos->pid == target_pid)
procs_cnt++;

View File

@ -0,0 +1,45 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
/* tgid the programs below react to; every other task is ignored. */
int target_pid = 0;
/* Source address handed to every read helper below. */
void *user_ptr = 0;
/* Return value of each helper call, one slot per helper (indices 0-7). */
int read_ret[8];
/* License string placed in the "license" section. */
char _license[] SEC("license") = "GPL";
/*
 * fentry program on sys_nanosleep: when the current task's tgid matches
 * target_pid, call each bpf_probe_read*() helper on user_ptr and store its
 * return value in read_ret[0..5].  Always returns 0.
 */
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
{
	/* Upper 32 bits of the helper's result are compared with target_pid. */
	__u64 tgid = bpf_get_current_pid_tgid() >> 32;
	char scratch[8];

	if (tgid == target_pid) {
		/* Kernel-oriented variants first ... */
		read_ret[0] = bpf_probe_read_kernel(scratch, sizeof(scratch), user_ptr);
		read_ret[1] = bpf_probe_read_kernel_str(scratch, sizeof(scratch), user_ptr);
		/* ... then the legacy unqualified helpers ... */
		read_ret[2] = bpf_probe_read(scratch, sizeof(scratch), user_ptr);
		read_ret[3] = bpf_probe_read_str(scratch, sizeof(scratch), user_ptr);
		/* ... and finally the user-oriented ones. */
		read_ret[4] = bpf_probe_read_user(scratch, sizeof(scratch), user_ptr);
		read_ret[5] = bpf_probe_read_user_str(scratch, sizeof(scratch), user_ptr);
	}

	return 0;
}
/*
 * "fentry.s" program on sys_nanosleep (the sleepable flavour in libbpf's
 * section naming): when the current task's tgid matches target_pid, call
 * bpf_copy_from_user() and bpf_copy_from_user_task() on user_ptr and store
 * their return values in read_ret[6] and read_ret[7].  Always returns 0.
 */
SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
int do_copy_from_user(void *ctx)
{
	/* Upper 32 bits of the helper's result are compared with target_pid. */
	__u64 tgid = bpf_get_current_pid_tgid() >> 32;
	char scratch[8];

	if (tgid == target_pid) {
		read_ret[6] = bpf_copy_from_user(scratch, sizeof(scratch), user_ptr);
		/* Same read again, this time naming the current task explicitly. */
		read_ret[7] = bpf_copy_from_user_task(scratch, sizeof(scratch),
						      user_ptr,
						      bpf_get_current_task_btf(), 0);
	}

	return 0;
}

View File

@ -51,7 +51,8 @@ struct {
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
race_array SEC(".maps");
__u64 bss_data;
__u64 abs_data;
@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a)
return 0;
}
static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
{
bpf_timer_start(timer, 1000000, 0);
return 0;
}
/*
 * Syscall program meant to be run concurrently from several threads.
 *
 * Each run overwrites element 0 of race_array with a zeroed value, looks the
 * element up again, then initialises, starts and immediately cancels the
 * timer inside it.
 *
 * Returns 1 when the lookup fails or bpf_timer_init() fails with anything
 * other than -EBUSY, 0 otherwise.
 */
SEC("syscall")
int race(void *ctx)
{
	struct bpf_timer *timer;
	struct elem zeroed;
	int race_key = 0;
	int rc;

	/* Replace whatever the slot held before with an all-zero element. */
	__builtin_memset(&zeroed, 0, sizeof(zeroed));
	bpf_map_update_elem(&race_array, &race_key, &zeroed, BPF_ANY);

	timer = bpf_map_lookup_elem(&race_array, &race_key);
	if (!timer)
		return 1;

	/*
	 * -EBUSY is tolerated, presumably because a concurrent run may have
	 * initialised the timer first.
	 */
	rc = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
	if (rc != 0 && rc != -EBUSY)
		return 1;

	bpf_timer_set_callback(timer, race_timer_callback);
	/* Arm with zero delay and cancel straight away. */
	bpf_timer_start(timer, 0, 0);
	bpf_timer_cancel(timer);

	return 0;
}