BPF fixes:

- Fix inlining of bpf_get_smp_processor_id helper for !CONFIG_SMP
   systems (Andrea Righi)
 
 - Fix BPF USDT selftests helper code to use asm constraint "m"
   for LoongArch (Tiezhu Yang)
 
 - Fix BPF selftest compilation error in get_uprobe_offset when
   PROCMAP_QUERY is not defined (Jerome Marchand)
 
 - Fix BPF bpf_skb_change_tail helper when used in context of
   BPF sockmap to handle negative skb header offsets (Cong Wang)
 
 - Several fixes to BPF sockmap code, among others, in the area
   of socket buffer accounting (Levi Zim, Zijian Zhang, Cong Wang)
 
 Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
 -----BEGIN PGP SIGNATURE-----
 
 iIsEABYKADMWIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZ2YJABUcZGFuaWVsQGlv
 Z2VhcmJveC5uZXQACgkQ2yufC7HISINDEgD+N4uVg+rp8Z8pg9jcai4WUERmRG20
 NcQTfBXczLHkwIcBALvn7NVvbTAINJzBTnukbjX3XbWFz2cJ/xHxDYXycP4I
 =SwXG
 -----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull BPF fixes from Daniel Borkmann:

 - Fix inlining of bpf_get_smp_processor_id helper for !CONFIG_SMP
   systems (Andrea Righi)

 - Fix BPF USDT selftests helper code to use asm constraint "m" for
   LoongArch (Tiezhu Yang)

 - Fix BPF selftest compilation error in get_uprobe_offset when
   PROCMAP_QUERY is not defined (Jerome Marchand)

 - Fix BPF bpf_skb_change_tail helper when used in context of BPF
   sockmap to handle negative skb header offsets (Cong Wang)

 - Several fixes to BPF sockmap code, among others, in the area of
   socket buffer accounting (Levi Zim, Zijian Zhang, Cong Wang)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Test bpf_skb_change_tail() in TC ingress
  selftests/bpf: Introduce socket_helpers.h for TC tests
  selftests/bpf: Add a BPF selftest for bpf_skb_change_tail()
  bpf: Check negative offsets in __bpf_skb_min_len()
  tcp_bpf: Fix copied value in tcp_bpf_sendmsg
  skmsg: Return copied bytes in sk_msg_memcopy_from_iter
  tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress redirection
  tcp_bpf: Charge receive socket buffer in bpf_tcp_ingress()
  selftests/bpf: Fix compilation error in get_uprobe_offset()
  selftests/bpf: Use asm constraint "m" for LoongArch
  bpf: Fix bpf_get_smp_processor_id() on !CONFIG_SMP
This commit is contained in:
Linus Torvalds 2024-12-21 11:07:19 -08:00
commit 9c707ba99f
14 changed files with 712 additions and 405 deletions

View File

@ -317,17 +317,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
static inline void sk_psock_queue_msg(struct sk_psock *psock,
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
bool ret;
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
list_add_tail(&msg->list, &psock->ingress_msg);
else {
ret = true;
} else {
sk_msg_free(psock->sk, msg);
kfree(msg);
ret = false;
}
spin_unlock_bh(&psock->ingress_lock);
return ret;
}
static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)

View File

@ -1527,7 +1527,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
}
static inline bool
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
{
int delta;
@ -1535,7 +1535,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
return true;
delta = size - sk->sk_forward_alloc;
return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
pfmemalloc;
}
/*
 * skb-based entry point for receive-memory scheduling: forwards to
 * __sk_rmem_schedule() using the skb's pfmemalloc state as the third argument.
 */
static inline bool
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
}
static inline int sk_unused_reserved_mem(const struct sock *sk)

View File

@ -21281,11 +21281,15 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
* changed in some incompatible and hard to support
* way, it's fine to back out this inlining logic
*/
#ifdef CONFIG_SMP
insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
cnt = 3;
#else
insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
cnt = 1;
#endif
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;

View File

@ -3734,13 +3734,22 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
static u32 __bpf_skb_min_len(const struct sk_buff *skb)
{
u32 min_len = skb_network_offset(skb);
int offset = skb_network_offset(skb);
u32 min_len = 0;
if (skb_transport_header_was_set(skb))
min_len = skb_transport_offset(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL)
min_len = skb_checksum_start_offset(skb) +
skb->csum_offset + sizeof(__sum16);
if (offset > 0)
min_len = offset;
if (skb_transport_header_was_set(skb)) {
offset = skb_transport_offset(skb);
if (offset > 0)
min_len = offset;
}
if (skb->ip_summed == CHECKSUM_PARTIAL) {
offset = skb_checksum_start_offset(skb) +
skb->csum_offset + sizeof(__sum16);
if (offset > 0)
min_len = offset;
}
return min_len;
}

View File

@ -369,8 +369,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes)
{
int ret = -ENOSPC, i = msg->sg.curr;
u32 copy, buf_size, copied = 0;
struct scatterlist *sge;
u32 copy, buf_size;
void *to;
do {
@ -397,6 +397,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
goto out;
}
bytes -= copy;
copied += copy;
if (!bytes)
break;
msg->sg.copybreak = 0;
@ -404,7 +405,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
} while (i != msg->sg.end);
out:
msg->sg.curr = i;
return ret;
return (ret < 0) ? ret : copied;
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
@ -445,8 +446,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
if (!msg_rx->skb)
if (!msg_rx->skb) {
sk_mem_uncharge(sk, copy);
atomic_sub(copy, &sk->sk_rmem_alloc);
}
msg_rx->sg.size -= copy;
if (!sge->length) {
@ -772,6 +775,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
list_del(&msg->list);
if (!msg->skb)
atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
sk_msg_free(psock->sk, msg);
kfree(msg);
}

View File

@ -49,13 +49,14 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
sge = sk_msg_elem(msg, i);
size = (apply && apply_bytes < sge->length) ?
apply_bytes : sge->length;
if (!sk_wmem_schedule(sk, size)) {
if (!__sk_rmem_schedule(sk, size, false)) {
if (!copied)
ret = -ENOMEM;
break;
}
sk_mem_charge(sk, size);
atomic_add(size, &sk->sk_rmem_alloc);
sk_msg_xfer(tmp, msg, i, size);
copied += size;
if (sge->length)
@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
if (!ret) {
msg->sg.start = i;
sk_psock_queue_msg(psock, tmp);
if (!sk_psock_queue_msg(psock, tmp))
atomic_sub(copied, &sk->sk_rmem_alloc);
sk_psock_data_ready(sk, psock);
} else {
sk_msg_free(sk, tmp);
@ -493,7 +495,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_msg tmp, *msg_tx = NULL;
int copied = 0, err = 0;
int copied = 0, err = 0, ret = 0;
struct sk_psock *psock;
long timeo;
int flags;
@ -536,14 +538,14 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
copy = msg_tx->sg.size - osize;
}
err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
copy);
if (err < 0) {
if (ret < 0) {
sk_msg_trim(sk, msg_tx, osize);
goto out_err;
}
copied += copy;
copied += ret;
if (psock->cork_bytes) {
if (size > psock->cork_bytes)
psock->cork_bytes = 0;

View File

@ -0,0 +1,394 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
#include <linux/vm_sockets.h>
/* include/linux/net.h */
#define SOCK_TYPE_MASK 0xf
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
/* workaround for older vm_sockets.h */
#ifndef VMADDR_CID_LOCAL
#define VMADDR_CID_LOCAL 1
#endif
/*
 * Mirrors include/linux/cleanup.h.
 *
 * __get_and_null(p, nullvalue): evaluates to the current value of the lvalue
 * @p and then overwrites @p with @nullvalue. The address of @p is taken once
 * up front, so @p itself is only evaluated a single time.
 */
#define __get_and_null(p, nullvalue) \
({ \
__auto_type __ptr = &(p); \
__auto_type __val = *__ptr; \
*__ptr = nullvalue; \
__val; \
})
/*
 * take_fd(fd): yields the descriptor stored in @fd and leaves -EBADF behind.
 * Because close_fd() ignores negative values, a variable declared with
 * __close_fd will not be closed on scope exit after take_fd() was applied.
 */
#define take_fd(fd) __get_and_null(fd, -EBADF)
/*
 * Failure-reporting helpers.
 *
 * _FAIL(errnum, fmt...): prints the formatted message through
 * error_at_line() with status 0 (so the call returns instead of exiting),
 * passing the current function name in the file-name slot and __LINE__ as the
 * line, then evaluates CHECK_FAIL(true).
 * NOTE(review): CHECK_FAIL is defined outside this header; presumably it
 * marks the running test as failed - confirm against test_progs.h.
 */
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
CHECK_FAIL(true); \
})
/* FAIL(): report a failure without an errno description (errnum 0). */
#define FAIL(fmt...) _FAIL(0, fmt)
/* FAIL_ERRNO(): report a failure and include the current errno value. */
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
/*
 * FAIL_LIBBPF(err, msg): renders @err with libbpf_strerror() into a local
 * buffer of MAX_STRERR_LEN bytes and reports "<msg>: <error text>" via FAIL().
 */
#define FAIL_LIBBPF(err, msg) \
({ \
char __buf[MAX_STRERR_LEN]; \
libbpf_strerror((err), __buf, sizeof(__buf)); \
FAIL("%s: %s", (msg), __buf); \
})
/*
 * Checked socket-call wrappers.
 *
 * Each x<call>() macro invokes the underlying call, reports through
 * FAIL_ERRNO() when the result is -1, and evaluates to the call's result so
 * the caller can still branch on it. They are GNU statement expressions, so
 * every argument is evaluated exactly once.
 */

/* accept() preceded by a readability wait of up to IO_TIMEOUT_SEC seconds. */
#define xaccept_nonblock(fd, addr, len) \
({ \
int __ret = \
accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("accept"); \
__ret; \
})
/* bind() with failure reporting. */
#define xbind(fd, addr, len) \
({ \
int __ret = bind((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("bind"); \
__ret; \
})
/* close() with failure reporting. */
#define xclose(fd) \
({ \
int __ret = close((fd)); \
if (__ret == -1) \
FAIL_ERRNO("close"); \
__ret; \
})
/* connect() with failure reporting. */
#define xconnect(fd, addr, len) \
({ \
int __ret = connect((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("connect"); \
__ret; \
})
/* getsockname() with failure reporting. */
#define xgetsockname(fd, addr, len) \
({ \
int __ret = getsockname((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockname"); \
__ret; \
})
/* getsockopt(); the option name is stringified into the failure message. */
#define xgetsockopt(fd, level, name, val, len) \
({ \
int __ret = getsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockopt(" #name ")"); \
__ret; \
})
/* listen() with failure reporting. */
#define xlisten(fd, backlog) \
({ \
int __ret = listen((fd), (backlog)); \
if (__ret == -1) \
FAIL_ERRNO("listen"); \
__ret; \
})
/* setsockopt(); the option name is stringified into the failure message. */
#define xsetsockopt(fd, level, name, val, len) \
({ \
int __ret = setsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("setsockopt(" #name ")"); \
__ret; \
})
/* send(); evaluates to the ssize_t byte count returned by send(). */
#define xsend(fd, buf, len, flags) \
({ \
ssize_t __ret = send((fd), (buf), (len), (flags)); \
if (__ret == -1) \
FAIL_ERRNO("send"); \
__ret; \
})
/* recv() preceded by a readability wait of up to IO_TIMEOUT_SEC seconds. */
#define xrecv_nonblock(fd, buf, len, flags) \
({ \
ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("recv"); \
__ret; \
})
/* socket() with failure reporting. */
#define xsocket(family, sotype, flags) \
({ \
int __ret = socket(family, sotype, flags); \
if (__ret == -1) \
FAIL_ERRNO("socket"); \
__ret; \
})
/*
 * Cleanup handler used by __close_fd: closes the descriptor stored in *fd,
 * unless the stored value is negative (nothing to close).
 */
static inline void close_fd(int *fd)
{
	int desc = *fd;

	if (desc < 0)
		return;

	xclose(desc);
}
#define __close_fd __attribute__((cleanup(close_fd)))
/*
 * View a sockaddr_storage as the generic struct sockaddr that the socket
 * calls take. The returned pointer aliases @ss; nothing is copied.
 */
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
	void *generic = ss;

	return generic;
}
/*
 * Fill @ss with the IPv4 loopback address and port 0, and store the size of
 * struct sockaddr_in in *len. The whole storage is zeroed first.
 */
static inline void init_addr_loopback4(struct sockaddr_storage *ss,
				       socklen_t *len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)ss;

	memset(ss, 0, sizeof(*ss));

	sin->sin_port = 0;
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	*len = sizeof(*sin);
}
/*
 * Fill @ss with the IPv6 loopback address and port 0, and store the size of
 * struct sockaddr_in6 in *len. The whole storage is zeroed first.
 */
static inline void init_addr_loopback6(struct sockaddr_storage *ss,
				       socklen_t *len)
{
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

	memset(ss, 0, sizeof(*ss));

	sin6->sin6_port = 0;
	sin6->sin6_family = AF_INET6;
	sin6->sin6_addr = in6addr_loopback;

	*len = sizeof(*sin6);
}
/*
 * Fill @ss with a vsock address using VMADDR_CID_LOCAL as the CID and
 * VMADDR_PORT_ANY as the port, and store the size of struct sockaddr_vm in
 * *len. The whole storage is zeroed first.
 */
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
					    socklen_t *len)
{
	struct sockaddr_vm *svm = (struct sockaddr_vm *)ss;

	memset(ss, 0, sizeof(*ss));

	svm->svm_cid = VMADDR_CID_LOCAL;
	svm->svm_port = VMADDR_PORT_ANY;
	svm->svm_family = AF_VSOCK;

	*len = sizeof(*svm);
}
/*
 * Dispatch to the per-family loopback initialiser for AF_INET, AF_INET6 or
 * AF_VSOCK. Any other family is reported through FAIL() and leaves @ss and
 * *len untouched.
 */
static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
				      socklen_t *len)
{
	if (family == AF_INET)
		init_addr_loopback4(ss, len);
	else if (family == AF_INET6)
		init_addr_loopback6(ss, len);
	else if (family == AF_VSOCK)
		init_addr_loopback_vsock(ss, len);
	else
		FAIL("unsupported address family %d", family);
}
/*
 * Turn on SO_REUSEPORT for socket @s and attach the BPF program @progfd via
 * SO_ATTACH_REUSEPORT_EBPF.
 *
 * Returns 0 when both options were set, -1 as soon as one of them fails (the
 * failure itself is reported by xsetsockopt()).
 */
static inline int enable_reuseport(int s, int progfd)
{
	int on = 1;

	if (xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)))
		return -1;

	if (xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
			sizeof(progfd)))
		return -1;

	return 0;
}
static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
socklen_t len = 0;
int err, s;
init_addr_loopback(family, &addr, &len);
s = xsocket(family, sotype, 0);
if (s == -1)
return -1;
if (progfd >= 0)
enable_reuseport(s, progfd);
err = xbind(s, sockaddr(&addr), len);
if (err)
goto close;
if (sotype & SOCK_DGRAM)
return s;
err = xlisten(s, SOMAXCONN);
if (err)
goto close;
return s;
close:
xclose(s);
return -1;
}
/*
 * Convenience form of socket_loopback_reuseport() that passes -1 as the
 * program fd, i.e. no reuseport program is attached.
 */
static inline int socket_loopback(int family, int sotype)
{
return socket_loopback_reuseport(family, sotype, -1);
}
/*
 * Wait up to @timeout_sec seconds for @fd to become writable, then fetch its
 * pending SO_ERROR.
 *
 * Returns 0 when the socket is writable with no pending error. Returns -1
 * otherwise, with errno set to ETIME on timeout, to the pending socket error
 * when one is reported, or left as set by select()/getsockopt().
 */
static inline int poll_connect(int fd, unsigned int timeout_sec)
{
	struct timeval tv = { .tv_sec = timeout_sec };
	socklen_t optlen;
	fd_set writable;
	int nready, soerr;

	FD_ZERO(&writable);
	FD_SET(fd, &writable);

	nready = select(fd + 1, NULL, &writable, NULL, &tv);
	if (nready != 1) {
		if (nready == 0)
			errno = ETIME;
		return -1;
	}

	optlen = sizeof(soerr);
	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &soerr, &optlen) < 0)
		return -1;

	if (soerr == 0)
		return 0;

	errno = soerr;
	return -1;
}
/*
 * Wait up to @timeout_sec seconds for @fd to become readable.
 *
 * Returns 0 when select() reports the descriptor ready, -1 otherwise. On
 * timeout errno is set to ETIME; on a select() error errno is left as is.
 */
static inline int poll_read(int fd, unsigned int timeout_sec)
{
	struct timeval tv = { .tv_sec = timeout_sec };
	fd_set readable;
	int nready;

	FD_ZERO(&readable);
	FD_SET(fd, &readable);

	nready = select(fd + 1, &readable, NULL, NULL, &tv);
	if (nready == 1)
		return 0;

	if (nready == 0)
		errno = ETIME;
	return -1;
}
/*
 * accept() on @fd after waiting up to @timeout_sec seconds for it to become
 * readable. Returns -1 if the wait fails (see poll_read() for errno),
 * otherwise whatever accept() returns.
 */
static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
				 unsigned int timeout_sec)
{
	int wait_err = poll_read(fd, timeout_sec);

	return wait_err ? -1 : accept(fd, addr, len);
}
/*
 * recv() on @fd after waiting up to @timeout_sec seconds for it to become
 * readable. Returns -1 if the wait fails (see poll_read() for errno),
 * otherwise the result of recv() converted to int.
 */
static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
			       unsigned int timeout_sec)
{
	int wait_err = poll_read(fd, timeout_sec);

	if (wait_err)
		return -1;

	return recv(fd, buf, len, flags);
}
/*
 * Create a connected pair of loopback sockets of the given family and type.
 *
 * On success returns 0 and stores the two ends:
 *   *p0 - the server side: for SOCK_DGRAM the bound socket itself (connected
 *         back to the client's address), for SOCK_STREAM/SOCK_SEQPACKET the
 *         socket returned by accept;
 *   *p1 - the client socket.
 * On failure returns a non-zero value and leaves *p0/*p1 unwritten, except
 * that -EOPNOTSUPP is returned for an unsupported socket type.
 *
 * All three local descriptors carry the __close_fd cleanup attribute, so any
 * descriptor not handed out through take_fd() is closed when the function
 * returns, on success and error paths alike.
 */
static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
/* s has no initialiser but is assigned before the first return below. */
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
/* Learn which port the kernel picked for the server socket. */
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
return err;
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
err = connect(c, sockaddr(&addr), len);
if (err) {
/* EINPROGRESS is tolerated: wait for the connect to complete instead. */
if (errno != EINPROGRESS) {
FAIL_ERRNO("connect");
return err;
}
err = poll_connect(c, IO_TIMEOUT_SEC);
if (err) {
FAIL_ERRNO("poll_connect");
return err;
}
}
switch (sotype & SOCK_TYPE_MASK) {
case SOCK_DGRAM:
/* Datagram: connect the server socket back to the client's address. */
err = xgetsockname(c, sockaddr(&addr), &len);
if (err)
return err;
err = xconnect(s, sockaddr(&addr), len);
if (err)
return err;
*p0 = take_fd(s);
break;
case SOCK_STREAM:
case SOCK_SEQPACKET:
/* Connection-oriented: the peer is the accepted socket; s gets closed. */
p = xaccept_nonblock(s, NULL, NULL);
if (p < 0)
return p;
*p0 = take_fd(p);
break;
default:
FAIL("Unsupported socket type %#x", sotype);
return -EOPNOTSUPP;
}
*p1 = take_fd(c);
return 0;
}
/*
 * Create two connected socket pairs: (*c0, *p0) and (*c1, *p1).
 *
 * Returns 0 on success. If the first pair cannot be created its error is
 * returned directly; if the second one fails, the first pair is closed again
 * before the error is returned.
 */
static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
				      int *p0, int *p1)
{
	int err = create_pair(family, sotype, c0, p0);

	if (err)
		return err;

	err = create_pair(family, sotype, c1, p1);
	if (!err)
		return 0;

	close(*c0);
	close(*p0);
	return err;
}
#endif // __SOCKET_HELPERS__

View File

@ -12,6 +12,7 @@
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
#include "test_sockmap_change_tail.skel.h"
#include "bpf_iter_sockmap.skel.h"
#include "sockmap_helpers.h"
@ -643,6 +644,54 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
test_sockmap_drop_prog__destroy(drop);
}
/*
 * Check bpf_skb_change_tail() when called from a stream verdict program on a
 * sockmap.
 *
 * The program prog_skb_verdict is attached to sock_map_rx as
 * BPF_SK_SKB_STREAM_VERDICT, c1 is inserted at key 0, and data is sent from
 * p1 to c1. The expectations below are:
 *   "Tr" -> 1 byte received, change_tail_ret == 0
 *   "G"  -> 2 bytes received, change_tail_ret == 0
 *   "E"  -> 1 byte received, change_tail_ret == -EINVAL
 */
static void test_sockmap_skb_verdict_change_tail(void)
{
struct test_sockmap_change_tail *skel;
int err, map, verdict;
int c1, p1, sent, recvd;
int zero = 0;
char buf[2];
skel = test_sockmap_change_tail__open_and_load();
if (!ASSERT_OK_PTR(skel, "open_and_load"))
return;
verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
map = bpf_map__fd(skel->maps.sock_map_rx);
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
if (!ASSERT_OK(err, "create_pair()"))
goto out;
/* Only c1 goes into the map, so only data arriving on c1 hits the program. */
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
goto out_close;
/* 2 bytes sent, 1 expected back. */
sent = xsend(p1, "Tr", 2, 0);
ASSERT_EQ(sent, 2, "xsend(p1)");
recvd = recv(c1, buf, 2, 0);
ASSERT_EQ(recvd, 1, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
/* 1 byte sent, 2 expected back. */
sent = xsend(p1, "G", 1, 0);
ASSERT_EQ(sent, 1, "xsend(p1)");
recvd = recv(c1, buf, 2, 0);
ASSERT_EQ(recvd, 2, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
/* Helper is expected to fail with -EINVAL; payload arrives unchanged. */
sent = xsend(p1, "E", 1, 0);
ASSERT_EQ(sent, 1, "xsend(p1)");
recvd = recv(c1, buf, 1, 0);
ASSERT_EQ(recvd, 1, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
out_close:
close(c1);
close(p1);
out:
test_sockmap_change_tail__destroy(skel);
}
static void test_sockmap_skb_verdict_peek_helper(int map)
{
int err, c1, p1, zero = 0, sent, recvd, avail;
@ -1058,6 +1107,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
if (test__start_subtest("sockmap skb_verdict change tail"))
test_sockmap_skb_verdict_change_tail();
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))

View File

@ -1,139 +1,12 @@
#ifndef __SOCKMAP_HELPERS__
#define __SOCKMAP_HELPERS__
#include <linux/vm_sockets.h>
#include "socket_helpers.h"
/* include/linux/net.h */
#define SOCK_TYPE_MASK 0xf
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
/* workaround for older vm_sockets.h */
#ifndef VMADDR_CID_LOCAL
#define VMADDR_CID_LOCAL 1
#endif
#define __always_unused __attribute__((__unused__))
/* include/linux/cleanup.h */
#define __get_and_null(p, nullvalue) \
({ \
__auto_type __ptr = &(p); \
__auto_type __val = *__ptr; \
*__ptr = nullvalue; \
__val; \
})
#define take_fd(fd) __get_and_null(fd, -EBADF)
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
CHECK_FAIL(true); \
})
#define FAIL(fmt...) _FAIL(0, fmt)
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
#define FAIL_LIBBPF(err, msg) \
({ \
char __buf[MAX_STRERR_LEN]; \
libbpf_strerror((err), __buf, sizeof(__buf)); \
FAIL("%s: %s", (msg), __buf); \
})
/* Wrappers that fail the test on error and report it. */
#define xaccept_nonblock(fd, addr, len) \
({ \
int __ret = \
accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("accept"); \
__ret; \
})
#define xbind(fd, addr, len) \
({ \
int __ret = bind((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("bind"); \
__ret; \
})
#define xclose(fd) \
({ \
int __ret = close((fd)); \
if (__ret == -1) \
FAIL_ERRNO("close"); \
__ret; \
})
#define xconnect(fd, addr, len) \
({ \
int __ret = connect((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("connect"); \
__ret; \
})
#define xgetsockname(fd, addr, len) \
({ \
int __ret = getsockname((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockname"); \
__ret; \
})
#define xgetsockopt(fd, level, name, val, len) \
({ \
int __ret = getsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockopt(" #name ")"); \
__ret; \
})
#define xlisten(fd, backlog) \
({ \
int __ret = listen((fd), (backlog)); \
if (__ret == -1) \
FAIL_ERRNO("listen"); \
__ret; \
})
#define xsetsockopt(fd, level, name, val, len) \
({ \
int __ret = setsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("setsockopt(" #name ")"); \
__ret; \
})
#define xsend(fd, buf, len, flags) \
({ \
ssize_t __ret = send((fd), (buf), (len), (flags)); \
if (__ret == -1) \
FAIL_ERRNO("send"); \
__ret; \
})
#define xrecv_nonblock(fd, buf, len, flags) \
({ \
ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("recv"); \
__ret; \
})
#define xsocket(family, sotype, flags) \
({ \
int __ret = socket(family, sotype, flags); \
if (__ret == -1) \
FAIL_ERRNO("socket"); \
__ret; \
})
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
@ -193,130 +66,6 @@
__ret; \
})
static inline void close_fd(int *fd)
{
if (*fd >= 0)
xclose(*fd);
}
#define __close_fd __attribute__((cleanup(close_fd)))
static inline int poll_connect(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
fd_set wfds;
int r, eval;
socklen_t esize = sizeof(eval);
FD_ZERO(&wfds);
FD_SET(fd, &wfds);
r = select(fd + 1, NULL, &wfds, NULL, &timeout);
if (r == 0)
errno = ETIME;
if (r != 1)
return -1;
if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
return -1;
if (eval != 0) {
errno = eval;
return -1;
}
return 0;
}
static inline int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
fd_set rfds;
int r;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
r = select(fd + 1, &rfds, NULL, NULL, &timeout);
if (r == 0)
errno = ETIME;
return r == 1 ? 0 : -1;
}
static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return accept(fd, addr, len);
}
static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return recv(fd, buf, len, flags);
}
static inline void init_addr_loopback4(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
addr4->sin_family = AF_INET;
addr4->sin_port = 0;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
*len = sizeof(*addr4);
}
static inline void init_addr_loopback6(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
addr6->sin6_family = AF_INET6;
addr6->sin6_port = 0;
addr6->sin6_addr = in6addr_loopback;
*len = sizeof(*addr6);
}
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
addr->svm_family = AF_VSOCK;
addr->svm_port = VMADDR_PORT_ANY;
addr->svm_cid = VMADDR_CID_LOCAL;
*len = sizeof(*addr);
}
static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
socklen_t *len)
{
switch (family) {
case AF_INET:
init_addr_loopback4(ss, len);
return;
case AF_INET6:
init_addr_loopback6(ss, len);
return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
default:
FAIL("unsupported address family %d", family);
}
}
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
return (struct sockaddr *)ss;
}
static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
{
u64 value;
@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
static inline int enable_reuseport(int s, int progfd)
{
int err, one = 1;
err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
if (err)
return -1;
err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
sizeof(progfd));
if (err)
return -1;
return 0;
}
static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
socklen_t len = 0;
int err, s;
init_addr_loopback(family, &addr, &len);
s = xsocket(family, sotype, 0);
if (s == -1)
return -1;
if (progfd >= 0)
enable_reuseport(s, progfd);
err = xbind(s, sockaddr(&addr), len);
if (err)
goto close;
if (sotype & SOCK_DGRAM)
return s;
err = xlisten(s, SOMAXCONN);
if (err)
goto close;
return s;
close:
xclose(s);
return -1;
}
static inline int socket_loopback(int family, int sotype)
{
return socket_loopback_reuseport(family, sotype, -1);
}
static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
return err;
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
FAIL_ERRNO("connect");
return err;
}
err = poll_connect(c, IO_TIMEOUT_SEC);
if (err) {
FAIL_ERRNO("poll_connect");
return err;
}
}
switch (sotype & SOCK_TYPE_MASK) {
case SOCK_DGRAM:
err = xgetsockname(c, sockaddr(&addr), &len);
if (err)
return err;
err = xconnect(s, sockaddr(&addr), len);
if (err)
return err;
*p0 = take_fd(s);
break;
case SOCK_STREAM:
case SOCK_SEQPACKET:
p = xaccept_nonblock(s, NULL, NULL);
if (p < 0)
return p;
*p0 = take_fd(p);
break;
default:
FAIL("Unsupported socket type %#x", sotype);
return -EOPNOTSUPP;
}
*p1 = take_fd(c);
return 0;
}
static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
int *p0, int *p1)
{
int err;
err = create_pair(family, sotype, c0, p0);
if (err)
return err;
err = create_pair(family, sotype, c1, p1);
if (err) {
close(*c0);
close(*p0);
}
return err;
}
#endif // __SOCKMAP_HELPERS__

View File

@ -0,0 +1,62 @@
// SPDX-License-Identifier: GPL-2.0
#include <error.h>
#include <test_progs.h>
#include <linux/pkt_cls.h>
#include "test_tc_change_tail.skel.h"
#include "socket_helpers.h"
#define LO_IFINDEX 1
/*
 * Check bpf_skb_change_tail() when called from a TC ingress (tcx) program.
 *
 * The change_tail program is attached to interface index LO_IFINDEX and a
 * connected AF_INET/SOCK_DGRAM pair is used to send one datagram per case
 * from p1 to c1. The expectations below are:
 *   "Tr" -> 2 bytes received, change_tail_ret == 0
 *   "G"  -> 1 byte received,  change_tail_ret == 0
 *   "E"  -> 1 byte received,  change_tail_ret == -EINVAL
 *   "Z"  -> 1 byte received,  change_tail_ret == -EINVAL
 */
void test_tc_change_tail(void)
{
LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
struct test_tc_change_tail *skel = NULL;
struct bpf_link *link;
int c1, p1;
char buf[2];
int ret;
skel = test_tc_change_tail__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load"))
return;
link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX,
&tcx_opts);
if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx"))
goto destroy;
/* Store the link in the skeleton; no separate teardown of it happens here. */
skel->links.change_tail = link;
ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1);
if (!ASSERT_OK(ret, "create_pair"))
goto destroy;
/* Trim case: the full 2-byte datagram is still expected at the receiver. */
ret = xsend(p1, "Tr", 2, 0);
ASSERT_EQ(ret, 2, "xsend(p1)");
ret = recv(c1, buf, 2, 0);
ASSERT_EQ(ret, 2, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
/* Grow case: the original 1-byte datagram is still expected at the receiver. */
ret = xsend(p1, "G", 1, 0);
ASSERT_EQ(ret, 1, "xsend(p1)");
ret = recv(c1, buf, 2, 0);
ASSERT_EQ(ret, 1, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
/* Error case: helper expected to return -EINVAL. */
ret = xsend(p1, "E", 1, 0);
ASSERT_EQ(ret, 1, "xsend(p1)");
ret = recv(c1, buf, 1, 0);
ASSERT_EQ(ret, 1, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
/* Zero-length case: helper expected to return -EINVAL. */
ret = xsend(p1, "Z", 1, 0);
ASSERT_EQ(ret, 1, "xsend(p1)");
ret = recv(c1, buf, 1, 0);
ASSERT_EQ(ret, 1, "recv(c1)");
ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
close(c1);
close(p1);
destroy:
test_tc_change_tail__destroy(skel);
}

View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 ByteDance */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
__type(key, int);
__type(value, int);
} sock_map_rx SEC(".maps");
long change_tail_ret = 1;
/*
 * sk_skb program that picks a bpf_skb_change_tail() call based on the first
 * payload byte and records the helper's return value in change_tail_ret:
 *   'T' - shrink the skb by one byte
 *   'G' - grow the skb by one byte
 *   'E' - request a length of 65535
 * Every path returns SK_PASS, including packets with no readable first byte
 * and packets whose first byte matches none of the above (change_tail_ret is
 * left untouched in those cases).
 */
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
char *data, *data_end;
/* Return value is not checked; the bounds test below guards the access. */
bpf_skb_pull_data(skb, 1);
data = (char *)(unsigned long)skb->data;
data_end = (char *)(unsigned long)skb->data_end;
if (data + 1 > data_end)
return SK_PASS;
if (data[0] == 'T') { /* Trim the packet */
change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0);
return SK_PASS;
} else if (data[0] == 'G') { /* Grow the packet */
change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
return SK_PASS;
} else if (data[0] == 'E') { /* Error */
change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
return SK_PASS;
}
return SK_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,106 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
long change_tail_ret = 1;
/*
 * Locate the IPv4 header that follows an Ethernet header at the start of the
 * packet data.
 *
 * Returns a pointer to the header and stores its protocol field in *ip_proto.
 * Returns NULL (leaving *ip_proto untouched) when the Ethernet or IP header
 * does not fit before data_end, when the version field is not 4, or when
 * ihl < 5.
 *
 * Note: the Ethernet header is only skipped by size; its protocol field is
 * not examined here.
 */
static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
struct ethhdr *eth = data;
struct iphdr *iph;
/* Verify Ethernet header */
if ((void *)(data + sizeof(*eth)) > data_end)
return NULL;
/* Skip Ethernet header to get to IP header */
iph = (void *)(data + sizeof(struct ethhdr));
/* Verify IP header */
if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end)
return NULL;
/* Basic IP header validation */
if (iph->version != 4) /* Only support IPv4 */
return NULL;
if (iph->ihl < 5) /* Minimum IP header length */
return NULL;
*ip_proto = iph->protocol;
return iph;
}
/*
 * Locate the UDP header that follows IPv4 header @iph, using ihl * 4 as the
 * IP header length.
 *
 * Returns a pointer to the UDP header, or NULL when a full struct udphdr does
 * not fit before data_end. The IP protocol is not checked here; the caller
 * decides whether the packet is UDP.
 */
static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph)
{
void *data_end = (void *)(long)skb->data_end;
void *hdr = (void *)iph;
struct udphdr *udp;
/* Calculate UDP header position */
udp = hdr + (iph->ihl * 4);
hdr = (void *)udp;
/* Verify UDP header bounds */
if ((void *)(hdr + sizeof(*udp)) > data_end)
return NULL;
return udp;
}
/*
 * TC ingress program that picks a bpf_skb_change_tail() call based on the
 * first byte of a UDP payload and records the helper's return value in
 * change_tail_ret:
 *   'T' - shrink by one byte, then restore the original length on success
 *   'G' - grow by one byte, then restore the original length on success
 *   'E' - request a length of 65535
 *   'Z' - request a length of 0
 *
 * Packets that are not IPv4/UDP, or have no readable payload byte, are passed
 * untouched (TCX_PASS). UDP packets whose first payload byte matches none of
 * the commands above are dropped (TCX_DROP).
 */
SEC("tc/ingress")
int change_tail(struct __sk_buff *skb)
{
/* Length captured once up front; all resize requests are relative to it. */
int len = skb->len;
struct udphdr *udp;
struct iphdr *iph;
void *data_end;
char *payload;
int ip_proto;
/* Return value is not checked; header parsing below bounds-checks against data_end. */
bpf_skb_pull_data(skb, len);
data_end = (void *)(long)skb->data_end;
iph = parse_ip_header(skb, &ip_proto);
if (!iph)
return TCX_PASS;
if (ip_proto != IPPROTO_UDP)
return TCX_PASS;
udp = parse_udp_header(skb, iph);
if (!udp)
return TCX_PASS;
payload = (char *)udp + (sizeof(struct udphdr));
if (payload + 1 > (char *)data_end)
return TCX_PASS;
if (payload[0] == 'T') { /* Trim the packet */
change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0);
/* Undo the resize so the packet continues with its original length. */
if (!change_tail_ret)
bpf_skb_change_tail(skb, len, 0);
return TCX_PASS;
} else if (payload[0] == 'G') { /* Grow the packet */
change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0);
/* Undo the resize so the packet continues with its original length. */
if (!change_tail_ret)
bpf_skb_change_tail(skb, len, 0);
return TCX_PASS;
} else if (payload[0] == 'E') { /* Error */
change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
return TCX_PASS;
} else if (payload[0] == 'Z') { /* Zero */
change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
return TCX_PASS;
}
return TCX_DROP;
}
char _license[] SEC("license") = "GPL";

View File

@ -102,6 +102,8 @@
# define STAP_SDT_ARG_CONSTRAINT nZr
# elif defined __arm__
# define STAP_SDT_ARG_CONSTRAINT g
# elif defined __loongarch__
# define STAP_SDT_ARG_CONSTRAINT nmr
# else
# define STAP_SDT_ARG_CONSTRAINT nor
# endif

View File

@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st
return 0;
}
#else
# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
# endif
static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
{
return -EOPNOTSUPP;