selftests: net: Add busy_poll_test

Add an epoll busy poll test using netdevsim.

This test is comprised of:
  - busy_poller (via busy_poller.c)
  - busy_poll_test.sh which loads netdevsim, sets up network namespaces,
    and runs busy_poller to receive data and socat to send data.

The selftest tests two different scenarios:
  - busy poll (the pre-existing version in the kernel)
  - busy poll with suspend enabled (what this series adds)

The data transmitted is a 1MiB temporary file generated from /dev/urandom,
and the test is considered passing if the md5sum of the input file to
socat matches the md5sum of the output file from busy_poller.

netdevsim was chosen instead of veth due to netdevsim's support for
netdev-genl.

For now, this test uses the functionality that netdevsim provides. In the
future, perhaps netdevsim can be extended to emulate device IRQs to more
thoroughly test all pre-existing kernel options (like defer_hard_irqs)
and suspend.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241109050245.191288-6-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Joe Damato 2024-11-09 05:02:35 +00:00 committed by Jakub Kicinski
parent 8a6de2627f
commit 347fcdc414
4 changed files with 521 additions and 0 deletions

View File

@ -2,6 +2,7 @@
bind_bhash
bind_timewait
bind_wildcard
busy_poller
cmsg_sender
diag_uid
epoll_busy_poll

View File

@ -97,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
# YNL files, must be before "include ../lib.mk"
YNL_GEN_FILES := busy_poller
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
@ -107,6 +112,10 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
# YNL build
YNL_GENS := netdev
include ynl.mk
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto

View File

@ -0,0 +1,165 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Pull in shared helpers. wait_local_port_listen (called from test_busypoll)
# is not defined in this script, so it is presumably provided here.
source net_helper.sh
# netdevsim device IDs: the server ID is drawn from [256, 511] and the client
# ID from [512, 767], so the two can never collide with each other.
NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
NSIM_CL_ID=$((512 + RANDOM % 256))
NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
# netdevsim bus control files written to by the main body of this script
NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
# addresses assigned in setup_ns (with a /24 prefix) and the TCP port that
# busy_poller listens on
SERVER_IP=192.168.1.1
CLIENT_IP=192.168.1.2
SERVER_PORT=48675
# busy poll config
# (passed to busy_poller as -m, -u, -g and -P respectively)
MAX_EVENTS=8
BUSY_POLL_USECS=0
BUSY_POLL_BUDGET=16
PREFER_BUSY_POLL=1
# IRQ deferral config
# NOTE(review): NAPI_DEFER_HARD_IRQS and GRO_FLUSH_TIMEOUT are not referenced
# anywhere else in this script; only SUSPEND_TIMEOUT is used (passed as -s by
# test_busypoll_with_suspend).
NAPI_DEFER_HARD_IRQS=100
GRO_FLUSH_TIMEOUT=50000
SUSPEND_TIMEOUT=20000000
# Create the server (nssv) and client (nscl) namespaces, move one netdevsim
# netdev into each of them, assign the test addresses and bring the links up.
# Runs under "set -e", so any failing step terminates the script.
# Sets the globals NSIM_SV_NAME and NSIM_CL_NAME.
setup_ns()
{
	set -e

	ip netns add nssv
	ip netns add nscl

	# the netdev name is the single sub-directory of <device>/net
	NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d \
			    ! -path $NSIM_SV_SYS/net -exec basename {} \;)
	NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d \
			    ! -path $NSIM_CL_SYS/net -exec basename {} \;)

	# server side; ensure the server has 1 queue
	ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null
	ip link set $NSIM_SV_NAME netns nssv
	ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
	ip netns exec nssv ip link set dev $NSIM_SV_NAME up

	# client side
	ip link set $NSIM_CL_NAME netns nscl
	ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
	ip netns exec nscl ip link set dev $NSIM_CL_NAME up

	set +e
}
# Delete both test namespaces, client first. The return status is that of
# the last deletion (nssv).
cleanup_ns()
{
	local ns

	for ns in nscl nssv; do
		ip netns del $ns
	done
}
# Run one transfer through busy_poller and verify the received data.
#
# $1: value passed to busy_poller via -s (defaults to 0)
#
# A 1MiB file of random data is sent with socat from the client namespace
# to busy_poller running in the server namespace. Returns 0 when the md5sum
# of the file written by busy_poller matches the input, 1 otherwise.
test_busypoll()
{
	suspend_value=${1:-0}
	tmp_file=$(mktemp)
	out_file=$(mktemp)

	# fill a test file with random data
	dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null

	# receiver: runs in the background, bounded by a 30s timeout
	timeout -k 1s 30s ip netns exec nssv ./busy_poller \
		-p${SERVER_PORT} \
		-b${SERVER_IP} \
		-m${MAX_EVENTS} \
		-u${BUSY_POLL_USECS} \
		-P${PREFER_BUSY_POLL} \
		-g${BUSY_POLL_BUDGET} \
		-i${NSIM_SV_IFIDX} \
		-s${suspend_value} \
		-o${out_file}&

	# sender: start once the receiver is listening, then wait for the
	# background receiver to finish
	wait_local_port_listen nssv ${SERVER_PORT} tcp
	ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT}
	wait

	tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ')
	out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ')

	res=0
	if [ "$tmp_file_md5sum" != "$out_file_md5sum" ]; then
		echo "md5sum mismatch"
		echo "input file md5sum: ${tmp_file_md5sum}";
		echo "output file md5sum: ${out_file_md5sum}";
		res=1
	fi

	rm $out_file $tmp_file

	return $res
}
# Same as test_busypoll, but passes SUSPEND_TIMEOUT as the -s value.
# The function's status is the status of test_busypoll (its last command).
test_busypoll_with_suspend()
{
	test_busypoll ${SUSPEND_TIMEOUT}
}
###
### Code start
###
# Remove both netdevsim devices created by this script. Errors are ignored
# so this can be called from any point of the run.
cleanup_nsim_devs()
{
	echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL 2>/dev/null
	echo $NSIM_SV_ID > $NSIM_DEV_SYS_DEL 2>/dev/null
}

# Print the reason given in $1, tear down devices and namespaces, exit 1.
fail()
{
	echo "$1"
	cleanup_nsim_devs
	cleanup_ns
	exit 1
}

modprobe netdevsim

# linking

echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
udevadm settle

setup_ns

# "exec {VAR}<file" lets bash pick a free descriptor and store its number in
# VAR, so no descriptor number needs to be chosen up front.
exec {NSIM_SV_FD}</var/run/netns/nssv
NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)

exec {NSIM_CL_FD}</var/run/netns/nscl
NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)

# link the two devices; each side is identified as <netns fd>:<ifindex>
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
	$NSIM_DEV_SYS_LINK
if [ $? -ne 0 ]; then
	fail "linking netdevsim1 with netdevsim2 should succeed"
fi

test_busypoll || fail "test_busypoll failed"

test_busypoll_with_suspend || fail "test_busypoll_with_suspend failed"

echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK

# delete both devices (client and server), not only the client
cleanup_nsim_devs

cleanup_ns

modprobe -r netdevsim

exit 0

View File

@ -0,0 +1,346 @@
// SPDX-License-Identifier: GPL-2.0
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ynl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/genetlink.h>
#include <linux/netlink.h>
#include "netdev-user.h"
/* The below ifdef blob is required because:
*
* - sys/epoll.h does not (yet) have the ioctl definitions included. So,
* systems with older glibcs will not have them available. However,
* sys/epoll.h does include the type definition for epoll_data, which is
* needed by the user program (e.g. epoll_event.data.fd)
*
* - linux/eventpoll.h does not define the epoll_data type, it is simply an
* opaque __u64. It does, however, include the ioctl definition.
*
* Including both headers is impossible (types would be redefined), so I've
* opted instead to take sys/epoll.h, and include the blob below.
*
* Someday, when glibc is globally up to date, the blob below can be removed.
*/
/* Fallback definitions, compiled only when the included headers did not
 * already provide EPOLL_IOC_TYPE.
 */
#if !defined(EPOLL_IOC_TYPE)
/* Argument block of the EPIOCSPARAMS / EPIOCGPARAMS ioctls defined below;
 * run_poller() fills it from the cfg_busy_poll_* / cfg_prefer_busy_poll
 * options.
 */
struct epoll_params {
uint32_t busy_poll_usecs;
uint16_t busy_poll_budget;
uint8_t prefer_busy_poll;
/* pad the struct to a multiple of 64bits */
uint8_t __pad;
};
#define EPOLL_IOC_TYPE 0x8A
/* set / get the parameters of an epoll instance */
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
/* Run-time configuration, filled in by parse_opts(). The option letter that
 * sets each variable is noted next to it; variables without an initializer
 * default to zero / NULL.
 */
static uint32_t cfg_port = 8000; /* -p: TCP port to listen on */
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; /* -b */
static char *cfg_outfile; /* -o: file the received data is written to */
static int cfg_max_events = 8; /* -m: size of the epoll_wait() event array */
static int cfg_ifindex; /* -i: mandatory, parse_opts() rejects 0 */
/* busy poll params */
static uint32_t cfg_busy_poll_usecs; /* -u */
static uint32_t cfg_busy_poll_budget; /* -g */
static uint32_t cfg_prefer_busy_poll; /* -P */
/* IRQ params */
static uint32_t cfg_defer_hard_irqs; /* -d */
static uint64_t cfg_gro_flush_timeout; /* -r */
static uint64_t cfg_irq_suspend_timeout; /* -s */
/* Print the command line synopsis for @filepath (the program name) and
 * terminate the process with exit status 1. Does not return.
 */
static void usage(const char *filepath)
{
	const char *prog = filepath;

	error(EXIT_FAILURE, 0,
	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
	      prog);
}
/* Convert @arg to an unsigned integer in the range [0, @max].
 *
 * The value is range checked at full width, before the caller narrows it to
 * the type of the destination variable, so out-of-range input cannot wrap
 * around silently. Exits with @errmsg when the value is out of range, and
 * with a generic message when @arg is not a complete number.
 */
static unsigned long long parse_uint_opt(const char *arg,
					 unsigned long long max,
					 const char *errmsg)
{
	unsigned long long val;
	char *end;

	errno = 0;
	val = strtoull(arg, &end, 0);
	if (end == arg || *end != '\0')
		error(1, EINVAL, "invalid number: %s", arg);
	/* negative input wraps to a huge value and is rejected here as well */
	if (errno == ERANGE || val > max)
		error(1, ERANGE, "%s", errmsg);

	return val;
}

/* Parse the command line into the cfg_* variables.
 *
 * Exits via usage() when no arguments are given, when an unknown option is
 * seen, when -i<ifindex> is missing or zero, or when non-option arguments
 * remain. Exits with an error message when an option value is malformed or
 * out of range.
 */
static void parse_opts(int argc, char **argv)
{
	int ret;
	int c;

	if (argc <= 1)
		usage(argv[0]);

	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
		switch (c) {
		case 'u':
			cfg_busy_poll_usecs =
				parse_uint_opt(optarg, UINT32_MAX,
					       "busy_poll_usecs too large");
			break;
		case 'P':
			cfg_prefer_busy_poll =
				parse_uint_opt(optarg, 1,
					       "prefer busy poll should be 0 or 1");
			break;
		case 'g':
			cfg_busy_poll_budget =
				parse_uint_opt(optarg, UINT16_MAX,
					       "busy poll budget must be [0, UINT16_MAX]");
			break;
		case 'p':
			cfg_port = parse_uint_opt(optarg, UINT16_MAX,
						  "port must be <= 65535");
			break;
		case 'b':
			ret = inet_aton(optarg, &cfg_bind_addr);
			/* inet_aton() does not set errno, so none is reported */
			if (ret == 0)
				error(1, 0,
				      "bind address %s invalid", optarg);
			break;
		case 'o':
			cfg_outfile = strdup(optarg);
			if (!cfg_outfile)
				error(1, 0, "outfile invalid");
			break;
		case 'm':
			/* stored in an int, hence the INT_MAX bound */
			cfg_max_events =
				parse_uint_opt(optarg, INT_MAX,
					       "max events must be > 0 and <= INT_MAX");
			if (cfg_max_events == 0)
				error(1, ERANGE,
				      "max events must be > 0 and <= INT_MAX");
			break;
		case 'd':
			cfg_defer_hard_irqs =
				parse_uint_opt(optarg, INT32_MAX,
					       "defer_hard_irqs must be <= INT32_MAX");
			break;
		case 'r':
			cfg_gro_flush_timeout =
				parse_uint_opt(optarg, ULLONG_MAX - 1,
					       "gro_flush_timeout must be < ULLONG_MAX");
			break;
		case 's':
			cfg_irq_suspend_timeout =
				parse_uint_opt(optarg, ULLONG_MAX - 1,
					       "irq_suspend_timeout must be < ULLONG_MAX");
			break;
		case 'i':
			/* stored in an int, hence the INT_MAX bound */
			cfg_ifindex = parse_uint_opt(optarg, INT_MAX,
						     "ifindex must be <= INT_MAX");
			break;
		default:
			/* getopt() has already printed a diagnostic */
			usage(argv[0]);
		}
	}

	if (!cfg_ifindex)
		usage(argv[0]);

	if (optind != argc)
		usage(argv[0]);
}
/* Register @fd with the epoll instance @epfd for the event mask @events.
 * The descriptor itself is stored as the event's user data. Exits the
 * process if the registration fails.
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event ev = { .events = events };
	int rc;

	ev.data.fd = fd;

	rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
	if (rc == -1)
		error(1, errno, "epoll_ctl add fd: %d", fd);
}
/* Switch @sockfd to non-blocking mode, keeping its other file status flags.
 * Exits the process if the flags cannot be read or updated.
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	/* don't feed the -1 error value back into F_SETFL as a flag mask */
	if (flags == -1)
		error(1, errno, "unable to read socket flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}
/* Write all @buflen bytes starting at @buf to @fd, issuing further write()
 * calls after short writes. Nothing is written when @buflen is not
 * positive. Exits the process if a write() fails.
 */
static void write_chunk(int fd, char *buf, ssize_t buflen)
{
	ssize_t left = buflen;
	ssize_t n;

	while (left > 0) {
		n = write(fd, buf, left);
		if (n == -1)
			error(1, errno, "unable to write data to outfile");

		/* advance past the part that was written */
		buf += n;
		left -= n;
	}
}
static void setup_queue(void)
{
struct netdev_napi_get_list *napi_list = NULL;
struct netdev_napi_get_req_dump *req = NULL;
struct netdev_napi_set_req *set_req = NULL;
struct ynl_sock *ys;
struct ynl_error yerr;
uint32_t napi_id;
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys)
error(1, 0, "YNL: %s", yerr.msg);
req = netdev_napi_get_req_dump_alloc();
netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
napi_list = netdev_napi_get_dump(ys, req);
/* assume there is 1 NAPI configured and take the first */
if (napi_list->obj._present.id)
napi_id = napi_list->obj.id;
else
error(1, 0, "napi ID not present?");
set_req = netdev_napi_set_req_alloc();
netdev_napi_set_req_set_id(set_req, napi_id);
netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
netdev_napi_set_req_set_gro_flush_timeout(set_req,
cfg_gro_flush_timeout);
netdev_napi_set_req_set_irq_suspend_timeout(set_req,
cfg_irq_suspend_timeout);
if (netdev_napi_set(ys, set_req))
error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
netdev_napi_get_list_free(napi_list);
netdev_napi_get_req_dump_free(req);
netdev_napi_set_req_free(set_req);
ynl_sock_destroy(ys);
}
/* Accept a TCP connection on cfg_bind_addr:cfg_port and copy everything
 * received on it into cfg_outfile.
 *
 * The listening socket and the accepted connection are non-blocking and
 * are served from one epoll instance that has the busy poll parameters
 * (cfg_busy_poll_usecs, cfg_busy_poll_budget, cfg_prefer_busy_poll) applied
 * through EPIOCSPARAMS. The function returns once a connection reports
 * EPOLLRDHUP or EPOLLHUP; every setup failure exits the process.
 */
static void run_poller(void)
{
	struct epoll_event events[cfg_max_events];
	struct epoll_params epoll_params = {0};
	struct sockaddr_in server_addr = {0};
	int i, epfd, nfds;
	ssize_t readlen;
	int outfile_fd;
	char buf[1024];
	int sockfd;
	int conn;
	int val;

	outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
	if (outfile_fd == -1)
		error(1, errno, "unable to open outfile: %s", cfg_outfile);

	sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (sockfd == -1)
		error(1, errno, "unable to create listen socket");

	server_addr.sin_family = AF_INET;
	server_addr.sin_port = htons(cfg_port);
	server_addr.sin_addr = cfg_bind_addr;

	/* these values are range checked during parse_opts, so casting is safe
	 * here
	 */
	epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
	epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
	epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
	epoll_params.__pad = 0;

	val = 1;
	if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
		error(1, errno, "poller setsockopt reuseaddr");

	setnonblock(sockfd);

	/* a failed bind must be fatal: listening on an unbound socket would
	 * not serve the requested address and port
	 */
	if (bind(sockfd, (struct sockaddr *)&server_addr,
		 sizeof(struct sockaddr_in)))
		error(1, errno, "poller bind to port: %u", cfg_port);

	if (listen(sockfd, 1))
		error(1, errno, "poller listen");

	epfd = epoll_create1(0);
	if (epfd == -1)
		error(1, errno, "unable to create epoll instance");

	if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
		error(1, errno, "unable to set busy poll params");

	epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);

	for (;;) {
		nfds = epoll_wait(epfd, events, cfg_max_events, -1);
		if (nfds == -1) {
			/* interrupted by a signal: just wait again */
			if (errno == EINTR)
				continue;
			error(1, errno, "epoll_wait failed");
		}

		for (i = 0; i < nfds; i++) {
			if (events[i].data.fd == sockfd) {
				conn = accept(sockfd, NULL, NULL);
				if (conn == -1)
					error(1, errno,
					      "accepting incoming connection failed");

				setnonblock(conn);
				epoll_ctl_add(epfd, conn,
					      EPOLLIN | EPOLLET | EPOLLRDHUP |
					      EPOLLHUP);
			} else if (events[i].events & EPOLLIN) {
				/* edge triggered: drain the socket until
				 * read() stops returning data
				 */
				for (;;) {
					readlen = read(events[i].data.fd, buf,
						       sizeof(buf));
					if (readlen > 0)
						write_chunk(outfile_fd, buf,
							    readlen);
					else
						break;
				}
			} else {
				/* spurious event ? */
			}

			/* the peer is done: release everything and return */
			if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
				epoll_ctl(epfd, EPOLL_CTL_DEL,
					  events[i].data.fd, NULL);
				close(events[i].data.fd);
				close(outfile_fd);
				close(sockfd);
				close(epfd);
				return;
			}
		}
	}
}
/* Entry point: parse the options, configure the NAPI of the selected
 * interface, then receive one connection's data into the output file.
 */
int main(int argc, char *argv[])
{
	parse_opts(argc, argv);

	setup_queue();
	run_poller();

	return EXIT_SUCCESS;
}