mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-11 00:08:50 +00:00
60523115c1
When we get a packet on port 9091, we swap src/dst and send it out. At this point we also request the timestamp and checksum offloads. Checksum offload is verified by looking at the tcpdump on the other side. The tool prints pseudo-header csum and the final one it expects. The final checksum actually matches the incoming packets checksum because we only flip the src/dst and don't change the payload. Some other related changes: - switched to zerocopy mode by default; new flag can be used to force old behavior - request fixed tx_metadata_len headroom - some other small fixes (umem size, fill idx+i, etc) mvbz3:~# ./xdp_hw_metadata eth3 ... xsk_ring_cons__peek: 1 0x19546f8: rx_desc[0]->addr=80100 addr=80100 comp_addr=80100 rx_hash: 0x80B7EA8B with RSS type:0x2A rx_timestamp: 1697580171852147395 (sec:1697580171.8521) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to User RX-time sec:0.2797 (279673.082 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User RX-time sec:0.0001 (121.430 usec) 0x19546f8: ping-pong with csum=3b8e (want d862) csum_start=54 csum_offset=6 0x19546f8: complete tx idx=0 addr=8 tx_timestamp: 1697580172056756493 (sec:1697580172.0568) HW TX-complete-time: 1697580172056756493 (sec:1697580172.0568), delta to User TX-complete-time sec:0.0852 (85175.537 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User TX-complete-time sec:0.0102 (10232.983 usec) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to HW TX-complete-time sec:0.2046 (204609.098 usec) 0x19546f8: complete rx idx=128 addr=80100 mvbz4:~# nc -Nu -q1 ${MVBZ3_LINK_LOCAL_IP}%eth3 9091 mvbz4:~# tcpdump -vvx -i eth3 udp tcpdump: listening on eth3, link-type EN10MB (Ethernet), snapshot length 262144 bytes 12:26:09.301074 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1087.55807 > fe80::1270:fdff:fe48:1077.9091: [bad udp cksum 0x3b8e -> 0xde7e!] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1087 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1077 d9ff 2383 000b 3b8e 0x0030: 7864 70 12:26:09.301976 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1077.9091 > fe80::1270:fdff:fe48:1087.55807: [udp sum ok] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1077 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1087 2383 d9ff 000b de7e 0x0030: 7864 70 Signed-off-by: Stanislav Fomichev <sdf@google.com> Link: https://lore.kernel.org/r/20231127190319.1190813-14-sdf@google.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
724 lines
17 KiB
C
724 lines
17 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/* Reference program for verifying XDP metadata on real HW. Functional test
|
|
* only, doesn't test the performance.
|
|
*
|
|
* RX:
|
|
* - UDP 9091 packets are diverted into AF_XDP
|
|
* - Metadata verified:
|
|
* - rx_timestamp
|
|
* - rx_hash
|
|
*
|
|
* TX:
|
|
* - UDP 9091 packets trigger TX reply
|
|
* - TX HW timestamp is requested and reported back upon completion
|
|
* - TX checksum is requested
|
|
*/
|
|
|
|
#include <test_progs.h>
|
|
#include <network_helpers.h>
|
|
#include "xdp_hw_metadata.skel.h"
|
|
#include "xsk.h"
|
|
|
|
#include <error.h>
|
|
#include <linux/errqueue.h>
|
|
#include <linux/if_link.h>
|
|
#include <linux/net_tstamp.h>
|
|
#include <linux/udp.h>
|
|
#include <linux/sockios.h>
|
|
#include <linux/if_xdp.h>
|
|
#include <sys/mman.h>
|
|
#include <net/if.h>
|
|
#include <ctype.h>
|
|
#include <poll.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <libgen.h>
|
|
|
|
#include "xdp_metadata.h"
|
|
|
|
#define UMEM_NUM 256
|
|
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
|
|
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
|
|
#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
|
|
|
|
struct xsk {
|
|
void *umem_area;
|
|
struct xsk_umem *umem;
|
|
struct xsk_ring_prod fill;
|
|
struct xsk_ring_cons comp;
|
|
struct xsk_ring_prod tx;
|
|
struct xsk_ring_cons rx;
|
|
struct xsk_socket *socket;
|
|
};
|
|
|
|
struct xdp_hw_metadata *bpf_obj;
|
|
__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
|
|
struct xsk *rx_xsk;
|
|
const char *ifname;
|
|
int ifindex;
|
|
int rxq;
|
|
bool skip_tx;
|
|
__u64 last_hw_rx_timestamp;
|
|
__u64 last_xdp_rx_timestamp;
|
|
|
|
void test__fail(void) { /* for network_helpers.c */ }
|
|
|
|
static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
|
|
{
|
|
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
|
|
const struct xsk_socket_config socket_config = {
|
|
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
|
.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
|
.bind_flags = bind_flags,
|
|
};
|
|
const struct xsk_umem_config umem_config = {
|
|
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
|
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
|
|
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
|
|
.flags = XSK_UMEM__DEFAULT_FLAGS,
|
|
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
|
|
};
|
|
__u32 idx;
|
|
u64 addr;
|
|
int ret;
|
|
int i;
|
|
|
|
xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
|
|
if (xsk->umem_area == MAP_FAILED)
|
|
return -ENOMEM;
|
|
|
|
ret = xsk_umem__create(&xsk->umem,
|
|
xsk->umem_area, UMEM_SIZE,
|
|
&xsk->fill,
|
|
&xsk->comp,
|
|
&umem_config);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
|
|
xsk->umem,
|
|
&xsk->rx,
|
|
&xsk->tx,
|
|
&socket_config);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/* First half of umem is for TX. This way address matches 1-to-1
|
|
* to the completion queue index.
|
|
*/
|
|
|
|
for (i = 0; i < UMEM_NUM / 2; i++) {
|
|
addr = i * UMEM_FRAME_SIZE;
|
|
printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
|
|
}
|
|
|
|
/* Second half of umem is for RX. */
|
|
|
|
ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
|
|
for (i = 0; i < UMEM_NUM / 2; i++) {
|
|
addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
|
|
printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
|
|
*xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr;
|
|
}
|
|
xsk_ring_prod__submit(&xsk->fill, ret);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void close_xsk(struct xsk *xsk)
|
|
{
|
|
if (xsk->umem)
|
|
xsk_umem__delete(xsk->umem);
|
|
if (xsk->socket)
|
|
xsk_socket__delete(xsk->socket);
|
|
munmap(xsk->umem_area, UMEM_SIZE);
|
|
}
|
|
|
|
static void refill_rx(struct xsk *xsk, __u64 addr)
|
|
{
|
|
__u32 idx;
|
|
|
|
if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
|
|
printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
|
|
*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
|
|
xsk_ring_prod__submit(&xsk->fill, 1);
|
|
}
|
|
}
|
|
|
|
static int kick_tx(struct xsk *xsk)
|
|
{
|
|
return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
|
|
}
|
|
|
|
static int kick_rx(struct xsk *xsk)
|
|
{
|
|
return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
|
|
}
|
|
|
|
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
|
|
static __u64 gettime(clockid_t clock_id)
|
|
{
|
|
struct timespec t;
|
|
int res;
|
|
|
|
/* See man clock_gettime(2) for type of clock_id's */
|
|
res = clock_gettime(clock_id, &t);
|
|
|
|
if (res < 0)
|
|
error(res, errno, "Error with clock_gettime()");
|
|
|
|
return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
|
|
}
|
|
|
|
static void print_tstamp_delta(const char *name, const char *refname,
|
|
__u64 tstamp, __u64 reference)
|
|
{
|
|
__s64 delta = (__s64)reference - (__s64)tstamp;
|
|
|
|
printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n",
|
|
name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname,
|
|
(double)delta / NANOSEC_PER_SEC,
|
|
(double)delta / 1000);
|
|
}
|
|
|
|
static void verify_xdp_metadata(void *data, clockid_t clock_id)
|
|
{
|
|
struct xdp_meta *meta;
|
|
|
|
meta = data - sizeof(*meta);
|
|
|
|
if (meta->rx_hash_err < 0)
|
|
printf("No rx_hash err=%d\n", meta->rx_hash_err);
|
|
else
|
|
printf("rx_hash: 0x%X with RSS type:0x%X\n",
|
|
meta->rx_hash, meta->rx_hash_type);
|
|
|
|
if (meta->rx_timestamp) {
|
|
__u64 ref_tstamp = gettime(clock_id);
|
|
|
|
/* store received timestamps to calculate a delta at tx */
|
|
last_hw_rx_timestamp = meta->rx_timestamp;
|
|
last_xdp_rx_timestamp = meta->xdp_timestamp;
|
|
|
|
print_tstamp_delta("HW RX-time", "User RX-time",
|
|
meta->rx_timestamp, ref_tstamp);
|
|
print_tstamp_delta("XDP RX-time", "User RX-time",
|
|
meta->xdp_timestamp, ref_tstamp);
|
|
} else {
|
|
printf("No rx_timestamp\n");
|
|
}
|
|
}
|
|
|
|
static void verify_skb_metadata(int fd)
|
|
{
|
|
char cmsg_buf[1024];
|
|
char packet_buf[128];
|
|
|
|
struct scm_timestamping *ts;
|
|
struct iovec packet_iov;
|
|
struct cmsghdr *cmsg;
|
|
struct msghdr hdr;
|
|
|
|
memset(&hdr, 0, sizeof(hdr));
|
|
hdr.msg_iov = &packet_iov;
|
|
hdr.msg_iovlen = 1;
|
|
packet_iov.iov_base = packet_buf;
|
|
packet_iov.iov_len = sizeof(packet_buf);
|
|
|
|
hdr.msg_control = cmsg_buf;
|
|
hdr.msg_controllen = sizeof(cmsg_buf);
|
|
|
|
if (recvmsg(fd, &hdr, 0) < 0)
|
|
error(1, errno, "recvmsg");
|
|
|
|
for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
|
|
cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
|
|
|
|
if (cmsg->cmsg_level != SOL_SOCKET)
|
|
continue;
|
|
|
|
switch (cmsg->cmsg_type) {
|
|
case SCM_TIMESTAMPING:
|
|
ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
|
|
if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
|
|
printf("found skb hwtstamp = %lu.%lu\n",
|
|
ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
|
|
return;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
printf("skb hwtstamp is not found!\n");
|
|
}
|
|
|
|
static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
|
|
{
|
|
struct xsk_tx_metadata *meta;
|
|
__u64 addr;
|
|
void *data;
|
|
__u32 idx;
|
|
|
|
if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx))
|
|
return false;
|
|
|
|
addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
|
|
data = xsk_umem__get_data(xsk->umem_area, addr);
|
|
meta = data - sizeof(struct xsk_tx_metadata);
|
|
|
|
printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
|
|
|
|
if (meta->completion.tx_timestamp) {
|
|
__u64 ref_tstamp = gettime(clock_id);
|
|
|
|
print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
|
|
meta->completion.tx_timestamp, ref_tstamp);
|
|
print_tstamp_delta("XDP RX-time", "User TX-complete-time",
|
|
last_xdp_rx_timestamp, ref_tstamp);
|
|
print_tstamp_delta("HW RX-time", "HW TX-complete-time",
|
|
last_hw_rx_timestamp, meta->completion.tx_timestamp);
|
|
} else {
|
|
printf("No tx_timestamp\n");
|
|
}
|
|
|
|
xsk_ring_cons__release(&xsk->comp, 1);
|
|
|
|
return true;
|
|
}
|
|
|
|
#define swap(a, b, len) do { \
|
|
for (int i = 0; i < len; i++) { \
|
|
__u8 tmp = ((__u8 *)a)[i]; \
|
|
((__u8 *)a)[i] = ((__u8 *)b)[i]; \
|
|
((__u8 *)b)[i] = tmp; \
|
|
} \
|
|
} while (0)
|
|
|
|
static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
|
|
{
|
|
struct xsk_tx_metadata *meta;
|
|
struct ipv6hdr *ip6h = NULL;
|
|
struct iphdr *iph = NULL;
|
|
struct xdp_desc *tx_desc;
|
|
struct udphdr *udph;
|
|
struct ethhdr *eth;
|
|
__sum16 want_csum;
|
|
void *data;
|
|
__u32 idx;
|
|
int ret;
|
|
int len;
|
|
|
|
ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
|
|
if (ret != 1) {
|
|
printf("%p: failed to reserve tx slot\n", xsk);
|
|
return;
|
|
}
|
|
|
|
tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
|
|
tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
|
|
data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
|
|
|
|
meta = data - sizeof(struct xsk_tx_metadata);
|
|
memset(meta, 0, sizeof(*meta));
|
|
meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
|
|
|
|
eth = rx_packet;
|
|
|
|
if (eth->h_proto == htons(ETH_P_IP)) {
|
|
iph = (void *)(eth + 1);
|
|
udph = (void *)(iph + 1);
|
|
} else if (eth->h_proto == htons(ETH_P_IPV6)) {
|
|
ip6h = (void *)(eth + 1);
|
|
udph = (void *)(ip6h + 1);
|
|
} else {
|
|
printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto);
|
|
xsk_ring_prod__cancel(&xsk->tx, 1);
|
|
return;
|
|
}
|
|
|
|
len = ETH_HLEN;
|
|
if (ip6h)
|
|
len += sizeof(*ip6h) + ntohs(ip6h->payload_len);
|
|
if (iph)
|
|
len += ntohs(iph->tot_len);
|
|
|
|
swap(eth->h_dest, eth->h_source, ETH_ALEN);
|
|
if (iph)
|
|
swap(&iph->saddr, &iph->daddr, 4);
|
|
else
|
|
swap(&ip6h->saddr, &ip6h->daddr, 16);
|
|
swap(&udph->source, &udph->dest, 2);
|
|
|
|
want_csum = udph->check;
|
|
if (ip6h)
|
|
udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
|
|
ntohs(udph->len), IPPROTO_UDP, 0);
|
|
else
|
|
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
|
|
ntohs(udph->len), IPPROTO_UDP, 0);
|
|
|
|
meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
|
|
if (iph)
|
|
meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
|
|
else
|
|
meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h);
|
|
meta->request.csum_offset = offsetof(struct udphdr, check);
|
|
|
|
printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n",
|
|
xsk, ntohs(udph->check), ntohs(want_csum),
|
|
meta->request.csum_start, meta->request.csum_offset);
|
|
|
|
memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
|
|
tx_desc->options |= XDP_TX_METADATA;
|
|
tx_desc->len = len;
|
|
|
|
xsk_ring_prod__submit(&xsk->tx, 1);
|
|
}
|
|
|
|
static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
|
|
{
|
|
const struct xdp_desc *rx_desc;
|
|
struct pollfd fds[rxq + 1];
|
|
__u64 comp_addr;
|
|
__u64 addr;
|
|
__u32 idx = 0;
|
|
int ret;
|
|
int i;
|
|
|
|
for (i = 0; i < rxq; i++) {
|
|
fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
|
|
fds[i].events = POLLIN;
|
|
fds[i].revents = 0;
|
|
}
|
|
|
|
fds[rxq].fd = server_fd;
|
|
fds[rxq].events = POLLIN;
|
|
fds[rxq].revents = 0;
|
|
|
|
while (true) {
|
|
errno = 0;
|
|
|
|
for (i = 0; i < rxq; i++) {
|
|
ret = kick_rx(&rx_xsk[i]);
|
|
if (ret)
|
|
printf("kick_rx ret=%d\n", ret);
|
|
}
|
|
|
|
ret = poll(fds, rxq + 1, 1000);
|
|
printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
|
|
ret, errno, bpf_obj->bss->pkts_skip,
|
|
bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
|
|
if (ret < 0)
|
|
break;
|
|
if (ret == 0)
|
|
continue;
|
|
|
|
if (fds[rxq].revents)
|
|
verify_skb_metadata(server_fd);
|
|
|
|
for (i = 0; i < rxq; i++) {
|
|
bool first_seg = true;
|
|
bool is_eop = true;
|
|
|
|
if (fds[i].revents == 0)
|
|
continue;
|
|
|
|
struct xsk *xsk = &rx_xsk[i];
|
|
peek:
|
|
ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
|
|
printf("xsk_ring_cons__peek: %d\n", ret);
|
|
if (ret != 1)
|
|
continue;
|
|
|
|
rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
|
|
comp_addr = xsk_umem__extract_addr(rx_desc->addr);
|
|
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
|
|
is_eop = !(rx_desc->options & XDP_PKT_CONTD);
|
|
printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
|
|
xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
|
|
if (first_seg) {
|
|
verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
|
|
clock_id);
|
|
first_seg = false;
|
|
|
|
if (!skip_tx) {
|
|
/* mirror first chunk back */
|
|
ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr),
|
|
clock_id);
|
|
|
|
ret = kick_tx(xsk);
|
|
if (ret)
|
|
printf("kick_tx ret=%d\n", ret);
|
|
|
|
for (int j = 0; j < 500; j++) {
|
|
if (complete_tx(xsk, clock_id))
|
|
break;
|
|
usleep(10*1000);
|
|
}
|
|
}
|
|
}
|
|
|
|
xsk_ring_cons__release(&xsk->rx, 1);
|
|
refill_rx(xsk, comp_addr);
|
|
if (!is_eop)
|
|
goto peek;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct ethtool_channels {
|
|
__u32 cmd;
|
|
__u32 max_rx;
|
|
__u32 max_tx;
|
|
__u32 max_other;
|
|
__u32 max_combined;
|
|
__u32 rx_count;
|
|
__u32 tx_count;
|
|
__u32 other_count;
|
|
__u32 combined_count;
|
|
};
|
|
|
|
#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
|
|
|
|
static int rxq_num(const char *ifname)
|
|
{
|
|
struct ethtool_channels ch = {
|
|
.cmd = ETHTOOL_GCHANNELS,
|
|
};
|
|
|
|
struct ifreq ifr = {
|
|
.ifr_data = (void *)&ch,
|
|
};
|
|
strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
|
|
int fd, ret;
|
|
|
|
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
|
if (fd < 0)
|
|
error(1, errno, "socket");
|
|
|
|
ret = ioctl(fd, SIOCETHTOOL, &ifr);
|
|
if (ret < 0)
|
|
error(1, errno, "ioctl(SIOCETHTOOL)");
|
|
|
|
close(fd);
|
|
|
|
return ch.rx_count + ch.combined_count;
|
|
}
|
|
|
|
static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
|
|
{
|
|
struct ifreq ifr = {
|
|
.ifr_data = (void *)cfg,
|
|
};
|
|
strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
|
|
int fd, ret;
|
|
|
|
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
|
if (fd < 0)
|
|
error(1, errno, "socket");
|
|
|
|
ret = ioctl(fd, op, &ifr);
|
|
if (ret < 0)
|
|
error(1, errno, "ioctl(%d)", op);
|
|
|
|
close(fd);
|
|
}
|
|
|
|
static struct hwtstamp_config saved_hwtstamp_cfg;
|
|
static const char *saved_hwtstamp_ifname;
|
|
|
|
static void hwtstamp_restore(void)
|
|
{
|
|
hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
|
|
}
|
|
|
|
static void hwtstamp_enable(const char *ifname)
|
|
{
|
|
struct hwtstamp_config cfg = {
|
|
.rx_filter = HWTSTAMP_FILTER_ALL,
|
|
};
|
|
|
|
hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
|
|
saved_hwtstamp_ifname = strdup(ifname);
|
|
atexit(hwtstamp_restore);
|
|
|
|
hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
|
|
}
|
|
|
|
static void cleanup(void)
|
|
{
|
|
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
|
|
int ret;
|
|
int i;
|
|
|
|
if (bpf_obj) {
|
|
opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
|
|
if (opts.old_prog_fd >= 0) {
|
|
printf("detaching bpf program....\n");
|
|
ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
|
|
if (ret)
|
|
printf("failed to detach XDP program: %d\n", ret);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < rxq; i++)
|
|
close_xsk(&rx_xsk[i]);
|
|
|
|
if (bpf_obj)
|
|
xdp_hw_metadata__destroy(bpf_obj);
|
|
}
|
|
|
|
static void handle_signal(int sig)
|
|
{
|
|
/* interrupting poll() is all we need */
|
|
}
|
|
|
|
static void timestamping_enable(int fd, int val)
|
|
{
|
|
int ret;
|
|
|
|
ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
|
|
if (ret < 0)
|
|
error(1, errno, "setsockopt(SO_TIMESTAMPING)");
|
|
}
|
|
|
|
static void print_usage(void)
|
|
{
|
|
const char *usage =
|
|
"Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
|
|
" -c Run in copy mode (zerocopy is default)\n"
|
|
" -h Display this help and exit\n\n"
|
|
" -m Enable multi-buffer XDP for larger MTU\n"
|
|
" -r Don't generate AF_XDP reply (rx metadata only)\n"
|
|
"Generate test packets on the other machine with:\n"
|
|
" echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
|
|
|
|
printf("%s", usage);
|
|
}
|
|
|
|
static void read_args(int argc, char *argv[])
|
|
{
|
|
int opt;
|
|
|
|
while ((opt = getopt(argc, argv, "chmr")) != -1) {
|
|
switch (opt) {
|
|
case 'c':
|
|
bind_flags &= ~XDP_USE_NEED_WAKEUP;
|
|
bind_flags &= ~XDP_ZEROCOPY;
|
|
bind_flags |= XDP_COPY;
|
|
break;
|
|
case 'h':
|
|
print_usage();
|
|
exit(0);
|
|
case 'm':
|
|
bind_flags |= XDP_USE_SG;
|
|
break;
|
|
case 'r':
|
|
skip_tx = true;
|
|
break;
|
|
case '?':
|
|
if (isprint(optopt))
|
|
fprintf(stderr, "Unknown option: -%c\n", optopt);
|
|
fallthrough;
|
|
default:
|
|
print_usage();
|
|
error(-1, opterr, "Command line options error");
|
|
}
|
|
}
|
|
|
|
if (optind >= argc) {
|
|
fprintf(stderr, "No device name provided\n");
|
|
print_usage();
|
|
exit(-1);
|
|
}
|
|
|
|
ifname = argv[optind];
|
|
ifindex = if_nametoindex(ifname);
|
|
|
|
if (!ifname)
|
|
error(-1, errno, "Invalid interface name");
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
clockid_t clock_id = CLOCK_TAI;
|
|
int server_fd = -1;
|
|
int ret;
|
|
int i;
|
|
|
|
struct bpf_program *prog;
|
|
|
|
read_args(argc, argv);
|
|
|
|
rxq = rxq_num(ifname);
|
|
|
|
printf("rxq: %d\n", rxq);
|
|
|
|
hwtstamp_enable(ifname);
|
|
|
|
rx_xsk = malloc(sizeof(struct xsk) * rxq);
|
|
if (!rx_xsk)
|
|
error(1, ENOMEM, "malloc");
|
|
|
|
for (i = 0; i < rxq; i++) {
|
|
printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
|
|
ret = open_xsk(ifindex, &rx_xsk[i], i);
|
|
if (ret)
|
|
error(1, -ret, "open_xsk");
|
|
|
|
printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
|
|
}
|
|
|
|
printf("open bpf program...\n");
|
|
bpf_obj = xdp_hw_metadata__open();
|
|
if (libbpf_get_error(bpf_obj))
|
|
error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
|
|
|
|
prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
|
|
bpf_program__set_ifindex(prog, ifindex);
|
|
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
|
|
|
|
printf("load bpf program...\n");
|
|
ret = xdp_hw_metadata__load(bpf_obj);
|
|
if (ret)
|
|
error(1, -ret, "xdp_hw_metadata__load");
|
|
|
|
printf("prepare skb endpoint...\n");
|
|
server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
|
|
if (server_fd < 0)
|
|
error(1, errno, "start_server");
|
|
timestamping_enable(server_fd,
|
|
SOF_TIMESTAMPING_SOFTWARE |
|
|
SOF_TIMESTAMPING_RAW_HARDWARE);
|
|
|
|
printf("prepare xsk map...\n");
|
|
for (i = 0; i < rxq; i++) {
|
|
int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
|
|
__u32 queue_id = i;
|
|
|
|
printf("map[%d] = %d\n", queue_id, sock_fd);
|
|
ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
|
|
if (ret)
|
|
error(1, -ret, "bpf_map_update_elem");
|
|
}
|
|
|
|
printf("attach bpf program...\n");
|
|
ret = bpf_xdp_attach(ifindex,
|
|
bpf_program__fd(bpf_obj->progs.rx),
|
|
XDP_FLAGS, NULL);
|
|
if (ret)
|
|
error(1, -ret, "bpf_xdp_attach");
|
|
|
|
signal(SIGINT, handle_signal);
|
|
ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
|
|
close(server_fd);
|
|
cleanup();
|
|
if (ret)
|
|
error(1, -ret, "verify_metadata");
|
|
}
|