mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 06:43:09 +00:00
xdp: Sample xdp program implementing ip forward
Implements port to port forwarding with route table and arp table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie map. Signed-off-by: Christina Jacob <Christina.Jacob@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
4ad1ceec05
commit
3e29cd0e65
@ -29,6 +29,7 @@ hostprogs-y += test_cgrp2_sock
|
||||
hostprogs-y += test_cgrp2_sock2
|
||||
hostprogs-y += xdp1
|
||||
hostprogs-y += xdp2
|
||||
hostprogs-y += xdp_router_ipv4
|
||||
hostprogs-y += test_current_task_under_cgroup
|
||||
hostprogs-y += trace_event
|
||||
hostprogs-y += sampleip
|
||||
@ -76,6 +77,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
|
||||
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
|
||||
# reuse xdp1 source intentionally
|
||||
xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
|
||||
xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
|
||||
test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
|
||||
test_current_task_under_cgroup_user.o
|
||||
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
|
||||
@ -118,6 +120,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
|
||||
always += test_cgrp2_tc_kern.o
|
||||
always += xdp1_kern.o
|
||||
always += xdp2_kern.o
|
||||
always += xdp_router_ipv4_kern.o
|
||||
always += test_current_task_under_cgroup_kern.o
|
||||
always += trace_event_kern.o
|
||||
always += sampleip_kern.o
|
||||
@ -166,6 +169,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
|
||||
HOSTLOADLIBES_test_overhead += -lelf -lrt
|
||||
HOSTLOADLIBES_xdp1 += -lelf
|
||||
HOSTLOADLIBES_xdp2 += -lelf
|
||||
HOSTLOADLIBES_xdp_router_ipv4 += -lelf
|
||||
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
|
||||
HOSTLOADLIBES_trace_event += -lelf
|
||||
HOSTLOADLIBES_sampleip += -lelf
|
||||
|
186
samples/bpf/xdp_router_ipv4_kern.c
Normal file
186
samples/bpf/xdp_router_ipv4_kern.c
Normal file
@ -0,0 +1,186 @@
|
||||
/* Copyright (C) 2017 Cavium, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2 of the GNU General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*/
|
||||
#define KBUILD_MODNAME "foo"
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_packet.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include "bpf_helpers.h"
|
||||
#include <linux/slab.h>
|
||||
#include <net/ip_fib.h>
|
||||
|
||||
struct trie_value {
|
||||
__u8 prefix[4];
|
||||
__be64 value;
|
||||
int ifindex;
|
||||
int metric;
|
||||
__be32 gw;
|
||||
};
|
||||
|
||||
/* Key for lpm_trie*/
|
||||
union key_4 {
|
||||
u32 b32[2];
|
||||
u8 b8[8];
|
||||
};
|
||||
|
||||
struct arp_entry {
|
||||
__be64 mac;
|
||||
__be32 dst;
|
||||
};
|
||||
|
||||
struct direct_map {
|
||||
struct arp_entry arp;
|
||||
int ifindex;
|
||||
__be64 mac;
|
||||
};
|
||||
|
||||
/* Map for trie implementation*/
|
||||
struct bpf_map_def SEC("maps") lpm_map = {
|
||||
.type = BPF_MAP_TYPE_LPM_TRIE,
|
||||
.key_size = 8,
|
||||
.value_size = sizeof(struct trie_value),
|
||||
.max_entries = 50,
|
||||
.map_flags = BPF_F_NO_PREALLOC,
|
||||
};
|
||||
|
||||
/* Map for counter*/
|
||||
struct bpf_map_def SEC("maps") rxcnt = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(u32),
|
||||
.value_size = sizeof(u64),
|
||||
.max_entries = 256,
|
||||
};
|
||||
|
||||
/* Map for ARP table*/
|
||||
struct bpf_map_def SEC("maps") arp_table = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(__be32),
|
||||
.value_size = sizeof(__be64),
|
||||
.max_entries = 50,
|
||||
};
|
||||
|
||||
/* Map to keep the exact match entries in the route table*/
|
||||
struct bpf_map_def SEC("maps") exact_match = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(__be32),
|
||||
.value_size = sizeof(struct direct_map),
|
||||
.max_entries = 50,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") tx_port = {
|
||||
.type = BPF_MAP_TYPE_DEVMAP,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.max_entries = 100,
|
||||
};
|
||||
|
||||
/* Function to set source and destination mac of the packet */
|
||||
static inline void set_src_dst_mac(void *data, void *src, void *dst)
|
||||
{
|
||||
unsigned short *source = src;
|
||||
unsigned short *dest = dst;
|
||||
unsigned short *p = data;
|
||||
|
||||
__builtin_memcpy(p, dest, 6);
|
||||
__builtin_memcpy(p + 3, source, 6);
|
||||
}
|
||||
|
||||
/* Parse IPV4 packet to get SRC, DST IP and protocol */
|
||||
static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
|
||||
__be32 *src, __be32 *dest)
|
||||
{
|
||||
struct iphdr *iph = data + nh_off;
|
||||
|
||||
if (iph + 1 > data_end)
|
||||
return 0;
|
||||
*src = iph->saddr;
|
||||
*dest = iph->daddr;
|
||||
return iph->protocol;
|
||||
}
|
||||
|
||||
SEC("xdp_router_ipv4")
|
||||
int xdp_router_ipv4_prog(struct xdp_md *ctx)
|
||||
{
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
__be64 *dest_mac = NULL, *src_mac = NULL;
|
||||
void *data = (void *)(long)ctx->data;
|
||||
struct trie_value *prefix_value;
|
||||
int rc = XDP_DROP, forward_to;
|
||||
struct ethhdr *eth = data;
|
||||
union key_4 key4;
|
||||
long *value;
|
||||
u16 h_proto;
|
||||
u32 ipproto;
|
||||
u64 nh_off;
|
||||
|
||||
nh_off = sizeof(*eth);
|
||||
if (data + nh_off > data_end)
|
||||
return rc;
|
||||
|
||||
h_proto = eth->h_proto;
|
||||
|
||||
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
|
||||
struct vlan_hdr *vhdr;
|
||||
|
||||
vhdr = data + nh_off;
|
||||
nh_off += sizeof(struct vlan_hdr);
|
||||
if (data + nh_off > data_end)
|
||||
return rc;
|
||||
h_proto = vhdr->h_vlan_encapsulated_proto;
|
||||
}
|
||||
if (h_proto == htons(ETH_P_ARP)) {
|
||||
return XDP_PASS;
|
||||
} else if (h_proto == htons(ETH_P_IP)) {
|
||||
struct direct_map *direct_entry;
|
||||
__be32 src_ip = 0, dest_ip = 0;
|
||||
|
||||
ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
|
||||
direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip);
|
||||
/* Check for exact match, this would give a faster lookup*/
|
||||
if (direct_entry && direct_entry->mac && direct_entry->arp.mac) {
|
||||
src_mac = &direct_entry->mac;
|
||||
dest_mac = &direct_entry->arp.mac;
|
||||
forward_to = direct_entry->ifindex;
|
||||
} else {
|
||||
/* Look up in the trie for lpm*/
|
||||
key4.b32[0] = 32;
|
||||
key4.b8[4] = dest_ip & 0xff;
|
||||
key4.b8[5] = (dest_ip >> 8) & 0xff;
|
||||
key4.b8[6] = (dest_ip >> 16) & 0xff;
|
||||
key4.b8[7] = (dest_ip >> 24) & 0xff;
|
||||
prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
|
||||
if (!prefix_value)
|
||||
return XDP_DROP;
|
||||
src_mac = &prefix_value->value;
|
||||
if (!src_mac)
|
||||
return XDP_DROP;
|
||||
dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
|
||||
if (!dest_mac) {
|
||||
if (!prefix_value->gw)
|
||||
return XDP_DROP;
|
||||
dest_ip = prefix_value->gw;
|
||||
dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
|
||||
}
|
||||
forward_to = prefix_value->ifindex;
|
||||
}
|
||||
} else {
|
||||
ipproto = 0;
|
||||
}
|
||||
if (src_mac && dest_mac) {
|
||||
set_src_dst_mac(data, src_mac, dest_mac);
|
||||
value = bpf_map_lookup_elem(&rxcnt, &ipproto);
|
||||
if (value)
|
||||
*value += 1;
|
||||
return bpf_redirect_map(&tx_port, forward_to, 0);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
659
samples/bpf/xdp_router_ipv4_user.c
Normal file
659
samples/bpf/xdp_router_ipv4_user.c
Normal file
@ -0,0 +1,659 @@
|
||||
/* Copyright (C) 2017 Cavium, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2 of the GNU General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <unistd.h>
|
||||
#include "bpf_load.h"
|
||||
#include "libbpf.h"
|
||||
#include <arpa/inet.h>
|
||||
#include <fcntl.h>
|
||||
#include <poll.h>
|
||||
#include <net/if.h>
|
||||
#include <netdb.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include "bpf_util.h"
|
||||
|
||||
int sock, sock_arp, flags = 0;
|
||||
static int total_ifindex;
|
||||
int *ifindex_list;
|
||||
char buf[8192];
|
||||
|
||||
static int get_route_table(int rtm_family);
|
||||
static void int_exit(int sig)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < total_ifindex; i++)
|
||||
set_link_xdp_fd(ifindex_list[i], -1, flags);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static void close_and_exit(int sig)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
close(sock);
|
||||
close(sock_arp);
|
||||
|
||||
for (i = 0; i < total_ifindex; i++)
|
||||
set_link_xdp_fd(ifindex_list[i], -1, flags);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Get the mac address of the interface given interface name */
|
||||
static __be64 getmac(char *iface)
|
||||
{
|
||||
struct ifreq ifr;
|
||||
__be64 mac = 0;
|
||||
int fd, i;
|
||||
|
||||
fd = socket(AF_INET, SOCK_DGRAM, 0);
|
||||
ifr.ifr_addr.sa_family = AF_INET;
|
||||
strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
|
||||
if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
|
||||
printf("ioctl failed leaving....\n");
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < 6 ; i++)
|
||||
*((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
|
||||
close(fd);
|
||||
return mac;
|
||||
}
|
||||
|
||||
static int recv_msg(struct sockaddr_nl sock_addr, int sock)
|
||||
{
|
||||
struct nlmsghdr *nh;
|
||||
int len, nll = 0;
|
||||
char *buf_ptr;
|
||||
|
||||
buf_ptr = buf;
|
||||
while (1) {
|
||||
len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
|
||||
if (len < 0)
|
||||
return len;
|
||||
|
||||
nh = (struct nlmsghdr *)buf_ptr;
|
||||
|
||||
if (nh->nlmsg_type == NLMSG_DONE)
|
||||
break;
|
||||
buf_ptr += len;
|
||||
nll += len;
|
||||
if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
|
||||
break;
|
||||
|
||||
if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
|
||||
break;
|
||||
}
|
||||
return nll;
|
||||
}
|
||||
|
||||
/* Function to parse the route entry returned by netlink
|
||||
* Updates the route entry related map entries
|
||||
*/
|
||||
static void read_route(struct nlmsghdr *nh, int nll)
|
||||
{
|
||||
char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
|
||||
struct bpf_lpm_trie_key *prefix_key;
|
||||
struct rtattr *rt_attr;
|
||||
struct rtmsg *rt_msg;
|
||||
int rtm_family;
|
||||
int rtl;
|
||||
int i;
|
||||
struct route_table {
|
||||
int dst_len, iface, metric;
|
||||
char *iface_name;
|
||||
__be32 dst, gw;
|
||||
__be64 mac;
|
||||
} route;
|
||||
struct arp_table {
|
||||
__be64 mac;
|
||||
__be32 dst;
|
||||
};
|
||||
|
||||
struct direct_map {
|
||||
struct arp_table arp;
|
||||
int ifindex;
|
||||
__be64 mac;
|
||||
} direct_entry;
|
||||
|
||||
if (nh->nlmsg_type == RTM_DELROUTE)
|
||||
printf("DELETING Route entry\n");
|
||||
else if (nh->nlmsg_type == RTM_GETROUTE)
|
||||
printf("READING Route entry\n");
|
||||
else if (nh->nlmsg_type == RTM_NEWROUTE)
|
||||
printf("NEW Route entry\n");
|
||||
else
|
||||
printf("%d\n", nh->nlmsg_type);
|
||||
|
||||
memset(&route, 0, sizeof(route));
|
||||
printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
|
||||
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
|
||||
rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
|
||||
rtm_family = rt_msg->rtm_family;
|
||||
if (rtm_family == AF_INET)
|
||||
if (rt_msg->rtm_table != RT_TABLE_MAIN)
|
||||
continue;
|
||||
rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
|
||||
rtl = RTM_PAYLOAD(nh);
|
||||
|
||||
for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
|
||||
switch (rt_attr->rta_type) {
|
||||
case NDA_DST:
|
||||
sprintf(dsts, "%u",
|
||||
(*((__be32 *)RTA_DATA(rt_attr))));
|
||||
break;
|
||||
case RTA_GATEWAY:
|
||||
sprintf(gws, "%u",
|
||||
*((__be32 *)RTA_DATA(rt_attr)));
|
||||
break;
|
||||
case RTA_OIF:
|
||||
sprintf(ifs, "%u",
|
||||
*((int *)RTA_DATA(rt_attr)));
|
||||
break;
|
||||
case RTA_METRICS:
|
||||
sprintf(metrics, "%u",
|
||||
*((int *)RTA_DATA(rt_attr)));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
|
||||
route.dst = atoi(dsts);
|
||||
route.dst_len = atoi(dsts_len);
|
||||
route.gw = atoi(gws);
|
||||
route.iface = atoi(ifs);
|
||||
route.metric = atoi(metrics);
|
||||
route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
|
||||
route.iface_name = if_indextoname(route.iface, route.iface_name);
|
||||
route.mac = getmac(route.iface_name);
|
||||
if (route.mac == -1) {
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < total_ifindex; i++)
|
||||
set_link_xdp_fd(ifindex_list[i], -1, flags);
|
||||
exit(0);
|
||||
}
|
||||
assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
|
||||
if (rtm_family == AF_INET) {
|
||||
struct trie_value {
|
||||
__u8 prefix[4];
|
||||
__be64 value;
|
||||
int ifindex;
|
||||
int metric;
|
||||
__be32 gw;
|
||||
} *prefix_value;
|
||||
|
||||
prefix_key = alloca(sizeof(*prefix_key) + 3);
|
||||
prefix_value = alloca(sizeof(*prefix_value));
|
||||
|
||||
prefix_key->prefixlen = 32;
|
||||
prefix_key->prefixlen = route.dst_len;
|
||||
direct_entry.mac = route.mac & 0xffffffffffff;
|
||||
direct_entry.ifindex = route.iface;
|
||||
direct_entry.arp.mac = 0;
|
||||
direct_entry.arp.dst = 0;
|
||||
if (route.dst_len == 32) {
|
||||
if (nh->nlmsg_type == RTM_DELROUTE)
|
||||
assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
|
||||
else
|
||||
if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
|
||||
direct_entry.arp.dst = route.dst;
|
||||
assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
|
||||
}
|
||||
for (i = 0; i < 4; i++)
|
||||
prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
|
||||
|
||||
printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
|
||||
(int)prefix_key->data[0],
|
||||
(int)prefix_key->data[1],
|
||||
(int)prefix_key->data[2],
|
||||
(int)prefix_key->data[3],
|
||||
route.gw, route.dst_len,
|
||||
route.metric,
|
||||
route.iface_name);
|
||||
if (bpf_map_lookup_elem(map_fd[0], prefix_key,
|
||||
prefix_value) < 0) {
|
||||
for (i = 0; i < 4; i++)
|
||||
prefix_value->prefix[i] = prefix_key->data[i];
|
||||
prefix_value->value = route.mac & 0xffffffffffff;
|
||||
prefix_value->ifindex = route.iface;
|
||||
prefix_value->gw = route.gw;
|
||||
prefix_value->metric = route.metric;
|
||||
|
||||
assert(bpf_map_update_elem(map_fd[0],
|
||||
prefix_key,
|
||||
prefix_value, 0
|
||||
) == 0);
|
||||
} else {
|
||||
if (nh->nlmsg_type == RTM_DELROUTE) {
|
||||
printf("deleting entry\n");
|
||||
printf("prefix key=%d.%d.%d.%d/%d",
|
||||
prefix_key->data[0],
|
||||
prefix_key->data[1],
|
||||
prefix_key->data[2],
|
||||
prefix_key->data[3],
|
||||
prefix_key->prefixlen);
|
||||
assert(bpf_map_delete_elem(map_fd[0],
|
||||
prefix_key
|
||||
) == 0);
|
||||
/* Rereading the route table to check if
|
||||
* there is an entry with the same
|
||||
* prefix but a different metric as the
|
||||
* deleted enty.
|
||||
*/
|
||||
get_route_table(AF_INET);
|
||||
} else if (prefix_key->data[0] ==
|
||||
prefix_value->prefix[0] &&
|
||||
prefix_key->data[1] ==
|
||||
prefix_value->prefix[1] &&
|
||||
prefix_key->data[2] ==
|
||||
prefix_value->prefix[2] &&
|
||||
prefix_key->data[3] ==
|
||||
prefix_value->prefix[3] &&
|
||||
route.metric >= prefix_value->metric) {
|
||||
continue;
|
||||
} else {
|
||||
for (i = 0; i < 4; i++)
|
||||
prefix_value->prefix[i] =
|
||||
prefix_key->data[i];
|
||||
prefix_value->value =
|
||||
route.mac & 0xffffffffffff;
|
||||
prefix_value->ifindex = route.iface;
|
||||
prefix_value->gw = route.gw;
|
||||
prefix_value->metric = route.metric;
|
||||
assert(bpf_map_update_elem(
|
||||
map_fd[0],
|
||||
prefix_key,
|
||||
prefix_value,
|
||||
0) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
memset(&route, 0, sizeof(route));
|
||||
memset(dsts, 0, sizeof(dsts));
|
||||
memset(dsts_len, 0, sizeof(dsts_len));
|
||||
memset(gws, 0, sizeof(gws));
|
||||
memset(ifs, 0, sizeof(ifs));
|
||||
memset(&route, 0, sizeof(route));
|
||||
}
|
||||
}
|
||||
|
||||
/* Function to read the existing route table when the process is launched*/
|
||||
static int get_route_table(int rtm_family)
|
||||
{
|
||||
struct sockaddr_nl sa;
|
||||
struct nlmsghdr *nh;
|
||||
int sock, seq = 0;
|
||||
struct msghdr msg;
|
||||
struct iovec iov;
|
||||
int ret = 0;
|
||||
int nll;
|
||||
|
||||
struct {
|
||||
struct nlmsghdr nl;
|
||||
struct rtmsg rt;
|
||||
char buf[8192];
|
||||
} req;
|
||||
|
||||
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
|
||||
if (sock < 0) {
|
||||
printf("open netlink socket: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.nl_family = AF_NETLINK;
|
||||
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
|
||||
printf("bind to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
memset(&req, 0, sizeof(req));
|
||||
req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
|
||||
req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
|
||||
req.nl.nlmsg_type = RTM_GETROUTE;
|
||||
|
||||
req.rt.rtm_family = rtm_family;
|
||||
req.rt.rtm_table = RT_TABLE_MAIN;
|
||||
req.nl.nlmsg_pid = 0;
|
||||
req.nl.nlmsg_seq = ++seq;
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
iov.iov_base = (void *)&req.nl;
|
||||
iov.iov_len = req.nl.nlmsg_len;
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
ret = sendmsg(sock, &msg, 0);
|
||||
if (ret < 0) {
|
||||
printf("send to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
memset(buf, 0, sizeof(buf));
|
||||
nll = recv_msg(sa, sock);
|
||||
if (nll < 0) {
|
||||
printf("recv from netlink: %s\n", strerror(nll));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
nh = (struct nlmsghdr *)buf;
|
||||
read_route(nh, nll);
|
||||
cleanup:
|
||||
close(sock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function to parse the arp entry returned by netlink
|
||||
* Updates the arp entry related map entries
|
||||
*/
|
||||
static void read_arp(struct nlmsghdr *nh, int nll)
|
||||
{
|
||||
struct rtattr *rt_attr;
|
||||
char dsts[24], mac[24];
|
||||
struct ndmsg *rt_msg;
|
||||
int rtl, ndm_family;
|
||||
|
||||
struct arp_table {
|
||||
__be64 mac;
|
||||
__be32 dst;
|
||||
} arp_entry;
|
||||
struct direct_map {
|
||||
struct arp_table arp;
|
||||
int ifindex;
|
||||
__be64 mac;
|
||||
} direct_entry;
|
||||
|
||||
if (nh->nlmsg_type == RTM_GETNEIGH)
|
||||
printf("READING arp entry\n");
|
||||
printf("Address\tHwAddress\n");
|
||||
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
|
||||
rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
|
||||
rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
|
||||
ndm_family = rt_msg->ndm_family;
|
||||
rtl = RTM_PAYLOAD(nh);
|
||||
for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
|
||||
switch (rt_attr->rta_type) {
|
||||
case NDA_DST:
|
||||
sprintf(dsts, "%u",
|
||||
*((__be32 *)RTA_DATA(rt_attr)));
|
||||
break;
|
||||
case NDA_LLADDR:
|
||||
sprintf(mac, "%lld",
|
||||
*((__be64 *)RTA_DATA(rt_attr)));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
arp_entry.dst = atoi(dsts);
|
||||
arp_entry.mac = atol(mac);
|
||||
printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
|
||||
if (ndm_family == AF_INET) {
|
||||
if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
|
||||
&direct_entry) == 0) {
|
||||
if (nh->nlmsg_type == RTM_DELNEIGH) {
|
||||
direct_entry.arp.dst = 0;
|
||||
direct_entry.arp.mac = 0;
|
||||
} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
|
||||
direct_entry.arp.dst = arp_entry.dst;
|
||||
direct_entry.arp.mac = arp_entry.mac;
|
||||
}
|
||||
assert(bpf_map_update_elem(map_fd[3],
|
||||
&arp_entry.dst,
|
||||
&direct_entry, 0
|
||||
) == 0);
|
||||
memset(&direct_entry, 0, sizeof(direct_entry));
|
||||
}
|
||||
if (nh->nlmsg_type == RTM_DELNEIGH) {
|
||||
assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
|
||||
} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
|
||||
assert(bpf_map_update_elem(map_fd[2],
|
||||
&arp_entry.dst,
|
||||
&arp_entry.mac, 0
|
||||
) == 0);
|
||||
}
|
||||
}
|
||||
memset(&arp_entry, 0, sizeof(arp_entry));
|
||||
memset(dsts, 0, sizeof(dsts));
|
||||
}
|
||||
}
|
||||
|
||||
/* Function to read the existing arp table when the process is launched*/
|
||||
static int get_arp_table(int rtm_family)
|
||||
{
|
||||
struct sockaddr_nl sa;
|
||||
struct nlmsghdr *nh;
|
||||
int sock, seq = 0;
|
||||
struct msghdr msg;
|
||||
struct iovec iov;
|
||||
int ret = 0;
|
||||
int nll;
|
||||
struct {
|
||||
struct nlmsghdr nl;
|
||||
struct ndmsg rt;
|
||||
char buf[8192];
|
||||
} req;
|
||||
|
||||
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
|
||||
if (sock < 0) {
|
||||
printf("open netlink socket: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.nl_family = AF_NETLINK;
|
||||
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
|
||||
printf("bind to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
memset(&req, 0, sizeof(req));
|
||||
req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
|
||||
req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
|
||||
req.nl.nlmsg_type = RTM_GETNEIGH;
|
||||
req.rt.ndm_state = NUD_REACHABLE;
|
||||
req.rt.ndm_family = rtm_family;
|
||||
req.nl.nlmsg_pid = 0;
|
||||
req.nl.nlmsg_seq = ++seq;
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
iov.iov_base = (void *)&req.nl;
|
||||
iov.iov_len = req.nl.nlmsg_len;
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
ret = sendmsg(sock, &msg, 0);
|
||||
if (ret < 0) {
|
||||
printf("send to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
memset(buf, 0, sizeof(buf));
|
||||
nll = recv_msg(sa, sock);
|
||||
if (nll < 0) {
|
||||
printf("recv from netlink: %s\n", strerror(nll));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
nh = (struct nlmsghdr *)buf;
|
||||
read_arp(nh, nll);
|
||||
cleanup:
|
||||
close(sock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function to keep track and update changes in route and arp table
|
||||
* Give regular statistics of packets forwarded
|
||||
*/
|
||||
static int monitor_route(void)
|
||||
{
|
||||
unsigned int nr_cpus = bpf_num_possible_cpus();
|
||||
const unsigned int nr_keys = 256;
|
||||
struct pollfd fds_route, fds_arp;
|
||||
__u64 prev[nr_keys][nr_cpus];
|
||||
struct sockaddr_nl la, lr;
|
||||
__u64 values[nr_cpus];
|
||||
struct nlmsghdr *nh;
|
||||
int nll, ret = 0;
|
||||
int interval = 5;
|
||||
__u32 key;
|
||||
int i;
|
||||
|
||||
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
|
||||
if (sock < 0) {
|
||||
printf("open netlink socket: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
fcntl(sock, F_SETFL, O_NONBLOCK);
|
||||
memset(&lr, 0, sizeof(lr));
|
||||
lr.nl_family = AF_NETLINK;
|
||||
lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
|
||||
if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
|
||||
printf("bind to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
fds_route.fd = sock;
|
||||
fds_route.events = POLL_IN;
|
||||
|
||||
sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
|
||||
if (sock_arp < 0) {
|
||||
printf("open netlink socket: %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
fcntl(sock_arp, F_SETFL, O_NONBLOCK);
|
||||
memset(&la, 0, sizeof(la));
|
||||
la.nl_family = AF_NETLINK;
|
||||
la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
|
||||
if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
|
||||
printf("bind to netlink: %s\n", strerror(errno));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
fds_arp.fd = sock_arp;
|
||||
fds_arp.events = POLL_IN;
|
||||
|
||||
memset(prev, 0, sizeof(prev));
|
||||
do {
|
||||
signal(SIGINT, close_and_exit);
|
||||
signal(SIGTERM, close_and_exit);
|
||||
|
||||
sleep(interval);
|
||||
for (key = 0; key < nr_keys; key++) {
|
||||
__u64 sum = 0;
|
||||
|
||||
assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
|
||||
for (i = 0; i < nr_cpus; i++)
|
||||
sum += (values[i] - prev[key][i]);
|
||||
if (sum)
|
||||
printf("proto %u: %10llu pkt/s\n",
|
||||
key, sum / interval);
|
||||
memcpy(prev[key], values, sizeof(values));
|
||||
}
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
if (poll(&fds_route, 1, 3) == POLL_IN) {
|
||||
nll = recv_msg(lr, sock);
|
||||
if (nll < 0) {
|
||||
printf("recv from netlink: %s\n", strerror(nll));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
nh = (struct nlmsghdr *)buf;
|
||||
printf("Routing table updated.\n");
|
||||
read_route(nh, nll);
|
||||
}
|
||||
memset(buf, 0, sizeof(buf));
|
||||
if (poll(&fds_arp, 1, 3) == POLL_IN) {
|
||||
nll = recv_msg(la, sock_arp);
|
||||
if (nll < 0) {
|
||||
printf("recv from netlink: %s\n", strerror(nll));
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
nh = (struct nlmsghdr *)buf;
|
||||
read_arp(nh, nll);
|
||||
}
|
||||
|
||||
} while (1);
|
||||
cleanup:
|
||||
close(sock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int ac, char **argv)
|
||||
{
|
||||
char filename[256];
|
||||
char **ifname_list;
|
||||
int i = 1;
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
if (ac < 2) {
|
||||
printf("usage: %s [-S] Interface name list\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
if (!strcmp(argv[1], "-S")) {
|
||||
flags = XDP_FLAGS_SKB_MODE;
|
||||
total_ifindex = ac - 2;
|
||||
ifname_list = (argv + 2);
|
||||
} else {
|
||||
flags = 0;
|
||||
total_ifindex = ac - 1;
|
||||
ifname_list = (argv + 1);
|
||||
}
|
||||
if (load_bpf_file(filename)) {
|
||||
printf("%s", bpf_log_buf);
|
||||
return 1;
|
||||
}
|
||||
printf("\n**************loading bpf file*********************\n\n\n");
|
||||
if (!prog_fd[0]) {
|
||||
printf("load_bpf_file: %s\n", strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
|
||||
for (i = 0; i < total_ifindex; i++) {
|
||||
ifindex_list[i] = if_nametoindex(ifname_list[i]);
|
||||
if (!ifindex_list[i]) {
|
||||
printf("Couldn't translate interface name: %s",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < total_ifindex; i++) {
|
||||
if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
|
||||
printf("link set xdp fd failed\n");
|
||||
int recovery_index = i;
|
||||
|
||||
for (i = 0; i < recovery_index; i++)
|
||||
set_link_xdp_fd(ifindex_list[i], -1, flags);
|
||||
|
||||
return 1;
|
||||
}
|
||||
printf("Attached to %d\n", ifindex_list[i]);
|
||||
}
|
||||
signal(SIGINT, int_exit);
|
||||
signal(SIGTERM, int_exit);
|
||||
|
||||
printf("*******************ROUTE TABLE*************************\n\n\n");
|
||||
get_route_table(AF_INET);
|
||||
printf("*******************ARP TABLE***************************\n\n\n");
|
||||
get_arp_table(AF_INET);
|
||||
if (monitor_route() < 0) {
|
||||
printf("Error in receiving route update");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user