linux/drivers/net/netkit.c
Cong Wang 4832756676 rtnetlink: fix double call of rtnl_link_get_net_ifla()
Currently rtnl_link_get_net_ifla() gets called twice when we create
peer devices, once in rtnl_add_peer_net() and once in each ->newlink()
implementation.

This looks safer, however, it leads to a classic Time-of-Check to
Time-of-Use (TOCTOU) bug since IFLA_NET_NS_PID is very dynamic. And
because of the lack of checking error pointer of the second call, it
also leads to a kernel crash as reported by syzbot.

Fix this by getting rid of the second call, which already becomes
redundant after Kuniyuki's work. We have to propagate the result of the
first rtnl_link_get_net_ifla() down to each ->newlink().

Reported-by: syzbot+21ba4d5adff0b6a7cfc6@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=21ba4d5adff0b6a7cfc6
Fixes: 0eb87b02a7 ("veth: Set VETH_INFO_PEER to veth_link_ops.peer_type.")
Fixes: 6b84e558e9 ("vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type.")
Fixes: fefd5d0821 ("netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type.")
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241129212519.825567-1-xiyou.wangcong@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2024-12-03 11:29:29 +01:00

995 lines
24 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2023 Isovalent */
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/netfilter_netdev.h>
#include <linux/bpf_mprog.h>
#include <linux/indirect_call_wrapper.h>
#include <net/netkit.h>
#include <net/dst.h>
#include <net/tcx.h>
/* Driver name: used as the rtnl link kind, ethtool driver string and module alias. */
#define DRV_NAME "netkit"
/* Per-device private state, obtained through netkit_priv(). */
struct netkit {
/* Needed in fast-path */
struct net_device __rcu *peer; /* paired device; NULL once the pair is torn down */
struct bpf_mprog_entry __rcu *active; /* attached BPF programs; NULL when none */
enum netkit_action policy; /* verdict netkit_xmit() starts from before running programs */
enum netkit_scrub scrub; /* passed to netkit_prep_forward() as xnet_scrub */
struct bpf_mprog_bundle bundle; /* backing storage; bundle.a is the fallback entry in netkit_entry_fetch() */
/* Needed in slow-path */
enum netkit_mode mode; /* NETKIT_L2 or NETKIT_L3, fixed at creation */
bool primary; /* true for the device handed to newlink, false for its peer */
u32 headroom; /* headroom last requested for this device via netkit_set_headroom() */
};
/* BPF link flavour for netkit; recovered from a struct bpf_link via netkit_link(). */
struct netkit_link {
struct bpf_link link; /* embedded generic link */
struct net_device *dev; /* device the link is attached to; NULL after release/teardown */
u32 location; /* attach type copied from link_create.attach_type */
};
/*
 * Run the programs of @entry against @skb in order.
 *
 * Stops at the first program whose verdict is not NETKIT_NEXT and returns
 * that verdict. If every program returns NETKIT_NEXT, NETKIT_NEXT is
 * returned; if @entry holds no program at all, the caller supplied @ret is
 * returned unchanged.
 */
static __always_inline int
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
	   enum netkit_action ret)
{
	const struct bpf_mprog_fp *slot;
	const struct bpf_prog *cur;

	bpf_mprog_foreach_prog(entry, slot, cur) {
		/* Refresh the data pointers before every program run. */
		bpf_compute_data_pointers(skb);
		ret = bpf_prog_run(cur, skb);
		if (ret != NETKIT_NEXT)
			return ret;
	}

	return ret;
}
/* Clear the skb mark and priority (applied on scrubbed cross-netns forwards). */
static void netkit_xnet(struct sk_buff *skb)
{
	skb->mark = 0;
	skb->priority = 0;
}
/*
 * Prepare @skb for delivery to the peer device.
 *
 * @xnet:       true when the packet crosses network namespaces
 * @xnet_scrub: when crossing namespaces, additionally reset mark/priority
 */
static void netkit_prep_forward(struct sk_buff *skb,
				bool xnet, bool xnet_scrub)
{
	/* Always done, regardless of namespace crossing. */
	skb_scrub_packet(skb, false);
	nf_skip_egress(skb, true);
	skb_reset_mac_header(skb);

	if (xnet) {
		/* Extra state reset only when leaving the sender's netns. */
		ipvs_reset(skb);
		skb_clear_tstamp(skb);
		if (xnet_scrub)
			netkit_xnet(skb);
	}
}
/* Typed accessor for the netkit private area of @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	struct netkit *nk = netdev_priv(dev);

	return nk;
}
/*
 * ndo_start_xmit: forward @skb from @dev to its peer.
 *
 * The verdict starts out as the device's default policy and may be replaced
 * by the result of the attached BPF programs. Depending on the verdict the
 * skb is injected into the peer's receive path, redirected, or dropped.
 *
 * Returns NET_XMIT_SUCCESS, or NET_XMIT_DROP when the packet was dropped.
 */
static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct netkit *nk = netkit_priv(dev);
enum netkit_action ret = READ_ONCE(nk->policy);
netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
const struct bpf_mprog_entry *entry;
struct net_device *peer;
/* Sampled up front: used for stats after the skb has been handed off. */
int len = skb->len;
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
rcu_read_lock();
peer = rcu_dereference(nk->peer);
/* Drop if there is no peer, the peer is down, or the skb cannot be prepared. */
if (unlikely(!peer || !(peer->flags & IFF_UP) ||
!pskb_may_pull(skb, ETH_HLEN) ||
skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
/* Second argument is true when sender and peer live in different netns. */
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
nk->scrub);
eth_skb_pkt_type(skb, peer);
skb->dev = peer;
entry = rcu_dereference(nk->active);
/* Without attached programs the default policy read above is the verdict. */
if (entry)
ret = netkit_run(entry, skb, ret);
switch (ret) {
case NETKIT_NEXT:
case NETKIT_PASS:
eth_skb_pull_mac(skb);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
dev_sw_netstats_tx_add(dev, 1, len);
dev_sw_netstats_rx_add(peer, len);
} else {
/* Skips kfree_skb(): presumably __netif_rx() already consumed the skb - TODO confirm. */
goto drop_stats;
}
break;
case NETKIT_REDIRECT:
dev_sw_netstats_tx_add(dev, 1, len);
skb_do_redirect(skb);
break;
case NETKIT_DROP:
default:
/* "drop" is also the target of the early sanity-check failure above. */
drop:
kfree_skb(skb);
drop_stats:
dev_core_stats_tx_dropped_inc(dev);
ret_dev = NET_XMIT_DROP;
break;
}
rcu_read_unlock();
bpf_net_ctx_clear(bpf_net_ctx);
return ret_dev;
}
static int netkit_open(struct net_device *dev)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
if (!peer)
return -ENOTCONN;
if (peer->flags & IFF_UP) {
netif_carrier_on(dev);
netif_carrier_on(peer);
}
return 0;
}
static int netkit_close(struct net_device *dev)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
netif_carrier_off(dev);
if (peer)
netif_carrier_off(peer);
return 0;
}
static int netkit_get_iflink(const struct net_device *dev)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer;
int iflink = 0;
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (peer)
iflink = READ_ONCE(peer->ifindex);
rcu_read_unlock();
return iflink;
}
/* ndo_set_rx_mode: intentionally empty. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* No filtering to configure: the device accepts whatever is pushed to it. */
}
static int netkit_set_macaddr(struct net_device *dev, void *sa)
{
struct netkit *nk = netkit_priv(dev);
if (nk->mode != NETKIT_L2)
return -EOPNOTSUPP;
return eth_mac_addr(dev, sa);
}
static void netkit_set_headroom(struct net_device *dev, int headroom)
{
struct netkit *nk = netkit_priv(dev), *nk2;
struct net_device *peer;
if (headroom < 0)
headroom = NET_SKB_PAD;
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (unlikely(!peer))
goto out;
nk2 = netkit_priv(peer);
nk->headroom = headroom;
headroom = max(nk->headroom, nk2->headroom);
peer->needed_headroom = headroom;
dev->needed_headroom = headroom;
out:
rcu_read_unlock();
}
INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
{
return rcu_dereference(netkit_priv(dev)->peer);
}
/*
 * ndo_get_stats64: fold the per-CPU software counters into @stats, then set
 * tx_dropped from the device-level drop counter.
 */
static void netkit_get_stats(struct net_device *dev,
			     struct rtnl_link_stats64 *stats)
{
	/* Per-CPU tstats first ... */
	dev_fetch_sw_netstats(stats, dev->tstats);

	/* ... then the drop counter maintained in netkit_xmit(). */
	stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
/* Defined further down; needed here for the ops table. */
static void netkit_uninit(struct net_device *dev);

/* net_device_ops shared by both ends of a netkit pair. */
static const struct net_device_ops netkit_netdev_ops = {
	/* Lifecycle */
	.ndo_open		= netkit_open,
	.ndo_stop		= netkit_close,
	.ndo_uninit		= netkit_uninit,

	/* Data path */
	.ndo_start_xmit		= netkit_xmit,
	.ndo_features_check	= passthru_features_check,

	/* Configuration */
	.ndo_set_rx_mode	= netkit_set_multicast,
	.ndo_set_rx_headroom	= netkit_set_headroom,
	.ndo_set_mac_address	= netkit_set_macaddr,

	/* Introspection */
	.ndo_get_iflink		= netkit_get_iflink,
	.ndo_get_peer_dev	= netkit_peer_dev,
	.ndo_get_stats64	= netkit_get_stats,
};
/* ethtool get_drvinfo: only the driver name is reported. */
static void netkit_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
}
/* ethtool operations; installed on each device by netkit_setup(). */
static const struct ethtool_ops netkit_ethtool_ops = {
.get_drvinfo = netkit_get_drvinfo,
};
/*
 * rtnl_link_ops->setup: initialise a netkit net_device.
 *
 * Starts from ether_setup() defaults, then applies the netkit specific
 * flags, ops tables and offload feature sets.
 */
static void netkit_setup(struct net_device *dev)
{
	static const netdev_features_t hw_vlan_feats =
		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
		NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX;
	static const netdev_features_t nk_feats =
		hw_vlan_feats |
		NETIF_F_SG | NETIF_F_FRAGLIST |
		NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SCTP_CRC |
		NETIF_F_HIGHDMA |
		NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;

	/* Must come first: everything below adjusts the Ethernet defaults. */
	ether_setup(dev);

	dev->max_mtu = ETH_MAX_MTU;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	dev->flags |= IFF_NOARP;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_PHONY_HEADROOM |
			   IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
	dev->lltx = true;

	dev->ethtool_ops = &netkit_ethtool_ops;
	dev->netdev_ops = &netkit_netdev_ops;

	dev->features |= nk_feats;
	dev->hw_features = nk_feats;
	dev->hw_enc_features = nk_feats;
	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
	/* VLAN devices on top get everything except the VLAN offloads. */
	dev->vlan_features = dev->features & ~hw_vlan_feats;

	dev->needs_free_netdev = true;

	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
}
static struct net *netkit_get_link_net(const struct net_device *dev)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
return peer ? dev_net(peer) : dev_net(dev);
}
/*
 * Validate a default xmit policy supplied from user space.
 *
 * Only NETKIT_PASS and NETKIT_DROP are accepted. Anything else sets an
 * extack message pointing at @tb and returns -EINVAL.
 */
static int netkit_check_policy(int policy, struct nlattr *tb,
			       struct netlink_ext_ack *extack)
{
	if (policy == NETKIT_PASS || policy == NETKIT_DROP)
		return 0;

	NL_SET_ERR_MSG_ATTR(extack, tb,
			    "Provided default xmit policy not supported");
	return -EINVAL;
}
/*
 * rtnl_link_ops->validate: check an optional IFLA_ADDRESS attribute.
 *
 * Returns 0 when absent or valid, -EINVAL on a wrong length and
 * -EADDRNOTAVAIL when the address is not a valid Ethernet address.
 */
static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	struct nlattr *addr = tb[IFLA_ADDRESS];

	if (addr) {
		if (nla_len(addr) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(addr)))
			return -EADDRNOTAVAIL;
	}

	return 0;
}
static struct rtnl_link_ops netkit_link_ops;
static int netkit_new_link(struct net *peer_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
enum netkit_action policy_prim = NETKIT_PASS;
enum netkit_action policy_peer = NETKIT_PASS;
enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
enum netkit_mode mode = NETKIT_L3;
unsigned char ifname_assign_type;
struct ifinfomsg *ifmp = NULL;
struct net_device *peer;
char ifname[IFNAMSIZ];
struct netkit *nk;
int err;
if (data) {
if (data[IFLA_NETKIT_MODE])
mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
if (data[IFLA_NETKIT_PEER_INFO]) {
attr = data[IFLA_NETKIT_PEER_INFO];
ifmp = nla_data(attr);
rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
tbp = peer_tb;
}
if (data[IFLA_NETKIT_SCRUB])
scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
if (data[IFLA_NETKIT_PEER_SCRUB])
scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
if (data[IFLA_NETKIT_POLICY]) {
attr = data[IFLA_NETKIT_POLICY];
policy_prim = nla_get_u32(attr);
err = netkit_check_policy(policy_prim, attr, extack);
if (err < 0)
return err;
}
if (data[IFLA_NETKIT_PEER_POLICY]) {
attr = data[IFLA_NETKIT_PEER_POLICY];
policy_peer = nla_get_u32(attr);
err = netkit_check_policy(policy_peer, attr, extack);
if (err < 0)
return err;
}
}
if (ifmp && tbp[IFLA_IFNAME]) {
nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
ifname_assign_type = NET_NAME_USER;
} else {
strscpy(ifname, "nk%d", IFNAMSIZ);
ifname_assign_type = NET_NAME_ENUM;
}
if (mode != NETKIT_L2 &&
(tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
return -EOPNOTSUPP;
peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
&netkit_link_ops, tbp, extack);
if (IS_ERR(peer))
return PTR_ERR(peer);
netif_inherit_tso_max(peer, dev);
if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
eth_hw_addr_random(peer);
if (ifmp && dev->ifindex)
peer->ifindex = ifmp->ifi_index;
nk = netkit_priv(peer);
nk->primary = false;
nk->policy = policy_peer;
nk->scrub = scrub_peer;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);
err = register_netdevice(peer);
if (err < 0)
goto err_register_peer;
netif_carrier_off(peer);
if (mode == NETKIT_L2)
dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
err = rtnl_configure_link(peer, NULL, 0, NULL);
if (err < 0)
goto err_configure_peer;
if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
if (tb[IFLA_IFNAME])
nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
else
strscpy(dev->name, "nk%d", IFNAMSIZ);
nk = netkit_priv(dev);
nk->primary = true;
nk->policy = policy_prim;
nk->scrub = scrub_prim;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);
err = register_netdevice(dev);
if (err < 0)
goto err_configure_peer;
netif_carrier_off(dev);
if (mode == NETKIT_L2)
dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);
rcu_assign_pointer(netkit_priv(dev)->peer, peer);
rcu_assign_pointer(netkit_priv(peer)->peer, dev);
return 0;
err_configure_peer:
unregister_netdevice(peer);
return err;
err_register_peer:
free_netdev(peer);
return err;
}
/*
 * Return the active program entry of @dev (RTNL must be held).
 *
 * When no entry is active, the device's built-in bundle entry is returned
 * if @bundle_fallback is set, NULL otherwise.
 */
static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev,
						  bool bundle_fallback)
{
	struct netkit *nk = netkit_priv(dev);
	struct bpf_mprog_entry *cur;

	ASSERT_RTNL();

	cur = rcu_dereference_rtnl(nk->active);
	if (!cur && bundle_fallback)
		cur = &nk->bundle.a;

	return cur;
}
static void netkit_entry_update(struct net_device *dev,
struct bpf_mprog_entry *entry)
{
struct netkit *nk = netkit_priv(dev);
ASSERT_RTNL();
rcu_assign_pointer(nk->active, entry);
}
/*
 * Wait for an RCU grace period; callers invoke this after
 * netkit_entry_update().
 */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
/*
 * Resolve the netkit device a BPF attach/detach/query request targets.
 *
 * @net:     namespace to look @ifindex up in
 * @ifindex: index of the *primary* netkit device
 * @which:   BPF_NETKIT_PRIMARY or BPF_NETKIT_PEER
 *
 * Returns the primary device or its peer, or an ERR_PTR():
 *   -EINVAL  @which is neither PRIMARY nor PEER
 *   -ENODEV  no such device, or the requested peer is gone
 *   -ENXIO   the device is not a netkit device
 *   -EACCES  the device is not the primary of its pair
 *
 * RTNL must be held.
 */
static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which)
{
	struct net_device *dev, *peer;
	struct netkit *nk;

	ASSERT_RTNL();

	if (which != BPF_NETKIT_PRIMARY && which != BPF_NETKIT_PEER)
		return ERR_PTR(-EINVAL);

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return ERR_PTR(-ENODEV);
	if (dev->netdev_ops != &netkit_netdev_ops)
		return ERR_PTR(-ENXIO);

	nk = netkit_priv(dev);
	if (!nk->primary)
		return ERR_PTR(-EACCES);

	if (which == BPF_NETKIT_PRIMARY)
		return dev;

	peer = rcu_dereference_rtnl(nk->peer);
	if (!peer)
		return ERR_PTR(-ENODEV);

	return peer;
}
/*
 * Attach @prog to the netkit device selected by @attr (non-link attach).
 *
 * With BPF_F_REPLACE set in attr->attach_flags, the program referenced by
 * attr->replace_bpf_fd is passed as the program to replace. Takes RTNL for
 * the duration. Returns 0 or a negative errno.
 */
int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *cur, *next;
	struct bpf_prog *old = NULL;
	struct net_device *dev;
	int ret;

	rtnl_lock();

	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto unlock;
	}

	cur = netkit_entry_fetch(dev, true);

	if (attr->attach_flags & BPF_F_REPLACE) {
		old = bpf_prog_get_type(attr->replace_bpf_fd, prog->type);
		if (IS_ERR(old)) {
			/* Nothing was taken, so there is nothing to put. */
			ret = PTR_ERR(old);
			goto unlock;
		}
	}

	ret = bpf_mprog_attach(cur, &next, prog, NULL, old,
			       attr->attach_flags, attr->relative_fd,
			       attr->expected_revision);
	if (ret == 0) {
		if (next != cur) {
			netkit_entry_update(dev, next);
			netkit_entry_sync();
		}
		bpf_mprog_commit(cur);
	}

	if (old)
		bpf_prog_put(old);
unlock:
	rtnl_unlock();
	return ret;
}
/*
 * Detach @prog from the netkit device selected by @attr (non-link detach).
 *
 * Returns -ENOENT when the device has no active program entry, otherwise
 * the result of the detach. When the last program goes away the active
 * entry is reset to NULL. Takes RTNL for the duration.
 */
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *cur, *next;
	struct net_device *dev;
	int ret;

	rtnl_lock();

	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto unlock;
	}

	cur = netkit_entry_fetch(dev, false);
	if (!cur) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = bpf_mprog_detach(cur, &next, prog, NULL, attr->attach_flags,
			       attr->relative_fd, attr->expected_revision);
	if (ret)
		goto unlock;

	/* An empty entry is not kept around as the active one. */
	if (!bpf_mprog_total(next))
		next = NULL;
	netkit_entry_update(dev, next);
	netkit_entry_sync();
	bpf_mprog_commit(cur);
unlock:
	rtnl_unlock();
	return ret;
}
/*
 * Query the programs attached to the netkit device selected by
 * attr->query. Takes RTNL for the duration. Returns the result of
 * bpf_mprog_query(), or the lookup error.
 */
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	struct net_device *dev;
	int ret;

	rtnl_lock();

	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->query.target_ifindex,
			       attr->query.attach_type);
	if (IS_ERR(dev))
		ret = PTR_ERR(dev);
	else
		ret = bpf_mprog_query(attr, uattr,
				      netkit_entry_fetch(dev, false));

	rtnl_unlock();
	return ret;
}
/* Map a generic bpf_link back to the netkit_link that embeds it. */
static struct netkit_link *netkit_link(const struct bpf_link *link)
{
	struct netkit_link *nkl = container_of(link, struct netkit_link, link);

	return nkl;
}
/*
 * Attach the program carried by @link to the link's device.
 *
 * The caller must hold RTNL. Returns 0 or the error from
 * bpf_mprog_attach().
 */
static int netkit_link_prog_attach(struct bpf_link *link, u32 flags,
				   u32 id_or_fd, u64 revision)
{
	struct net_device *dev = netkit_link(link)->dev;
	struct bpf_mprog_entry *cur, *next;
	int ret;

	ASSERT_RTNL();

	cur = netkit_entry_fetch(dev, true);
	ret = bpf_mprog_attach(cur, &next, link->prog, link, NULL, flags,
			       id_or_fd, revision);
	if (ret)
		return ret;

	if (next != cur) {
		netkit_entry_update(dev, next);
		netkit_entry_sync();
	}
	bpf_mprog_commit(cur);

	return 0;
}
/*
 * bpf_link_ops->release: detach the link's program from its device.
 *
 * A link whose device is already NULL is left alone. Any failure
 * (no active entry, or a detach error) triggers a one-time warning.
 */
static void netkit_link_release(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *cur, *next;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();

	dev = nkl->dev;
	if (!dev)
		goto out;

	cur = netkit_entry_fetch(dev, false);
	if (!cur) {
		ret = -ENOENT;
		goto out;
	}

	ret = bpf_mprog_detach(cur, &next, link->prog, link, 0, 0, 0);
	if (ret)
		goto out;

	/* An empty entry is not kept around as the active one. */
	if (!bpf_mprog_total(next))
		next = NULL;
	netkit_entry_update(dev, next);
	netkit_entry_sync();
	bpf_mprog_commit(cur);
	nkl->dev = NULL;
out:
	WARN_ON_ONCE(ret);
	rtnl_unlock();
}
/*
 * bpf_link_ops->update_prog: swap the link's program for @nprog.
 *
 * @oprog, when non-NULL, must match the link's current program (-EPERM
 * otherwise). Returns -ENOLINK when the link has no device any more and
 * -ENOENT when the device has no active entry. Updating to the program that
 * is already installed succeeds and just drops the reference on @nprog.
 */
static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog,
			      struct bpf_prog *oprog)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *cur, *next;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();

	dev = nkl->dev;
	if (!dev) {
		ret = -ENOLINK;
		goto unlock;
	}

	if (oprog && oprog != link->prog) {
		ret = -EPERM;
		goto unlock;
	}

	oprog = link->prog;
	if (nprog == oprog) {
		bpf_prog_put(nprog);
		goto unlock;
	}

	cur = netkit_entry_fetch(dev, false);
	if (!cur) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = bpf_mprog_attach(cur, &next, nprog, link, oprog,
			       BPF_F_REPLACE | BPF_F_ID,
			       link->prog->aux->id, 0);
	if (ret)
		goto unlock;

	/* An in-place replace is not expected to switch entries. */
	WARN_ON_ONCE(cur != next);
	bpf_prog_put(xchg(&link->prog, nprog));
	bpf_mprog_commit(cur);
unlock:
	rtnl_unlock();
	return ret;
}
/* bpf_link_ops->dealloc: free the netkit_link that embeds @link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);

	kfree(nkl);
}
/*
 * bpf_link_ops->show_fdinfo: print the link's ifindex (0 when the link has
 * no device) and its attach type.
 */
static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
{
	const struct netkit_link *nkl = netkit_link(link);
	const char *side;
	u32 ifindex;

	/* nkl->dev is only stable under RTNL. */
	rtnl_lock();
	ifindex = nkl->dev ? nkl->dev->ifindex : 0;
	rtnl_unlock();

	side = nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer";

	seq_printf(seq, "ifindex:\t%u\n", ifindex);
	seq_printf(seq, "attach_type:\t%u (%s)\n", nkl->location, side);
}
/*
 * bpf_link_ops->fill_link_info: report ifindex (0 when the link has no
 * device) and attach type. Always returns 0.
 */
static int netkit_link_fill_info(const struct bpf_link *link,
				 struct bpf_link_info *info)
{
	const struct netkit_link *nkl = netkit_link(link);
	u32 ifindex;

	/* nkl->dev is only stable under RTNL. */
	rtnl_lock();
	ifindex = nkl->dev ? nkl->dev->ifindex : 0;
	rtnl_unlock();

	info->netkit.attach_type = nkl->location;
	info->netkit.ifindex = ifindex;

	return 0;
}
/* bpf_link_ops->detach: same work as release; always reports success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);

	return 0;
}
/* bpf_link operations for BPF_LINK_TYPE_NETKIT links. */
static const struct bpf_link_ops netkit_link_lops = {
	/* Teardown */
	.detach		= netkit_link_detach,
	.release	= netkit_link_release,
	.dealloc	= netkit_link_dealloc,

	/* Program replacement */
	.update_prog	= netkit_link_update,

	/* Introspection */
	.show_fdinfo	= netkit_link_fdinfo,
	.fill_link_info	= netkit_link_fill_info,
};
/*
 * Initialise @nkl as a netkit link for @prog on @dev and prime it.
 *
 * Returns the result of bpf_link_prime().
 */
static int netkit_link_init(struct netkit_link *nkl,
			    struct bpf_link_primer *link_primer,
			    const union bpf_attr *attr,
			    struct net_device *dev,
			    struct bpf_prog *prog)
{
	struct bpf_link *link = &nkl->link;

	bpf_link_init(link, BPF_LINK_TYPE_NETKIT, &netkit_link_lops, prog);

	nkl->dev = dev;
	nkl->location = attr->link_create.attach_type;

	return bpf_link_prime(link, link_primer);
}
/*
 * Create a BPF link attaching @prog to the netkit device selected by
 * attr->link_create. Takes RTNL for the duration.
 *
 * Returns the result of bpf_link_settle() on success, or a negative errno.
 */
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct netkit_link *nkl;
	struct net_device *dev;
	int ret;

	rtnl_lock();

	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->link_create.target_ifindex,
			       attr->link_create.attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto unlock;
	}

	nkl = kzalloc(sizeof(*nkl), GFP_KERNEL_ACCOUNT);
	if (!nkl) {
		ret = -ENOMEM;
		goto unlock;
	}

	ret = netkit_link_init(nkl, &link_primer, attr, dev, prog);
	if (ret) {
		/* Priming failed: the allocation is still ours to free. */
		kfree(nkl);
		goto unlock;
	}

	ret = netkit_link_prog_attach(&nkl->link,
				      attr->link_create.flags,
				      attr->link_create.netkit.relative_fd,
				      attr->link_create.netkit.expected_revision);
	if (!ret) {
		ret = bpf_link_settle(&link_primer);
	} else {
		/* Clear dev first so that release has nothing to detach. */
		nkl->dev = NULL;
		bpf_link_cleanup(&link_primer);
	}
unlock:
	rtnl_unlock();
	return ret;
}
/*
 * Drop every program attached to @dev.
 *
 * The active entry is unpublished first. Afterwards link-backed programs
 * only have their link's device pointer cleared, while directly attached
 * programs have their reference dropped.
 */
static void netkit_release_all(struct net_device *dev)
{
	struct bpf_mprog_entry *cur = netkit_entry_fetch(dev, false);
	struct bpf_tuple tuple = {};
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;

	if (!cur)
		return;

	netkit_entry_update(dev, NULL);
	netkit_entry_sync();

	bpf_mprog_foreach_tuple(cur, fp, cp, tuple) {
		if (!tuple.link)
			bpf_prog_put(tuple.prog);
		else
			netkit_link(tuple.link)->dev = NULL;
	}
}
/* ndo_uninit: release all attached BPF programs of @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}
static void netkit_del_link(struct net_device *dev, struct list_head *head)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
RCU_INIT_POINTER(nk->peer, NULL);
unregister_netdevice_queue(dev, head);
if (peer) {
nk = netkit_priv(peer);
RCU_INIT_POINTER(nk->peer, NULL);
unregister_netdevice_queue(peer, head);
}
}
static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
enum netkit_action policy;
struct nlattr *attr;
int err;
if (!nk->primary) {
NL_SET_ERR_MSG(extack,
"netkit link settings can be changed only through the primary device");
return -EACCES;
}
if (data[IFLA_NETKIT_MODE]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE],
"netkit link operating mode cannot be changed after device creation");
return -EACCES;
}
if (data[IFLA_NETKIT_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}
if (data[IFLA_NETKIT_PEER_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}
if (data[IFLA_NETKIT_PEER_INFO]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
"netkit peer info cannot be changed after device creation");
return -EINVAL;
}
if (data[IFLA_NETKIT_POLICY]) {
attr = data[IFLA_NETKIT_POLICY];
policy = nla_get_u32(attr);
err = netkit_check_policy(policy, attr, extack);
if (err)
return err;
WRITE_ONCE(nk->policy, policy);
}
if (data[IFLA_NETKIT_PEER_POLICY]) {
err = -EOPNOTSUPP;
attr = data[IFLA_NETKIT_PEER_POLICY];
policy = nla_get_u32(attr);
if (peer)
err = netkit_check_policy(policy, attr, extack);
if (err)
return err;
nk = netkit_priv(peer);
WRITE_ONCE(nk->policy, policy);
}
return 0;
}
/*
 * rtnl_link_ops->get_size: space needed by netkit_fill_info().
 *
 * Five u32 attributes (IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY,
 * IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, IFLA_NETKIT_MODE) plus one
 * u8 attribute (IFLA_NETKIT_PRIMARY).
 */
static size_t netkit_get_size(const struct net_device *dev)
{
	return 5 * nla_total_size(sizeof(u32)) +
	       nla_total_size(sizeof(u8));
}
/*
 * rtnl_link_ops->fill_info: dump the device's own settings and, when a
 * peer exists, the peer's policy and scrub settings.
 *
 * Returns 0, or -EMSGSIZE as soon as one attribute does not fit.
 */
static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);

	if (nla_put_u8(skb, IFLA_NETKIT_PRIMARY, nk->primary) ||
	    nla_put_u32(skb, IFLA_NETKIT_POLICY, nk->policy) ||
	    nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode) ||
	    nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
		return -EMSGSIZE;

	if (!peer)
		return 0;

	nk = netkit_priv(peer);
	if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy) ||
	    nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
		return -EMSGSIZE;

	return 0;
}
/* Netlink attribute policy for the IFLA_NETKIT_* attributes. */
static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	/* Read-only: reported by fill_info, rejected on input. */
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },

	/* Creation-time settings. */
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),

	/* Default xmit policies; values are vetted by netkit_check_policy(). */
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
};
/* rtnetlink link type "netkit". */
static struct rtnl_link_ops netkit_link_ops = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct netkit),

	/* Attribute parsing */
	.maxtype	= IFLA_NETKIT_MAX,
	.policy		= netkit_policy,
	.peer_type	= IFLA_NETKIT_PEER_INFO,
	.validate	= netkit_validate,

	/* Device lifecycle */
	.setup		= netkit_setup,
	.newlink	= netkit_new_link,
	.changelink	= netkit_change_link,
	.dellink	= netkit_del_link,

	/* Dumping */
	.get_link_net	= netkit_get_link_net,
	.get_size	= netkit_get_size,
	.fill_info	= netkit_fill_info,
};
/*
 * Module init: register the "netkit" link type.
 *
 * The build-time checks pin the netkit verdict values to the tcx ones.
 */
static __init int netkit_init(void)
{
	BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT);
	BUILD_BUG_ON((int)NETKIT_PASS != (int)TCX_PASS);
	BUILD_BUG_ON((int)NETKIT_DROP != (int)TCX_DROP);
	BUILD_BUG_ON((int)NETKIT_REDIRECT != (int)TCX_REDIRECT);

	return rtnl_link_register(&netkit_link_ops);
}
/* Module exit: unregister the "netkit" link type. */
static __exit void netkit_exit(void)
{
	rtnl_link_unregister(&netkit_link_ops);
}
/* Module entry/exit points and metadata; the alias is built from DRV_NAME. */
module_init(netkit_init);
module_exit(netkit_exit);
MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);