mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-15 09:34:17 +00:00
Daniel Borkmann says: ==================== bpf-next 2021-08-10 We've added 31 non-merge commits during the last 8 day(s) which contain a total of 28 files changed, 3644 insertions(+), 519 deletions(-). 1) Native XDP support for bonding driver & related BPF selftests, from Jussi Maki. 2) Large batch of new BPF JIT tests for test_bpf.ko that came out as a result from 32-bit MIPS JIT development, from Johan Almbladh. 3) Rewrite of netcnt BPF selftest and merge into test_progs, from Stanislav Fomichev. 4) Fix XDP bpf_prog_test_run infra after net to net-next merge, from Andrii Nakryiko. 5) Follow-up fix in unix_bpf_update_proto() to enforce socket type, from Cong Wang. 6) Fix bpf-iter-tcp4 selftest to print the correct dest IP, from Jose Blanquicet. 7) Various misc BPF XDP sample improvements, from Niklas Söderlund, Matthew Cover, and Muhammad Falak R Wani. * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (31 commits) bpf, tests: Add tail call test suite bpf, tests: Add tests for BPF_CMPXCHG bpf, tests: Add tests for atomic operations bpf, tests: Add test for 32-bit context pointer argument passing bpf, tests: Add branch conversion JIT test bpf, tests: Add word-order tests for load/store of double words bpf, tests: Add tests for ALU operations implemented with function calls bpf, tests: Add more ALU64 BPF_MUL tests bpf, tests: Add more BPF_LSH/RSH/ARSH tests for ALU64 bpf, tests: Add more ALU32 tests for BPF_LSH/RSH/ARSH bpf, tests: Add more tests of ALU32 and ALU64 bitwise operations bpf, tests: Fix typos in test case descriptions bpf, tests: Add BPF_MOV tests for zero and sign extension bpf, tests: Add BPF_JMP32 test cases samples, bpf: Add an explict comment to handle nested vlan tagging. selftests/bpf: Add tests for XDP bonding selftests/bpf: Fix xdp_tx.c prog section name net, core: Allow netdev_lower_get_next_private_rcu in bh context bpf, devmap: Exclude XDP broadcast to master device net, bonding: Add XDP support to the bonding driver ... 
==================== Link: https://lore.kernel.org/r/20210810130038.16927-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
d1a4e0a957
@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond)
|
||||
}
|
||||
}
|
||||
|
||||
static bool bond_xdp_check(struct bonding *bond)
|
||||
{
|
||||
switch (BOND_MODE(bond)) {
|
||||
case BOND_MODE_ROUNDROBIN:
|
||||
case BOND_MODE_ACTIVEBACKUP:
|
||||
case BOND_MODE_8023AD:
|
||||
case BOND_MODE_XOR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*---------------------------------- VLAN -----------------------------------*/
|
||||
|
||||
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
|
||||
@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
|
||||
bond_update_slave_arr(bond, NULL);
|
||||
|
||||
|
||||
if (!slave_dev->netdev_ops->ndo_bpf ||
|
||||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
|
||||
if (bond->xdp_prog) {
|
||||
NL_SET_ERR_MSG(extack, "Slave does not support XDP");
|
||||
slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
|
||||
res = -EOPNOTSUPP;
|
||||
goto err_sysfs_del;
|
||||
}
|
||||
} else {
|
||||
struct netdev_bpf xdp = {
|
||||
.command = XDP_SETUP_PROG,
|
||||
.flags = 0,
|
||||
.prog = bond->xdp_prog,
|
||||
.extack = extack,
|
||||
};
|
||||
|
||||
if (dev_xdp_prog_count(slave_dev) > 0) {
|
||||
NL_SET_ERR_MSG(extack,
|
||||
"Slave has XDP program loaded, please unload before enslaving");
|
||||
slave_err(bond_dev, slave_dev,
|
||||
"Slave has XDP program loaded, please unload before enslaving\n");
|
||||
res = -EOPNOTSUPP;
|
||||
goto err_sysfs_del;
|
||||
}
|
||||
|
||||
res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
|
||||
if (res < 0) {
|
||||
/* ndo_bpf() sets extack error message */
|
||||
slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
|
||||
goto err_sysfs_del;
|
||||
}
|
||||
if (bond->xdp_prog)
|
||||
bpf_prog_inc(bond->xdp_prog);
|
||||
}
|
||||
|
||||
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
|
||||
bond_is_active_slave(new_slave) ? "an active" : "a backup",
|
||||
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
|
||||
@ -2252,6 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev,
|
||||
/* recompute stats just before removing the slave */
|
||||
bond_get_stats(bond->dev, &bond->bond_stats);
|
||||
|
||||
if (bond->xdp_prog) {
|
||||
struct netdev_bpf xdp = {
|
||||
.command = XDP_SETUP_PROG,
|
||||
.flags = 0,
|
||||
.prog = NULL,
|
||||
.extack = NULL,
|
||||
};
|
||||
if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
|
||||
slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
|
||||
}
|
||||
|
||||
/* unregister rx_handler early so bond_handle_frame wouldn't be called
|
||||
* for this slave anymore.
|
||||
*/
|
||||
@ -3614,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = {
|
||||
|
||||
/*---------------------------- Hashing Policies -----------------------------*/
|
||||
|
||||
/* L2 hash helper */
|
||||
static inline u32 bond_eth_hash(struct sk_buff *skb)
|
||||
/* Helper to access data in a packet, with or without a backing skb.
|
||||
* If skb is given the data is linearized if necessary via pskb_may_pull.
|
||||
*/
|
||||
static inline const void *bond_pull_data(struct sk_buff *skb,
|
||||
const void *data, int hlen, int n)
|
||||
{
|
||||
struct ethhdr *ep, hdr_tmp;
|
||||
if (likely(n <= hlen))
|
||||
return data;
|
||||
else if (skb && likely(pskb_may_pull(skb, n)))
|
||||
return skb->head;
|
||||
|
||||
ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
|
||||
if (ep)
|
||||
return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
|
||||
int *noff, int *proto, bool l34)
|
||||
/* L2 hash helper */
|
||||
static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
|
||||
{
|
||||
struct ethhdr *ep;
|
||||
|
||||
data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
|
||||
if (!data)
|
||||
return 0;
|
||||
|
||||
ep = (struct ethhdr *)(data + mhoff);
|
||||
return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
|
||||
}
|
||||
|
||||
static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
|
||||
int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
|
||||
{
|
||||
const struct ipv6hdr *iph6;
|
||||
const struct iphdr *iph;
|
||||
|
||||
if (skb->protocol == htons(ETH_P_IP)) {
|
||||
if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
|
||||
if (l2_proto == htons(ETH_P_IP)) {
|
||||
data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
|
||||
if (!data)
|
||||
return false;
|
||||
iph = (const struct iphdr *)(skb->data + *noff);
|
||||
|
||||
iph = (const struct iphdr *)(data + *nhoff);
|
||||
iph_to_flow_copy_v4addrs(fk, iph);
|
||||
*noff += iph->ihl << 2;
|
||||
*nhoff += iph->ihl << 2;
|
||||
if (!ip_is_fragment(iph))
|
||||
*proto = iph->protocol;
|
||||
} else if (skb->protocol == htons(ETH_P_IPV6)) {
|
||||
if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
|
||||
*ip_proto = iph->protocol;
|
||||
} else if (l2_proto == htons(ETH_P_IPV6)) {
|
||||
data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
|
||||
if (!data)
|
||||
return false;
|
||||
iph6 = (const struct ipv6hdr *)(skb->data + *noff);
|
||||
|
||||
iph6 = (const struct ipv6hdr *)(data + *nhoff);
|
||||
iph_to_flow_copy_v6addrs(fk, iph6);
|
||||
*noff += sizeof(*iph6);
|
||||
*proto = iph6->nexthdr;
|
||||
*nhoff += sizeof(*iph6);
|
||||
*ip_proto = iph6->nexthdr;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (l34 && *proto >= 0)
|
||||
fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
|
||||
if (l34 && *ip_proto >= 0)
|
||||
fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
|
||||
static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
|
||||
{
|
||||
struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
|
||||
struct ethhdr *mac_hdr;
|
||||
u32 srcmac_vendor = 0, srcmac_dev = 0;
|
||||
u16 vlan;
|
||||
int i;
|
||||
|
||||
data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
|
||||
if (!data)
|
||||
return 0;
|
||||
mac_hdr = (struct ethhdr *)(data + mhoff);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
|
||||
|
||||
@ -3678,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* Extract the appropriate headers based on bond's xmit policy */
|
||||
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
|
||||
struct flow_keys *fk)
|
||||
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
|
||||
__be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
|
||||
{
|
||||
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
|
||||
int noff, proto = -1;
|
||||
int ip_proto = -1;
|
||||
|
||||
switch (bond->params.xmit_policy) {
|
||||
case BOND_XMIT_POLICY_ENCAP23:
|
||||
case BOND_XMIT_POLICY_ENCAP34:
|
||||
memset(fk, 0, sizeof(*fk));
|
||||
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
|
||||
fk, NULL, 0, 0, 0, 0);
|
||||
fk, data, l2_proto, nhoff, hlen, 0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fk->ports.ports = 0;
|
||||
memset(&fk->icmp, 0, sizeof(fk->icmp));
|
||||
noff = skb_network_offset(skb);
|
||||
if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
|
||||
if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
|
||||
return false;
|
||||
|
||||
/* ICMP error packets contains at least 8 bytes of the header
|
||||
@ -3705,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
|
||||
* to correlate ICMP error packets within the same flow which
|
||||
* generated the error.
|
||||
*/
|
||||
if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
|
||||
skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
|
||||
skb_transport_offset(skb),
|
||||
skb_headlen(skb));
|
||||
if (proto == IPPROTO_ICMP) {
|
||||
if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
|
||||
skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
|
||||
if (ip_proto == IPPROTO_ICMP) {
|
||||
if (!icmp_is_err(fk->icmp.type))
|
||||
return true;
|
||||
|
||||
noff += sizeof(struct icmphdr);
|
||||
} else if (proto == IPPROTO_ICMPV6) {
|
||||
nhoff += sizeof(struct icmphdr);
|
||||
} else if (ip_proto == IPPROTO_ICMPV6) {
|
||||
if (!icmpv6_is_err(fk->icmp.type))
|
||||
return true;
|
||||
|
||||
noff += sizeof(struct icmp6hdr);
|
||||
nhoff += sizeof(struct icmp6hdr);
|
||||
}
|
||||
return bond_flow_ip(skb, fk, &noff, &proto, l34);
|
||||
return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -3736,6 +3817,36 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
|
||||
return hash >> 1;
|
||||
}
|
||||
|
||||
/* Generate hash based on xmit policy. If @skb is given it is used to linearize
|
||||
* the data as required, but this function can be used without it if the data is
|
||||
* known to be linear (e.g. with xdp_buff).
|
||||
*/
|
||||
static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
|
||||
__be16 l2_proto, int mhoff, int nhoff, int hlen)
|
||||
{
|
||||
struct flow_keys flow;
|
||||
u32 hash;
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
|
||||
return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
|
||||
!bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
|
||||
return bond_eth_hash(skb, data, mhoff, hlen);
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
|
||||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
|
||||
hash = bond_eth_hash(skb, data, mhoff, hlen);
|
||||
} else {
|
||||
if (flow.icmp.id)
|
||||
memcpy(&hash, &flow.icmp, sizeof(hash));
|
||||
else
|
||||
memcpy(&hash, &flow.ports.ports, sizeof(hash));
|
||||
}
|
||||
|
||||
return bond_ip_hash(hash, &flow);
|
||||
}
|
||||
|
||||
/**
|
||||
* bond_xmit_hash - generate a hash value based on the xmit policy
|
||||
* @bond: bonding device
|
||||
@ -3746,31 +3857,33 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
|
||||
*/
|
||||
u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
|
||||
{
|
||||
struct flow_keys flow;
|
||||
u32 hash;
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
|
||||
skb->l4_hash)
|
||||
return skb->hash;
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
|
||||
return bond_vlan_srcmac_hash(skb);
|
||||
return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
|
||||
skb->mac_header, skb->network_header,
|
||||
skb_headlen(skb));
|
||||
}
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
|
||||
!bond_flow_dissect(bond, skb, &flow))
|
||||
return bond_eth_hash(skb);
|
||||
/**
|
||||
* bond_xmit_hash_xdp - generate a hash value based on the xmit policy
|
||||
* @bond: bonding device
|
||||
* @xdp: buffer to use for headers
|
||||
*
|
||||
* The XDP variant of bond_xmit_hash.
|
||||
*/
|
||||
static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
|
||||
{
|
||||
struct ethhdr *eth;
|
||||
|
||||
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
|
||||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
|
||||
hash = bond_eth_hash(skb);
|
||||
} else {
|
||||
if (flow.icmp.id)
|
||||
memcpy(&hash, &flow.icmp, sizeof(hash));
|
||||
else
|
||||
memcpy(&hash, &flow.ports.ports, sizeof(hash));
|
||||
}
|
||||
if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
|
||||
return 0;
|
||||
|
||||
return bond_ip_hash(hash, &flow);
|
||||
eth = (struct ethhdr *)xdp->data;
|
||||
|
||||
return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
|
||||
sizeof(struct ethhdr), xdp->data_end - xdp->data);
|
||||
}
|
||||
|
||||
/*-------------------------- Device entry points ----------------------------*/
|
||||
@ -4421,6 +4534,47 @@ non_igmp:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
|
||||
struct xdp_buff *xdp)
|
||||
{
|
||||
struct slave *slave;
|
||||
int slave_cnt;
|
||||
u32 slave_id;
|
||||
const struct ethhdr *eth;
|
||||
void *data = xdp->data;
|
||||
|
||||
if (data + sizeof(struct ethhdr) > xdp->data_end)
|
||||
goto non_igmp;
|
||||
|
||||
eth = (struct ethhdr *)data;
|
||||
data += sizeof(struct ethhdr);
|
||||
|
||||
/* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
|
||||
if (eth->h_proto == htons(ETH_P_IP)) {
|
||||
const struct iphdr *iph;
|
||||
|
||||
if (data + sizeof(struct iphdr) > xdp->data_end)
|
||||
goto non_igmp;
|
||||
|
||||
iph = (struct iphdr *)data;
|
||||
|
||||
if (iph->protocol == IPPROTO_IGMP) {
|
||||
slave = rcu_dereference(bond->curr_active_slave);
|
||||
if (slave)
|
||||
return slave;
|
||||
return bond_get_slave_by_id(bond, 0);
|
||||
}
|
||||
}
|
||||
|
||||
non_igmp:
|
||||
slave_cnt = READ_ONCE(bond->slave_cnt);
|
||||
if (likely(slave_cnt)) {
|
||||
slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
|
||||
return bond_get_slave_by_id(bond, slave_id);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
|
||||
struct net_device *bond_dev)
|
||||
{
|
||||
@ -4434,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
|
||||
return bond_tx_drop(bond_dev, skb);
|
||||
}
|
||||
|
||||
static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
|
||||
struct sk_buff *skb)
|
||||
static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
|
||||
{
|
||||
return rcu_dereference(bond->curr_active_slave);
|
||||
}
|
||||
@ -4449,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
|
||||
struct bonding *bond = netdev_priv(bond_dev);
|
||||
struct slave *slave;
|
||||
|
||||
slave = bond_xmit_activebackup_slave_get(bond, skb);
|
||||
slave = bond_xmit_activebackup_slave_get(bond);
|
||||
if (slave)
|
||||
return bond_dev_queue_xmit(bond, skb, slave->dev);
|
||||
|
||||
@ -4637,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
|
||||
return slave;
|
||||
}
|
||||
|
||||
static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
|
||||
struct xdp_buff *xdp)
|
||||
{
|
||||
struct bond_up_slave *slaves;
|
||||
unsigned int count;
|
||||
u32 hash;
|
||||
|
||||
hash = bond_xmit_hash_xdp(bond, xdp);
|
||||
slaves = rcu_dereference(bond->usable_slaves);
|
||||
count = slaves ? READ_ONCE(slaves->count) : 0;
|
||||
if (unlikely(!count))
|
||||
return NULL;
|
||||
|
||||
return slaves->arr[hash % count];
|
||||
}
|
||||
|
||||
/* Use this Xmit function for 3AD as well as XOR modes. The current
|
||||
* usable slave array is formed in the control path. The xmit function
|
||||
* just calculates hash and sends the packet out.
|
||||
@ -4747,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
|
||||
slave = bond_xmit_roundrobin_slave_get(bond, skb);
|
||||
break;
|
||||
case BOND_MODE_ACTIVEBACKUP:
|
||||
slave = bond_xmit_activebackup_slave_get(bond, skb);
|
||||
slave = bond_xmit_activebackup_slave_get(bond);
|
||||
break;
|
||||
case BOND_MODE_8023AD:
|
||||
case BOND_MODE_XOR:
|
||||
@ -4921,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct net_device *
|
||||
bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
|
||||
{
|
||||
struct bonding *bond = netdev_priv(bond_dev);
|
||||
struct slave *slave;
|
||||
|
||||
/* Caller needs to hold rcu_read_lock() */
|
||||
|
||||
switch (BOND_MODE(bond)) {
|
||||
case BOND_MODE_ROUNDROBIN:
|
||||
slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
|
||||
break;
|
||||
|
||||
case BOND_MODE_ACTIVEBACKUP:
|
||||
slave = bond_xmit_activebackup_slave_get(bond);
|
||||
break;
|
||||
|
||||
case BOND_MODE_8023AD:
|
||||
case BOND_MODE_XOR:
|
||||
slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Should never happen. Mode guarded by bond_xdp_check() */
|
||||
netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
|
||||
WARN_ON_ONCE(1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (slave)
|
||||
return slave->dev;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ndo_xdp_xmit for the bond: send each frame through the slave selected by
 * bond_xdp_get_xmit_slave(), one frame per ndo_xdp_xmit() call on the slave.
 *
 * Stops at the first frame that has no slave or that the slave did not
 * transmit. Returns the number of frames handed off successfully; if the
 * very first frame failed with a negative error (or @n is 0), that error is
 * returned instead.
 */
static int bond_xdp_xmit(struct net_device *bond_dev,
			 int n, struct xdp_frame **frames, u32 flags)
{
	int err = -ENXIO;
	int nxmit = 0;

	rcu_read_lock();

	while (nxmit < n) {
		struct xdp_frame *single[] = { frames[nxmit] };
		struct net_device *slave_dev;
		struct xdp_buff xdp;

		/* Slave selection works on an xdp_buff view of the frame. */
		xdp_convert_frame_to_buff(single[0], &xdp);

		slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
		if (!slave_dev) {
			err = -ENXIO;
			break;
		}

		err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, single, flags);
		if (err < 1)
			break;

		nxmit++;
	}

	rcu_read_unlock();

	/* If error happened on the first frame then we can pass the error up, otherwise
	 * report the number of frames that were xmitted.
	 */
	if (err < 0 && nxmit == 0)
		return err;

	return nxmit;
}
|
||||
|
||||
static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct bonding *bond = netdev_priv(dev);
|
||||
struct list_head *iter;
|
||||
struct slave *slave, *rollback_slave;
|
||||
struct bpf_prog *old_prog;
|
||||
struct netdev_bpf xdp = {
|
||||
.command = XDP_SETUP_PROG,
|
||||
.flags = 0,
|
||||
.prog = prog,
|
||||
.extack = extack,
|
||||
};
|
||||
int err;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
if (!bond_xdp_check(bond))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
old_prog = bond->xdp_prog;
|
||||
bond->xdp_prog = prog;
|
||||
|
||||
bond_for_each_slave(bond, slave, iter) {
|
||||
struct net_device *slave_dev = slave->dev;
|
||||
|
||||
if (!slave_dev->netdev_ops->ndo_bpf ||
|
||||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
|
||||
NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
|
||||
slave_err(dev, slave_dev, "Slave does not support XDP\n");
|
||||
err = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (dev_xdp_prog_count(slave_dev) > 0) {
|
||||
NL_SET_ERR_MSG(extack,
|
||||
"Slave has XDP program loaded, please unload before enslaving");
|
||||
slave_err(dev, slave_dev,
|
||||
"Slave has XDP program loaded, please unload before enslaving\n");
|
||||
err = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
|
||||
if (err < 0) {
|
||||
/* ndo_bpf() sets extack error message */
|
||||
slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
|
||||
goto err;
|
||||
}
|
||||
if (prog)
|
||||
bpf_prog_inc(prog);
|
||||
}
|
||||
|
||||
if (old_prog)
|
||||
bpf_prog_put(old_prog);
|
||||
|
||||
if (prog)
|
||||
static_branch_inc(&bpf_master_redirect_enabled_key);
|
||||
else
|
||||
static_branch_dec(&bpf_master_redirect_enabled_key);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
/* unwind the program changes */
|
||||
bond->xdp_prog = old_prog;
|
||||
xdp.prog = old_prog;
|
||||
xdp.extack = NULL; /* do not overwrite original error */
|
||||
|
||||
bond_for_each_slave(bond, rollback_slave, iter) {
|
||||
struct net_device *slave_dev = rollback_slave->dev;
|
||||
int err_unwind;
|
||||
|
||||
if (slave == rollback_slave)
|
||||
break;
|
||||
|
||||
err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
|
||||
if (err_unwind < 0)
|
||||
slave_err(dev, slave_dev,
|
||||
"Error %d when unwinding XDP program change\n", err_unwind);
|
||||
else if (xdp.prog)
|
||||
bpf_prog_inc(xdp.prog);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
|
||||
{
|
||||
switch (xdp->command) {
|
||||
case XDP_SETUP_PROG:
|
||||
return bond_xdp_set(dev, xdp->prog, xdp->extack);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
|
||||
{
|
||||
if (speed == 0 || speed == SPEED_UNKNOWN)
|
||||
@ -5009,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = {
|
||||
.ndo_features_check = passthru_features_check,
|
||||
.ndo_get_xmit_slave = bond_xmit_get_slave,
|
||||
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
|
||||
.ndo_bpf = bond_xdp,
|
||||
.ndo_xdp_xmit = bond_xdp_xmit,
|
||||
.ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
|
||||
};
|
||||
|
||||
static const struct device_type bond_type = {
|
||||
|
@ -776,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
|
||||
|
||||
DECLARE_BPF_DISPATCHER(xdp)
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
|
||||
|
||||
u32 xdp_master_redirect(struct xdp_buff *xdp);
|
||||
|
||||
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
|
||||
struct xdp_buff *xdp)
|
||||
{
|
||||
@ -783,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
|
||||
* under local_bh_disable(), which provides the needed RCU protection
|
||||
* for accessing map entries.
|
||||
*/
|
||||
return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
|
||||
u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
|
||||
|
||||
if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
|
||||
if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
|
||||
act = xdp_master_redirect(xdp);
|
||||
}
|
||||
|
||||
return act;
|
||||
}
|
||||
|
||||
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
|
||||
|
@ -1318,6 +1318,9 @@ struct netdev_net_notifier {
|
||||
* that got dropped are freed/returned via xdp_return_frame().
|
||||
* Returns negative number, means general error invoking ndo, meaning
|
||||
* no frames were xmit'ed and core-caller will free all frames.
|
||||
* struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
|
||||
* struct xdp_buff *xdp);
|
||||
* Get the xmit slave of master device based on the xdp_buff.
|
||||
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
|
||||
* This function is used to wake up the softirq, ksoftirqd or kthread
|
||||
* responsible for sending and/or receiving packets on a specific
|
||||
@ -1545,6 +1548,8 @@ struct net_device_ops {
|
||||
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
|
||||
struct xdp_frame **xdp,
|
||||
u32 flags);
|
||||
struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
|
||||
struct xdp_buff *xdp);
|
||||
int (*ndo_xsk_wakeup)(struct net_device *dev,
|
||||
u32 queue_id, u32 flags);
|
||||
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
|
||||
@ -4076,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
|
||||
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
|
||||
int fd, int expected_fd, u32 flags);
|
||||
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
|
||||
u8 dev_xdp_prog_count(struct net_device *dev);
|
||||
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
|
||||
|
||||
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
|
||||
|
@ -259,6 +259,7 @@ struct bonding {
|
||||
/* protecting ipsec_list */
|
||||
spinlock_t ipsec_lock;
|
||||
#endif /* CONFIG_XFRM_OFFLOAD */
|
||||
struct bpf_prog *xdp_prog;
|
||||
};
|
||||
|
||||
#define bond_slave_get_rcu(dev) \
|
||||
|
@ -1562,7 +1562,7 @@ select_insn:
|
||||
|
||||
if (unlikely(index >= array->map.max_entries))
|
||||
goto out;
|
||||
if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
|
||||
if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
|
||||
goto out;
|
||||
|
||||
tail_call_cnt++;
|
||||
|
@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
|
||||
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
|
||||
}
|
||||
|
||||
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
|
||||
int exclude_ifindex)
|
||||
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
|
||||
{
|
||||
if (!obj || obj->dev->ifindex == exclude_ifindex ||
|
||||
if (!obj ||
|
||||
!obj->dev->netdev_ops->ndo_xdp_xmit)
|
||||
return false;
|
||||
|
||||
@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return true if @ifindex appears among the first @num_excluded entries of
 * @excluded, false otherwise (including when @num_excluded is 0).
 */
static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
{
	int pos;

	/* Scan from the last entry down to the first. */
	for (pos = num_excluded - 1; pos >= 0; pos--) {
		if (excluded[pos] == ifindex)
			return true;
	}

	return false;
}
|
||||
|
||||
/* Get ifindex of each upper device. 'indexes' must be able to hold at
|
||||
* least MAX_NEST_DEV elements.
|
||||
* Returns the number of ifindexes added.
|
||||
*/
|
||||
static int get_upper_ifindexes(struct net_device *dev, int *indexes)
|
||||
{
|
||||
struct net_device *upper;
|
||||
struct list_head *iter;
|
||||
int n = 0;
|
||||
|
||||
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
|
||||
indexes[n++] = upper->ifindex;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
struct bpf_map *map, bool exclude_ingress)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
|
||||
struct bpf_dtab_netdev *dst, *last_dst = NULL;
|
||||
int excluded_devices[1+MAX_NEST_DEV];
|
||||
struct hlist_head *head;
|
||||
struct xdp_frame *xdpf;
|
||||
int num_excluded = 0;
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
if (exclude_ingress) {
|
||||
num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
|
||||
excluded_devices[num_excluded++] = dev_rx->ifindex;
|
||||
}
|
||||
|
||||
xdpf = xdp_convert_buff_to_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
dst = rcu_dereference_check(dtab->netdev_map[i],
|
||||
rcu_read_lock_bh_held());
|
||||
if (!is_valid_dst(dst, xdp, exclude_ifindex))
|
||||
if (!is_valid_dst(dst, xdp))
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
|
||||
continue;
|
||||
|
||||
/* we only need n-1 clones; last_dst enqueued below */
|
||||
@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
head = dev_map_index_hash(dtab, i);
|
||||
hlist_for_each_entry_rcu(dst, head, index_hlist,
|
||||
lockdep_is_held(&dtab->index_lock)) {
|
||||
if (!is_valid_dst(dst, xdp, exclude_ifindex))
|
||||
if (!is_valid_dst(dst, xdp))
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded,
|
||||
dst->dev->ifindex))
|
||||
continue;
|
||||
|
||||
/* we only need n-1 clones; last_dst enqueued below */
|
||||
@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
|
||||
bool exclude_ingress)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
|
||||
struct bpf_dtab_netdev *dst, *last_dst = NULL;
|
||||
int excluded_devices[1+MAX_NEST_DEV];
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *next;
|
||||
int num_excluded = 0;
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
if (exclude_ingress) {
|
||||
num_excluded = get_upper_ifindexes(dev, excluded_devices);
|
||||
excluded_devices[num_excluded++] = dev->ifindex;
|
||||
}
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
dst = rcu_dereference_check(dtab->netdev_map[i],
|
||||
rcu_read_lock_bh_held());
|
||||
if (!dst || dst->dev->ifindex == exclude_ifindex)
|
||||
if (!dst)
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
|
||||
continue;
|
||||
|
||||
/* we only need n-1 clones; last_dst enqueued below */
|
||||
@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
|
||||
return err;
|
||||
|
||||
last_dst = dst;
|
||||
|
||||
}
|
||||
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
|
||||
for (i = 0; i < dtab->n_buckets; i++) {
|
||||
head = dev_map_index_hash(dtab, i);
|
||||
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
|
||||
if (!dst || dst->dev->ifindex == exclude_ifindex)
|
||||
if (!dst)
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded,
|
||||
dst->dev->ifindex))
|
||||
continue;
|
||||
|
||||
/* we only need n-1 clones; last_dst enqueued below */
|
||||
|
2333
lib/test_bpf.c
2333
lib/test_bpf.c
File diff suppressed because it is too large
Load Diff
@ -763,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
|
||||
prog->expected_attach_type == BPF_XDP_CPUMAP)
|
||||
return -EINVAL;
|
||||
if (kattr->test.ctx_in || kattr->test.ctx_out)
|
||||
return -EINVAL;
|
||||
|
||||
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
|
||||
if (IS_ERR(ctx))
|
||||
return PTR_ERR(ctx);
|
||||
|
@ -7532,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
|
||||
{
|
||||
struct netdev_adjacent *lower;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
|
||||
|
||||
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
|
||||
|
||||
@ -9297,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
|
||||
return dev->xdp_state[mode].prog;
|
||||
}
|
||||
|
||||
static u8 dev_xdp_prog_count(struct net_device *dev)
|
||||
u8 dev_xdp_prog_count(struct net_device *dev)
|
||||
{
|
||||
u8 count = 0;
|
||||
int i;
|
||||
@ -9307,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
|
||||
count++;
|
||||
return count;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
|
||||
|
||||
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
|
||||
{
|
||||
@ -9400,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
|
||||
{
|
||||
unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
|
||||
struct bpf_prog *cur_prog;
|
||||
struct net_device *upper;
|
||||
struct list_head *iter;
|
||||
enum bpf_xdp_mode mode;
|
||||
bpf_op_t bpf_op;
|
||||
int err;
|
||||
@ -9438,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* don't allow if an upper device already has a program */
|
||||
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
|
||||
if (dev_xdp_prog_count(upper) > 0) {
|
||||
NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
cur_prog = dev_xdp_prog(dev, mode);
|
||||
/* can't replace attached prog with link */
|
||||
if (link && cur_prog) {
|
||||
|
@ -3933,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
|
||||
}
|
||||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
|
||||
EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
|
||||
|
||||
u32 xdp_master_redirect(struct xdp_buff *xdp)
|
||||
{
|
||||
struct net_device *master, *slave;
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
||||
master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
|
||||
slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
|
||||
if (slave && slave != xdp->rxq->dev) {
|
||||
/* The target device is different from the receiving device, so
|
||||
* redirect it to the new device.
|
||||
* Using XDP_REDIRECT gets the correct behaviour from XDP enabled
|
||||
* drivers to unmap the packet from their rx ring.
|
||||
*/
|
||||
ri->tgt_index = slave->ifindex;
|
||||
ri->map_id = INT_MAX;
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
return XDP_REDIRECT;
|
||||
}
|
||||
return XDP_TX;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_master_redirect);
|
||||
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
|
@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops)
|
||||
|
||||
int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
|
||||
{
|
||||
if (sk->sk_type != SOCK_DGRAM)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (restore) {
|
||||
sk->sk_write_space = psock->saved_write_space;
|
||||
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
|
||||
|
@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
|
||||
|
||||
h_proto = eth->h_proto;
|
||||
|
||||
/* Handle VLAN tagged packet */
|
||||
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
|
||||
struct vlan_hdr *vhdr;
|
||||
|
||||
@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
|
||||
return rc;
|
||||
h_proto = vhdr->h_vlan_encapsulated_proto;
|
||||
}
|
||||
/* Handle double VLAN tagged packet */
|
||||
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
|
||||
struct vlan_hdr *vhdr;
|
||||
|
||||
|
@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
|
||||
|
||||
h_proto = eth->h_proto;
|
||||
|
||||
/* Handle VLAN tagged packet */
|
||||
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
|
||||
struct vlan_hdr *vhdr;
|
||||
|
||||
@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
|
||||
return rc;
|
||||
h_proto = vhdr->h_vlan_encapsulated_proto;
|
||||
}
|
||||
/* Handle double VLAN tagged packet */
|
||||
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
|
||||
struct vlan_hdr *vhdr;
|
||||
|
||||
|
@ -841,7 +841,7 @@ int main(int argc, char **argv)
|
||||
memset(cpu, 0, n_cpus * sizeof(int));
|
||||
|
||||
/* Parse commands line args */
|
||||
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
|
||||
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
|
||||
long_options, &longindex)) != -1) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
|
@ -1,12 +1,10 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright(c) 2017 - 2018 Intel Corporation. */
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <errno.h>
|
||||
#include <getopt.h>
|
||||
#include <libgen.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/if_xdp.h>
|
||||
#include <linux/if_ether.h>
|
||||
@ -653,17 +651,15 @@ out:
|
||||
return result;
|
||||
}
|
||||
|
||||
__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
|
||||
|
||||
/*
|
||||
* This is a version of ip_compute_csum() optimized for IP headers,
|
||||
* which always checksum on 4 octet boundaries.
|
||||
* This function code has been taken from
|
||||
* Linux kernel lib/checksum.c
|
||||
*/
|
||||
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
||||
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
||||
{
|
||||
return (__force __sum16)~do_csum(iph, ihl * 4);
|
||||
return (__sum16)~do_csum(iph, ihl * 4);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
||||
*/
|
||||
static inline __sum16 csum_fold(__wsum csum)
|
||||
{
|
||||
u32 sum = (__force u32)csum;
|
||||
u32 sum = (u32)csum;
|
||||
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
return (__force __sum16)~sum;
|
||||
return (__sum16)~sum;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
|
||||
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
|
||||
__u32 len, __u8 proto, __wsum sum)
|
||||
{
|
||||
unsigned long long s = (__force u32)sum;
|
||||
unsigned long long s = (u32)sum;
|
||||
|
||||
s += (__force u32)saddr;
|
||||
s += (__force u32)daddr;
|
||||
s += (u32)saddr;
|
||||
s += (u32)daddr;
|
||||
#ifdef __BIG_ENDIAN__
|
||||
s += proto + len;
|
||||
#else
|
||||
s += (proto + len) << 8;
|
||||
#endif
|
||||
return (__force __wsum)from64to32(s);
|
||||
return (__wsum)from64to32(s);
|
||||
}
|
||||
|
||||
/*
|
||||
|
1
tools/testing/selftests/bpf/.gitignore
vendored
1
tools/testing/selftests/bpf/.gitignore
vendored
@ -23,7 +23,6 @@ test_skb_cgroup_id_user
|
||||
test_cgroup_storage
|
||||
test_flow_dissector
|
||||
flow_dissector_load
|
||||
test_netcnt
|
||||
test_tcpnotify_user
|
||||
test_libbpf
|
||||
test_tcp_check_syncookie_user
|
||||
|
@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
|
||||
test_verifier_log test_dev_cgroup \
|
||||
test_sock test_sockmap get_cgroup_id_user \
|
||||
test_cgroup_storage \
|
||||
test_netcnt test_tcpnotify_user test_sysctl \
|
||||
test_tcpnotify_user test_sysctl \
|
||||
test_progs-no_alu32
|
||||
|
||||
# Also test bpf-gcc, if present
|
||||
@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
|
||||
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
|
||||
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
|
||||
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
|
||||
$(OUTPUT)/test_netcnt: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sysctl: cgroup_helpers.c
|
||||
|
||||
|
@ -310,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Return the shell command used to ping over the given address family.
 *
 * Every family other than AF_INET6 maps to plain "ping". For AF_INET6 the
 * result is "ping6" when `which ping6` succeeds, and "ping -6" otherwise,
 * because on some systems 'ping' itself does not support IPv6.
 * The returned string is a literal and must not be modified or freed.
 */
char *ping_command(int family)
{
	if (family != AF_INET6)
		return "ping";

	return system("which ping6 >/dev/null 2>&1") == 0 ? "ping6" : "ping -6";
}
|
||||
|
@ -46,5 +46,6 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
|
||||
int timeout_ms);
|
||||
int make_sockaddr(int family, const char *addr_str, __u16 port,
|
||||
struct sockaddr_storage *addr, socklen_t *len);
|
||||
char *ping_command(int family);
|
||||
|
||||
#endif
|
||||
|
82
tools/testing/selftests/bpf/prog_tests/netcnt.c
Normal file
82
tools/testing/selftests/bpf/prog_tests/netcnt.c
Normal file
@ -0,0 +1,82 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <sys/sysinfo.h>
|
||||
#include <test_progs.h>
|
||||
#include "network_helpers.h"
|
||||
#include "netcnt_prog.skel.h"
|
||||
#include "netcnt_common.h"
|
||||
|
||||
#define CG_NAME "/netcnt"
|
||||
|
||||
void test_netcnt(void)
|
||||
{
|
||||
union percpu_net_cnt *percpu_netcnt = NULL;
|
||||
struct bpf_cgroup_storage_key key;
|
||||
int map_fd, percpu_map_fd;
|
||||
struct netcnt_prog *skel;
|
||||
unsigned long packets;
|
||||
union net_cnt netcnt;
|
||||
unsigned long bytes;
|
||||
int cpu, nproc;
|
||||
int cg_fd = -1;
|
||||
char cmd[128];
|
||||
|
||||
skel = netcnt_prog__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
|
||||
return;
|
||||
|
||||
nproc = get_nprocs_conf();
|
||||
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
|
||||
if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
|
||||
goto err;
|
||||
|
||||
cg_fd = test__join_cgroup(CG_NAME);
|
||||
if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
|
||||
goto err;
|
||||
|
||||
skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
|
||||
if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
|
||||
"attach_cgroup(bpf_nextcnt)"))
|
||||
goto err;
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
|
||||
ASSERT_OK(system(cmd), cmd);
|
||||
|
||||
map_fd = bpf_map__fd(skel->maps.netcnt);
|
||||
if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
|
||||
goto err;
|
||||
|
||||
if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
|
||||
goto err;
|
||||
|
||||
percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
|
||||
if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
|
||||
"bpf_map_lookup_elem(percpu_netcnt)"))
|
||||
goto err;
|
||||
|
||||
/* Some packets can be still in per-cpu cache, but not more than
|
||||
* MAX_PERCPU_PACKETS.
|
||||
*/
|
||||
packets = netcnt.packets;
|
||||
bytes = netcnt.bytes;
|
||||
for (cpu = 0; cpu < nproc; cpu++) {
|
||||
ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
|
||||
|
||||
packets += percpu_netcnt[cpu].packets;
|
||||
bytes += percpu_netcnt[cpu].bytes;
|
||||
}
|
||||
|
||||
/* No packets should be lost */
|
||||
ASSERT_EQ(packets, 10000, "packets");
|
||||
|
||||
/* Let's check that bytes counter matches the number of packets
|
||||
* multiplied by the size of ipv6 ICMP packet.
|
||||
*/
|
||||
ASSERT_EQ(bytes, packets * 104, "bytes");
|
||||
|
||||
err:
|
||||
if (cg_fd != -1)
|
||||
close(cg_fd);
|
||||
free(percpu_netcnt);
|
||||
netcnt_prog__destroy(skel);
|
||||
}
|
@ -34,8 +34,8 @@ void test_reference_tracking(void)
|
||||
if (!test__start_subtest(title))
|
||||
continue;
|
||||
|
||||
/* Expect verifier failure if test name has 'fail' */
|
||||
if (strstr(title, "fail") != NULL) {
|
||||
/* Expect verifier failure if test name has 'err' */
|
||||
if (strstr(title, "err_") != NULL) {
|
||||
libbpf_print_fn_t old_print_fn;
|
||||
|
||||
old_print_fn = libbpf_set_print(NULL);
|
||||
|
@ -390,18 +390,6 @@ done:
|
||||
close(client_fd);
|
||||
}
|
||||
|
||||
static char *ping_command(int family)
|
||||
{
|
||||
if (family == AF_INET6) {
|
||||
/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
|
||||
if (!system("which ping6 >/dev/null 2>&1"))
|
||||
return "ping6";
|
||||
else
|
||||
return "ping -6";
|
||||
}
|
||||
return "ping";
|
||||
}
|
||||
|
||||
static int test_ping(int family, const char *addr)
|
||||
{
|
||||
SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
|
||||
|
520
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
Normal file
520
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
Normal file
@ -0,0 +1,520 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/**
|
||||
* Test XDP bonding support
|
||||
*
|
||||
* Sets up two bonded veth pairs between two fresh namespaces
|
||||
* and verifies that an XDP_TX program loaded on a bond device
|
||||
* is correctly loaded onto the slave devices and that XDP_TX'd
|
||||
* packets are balanced using bonding.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/if_link.h>
|
||||
#include "test_progs.h"
|
||||
#include "network_helpers.h"
|
||||
#include <linux/if_bonding.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/udp.h>
|
||||
|
||||
#include "xdp_dummy.skel.h"
|
||||
#include "xdp_redirect_multi_kern.skel.h"
|
||||
#include "xdp_tx.skel.h"
|
||||
|
||||
#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
|
||||
#define BOND1_MAC_STR "00:11:22:33:44:55"
|
||||
#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
|
||||
#define BOND2_MAC_STR "00:22:33:44:55:66"
|
||||
#define NPACKETS 100
|
||||
|
||||
static int root_netns_fd = -1;
|
||||
|
||||
static void restore_root_netns(void)
|
||||
{
|
||||
ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
|
||||
}
|
||||
|
||||
static int setns_by_name(char *name)
|
||||
{
|
||||
int nsfd, err;
|
||||
char nspath[PATH_MAX];
|
||||
|
||||
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
|
||||
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
|
||||
if (nsfd < 0)
|
||||
return -1;
|
||||
|
||||
err = setns(nsfd, CLONE_NEWNET);
|
||||
close(nsfd);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Read the second numeric column (received packets) for @iface from
 * /proc/net/dev.
 *
 * Each line is matched as: optional spaces, the exact interface name, ':',
 * the rx bytes field, then the field that is parsed and returned.
 * Returns -1 when the file cannot be opened or no line matches.
 */
static int get_rx_packets(const char *iface)
{
	size_t name_len = strlen(iface);
	int rx_packets = -1;
	char row[512];
	FILE *stats;

	stats = fopen("/proc/net/dev", "r");
	if (!stats)
		return -1;

	while (fgets(row, sizeof(row), stats)) {
		const char *cur = row + strspn(row, " ");

		/* The name must be followed directly by ':' so that a name
		 * which is only a prefix of another interface does not match.
		 */
		if (strncmp(cur, iface, name_len) != 0 || cur[name_len] != ':')
			continue;

		cur += name_len + 1;		/* past "<iface>:" */
		cur += strspn(cur, " ");	/* skip whitespace */
		cur += strcspn(cur, " ");	/* skip rx bytes */
		cur += strspn(cur, " ");	/* skip whitespace */

		rx_packets = atoi(cur);
		break;
	}

	fclose(stats);
	return rx_packets;
}
|
||||
|
||||
#define MAX_BPF_LINKS 8
|
||||
|
||||
/* State shared by the XDP bonding subtests: the loaded BPF skeletons plus
 * the XDP links created through xdp_attach(), which bonding_cleanup()
 * destroys again.
 */
struct skeletons {
|
||||
struct xdp_dummy *xdp_dummy;
|
||||
struct xdp_tx *xdp_tx;
|
||||
struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
|
||||
|
||||
/* number of entries of links[] currently in use */
int nlinks;
|
||||
struct bpf_link *links[MAX_BPF_LINKS];
|
||||
};
|
||||
|
||||
static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
|
||||
{
|
||||
struct bpf_link *link;
|
||||
int ifindex;
|
||||
|
||||
ifindex = if_nametoindex(iface);
|
||||
if (!ASSERT_GT(ifindex, 0, "get ifindex"))
|
||||
return -1;
|
||||
|
||||
if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
|
||||
return -1;
|
||||
|
||||
link = bpf_program__attach_xdp(prog, ifindex);
|
||||
if (!ASSERT_OK_PTR(link, "attach xdp program"))
|
||||
return -1;
|
||||
|
||||
skeletons->links[skeletons->nlinks++] = link;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Topology selector passed as bond_both_attach to bonding_setup():
 * BOND_ONE_NO_ATTACH   - only veth1_1/veth2_1 are enslaved; veth1_2 and
 *                        veth2_2 stay standalone and nothing is attached
 *                        to bond2.
 * BOND_BOTH_AND_ATTACH - both veth pairs are enslaved and the xdp_tx
 *                        program is attached to bond2.
 */
enum {
|
||||
BOND_ONE_NO_ATTACH = 0,
|
||||
BOND_BOTH_AND_ATTACH,
|
||||
};
|
||||
|
||||
/* Maps each BOND_MODE_* constant to the string that bonding_setup() passes
 * as the "mode" argument of `ip link add ... type bond`.
 */
static const char * const mode_names[] = {
|
||||
[BOND_MODE_ROUNDROBIN] = "balance-rr",
|
||||
[BOND_MODE_ACTIVEBACKUP] = "active-backup",
|
||||
[BOND_MODE_XOR] = "balance-xor",
|
||||
[BOND_MODE_BROADCAST] = "broadcast",
|
||||
[BOND_MODE_8023AD] = "802.3ad",
|
||||
[BOND_MODE_TLB] = "balance-tlb",
|
||||
[BOND_MODE_ALB] = "balance-alb",
|
||||
};
|
||||
|
||||
/* Maps each BOND_XMIT_POLICY_* constant to the string that bonding_setup()
 * passes as the "xmit_hash_policy" argument of `ip link add ... type bond`.
 */
static const char * const xmit_policy_names[] = {
|
||||
[BOND_XMIT_POLICY_LAYER2] = "layer2",
|
||||
[BOND_XMIT_POLICY_LAYER34] = "layer3+4",
|
||||
[BOND_XMIT_POLICY_LAYER23] = "layer2+3",
|
||||
[BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
|
||||
[BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
|
||||
};
|
||||
|
||||
static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
|
||||
int bond_both_attach)
|
||||
{
|
||||
#define SYS(fmt, ...) \
|
||||
({ \
|
||||
char cmd[1024]; \
|
||||
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
|
||||
if (!ASSERT_OK(system(cmd), cmd)) \
|
||||
return -1; \
|
||||
})
|
||||
|
||||
SYS("ip netns add ns_dst");
|
||||
SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
|
||||
SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
|
||||
|
||||
SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
|
||||
mode_names[mode], xmit_policy_names[xmit_policy]);
|
||||
SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
|
||||
SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
|
||||
mode_names[mode], xmit_policy_names[xmit_policy]);
|
||||
SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
|
||||
|
||||
SYS("ip link set veth1_1 master bond1");
|
||||
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
|
||||
SYS("ip link set veth1_2 master bond1");
|
||||
} else {
|
||||
SYS("ip link set veth1_2 up addrgenmode none");
|
||||
|
||||
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
|
||||
return -1;
|
||||
}
|
||||
|
||||
SYS("ip -netns ns_dst link set veth2_1 master bond2");
|
||||
|
||||
if (bond_both_attach == BOND_BOTH_AND_ATTACH)
|
||||
SYS("ip -netns ns_dst link set veth2_2 master bond2");
|
||||
else
|
||||
SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
|
||||
|
||||
/* Load a dummy program on sending side as with veth peer needs to have a
|
||||
* XDP program loaded as well.
|
||||
*/
|
||||
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
|
||||
return -1;
|
||||
|
||||
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
|
||||
if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
|
||||
return -1;
|
||||
|
||||
if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
|
||||
return -1;
|
||||
|
||||
restore_root_netns();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
#undef SYS
|
||||
}
|
||||
|
||||
static void bonding_cleanup(struct skeletons *skeletons)
|
||||
{
|
||||
restore_root_netns();
|
||||
while (skeletons->nlinks) {
|
||||
skeletons->nlinks--;
|
||||
bpf_link__destroy(skeletons->links[skeletons->nlinks]);
|
||||
}
|
||||
ASSERT_OK(system("ip link delete bond1"), "delete bond1");
|
||||
ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
|
||||
ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
|
||||
ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
|
||||
}
|
||||
|
||||
static int send_udp_packets(int vary_dst_ip)
|
||||
{
|
||||
struct ethhdr eh = {
|
||||
.h_source = BOND1_MAC,
|
||||
.h_dest = BOND2_MAC,
|
||||
.h_proto = htons(ETH_P_IP),
|
||||
};
|
||||
uint8_t buf[128] = {};
|
||||
struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
|
||||
struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
|
||||
int i, s = -1;
|
||||
int ifindex;
|
||||
|
||||
s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
|
||||
if (!ASSERT_GE(s, 0, "socket"))
|
||||
goto err;
|
||||
|
||||
ifindex = if_nametoindex("bond1");
|
||||
if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
|
||||
goto err;
|
||||
|
||||
memcpy(buf, &eh, sizeof(eh));
|
||||
iph->ihl = 5;
|
||||
iph->version = 4;
|
||||
iph->tos = 16;
|
||||
iph->id = 1;
|
||||
iph->ttl = 64;
|
||||
iph->protocol = IPPROTO_UDP;
|
||||
iph->saddr = 1;
|
||||
iph->daddr = 2;
|
||||
iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
|
||||
iph->check = 0;
|
||||
|
||||
for (i = 1; i <= NPACKETS; i++) {
|
||||
int n;
|
||||
struct sockaddr_ll saddr_ll = {
|
||||
.sll_ifindex = ifindex,
|
||||
.sll_halen = ETH_ALEN,
|
||||
.sll_addr = BOND2_MAC,
|
||||
};
|
||||
|
||||
/* vary the UDP destination port for even distribution with roundrobin/xor modes */
|
||||
uh->dest++;
|
||||
|
||||
if (vary_dst_ip)
|
||||
iph->daddr++;
|
||||
|
||||
n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
|
||||
if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (s >= 0)
|
||||
close(s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
|
||||
{
|
||||
int bond1_rx;
|
||||
|
||||
if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
|
||||
goto out;
|
||||
|
||||
if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
|
||||
goto out;
|
||||
|
||||
bond1_rx = get_rx_packets("bond1");
|
||||
ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
|
||||
|
||||
switch (mode) {
|
||||
case BOND_MODE_ROUNDROBIN:
|
||||
case BOND_MODE_XOR: {
|
||||
int veth1_rx = get_rx_packets("veth1_1");
|
||||
int veth2_rx = get_rx_packets("veth1_2");
|
||||
int diff = abs(veth1_rx - veth2_rx);
|
||||
|
||||
ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
|
||||
|
||||
switch (xmit_policy) {
|
||||
case BOND_XMIT_POLICY_LAYER2:
|
||||
ASSERT_GE(diff, NPACKETS,
|
||||
"expected packets on only one of the interfaces");
|
||||
break;
|
||||
case BOND_XMIT_POLICY_LAYER23:
|
||||
case BOND_XMIT_POLICY_LAYER34:
|
||||
ASSERT_LT(diff, NPACKETS/2,
|
||||
"expected even distribution of packets");
|
||||
break;
|
||||
default:
|
||||
PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BOND_MODE_ACTIVEBACKUP: {
|
||||
int veth1_rx = get_rx_packets("veth1_1");
|
||||
int veth2_rx = get_rx_packets("veth1_2");
|
||||
int diff = abs(veth1_rx - veth2_rx);
|
||||
|
||||
ASSERT_GE(diff, NPACKETS,
|
||||
"expected packets on only one of the interfaces");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
bonding_cleanup(skeletons);
|
||||
}
|
||||
|
||||
/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
|
||||
* all the interfaces to it and checking that broadcasting won't send the packet
|
||||
* to neither the ingress bond device (bond2) or its slave (veth2_1).
|
||||
*/
|
||||
static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
|
||||
{
|
||||
static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
|
||||
int veth1_1_rx, veth1_2_rx;
|
||||
int err;
|
||||
|
||||
if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
|
||||
BOND_ONE_NO_ATTACH))
|
||||
goto out;
|
||||
|
||||
|
||||
if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
|
||||
goto out;
|
||||
|
||||
/* populate the devmap with the relevant interfaces */
|
||||
for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
|
||||
int ifindex = if_nametoindex(ifaces[i]);
|
||||
int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
|
||||
|
||||
if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
|
||||
if (!ASSERT_OK(err, "add interface to map_all"))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (xdp_attach(skeletons,
|
||||
skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
|
||||
"bond2"))
|
||||
goto out;
|
||||
|
||||
restore_root_netns();
|
||||
|
||||
if (send_udp_packets(BOND_MODE_ROUNDROBIN))
|
||||
goto out;
|
||||
|
||||
veth1_1_rx = get_rx_packets("veth1_1");
|
||||
veth1_2_rx = get_rx_packets("veth1_2");
|
||||
|
||||
ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
|
||||
ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
|
||||
|
||||
out:
|
||||
restore_root_netns();
|
||||
bonding_cleanup(skeletons);
|
||||
}
|
||||
|
||||
/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
|
||||
static void test_xdp_bonding_attach(struct skeletons *skeletons)
|
||||
{
|
||||
struct bpf_link *link = NULL;
|
||||
struct bpf_link *link2 = NULL;
|
||||
int veth, bond;
|
||||
int err;
|
||||
|
||||
if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
|
||||
goto out;
|
||||
if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
|
||||
goto out;
|
||||
|
||||
veth = if_nametoindex("veth");
|
||||
if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
|
||||
goto out;
|
||||
bond = if_nametoindex("bond");
|
||||
if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
|
||||
goto out;
|
||||
|
||||
/* enslaving with a XDP program loaded fails */
|
||||
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
|
||||
if (!ASSERT_OK_PTR(link, "attach program to veth"))
|
||||
goto out;
|
||||
|
||||
err = system("ip link set veth master bond");
|
||||
if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
|
||||
goto out;
|
||||
|
||||
bpf_link__destroy(link);
|
||||
link = NULL;
|
||||
|
||||
err = system("ip link set veth master bond");
|
||||
if (!ASSERT_OK(err, "set veth master"))
|
||||
goto out;
|
||||
|
||||
/* attaching to slave when master has no program is allowed */
|
||||
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
|
||||
if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
|
||||
goto out;
|
||||
|
||||
/* attaching to master not allowed when slave has program loaded */
|
||||
link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
|
||||
if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
|
||||
goto out;
|
||||
|
||||
bpf_link__destroy(link);
|
||||
link = NULL;
|
||||
|
||||
/* attaching XDP program to master allowed when slave has no program */
|
||||
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
|
||||
if (!ASSERT_OK_PTR(link, "attach program to master"))
|
||||
goto out;
|
||||
|
||||
/* attaching to slave not allowed when master has program loaded */
|
||||
link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
|
||||
ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
|
||||
|
||||
out:
|
||||
bpf_link__destroy(link);
|
||||
bpf_link__destroy(link2);
|
||||
|
||||
system("ip link del veth");
|
||||
system("ip link del bond");
|
||||
}
|
||||
|
||||
static int libbpf_debug_print(enum libbpf_print_level level,
|
||||
const char *format, va_list args)
|
||||
{
|
||||
if (level != LIBBPF_WARN)
|
||||
vprintf(format, args);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* One parameterised bonding subtest: the subtest name handed to
 * test__start_subtest() and the mode / xmit policy handed to
 * test_xdp_bonding_with_mode().
 */
struct bond_test_case {
|
||||
char *name;
|
||||
int mode;
|
||||
int xmit_policy;
|
||||
};
|
||||
|
||||
/* Mode / xmit policy combinations that test_xdp_bonding() iterates over,
 * one subtest per entry.
 */
static struct bond_test_case bond_test_cases[] = {
|
||||
{ "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
|
||||
{ "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
|
||||
|
||||
{ "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
|
||||
{ "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
|
||||
{ "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
|
||||
};
|
||||
|
||||
void test_xdp_bonding(void)
|
||||
{
|
||||
libbpf_print_fn_t old_print_fn;
|
||||
struct skeletons skeletons = {};
|
||||
int i;
|
||||
|
||||
old_print_fn = libbpf_set_print(libbpf_debug_print);
|
||||
|
||||
root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
|
||||
if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
|
||||
goto out;
|
||||
|
||||
skeletons.xdp_dummy = xdp_dummy__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
|
||||
goto out;
|
||||
|
||||
skeletons.xdp_tx = xdp_tx__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
|
||||
goto out;
|
||||
|
||||
skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
|
||||
"xdp_redirect_multi_kern__open_and_load"))
|
||||
goto out;
|
||||
|
||||
if (!test__start_subtest("xdp_bonding_attach"))
|
||||
test_xdp_bonding_attach(&skeletons);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
|
||||
struct bond_test_case *test_case = &bond_test_cases[i];
|
||||
|
||||
if (!test__start_subtest(test_case->name))
|
||||
test_xdp_bonding_with_mode(
|
||||
&skeletons,
|
||||
test_case->mode,
|
||||
test_case->xmit_policy);
|
||||
}
|
||||
|
||||
if (!test__start_subtest("xdp_bonding_redirect_multi"))
|
||||
test_xdp_bonding_redirect_multi(&skeletons);
|
||||
|
||||
out:
|
||||
xdp_dummy__destroy(skeletons.xdp_dummy);
|
||||
xdp_tx__destroy(skeletons.xdp_tx);
|
||||
xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
|
||||
|
||||
libbpf_set_print(old_print_fn);
|
||||
if (root_netns_fd >= 0)
|
||||
close(root_netns_fd);
|
||||
}
|
@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
|
||||
}
|
||||
|
||||
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
|
||||
seq_num, src, srcp, destp, destp);
|
||||
seq_num, src, srcp, dest, destp);
|
||||
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
|
||||
state,
|
||||
tp->write_seq - tp->snd_una, rx_queue,
|
||||
|
@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_use_after_free")
|
||||
SEC("classifier/err_use_after_free")
|
||||
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
|
||||
return family;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_modify_sk_pointer")
|
||||
SEC("classifier/err_modify_sk_pointer")
|
||||
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_modify_sk_or_null_pointer")
|
||||
SEC("classifier/err_modify_sk_or_null_pointer")
|
||||
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_no_release")
|
||||
SEC("classifier/err_no_release")
|
||||
int bpf_sk_lookup_test2(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_release_twice")
|
||||
SEC("classifier/err_release_twice")
|
||||
int bpf_sk_lookup_test3(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("classifier/fail_release_unchecked")
|
||||
SEC("classifier/err_release_unchecked")
|
||||
int bpf_sk_lookup_test4(struct __sk_buff *skb)
|
||||
{
|
||||
struct bpf_sock_tuple tuple = {};
|
||||
@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
|
||||
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
|
||||
}
|
||||
|
||||
SEC("classifier/fail_no_release_subcall")
|
||||
SEC("classifier/err_no_release_subcall")
|
||||
int bpf_sk_lookup_test5(struct __sk_buff *skb)
|
||||
{
|
||||
lookup_no_release(skb);
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
SEC("tx")
|
||||
SEC("xdp")
|
||||
int xdp_tx(struct xdp_md *xdp)
|
||||
{
|
||||
return XDP_TX;
|
||||
|
@ -1,148 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
|
||||
#include "cgroup_helpers.h"
|
||||
#include "bpf_rlimit.h"
|
||||
#include "netcnt_common.h"
|
||||
|
||||
#define BPF_PROG "./netcnt_prog.o"
|
||||
#define TEST_CGROUP "/test-network-counters/"
|
||||
|
||||
static int bpf_find_map(const char *test, struct bpf_object *obj,
|
||||
const char *name)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
|
||||
map = bpf_object__find_map_by_name(obj, name);
|
||||
if (!map) {
|
||||
printf("%s:FAIL:map '%s' not found\n", test, name);
|
||||
return -1;
|
||||
}
|
||||
return bpf_map__fd(map);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
union percpu_net_cnt *percpu_netcnt;
|
||||
struct bpf_cgroup_storage_key key;
|
||||
int map_fd, percpu_map_fd;
|
||||
int error = EXIT_FAILURE;
|
||||
struct bpf_object *obj;
|
||||
int prog_fd, cgroup_fd;
|
||||
unsigned long packets;
|
||||
union net_cnt netcnt;
|
||||
unsigned long bytes;
|
||||
int cpu, nproc;
|
||||
__u32 prog_cnt;
|
||||
|
||||
nproc = get_nprocs_conf();
|
||||
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
|
||||
if (!percpu_netcnt) {
|
||||
printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
|
||||
&obj, &prog_fd)) {
|
||||
printf("Failed to load bpf program\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
|
||||
if (cgroup_fd < 0)
|
||||
goto err;
|
||||
|
||||
/* Attach bpf program */
|
||||
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
|
||||
printf("Failed to attach bpf program");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (system("which ping6 &>/dev/null") == 0)
|
||||
assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
|
||||
else
|
||||
assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
|
||||
|
||||
if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
|
||||
&prog_cnt)) {
|
||||
printf("Failed to query attached programs");
|
||||
goto err;
|
||||
}
|
||||
|
||||
map_fd = bpf_find_map(__func__, obj, "netcnt");
|
||||
if (map_fd < 0) {
|
||||
printf("Failed to find bpf map with net counters");
|
||||
goto err;
|
||||
}
|
||||
|
||||
percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
|
||||
if (percpu_map_fd < 0) {
|
||||
printf("Failed to find bpf map with percpu net counters");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bpf_map_get_next_key(map_fd, NULL, &key)) {
|
||||
printf("Failed to get key in cgroup storage\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
|
||||
printf("Failed to lookup cgroup storage\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
|
||||
printf("Failed to lookup percpu cgroup storage\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Some packets can be still in per-cpu cache, but not more than
|
||||
* MAX_PERCPU_PACKETS.
|
||||
*/
|
||||
packets = netcnt.packets;
|
||||
bytes = netcnt.bytes;
|
||||
for (cpu = 0; cpu < nproc; cpu++) {
|
||||
if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
|
||||
printf("Unexpected percpu value: %llu\n",
|
||||
percpu_netcnt[cpu].packets);
|
||||
goto err;
|
||||
}
|
||||
|
||||
packets += percpu_netcnt[cpu].packets;
|
||||
bytes += percpu_netcnt[cpu].bytes;
|
||||
}
|
||||
|
||||
/* No packets should be lost */
|
||||
if (packets != 10000) {
|
||||
printf("Unexpected packet count: %lu\n", packets);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Let's check that bytes counter matches the number of packets
|
||||
* multiplied by the size of ipv6 ICMP packet.
|
||||
*/
|
||||
if (bytes != packets * 104) {
|
||||
printf("Unexpected bytes count: %lu\n", bytes);
|
||||
goto err;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
printf("test_netcnt:PASS\n");
|
||||
|
||||
err:
|
||||
cleanup_cgroup_environment();
|
||||
free(percpu_netcnt);
|
||||
|
||||
out:
|
||||
return error;
|
||||
}
|
@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
|
||||
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
|
||||
|
||||
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
|
||||
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
|
||||
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
|
||||
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
|
||||
|
||||
trap cleanup EXIT
|
||||
|
Loading…
x
Reference in New Issue
Block a user