linux-next/include/linux/virtio_net.h
Eric Dumazet 0f6925b3e8 virtio_net: Do not pull payload in skb->head
Xuan Zhuo reported that commit 3226b158e67c ("net: avoid 32 x truesize
under-estimation for tiny skbs") brought  a ~10% performance drop.

The reason for the performance drop was that GRO was forced
to chain sk_buff (using skb_shinfo(skb)->frag_list), which
uses more memory but also cause packet consumers to go over
a lot of overhead handling all the tiny skbs.

It turns out that virtio_net page_to_skb() has a wrong strategy :
It allocates skbs with GOOD_COPY_LEN (128) bytes in skb->head, then
copies 128 bytes from the page, before feeding the packet to GRO stack.

This was suboptimal before commit 3226b158e67c ("net: avoid 32 x truesize
under-estimation for tiny skbs") because GRO was using 2 frags per MSS,
meaning we were not packing MSS with 100% efficiency.

Fix is to pull only the ethernet header in page_to_skb()

Then, we change virtio_net_hdr_to_skb() to pull the missing
headers, instead of assuming they were already pulled by callers.

This fixes the performance regression, but could also allow virtio_net
to accept packets with more than 128bytes of headers.

Many thanks to Xuan Zhuo for his report, and his tests/help.

Fixes: 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs")
Reported-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://www.spinics.net/lists/netdev/msg731397.html
Co-Developed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-04-06 16:17:57 -07:00

181 lines
4.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VIRTIO_NET_H
#define _LINUX_VIRTIO_NET_H
#include <linux/if_vlan.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/virtio_net.h>
static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
const struct virtio_net_hdr *hdr)
{
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_UDP:
skb->protocol = cpu_to_be16(ETH_P_IP);
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
skb->protocol = cpu_to_be16(ETH_P_IPV6);
break;
default:
return -EINVAL;
}
return 0;
}
static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
const struct virtio_net_hdr *hdr,
bool little_endian)
{
unsigned int gso_type = 0;
unsigned int thlen = 0;
unsigned int p_off = 0;
unsigned int ip_proto;
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
gso_type = SKB_GSO_TCPV4;
ip_proto = IPPROTO_TCP;
thlen = sizeof(struct tcphdr);
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
gso_type = SKB_GSO_TCPV6;
ip_proto = IPPROTO_TCP;
thlen = sizeof(struct tcphdr);
break;
case VIRTIO_NET_HDR_GSO_UDP:
gso_type = SKB_GSO_UDP;
ip_proto = IPPROTO_UDP;
thlen = sizeof(struct udphdr);
break;
default:
return -EINVAL;
}
if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
gso_type |= SKB_GSO_TCP_ECN;
if (hdr->gso_size == 0)
return -EINVAL;
}
skb_reset_mac_header(skb);
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start);
u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
if (!pskb_may_pull(skb, needed))
return -EINVAL;
if (!skb_partial_csum_set(skb, start, off))
return -EINVAL;
p_off = skb_transport_offset(skb) + thlen;
if (!pskb_may_pull(skb, p_off))
return -EINVAL;
} else {
/* gso packets without NEEDS_CSUM do not set transport_offset.
* probe and drop if does not match one of the above types.
*/
if (gso_type && skb->network_header) {
struct flow_keys_basic keys;
if (!skb->protocol) {
__be16 protocol = dev_parse_header_protocol(skb);
virtio_net_hdr_set_proto(skb, hdr);
if (protocol && protocol != skb->protocol)
return -EINVAL;
}
retry:
if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
NULL, 0, 0, 0,
0)) {
/* UFO does not specify ipv4 or 6: try both */
if (gso_type & SKB_GSO_UDP &&
skb->protocol == htons(ETH_P_IP)) {
skb->protocol = htons(ETH_P_IPV6);
goto retry;
}
return -EINVAL;
}
p_off = keys.control.thoff + thlen;
if (!pskb_may_pull(skb, p_off) ||
keys.basic.ip_proto != ip_proto)
return -EINVAL;
skb_set_transport_header(skb, keys.control.thoff);
} else if (gso_type) {
p_off = thlen;
if (!pskb_may_pull(skb, p_off))
return -EINVAL;
}
}
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
struct skb_shared_info *shinfo = skb_shinfo(skb);
/* Too small packets are not really GSO ones. */
if (skb->len - p_off > gso_size) {
shinfo->gso_size = gso_size;
shinfo->gso_type = gso_type;
/* Header must be checked, and gso_segs computed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
}
}
return 0;
}
static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr *hdr,
bool little_endian,
bool has_data_valid,
int vlan_hlen)
{
memset(hdr, 0, sizeof(*hdr)); /* no info leak */
if (skb_is_gso(skb)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
/* This is a hint as to how much should be linear. */
hdr->hdr_len = __cpu_to_virtio16(little_endian,
skb_headlen(skb));
hdr->gso_size = __cpu_to_virtio16(little_endian,
sinfo->gso_size);
if (sinfo->gso_type & SKB_GSO_TCPV4)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
else
return -EINVAL;
if (sinfo->gso_type & SKB_GSO_TCP_ECN)
hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
} else
hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
hdr->csum_start = __cpu_to_virtio16(little_endian,
skb_checksum_start_offset(skb) + vlan_hlen);
hdr->csum_offset = __cpu_to_virtio16(little_endian,
skb->csum_offset);
} else if (has_data_valid &&
skb->ip_summed == CHECKSUM_UNNECESSARY) {
hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
} /* else everything is zero */
return 0;
}
#endif /* _LINUX_VIRTIO_NET_H */