Merge branch 'xdp: recycle Page Pool backed skbs built from XDP frames'

Alexander Lobakin says:

====================

Yeah, I still remember that "Who needs cpumap nowadays" (c), but anyway.

__xdp_build_skb_from_frame() missed the moment when the networking stack
became able to recycle skb pages backed by a page_pool. This was making
e.g. cpumap redirect even less effective than simple %XDP_PASS. veth was
also affected in some scenarios.
A lot of drivers use skb_mark_for_recycle() already, it's been almost
two years and seems like there are no issues in using it in the generic
code too. {__,}xdp_release_frame() can be then removed as it losts its
last user.
Page Pool becomes then zero-alloc (or almost) in the abovementioned
cases, too. Other memory type models (who needs them at this point)
have no changes.

Some numbers on 1 Xeon Platinum core bombed with 27 Mpps of 64-byte
IPv6 UDP, iavf w/XDP[0] (CONFIG_PAGE_POOL_STATS is enabled):

Plain %XDP_PASS on baseline, Page Pool driver:

src cpu Rx     drops  dst cpu Rx
  2.1 Mpps       N/A    2.1 Mpps

cpumap redirect (cross-core, w/o leaving its NUMA node) on baseline:

  6.8 Mpps  5.0 Mpps    1.8 Mpps

cpumap redirect with skb PP recycling:

  7.9 Mpps  5.7 Mpps    2.2 Mpps
                       +22% (from cpumap redir on baseline)

[0] https://github.com/alobakin/linux/commits/iavf-xdp
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2023-03-14 15:20:05 -07:00
commit 5584d9e63e
4 changed files with 30 additions and 58 deletions

View File

@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
}
#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
#ifdef CONFIG_PAGE_POOL
skb->pp_recycle = 1;
}
#endif
}
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */

View File

@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
/* When sending xdp_frame into the network stack, then there is no
* return point callback, which is needed to release e.g. DMA-mapping
* resources with page_pool. Thus, have explicit function to release
* frame resources.
*/
void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
struct xdp_mem_info *mem = &xdpf->mem;
struct skb_shared_info *sinfo;
int i;
/* Curr only page_pool needs this */
if (mem->type != MEM_TYPE_PAGE_POOL)
return;
if (likely(!xdp_frame_has_frags(xdpf)))
goto out;
sinfo = xdp_get_shared_info_from_frame(xdpf);
for (i = 0; i < sinfo->nr_frags; i++) {
struct page *page = skb_frag_page(&sinfo->frags[i]);
__xdp_release_frame(page_address(page), mem);
}
out:
__xdp_release_frame(xdpf->data, mem);
}
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;

View File

@ -531,21 +531,6 @@ out:
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
struct xdp_mem_allocator *xa;
struct page *page;
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
page_pool_release_page(xa->page_pool, page);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
/* Until page_pool get SKB return path, release DMA here */
xdp_release_frame(xdpf);
if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
xdp_scrub_frame(xdpf);

View File

@ -4,6 +4,19 @@
#define ETH_ALEN 6
#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
/**
* enum frame_mark - magics to distinguish page/packet paths
* @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
* @MARK_IN: frame is being processed by the input XDP prog.
* @MARK_SKB: frame did hit the TC ingress hook as an skb.
*/
enum frame_mark {
MARK_XMIT = 0U,
MARK_IN = 0x42,
MARK_SKB = 0x45,
};
const volatile int ifindex_out;
const volatile int ifindex_in;
const volatile __u8 expect_dst[ETH_ALEN];
@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp)
if (*metadata != 0x42)
return XDP_ABORTED;
if (*payload == 0) {
*payload = 0x42;
if (*payload == MARK_XMIT)
pkts_seen_zero++;
}
*payload = MARK_IN;
if (bpf_xdp_adjust_meta(xdp, 4))
return XDP_ABORTED;
@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
return ret;
}
static bool check_pkt(void *data, void *data_end)
static bool check_pkt(void *data, void *data_end, const __u32 mark)
{
struct ipv6hdr *iph = data + sizeof(struct ethhdr);
__u8 *payload = data + HDR_SZ;
@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
if (payload + 1 > data_end)
return false;
if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
return false;
/* reset the payload so the same packet doesn't get counted twice when
* it cycles back through the kernel path and out the dst veth
*/
*payload = 0;
*payload = mark;
return true;
}
@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
void *data = (void *)(long)xdp->data;
void *data_end = (void *)(long)xdp->data_end;
if (check_pkt(data, data_end))
if (check_pkt(data, data_end, MARK_XMIT))
pkts_seen_xdp++;
/* Return XDP_DROP to make sure the data page is recycled, like when it
* exits a physical NIC. Recycled pages will be counted in the
/* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
* it exited a physical NIC. Those pages will be counted in the
* pkts_seen_zero counter above.
*/
return XDP_DROP;
@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
if (check_pkt(data, data_end))
if (check_pkt(data, data_end, MARK_SKB))
pkts_seen_tc++;
/* Will be either recycled or freed, %MARK_SKB makes sure it won't
* hit any of the counters above.
*/
return 0;
}