mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-12 08:00:09 +00:00
a9744f7ca2
There is a potential race in the TX completion code for the SKB case. One process enters the sendmsg code of an AF_XDP socket in order to send a frame. The execution eventually trickles down to the driver, which is told to send the packet. However, the driver decides to drop the packet due to some error condition (e.g., rings full) and frees the SKB. This triggers the SKB destructor, and a completion is sent to the AF_XDP user space through its single-producer/single-consumer queues.

At the same time, a TX interrupt has fired on another core and dispatches the TX completion code in the driver. It does its HW-specific things and ends up freeing the SKB associated with the transmitted packet. This also triggers the SKB destructor, and a completion is sent to the AF_XDP user space through its single-producer/single-consumer queues.

With a pseudo call stack, it would look like this:

Core 1:
  sendmsg() being called in the application
  netdev_start_xmit()
  Driver entered through ndo_start_xmit
  Driver decides to free the SKB for some reason (e.g., rings full)
  Destructor of SKB called
  xskq_produce_addr() is called to signal completion to user space

Core 2:
  TX completion irq
  NAPI loop
  Driver irq handler for TX completions
  Frees the SKB
  Destructor of SKB called
  xskq_produce_addr() is called to signal completion to user space

We now have a violation of the single-producer/single-consumer principle for our queues, as there are two threads trying to produce at the same time on the same queue.

Fixed by introducing a spin_lock in the destructor. With regard to performance, I get around 1.74 Mpps for txonly both before and after the introduction of the spinlock. There is of course some impact due to the spin lock, but it is in the less significant digits that are too noisy for me to measure. But let us say that the version without the spin lock got 1.745 Mpps in the best case and the version with it got 1.735 Mpps in the worst case; that would mean a maximum drop in performance of about 0.5%.
Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
104 lines
2.4 KiB
C
104 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* AF_XDP internal functions
 * Copyright(c) 2018 Intel Corporation.
 */
|
|
|
|
#ifndef _LINUX_XDP_SOCK_H
|
|
#define _LINUX_XDP_SOCK_H
|
|
|
|
#include <linux/workqueue.h>
|
|
#include <linux/if_xdp.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/mm.h>
|
|
#include <net/sock.h>
|
|
|
|
/* Forward declarations for types this header refers to by pointer. */
struct net_device;
struct xsk_queue;
|
|
|
|
struct xdp_umem_props {
|
|
u64 chunk_mask;
|
|
u64 size;
|
|
};
|
|
|
|
struct xdp_umem_page {
|
|
void *addr;
|
|
dma_addr_t dma;
|
|
};
|
|
|
|
struct xdp_umem {
|
|
struct xsk_queue *fq;
|
|
struct xsk_queue *cq;
|
|
struct xdp_umem_page *pages;
|
|
struct xdp_umem_props props;
|
|
u32 headroom;
|
|
u32 chunk_size_nohr;
|
|
struct user_struct *user;
|
|
struct pid *pid;
|
|
unsigned long address;
|
|
refcount_t users;
|
|
struct work_struct work;
|
|
struct page **pgs;
|
|
u32 npgs;
|
|
struct net_device *dev;
|
|
u16 queue_id;
|
|
bool zc;
|
|
spinlock_t xsk_list_lock;
|
|
struct list_head xsk_list;
|
|
};
|
|
|
|
struct xdp_sock {
|
|
/* struct sock must be the first member of struct xdp_sock */
|
|
struct sock sk;
|
|
struct xsk_queue *rx;
|
|
struct net_device *dev;
|
|
struct xdp_umem *umem;
|
|
struct list_head flush_node;
|
|
u16 queue_id;
|
|
struct xsk_queue *tx ____cacheline_aligned_in_smp;
|
|
struct list_head list;
|
|
bool zc;
|
|
/* Protects multiple processes in the control path */
|
|
struct mutex mutex;
|
|
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
|
|
* in the SKB destructor callback.
|
|
*/
|
|
spinlock_t tx_completion_lock;
|
|
u64 rx_dropped;
|
|
};
|
|
|
|
/* Forward declaration; the declarations in this header only take
 * struct xdp_buff by pointer.
 */
struct xdp_buff;
|
|
#ifdef CONFIG_XDP_SOCKETS
|
|
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
|
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
|
void xsk_flush(struct xdp_sock *xs);
|
|
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
|
|
/* Used from netdev driver */
|
|
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
|
|
void xsk_umem_discard_addr(struct xdp_umem *umem);
|
|
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
|
|
bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
|
|
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
|
|
#else
|
|
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
|
|
static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
|
|
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_flush(struct xdp_sock *xs)
{
}
|
|
|
|
/* Stub for builds without CONFIG_XDP_SOCKETS: ignores its argument and
 * always returns false.
 */
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
	return false;
}
|
|
#endif /* CONFIG_XDP_SOCKETS */
|
|
|
|
#endif /* _LINUX_XDP_SOCK_H */
|