linux-next/include/net/busy_poll.h
Eric Dumazet 0dbffbb533 net: annotate data race around sk_ll_usec
sk_ll_usec is read locklessly from sk_can_busy_loop()
while another thread can change its value in sock_setsockopt()

This is correct but needs annotations.

BUG: KCSAN: data-race in __skb_try_recv_datagram / sock_setsockopt

write to 0xffff88814eb5f904 of 4 bytes by task 14011 on cpu 0:
 sock_setsockopt+0x1287/0x2090 net/core/sock.c:1175
 __sys_setsockopt+0x14f/0x200 net/socket.c:2100
 __do_sys_setsockopt net/socket.c:2115 [inline]
 __se_sys_setsockopt net/socket.c:2112 [inline]
 __x64_sys_setsockopt+0x62/0x70 net/socket.c:2112
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff88814eb5f904 of 4 bytes by task 14001 on cpu 1:
 sk_can_busy_loop include/net/busy_poll.h:41 [inline]
 __skb_try_recv_datagram+0x14f/0x320 net/core/datagram.c:273
 unix_dgram_recvmsg+0x14c/0x870 net/unix/af_unix.c:2101
 unix_seqpacket_recvmsg+0x5a/0x70 net/unix/af_unix.c:2067
 ____sys_recvmsg+0x15d/0x310 include/linux/uio.h:244
 ___sys_recvmsg net/socket.c:2598 [inline]
 do_recvmmsg+0x35c/0x9f0 net/socket.c:2692
 __sys_recvmmsg net/socket.c:2771 [inline]
 __do_sys_recvmmsg net/socket.c:2794 [inline]
 __se_sys_recvmmsg net/socket.c:2787 [inline]
 __x64_sys_recvmmsg+0xcf/0x150 net/socket.c:2787
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000000 -> 0x00000101

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 14001 Comm: syz-executor.3 Not tainted 5.13.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-01 11:23:50 -07:00

164 lines
3.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* net busy poll support
* Copyright(c) 2013 Intel Corporation.
*
* Author: Eliezer Tamir
*
* Contact Information:
* e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
*/
#ifndef _LINUX_NET_BUSY_POLL_H
#define _LINUX_NET_BUSY_POLL_H
#include <linux/netdevice.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <net/ip.h>
/* 0 - Reserved to indicate value not set
* 1..NR_CPUS - Reserved for sender_cpu
* NR_CPUS+1..~0 - Region available for NAPI IDs
*/
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
#define BUSY_POLL_BUDGET 8
#ifdef CONFIG_NET_RX_BUSY_POLL
struct napi_struct;
extern unsigned int sysctl_net_busy_read __read_mostly;
extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void)
{
return sysctl_net_busy_poll;
}
static inline bool sk_can_busy_loop(const struct sock *sk)
{
return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
}
bool sk_busy_loop_end(void *p, unsigned long start_time);
void napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
{
return 0;
}
static inline bool sk_can_busy_loop(struct sock *sk)
{
return false;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long busy_loop_current_time(void)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
return (unsigned long)(local_clock() >> 10);
#else
return 0;
#endif
}
/* in poll/select we use the global sysctl_net_ll_poll value */
static inline bool busy_loop_timeout(unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll);
if (bp_usec) {
unsigned long end_time = start_time + bp_usec;
unsigned long now = busy_loop_current_time();
return time_after(now, end_time);
}
#endif
return true;
}
static inline bool sk_busy_loop_timeout(struct sock *sk,
unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec);
if (bp_usec) {
unsigned long end_time = start_time + bp_usec;
unsigned long now = busy_loop_current_time();
return time_after(now, end_time);
}
#endif
return true;
}
static inline void sk_busy_loop(struct sock *sk, int nonblock)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
if (napi_id >= MIN_NAPI_ID)
napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
READ_ONCE(sk->sk_prefer_busy_poll),
READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
#endif
}
/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,
struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
/* If the skb was already marked with a valid NAPI ID, avoid overwriting
* it.
*/
if (skb->napi_id < MIN_NAPI_ID)
skb->napi_id = napi->napi_id;
#endif
}
/* used in the protocol hanlder to propagate the napi_id to the socket */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
sk_rx_queue_set(sk, skb);
}
static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
if (!READ_ONCE(sk->sk_napi_id))
WRITE_ONCE(sk->sk_napi_id, napi_id);
#endif
}
/* variant used for unconnected sockets */
static inline void sk_mark_napi_id_once(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
__sk_mark_napi_id_once(sk, skb->napi_id);
#endif
}
static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
const struct xdp_buff *xdp)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
__sk_mark_napi_id_once(sk, xdp->rxq->napi_id);
#endif
}
#endif /* _LINUX_NET_BUSY_POLL_H */