mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
net: implement lockless setsockopt(SO_PEEK_OFF)
syzbot reported a lockdep violation [1] involving af_unix
support of SO_PEEK_OFF.
Since SO_PEEK_OFF is inherently not thread safe (it uses a per-socket
sk_peek_off field), there is really no point to enforce a pointless
thread safety in the kernel.
After this patch :
- setsockopt(SO_PEEK_OFF) no longer acquires the socket lock.
- skb_consume_udp() no longer has to acquire the socket lock.
- af_unix no longer needs a special version of sk_set_peek_off(),
because it does not lock u->iolock anymore.
As a followup, we could replace prot->set_peek_off to be a boolean
and avoid an indirect call, since we always use sk_set_peek_off().
[1]
WARNING: possible circular locking dependency detected
6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Not tainted
syz-executor.2/30025 is trying to acquire lock:
ffff8880765e7d80 (&u->iolock){+.+.}-{3:3}, at: unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
but task is already holding lock:
ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (sk_lock-AF_UNIX){+.+.}-{0:0}:
lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
lock_sock_nested+0x48/0x100 net/core/sock.c:3524
lock_sock include/net/sock.h:1691 [inline]
__unix_dgram_recvmsg+0x1275/0x12c0 net/unix/af_unix.c:2415
sock_recvmsg_nosec+0x18e/0x1d0 net/socket.c:1046
____sys_recvmsg+0x3c0/0x470 net/socket.c:2801
___sys_recvmsg net/socket.c:2845 [inline]
do_recvmmsg+0x474/0xae0 net/socket.c:2939
__sys_recvmmsg net/socket.c:3018 [inline]
__do_sys_recvmmsg net/socket.c:3041 [inline]
__se_sys_recvmmsg net/socket.c:3034 [inline]
__x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034
do_syscall_64+0xf9/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
-> #0 (&u->iolock){+.+.}-{3:3}:
check_prev_add kernel/locking/lockdep.c:3134 [inline]
check_prevs_add kernel/locking/lockdep.c:3253 [inline]
validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
__lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
__mutex_lock_common kernel/locking/mutex.c:608 [inline]
__mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
sk_setsockopt+0x207e/0x3360
do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
__sys_setsockopt+0x1ad/0x250 net/socket.c:2334
__do_sys_setsockopt net/socket.c:2343 [inline]
__se_sys_setsockopt net/socket.c:2340 [inline]
__x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
do_syscall_64+0xf9/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(sk_lock-AF_UNIX);
lock(&u->iolock);
lock(sk_lock-AF_UNIX);
lock(&u->iolock);
*** DEADLOCK ***
1 lock held by syz-executor.2/30025:
#0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
#0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
#0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193
stack backtrace:
CPU: 0 PID: 30025 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106
check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187
check_prev_add kernel/locking/lockdep.c:3134 [inline]
check_prevs_add kernel/locking/lockdep.c:3253 [inline]
validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
__lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
__mutex_lock_common kernel/locking/mutex.c:608 [inline]
__mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
sk_setsockopt+0x207e/0x3360
do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
__sys_setsockopt+0x1ad/0x250 net/socket.c:2334
__do_sys_setsockopt net/socket.c:2343 [inline]
__se_sys_setsockopt net/socket.c:2340 [inline]
__x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
do_syscall_64+0xf9/0x240
entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7f78a1c7dda9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f78a0fde0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00007f78a1dac050 RCX: 00007f78a1c7dda9
RDX: 000000000000002a RSI: 0000000000000001 RDI: 0000000000000006
RBP: 00007f78a1cca47a R08: 0000000000000004 R09: 0000000000000000
R10: 0000000020000180 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000006e R14: 00007f78a1dac050 R15: 00007ffe5cd81ae8
Fixes: 859051dd16
("bpf: Implement cgroup sockaddr hooks for unix sockets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Cc: Daan De Meyer <daan.j.demeyer@gmail.com>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Cc: David Ahern <dsahern@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
3b1ae9b71c
commit
56667da739
@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
*/
|
*/
|
||||||
WRITE_ONCE(sk->sk_txrehash, (u8)val);
|
WRITE_ONCE(sk->sk_txrehash, (u8)val);
|
||||||
return 0;
|
return 0;
|
||||||
|
case SO_PEEK_OFF:
|
||||||
|
{
|
||||||
|
int (*set_peek_off)(struct sock *sk, int val);
|
||||||
|
|
||||||
|
set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
|
||||||
|
if (set_peek_off)
|
||||||
|
ret = set_peek_off(sk, val);
|
||||||
|
else
|
||||||
|
ret = -EOPNOTSUPP;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sockopt_lock_sock(sk);
|
sockopt_lock_sock(sk);
|
||||||
@ -1430,18 +1441,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
|
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SO_PEEK_OFF:
|
|
||||||
{
|
|
||||||
int (*set_peek_off)(struct sock *sk, int val);
|
|
||||||
|
|
||||||
set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
|
|
||||||
if (set_peek_off)
|
|
||||||
ret = set_peek_off(sk, val);
|
|
||||||
else
|
|
||||||
ret = -EOPNOTSUPP;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case SO_NOFCS:
|
case SO_NOFCS:
|
||||||
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
|
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
|
||||||
break;
|
break;
|
||||||
|
@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
|
|||||||
|
|
||||||
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
|
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
|
||||||
{
|
{
|
||||||
if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
|
sk_peek_offset_bwd(sk, len);
|
||||||
bool slow = lock_sock_fast(sk);
|
|
||||||
|
|
||||||
sk_peek_offset_bwd(sk, len);
|
|
||||||
unlock_sock_fast(sk, slow);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!skb_unref(skb))
|
if (!skb_unref(skb))
|
||||||
return;
|
return;
|
||||||
|
@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
|
|||||||
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
|
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
|
||||||
int);
|
int);
|
||||||
|
|
||||||
static int unix_set_peek_off(struct sock *sk, int val)
|
|
||||||
{
|
|
||||||
struct unix_sock *u = unix_sk(sk);
|
|
||||||
|
|
||||||
if (mutex_lock_interruptible(&u->iolock))
|
|
||||||
return -EINTR;
|
|
||||||
|
|
||||||
WRITE_ONCE(sk->sk_peek_off, val);
|
|
||||||
mutex_unlock(&u->iolock);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
static int unix_count_nr_fds(struct sock *sk)
|
static int unix_count_nr_fds(struct sock *sk)
|
||||||
{
|
{
|
||||||
@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
|
|||||||
.read_skb = unix_stream_read_skb,
|
.read_skb = unix_stream_read_skb,
|
||||||
.mmap = sock_no_mmap,
|
.mmap = sock_no_mmap,
|
||||||
.splice_read = unix_stream_splice_read,
|
.splice_read = unix_stream_splice_read,
|
||||||
.set_peek_off = unix_set_peek_off,
|
.set_peek_off = sk_set_peek_off,
|
||||||
.show_fdinfo = unix_show_fdinfo,
|
.show_fdinfo = unix_show_fdinfo,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
|
|||||||
.read_skb = unix_read_skb,
|
.read_skb = unix_read_skb,
|
||||||
.recvmsg = unix_dgram_recvmsg,
|
.recvmsg = unix_dgram_recvmsg,
|
||||||
.mmap = sock_no_mmap,
|
.mmap = sock_no_mmap,
|
||||||
.set_peek_off = unix_set_peek_off,
|
.set_peek_off = sk_set_peek_off,
|
||||||
.show_fdinfo = unix_show_fdinfo,
|
.show_fdinfo = unix_show_fdinfo,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
|
|||||||
.sendmsg = unix_seqpacket_sendmsg,
|
.sendmsg = unix_seqpacket_sendmsg,
|
||||||
.recvmsg = unix_seqpacket_recvmsg,
|
.recvmsg = unix_seqpacket_recvmsg,
|
||||||
.mmap = sock_no_mmap,
|
.mmap = sock_no_mmap,
|
||||||
.set_peek_off = unix_set_peek_off,
|
.set_peek_off = sk_set_peek_off,
|
||||||
.show_fdinfo = unix_show_fdinfo,
|
.show_fdinfo = unix_show_fdinfo,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user