mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-07 14:32:23 +00:00
0a78cf7264
Dae R. Jeong reported a NULL deref in raw_get_next() [0].

It seems that the repro was running these sequences in parallel so that one thread was iterating on a socket that was being freed in another netns.

  unshare(0x40060200)
  r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
  socket$inet_icmp_raw(0x2, 0x3, 0x1)
  pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)

After commit 0daf07e527
("raw: convert raw sockets to RCU"), we use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW sockets. However, we should use spinlock for slow paths to avoid the NULL deref. Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object is not reused during iteration in the grace period. In fact, the lockless readers do not check the nulls marker with get_nulls_value(). So, SOCK_RAW should use hlist instead of hlist_nulls. Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(), let's convert hlist_nulls to hlist and use sk_for_each_rcu() for fast paths and sk_for_each() and spinlock for /proc/net/raw. [0]: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 
000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225 seq_read+0x224/0x320 fs/seq_file.c:162 pde_read fs/proc/inode.c:316 [inline] proc_reg_read+0x23f/0x330 fs/proc/inode.c:328 vfs_read+0x31e/0xd30 fs/read_write.c:468 ksys_pread64 fs/read_write.c:665 [inline] __do_sys_pread64 fs/read_write.c:675 [inline] __se_sys_pread64 fs/read_write.c:672 [inline] __x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x478d29 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011 RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29 RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000 R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740 R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 
0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

Fixes: 0daf07e527
("raw: convert raw sockets to RCU") Reported-by: syzbot <syzkaller@googlegroups.com> Reported-by: Dae R. Jeong <threeearcat@gmail.com> Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/ Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
103 lines
2.5 KiB
C
103 lines
2.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Definitions for the RAW-IP module.
|
|
*
|
|
* Version: @(#)raw.h 1.0.2 05/07/93
|
|
*
|
|
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
|
|
*/
|
|
#ifndef _RAW_H
|
|
#define _RAW_H
|
|
|
|
#include <net/inet_sock.h>
|
|
#include <net/protocol.h>
|
|
#include <net/netns/hash.h>
|
|
#include <linux/hash.h>
|
|
#include <linux/icmp.h>
|
|
|
|
extern struct proto raw_prot;
|
|
|
|
extern struct raw_hashinfo raw_v4_hashinfo;
|
|
bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
|
|
__be32 raddr, __be32 laddr, int dif, int sdif);
|
|
|
|
int raw_abort(struct sock *sk, int err);
|
|
void raw_icmp_error(struct sk_buff *, int, u32);
|
|
int raw_local_deliver(struct sk_buff *, int);
|
|
|
|
int raw_rcv(struct sock *, struct sk_buff *);
|
|
|
|
#define RAW_HTABLE_LOG 8
|
|
#define RAW_HTABLE_SIZE (1U << RAW_HTABLE_LOG)
|
|
|
|
struct raw_hashinfo {
|
|
spinlock_t lock;
|
|
|
|
struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
|
|
};
|
|
|
|
/*
 * Hash a (network namespace, protocol number) pair.
 *
 * The namespace's hash mix is XORed with @proto and the result is passed
 * through hash_32() with RAW_HTABLE_LOG as the bit count.
 */
static inline u32 raw_hashfunc(const struct net *net, u32 proto)
{
	u32 key = net_hash_mix(net) ^ proto;

	return hash_32(key, RAW_HTABLE_LOG);
}
|
|
|
|
static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
|
|
{
|
|
int i;
|
|
|
|
spin_lock_init(&hashinfo->lock);
|
|
for (i = 0; i < RAW_HTABLE_SIZE; i++)
|
|
INIT_HLIST_HEAD(&hashinfo->ht[i]);
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
/*
 * Only declared when CONFIG_PROC_FS is set.
 * NOTE(review): presumably these register and unregister the raw-socket
 * procfs entry (/proc/net/raw) - confirm against the definitions.
 */
int raw_proc_init(void);
void raw_proc_exit(void);
|
|
|
|
struct raw_iter_state {
|
|
struct seq_net_private p;
|
|
int bucket;
|
|
};
|
|
|
|
static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
|
|
{
|
|
return seq->private;
|
|
}
|
|
/*
 * seq_file-style start/next/stop callbacks for walking raw sockets.
 * NOTE(review): they presumably back /proc/net/raw; the definitions are
 * not visible from this header.
 */
void *raw_seq_start(struct seq_file *seq, loff_t *pos);
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void raw_seq_stop(struct seq_file *seq, void *v);
|
|
#endif
|
|
|
|
/*
 * NOTE(review): by their names, raw_hash_sk()/raw_unhash_sk() presumably
 * insert @sk into and remove it from the raw-socket hash table, and
 * raw_init() performs one-time module setup - confirm against the
 * definitions, which are outside this header.
 */
int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
void raw_init(void);
|
|
|
|
struct raw_sock {
|
|
/* inet_sock has to be the first member */
|
|
struct inet_sock inet;
|
|
struct icmp_filter filter;
|
|
u32 ipmr_table;
|
|
};
|
|
|
|
/*
 * View @sk as the struct raw_sock that contains it.
 *
 * This is a plain pointer cast, so it relies on the embedded inet_sock
 * being the first member of struct raw_sock (and, presumably, on struct
 * sock sitting at offset 0 of struct inet_sock - not visible here).
 * Note that the cast also drops the const qualifier of @sk.
 */
static inline struct raw_sock *raw_sk(const struct sock *sk)
{
	struct raw_sock *rs = (struct raw_sock *)sk;

	return rs;
}
|
|
|
|
/*
 * Wrapper around inet_bound_dev_eq() for raw sockets.
 *
 * With CONFIG_NET_L3_MASTER_DEV enabled, the first argument handed to
 * inet_bound_dev_eq() is the namespace's sysctl_raw_l3mdev_accept value
 * (read once with READ_ONCE()); otherwise it is the constant true and
 * @net is not consulted.  @bound_dev_if, @dif and @sdif are forwarded
 * unchanged.
 */
static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
				       int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	const bool l3mdev_accept = READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept);
#else
	const bool l3mdev_accept = true;
#endif

	return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif);
}
|
|
|
|
#endif /* _RAW_H */
|