2019-06-04 10:11:33 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2020-07-22 17:32:06 +01:00
|
|
|
/* L2TP core.
|
2010-04-02 06:18:33 +00:00
|
|
|
*
|
|
|
|
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
|
|
|
|
*
|
|
|
|
* This file contains some code of the original L2TPv2 pppol2tp
|
|
|
|
* driver, which has the following copyright:
|
|
|
|
*
|
|
|
|
* Authors: Martijn van Oosterhout <kleptog@svana.org>
|
|
|
|
* James Chapman (jchapman@katalix.com)
|
|
|
|
* Contributors:
|
|
|
|
* Michal Ostrowski <mostrows@speakeasy.net>
|
|
|
|
* Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
|
|
|
|
* David S. Miller (davem@redhat.com)
|
|
|
|
*/
|
|
|
|
|
2012-05-16 09:55:56 +00:00
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/list.h>
|
2010-04-02 06:19:16 +00:00
|
|
|
#include <linux/rculist.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
#include <linux/uaccess.h>
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/kthread.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/jiffies.h>
|
|
|
|
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/net.h>
|
|
|
|
#include <linux/inetdevice.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/init.h>
|
2010-04-02 06:19:00 +00:00
|
|
|
#include <linux/in.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
#include <linux/ip.h>
|
|
|
|
#include <linux/udp.h>
|
2010-04-02 06:19:00 +00:00
|
|
|
#include <linux/l2tp.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
#include <linux/sort.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/nsproxy.h>
|
|
|
|
#include <net/net_namespace.h>
|
|
|
|
#include <net/netns/generic.h>
|
|
|
|
#include <net/dst.h>
|
|
|
|
#include <net/ip.h>
|
|
|
|
#include <net/udp.h>
|
2014-07-13 19:49:48 -07:00
|
|
|
#include <net/udp_tunnel.h>
|
2010-04-02 06:19:10 +00:00
|
|
|
#include <net/inet_common.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
#include <net/xfrm.h>
|
2010-04-02 06:19:00 +00:00
|
|
|
#include <net/protocol.h>
|
2012-04-27 08:24:18 +00:00
|
|
|
#include <net/inet6_connection_sock.h>
|
|
|
|
#include <net/inet_ecn.h>
|
|
|
|
#include <net/ip6_route.h>
|
2012-04-30 13:21:28 -04:00
|
|
|
#include <net/ip6_checksum.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
#include <asm/byteorder.h>
|
2011-07-26 16:09:06 -07:00
|
|
|
#include <linux/atomic.h>
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
#include "l2tp_core.h"
|
|
|
|
|
2020-08-22 15:59:04 +01:00
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
|
#include "trace.h"
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
#define L2TP_DRV_VERSION "V2.0"
|
|
|
|
|
|
|
|
/* L2TP header constants */
|
|
|
|
#define L2TP_HDRFLAG_T 0x8000
|
|
|
|
#define L2TP_HDRFLAG_L 0x4000
|
|
|
|
#define L2TP_HDRFLAG_S 0x0800
|
|
|
|
#define L2TP_HDRFLAG_O 0x0200
|
|
|
|
#define L2TP_HDRFLAG_P 0x0100
|
|
|
|
|
|
|
|
#define L2TP_HDR_VER_MASK 0x000F
|
|
|
|
#define L2TP_HDR_VER_2 0x0002
|
2010-04-02 06:18:49 +00:00
|
|
|
#define L2TP_HDR_VER_3 0x0003
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
/* L2TPv3 default L2-specific sublayer */
|
|
|
|
#define L2TP_SLFLAG_S 0x40000000
|
|
|
|
#define L2TP_SL_SEQ_MASK 0x00ffffff
|
|
|
|
|
2019-01-31 15:18:56 +08:00
|
|
|
#define L2TP_HDR_SIZE_MAX 14
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
/* Default trace flags */
|
|
|
|
#define L2TP_DEFAULT_DEBUG_FLAGS 0
|
|
|
|
|
l2tp: fix lockdep splat
When l2tp tunnels use a socket provided by userspace, we can hit
lockdep splats like the below when data is transmitted through another
(unrelated) userspace socket which then gets routed over l2tp.
This issue was previously discussed here:
https://lore.kernel.org/netdev/87sfialu2n.fsf@cloudflare.com/
The solution is to have lockdep treat socket locks of l2tp tunnel
sockets separately from those of standard INET sockets. To do so, use
a different lockdep subclass where lock nesting is possible.
============================================
WARNING: possible recursive locking detected
6.10.0+ #34 Not tainted
--------------------------------------------
iperf3/771 is trying to acquire lock:
ffff8881027601d8 (slock-AF_INET/1){+.-.}-{2:2}, at: l2tp_xmit_skb+0x243/0x9d0
but task is already holding lock:
ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(slock-AF_INET/1);
lock(slock-AF_INET/1);
*** DEADLOCK ***
May be due to missing lock nesting notation
10 locks held by iperf3/771:
#0: ffff888102650258 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendmsg+0x1a/0x40
#1: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#2: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#3: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x28b/0x9f0
#4: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0xf9/0x260
#5: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
#6: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#7: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#8: ffffffff822ac1e0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0xcc/0x1450
#9: ffff888101f33258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock#2){+...}-{2:2}, at: __dev_queue_xmit+0x513/0x1450
stack backtrace:
CPU: 2 UID: 0 PID: 771 Comm: iperf3 Not tainted 6.10.0+ #34
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x69/0xa0
dump_stack+0xc/0x20
__lock_acquire+0x135d/0x2600
? srso_alias_return_thunk+0x5/0xfbef5
lock_acquire+0xc4/0x2a0
? l2tp_xmit_skb+0x243/0x9d0
? __skb_checksum+0xa3/0x540
_raw_spin_lock_nested+0x35/0x50
? l2tp_xmit_skb+0x243/0x9d0
l2tp_xmit_skb+0x243/0x9d0
l2tp_eth_dev_xmit+0x3c/0xc0
dev_hard_start_xmit+0x11e/0x420
sch_direct_xmit+0xc3/0x640
__dev_queue_xmit+0x61c/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
__tcp_send_ack+0x1b8/0x340
tcp_send_ack+0x23/0x30
__tcp_ack_snd_check+0xa8/0x530
? srso_alias_return_thunk+0x5/0xfbef5
tcp_rcv_established+0x412/0xd70
tcp_v4_do_rcv+0x299/0x420
tcp_v4_rcv+0x1991/0x1e10
ip_protocol_deliver_rcu+0x50/0x220
ip_local_deliver_finish+0x158/0x260
ip_local_deliver+0xc8/0xe0
ip_rcv+0xe5/0x1d0
? __pfx_ip_rcv+0x10/0x10
__netif_receive_skb_one_core+0xce/0xe0
? process_backlog+0x28b/0x9f0
__netif_receive_skb+0x34/0xd0
? process_backlog+0x28b/0x9f0
process_backlog+0x2cb/0x9f0
__napi_poll.constprop.0+0x61/0x280
net_rx_action+0x332/0x670
? srso_alias_return_thunk+0x5/0xfbef5
? find_held_lock+0x2b/0x80
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
handle_softirqs+0xda/0x480
? __dev_queue_xmit+0xa2c/0x1450
do_softirq+0xa1/0xd0
</IRQ>
<TASK>
__local_bh_enable_ip+0xc8/0xe0
? __dev_queue_xmit+0xa2c/0x1450
__dev_queue_xmit+0xa48/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
tcp_write_xmit+0x766/0x2fb0
? __entry_text_end+0x102ba9/0x102bad
? srso_alias_return_thunk+0x5/0xfbef5
? __might_fault+0x74/0xc0
? srso_alias_return_thunk+0x5/0xfbef5
__tcp_push_pending_frames+0x56/0x190
tcp_push+0x117/0x310
tcp_sendmsg_locked+0x14c1/0x1740
tcp_sendmsg+0x28/0x40
inet_sendmsg+0x5d/0x90
sock_write_iter+0x242/0x2b0
vfs_write+0x68d/0x800
? __pfx_sock_write_iter+0x10/0x10
ksys_write+0xc8/0xf0
__x64_sys_write+0x3d/0x50
x64_sys_call+0xfaf/0x1f50
do_syscall_64+0x6d/0x140
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f4d143af992
Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> e9 01 cc ff ff 41 54 b8 02 00 00 0
RSP: 002b:00007ffd65032058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4d143af992
RDX: 0000000000000025 RSI: 00007f4d143f3bcc RDI: 0000000000000005
RBP: 00007f4d143f2b28 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4d143f3bcc
R13: 0000000000000005 R14: 0000000000000000 R15: 00007ffd650323f0
</TASK>
Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()")
Suggested-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+6acef9e0a4d1f46c83d4@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6acef9e0a4d1f46c83d4
CC: gnault@redhat.com
CC: cong.wang@bytedance.com
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
Link: https://patch.msgid.link/20240806160626.1248317-1-jchapman@katalix.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-08-06 17:06:26 +01:00
|
|
|
/* Lockdep subclass for L2TP tunnel socket locks. It must differ from
 * SINGLE_DEPTH_NESTING, which the preprocessor check that follows enforces.
 */
#define L2TP_DEPTH_NESTING 2
|
|
|
|
#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
|
|
|
|
#error "L2TP requires its own lockdep subclass"
|
|
|
|
#endif
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
/* Private data stored for received packets in the skb.
|
|
|
|
*/
|
|
|
|
struct l2tp_skb_cb {
|
2010-04-02 06:18:49 +00:00
|
|
|
u32 ns;
|
2010-04-02 06:18:33 +00:00
|
|
|
u16 has_seq;
|
|
|
|
u16 length;
|
|
|
|
unsigned long expires;
|
|
|
|
};
|
|
|
|
|
2020-07-22 17:32:14 +01:00
|
|
|
/* Return the struct l2tp_skb_cb stored in skb->cb[], located at the byte
 * offset sizeof(struct inet_skb_parm) from the start of the cb array.
 */
#define L2TP_SKB_CB(skb)	((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)])
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2013-01-31 23:43:00 +00:00
|
|
|
static struct workqueue_struct *l2tp_wq;
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
/* per-net private data for this module */
|
|
|
|
static unsigned int l2tp_net_id;
|
|
|
|
struct l2tp_net {
|
2023-01-13 19:01:36 -08:00
|
|
|
/* Lock for write access to l2tp_tunnel_idr */
|
|
|
|
spinlock_t l2tp_tunnel_idr_lock;
|
|
|
|
struct idr l2tp_tunnel_idr;
|
2024-06-20 12:22:39 +01:00
|
|
|
/* Lock for write access to l2tp_v[23]_session_idr/htable */
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
spinlock_t l2tp_session_idr_lock;
|
2024-06-20 12:22:39 +01:00
|
|
|
struct idr l2tp_v2_session_idr;
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
struct idr l2tp_v3_session_idr;
|
|
|
|
struct hlist_head l2tp_v3_session_htable[16];
|
2010-04-02 06:18:33 +00:00
|
|
|
};
|
|
|
|
|
2024-08-07 07:54:45 +01:00
|
|
|
/* Build the 32-bit L2TPv2 session key: tunnel_id occupies the upper
 * 16 bits and session_id the lower 16 bits.
 */
static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
{
	u32 key = (u32)tunnel_id << 16;

	key |= session_id;

	return key;
}
|
|
|
|
|
2024-08-07 07:54:45 +01:00
|
|
|
/* Build an L2TPv3 session hash key by adding session_id to the address
 * of the sock, so the key depends on both values.
 */
static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
{
	unsigned long key = (unsigned long)sk;

	key += session_id;

	return key;
}
|
|
|
|
|
2018-03-12 14:54:24 +01:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
/* Return true when sk is a PF_INET6 socket whose destination address is
 * not an IPv4-mapped IPv6 address.
 */
static bool l2tp_sk_is_v6(struct sock *sk)
{
	if (sk->sk_family != PF_INET6)
		return false;

	return !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
}
#endif
|
2010-10-21 07:50:46 +00:00
|
|
|
|
2024-08-07 07:54:45 +01:00
|
|
|
static struct l2tp_net *l2tp_pernet(const struct net *net)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
|
|
|
return net_generic(net, l2tp_net_id);
|
|
|
|
}
|
|
|
|
|
2020-07-28 18:20:29 +01:00
|
|
|
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
|
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this may itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
|
|
|
{
|
l2tp: simplify tunnel and socket cleanup
When the l2tp tunnel socket used sk_user_data to point to its
associated l2tp tunnel, socket and tunnel cleanup had to make use of
the socket's destructor to free the tunnel only when the socket could
no longer be accessed.
Now that sk_user_data is no longer used, we can simplify socket and
tunnel cleanup:
* If the tunnel closes first, it cleans up and drops its socket ref
when the tunnel refcount drops to zero. If its socket was provided
by userspace, the socket is closed and freed asynchronously, when
userspace closes it. If its socket is a kernel socket, the tunnel
closes the socket itself during cleanup and drops its socket ref
when the tunnel's refcount drops to zero.
* If the socket closes first, we initiate the closing of its
associated tunnel. For UDP sockets, this is via the socket's
encap_destroy hook. For L2TPIP sockets, this is via the socket's
destroy callback. The tunnel holds a socket ref while it
references the sock. When the tunnel is freed, it drops its socket
ref and the socket will be cleaned up when its own refcount drops
to zero, asynchronous to the tunnel free.
* The tunnel socket destructor is no longer needed since the tunnel
is no longer freed through the socket destructor.
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-07-29 16:38:06 +01:00
|
|
|
struct sock *sk = tunnel->sock;
|
|
|
|
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_free_tunnel(tunnel);
|
l2tp: simplify tunnel and socket cleanup
When the l2tp tunnel socket used sk_user_data to point to its
associated l2tp tunnel, socket and tunnel cleanup had to make use of
the socket's destructor to free the tunnel only when the socket could
no longer be accessed.
Now that sk_user_data is no longer used, we can simplify socket and
tunnel cleanup:
* If the tunnel closes first, it cleans up and drops its socket ref
when the tunnel refcount drops to zero. If its socket was provided
by userspace, the socket is closed and freed asynchronously, when
userspace closes it. If its socket is a kernel socket, the tunnel
closes the socket itself during cleanup and drops its socket ref
when the tunnel's refcount drops to zero.
* If the socket closes first, we initiate the closing of its
associated tunnel. For UDP sockets, this is via the socket's
encap_destroy hook. For L2TPIP sockets, this is via the socket's
destroy callback. The tunnel holds a socket ref while it
references the sock. When the tunnel is freed, it drops its socket
ref and the socket will be cleaned up when its own refcount drops
to zero, asynchronous to the tunnel free.
* The tunnel socket destructor is no longer needed since the tunnel
is no longer freed through the socket destructor.
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-07-29 16:38:06 +01:00
|
|
|
|
|
|
|
if (sk) {
|
|
|
|
/* Disable udp encapsulation */
|
|
|
|
switch (tunnel->encap) {
|
|
|
|
case L2TP_ENCAPTYPE_UDP:
|
|
|
|
/* No longer an encapsulation socket. See net/ipv4/udp.c */
|
|
|
|
WRITE_ONCE(udp_sk(sk)->encap_type, 0);
|
|
|
|
udp_sk(sk)->encap_rcv = NULL;
|
|
|
|
udp_sk(sk)->encap_destroy = NULL;
|
|
|
|
break;
|
|
|
|
case L2TP_ENCAPTYPE_IP:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
tunnel->sock = NULL;
|
|
|
|
sock_put(sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
kfree_rcu(tunnel, rcu);
|
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this may itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
|
|
|
}
|
2020-07-28 18:20:29 +01:00
|
|
|
|
|
|
|
static void l2tp_session_free(struct l2tp_session *session)
|
|
|
|
{
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_free_session(session);
|
2020-09-03 09:54:51 +01:00
|
|
|
if (session->tunnel)
|
|
|
|
l2tp_tunnel_dec_refcount(session->tunnel);
|
2024-07-29 16:38:08 +01:00
|
|
|
kfree_rcu(session, rcu);
|
2020-09-03 09:54:51 +01:00
|
|
|
}
|
2020-08-22 15:59:06 +01:00
|
|
|
|
2024-07-29 16:38:00 +01:00
|
|
|
struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk)
|
|
|
|
{
|
|
|
|
const struct net *net = sock_net(sk);
|
|
|
|
unsigned long tunnel_id, tmp;
|
|
|
|
struct l2tp_tunnel *tunnel;
|
|
|
|
struct l2tp_net *pn;
|
|
|
|
|
|
|
|
rcu_read_lock_bh();
|
|
|
|
pn = l2tp_pernet(net);
|
|
|
|
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
|
|
|
|
if (tunnel &&
|
|
|
|
tunnel->sock == sk &&
|
|
|
|
refcount_inc_not_zero(&tunnel->ref_count)) {
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return tunnel;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
2020-07-28 18:20:29 +01:00
|
|
|
|
2024-07-29 16:38:00 +01:00
|
|
|
return NULL;
|
2020-07-28 18:20:29 +01:00
|
|
|
}
|
2020-09-03 09:54:51 +01:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel);
|
2020-07-28 18:20:29 +01:00
|
|
|
|
|
|
|
void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
|
|
|
|
{
|
|
|
|
refcount_inc(&tunnel->ref_count);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount);
|
|
|
|
|
|
|
|
void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
|
|
|
|
{
|
|
|
|
if (refcount_dec_and_test(&tunnel->ref_count))
|
|
|
|
l2tp_tunnel_free(tunnel);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount);
|
|
|
|
|
|
|
|
void l2tp_session_inc_refcount(struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
refcount_inc(&session->ref_count);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount);
|
|
|
|
|
|
|
|
void l2tp_session_dec_refcount(struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
if (refcount_dec_and_test(&session->ref_count))
|
|
|
|
l2tp_session_free(session);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount);
|
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
|
|
|
|
2017-08-25 16:51:40 +02:00
|
|
|
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
|
|
|
|
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
|
|
|
|
{
|
|
|
|
const struct l2tp_net *pn = l2tp_pernet(net);
|
|
|
|
struct l2tp_tunnel *tunnel;
|
|
|
|
|
|
|
|
rcu_read_lock_bh();
|
2023-01-13 19:01:36 -08:00
|
|
|
tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
|
|
|
|
if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return tunnel;
|
2017-08-25 16:51:40 +02:00
|
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
|
|
|
|
|
2018-04-12 20:50:33 +02:00
|
|
|
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
|
|
|
|
{
|
2023-01-13 19:01:36 -08:00
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
|
|
|
unsigned long tunnel_id, tmp;
|
2018-04-12 20:50:33 +02:00
|
|
|
struct l2tp_tunnel *tunnel;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
rcu_read_lock_bh();
|
2023-01-13 19:01:36 -08:00
|
|
|
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
|
|
|
|
if (tunnel && ++count > nth &&
|
2019-04-30 06:27:58 -07:00
|
|
|
refcount_inc_not_zero(&tunnel->ref_count)) {
|
2018-04-12 20:50:33 +02:00
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return tunnel;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
|
|
|
|
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
/* Look up an L2TPv3 session by session ID.
 *
 * The per-net session IDR is consulted first. An IDR entry which is not
 * hashed in l2tp_v3_session_htable is returned directly. Otherwise, if
 * @sk is non-NULL, the hashtable is searched for the session with this
 * ID whose tunnel socket is @sk.
 *
 * Returns the session with a new reference held on it, or NULL.
 */
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
	const struct l2tp_net *pn = l2tp_pernet(net);
	struct l2tp_session *found = NULL;
	struct l2tp_session *session;

	rcu_read_lock_bh();

	session = idr_find(&pn->l2tp_v3_session_idr, session_id);
	if (!session)
		goto out;

	if (!hash_hashed(&session->hlist) &&
	    refcount_inc_not_zero(&session->ref_count)) {
		found = session;
		goto out;
	}

	/* If we get here, the session_id collides with one in another
	 * tunnel. If sk is non-NULL, find the session matching sk.
	 */
	if (sk) {
		unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id);

		hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
					   hlist, key) {
			/* session->tunnel may be NULL if another thread is in
			 * l2tp_session_register and has added an item to
			 * l2tp_v3_session_htable but hasn't yet added the
			 * session to its tunnel's session_list.
			 */
			struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);

			if (session->session_id != session_id)
				continue;
			if (!tunnel || tunnel->sock != sk)
				continue;

			if (refcount_inc_not_zero(&session->ref_count)) {
				found = session;
				break;
			}
		}
	}

out:
	rcu_read_unlock_bh();

	return found;
}
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_v3_session_get);
|
2017-03-31 13:02:25 +02:00
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
/* Look up an L2TPv2 session in the per-net v2 session IDR.
 *
 * The IDR key is derived from @tunnel_id and @session_id by
 * l2tp_v2_session_key(). Returns the session with a new reference held
 * on it, or NULL if there is no such session or a reference could not
 * be taken.
 */
struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id)
{
	u32 session_key = l2tp_v2_session_key(tunnel_id, session_id);
	const struct l2tp_net *pn = l2tp_pernet(net);
	struct l2tp_session *session;

	rcu_read_lock_bh();

	session = idr_find(&pn->l2tp_v2_session_idr, session_key);
	if (session && !refcount_inc_not_zero(&session->ref_count))
		session = NULL;

	rcu_read_unlock_bh();

	return session;
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_v2_session_get);
|
|
|
|
|
2024-06-20 12:22:42 +01:00
|
|
|
/* Look up a session, dispatching on the L2TP protocol version.
 *
 * L2TP_HDR_VER_2 lookups use @tunnel_id and @session_id; any other
 * @pver is handled as L2TPv3 and uses @sk and @session_id.
 */
struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
				      u32 tunnel_id, u32 session_id)
{
	if (pver != L2TP_HDR_VER_2)
		return l2tp_v3_session_get(net, sk, session_id);

	return l2tp_v2_session_get(net, tunnel_id, session_id);
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_get);
|
|
|
|
|
2017-10-31 17:36:42 +01:00
|
|
|
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
|
|
|
struct l2tp_session *session;
|
|
|
|
int count = 0;
|
|
|
|
|
2021-11-26 16:09:03 +00:00
|
|
|
rcu_read_lock_bh();
|
2024-06-20 12:22:44 +01:00
|
|
|
list_for_each_entry_rcu(session, &tunnel->session_list, list) {
|
|
|
|
if (++count > nth) {
|
|
|
|
l2tp_session_inc_refcount(session);
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return session;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
}
|
2021-11-26 16:09:03 +00:00
|
|
|
rcu_read_unlock_bh();
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-04-03 12:03:13 +02:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2010-04-02 06:19:10 +00:00
|
|
|
/* Lookup a session by interface name.
|
|
|
|
* This is very inefficient but is only used by management interfaces.
|
|
|
|
*/
|
2017-04-12 10:05:29 +02:00
|
|
|
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
|
2017-10-31 17:36:42 +01:00
|
|
|
const char *ifname)
|
2010-04-02 06:19:10 +00:00
|
|
|
{
|
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
2024-06-20 12:22:44 +01:00
|
|
|
unsigned long tunnel_id, tmp;
|
2010-04-02 06:19:10 +00:00
|
|
|
struct l2tp_session *session;
|
2024-06-20 12:22:44 +01:00
|
|
|
struct l2tp_tunnel *tunnel;
|
2010-04-02 06:19:10 +00:00
|
|
|
|
2010-04-02 06:19:16 +00:00
|
|
|
rcu_read_lock_bh();
|
2024-06-20 12:22:44 +01:00
|
|
|
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
|
|
|
|
if (tunnel) {
|
|
|
|
list_for_each_entry_rcu(session, &tunnel->session_list, list) {
|
|
|
|
if (!strcmp(session->ifname, ifname)) {
|
|
|
|
l2tp_session_inc_refcount(session);
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
|
|
|
|
return session;
|
|
|
|
}
|
2010-04-02 06:19:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-04-02 06:19:16 +00:00
|
|
|
rcu_read_unlock_bh();
|
2010-04-02 06:19:10 +00:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-03-31 13:02:30 +02:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
|
2010-04-02 06:19:10 +00:00
|
|
|
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
|
|
|
|
struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
l2tp_session_inc_refcount(session);
|
|
|
|
WARN_ON_ONCE(session->coll_list);
|
|
|
|
session->coll_list = clist;
|
|
|
|
spin_lock(&clist->lock);
|
|
|
|
list_add(&session->clist, &clist->list);
|
|
|
|
spin_unlock(&clist->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int l2tp_session_collision_add(struct l2tp_net *pn,
|
|
|
|
struct l2tp_session *session1,
|
|
|
|
struct l2tp_session *session2)
|
|
|
|
{
|
|
|
|
struct l2tp_session_coll_list *clist;
|
|
|
|
|
|
|
|
lockdep_assert_held(&pn->l2tp_session_idr_lock);
|
|
|
|
|
|
|
|
if (!session2)
|
|
|
|
return -EEXIST;
|
|
|
|
|
|
|
|
/* If existing session is in IP-encap tunnel, refuse new session */
|
|
|
|
if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP)
|
|
|
|
return -EEXIST;
|
|
|
|
|
|
|
|
clist = session2->coll_list;
|
|
|
|
if (!clist) {
|
|
|
|
/* First collision. Allocate list to manage the collided sessions
|
|
|
|
* and add the existing session to the list.
|
|
|
|
*/
|
|
|
|
clist = kmalloc(sizeof(*clist), GFP_ATOMIC);
|
|
|
|
if (!clist)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
spin_lock_init(&clist->lock);
|
|
|
|
INIT_LIST_HEAD(&clist->list);
|
|
|
|
refcount_set(&clist->ref_count, 1);
|
|
|
|
l2tp_session_coll_list_add(clist, session2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If existing session isn't already in the session hlist, add it. */
|
|
|
|
if (!hash_hashed(&session2->hlist))
|
2024-07-29 16:38:11 +01:00
|
|
|
hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist,
|
|
|
|
session2->hlist_key);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
|
|
|
|
/* Add new session to the hlist and collision list */
|
2024-07-29 16:38:11 +01:00
|
|
|
hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist,
|
|
|
|
session1->hlist_key);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
refcount_inc(&clist->ref_count);
|
|
|
|
l2tp_session_coll_list_add(clist, session1);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void l2tp_session_collision_del(struct l2tp_net *pn,
|
|
|
|
struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
struct l2tp_session_coll_list *clist = session->coll_list;
|
|
|
|
unsigned long session_key = session->session_id;
|
|
|
|
struct l2tp_session *session2;
|
|
|
|
|
|
|
|
lockdep_assert_held(&pn->l2tp_session_idr_lock);
|
|
|
|
|
2024-07-29 16:38:11 +01:00
|
|
|
hash_del_rcu(&session->hlist);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
|
|
|
|
if (clist) {
|
|
|
|
/* Remove session from its collision list. If there
|
|
|
|
* are other sessions with the same ID, replace this
|
|
|
|
* session's IDR entry with that session, otherwise
|
|
|
|
* remove the IDR entry. If this is the last session,
|
|
|
|
* the collision list data is freed.
|
|
|
|
*/
|
|
|
|
spin_lock(&clist->lock);
|
|
|
|
list_del_init(&session->clist);
|
|
|
|
session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist);
|
|
|
|
if (session2) {
|
|
|
|
void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key);
|
|
|
|
|
|
|
|
WARN_ON_ONCE(IS_ERR_VALUE(old));
|
|
|
|
} else {
|
|
|
|
void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key);
|
|
|
|
|
|
|
|
WARN_ON_ONCE(removed != session);
|
|
|
|
}
|
|
|
|
session->coll_list = NULL;
|
|
|
|
spin_unlock(&clist->lock);
|
|
|
|
if (refcount_dec_and_test(&clist->ref_count))
|
|
|
|
kfree(clist);
|
|
|
|
l2tp_session_dec_refcount(session);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-27 16:51:50 +02:00
|
|
|
int l2tp_session_register(struct l2tp_session *session,
|
|
|
|
struct l2tp_tunnel *tunnel)
|
2017-03-31 13:02:27 +02:00
|
|
|
{
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
|
2024-07-09 17:28:39 +01:00
|
|
|
struct l2tp_session *other_session = NULL;
|
2024-07-29 16:38:12 +01:00
|
|
|
void *old = NULL;
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
u32 session_key;
|
2017-09-01 17:58:48 +02:00
|
|
|
int err;
|
2017-03-31 13:02:27 +02:00
|
|
|
|
2024-06-20 12:22:44 +01:00
|
|
|
spin_lock_bh(&tunnel->list_lock);
|
2024-07-18 14:43:48 +01:00
|
|
|
spin_lock_bh(&pn->l2tp_session_idr_lock);
|
|
|
|
|
2017-09-01 17:58:48 +02:00
|
|
|
if (!tunnel->acpt_newsess) {
|
|
|
|
err = -ENODEV;
|
2024-07-18 14:43:48 +01:00
|
|
|
goto out;
|
2017-09-01 17:58:48 +02:00
|
|
|
}
|
|
|
|
|
2017-03-31 13:02:27 +02:00
|
|
|
if (tunnel->version == L2TP_HDR_VER_3) {
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
session_key = session->session_id;
|
|
|
|
err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL,
|
|
|
|
&session_key, session_key, GFP_ATOMIC);
|
2020-02-04 12:24:00 +13:00
|
|
|
/* IP encap expects session IDs to be globally unique, while
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
* UDP encap doesn't. This isn't per the RFC, which says that
|
|
|
|
* sessions are identified only by the session ID, but is to
|
|
|
|
* support existing userspace which depends on it.
|
2020-02-04 12:24:00 +13:00
|
|
|
*/
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) {
|
2024-07-09 17:28:39 +01:00
|
|
|
other_session = idr_find(&pn->l2tp_v3_session_idr,
|
|
|
|
session_key);
|
|
|
|
err = l2tp_session_collision_add(pn, session,
|
|
|
|
other_session);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
}
|
2024-06-20 12:22:39 +01:00
|
|
|
} else {
|
|
|
|
session_key = l2tp_v2_session_key(tunnel->tunnel_id,
|
|
|
|
session->session_id);
|
|
|
|
err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL,
|
|
|
|
&session_key, session_key, GFP_ATOMIC);
|
2017-03-31 13:02:27 +02:00
|
|
|
}
|
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
if (err) {
|
|
|
|
if (err == -ENOSPC)
|
|
|
|
err = -EEXIST;
|
2024-07-18 14:43:48 +01:00
|
|
|
goto out;
|
2024-06-20 12:22:39 +01:00
|
|
|
}
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
|
|
|
|
l2tp_tunnel_inc_refcount(tunnel);
|
2024-07-29 16:38:10 +01:00
|
|
|
WRITE_ONCE(session->tunnel, tunnel);
|
2024-07-29 16:38:11 +01:00
|
|
|
list_add_rcu(&session->list, &tunnel->session_list);
|
2017-03-31 13:02:27 +02:00
|
|
|
|
2024-07-29 16:38:12 +01:00
|
|
|
/* this makes session available to lockless getters */
|
2024-07-09 17:28:39 +01:00
|
|
|
if (tunnel->version == L2TP_HDR_VER_3) {
|
|
|
|
if (!other_session)
|
2024-07-29 16:38:12 +01:00
|
|
|
old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
|
2024-07-09 17:28:39 +01:00
|
|
|
} else {
|
2024-07-29 16:38:12 +01:00
|
|
|
old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
|
2024-07-09 17:28:39 +01:00
|
|
|
}
|
2020-08-22 15:59:06 +01:00
|
|
|
|
2024-07-29 16:38:12 +01:00
|
|
|
/* old should be NULL, unless something removed or modified
|
|
|
|
* the IDR entry after our idr_alloc_32 above (which shouldn't
|
|
|
|
* happen).
|
|
|
|
*/
|
|
|
|
WARN_ON_ONCE(old);
|
2024-07-18 14:43:48 +01:00
|
|
|
out:
|
|
|
|
spin_unlock_bh(&pn->l2tp_session_idr_lock);
|
2024-06-20 12:22:44 +01:00
|
|
|
spin_unlock_bh(&tunnel->list_lock);
|
2017-03-31 13:02:27 +02:00
|
|
|
|
2024-07-18 14:43:48 +01:00
|
|
|
if (!err)
|
|
|
|
trace_register_session(session);
|
|
|
|
|
2017-09-01 17:58:48 +02:00
|
|
|
return err;
|
2017-03-31 13:02:27 +02:00
|
|
|
}
|
2017-10-27 16:51:50 +02:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_register);
|
2017-03-31 13:02:27 +02:00
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
/*****************************************************************************
|
|
|
|
* Receive data handling
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/* Queue a skb in order. We come here only if the skb has an L2TP sequence
|
|
|
|
* number.
|
|
|
|
*/
|
|
|
|
static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct sk_buff *skbp;
|
|
|
|
struct sk_buff *tmp;
|
2010-04-02 06:18:49 +00:00
|
|
|
u32 ns = L2TP_SKB_CB(skb)->ns;
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
spin_lock_bh(&session->reorder_q.lock);
|
|
|
|
skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
|
|
|
|
if (L2TP_SKB_CB(skbp)->ns > ns) {
|
|
|
|
__skb_queue_before(&session->reorder_q, skbp, skb);
|
2013-03-19 06:11:22 +00:00
|
|
|
atomic_long_inc(&session->stats.rx_oos_packets);
|
2010-04-02 06:18:33 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
__skb_queue_tail(&session->reorder_q, skb);
|
|
|
|
|
|
|
|
out:
|
|
|
|
spin_unlock_bh(&session->reorder_q.lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Dequeue a single skb.
|
|
|
|
*/
|
|
|
|
static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct l2tp_tunnel *tunnel = session->tunnel;
|
|
|
|
int length = L2TP_SKB_CB(skb)->length;
|
|
|
|
|
|
|
|
/* We're about to requeue the skb, so return resources
|
|
|
|
* to its current owner (a socket receive buffer).
|
|
|
|
*/
|
|
|
|
skb_orphan(skb);
|
|
|
|
|
2013-03-19 06:11:22 +00:00
|
|
|
atomic_long_inc(&tunnel->stats.rx_packets);
|
|
|
|
atomic_long_add(length, &tunnel->stats.rx_bytes);
|
|
|
|
atomic_long_inc(&session->stats.rx_packets);
|
|
|
|
atomic_long_add(length, &session->stats.rx_bytes);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
if (L2TP_SKB_CB(skb)->has_seq) {
|
|
|
|
/* Bump our Nr */
|
|
|
|
session->nr++;
|
2013-07-02 20:28:59 +01:00
|
|
|
session->nr &= session->nr_max;
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_seqnum_update(session);
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* call private receive handler */
|
2020-07-23 12:29:50 +01:00
|
|
|
if (session->recv_skb)
|
2010-04-02 06:18:33 +00:00
|
|
|
(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
|
|
|
|
else
|
|
|
|
kfree_skb(skb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Dequeue skbs from the session's reorder_q, subject to packet order.
|
|
|
|
* Skbs that have been in the queue for too long are simply discarded.
|
|
|
|
*/
|
|
|
|
static void l2tp_recv_dequeue(struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct sk_buff *tmp;
|
|
|
|
|
|
|
|
/* If the pkt at the head of the queue has the nr that we
|
|
|
|
* expect to send up next, dequeue it and any other
|
|
|
|
* in-sequence packets behind it.
|
|
|
|
*/
|
2011-11-02 22:47:44 +00:00
|
|
|
start:
|
2010-04-02 06:18:33 +00:00
|
|
|
spin_lock_bh(&session->reorder_q.lock);
|
|
|
|
skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
|
2020-08-22 15:59:06 +01:00
|
|
|
struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb);
|
|
|
|
|
|
|
|
/* If the packet has been pending on the queue for too long, discard it */
|
|
|
|
if (time_after(jiffies, cb->expires)) {
|
2013-03-19 06:11:22 +00:00
|
|
|
atomic_long_inc(&session->stats.rx_seq_discards);
|
|
|
|
atomic_long_inc(&session->stats.rx_errors);
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_pkt_expired(session, cb->ns);
|
2012-05-09 23:43:08 +00:00
|
|
|
session->reorder_skip = 1;
|
2010-04-02 06:18:33 +00:00
|
|
|
__skb_unlink(skb, &session->reorder_q);
|
|
|
|
kfree_skb(skb);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-22 15:59:06 +01:00
|
|
|
if (cb->has_seq) {
|
2012-05-09 23:43:08 +00:00
|
|
|
if (session->reorder_skip) {
|
|
|
|
session->reorder_skip = 0;
|
2020-08-22 15:59:06 +01:00
|
|
|
session->nr = cb->ns;
|
|
|
|
trace_session_seqnum_reset(session);
|
2012-05-09 23:43:08 +00:00
|
|
|
}
|
2020-08-22 15:59:06 +01:00
|
|
|
if (cb->ns != session->nr)
|
2010-04-02 06:18:33 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
__skb_unlink(skb, &session->reorder_q);
|
|
|
|
|
|
|
|
/* Process the skb. We release the queue lock while we
|
|
|
|
* do so to let other contexts process the queue.
|
|
|
|
*/
|
|
|
|
spin_unlock_bh(&session->reorder_q.lock);
|
|
|
|
l2tp_recv_dequeue_skb(session, skb);
|
2011-11-02 22:47:44 +00:00
|
|
|
goto start;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
spin_unlock_bh(&session->reorder_q.lock);
|
|
|
|
}
|
|
|
|
|
2013-07-02 20:28:59 +01:00
|
|
|
/* Check sequence number @nr against the session's receive window.
 *
 * Computes how far @nr is ahead of session->nr, treating a smaller @nr as
 * having wrapped past session->nr_max. Returns non-zero if that distance is
 * below session->nr_window_size, else 0.
 */
static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr)
{
	u32 nws = (nr >= session->nr) ?
		  nr - session->nr :
		  (session->nr_max + 1) - (session->nr - nr);

	return nws < session->nr_window_size;
}
|
|
|
|
|
2013-07-02 20:28:58 +01:00
|
|
|
/* If packet has sequence numbers, queue it if acceptable. Returns 0 if
|
|
|
|
* acceptable, else non-zero.
|
|
|
|
*/
|
|
|
|
static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb)
|
|
|
|
{
|
2020-08-22 15:59:06 +01:00
|
|
|
struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb);
|
|
|
|
|
|
|
|
if (!l2tp_seq_check_rx_window(session, cb->ns)) {
|
2013-07-02 20:28:59 +01:00
|
|
|
/* Packet sequence number is outside allowed window.
|
|
|
|
* Discard it.
|
|
|
|
*/
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_pkt_outside_rx_window(session, cb->ns);
|
2013-07-02 20:28:59 +01:00
|
|
|
goto discard;
|
|
|
|
}
|
|
|
|
|
2013-07-02 20:28:58 +01:00
|
|
|
if (session->reorder_timeout != 0) {
|
|
|
|
/* Packet reordering enabled. Add skb to session's
|
|
|
|
* reorder queue, in order of ns.
|
|
|
|
*/
|
|
|
|
l2tp_recv_queue_skb(session, skb);
|
2013-07-02 20:29:00 +01:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Packet reordering disabled. Discard out-of-sequence packets, while
|
|
|
|
* tracking the number if in-sequence packets after the first OOS packet
|
|
|
|
* is seen. After nr_oos_count_max in-sequence packets, reset the
|
|
|
|
* sequence number to re-enable packet reception.
|
|
|
|
*/
|
2020-08-22 15:59:06 +01:00
|
|
|
if (cb->ns == session->nr) {
|
2013-07-02 20:29:00 +01:00
|
|
|
skb_queue_tail(&session->reorder_q, skb);
|
2013-07-02 20:28:58 +01:00
|
|
|
} else {
|
2020-08-22 15:59:06 +01:00
|
|
|
u32 nr_oos = cb->ns;
|
2013-07-02 20:29:00 +01:00
|
|
|
u32 nr_next = (session->nr_oos + 1) & session->nr_max;
|
|
|
|
|
|
|
|
if (nr_oos == nr_next)
|
|
|
|
session->nr_oos_count++;
|
|
|
|
else
|
|
|
|
session->nr_oos_count = 0;
|
|
|
|
|
|
|
|
session->nr_oos = nr_oos;
|
|
|
|
if (session->nr_oos_count > session->nr_oos_count_max) {
|
|
|
|
session->reorder_skip = 1;
|
|
|
|
}
|
|
|
|
if (!session->reorder_skip) {
|
2013-07-02 20:28:58 +01:00
|
|
|
atomic_long_inc(&session->stats.rx_seq_discards);
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_pkt_oos(session, cb->ns);
|
2013-07-02 20:28:58 +01:00
|
|
|
goto discard;
|
|
|
|
}
|
|
|
|
skb_queue_tail(&session->reorder_q, skb);
|
|
|
|
}
|
|
|
|
|
2013-07-02 20:29:00 +01:00
|
|
|
out:
|
2013-07-02 20:28:58 +01:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
discard:
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
/* Do receive processing of L2TP data frames. We handle both L2TPv2
|
|
|
|
* and L2TPv3 data frames here.
|
|
|
|
*
|
|
|
|
* L2TPv2 Data Message Header
|
|
|
|
*
|
|
|
|
* 0 1 2 3
|
|
|
|
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Tunnel ID | Session ID |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Ns (opt) | Nr (opt) |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Offset Size (opt) | Offset pad... (opt)
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
*
|
|
|
|
* Data frames are marked by T=0. All other fields are the same as
|
|
|
|
* those in L2TP control frames.
|
|
|
|
*
|
|
|
|
* L2TPv3 Data Message Header
|
|
|
|
*
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | L2TP Session Header |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | L2-Specific Sublayer |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Tunnel Payload ...
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
*
|
|
|
|
* L2TPv3 Session Header Over IP
|
|
|
|
*
|
|
|
|
* 0 1 2 3
|
|
|
|
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Session ID |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* | Cookie (optional, maximum 64 bits)...
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
*
|
|
|
|
* L2TPv3 L2-Specific Sublayer Format
|
|
|
|
*
|
|
|
|
* 0 1 2 3
|
|
|
|
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
* |x|S|x|x|x|x|x|x| Sequence Number |
|
|
|
|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
*
|
2018-01-05 19:47:14 +01:00
|
|
|
* Cookie value and sublayer format are negotiated with the peer when
|
|
|
|
* the session is set up. Unlike L2TPv2, we do not need to parse the
|
|
|
|
* packet header to determine if optional fields are present.
|
2010-04-02 06:18:49 +00:00
|
|
|
*
|
|
|
|
* Caller must already have parsed the frame and determined that it is
|
|
|
|
* a data (not control) frame before coming here. Fields up to the
|
|
|
|
* session-id have already been parsed and ptr points to the data
|
|
|
|
* after the session-id.
|
2010-04-02 06:18:33 +00:00
|
|
|
*/
|
2010-04-02 06:18:49 +00:00
|
|
|
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
		      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
		      int length)
{
	struct l2tp_tunnel *tunnel = session->tunnel;
	int hdr_len;

	/* A configured peer cookie must match the bytes at ptr exactly. */
	if (session->peer_cookie_len > 0) {
		if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len) != 0) {
			pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
					     tunnel->name, tunnel->tunnel_id,
					     session->session_id);
			atomic_long_inc(&session->stats.rx_cookie_discards);
			goto discard;
		}
		ptr += session->peer_cookie_len;
	}

	/* Extract the optional sequence number. L2TPv2 carries Ns/Nr as
	 * two 16-bit fields flagged by the S bit; L2TPv3 carries a 24-bit
	 * number inside the default L2-specific sublayer.
	 */
	L2TP_SKB_CB(skb)->has_seq = 0;
	if (tunnel->version == L2TP_HDR_VER_2) {
		if (hdrflags & L2TP_HDRFLAG_S) {
			/* Store L2TP info in the skb */
			L2TP_SKB_CB(skb)->ns = ntohs(*(__be16 *)ptr);
			L2TP_SKB_CB(skb)->has_seq = 1;
			/* Step over both ns and nr */
			ptr += 4;
		}
	} else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
		u32 sublayer = ntohl(*(__be32 *)ptr);

		if (sublayer & 0x40000000) {
			/* Store L2TP info in the skb */
			L2TP_SKB_CB(skb)->ns = sublayer & 0x00ffffff;
			L2TP_SKB_CB(skb)->has_seq = 1;
		}
		ptr += 4;
	}

	if (L2TP_SKB_CB(skb)->has_seq) {
		/* Received a packet with sequence numbers. If we're the LAC,
		 * check if we are sending sequence numbers and if not,
		 * configure it so.
		 */
		if (!(session->lns_mode || session->send_seq)) {
			trace_session_seqnum_lns_enable(session);
			session->send_seq = 1;
			l2tp_session_set_header_len(session, tunnel->version,
						    tunnel->encap);
		}
	} else {
		/* No sequence numbers.
		 * If user has configured mandatory sequence numbers, discard.
		 */
		if (session->recv_seq) {
			pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
					     session->name);
			atomic_long_inc(&session->stats.rx_seq_discards);
			goto discard;
		}

		if (session->send_seq) {
			/* We are sending sequence numbers but the peer is
			 * not. As the LNS that means the LAC is broken, so
			 * discard the frame. As the LAC it means the LNS has
			 * asked us to stop sending them.
			 */
			if (session->lns_mode) {
				pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
						     session->name);
				atomic_long_inc(&session->stats.rx_seq_discards);
				goto discard;
			}

			trace_session_seqnum_lns_disable(session);
			session->send_seq = 0;
			l2tp_session_set_header_len(session, tunnel->version,
						    tunnel->encap);
		}
	}

	/* Session data offset is defined only for L2TPv2 and is
	 * indicated by an optional 16-bit value in the header. When the
	 * O bit is set, skip the field and the padding it describes.
	 */
	if (tunnel->version == L2TP_HDR_VER_2 && (hdrflags & L2TP_HDRFLAG_O)) {
		int pad = ntohs(*(__be16 *)ptr);

		ptr += 2 + pad;
	}

	/* Strip everything between the start of the L2TP header and the
	 * payload, provided the skb really holds that many bytes.
	 */
	hdr_len = ptr - optr;
	if (!pskb_may_pull(skb, hdr_len))
		goto discard;

	__skb_pull(skb, hdr_len);

	/* Prepare skb for adding to the session's reorder_q. Hold
	 * packets for max reorder_timeout or 1 second if not
	 * reordering.
	 */
	L2TP_SKB_CB(skb)->length = length;
	L2TP_SKB_CB(skb)->expires = jiffies +
		(session->reorder_timeout ? session->reorder_timeout : HZ);

	/* Add packet to the session's receive queue. Reordering is done
	 * here, if enabled. Saved L2TP protocol info is stored in
	 * skb->cb[].
	 */
	if (L2TP_SKB_CB(skb)->has_seq) {
		if (l2tp_recv_data_seq(session, skb))
			goto discard;
	} else {
		/* No sequence numbers. Add the skb to the tail of the
		 * reorder queue. This ensures that it will be
		 * delivered after all previous sequenced skbs.
		 */
		skb_queue_tail(&session->reorder_q, skb);
	}

	/* Try to dequeue as many skbs from reorder_q as we can. */
	l2tp_recv_dequeue(session);

	return;

discard:
	atomic_long_inc(&session->stats.rx_errors);
	kfree_skb(skb);
}
|
2020-07-28 18:20:32 +01:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_recv_common);
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2013-03-19 06:11:19 +00:00
|
|
|
/* Drop skbs from the session's reorder_q
|
|
|
|
*/
|
2020-07-24 16:31:55 +01:00
|
|
|
static void l2tp_session_queue_purge(struct l2tp_session *session)
|
2013-03-19 06:11:19 +00:00
|
|
|
{
|
|
|
|
struct sk_buff *skb = NULL;
|
2020-07-22 17:32:05 +01:00
|
|
|
|
2013-03-19 06:11:19 +00:00
|
|
|
while ((skb = skb_dequeue(&session->reorder_q))) {
|
|
|
|
atomic_long_inc(&session->stats.rx_errors);
|
|
|
|
kfree_skb(skb);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-20 12:22:40 +01:00
|
|
|
/* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */
|
|
|
|
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
2010-04-02 06:18:49 +00:00
|
|
|
{
|
|
|
|
struct l2tp_session *session = NULL;
|
2024-06-20 12:22:40 +01:00
|
|
|
struct l2tp_tunnel *tunnel = NULL;
|
|
|
|
struct net *net = sock_net(sk);
|
2010-04-02 06:18:49 +00:00
|
|
|
unsigned char *ptr, *optr;
|
|
|
|
u16 hdrflags;
|
|
|
|
u16 version;
|
|
|
|
int length;
|
|
|
|
|
2021-03-22 17:51:55 +05:30
|
|
|
/* UDP has verified checksum */
|
2010-04-02 06:18:49 +00:00
|
|
|
|
|
|
|
/* UDP always verifies the packet length. */
|
|
|
|
__skb_pull(skb, sizeof(struct udphdr));
|
|
|
|
|
|
|
|
/* Short packet? */
|
2024-06-20 12:22:40 +01:00
|
|
|
if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX))
|
|
|
|
goto pass;
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2011-11-08 13:59:44 -05:00
|
|
|
/* Point to L2TP header */
|
2020-07-24 16:31:49 +01:00
|
|
|
optr = skb->data;
|
|
|
|
ptr = skb->data;
|
2011-11-08 13:59:44 -05:00
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
/* Get L2TP header flags */
|
2020-07-22 17:32:05 +01:00
|
|
|
hdrflags = ntohs(*(__be16 *)ptr);
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2024-05-09 22:58:12 +02:00
|
|
|
/* Get protocol version */
|
2010-04-02 06:18:49 +00:00
|
|
|
version = hdrflags & L2TP_HDR_VER_MASK;
|
|
|
|
|
|
|
|
/* Get length of L2TP packet */
|
|
|
|
length = skb->len;
|
|
|
|
|
|
|
|
/* If type is control packet, it is handled by userspace. */
|
2020-08-22 15:59:01 +01:00
|
|
|
if (hdrflags & L2TP_HDRFLAG_T)
|
2021-03-03 16:50:49 +01:00
|
|
|
goto pass;
|
2010-04-02 06:18:49 +00:00
|
|
|
|
|
|
|
/* Skip flags */
|
|
|
|
ptr += 2;
|
|
|
|
|
2024-05-09 22:58:12 +02:00
|
|
|
if (version == L2TP_HDR_VER_2) {
|
2024-06-20 12:22:40 +01:00
|
|
|
u16 tunnel_id, session_id;
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
/* If length is present, skip it */
|
|
|
|
if (hdrflags & L2TP_HDRFLAG_L)
|
|
|
|
ptr += 2;
|
|
|
|
|
|
|
|
/* Extract tunnel and session ID */
|
2020-07-22 17:32:05 +01:00
|
|
|
tunnel_id = ntohs(*(__be16 *)ptr);
|
2010-04-02 06:18:49 +00:00
|
|
|
ptr += 2;
|
2020-07-22 17:32:05 +01:00
|
|
|
session_id = ntohs(*(__be16 *)ptr);
|
2010-04-02 06:18:49 +00:00
|
|
|
ptr += 2;
|
2024-06-20 12:22:40 +01:00
|
|
|
|
|
|
|
session = l2tp_v2_session_get(net, tunnel_id, session_id);
|
2010-04-02 06:18:49 +00:00
|
|
|
} else {
|
2024-06-20 12:22:40 +01:00
|
|
|
u32 session_id;
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
ptr += 2; /* skip reserved bits */
|
2020-07-22 17:32:05 +01:00
|
|
|
session_id = ntohl(*(__be32 *)ptr);
|
2010-04-02 06:18:49 +00:00
|
|
|
ptr += 4;
|
|
|
|
|
2024-06-20 12:22:40 +01:00
|
|
|
session = l2tp_v3_session_get(net, sk, session_id);
|
2024-05-09 22:58:12 +02:00
|
|
|
}
|
|
|
|
|
2010-04-02 06:19:10 +00:00
|
|
|
if (!session || !session->recv_skb) {
|
2017-10-31 17:36:42 +01:00
|
|
|
if (session)
|
2017-03-31 13:02:25 +02:00
|
|
|
l2tp_session_dec_refcount(session);
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
/* Not found? Pass to userspace to deal with */
|
2021-03-03 16:50:49 +01:00
|
|
|
goto pass;
|
2010-04-02 06:18:49 +00:00
|
|
|
}
|
|
|
|
|
2024-06-20 12:22:40 +01:00
|
|
|
tunnel = session->tunnel;
|
|
|
|
|
|
|
|
/* Check protocol version */
|
|
|
|
if (version != tunnel->version)
|
|
|
|
goto invalid;
|
|
|
|
|
|
|
|
if (version == L2TP_HDR_VER_3 &&
|
2021-09-09 12:32:00 +08:00
|
|
|
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
|
|
|
|
l2tp_session_dec_refcount(session);
|
2021-03-03 16:50:49 +01:00
|
|
|
goto invalid;
|
2021-09-09 12:32:00 +08:00
|
|
|
}
|
2019-01-30 14:55:14 +08:00
|
|
|
|
2018-07-25 14:53:33 +02:00
|
|
|
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
|
2017-03-31 13:02:25 +02:00
|
|
|
l2tp_session_dec_refcount(session);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2021-03-03 16:50:49 +01:00
|
|
|
invalid:
|
|
|
|
atomic_long_inc(&tunnel->stats.rx_invalid);
|
|
|
|
|
|
|
|
pass:
|
2010-04-02 06:18:33 +00:00
|
|
|
/* Put UDP header back */
|
|
|
|
__skb_push(skb, sizeof(struct udphdr));
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
|
|
|
|
|
2024-06-20 12:22:40 +01:00
|
|
|
/* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */
|
2024-05-13 18:22:47 +01:00
|
|
|
static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err,
|
|
|
|
__be16 port, u32 info, u8 *payload)
|
|
|
|
{
|
|
|
|
sk->sk_err = err;
|
|
|
|
sk_error_report(sk);
|
|
|
|
|
|
|
|
if (ip_hdr(skb)->version == IPVERSION) {
|
|
|
|
if (inet_test_bit(RECVERR, sk))
|
|
|
|
return ip_icmp_error(sk, skb, err, port, info, payload);
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
} else {
|
|
|
|
if (inet6_test_bit(RECVERR6, sk))
|
|
|
|
return ipv6_icmp_error(sk, skb, err, port, info, payload);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
/************************************************************************
|
|
|
|
* Transmit handling
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
/* Build an L2TP header for the session into the buffer provided.
|
|
|
|
*/
|
2010-04-02 06:18:49 +00:00
|
|
|
static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
2010-04-02 06:18:49 +00:00
|
|
|
struct l2tp_tunnel *tunnel = session->tunnel;
|
2010-04-02 06:18:33 +00:00
|
|
|
__be16 *bufp = buf;
|
2010-04-02 06:18:49 +00:00
|
|
|
__be16 *optr = buf;
|
2010-04-02 06:18:33 +00:00
|
|
|
u16 flags = L2TP_HDR_VER_2;
|
|
|
|
u32 tunnel_id = tunnel->peer_tunnel_id;
|
|
|
|
u32 session_id = session->peer_session_id;
|
|
|
|
|
|
|
|
if (session->send_seq)
|
|
|
|
flags |= L2TP_HDRFLAG_S;
|
|
|
|
|
|
|
|
/* Setup L2TP header. */
|
|
|
|
*bufp++ = htons(flags);
|
|
|
|
*bufp++ = htons(tunnel_id);
|
|
|
|
*bufp++ = htons(session_id);
|
|
|
|
if (session->send_seq) {
|
|
|
|
*bufp++ = htons(session->ns);
|
|
|
|
*bufp++ = 0;
|
|
|
|
session->ns++;
|
2010-04-02 06:18:49 +00:00
|
|
|
session->ns &= 0xffff;
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_seqnum_update(session);
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
2010-04-02 06:18:49 +00:00
|
|
|
|
|
|
|
return bufp - optr;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
2010-04-02 06:19:00 +00:00
|
|
|
struct l2tp_tunnel *tunnel = session->tunnel;
|
2010-04-02 06:18:49 +00:00
|
|
|
char *bufp = buf;
|
|
|
|
char *optr = bufp;
|
|
|
|
|
2010-04-02 06:19:00 +00:00
|
|
|
/* Setup L2TP header. The header differs slightly for UDP and
|
|
|
|
* IP encapsulations. For UDP, there is 4 bytes of flags.
|
|
|
|
*/
|
|
|
|
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
|
|
|
|
u16 flags = L2TP_HDR_VER_3;
|
2020-07-22 17:32:05 +01:00
|
|
|
*((__be16 *)bufp) = htons(flags);
|
2010-04-02 06:19:00 +00:00
|
|
|
bufp += 2;
|
2020-07-22 17:32:05 +01:00
|
|
|
*((__be16 *)bufp) = 0;
|
2010-04-02 06:19:00 +00:00
|
|
|
bufp += 2;
|
|
|
|
}
|
|
|
|
|
2020-07-22 17:32:05 +01:00
|
|
|
*((__be32 *)bufp) = htonl(session->peer_session_id);
|
2010-04-02 06:18:49 +00:00
|
|
|
bufp += 4;
|
|
|
|
if (session->cookie_len) {
|
|
|
|
memcpy(bufp, &session->cookie[0], session->cookie_len);
|
|
|
|
bufp += session->cookie_len;
|
|
|
|
}
|
2018-01-16 23:01:55 +01:00
|
|
|
if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
|
|
|
|
u32 l2h = 0;
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2018-01-16 23:01:55 +01:00
|
|
|
if (session->send_seq) {
|
|
|
|
l2h = 0x40000000 | session->ns;
|
|
|
|
session->ns++;
|
|
|
|
session->ns &= 0xffffff;
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_session_seqnum_update(session);
|
2010-04-02 06:18:49 +00:00
|
|
|
}
|
2018-01-16 23:01:55 +01:00
|
|
|
|
|
|
|
*((__be32 *)bufp) = htonl(l2h);
|
|
|
|
bufp += 4;
|
2010-04-02 06:18:49 +00:00
|
|
|
}
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
return bufp - optr;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2020-09-03 09:54:50 +01:00
|
|
|
/* Queue the packet to IP for output: tunnel socket lock must be held */
|
|
|
|
static int l2tp_xmit_queue(struct l2tp_tunnel *tunnel, struct sk_buff *skb, struct flowi *fl)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
2020-09-03 09:54:50 +01:00
|
|
|
int err;
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2014-05-04 16:39:18 -07:00
|
|
|
skb->ignore_df = 1;
|
2020-07-07 02:02:32 +08:00
|
|
|
skb_dst_drop(skb);
|
2012-04-27 08:24:18 +00:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
2018-03-12 14:54:24 +01:00
|
|
|
if (l2tp_sk_is_v6(tunnel->sock))
|
2020-09-03 09:54:50 +01:00
|
|
|
err = inet6_csk_xmit(tunnel->sock, skb, NULL);
|
2012-04-27 08:24:18 +00:00
|
|
|
else
|
|
|
|
#endif
|
2020-09-03 09:54:50 +01:00
|
|
|
err = ip_queue_xmit(tunnel->sock, skb, fl);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2020-09-03 09:54:50 +01:00
|
|
|
return err >= 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2020-09-18 11:23:21 +01:00
|
|
|
static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, unsigned int *len)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
2010-04-02 06:19:00 +00:00
|
|
|
struct l2tp_tunnel *tunnel = session->tunnel;
|
2020-09-03 09:54:50 +01:00
|
|
|
unsigned int data_len = skb->len;
|
2010-04-02 06:19:00 +00:00
|
|
|
struct sock *sk = tunnel->sock;
|
2020-09-03 09:54:50 +01:00
|
|
|
int headroom, uhlen, udp_len;
|
2012-06-28 20:15:13 +00:00
|
|
|
int ret = NET_XMIT_SUCCESS;
|
2020-09-03 09:54:50 +01:00
|
|
|
struct inet_sock *inet;
|
|
|
|
struct udphdr *uh;
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
/* Check that there's enough headroom in the skb to insert IP,
|
|
|
|
* UDP and L2TP headers. If not enough, expand it to
|
|
|
|
* make room. Adjust truesize.
|
|
|
|
*/
|
2020-09-03 09:54:50 +01:00
|
|
|
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(*uh) : 0;
|
|
|
|
headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len;
|
2011-10-07 05:35:46 +00:00
|
|
|
if (skb_cow_head(skb, headroom)) {
|
2012-06-28 20:15:13 +00:00
|
|
|
kfree_skb(skb);
|
|
|
|
return NET_XMIT_DROP;
|
2011-10-07 05:35:46 +00:00
|
|
|
}
|
2010-04-02 06:18:33 +00:00
|
|
|
|
|
|
|
/* Setup L2TP header */
|
2020-07-28 18:20:31 +01:00
|
|
|
if (tunnel->version == L2TP_HDR_VER_2)
|
2020-09-03 09:54:47 +01:00
|
|
|
l2tp_build_l2tpv2_header(session, __skb_push(skb, session->hdr_len));
|
2020-07-28 18:20:31 +01:00
|
|
|
else
|
2020-09-03 09:54:47 +01:00
|
|
|
l2tp_build_l2tpv3_header(session, __skb_push(skb, session->hdr_len));
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2010-04-02 06:19:00 +00:00
|
|
|
/* Reset skb netfilter state */
|
2010-04-02 06:18:33 +00:00
|
|
|
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
|
2020-09-03 09:54:50 +01:00
|
|
|
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
|
2019-09-29 20:54:03 +02:00
|
|
|
nf_reset_ct(skb);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
l2tp: fix lockdep splat
When l2tp tunnels use a socket provided by userspace, we can hit
lockdep splats like the below when data is transmitted through another
(unrelated) userspace socket which then gets routed over l2tp.
This issue was previously discussed here:
https://lore.kernel.org/netdev/87sfialu2n.fsf@cloudflare.com/
The solution is to have lockdep treat socket locks of l2tp tunnel
sockets separately than those of standard INET sockets. To do so, use
a different lockdep subclass where lock nesting is possible.
============================================
WARNING: possible recursive locking detected
6.10.0+ #34 Not tainted
--------------------------------------------
iperf3/771 is trying to acquire lock:
ffff8881027601d8 (slock-AF_INET/1){+.-.}-{2:2}, at: l2tp_xmit_skb+0x243/0x9d0
but task is already holding lock:
ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(slock-AF_INET/1);
lock(slock-AF_INET/1);
*** DEADLOCK ***
May be due to missing lock nesting notation
10 locks held by iperf3/771:
#0: ffff888102650258 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendmsg+0x1a/0x40
#1: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#2: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#3: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x28b/0x9f0
#4: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0xf9/0x260
#5: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
#6: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#7: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#8: ffffffff822ac1e0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0xcc/0x1450
#9: ffff888101f33258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock#2){+...}-{2:2}, at: __dev_queue_xmit+0x513/0x1450
stack backtrace:
CPU: 2 UID: 0 PID: 771 Comm: iperf3 Not tainted 6.10.0+ #34
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x69/0xa0
dump_stack+0xc/0x20
__lock_acquire+0x135d/0x2600
? srso_alias_return_thunk+0x5/0xfbef5
lock_acquire+0xc4/0x2a0
? l2tp_xmit_skb+0x243/0x9d0
? __skb_checksum+0xa3/0x540
_raw_spin_lock_nested+0x35/0x50
? l2tp_xmit_skb+0x243/0x9d0
l2tp_xmit_skb+0x243/0x9d0
l2tp_eth_dev_xmit+0x3c/0xc0
dev_hard_start_xmit+0x11e/0x420
sch_direct_xmit+0xc3/0x640
__dev_queue_xmit+0x61c/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
__tcp_send_ack+0x1b8/0x340
tcp_send_ack+0x23/0x30
__tcp_ack_snd_check+0xa8/0x530
? srso_alias_return_thunk+0x5/0xfbef5
tcp_rcv_established+0x412/0xd70
tcp_v4_do_rcv+0x299/0x420
tcp_v4_rcv+0x1991/0x1e10
ip_protocol_deliver_rcu+0x50/0x220
ip_local_deliver_finish+0x158/0x260
ip_local_deliver+0xc8/0xe0
ip_rcv+0xe5/0x1d0
? __pfx_ip_rcv+0x10/0x10
__netif_receive_skb_one_core+0xce/0xe0
? process_backlog+0x28b/0x9f0
__netif_receive_skb+0x34/0xd0
? process_backlog+0x28b/0x9f0
process_backlog+0x2cb/0x9f0
__napi_poll.constprop.0+0x61/0x280
net_rx_action+0x332/0x670
? srso_alias_return_thunk+0x5/0xfbef5
? find_held_lock+0x2b/0x80
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
handle_softirqs+0xda/0x480
? __dev_queue_xmit+0xa2c/0x1450
do_softirq+0xa1/0xd0
</IRQ>
<TASK>
__local_bh_enable_ip+0xc8/0xe0
? __dev_queue_xmit+0xa2c/0x1450
__dev_queue_xmit+0xa48/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
tcp_write_xmit+0x766/0x2fb0
? __entry_text_end+0x102ba9/0x102bad
? srso_alias_return_thunk+0x5/0xfbef5
? __might_fault+0x74/0xc0
? srso_alias_return_thunk+0x5/0xfbef5
__tcp_push_pending_frames+0x56/0x190
tcp_push+0x117/0x310
tcp_sendmsg_locked+0x14c1/0x1740
tcp_sendmsg+0x28/0x40
inet_sendmsg+0x5d/0x90
sock_write_iter+0x242/0x2b0
vfs_write+0x68d/0x800
? __pfx_sock_write_iter+0x10/0x10
ksys_write+0xc8/0xf0
__x64_sys_write+0x3d/0x50
x64_sys_call+0xfaf/0x1f50
do_syscall_64+0x6d/0x140
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f4d143af992
Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> e9 01 cc ff ff 41 54 b8 02 00 00 0
RSP: 002b:00007ffd65032058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4d143af992
RDX: 0000000000000025 RSI: 00007f4d143f3bcc RDI: 0000000000000005
RBP: 00007f4d143f2b28 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4d143f3bcc
R13: 0000000000000005 R14: 0000000000000000 R15: 00007ffd650323f0
</TASK>
Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()")
Suggested-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+6acef9e0a4d1f46c83d4@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6acef9e0a4d1f46c83d4
CC: gnault@redhat.com
CC: cong.wang@bytedance.com
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
Link: https://patch.msgid.link/20240806160626.1248317-1-jchapman@katalix.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-08-06 17:06:26 +01:00
|
|
|
/* L2TP uses its own lockdep subclass to avoid lockdep splats caused by
|
|
|
|
* nested socket calls on the same lockdep socket class. This can
|
|
|
|
* happen when data from a user socket is routed over l2tp, which uses
|
|
|
|
* another userspace socket.
|
|
|
|
*/
|
|
|
|
spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING);
|
|
|
|
|
2011-05-08 13:45:20 -07:00
|
|
|
if (sock_owned_by_user(sk)) {
|
2012-06-28 20:15:13 +00:00
|
|
|
kfree_skb(skb);
|
|
|
|
ret = NET_XMIT_DROP;
|
2011-05-08 13:45:20 -07:00
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
2018-03-12 14:54:24 +01:00
|
|
|
/* The user-space may change the connection status for the user-space
|
|
|
|
* provided socket at run time: we must check it under the socket lock
|
|
|
|
*/
|
|
|
|
if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
|
|
|
|
kfree_skb(skb);
|
|
|
|
ret = NET_XMIT_DROP;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
2020-09-18 11:23:21 +01:00
|
|
|
/* Report transmitted length before we add encap header, which keeps
|
|
|
|
* statistics consistent for both UDP and IP encap tx/rx paths.
|
|
|
|
*/
|
|
|
|
*len = skb->len;
|
|
|
|
|
2011-05-06 22:23:20 -07:00
|
|
|
inet = inet_sk(sk);
|
2010-04-02 06:19:00 +00:00
|
|
|
switch (tunnel->encap) {
|
|
|
|
case L2TP_ENCAPTYPE_UDP:
|
|
|
|
/* Setup UDP header */
|
|
|
|
__skb_push(skb, sizeof(*uh));
|
|
|
|
skb_reset_transport_header(skb);
|
|
|
|
uh = udp_hdr(skb);
|
|
|
|
uh->source = inet->inet_sport;
|
|
|
|
uh->dest = inet->inet_dport;
|
2020-09-03 09:54:47 +01:00
|
|
|
udp_len = uhlen + session->hdr_len + data_len;
|
2010-04-02 06:19:00 +00:00
|
|
|
uh->len = htons(udp_len);
|
|
|
|
|
|
|
|
/* Calculate UDP checksum if configured to do so */
|
2012-04-27 08:24:18 +00:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
2018-03-12 14:54:24 +01:00
|
|
|
if (l2tp_sk_is_v6(sk))
|
2014-06-04 17:19:56 -07:00
|
|
|
udp6_set_csum(udp_get_no_check6_tx(sk),
|
|
|
|
skb, &inet6_sk(sk)->saddr,
|
|
|
|
&sk->sk_v6_daddr, udp_len);
|
2012-04-27 08:24:18 +00:00
|
|
|
else
|
|
|
|
#endif
|
2020-07-22 17:32:09 +01:00
|
|
|
udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
|
|
|
|
inet->inet_daddr, udp_len);
|
2010-04-02 06:19:00 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case L2TP_ENCAPTYPE_IP:
|
|
|
|
break;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2020-09-03 09:54:50 +01:00
|
|
|
ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl);
|
|
|
|
|
2011-05-08 13:45:20 -07:00
|
|
|
out_unlock:
|
l2tp: fix lockdep splat
When l2tp tunnels use a socket provided by userspace, we can hit
lockdep splats like the below when data is transmitted through another
(unrelated) userspace socket which then gets routed over l2tp.
This issue was previously discussed here:
https://lore.kernel.org/netdev/87sfialu2n.fsf@cloudflare.com/
The solution is to have lockdep treat socket locks of l2tp tunnel
sockets separately than those of standard INET sockets. To do so, use
a different lockdep subclass where lock nesting is possible.
============================================
WARNING: possible recursive locking detected
6.10.0+ #34 Not tainted
--------------------------------------------
iperf3/771 is trying to acquire lock:
ffff8881027601d8 (slock-AF_INET/1){+.-.}-{2:2}, at: l2tp_xmit_skb+0x243/0x9d0
but task is already holding lock:
ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(slock-AF_INET/1);
lock(slock-AF_INET/1);
*** DEADLOCK ***
May be due to missing lock nesting notation
10 locks held by iperf3/771:
#0: ffff888102650258 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendmsg+0x1a/0x40
#1: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#2: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#3: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x28b/0x9f0
#4: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0xf9/0x260
#5: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10
#6: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0
#7: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130
#8: ffffffff822ac1e0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0xcc/0x1450
#9: ffff888101f33258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock#2){+...}-{2:2}, at: __dev_queue_xmit+0x513/0x1450
stack backtrace:
CPU: 2 UID: 0 PID: 771 Comm: iperf3 Not tainted 6.10.0+ #34
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x69/0xa0
dump_stack+0xc/0x20
__lock_acquire+0x135d/0x2600
? srso_alias_return_thunk+0x5/0xfbef5
lock_acquire+0xc4/0x2a0
? l2tp_xmit_skb+0x243/0x9d0
? __skb_checksum+0xa3/0x540
_raw_spin_lock_nested+0x35/0x50
? l2tp_xmit_skb+0x243/0x9d0
l2tp_xmit_skb+0x243/0x9d0
l2tp_eth_dev_xmit+0x3c/0xc0
dev_hard_start_xmit+0x11e/0x420
sch_direct_xmit+0xc3/0x640
__dev_queue_xmit+0x61c/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
__tcp_send_ack+0x1b8/0x340
tcp_send_ack+0x23/0x30
__tcp_ack_snd_check+0xa8/0x530
? srso_alias_return_thunk+0x5/0xfbef5
tcp_rcv_established+0x412/0xd70
tcp_v4_do_rcv+0x299/0x420
tcp_v4_rcv+0x1991/0x1e10
ip_protocol_deliver_rcu+0x50/0x220
ip_local_deliver_finish+0x158/0x260
ip_local_deliver+0xc8/0xe0
ip_rcv+0xe5/0x1d0
? __pfx_ip_rcv+0x10/0x10
__netif_receive_skb_one_core+0xce/0xe0
? process_backlog+0x28b/0x9f0
__netif_receive_skb+0x34/0xd0
? process_backlog+0x28b/0x9f0
process_backlog+0x2cb/0x9f0
__napi_poll.constprop.0+0x61/0x280
net_rx_action+0x332/0x670
? srso_alias_return_thunk+0x5/0xfbef5
? find_held_lock+0x2b/0x80
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
handle_softirqs+0xda/0x480
? __dev_queue_xmit+0xa2c/0x1450
do_softirq+0xa1/0xd0
</IRQ>
<TASK>
__local_bh_enable_ip+0xc8/0xe0
? __dev_queue_xmit+0xa2c/0x1450
__dev_queue_xmit+0xa48/0x1450
? ip_finish_output2+0xf4c/0x1130
ip_finish_output2+0x6b6/0x1130
? srso_alias_return_thunk+0x5/0xfbef5
? __ip_finish_output+0x217/0x380
? srso_alias_return_thunk+0x5/0xfbef5
__ip_finish_output+0x217/0x380
ip_output+0x99/0x120
__ip_queue_xmit+0xae4/0xbc0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tcp_options_write.constprop.0+0xcb/0x3e0
ip_queue_xmit+0x34/0x40
__tcp_transmit_skb+0x1625/0x1890
tcp_write_xmit+0x766/0x2fb0
? __entry_text_end+0x102ba9/0x102bad
? srso_alias_return_thunk+0x5/0xfbef5
? __might_fault+0x74/0xc0
? srso_alias_return_thunk+0x5/0xfbef5
__tcp_push_pending_frames+0x56/0x190
tcp_push+0x117/0x310
tcp_sendmsg_locked+0x14c1/0x1740
tcp_sendmsg+0x28/0x40
inet_sendmsg+0x5d/0x90
sock_write_iter+0x242/0x2b0
vfs_write+0x68d/0x800
? __pfx_sock_write_iter+0x10/0x10
ksys_write+0xc8/0xf0
__x64_sys_write+0x3d/0x50
x64_sys_call+0xfaf/0x1f50
do_syscall_64+0x6d/0x140
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f4d143af992
Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> e9 01 cc ff ff 41 54 b8 02 00 00 0
RSP: 002b:00007ffd65032058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4d143af992
RDX: 0000000000000025 RSI: 00007f4d143f3bcc RDI: 0000000000000005
RBP: 00007f4d143f2b28 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4d143f3bcc
R13: 0000000000000005 R14: 0000000000000000 R15: 00007ffd650323f0
</TASK>
Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()")
Suggested-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+6acef9e0a4d1f46c83d4@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6acef9e0a4d1f46c83d4
CC: gnault@redhat.com
CC: cong.wang@bytedance.com
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
Link: https://patch.msgid.link/20240806160626.1248317-1-jchapman@katalix.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-08-06 17:06:26 +01:00
|
|
|
spin_unlock(&sk->sk_lock.slock);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2012-06-28 20:15:13 +00:00
|
|
|
return ret;
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
2020-09-03 09:54:50 +01:00
|
|
|
|
|
|
|
/* If caller requires the skb to have a ppp header, the header must be
|
|
|
|
* inserted in the skb data before calling this function.
|
|
|
|
*/
|
|
|
|
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb)
|
|
|
|
{
|
2020-09-18 11:23:21 +01:00
|
|
|
unsigned int len = 0;
|
2020-09-03 09:54:50 +01:00
|
|
|
int ret;
|
|
|
|
|
2020-09-18 11:23:21 +01:00
|
|
|
ret = l2tp_xmit_core(session, skb, &len);
|
2020-09-03 09:54:50 +01:00
|
|
|
if (ret == NET_XMIT_SUCCESS) {
|
|
|
|
atomic_long_inc(&session->tunnel->stats.tx_packets);
|
|
|
|
atomic_long_add(len, &session->tunnel->stats.tx_bytes);
|
|
|
|
atomic_long_inc(&session->stats.tx_packets);
|
|
|
|
atomic_long_add(len, &session->stats.tx_bytes);
|
|
|
|
} else {
|
|
|
|
atomic_long_inc(&session->tunnel->stats.tx_errors);
|
|
|
|
atomic_long_inc(&session->stats.tx_errors);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
2010-04-02 06:18:33 +00:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
* Tunnel and session create/destroy.
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2024-06-20 12:22:44 +01:00
|
|
|
/* Remove an l2tp session from l2tp_core's lists. */
|
2020-07-28 18:20:28 +01:00
|
|
|
static void l2tp_session_unhash(struct l2tp_session *session)
|
|
|
|
{
|
|
|
|
struct l2tp_tunnel *tunnel = session->tunnel;
|
|
|
|
|
|
|
|
if (tunnel) {
|
2024-06-20 12:22:39 +01:00
|
|
|
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
|
|
|
|
struct l2tp_session *removed = session;
|
|
|
|
|
2024-06-20 12:22:44 +01:00
|
|
|
spin_lock_bh(&tunnel->list_lock);
|
2024-07-18 14:43:48 +01:00
|
|
|
spin_lock_bh(&pn->l2tp_session_idr_lock);
|
|
|
|
|
|
|
|
/* Remove from the per-tunnel list */
|
2024-06-20 12:22:44 +01:00
|
|
|
list_del_init(&session->list);
|
2020-07-28 18:20:28 +01:00
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
/* Remove from per-net IDR */
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
if (tunnel->version == L2TP_HDR_VER_3) {
|
|
|
|
if (hash_hashed(&session->hlist))
|
|
|
|
l2tp_session_collision_del(pn, session);
|
|
|
|
else
|
|
|
|
removed = idr_remove(&pn->l2tp_v3_session_idr,
|
|
|
|
session->session_id);
|
2024-06-20 12:22:39 +01:00
|
|
|
} else {
|
|
|
|
u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id,
|
|
|
|
session->session_id);
|
|
|
|
removed = idr_remove(&pn->l2tp_v2_session_idr,
|
|
|
|
session_key);
|
2020-07-28 18:20:28 +01:00
|
|
|
}
|
2024-06-20 12:22:39 +01:00
|
|
|
WARN_ON_ONCE(removed && removed != session);
|
2024-07-18 14:43:48 +01:00
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
spin_unlock_bh(&pn->l2tp_session_idr_lock);
|
2024-07-18 14:43:48 +01:00
|
|
|
spin_unlock_bh(&tunnel->list_lock);
|
2020-07-28 18:20:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
/* When the tunnel is closed, all the attached sessions need to go too.
|
|
|
|
*/
|
2018-06-25 16:07:23 +02:00
|
|
|
static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
|
2010-04-02 06:18:33 +00:00
|
|
|
{
|
|
|
|
struct l2tp_session *session;
|
|
|
|
|
2024-06-20 12:22:44 +01:00
|
|
|
spin_lock_bh(&tunnel->list_lock);
|
2017-09-01 17:58:48 +02:00
|
|
|
tunnel->acpt_newsess = false;
|
2024-07-29 16:38:07 +01:00
|
|
|
list_for_each_entry(session, &tunnel->session_list, list)
|
2024-06-20 12:22:44 +01:00
|
|
|
l2tp_session_delete(session);
|
|
|
|
spin_unlock_bh(&tunnel->list_lock);
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2013-03-19 06:11:13 +00:00
|
|
|
/* Destroy hook for UDP-encapsulated tunnel sockets: if the socket maps
 * to a tunnel, delete that tunnel.
 */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
	struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);

	/* No tunnel behind this socket: nothing to tear down */
	if (!tunnel)
		return;

	l2tp_tunnel_delete(tunnel);

	/* NOTE(review): presumably balances a reference obtained by
	 * l2tp_sk_to_tunnel() - confirm against that helper.
	 */
	l2tp_tunnel_dec_refcount(tunnel);
}
|
|
|
|
|
2023-01-13 19:01:36 -08:00
|
|
|
static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
|
|
|
|
{
|
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
|
|
|
|
|
|
|
spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
|
|
|
|
idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
|
|
|
|
spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
|
|
|
|
}
|
|
|
|
|
2013-01-31 23:43:00 +00:00
|
|
|
/* Workqueue tunnel deletion function */
|
|
|
|
static void l2tp_tunnel_del_work(struct work_struct *work)
|
|
|
|
{
|
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this may itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
|
|
|
struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel,
|
|
|
|
del_work);
|
|
|
|
struct sock *sk = tunnel->sock;
|
|
|
|
struct socket *sock = sk->sk_socket;
|
2017-02-22 14:59:49 +13:00
|
|
|
|
|
|
|
l2tp_tunnel_closeall(tunnel);
|
|
|
|
|
2018-02-23 17:45:43 +00:00
|
|
|
/* If the tunnel socket was created within the kernel, use
|
2013-03-19 06:11:18 +00:00
|
|
|
* the sk API to release it here.
|
2013-01-31 23:43:00 +00:00
|
|
|
*/
|
2018-02-23 17:45:43 +00:00
|
|
|
if (tunnel->fd < 0) {
|
2015-05-08 21:10:31 -05:00
|
|
|
if (sock) {
|
2013-03-19 06:11:18 +00:00
|
|
|
kernel_sock_shutdown(sock, SHUT_RDWR);
|
2015-05-08 21:10:31 -05:00
|
|
|
sock_release(sock);
|
|
|
|
}
|
2013-01-31 23:43:03 +00:00
|
|
|
}
|
2013-01-31 23:43:00 +00:00
|
|
|
|
2023-01-13 19:01:36 -08:00
|
|
|
l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
|
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this may itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
|
|
|
/* drop initial ref */
|
|
|
|
l2tp_tunnel_dec_refcount(tunnel);
|
|
|
|
|
|
|
|
/* drop workqueue ref */
|
2015-09-28 11:32:42 +02:00
|
|
|
l2tp_tunnel_dec_refcount(tunnel);
|
2010-04-02 06:18:33 +00:00
|
|
|
}
|
|
|
|
|
2010-04-02 06:19:40 +00:00
|
|
|
/* Create a socket for the tunnel, if one isn't set up by
|
|
|
|
* userspace. This is used for static tunnels where there is no
|
|
|
|
* managing L2TP daemon.
|
2013-01-31 23:43:03 +00:00
|
|
|
*
|
|
|
|
* Since we don't want these sockets to keep a namespace alive by
|
|
|
|
* themselves, we drop the socket's namespace refcount after creation.
|
|
|
|
* These sockets are freed when the namespace exits using the pernet
|
|
|
|
* exit hook.
|
2010-04-02 06:19:40 +00:00
|
|
|
*/
|
2013-01-31 23:43:03 +00:00
|
|
|
static int l2tp_tunnel_sock_create(struct net *net,
|
2020-07-22 17:32:08 +01:00
|
|
|
u32 tunnel_id,
|
|
|
|
u32 peer_tunnel_id,
|
|
|
|
struct l2tp_tunnel_cfg *cfg,
|
|
|
|
struct socket **sockp)
|
2010-04-02 06:19:40 +00:00
|
|
|
{
|
|
|
|
int err = -EINVAL;
|
2013-01-31 23:43:03 +00:00
|
|
|
struct socket *sock = NULL;
|
2014-07-13 19:49:48 -07:00
|
|
|
struct udp_port_cfg udp_conf;
|
2010-04-02 06:19:40 +00:00
|
|
|
|
|
|
|
switch (cfg->encap) {
|
|
|
|
case L2TP_ENCAPTYPE_UDP:
|
2014-07-13 19:49:48 -07:00
|
|
|
memset(&udp_conf, 0, sizeof(udp_conf));
|
|
|
|
|
2012-04-29 21:48:52 +00:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
if (cfg->local_ip6 && cfg->peer_ip6) {
|
2014-07-13 19:49:48 -07:00
|
|
|
udp_conf.family = AF_INET6;
|
|
|
|
memcpy(&udp_conf.local_ip6, cfg->local_ip6,
|
|
|
|
sizeof(udp_conf.local_ip6));
|
|
|
|
memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
|
|
|
|
sizeof(udp_conf.peer_ip6));
|
|
|
|
udp_conf.use_udp6_tx_checksums =
|
2020-07-22 17:32:05 +01:00
|
|
|
!cfg->udp6_zero_tx_checksums;
|
2014-07-13 19:49:48 -07:00
|
|
|
udp_conf.use_udp6_rx_checksums =
|
2020-07-22 17:32:05 +01:00
|
|
|
!cfg->udp6_zero_rx_checksums;
|
2012-04-29 21:48:52 +00:00
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
2014-07-13 19:49:48 -07:00
|
|
|
udp_conf.family = AF_INET;
|
|
|
|
udp_conf.local_ip = cfg->local_ip;
|
|
|
|
udp_conf.peer_ip = cfg->peer_ip;
|
|
|
|
udp_conf.use_udp_checksums = cfg->use_udp_checksums;
|
2012-04-29 21:48:52 +00:00
|
|
|
}
|
2010-04-02 06:19:40 +00:00
|
|
|
|
2014-07-13 19:49:48 -07:00
|
|
|
udp_conf.local_udp_port = htons(cfg->local_udp_port);
|
|
|
|
udp_conf.peer_udp_port = htons(cfg->peer_udp_port);
|
|
|
|
|
|
|
|
err = udp_sock_create(net, &udp_conf, &sock);
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
2010-04-02 06:19:40 +00:00
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case L2TP_ENCAPTYPE_IP:
|
2012-04-29 21:48:52 +00:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
if (cfg->local_ip6 && cfg->peer_ip6) {
|
2014-07-13 19:49:48 -07:00
|
|
|
struct sockaddr_l2tpip6 ip6_addr = {0};
|
|
|
|
|
2015-05-08 21:10:31 -05:00
|
|
|
err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
|
2020-07-22 17:32:08 +01:00
|
|
|
IPPROTO_L2TP, &sock);
|
2012-04-29 21:48:55 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
2010-04-02 06:19:40 +00:00
|
|
|
|
2012-04-29 21:48:55 +00:00
|
|
|
ip6_addr.l2tp_family = AF_INET6;
|
|
|
|
memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
|
|
|
|
sizeof(ip6_addr.l2tp_addr));
|
|
|
|
ip6_addr.l2tp_conn_id = tunnel_id;
|
2020-07-22 17:32:05 +01:00
|
|
|
err = kernel_bind(sock, (struct sockaddr *)&ip6_addr,
|
2012-04-29 21:48:55 +00:00
|
|
|
sizeof(ip6_addr));
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
2010-04-02 06:19:40 +00:00
|
|
|
|
2012-04-29 21:48:55 +00:00
|
|
|
ip6_addr.l2tp_family = AF_INET6;
|
|
|
|
memcpy(&ip6_addr.l2tp_addr, cfg->peer_ip6,
|
|
|
|
sizeof(ip6_addr.l2tp_addr));
|
|
|
|
ip6_addr.l2tp_conn_id = peer_tunnel_id;
|
|
|
|
err = kernel_connect(sock,
|
2020-07-22 17:32:05 +01:00
|
|
|
(struct sockaddr *)&ip6_addr,
|
2012-04-29 21:48:55 +00:00
|
|
|
sizeof(ip6_addr), 0);
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
2014-07-13 19:49:48 -07:00
|
|
|
struct sockaddr_l2tpip ip_addr = {0};
|
|
|
|
|
2015-05-08 21:10:31 -05:00
|
|
|
err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
|
2020-07-22 17:32:08 +01:00
|
|
|
IPPROTO_L2TP, &sock);
|
2012-04-29 21:48:55 +00:00
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
2010-04-02 06:19:40 +00:00
|
|
|
|
2012-04-29 21:48:55 +00:00
|
|
|
ip_addr.l2tp_family = AF_INET;
|
|
|
|
ip_addr.l2tp_addr = cfg->local_ip;
|
|
|
|
ip_addr.l2tp_conn_id = tunnel_id;
|
2020-07-22 17:32:05 +01:00
|
|
|
err = kernel_bind(sock, (struct sockaddr *)&ip_addr,
|
2012-04-29 21:48:55 +00:00
|
|
|
sizeof(ip_addr));
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ip_addr.l2tp_family = AF_INET;
|
|
|
|
ip_addr.l2tp_addr = cfg->peer_ip;
|
|
|
|
ip_addr.l2tp_conn_id = peer_tunnel_id;
|
2020-07-22 17:32:05 +01:00
|
|
|
err = kernel_connect(sock, (struct sockaddr *)&ip_addr,
|
2012-04-29 21:48:55 +00:00
|
|
|
sizeof(ip_addr), 0);
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
|
|
|
}
|
2010-04-02 06:19:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
2013-01-31 23:43:03 +00:00
|
|
|
*sockp = sock;
|
2020-07-23 12:29:51 +01:00
|
|
|
if (err < 0 && sock) {
|
2013-01-31 23:43:03 +00:00
|
|
|
kernel_sock_shutdown(sock, SHUT_RDWR);
|
2015-05-08 21:10:31 -05:00
|
|
|
sock_release(sock);
|
2010-04-02 06:19:40 +00:00
|
|
|
*sockp = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-09-03 09:54:49 +01:00
|
|
|
/* Allocate and initialise a tunnel context.
 *
 * @fd:             stored in the tunnel for later use; no socket is looked
 *                  up or created here (l2tp_tunnel_register() does that,
 *                  treating a negative fd as a request for a kernel socket).
 * @version:        stored in tunnel->version.
 * @tunnel_id:      local tunnel id; also used to build the tunnel name.
 * @peer_tunnel_id: stored in tunnel->peer_tunnel_id.
 * @cfg:            optional; only cfg->encap is read here. When NULL the
 *                  encapsulation defaults to L2TP_ENCAPTYPE_UDP.
 * @tunnelp:        optional; when non-NULL receives the new tunnel, or NULL
 *                  if allocation failed.
 *
 * The tunnel is returned with its refcount set to 1.
 *
 * Returns 0 on success or -ENOMEM if the tunnel could not be allocated.
 *
 * NOTE(review): when @tunnelp is NULL the freshly allocated tunnel is not
 * handed to anyone and this function still returns 0 - confirm that no
 * caller relies on passing NULL.
 */
int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
		       struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
	struct l2tp_tunnel *tunnel = NULL;
	int err;
	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;

	if (cfg)
		encap = cfg->encap;

	tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
	if (!tunnel) {
		err = -ENOMEM;
		goto out;
	}

	tunnel->version = version;
	tunnel->tunnel_id = tunnel_id;
	tunnel->peer_tunnel_id = peer_tunnel_id;

	/* Bound the write by the size of the embedded name buffer. */
	snprintf(tunnel->name, sizeof(tunnel->name), "tunl %u", tunnel_id);
	spin_lock_init(&tunnel->list_lock);
	tunnel->acpt_newsess = true;
	INIT_LIST_HEAD(&tunnel->session_list);

	tunnel->encap = encap;

	refcount_set(&tunnel->ref_count, 1);
	tunnel->fd = fd;

	/* Init delete workqueue struct */
	INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);

	err = 0;
out:
	/* On allocation failure tunnel is still NULL, so the caller sees NULL. */
	if (tunnelp)
		*tunnelp = tunnel;

	return err;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
|
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
|
|
|
|
enum l2tp_encap_type encap)
|
|
|
|
{
|
2024-07-29 16:38:04 +01:00
|
|
|
struct l2tp_tunnel *tunnel;
|
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
if (!net_eq(sock_net(sk), net))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (sk->sk_type != SOCK_DGRAM)
|
|
|
|
return -EPROTONOSUPPORT;
|
|
|
|
|
2020-05-29 11:32:25 -07:00
|
|
|
if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
|
|
|
|
return -EPROTONOSUPPORT;
|
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
|
|
|
|
(encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
|
|
|
|
return -EPROTONOSUPPORT;
|
|
|
|
|
2024-07-29 16:38:04 +01:00
|
|
|
tunnel = l2tp_sk_to_tunnel(sk);
|
|
|
|
if (tunnel) {
|
|
|
|
l2tp_tunnel_dec_refcount(tunnel);
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
return -EBUSY;
|
2024-07-29 16:38:04 +01:00
|
|
|
}
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
|
|
|
|
struct l2tp_tunnel_cfg *cfg)
|
|
|
|
{
|
2023-01-13 19:01:36 -08:00
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
|
|
|
u32 tunnel_id = tunnel->tunnel_id;
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
struct socket *sock;
|
|
|
|
struct sock *sk;
|
|
|
|
int ret;
|
|
|
|
|
2023-01-13 19:01:36 -08:00
|
|
|
spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
|
|
|
|
ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
|
|
|
|
GFP_ATOMIC);
|
|
|
|
spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
|
|
|
|
if (ret)
|
|
|
|
return ret == -ENOSPC ? -EEXIST : ret;
|
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
if (tunnel->fd < 0) {
|
|
|
|
ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
|
|
|
|
tunnel->peer_tunnel_id, cfg,
|
|
|
|
&sock);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
} else {
|
|
|
|
sock = sockfd_lookup(tunnel->fd, &ret);
|
|
|
|
if (!sock)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2022-11-14 20:16:19 +01:00
|
|
|
sk = sock->sk;
|
2023-01-13 19:01:37 -08:00
|
|
|
lock_sock(sk);
|
l2tp: Don't sleep and disable BH under writer-side sk_callback_lock
When holding a reader-writer spin lock we cannot sleep. Calling
setup_udp_tunnel_sock() with write lock held violates this rule, because we
end up calling percpu_down_read(), which might sleep, as syzbot reports
[1]:
__might_resched.cold+0x222/0x26b kernel/sched/core.c:9890
percpu_down_read include/linux/percpu-rwsem.h:49 [inline]
cpus_read_lock+0x1b/0x140 kernel/cpu.c:310
static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158
udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline]
setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81
l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509
pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723
Trim the writer-side critical section for sk_callback_lock down to the
minimum, so that it covers only operations on sk_user_data.
Also, when grabbing the sk_callback_lock, we always need to disable BH, as
Eric points out. Failing to do so leads to deadlocks because we acquire
sk_callback_lock in softirq context, which can get stuck waiting on us if:
1) it runs on the same CPU, or
CPU0
----
lock(clock-AF_INET6);
<Interrupt>
lock(clock-AF_INET6);
2) lock ordering leads to priority inversion
CPU0 CPU1
---- ----
lock(clock-AF_INET6);
local_irq_disable();
lock(&tcp_hashinfo.bhash[i].lock);
lock(clock-AF_INET6);
<Interrupt>
lock(&tcp_hashinfo.bhash[i].lock);
... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock.
[1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/
[2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/
[3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/
v2:
- Check and set sk_user_data while holding sk_callback_lock for both
L2TP encapsulation types (IP and UDP) (Tetsuo)
Cc: Tom Parkin <tparkin@katalix.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com
Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com
Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-11-21 09:54:26 +01:00
|
|
|
write_lock_bh(&sk->sk_callback_lock);
|
2022-11-14 20:16:19 +01:00
|
|
|
ret = l2tp_validate_socket(sk, net, tunnel->encap);
|
2023-01-17 11:01:31 +00:00
|
|
|
if (ret < 0)
|
l2tp: Don't sleep and disable BH under writer-side sk_callback_lock
When holding a reader-writer spin lock we cannot sleep. Calling
setup_udp_tunnel_sock() with write lock held violates this rule, because we
end up calling percpu_down_read(), which might sleep, as syzbot reports
[1]:
__might_resched.cold+0x222/0x26b kernel/sched/core.c:9890
percpu_down_read include/linux/percpu-rwsem.h:49 [inline]
cpus_read_lock+0x1b/0x140 kernel/cpu.c:310
static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158
udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline]
setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81
l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509
pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723
Trim the writer-side critical section for sk_callback_lock down to the
minimum, so that it covers only operations on sk_user_data.
Also, when grabbing the sk_callback_lock, we always need to disable BH, as
Eric points out. Failing to do so leads to deadlocks because we acquire
sk_callback_lock in softirq context, which can get stuck waiting on us if:
1) it runs on the same CPU, or
CPU0
----
lock(clock-AF_INET6);
<Interrupt>
lock(clock-AF_INET6);
2) lock ordering leads to priority inversion
CPU0 CPU1
---- ----
lock(clock-AF_INET6);
local_irq_disable();
lock(&tcp_hashinfo.bhash[i].lock);
lock(clock-AF_INET6);
<Interrupt>
lock(&tcp_hashinfo.bhash[i].lock);
... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock.
[1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/
[2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/
[3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/
v2:
- Check and set sk_user_data while holding sk_callback_lock for both
L2TP encapsulation types (IP and UDP) (Tetsuo)
Cc: Tom Parkin <tparkin@katalix.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com
Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com
Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-11-21 09:54:26 +01:00
|
|
|
goto err_inval_sock;
|
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
2022-11-14 20:16:19 +01:00
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
|
|
|
|
struct udp_tunnel_sock_cfg udp_cfg = {
|
|
|
|
.encap_type = UDP_ENCAP_L2TPINUDP,
|
|
|
|
.encap_rcv = l2tp_udp_encap_recv,
|
2024-05-13 18:22:47 +01:00
|
|
|
.encap_err_rcv = l2tp_udp_encap_err_recv,
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
.encap_destroy = l2tp_udp_encap_destroy,
|
|
|
|
};
|
|
|
|
|
|
|
|
setup_udp_tunnel_sock(net, sock, &udp_cfg);
|
|
|
|
}
|
|
|
|
|
|
|
|
sk->sk_allocation = GFP_ATOMIC;
|
2023-01-13 19:01:37 -08:00
|
|
|
release_sock(sk);
|
|
|
|
|
|
|
|
sock_hold(sk);
|
|
|
|
tunnel->sock = sk;
|
|
|
|
tunnel->l2tp_net = net;
|
|
|
|
|
|
|
|
spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
|
|
|
|
idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
|
|
|
|
spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_register_tunnel(tunnel);
|
|
|
|
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert theses modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
if (tunnel->fd >= 0)
|
|
|
|
sockfd_put(sock);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
l2tp: Don't sleep and disable BH under writer-side sk_callback_lock
When holding a reader-writer spin lock we cannot sleep. Calling
setup_udp_tunnel_sock() with write lock held violates this rule, because we
end up calling percpu_down_read(), which might sleep, as syzbot reports
[1]:
__might_resched.cold+0x222/0x26b kernel/sched/core.c:9890
percpu_down_read include/linux/percpu-rwsem.h:49 [inline]
cpus_read_lock+0x1b/0x140 kernel/cpu.c:310
static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158
udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline]
setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81
l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509
pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723
Trim the writer-side critical section for sk_callback_lock down to the
minimum, so that it covers only operations on sk_user_data.
Also, when grabbing the sk_callback_lock, we always need to disable BH, as
Eric points out. Failing to do so leads to deadlocks because we acquire
sk_callback_lock in softirq context, which can get stuck waiting on us if:
1) it runs on the same CPU, or
CPU0
----
lock(clock-AF_INET6);
<Interrupt>
lock(clock-AF_INET6);
2) lock ordering leads to priority inversion
CPU0 CPU1
---- ----
lock(clock-AF_INET6);
local_irq_disable();
lock(&tcp_hashinfo.bhash[i].lock);
lock(clock-AF_INET6);
<Interrupt>
lock(&tcp_hashinfo.bhash[i].lock);
... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock.
[1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/
[2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/
[3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/
v2:
- Check and set sk_user_data while holding sk_callback_lock for both
L2TP encapsulation types (IP and UDP) (Tetsuo)
Cc: Tom Parkin <tparkin@katalix.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com
Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com
Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-11-21 09:54:26 +01:00
|
|
|
err_inval_sock:
|
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
2023-01-17 11:01:31 +00:00
|
|
|
release_sock(sk);
|
l2tp: Don't sleep and disable BH under writer-side sk_callback_lock
When holding a reader-writer spin lock we cannot sleep. Calling
setup_udp_tunnel_sock() with write lock held violates this rule, because we
end up calling percpu_down_read(), which might sleep, as syzbot reports
[1]:
__might_resched.cold+0x222/0x26b kernel/sched/core.c:9890
percpu_down_read include/linux/percpu-rwsem.h:49 [inline]
cpus_read_lock+0x1b/0x140 kernel/cpu.c:310
static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158
udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline]
setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81
l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509
pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723
Trim the writer-side critical section for sk_callback_lock down to the
minimum, so that it covers only operations on sk_user_data.
Also, when grabbing the sk_callback_lock, we always need to disable BH, as
Eric points out. Failing to do so leads to deadlocks because we acquire
sk_callback_lock in softirq context, which can get stuck waiting on us if:
1) it runs on the same CPU, or
CPU0
----
lock(clock-AF_INET6);
<Interrupt>
lock(clock-AF_INET6);
2) lock ordering leads to priority inversion
CPU0 CPU1
---- ----
lock(clock-AF_INET6);
local_irq_disable();
lock(&tcp_hashinfo.bhash[i].lock);
lock(clock-AF_INET6);
<Interrupt>
lock(&tcp_hashinfo.bhash[i].lock);
... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock.
[1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/
[2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/
[3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/
v2:
- Check and set sk_user_data while holding sk_callback_lock for both
L2TP encapsulation types (IP and UDP) (Tetsuo)
Cc: Tom Parkin <tparkin@katalix.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com
Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com
Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-11-21 09:54:26 +01:00
|
|
|
|
2018-04-10 21:01:13 +02:00
|
|
|
if (tunnel->fd < 0)
|
|
|
|
sock_release(sock);
|
|
|
|
else
|
|
|
|
sockfd_put(sock);
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert these modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
err:
|
2023-01-13 19:01:36 -08:00
|
|
|
l2tp_tunnel_remove(net, tunnel);
|
l2tp: fix races in tunnel creation
l2tp_tunnel_create() inserts the new tunnel into the namespace's tunnel
list and sets the socket's ->sk_user_data field, before returning it to
the caller. Therefore, there are two ways the tunnel can be accessed
and freed, before the caller even had the opportunity to take a
reference. In practice, syzbot could crash the module by closing the
socket right after a new tunnel was returned to pppol2tp_create().
This patch moves tunnel registration out of l2tp_tunnel_create(), so
that the caller can safely hold a reference before publishing the
tunnel. This second step is done with the new l2tp_tunnel_register()
function, which is now responsible for associating the tunnel to its
socket and for inserting it into the namespace's list.
While moving the code to l2tp_tunnel_register(), a few modifications
have been done. First, the socket validation tests are done in a helper
function, for clarity. Also, modifying the socket is now done after
having inserted the tunnel to the namespace's tunnels list. This will
allow insertion to fail, without having to revert these modifications
in the error path (a followup patch will check for duplicate tunnels
before insertion). Either the socket is a kernel socket which we
control, or it is a user-space socket for which we have a reference on
the file descriptor. In any case, the socket isn't going to be closed
from under us.
Reported-by: syzbot+fbeeb5c3b538e8545644@syzkaller.appspotmail.com
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-10 21:01:12 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
|
|
|
|
|
2010-04-02 06:19:10 +00:00
|
|
|
/* This function is used by the netlink TUNNEL_DELETE command.
|
|
|
|
*/
|
2017-09-26 16:16:43 +02:00
|
|
|
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
|
2010-04-02 06:19:10 +00:00
|
|
|
{
|
2017-09-26 16:16:43 +02:00
|
|
|
if (!test_and_set_bit(0, &tunnel->dead)) {
|
2020-08-22 15:59:06 +01:00
|
|
|
trace_delete_tunnel(tunnel);
|
2017-09-26 16:16:43 +02:00
|
|
|
l2tp_tunnel_inc_refcount(tunnel);
|
|
|
|
queue_work(l2tp_wq, &tunnel->del_work);
|
2015-09-28 11:32:42 +02:00
|
|
|
}
|
2010-04-02 06:19:10 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
|
|
|
|
|
2020-07-28 18:20:30 +01:00
|
|
|
void l2tp_session_delete(struct l2tp_session *session)
|
2010-04-02 06:19:10 +00:00
|
|
|
{
|
2024-07-29 16:38:07 +01:00
|
|
|
if (!test_and_set_bit(0, &session->dead)) {
|
|
|
|
trace_delete_session(session);
|
|
|
|
l2tp_session_inc_refcount(session);
|
|
|
|
queue_work(l2tp_wq, &session->del_work);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_delete);
|
|
|
|
|
|
|
|
/* Workqueue session deletion function */
|
|
|
|
static void l2tp_session_del_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct l2tp_session *session = container_of(work, struct l2tp_session,
|
|
|
|
del_work);
|
l2tp: fix race between l2tp_session_delete() and l2tp_tunnel_closeall()
There are several ways to remove L2TP sessions:
* deleting a session explicitly using the netlink interface (with
L2TP_CMD_SESSION_DELETE),
* deleting the session's parent tunnel (either by closing the
tunnel's file descriptor or using the netlink interface),
* closing the PPPOL2TP file descriptor of a PPP pseudo-wire.
In some cases, when these methods are used concurrently on the same
session, the session can be removed twice, leading to use-after-free
bugs.
This patch adds a 'dead' flag, used by l2tp_session_delete() and
l2tp_tunnel_closeall() to prevent them from stepping on each other's
toes.
The session deletion path used when closing a PPPOL2TP file descriptor
doesn't need to be adapted. It already has to ensure that a session
remains valid for the lifetime of its PPPOL2TP file descriptor.
So it takes an extra reference on the session in the ->session_close()
callback (pppol2tp_session_close()), which is eventually dropped
in the ->sk_destruct() callback of the PPPOL2TP socket
(pppol2tp_session_destruct()).
Still, __l2tp_session_unhash() and l2tp_session_queue_purge() can be
called twice and even concurrently for a given session, but thanks to
proper locking and re-initialisation of list fields, this is not an
issue.
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-22 15:39:24 +02:00
|
|
|
|
2020-07-28 18:20:28 +01:00
|
|
|
l2tp_session_unhash(session);
|
2013-03-19 06:11:20 +00:00
|
|
|
l2tp_session_queue_purge(session);
|
2020-07-23 12:29:50 +01:00
|
|
|
if (session->session_close)
|
2010-04-02 06:19:10 +00:00
|
|
|
(*session->session_close)(session);
|
2017-10-31 17:36:42 +01:00
|
|
|
|
2024-07-29 16:38:07 +01:00
|
|
|
/* drop initial ref */
|
|
|
|
l2tp_session_dec_refcount(session);
|
|
|
|
|
|
|
|
/* drop workqueue ref */
|
2010-04-02 06:19:10 +00:00
|
|
|
l2tp_session_dec_refcount(session);
|
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:49 +00:00
|
|
|
/* We come here whenever a session's send_seq, cookie_len or
|
2018-01-16 23:01:55 +01:00
|
|
|
* l2specific_type parameters are set.
|
2010-04-02 06:18:49 +00:00
|
|
|
*/
|
2024-07-29 16:38:10 +01:00
|
|
|
void l2tp_session_set_header_len(struct l2tp_session *session, int version,
|
|
|
|
enum l2tp_encap_type encap)
|
2010-04-02 06:18:49 +00:00
|
|
|
{
|
|
|
|
if (version == L2TP_HDR_VER_2) {
|
|
|
|
session->hdr_len = 6;
|
|
|
|
if (session->send_seq)
|
|
|
|
session->hdr_len += 4;
|
|
|
|
} else {
|
2018-01-16 23:01:55 +01:00
|
|
|
session->hdr_len = 4 + session->cookie_len;
|
|
|
|
session->hdr_len += l2tp_get_l2specific_len(session);
|
2024-07-29 16:38:10 +01:00
|
|
|
if (encap == L2TP_ENCAPTYPE_UDP)
|
2010-04-02 06:19:00 +00:00
|
|
|
session->hdr_len += 4;
|
2010-04-02 06:18:49 +00:00
|
|
|
}
|
|
|
|
}
|
2014-03-06 11:14:30 +01:00
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2020-07-22 17:32:13 +01:00
|
|
|
/* Allocate and initialise a session belonging to @tunnel.
 *
 * @priv_size:       extra bytes allocated directly after the session struct
 * @tunnel:          parent tunnel; its version, encap, tunnel_id and sock
 *                   are read during initialisation
 * @session_id:      local session ID
 * @peer_session_id: peer's session ID
 * @cfg:             optional configuration to copy in; may be NULL
 *
 * Returns the new session with its reference count set to 1, or
 * ERR_PTR(-ENOMEM) when the allocation fails.
 */
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id,
					 u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
	struct l2tp_session *session;

	session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
	if (!session)
		return ERR_PTR(-ENOMEM);

	session->magic = L2TP_SESSION_MAGIC;

	session->session_id = session_id;
	session->peer_session_id = peer_session_id;

	/* Receive sequence state: the ceiling is 16 bits wide for
	 * L2TP_HDR_VER_2 tunnels and 24 bits wide otherwise.
	 */
	session->nr = 0;
	session->nr_max = (tunnel->version == L2TP_HDR_VER_2) ? 0xffff : 0xffffff;
	session->nr_window_size = session->nr_max / 2;
	session->nr_oos_count_max = 4;

	/* Use NR of first received packet */
	session->reorder_skip = 1;

	sprintf(&session->name[0], "sess %u/%u",
		tunnel->tunnel_id, session->session_id);

	skb_queue_head_init(&session->reorder_q);

	session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id);
	INIT_HLIST_NODE(&session->hlist);
	INIT_LIST_HEAD(&session->clist);
	INIT_LIST_HEAD(&session->list);
	INIT_WORK(&session->del_work, l2tp_session_del_work);

	/* Copy caller-supplied settings, if any. */
	if (cfg) {
		session->pwtype = cfg->pw_type;
		session->send_seq = cfg->send_seq;
		session->recv_seq = cfg->recv_seq;
		session->lns_mode = cfg->lns_mode;
		session->reorder_timeout = cfg->reorder_timeout;
		session->l2specific_type = cfg->l2specific_type;

		session->cookie_len = cfg->cookie_len;
		memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);

		session->peer_cookie_len = cfg->peer_cookie_len;
		memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
	}

	/* Must follow the cfg copy: the header length depends on send_seq,
	 * cookie_len and l2specific_type.
	 */
	l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);

	refcount_set(&session->ref_count, 1);

	return session;
}
|
|
|
|
EXPORT_SYMBOL_GPL(l2tp_session_create);
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
* Init and cleanup
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
static __net_init int l2tp_init_net(struct net *net)
|
|
|
|
{
|
2010-04-23 00:53:39 +00:00
|
|
|
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2023-01-13 19:01:36 -08:00
|
|
|
idr_init(&pn->l2tp_tunnel_idr);
|
|
|
|
spin_lock_init(&pn->l2tp_tunnel_idr_lock);
|
2010-04-02 06:18:33 +00:00
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
idr_init(&pn->l2tp_v2_session_idr);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
idr_init(&pn->l2tp_v3_session_idr);
|
|
|
|
spin_lock_init(&pn->l2tp_session_idr_lock);
|
2010-04-02 06:18:49 +00:00
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-07-29 16:38:14 +01:00
|
|
|
static __net_exit void l2tp_pre_exit_net(struct net *net)
|
2013-01-31 23:43:03 +00:00
|
|
|
{
|
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
|
|
|
struct l2tp_tunnel *tunnel = NULL;
|
2023-01-13 19:01:36 -08:00
|
|
|
unsigned long tunnel_id, tmp;
|
2013-01-31 23:43:03 +00:00
|
|
|
|
|
|
|
rcu_read_lock_bh();
|
2023-01-13 19:01:36 -08:00
|
|
|
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
|
|
|
|
if (tunnel)
|
|
|
|
l2tp_tunnel_delete(tunnel);
|
2013-01-31 23:43:03 +00:00
|
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
2016-09-02 10:22:54 +02:00
|
|
|
|
2019-05-06 22:44:04 +08:00
|
|
|
if (l2tp_wq)
|
2024-07-29 16:38:07 +01:00
|
|
|
drain_workqueue(l2tp_wq);
|
2024-07-29 16:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static __net_exit void l2tp_exit_net(struct net *net)
|
|
|
|
{
|
|
|
|
struct l2tp_net *pn = l2tp_pernet(net);
|
2017-11-12 22:30:31 +03:00
|
|
|
|
2024-06-20 12:22:39 +01:00
|
|
|
idr_destroy(&pn->l2tp_v2_session_idr);
|
l2tp: store l2tpv3 sessions in per-net IDR
L2TPv3 sessions are currently held in one of two fixed-size hash
lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist
(UDP-encap), keyed by the L2TPv3 32-bit session_id.
In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently
without finding the tunnel first via sk_user_data, UDP sessions are
now kept in a per-net session list, keyed by session ID. Convert the
existing per-net hashlist to use an IDR for better performance when
there are many sessions and have L2TPv3 UDP sessions use the same IDR.
Although the L2TPv3 RFC states that the session ID alone identifies
the session, our implementation has allowed the same session ID to be
used in different L2TP UDP tunnels. To retain support for this, a new
per-net session hashtable is used, keyed by the sock and session
ID. If on creating a new session, a session already exists with that
ID in the IDR, the colliding sessions are added to the new hashtable
and the existing IDR entry is flagged. When looking up sessions, the
approach is to first check the IDR and if no unflagged match is found,
check the new hashtable. The sock is made available to session getters
where session ID collisions are to be considered. In this way, the new
hashtable is used only for session ID collisions so can be kept small.
For managing session removal, we need a list of colliding sessions
matching a given ID in order to update or remove the IDR entry of the
ID. This is necessary to detect session ID collisions when future
sessions are created. The list head is allocated on first collision
of a given ID and refcounted.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2024-06-20 12:22:38 +01:00
|
|
|
idr_destroy(&pn->l2tp_v3_session_idr);
|
2023-01-13 19:01:36 -08:00
|
|
|
idr_destroy(&pn->l2tp_tunnel_idr);
|
2013-01-31 23:43:03 +00:00
|
|
|
}
|
|
|
|
|
2010-04-02 06:18:33 +00:00
|
|
|
static struct pernet_operations l2tp_net_ops = {
|
|
|
|
.init = l2tp_init_net,
|
2013-01-31 23:43:03 +00:00
|
|
|
.exit = l2tp_exit_net,
|
2024-07-29 16:38:14 +01:00
|
|
|
.pre_exit = l2tp_pre_exit_net,
|
2010-04-02 06:18:33 +00:00
|
|
|
.id = &l2tp_net_id,
|
|
|
|
.size = sizeof(struct l2tp_net),
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Module init: register the per-namespace operations, then allocate the
 * "l2tp" workqueue.
 *
 * Returns 0 on success, the error from register_pernet_device(), or
 * -ENOMEM when the workqueue cannot be allocated (in which case the
 * pernet registration is undone first).
 */
static int __init l2tp_init(void)
{
	int err;

	err = register_pernet_device(&l2tp_net_ops);
	if (err)
		return err;

	l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0);
	if (!l2tp_wq) {
		pr_err("alloc_workqueue failed\n");
		unregister_pernet_device(&l2tp_net_ops);
		return -ENOMEM;
	}

	pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION);

	return 0;
}
|
|
|
|
|
|
|
|
/* Module exit: unregister the per-namespace operations, then destroy the
 * workqueue if one was allocated.
 */
static void __exit l2tp_exit(void)
{
	unregister_pernet_device(&l2tp_net_ops);

	if (!l2tp_wq)
		return;

	destroy_workqueue(l2tp_wq);
	/* Clear the pointer so nothing can use the destroyed workqueue. */
	l2tp_wq = NULL;
}
|
|
|
|
|
|
|
|
module_init(l2tp_init);
|
|
|
|
module_exit(l2tp_exit);
|
|
|
|
|
|
|
|
MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
|
|
|
|
MODULE_DESCRIPTION("L2TP core");
|
|
|
|
MODULE_LICENSE("GPL");
|
|
|
|
MODULE_VERSION(L2TP_DRV_VERSION);
|