linux-next/net/hsr/hsr_forward.c

640 lines
17 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright 2011-2014 Autronica Fire and Security AS
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
* Frame router for HSR and PRP.
*/
#include "hsr_forward.h"
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
struct hsr_node;
/* The uses I can see for these HSR supervision frames are:
* 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
* 22") to reset any sequence_nr counters belonging to that node. Useful if
* the other node's counter has been reset for some reason.
* --
* Or not - resetting the counter and bridging the frame would create a
* loop, unfortunately.
*
* 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck
* frame is received from a particular node, we know something is wrong.
* We just register these (as with normal frames) and throw them away.
*
* 3) Allow different MAC addresses for the two slave interfaces, using the
* MacAddressA field.
*/
static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
{
struct ethhdr *eth_hdr;
struct hsr_sup_tag *hsr_sup_tag;
struct hsrv1_ethhdr_sp *hsr_V1_hdr;
struct hsr_sup_tlv *hsr_sup_tlv;
u16 total_length = 0;
WARN_ON_ONCE(!skb_mac_header_was_set(skb));
eth_hdr = (struct ethhdr *)skb_mac_header(skb);
/* Correct addr? */
if (!ether_addr_equal(eth_hdr->h_dest,
hsr->sup_multicast_addr))
return false;
/* Correct ether type?. */
if (!(eth_hdr->h_proto == htons(ETH_P_PRP) ||
eth_hdr->h_proto == htons(ETH_P_HSR)))
return false;
/* Get the supervision header from correct location. */
if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */
total_length = sizeof(struct hsrv1_ethhdr_sp);
if (!pskb_may_pull(skb, total_length))
return false;
hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb);
if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP))
return false;
hsr_sup_tag = &hsr_V1_hdr->hsr_sup;
} else {
total_length = sizeof(struct hsrv0_ethhdr_sp);
if (!pskb_may_pull(skb, total_length))
return false;
hsr_sup_tag =
&((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup;
}
if (hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_ANNOUNCE &&
hsr_sup_tag->tlv.HSR_TLV_type != HSR_TLV_LIFE_CHECK &&
hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD &&
hsr_sup_tag->tlv.HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA)
return false;
if (hsr_sup_tag->tlv.HSR_TLV_length != 12 &&
hsr_sup_tag->tlv.HSR_TLV_length != sizeof(struct hsr_sup_payload))
return false;
/* Get next tlv */
total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data;
skb_push(skb, total_length);
/* if this is a redbox supervision frame we need to verify
* that more data is available
*/
if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) {
/* tlv length must be a length of a mac address */
if (hsr_sup_tlv->HSR_TLV_length != sizeof(struct hsr_sup_payload))
return false;
/* make sure another tlv follows */
total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
/* get next tlv */
skb_pull(skb, total_length);
hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data;
skb_push(skb, total_length);
}
/* end of tlvs must follow at the end */
if (hsr_sup_tlv->HSR_TLV_type == HSR_TLV_EOT &&
hsr_sup_tlv->HSR_TLV_length != 0)
return false;
return true;
}
static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in,
struct hsr_frame_info *frame)
{
struct sk_buff *skb;
int copylen;
unsigned char *dst, *src;
skb_pull(skb_in, HSR_HLEN);
skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC);
skb_push(skb_in, HSR_HLEN);
if (!skb)
return NULL;
skb_reset_mac_header(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL)
skb->csum_start -= HSR_HLEN;
copylen = 2 * ETH_ALEN;
if (frame->is_vlan)
copylen += VLAN_HLEN;
src = skb_mac_header(skb_in);
dst = skb_mac_header(skb);
memcpy(dst, src, copylen);
skb->protocol = eth_hdr(skb)->h_proto;
return skb;
}
struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port)
{
if (!frame->skb_std) {
net: hsr: avoid possible NULL deref in skb_clone() syzbot got a crash [1] in skb_clone(), caused by a bug in hsr_get_untagged_frame(). When/if create_stripped_skb_hsr() returns NULL, we must not attempt to call skb_clone(). While we are at it, replace a WARN_ONCE() by netdev_warn_once(). [1] general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641 Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00 RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000 RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140 R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640 R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620 FS: 00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164 hsr_forward_do net/hsr/hsr_forward.c:461 [inline] hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623 hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69 __netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 __netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 netif_receive_skb_internal net/core/dev.c:5685 [inline] netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544 tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:584 ksys_write+0x127/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-10-17 16:59:28 +00:00
if (frame->skb_hsr)
frame->skb_std =
create_stripped_skb_hsr(frame->skb_hsr, frame);
net: hsr: avoid possible NULL deref in skb_clone() syzbot got a crash [1] in skb_clone(), caused by a bug in hsr_get_untagged_frame(). When/if create_stripped_skb_hsr() returns NULL, we must not attempt to call skb_clone(). While we are at it, replace a WARN_ONCE() by netdev_warn_once(). [1] general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641 Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00 RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000 RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140 R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640 R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620 FS: 00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164 hsr_forward_do net/hsr/hsr_forward.c:461 [inline] hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623 hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69 __netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 __netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 netif_receive_skb_internal net/core/dev.c:5685 [inline] netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544 tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:584 ksys_write+0x127/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-10-17 16:59:28 +00:00
else
netdev_warn_once(port->dev,
"Unexpected frame received in hsr_get_untagged_frame()\n");
if (!frame->skb_std)
return NULL;
}
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port)
{
if (!frame->skb_std) {
if (frame->skb_prp) {
/* trim the skb by len - HSR_HLEN to exclude RCT */
skb_trim(frame->skb_prp,
frame->skb_prp->len - HSR_HLEN);
frame->skb_std =
__pskb_copy(frame->skb_prp,
skb_headroom(frame->skb_prp),
GFP_ATOMIC);
} else {
/* Unexpected */
WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
__FILE__, __LINE__, port->dev->name);
return NULL;
}
}
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
static void prp_set_lan_id(struct prp_rct *trailer,
struct hsr_port *port)
{
int lane_id;
if (port->type == HSR_PT_SLAVE_A)
lane_id = 0;
else
lane_id = 1;
/* Add net_id in the upper 3 bits of lane_id */
lane_id |= port->hsr->net_id;
set_prp_lan_id(trailer, lane_id);
}
/* Tailroom for PRP rct should have been created before calling this */
static struct sk_buff *prp_fill_rct(struct sk_buff *skb,
struct hsr_frame_info *frame,
struct hsr_port *port)
{
struct prp_rct *trailer;
int min_size = ETH_ZLEN;
int lsdu_size;
if (!skb)
return skb;
if (frame->is_vlan)
min_size = VLAN_ETH_ZLEN;
if (skb_put_padto(skb, min_size))
return NULL;
trailer = (struct prp_rct *)skb_put(skb, HSR_HLEN);
lsdu_size = skb->len - 14;
if (frame->is_vlan)
lsdu_size -= 4;
prp_set_lan_id(trailer, port);
set_prp_LSDU_size(trailer, lsdu_size);
trailer->sequence_nr = htons(frame->sequence_nr);
trailer->PRP_suffix = htons(ETH_P_PRP);
skb->protocol = eth_hdr(skb)->h_proto;
return skb;
}
static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr,
struct hsr_port *port)
{
int path_id;
if (port->type == HSR_PT_SLAVE_A)
path_id = 0;
else
path_id = 1;
set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id);
}
static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
struct hsr_frame_info *frame,
struct hsr_port *port, u8 proto_version)
{
struct hsr_ethhdr *hsr_ethhdr;
int lsdu_size;
/* pad to minimum packet size which is 60 + 6 (HSR tag) */
if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
return NULL;
lsdu_size = skb->len - 14;
if (frame->is_vlan)
lsdu_size -= 4;
hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
hsr_set_path_id(hsr_ethhdr, port);
set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr);
hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
hsr_ethhdr->ethhdr.h_proto = htons(proto_version ?
ETH_P_HSR : ETH_P_PRP);
skb->protocol = hsr_ethhdr->ethhdr.h_proto;
return skb;
}
/* If the original frame was an HSR tagged frame, just clone it to be sent
* unchanged. Otherwise, create a private frame especially tagged for 'port'.
*/
struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port)
{
unsigned char *dst, *src;
struct sk_buff *skb;
int movelen;
if (frame->skb_hsr) {
struct hsr_ethhdr *hsr_ethhdr =
(struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr);
/* set the lane id properly */
hsr_set_path_id(hsr_ethhdr, port);
return skb_clone(frame->skb_hsr, GFP_ATOMIC);
} else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) {
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
/* Create the new skb with enough headroom to fit the HSR tag */
skb = __pskb_copy(frame->skb_std,
skb_headroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC);
if (!skb)
return NULL;
skb_reset_mac_header(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL)
skb->csum_start += HSR_HLEN;
movelen = ETH_HLEN;
if (frame->is_vlan)
movelen += VLAN_HLEN;
src = skb_mac_header(skb);
dst = skb_push(skb, HSR_HLEN);
memmove(dst, src, movelen);
skb_reset_mac_header(skb);
/* skb_put_padto free skb on error and hsr_fill_tag returns NULL in
* that case
*/
return hsr_fill_tag(skb, frame, port, port->hsr->prot_version);
}
struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port)
{
struct sk_buff *skb;
if (frame->skb_prp) {
struct prp_rct *trailer = skb_get_PRP_rct(frame->skb_prp);
if (trailer) {
prp_set_lan_id(trailer, port);
} else {
WARN_ONCE(!trailer, "errored PRP skb");
return NULL;
}
return skb_clone(frame->skb_prp, GFP_ATOMIC);
} else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) {
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
skb = skb_copy_expand(frame->skb_std, 0,
skb_tailroom(frame->skb_std) + HSR_HLEN,
GFP_ATOMIC);
prp_fill_rct(skb, frame, port);
return skb;
}
static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
struct hsr_node *node_src)
{
bool was_multicast_frame;
int res, recv_len;
was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
hsr_addr_subst_source(node_src, skb);
skb_pull(skb, ETH_HLEN);
recv_len = skb->len;
res = netif_rx(skb);
if (res == NET_RX_DROP) {
dev->stats.rx_dropped++;
} else {
dev->stats.rx_packets++;
dev->stats.rx_bytes += recv_len;
if (was_multicast_frame)
dev->stats.multicast++;
}
}
static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
struct hsr_frame_info *frame)
{
if (frame->port_rcv->type == HSR_PT_MASTER) {
hsr_addr_subst_dest(frame->node_src, skb, port);
/* Address substitution (IEC62439-3 pp 26, 50): replace mac
* address of outgoing frame with that of the outgoing slave's.
*/
ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr);
}
return dev_queue_xmit(skb);
}
bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
return ((frame->port_rcv->type == HSR_PT_SLAVE_A &&
port->type == HSR_PT_SLAVE_B) ||
(frame->port_rcv->type == HSR_PT_SLAVE_B &&
port->type == HSR_PT_SLAVE_A));
}
bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
if (port->dev->features & NETIF_F_HW_HSR_FWD)
return prp_drop_frame(frame, port);
return false;
}
/* Forward the frame through all devices except:
* - Back through the receiving device
* - If it's a HSR frame: through a device where it has passed before
* - if it's a PRP frame: through another PRP slave device (no bridge)
* - To the local HSR master only if the frame is directly addressed to it, or
* a non-supervision multicast or broadcast frame.
*
* HSR slave devices should insert a HSR tag into the frame, or forward the
* frame unchanged if it's already tagged. Interlink devices should strip HSR
* tags if they're of the non-HSR type (but only after duplicate discard). The
* master device always strips HSR tags.
*/
static void hsr_forward_do(struct hsr_frame_info *frame)
{
struct hsr_port *port;
struct sk_buff *skb;
bool sent = false;
hsr_for_each_port(frame->port_rcv->hsr, port) {
struct hsr_priv *hsr = port->hsr;
/* Don't send frame back the way it came */
if (port == frame->port_rcv)
continue;
/* Don't deliver locally unless we should */
if (port->type == HSR_PT_MASTER && !frame->is_local_dest)
continue;
/* Deliver frames directly addressed to us to master only */
if (port->type != HSR_PT_MASTER && frame->is_local_exclusive)
continue;
/* If hardware duplicate generation is enabled, only send out
* one port.
*/
if ((port->dev->features & NETIF_F_HW_HSR_DUP) && sent)
continue;
/* Don't send frame over port where it has been sent before.
* Also fro SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
hsr_register_frame_out(port, frame->node_src,
frame->sequence_nr))
continue;
if (frame->is_supervision && port->type == HSR_PT_MASTER) {
hsr_handle_sup_frame(frame);
continue;
}
/* Check if frame is to be dropped. Eg. for PRP no forward
* between ports.
*/
if (hsr->proto_ops->drop_frame &&
hsr->proto_ops->drop_frame(frame, port))
continue;
if (port->type != HSR_PT_MASTER)
skb = hsr->proto_ops->create_tagged_frame(frame, port);
else
skb = hsr->proto_ops->get_untagged_frame(frame, port);
if (!skb) {
frame->port_rcv->dev->stats.rx_dropped++;
continue;
}
skb->dev = port->dev;
if (port->type == HSR_PT_MASTER) {
hsr_deliver_master(skb, port->dev, frame->node_src);
} else {
if (!hsr_xmit(skb, port, frame))
sent = true;
}
}
}
static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
struct hsr_frame_info *frame)
{
if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) {
frame->is_local_exclusive = true;
skb->pkt_type = PACKET_HOST;
} else {
frame->is_local_exclusive = false;
}
if (skb->pkt_type == PACKET_HOST ||
skb->pkt_type == PACKET_MULTICAST ||
skb->pkt_type == PACKET_BROADCAST) {
frame->is_local_dest = true;
} else {
frame->is_local_dest = false;
}
}
static void handle_std_frame(struct sk_buff *skb,
struct hsr_frame_info *frame)
{
struct hsr_port *port = frame->port_rcv;
struct hsr_priv *hsr = port->hsr;
frame->skb_hsr = NULL;
frame->skb_prp = NULL;
frame->skb_std = skb;
if (port->type != HSR_PT_MASTER) {
frame->is_from_san = true;
} else {
/* Sequence nr for the master node */
hsr: Synchronize sending frames to have always incremented outgoing seq nr. Sending frames via the hsr (master) device requires a sequence number which is tracked in hsr_priv::sequence_nr and protected by hsr_priv::seqnr_lock. Each time a new frame is sent, it will obtain a new id and then send it via the slave devices. Each time a packet is sent (via hsr_forward_do()) the sequence number is checked via hsr_register_frame_out() to ensure that a frame is not handled twice. This make sense for the receiving side to ensure that the frame is not injected into the stack twice after it has been received from both slave ports. There is no locking to cover the sending path which means the following scenario is possible: CPU0 CPU1 hsr_dev_xmit(skb1) hsr_dev_xmit(skb2) fill_frame_info() fill_frame_info() hsr_fill_frame_info() hsr_fill_frame_info() handle_std_frame() handle_std_frame() skb1's sequence_nr = 1 skb2's sequence_nr = 2 hsr_forward_do() hsr_forward_do() hsr_register_frame_out(, 2) // okay, send) hsr_register_frame_out(, 1) // stop, lower seq duplicate Both skbs (or their struct hsr_frame_info) received an unique id. However since skb2 was sent before skb1, the higher sequence number was recorded in hsr_register_frame_out() and the late arriving skb1 was dropped and never sent. This scenario has been observed in a three node HSR setup, with node1 + node2 having ping and iperf running in parallel. From time to time ping reported a missing packet. Based on tracing that missing ping packet did not leave the system. It might be possible (didn't check) to drop the sequence number check on the sending side. But if the higher sequence number leaves on wire before the lower does and the destination receives them in that order and it will drop the packet with the lower sequence number and never inject into the stack. Therefore it seems the only way is to lock the whole path from obtaining the sequence number and sending via dev_queue_xmit() and assuming the packets leave on wire in the same order (and don't get reordered by the NIC). Cover the whole path for the master interface from obtaining the ID until after it has been forwarded via hsr_forward_skb() to ensure the skbs are sent to the NIC in the order of the assigned sequence numbers. Fixes: f421436a591d3 ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-29 16:48:12 +00:00
lockdep_assert_held(&hsr->seqnr_lock);
frame->sequence_nr = hsr->sequence_nr;
hsr->sequence_nr++;
}
}
int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
struct hsr_frame_info *frame)
{
struct hsr_port *port = frame->port_rcv;
struct hsr_priv *hsr = port->hsr;
/* HSRv0 supervisory frames double as a tag so treat them as tagged. */
if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) ||
proto == htons(ETH_P_HSR)) {
/* Check if skb contains hsr_ethhdr */
if (skb->mac_len < sizeof(struct hsr_ethhdr))
return -EINVAL;
/* HSR tagged frame :- Data or Supervision */
frame->skb_std = NULL;
frame->skb_prp = NULL;
frame->skb_hsr = skb;
frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
return 0;
}
/* Standard frame or PRP from master port */
handle_std_frame(skb, frame);
return 0;
}
int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
struct hsr_frame_info *frame)
{
/* Supervision frame */
struct prp_rct *rct = skb_get_PRP_rct(skb);
if (rct &&
prp_check_lsdu_size(skb, rct, frame->is_supervision)) {
frame->skb_hsr = NULL;
frame->skb_std = NULL;
frame->skb_prp = skb;
frame->sequence_nr = prp_get_skb_sequence_nr(rct);
return 0;
}
handle_std_frame(skb, frame);
return 0;
}
static int fill_frame_info(struct hsr_frame_info *frame,
struct sk_buff *skb, struct hsr_port *port)
{
struct hsr_priv *hsr = port->hsr;
struct hsr_vlan_ethhdr *vlan_hdr;
struct ethhdr *ethhdr;
__be16 proto;
int ret;
/* Check if skb contains ethhdr */
if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
Revert "net: hsr: use hlist_head instead of list_head for mac addresses" The hlist optimisation (which not only uses hlist_head instead of list_head but also splits hsr_priv::node_db into an array of 256 slots) does not consider the "node merge": Upon starting the hsr network (with three nodes) a packet that is sent from node1 to node3 will also be sent from node1 to node2 and then forwarded to node3. As a result node3 will receive 2 packets because it is not able to filter out the duplicate. Each packet received will create a new struct hsr_node with macaddress_A only set the MAC address it received from (the two MAC addesses from node1). At some point (early in the process) two supervision frames will be received from node1. They will be processed by hsr_handle_sup_frame() and one frame will leave early ("Node has already been merged") and does nothing. The other frame will be merged as portB and have its MAC address written to macaddress_B and the hsr_node (that was created for it as macaddress_A) will be removed. From now on HSR is able to identify a duplicate because both packets sent from one node will result in the same struct hsr_node because hsr_get_node() will find the MAC address either on macaddress_A or macaddress_B. Things get tricky with the optimisation: If sender's MAC address is saved as macaddress_A then the lookup will work as usual. If the MAC address has been merged into macaddress_B of another hsr_node then the lookup won't work because it is likely that the data structure is in another bucket. This results in creating a new struct hsr_node and not recognising a possible duplicate. A way around it would be to add another hsr_node::mac_list_B and attach it to the other bucket to ensure that this hsr_node will be looked up either via macaddress_A _or_ macaddress_B. I however prefer to revert it because it sounds like an academic problem rather than real life workload plus it adds complexity. I'm not an HSR expert with what is usual size of a network but I would guess 40 to 60 nodes. With 10.000 nodes and assuming 60us for pass-through (from node to node) then it would take almost 600ms for a packet to almost wrap around which sounds a lot. Revert the hash MAC addresses optimisation. Fixes: 4acc45db71158 ("net: hsr: use hlist_head instead of list_head for mac addresses") Cc: Juhee Kang <claudiajkang@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-29 16:48:08 +00:00
frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
frame->is_supervision,
port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
Revert "net: hsr: use hlist_head instead of list_head for mac addresses" The hlist optimisation (which not only uses hlist_head instead of list_head but also splits hsr_priv::node_db into an array of 256 slots) does not consider the "node merge": Upon starting the hsr network (with three nodes) a packet that is sent from node1 to node3 will also be sent from node1 to node2 and then forwarded to node3. As a result node3 will receive 2 packets because it is not able to filter out the duplicate. Each packet received will create a new struct hsr_node with macaddress_A only set the MAC address it received from (the two MAC addesses from node1). At some point (early in the process) two supervision frames will be received from node1. They will be processed by hsr_handle_sup_frame() and one frame will leave early ("Node has already been merged") and does nothing. The other frame will be merged as portB and have its MAC address written to macaddress_B and the hsr_node (that was created for it as macaddress_A) will be removed. From now on HSR is able to identify a duplicate because both packets sent from one node will result in the same struct hsr_node because hsr_get_node() will find the MAC address either on macaddress_A or macaddress_B. Things get tricky with the optimisation: If sender's MAC address is saved as macaddress_A then the lookup will work as usual. If the MAC address has been merged into macaddress_B of another hsr_node then the lookup won't work because it is likely that the data structure is in another bucket. This results in creating a new struct hsr_node and not recognising a possible duplicate. A way around it would be to add another hsr_node::mac_list_B and attach it to the other bucket to ensure that this hsr_node will be looked up either via macaddress_A _or_ macaddress_B. I however prefer to revert it because it sounds like an academic problem rather than real life workload plus it adds complexity. I'm not an HSR expert with what is usual size of a network but I would guess 40 to 60 nodes. With 10.000 nodes and assuming 60us for pass-through (from node to node) then it would take almost 600ms for a packet to almost wrap around which sounds a lot. Revert the hash MAC addresses optimisation. Fixes: 4acc45db71158 ("net: hsr: use hlist_head instead of list_head for mac addresses") Cc: Juhee Kang <claudiajkang@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-29 16:48:08 +00:00
ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
proto = ethhdr->h_proto;
if (proto == htons(ETH_P_8021Q))
frame->is_vlan = true;
if (frame->is_vlan) {
vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
}
frame->is_from_san = false;
frame->port_rcv = port;
ret = hsr->proto_ops->fill_frame_info(proto, skb, frame);
if (ret)
return ret;
check_local_dest(port->hsr, skb, frame);
return 0;
}
/* Must be called holding rcu read lock (because of the port parameter) */
void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
{
struct hsr_frame_info frame;
rcu_read_lock();
if (fill_frame_info(&frame, skb, port) < 0)
goto out_drop;
hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
hsr_forward_do(&frame);
rcu_read_unlock();
/* Gets called for ingress frames as well as egress from master port.
* So check and increment stats for master port only here.
*/
if (port->type == HSR_PT_MASTER) {
port->dev->stats.tx_packets++;
port->dev->stats.tx_bytes += skb->len;
}
kfree_skb(frame.skb_hsr);
kfree_skb(frame.skb_prp);
kfree_skb(frame.skb_std);
return;
out_drop:
rcu_read_unlock();
port->dev->stats.tx_dropped++;
kfree_skb(skb);
}