From f26bf06beae70175eda91e893190784bd1bcc7c0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:15 +0200 Subject: [PATCH 01/20] net: hisilicon: fix handling platform_get_irq result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index cc2d8b4b18e3..253f8ed0537a 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -816,7 +816,7 @@ static int hip04_mac_probe(struct platform_device *pdev) struct net_device *ndev; struct hip04_priv *priv; struct resource *res; - unsigned int irq; + int irq; int ret; ndev = alloc_etherdev(sizeof(struct hip04_priv)); From 72521ea07c0af37b8cb21228368128191c3f1a58 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:24 +0200 Subject: [PATCH 02/20] r8169: fix handling rtl_readphy result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2b32e0c5a0b4..b4f21232019a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6081,7 +6081,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; - u16 rg_saw_cnt; + int rg_saw_cnt; u32 data; static const struct ephy_info e_info_8168h_1[] = { { 0x1e, 0x0800, 0x0001 }, From c4bbac3913c0d649898a0d767728a585869a7d7d Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 11:21:48 -0700 Subject: [PATCH 03/20] i40e: fix VLAN inside VXLAN Previously to this patch, the hardware was removing VLAN tags from the inner header of VXLAN packets. The hardware configuration can be changed to leave the packet alone since that is what the linux stack expects for this type of VLAN in VXLAN packet. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 851c1a159be8..2fdf978ae6a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2672,7 +2672,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.lrxqthresh = 2; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; - rx_ctx.showiv = 1; + /* this controls whether VLAN is stripped from inner headers */ + rx_ctx.showiv = 0; #ifdef I40E_FCOE rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE); #endif From 43ae93a93e8c95c5e6389dc8e11704712b1ab2e9 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 28 Sep 2015 17:31:26 -0700 Subject: [PATCH 04/20] i40e/i40evf: check for stopped admin queue It's possible that while we are waiting for the spinlock, another entity (that owns the spinlock) has shut down the admin queue. If we then attempt to use the queue, we will panic. Add a check for this condition on the receive side. This matches an existing check on the send queue side. Signed-off-by: Mitch Williams Acked-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_adminq.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 3e0d20037675..62488a67149d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -946,6 +946,13 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK); if (ntu == ntc) { @@ -1007,6 +1014,8 @@ clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); if (i40e_is_nvm_update_op(&e->desc)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index f08450b90774..929d47152bf2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -887,6 +887,13 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK); if (ntu == ntc) { @@ -948,6 +955,8 @@ clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); return ret_code; From f05940e61845951517eda02a28ccc091888aaab9 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:06 -0400 Subject: [PATCH 05/20] sctp: Whitespace fix Fix indentation in sctp_generate_heartbeat_event. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 35df1266bf07..f554b9a96e07 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -388,8 +388,8 @@ void sctp_generate_heartbeat_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); - if (error) - asoc->base.sk->sk_err = -error; + if (error) + asoc->base.sk->sk_err = -error; out_unlock: bh_unlock_sock(asoc->base.sk); From 635682a14427d241bab7bbdeebb48a7d7b91638e Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:07 -0400 Subject: [PATCH 06/20] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event A case can occur when sctp_accept() is called by the user during a heartbeat timeout event after the 4-way handshake. Since sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the bh_sock_lock in sctp_generate_heartbeat_event() will be taken with the listening socket but released with the new association socket. The result is a deadlock on any future attempts to take the listening socket lock. Note that this race can occur with other SCTP timeouts that take the bh_lock_sock() in the event sctp_accept() is called. BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] ... RIP: 0010:[] [] _spin_lock+0x1e/0x30 RSP: 0018:ffff880028323b20 EFLAGS: 00000206 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) Stack: ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 Call Trace: [] ? sctp_rcv+0x492/0xa10 [sctp] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? ip_local_deliver_finish+0xdd/0x2d0 [] ? ip_local_deliver+0x98/0xa0 [] ? ip_rcv_finish+0x12d/0x440 [] ? ip_rcv+0x275/0x350 [] ? __netif_receive_skb+0x4ab/0x750 ... With lockdep debugging: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- CslRx/12087 is trying to release lock (slock-AF_INET) at: [] sctp_generate_timeout_event+0x40/0xe0 [sctp] but there are no more locks to release! other info that might help us debug this: 2 locks held by CslRx/12087: #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] Ensure the socket taken is also the same one that is released by saving a copy of the socket before entering the timeout event critical section. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f554b9a96e07..6098d4c42fa9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } From 661dfc65f7981481ba2e31aaa702371e82336e56 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 25 Sep 2015 11:52:27 +0400 Subject: [PATCH 07/20] net/ibm/emac: bump version numbers for correct work with ethtool The size of the MAC register dump used to be the size specified by the reg property in the device tree. Userland has no good way of finding out that size, and it was not specified consistently for each MAC type, so ethtool would end up printing junk at the end of the register dump if the device tree didn't match the size it assumed. Using the new version numbers indicates unambiguously that the size of the MAC register dump is dependent only on the MAC type. Fixes: 5369c71f7ca2 ("net/ibm/emac: fix size of emac dump memory areas") Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 28df37420da9..ac02c675c59c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4SYNC_ETHTOOL_REGS_VER 2 +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ From 06a15f51cf3618e32a73871ee6a547ef7fd902b5 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Mon, 28 Sep 2015 11:32:42 +0200 Subject: [PATCH 08/20] l2tp: protect tunnel->del_work by ref_count There is a small chance that tunnel_free() is called before tunnel->del_work scheduled resulting in a zero pointer dereference. Signed-off-by: Alexander Couzens Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f6b090df3930..afca2eb4dfa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1341,6 +1341,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1636,8 +1638,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); From 2103d6b818fcdae15ffa04cf385f770e6c3892c3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 28 Sep 2015 14:34:04 +0200 Subject: [PATCH 09/20] net: sctp: Don't use 64 kilobyte lookup table for four elements Seemingly innocuous sctp_trans_state_to_prio_map[] array is way bigger than it looks, since "[SCTP_UNKNOWN] = 2" expands into "[0xffff] = 2" ! This patch replaces it with switch() statement. Signed-off-by: Denys Vlasenko CC: Vlad Yasevich CC: Neil Horman CC: Marcelo Ricardo Leitner CC: linux-sctp@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 197c3f59ecbf..b00f1f9611d6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1208,20 +1208,22 @@ void sctp_assoc_update(struct sctp_association *asoc, * within this document. * * Our basic strategy is to round-robin transports in priorities - * according to sctp_state_prio_map[] e.g., if no such + * according to sctp_trans_score() e.g., if no such * transport with state SCTP_ACTIVE exists, round-robin through * SCTP_UNKNOWN, etc. You get the picture. */ -static const u8 sctp_trans_state_to_prio_map[] = { - [SCTP_ACTIVE] = 3, /* best case */ - [SCTP_UNKNOWN] = 2, - [SCTP_PF] = 1, - [SCTP_INACTIVE] = 0, /* worst case */ -}; - static u8 sctp_trans_score(const struct sctp_transport *trans) { - return sctp_trans_state_to_prio_map[trans->state]; + switch (trans->state) { + case SCTP_ACTIVE: + return 3; /* best case */ + case SCTP_UNKNOWN: + return 2; + case SCTP_PF: + return 1; + default: /* case SCTP_INACTIVE */ + return 0; /* worst case */ + } } static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, From 4613012db1d911f80897f9446a49de817b2c4c47 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:42 -0400 Subject: [PATCH 10/20] af_unix: Convert the unix_sk macro to an inline function for type safety As suggested by Eric Dumazet this change replaces the #define with a static inline function to enjoy complaints by the compiler when misusing the API. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- include/net/af_unix.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 4a167b30a12f..cb1b9bbda332 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -63,7 +63,11 @@ struct unix_sock { #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; -#define unix_sk(__sk) ((struct unix_sock *)__sk) + +static inline struct unix_sock *unix_sk(struct sock *sk) +{ + return (struct unix_sock *)sk; +} #define peer_wait peer_wq.wait From 9f389e35674f5b086edd70ed524ca0f287259725 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:43 -0400 Subject: [PATCH 11/20] af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag AF_UNIX sockets now return multiple skbs from recv() when MSG_PEEK flag is set. This is referenced in kernel bugzilla #12323 @ https://bugzilla.kernel.org/show_bug.cgi?id=12323 As described both in the BZ and lkml thread @ http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an AF_UNIX socket only reads a single skb, where the desired effect is to return as much skb data has been queued, until hitting the recv buffer size (whichever comes first). The modified MSG_PEEK path will now move to the next skb in the tree and jump to the again: label, rather than following the natural loop structure. This requires duplicating some of the loop head actions. This was tested using the python socketpair python code attached to the bugzilla issue. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- net/unix/af_unix.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 03ee4d359f6a..ef31b40ad550 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2179,8 +2179,21 @@ unlock: if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - sk_peek_offset_fwd(sk, chunk); + if (skip) { + sk_peek_offset_fwd(sk, chunk); + skip -= chunk; + } + if (UNIXCB(skb).fp) + break; + + last = skb; + last_len = skb->len; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); From 4c52b1da538800a30f030eeb697366c23daf2ef3 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 28 Sep 2015 10:49:48 +0200 Subject: [PATCH 12/20] bna: fix error handling Several functions can return negative value in case of error, so their return type should be fixed as well as type of variables to which this value is assigned. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index b7a0f7879de2..9e59663a6ead 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1543,7 +1543,7 @@ bfa_flash_cmd_act_check(void __iomem *pci_bar) } /* Flush FLI data fifo. */ -static u32 +static int bfa_flash_fifo_flush(void __iomem *pci_bar) { u32 i; @@ -1573,11 +1573,11 @@ bfa_flash_fifo_flush(void __iomem *pci_bar) } /* Read flash status. */ -static u32 +static int bfa_flash_status_read(void __iomem *pci_bar) { union bfa_flash_dev_status_reg dev_status; - u32 status; + int status; u32 ret_status; int i; @@ -1611,11 +1611,11 @@ bfa_flash_status_read(void __iomem *pci_bar) } /* Start flash read operation. */ -static u32 +static int bfa_flash_read_start(void __iomem *pci_bar, u32 offset, u32 len, char *buf) { - u32 status; + int status; /* len must be mutiple of 4 and not exceeding fifo size */ if (len == 0 || len > BFA_FLASH_FIFO_SIZE || (len & 0x03) != 0) @@ -1703,7 +1703,8 @@ static enum bfa_status bfa_flash_raw_read(void __iomem *pci_bar, u32 offset, char *buf, u32 len) { - u32 n, status; + u32 n; + int status; u32 off, l, s, residue, fifo_sz; residue = len; From 75c261b51ba19f0791de608f0acfb94956f78c76 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 28 Sep 2015 15:05:33 +0200 Subject: [PATCH 13/20] net sysfs: Print link speed as signed integer Otherwise 4294967295 (MBit/s) (-1) will be printed when there is no link. Documentation/ABI/testing/sysfs-class-net does not state if this shall be signed or unsigned. Also remove the now unused variable fmt_udec. Signed-off-by: Alexander Stein Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 805a95a48107..830f8a7c1cb1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -31,7 +31,6 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; -static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -202,7 +201,7 @@ static ssize_t speed_show(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; From 741a11d9e4103a8e1c590ef1280143fe654e4e33 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Sep 2015 10:12:13 -0700 Subject: [PATCH 14/20] net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set Wolfgang reported that IPv6 stack is ignoring oif in output route lookups: With ipv6, ip -6 route get always returns the specific route. $ ip -6 r 2001:db8:e2::1 dev enp2s0 proto kernel metric 256 2001:db8:e2::/64 dev enp2s0 metric 1024 2001:db8:e3::1 dev enp3s0 proto kernel metric 256 2001:db8:e3::/64 dev enp3s0 metric 1024 fe80::/64 dev enp3s0 proto kernel metric 256 default via 2001:db8:e3::255 dev enp3s0 metric 1024 $ ip -6 r get 2001:db8:e2::100 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache $ ip -6 r get 2001:db8:e2::100 oif enp3s0 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache The stack does consider the oif but a mismatch in rt6_device_match is not considered fatal because RT6_LOOKUP_F_IFACE is not set in the flags. Cc: Wolfgang Nothdurft Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f204089e854c..cb32ce250db0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1193,7 +1193,8 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, fl6->flowi6_iif = LOOPBACK_IFINDEX; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || + fl6->flowi6_oif) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl6->saddr)) From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: [PATCH 15/20] skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dad4dd37e2aa..fab4599ba8b2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); From c047a1f918af75e572a19ba0581c3e3e202ed698 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 29 Sep 2015 01:50:56 +0200 Subject: [PATCH 16/20] dsa: mv88e6xxx: Enable forwarding for unknown to the CPU port Frames destined to an unknown address must be forwarded to the CPU port. Otherwise incoming ARP, dhcp leases, etc, do not work. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index f8baa897d1a0..1f7dd927cc5e 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2051,6 +2051,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; else reg |= PORT_CONTROL_FRAME_MODE_DSA; + reg |= PORT_CONTROL_FORWARD_UNKNOWN | + PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || From 23860f103b53e764a3cbbf615d08f88362a28295 Mon Sep 17 00:00:00 2001 From: Robb Manes Date: Tue, 29 Sep 2015 11:03:37 -0400 Subject: [PATCH 17/20] net/mlx4: Handle return codes in mlx4_qp_attach_common Both new_steering_entry() and existing_steering_entry() return values based on their success or failure, but currently they fall through silently. This can make troubleshooting difficult, as we were unable to tell which one of these two functions returned errors or specifically what code was returned. This patch remedies that situation by passing the return codes to err, which is returned by mlx4_qp_attach_common() itself. This also addresses a leak in the call to mlx4_bitmap_free() as well. Signed-off-by: Robb Manes Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index bd9ea0d01aae..1d4e2e054647 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1184,10 +1184,11 @@ out: if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) - new_steering_entry(dev, port, steer, index, qp->qpn); + err = new_steering_entry(dev, port, steer, + index, qp->qpn); else - existing_steering_entry(dev, port, steer, - index, qp->qpn); + err = existing_steering_entry(dev, port, steer, + index, qp->qpn); } if (err && link && index != -1) { if (index < dev->caps.num_mgms) From 9ae6d4935e3df35a23bbbe531c6b9ff314e7fd0f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 29 Sep 2015 17:45:28 +0200 Subject: [PATCH 18/20] testptp: Silence compiler warnings on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling Documentation/ptp/testptp.c the following compiler warnings are printed out: Documentation/ptp/testptp.c: In function ‘main’: Documentation/ptp/testptp.c:367:11: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 3 has type ‘__s64’ [-Wformat=] event.t.sec, event.t.nsec); ^ Documentation/ptp/testptp.c:505:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i)->sec, (pct+2*i)->nsec); ^ Documentation/ptp/testptp.c:507:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i+1)->sec, (pct+2*i+1)->nsec); ^ Documentation/ptp/testptp.c:509:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i+2)->sec, (pct+2*i+2)->nsec); This happens because __s64 is by default defined as "long" on ppc64, not as "long long". However, to fix these warnings, it's possible to define the __SANE_USERSPACE_TYPES__ so that __s64 gets defined to "long long" on ppc64, too. Signed-off-by: Thomas Huth Acked-by: Richard Cochran Signed-off-by: David S. Miller --- Documentation/ptp/testptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c index 2bc8abc57fa0..6c6247aaa7b9 100644 --- a/Documentation/ptp/testptp.c +++ b/Documentation/ptp/testptp.c @@ -18,6 +18,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include #include #include From 57a47532c4312159935c98b7f1cf0e62296b9171 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 29 Sep 2015 14:17:54 -0400 Subject: [PATCH 19/20] net: dsa: fix preparation of a port STP update Because of the default 0 value of ret in dsa_slave_port_attr_set, a driver may return -EOPNOTSUPP from the commit phase of a STP state, which triggers a WARN() from switchdev. This happened on a 6185 switch which does not support hardware bridging. Fixes: 3563606258cf ("switchdev: convert STP update to switchdev attr set") Reported-by: Andrew Lunn Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cce97385f743..7d91f4612ac0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -458,12 +458,17 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) static int dsa_slave_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - int ret = 0; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret; switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_COMMIT) - ret = dsa_slave_stp_update(dev, attr->u.stp_state); + if (attr->trans == SWITCHDEV_TRANS_PREPARE) + ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; + else + ret = ds->drv->port_stp_update(ds, p->port, + attr->u.stp_state); break; default: ret = -EOPNOTSUPP; From b84f78782052ee4516903e5d0566a5eee365b771 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 19:07:07 -0700 Subject: [PATCH 20/20] net: Initialize flow flags in input path The fib_table_lookup tracepoint found 2 places where the flowi4_flags is not initialized. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6fcbd215cdbc..690bcbc59f26 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -340,6 +340,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; + fl4.flowi4_flags = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c6ad99ad0ffb..c81deb85acb4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1737,6 +1737,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res, 0);