From fa51b247d67b521e1a1103cad75eb7a666fc9ff9 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 9 Feb 2016 21:11:14 +0100 Subject: [PATCH 1/4] net/mlx4: fix some error handling in mlx4_multi_func_init() The while loop after err_slaves should use post-decrement; otherwise we'll fail to do the kfrees for i==0, and will run into out-of-bounds accesses if the setup above failed already at i==0. [I'm not sure why one even bothers populating the ->vlan_filter array: mlx4.h isn't #included by anything outside drivers/net/ethernet/mellanox/mlx4/, and "git grep -C2 -w vlan_filter drivers/net/ethernet/mellanox/mlx4/" seems to suggest that the vlan_filter elements aren't used at all.] Signed-off-by: Rasmus Villemoes Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index d48d5793407d..e94ca1c3fc7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2429,7 +2429,7 @@ err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); err_slaves: - while (--i) { + while (i--) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } From ee50aeac60ba5c4c7e072fbc0c9aa2043c8896e6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 11 Feb 2016 10:24:42 +0200 Subject: [PATCH 2/4] IB/core: Fix reading capability mask of the port info class When checking specific attribute from a bit mask, need to use bitwise AND and not logical AND, fixed that. 
Fixes: 145d9c541032 ('IB/core: Display extended counter set if available') Signed-off-by: Eran Ben Elisha Signed-off-by: Matan Barak Reviewed-by: Or Gerlitz Reviewed-by: Christoph Lameter Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ec46386e3c7f..14606afbfaa8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -720,12 +720,11 @@ static struct attribute_group *get_counter_table(struct ib_device *dev, if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO, &cpi, 40, sizeof(cpi)) >= 0) { - - if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH) + if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH) /* We have extended counters */ return &pma_group_ext; - if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF) + if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF) /* But not the IETF ones */ return &pma_group_noietf; } From 08bc327629cbd63bb2f66677e4b33b643695097c Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Thu, 11 Feb 2016 16:30:51 -0500 Subject: [PATCH 3/4] IB/ipoib: fix for rare multicast join race condition A narrow window for a race condition still exists between the multicast join thread and the *dev_flush workers. A kernel crash caused by prolonged erratic link state changes was observed (most likely due to faulty cabling): [167275.656270] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [167275.665973] IP: [] ipoib_mcast_join+0xae/0x1d0 [ib_ipoib] [167275.674443] PGD 0 [167275.677373] Oops: 0000 [#1] SMP ... [167275.977530] Call Trace: [167275.982225] [] ? ipoib_mcast_free+0x200/0x200 [ib_ipoib] [167275.992024] [] ipoib_mcast_join_task+0x2a7/0x490 [ib_ipoib] [167276.002149] [] process_one_work+0x17b/0x470 [167276.010754] [] worker_thread+0x11b/0x400 [167276.019088] [] ? 
rescuer_thread+0x400/0x400 [167276.027737] [] kthread+0xcf/0xe0 Here was a hit spot: ipoib_mcast_join() { .............. rec.qkey = priv->broadcast->mcmember.qkey; ^^^^^^^ ..... } The proposed patch prevents the multicast join task from continuing if a link state change is detected. Signed-off-by: Alex Estrin Changes from v4: - as suggested by Doug Ledford, optimized spinlock usage, i.e. ipoib_mcast_join() is called with lock held. Changes from v3: - sync with priv->lock before flag check. Changes from v2: - Move check for OPER_UP flag state to mcast_join() to ensure no event worker is in progress. - minor style fixes. Changes from v1: - No need to lock again if error detected. Signed-off-by: Doug Ledford --- .../infiniband/ulp/ipoib/ipoib_multicast.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 050dfa175d16..25889311b1e9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -456,7 +456,10 @@ out_locked: return status; } -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) +/* + * Caller must hold 'priv->lock' + */ +static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_sa_multicast *multicast; @@ -466,6 +469,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) ib_sa_comp_mask comp_mask; int ret = 0; + if (!priv->broadcast || + !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) + return -EINVAL; + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; @@ -525,20 +532,23 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.join_state = 4; #endif } + spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, 
&rec, comp_mask, GFP_KERNEL, ipoib_mcast_join_complete, mcast); + spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); - spin_lock_irq(&priv->lock); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); spin_unlock_irq(&priv->lock); complete(&mcast->done); + spin_lock_irq(&priv->lock); } + return 0; } void ipoib_mcast_join_task(struct work_struct *work) @@ -620,9 +630,10 @@ void ipoib_mcast_join_task(struct work_struct *work) /* Found the next unjoined group */ init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); - ipoib_mcast_join(dev, mcast); - spin_lock_irq(&priv->lock); + if (ipoib_mcast_join(dev, mcast)) { + spin_unlock_irq(&priv->lock); + return; + } } else if (!delay_until || time_before(mcast->delay_until, delay_until)) delay_until = mcast->delay_until; @@ -641,10 +652,9 @@ out: if (mcast) { init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + ipoib_mcast_join(dev, mcast); } spin_unlock_irq(&priv->lock); - if (mcast) - ipoib_mcast_join(dev, mcast); } int ipoib_mcast_start_thread(struct net_device *dev) From 75c1657e1d50730dc0130a67977f7831a4e241f4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 11 Feb 2016 21:09:57 +0200 Subject: [PATCH 4/4] IB/mlx5: Fix RC transport send queue overhead computation Fix the RC QP send queue overhead computation to take into account two additional segments in the WQE that are needed for registration operations. The ATOMIC and UMR segments can't coexist, so take the maximum of the two. Commit 9e65dc371b5c ("IB/mlx5: Fix RC transport send queue overhead computation") was intended to update the RC transport, as its commit message states, but added the code to the UC transport instead. 
Fixes: 9e65dc371b5c ("IB/mlx5: Fix RC transport send queue overhead computation") Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9116bc3988a6..34cb8e87c7b8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -270,8 +270,10 @@ static int sq_overhead(enum ib_qp_type qp_type) /* fall through */ case IB_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_atomic_seg) + - sizeof(struct mlx5_wqe_raddr_seg); + max(sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg), + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_XRC_TGT: @@ -279,9 +281,9 @@ static int sq_overhead(enum ib_qp_type qp_type) case IB_QPT_UC: size += sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_umr_ctrl_seg) + - sizeof(struct mlx5_mkey_seg); + max(sizeof(struct mlx5_wqe_raddr_seg), + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_UD: