mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 06:33:34 +00:00
a3e5fd9314
When adding a hairpin flow, a firmware-side send queue is created for
the peer net device, which claims some host memory pages for its
internal ring buffer. If the peer net device is removed/unbound before
the hairpin flow is deleted, then the send queue is not destroyed which
leads to a stack trace on pci device remove:
[ 748.005230] mlx5_core 0000:08:00.2: wait_func:1094:(pid 12985): MANAGE_PAGES(0x108) timeout. Will cause a leak of a command resource
[ 748.005231] mlx5_core 0000:08:00.2: reclaim_pages:514:(pid 12985): failed reclaiming pages: err -110
[ 748.001835] mlx5_core 0000:08:00.2: mlx5_reclaim_root_pages:653:(pid 12985): failed reclaiming pages (-110) for func id 0x0
[ 748.002171] ------------[ cut here ]------------
[ 748.001177] FW pages counter is 4 after reclaiming all pages
[ 748.001186] WARNING: CPU: 1 PID: 12985 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:685 mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core] [ +0.002771] Modules linked in: cls_flower mlx5_ib mlx5_core ptp pps_core act_mirred sch_ingress openvswitch nsh xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_umad ib_ipoib iw_cm ib_cm ib_uverbs ib_core overlay fuse [last unloaded: pps_core]
[ 748.007225] CPU: 1 PID: 12985 Comm: tee Not tainted 5.12.0+ #1
[ 748.001376] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[ 748.002315] RIP: 0010:mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]
[ 748.001679] Code: 28 00 00 00 0f 85 22 01 00 00 48 81 c4 b0 00 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c7 40 cc 19 a1 e8 9f 71 0e e2 <0f> 0b e9 30 ff ff ff 48 c7 c7 a0 cc 19 a1 e8 8c 71 0e e2 0f 0b e9
[ 748.003781] RSP: 0018:ffff88815220faf8 EFLAGS: 00010286
[ 748.001149] RAX: 0000000000000000 RBX: ffff8881b4900280 RCX: 0000000000000000
[ 748.001445] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102a441f51
[ 748.001614] RBP: 00000000000032b9 R08: 0000000000000001 R09: ffffed1054a15ee8
[ 748.001446] R10: ffff8882a50af73b R11: ffffed1054a15ee7 R12: fffffbfff07c1e30
[ 748.001447] R13: dffffc0000000000 R14: ffff8881b492cba8 R15: 0000000000000000
[ 748.001429] FS: 00007f58bd08b580(0000) GS:ffff8882a5080000(0000) knlGS:0000000000000000
[ 748.001695] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 748.001309] CR2: 000055a026351740 CR3: 00000001d3b48006 CR4: 0000000000370ea0
[ 748.001506] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 748.001483] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 748.001654] Call Trace:
[ 748.000576] ? mlx5_satisfy_startup_pages+0x290/0x290 [mlx5_core]
[ 748.001416] ? mlx5_cmd_teardown_hca+0xa2/0xd0 [mlx5_core]
[ 748.001354] ? mlx5_cmd_init_hca+0x280/0x280 [mlx5_core]
[ 748.001203] mlx5_function_teardown+0x30/0x60 [mlx5_core]
[ 748.001275] mlx5_uninit_one+0xa7/0xc0 [mlx5_core]
[ 748.001200] remove_one+0x5f/0xc0 [mlx5_core]
[ 748.001075] pci_device_remove+0x9f/0x1d0
[ 748.000833] device_release_driver_internal+0x1e0/0x490
[ 748.001207] unbind_store+0x19f/0x200
[ 748.000942] ? sysfs_file_ops+0x170/0x170
[ 748.001000] kernfs_fop_write_iter+0x2bc/0x450
[ 748.000970] new_sync_write+0x373/0x610
[ 748.001124] ? new_sync_read+0x600/0x600
[ 748.001057] ? lock_acquire+0x4d6/0x700
[ 748.000908] ? lockdep_hardirqs_on_prepare+0x400/0x400
[ 748.001126] ? fd_install+0x1c9/0x4d0
[ 748.000951] vfs_write+0x4d0/0x800
[ 748.000804] ksys_write+0xf9/0x1d0
[ 748.000868] ? __x64_sys_read+0xb0/0xb0
[ 748.000811] ? filp_open+0x50/0x50
[ 748.000919] ? syscall_enter_from_user_mode+0x1d/0x50
[ 748.001223] do_syscall_64+0x3f/0x80
[ 748.000892] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 748.001026] RIP: 0033:0x7f58bcfb22f7
[ 748.000944] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[ 748.003925] RSP: 002b:00007fffd7f2aaa8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 748.001732] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f58bcfb22f7
[ 748.001426] RDX: 000000000000000d RSI: 00007fffd7f2abc0 RDI: 0000000000000003
[ 748.001746] RBP: 00007fffd7f2abc0 R08: 0000000000000000 R09: 0000000000000001
[ 748.001631] R10: 00000000000001b6 R11: 0000000000000246 R12: 000000000000000d
[ 748.001537] R13: 00005597ac2c24a0 R14: 000000000000000d R15: 00007f58bd084700
[ 748.001564] irq event stamp: 0
[ 748.000787] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[ 748.001399] hardirqs last disabled at (0): [<ffffffff813132cf>] copy_process+0x146f/0x5eb0
[ 748.001854] softirqs last enabled at (0): [<ffffffff8131330e>] copy_process+0x14ae/0x5eb0
[ 748.013431] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 748.001492] ---[ end trace a6fabd773d1c51ae ]---
Fix by destroying the send queue of a hairpin peer net device that is
being removed/unbound, which returns the allocated ring buffer pages to
the host.
Fixes: 4d8fcf216c
("net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
90 lines
3.5 KiB
C
90 lines
3.5 KiB
C
/*
|
|
* Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
|
|
*
|
|
* This software is available to you under a choice of one of two
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
* General Public License (GPL) Version 2, available from the file
|
|
* COPYING in the main directory of this source tree, or the
|
|
* OpenIB.org BSD license below:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#ifndef __TRANSOBJ_H__
|
|
#define __TRANSOBJ_H__
|
|
|
|
#include <linux/mlx5/driver.h>
|
|
|
|
int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
|
|
void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
|
|
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
|
|
u32 *rqn);
|
|
int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in);
|
|
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
|
|
int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
|
|
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
|
|
u32 *sqn);
|
|
int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in);
|
|
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
|
|
int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
|
|
int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state);
|
|
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn);
|
|
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in);
|
|
void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
|
|
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn);
|
|
int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in);
|
|
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
|
|
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
|
|
u32 *rqtn);
|
|
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
|
|
int inlen);
|
|
void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
|
|
|
|
struct mlx5_hairpin_params {
|
|
u8 log_data_size;
|
|
u8 log_num_packets;
|
|
u16 q_counter;
|
|
int num_channels;
|
|
};
|
|
|
|
struct mlx5_hairpin {
|
|
struct mlx5_core_dev *func_mdev;
|
|
struct mlx5_core_dev *peer_mdev;
|
|
|
|
int num_channels;
|
|
|
|
u32 *rqn;
|
|
u32 *sqn;
|
|
|
|
bool peer_gone;
|
|
};
|
|
|
|
struct mlx5_hairpin *
|
|
mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
|
|
struct mlx5_core_dev *peer_mdev,
|
|
struct mlx5_hairpin_params *params);
|
|
|
|
void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
|
|
void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
|
|
#endif /* __TRANSOBJ_H__ */
|