net/smc: protect link down work from execute after lgr freed

link down work may be scheduled before lgr freed but execute
after lgr freed, which may result in crash. So it is need to
hold a reference before shedule link down work, and put the
reference after work executed or canceled.

The relevant crash call stack as follows:
 list_del corruption. prev->next should be ffffb638c9c0fe20,
    but was 0000000000000000
 ------------[ cut here ]------------
 kernel BUG at lib/list_debug.c:51!
 invalid opcode: 0000 [#1] SMP NOPTI
 CPU: 6 PID: 978112 Comm: kworker/6:119 Kdump: loaded Tainted: G #1
 Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 2221b89 04/01/2014
 Workqueue: events smc_link_down_work [smc]
 RIP: 0010:__list_del_entry_valid.cold+0x31/0x47
 RSP: 0018:ffffb638c9c0fdd8 EFLAGS: 00010086
 RAX: 0000000000000054 RBX: ffff942fb75e5128 RCX: 0000000000000000
 RDX: ffff943520930aa0 RSI: ffff94352091fc80 RDI: ffff94352091fc80
 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffb638c9c0fc38
 R10: ffffb638c9c0fc30 R11: ffffffffa015eb28 R12: 0000000000000002
 R13: ffffb638c9c0fe20 R14: 0000000000000001 R15: ffff942f9cd051c0
 FS:  0000000000000000(0000) GS:ffff943520900000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f4f25214000 CR3: 000000025fbae004 CR4: 00000000007706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Call Trace:
  rwsem_down_write_slowpath+0x17e/0x470
  smc_link_down_work+0x3c/0x60 [smc]
  process_one_work+0x1ac/0x350
  worker_thread+0x49/0x2f0
  ? rescuer_thread+0x360/0x360
  kthread+0x118/0x140
  ? __kthread_bind_mask+0x60/0x60
  ret_from_fork+0x1f/0x30

Fixes: 541afa10c1 ("net/smc: add smcr_port_err() and smcr_link_down() processing")
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Guangguan Wang 2024-12-11 17:21:16 +08:00 committed by David S. Miller
parent 429fde2d81
commit 2b33eb8f1b

View File

@ -1818,7 +1818,9 @@ void smcr_link_down_cond_sched(struct smc_link *lnk)
{ {
if (smc_link_downing(&lnk->state)) { if (smc_link_downing(&lnk->state)) {
trace_smcr_link_down(lnk, __builtin_return_address(0)); trace_smcr_link_down(lnk, __builtin_return_address(0));
schedule_work(&lnk->link_down_wrk); smcr_link_hold(lnk); /* smcr_link_put in link_down_wrk */
if (!schedule_work(&lnk->link_down_wrk))
smcr_link_put(lnk);
} }
} }
@ -1850,11 +1852,14 @@ static void smc_link_down_work(struct work_struct *work)
struct smc_link_group *lgr = link->lgr; struct smc_link_group *lgr = link->lgr;
if (list_empty(&lgr->list)) if (list_empty(&lgr->list))
return; goto out;
wake_up_all(&lgr->llc_msg_waiter); wake_up_all(&lgr->llc_msg_waiter);
down_write(&lgr->llc_conf_mutex); down_write(&lgr->llc_conf_mutex);
smcr_link_down(link); smcr_link_down(link);
up_write(&lgr->llc_conf_mutex); up_write(&lgr->llc_conf_mutex);
out:
smcr_link_put(link); /* smcr_link_hold by schedulers of link_down_work */
} }
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,