From 0f9826f4753f74f935e18c2a640484ecbd941346 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Mar 2020 14:16:07 -0400 Subject: [PATCH 01/12] RDMA/odp: Fix leaking the tgid for implicit ODP The tgid used to be part of ib_umem_free_notifier(), when it was reworked it got moved to release, but it should have been unconditional as all umem alloc paths get the tgid. As is, creating an implicit ODP will leak the tgid reference. Link: https://lore.kernel.org/r/20200304181607.GA22412@ziepe.ca Cc: stable@kernel.org Fixes: f25a546e6529 ("RDMA/odp: Use mmu_interval_notifier_insert()") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index cd656ad4953b..3b1e627d9a8d 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -275,8 +275,8 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) mmu_interval_notifier_remove(&umem_odp->notifier); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); - put_pid(umem_odp->tgid); } + put_pid(umem_odp->tgid); kfree(umem_odp); } EXPORT_SYMBOL(ib_umem_odp_release); From d372abf37676dab366518088f8871fab312ccc30 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Sat, 7 Mar 2020 17:58:34 +0800 Subject: [PATCH 02/12] MAINTAINERS: Update maintainers for HISILICON ROCE DRIVER Add myself as a maintainer for HNS RoCE drivers, and update Xavier's e-amil address. Link: https://lore.kernel.org/r/1583575114-32194-1-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Acked-by: Wei Hu (Xavier) Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a6fbdf354d34..33642f4003a3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7575,7 +7575,8 @@ F: Documentation/admin-guide/perf/hisi-pmu.rst HISILICON ROCE DRIVER M: Lijun Ou -M: Wei Hu(Xavier) +M: Wei Hu(Xavier) +M: Weihang Li L: linux-rdma@vger.kernel.org S: Maintained F: drivers/infiniband/hw/hns/ From ec16b6bbdab1ce2b03f46271460efc7f450658cd Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 5 Mar 2020 14:40:52 +0200 Subject: [PATCH 03/12] RDMA/mlx5: Fix the number of hwcounters of a dynamic counter When we read the global counter and there's any dynamic counter allocated, the value of a hwcounter is the sum of the default counter and all dynamic counters. So the number of hwcounters of a dynamically allocated counter must be same as of the default counter, otherwise there will be read violations. This fixes the KASAN slab-out-of-bounds bug: BUG: KASAN: slab-out-of-bounds in rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core] Read of size 8 at addr ffff8884192a5778 by task rdma/10138 CPU: 7 PID: 10138 Comm: rdma Not tainted 5.5.0-for-upstream-dbg-2020-02-06_18-30-19-27 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0xb7/0x10b print_address_description.constprop.4+0x1e2/0x400 ? rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core] __kasan_report+0x15c/0x1e0 ? mlx5_ib_query_q_counters+0x13f/0x270 [mlx5_ib] ? rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core] kasan_report+0xe/0x20 rdma_counter_get_hwstat_value+0x36d/0x390 [ib_core] ? rdma_counter_query_stats+0xd0/0xd0 [ib_core] ? memcpy+0x34/0x50 ? nla_put+0xe2/0x170 nldev_stat_get_doit+0x9c7/0x14f0 [ib_core] ... do_syscall_64+0x95/0x490 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fcc457fe65a Code: bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 8b 05 fa f1 2b 00 45 89 c9 4c 63 d1 48 63 ff 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 f3 c3 0f 1f 40 00 41 55 41 54 4d 89 c5 55 RSP: 002b:00007ffc0586f868 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fcc457fe65a RDX: 0000000000000020 RSI: 00000000013db920 RDI: 0000000000000003 RBP: 00007ffc0586fa90 R08: 00007fcc45ac10e0 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004089c0 R13: 0000000000000000 R14: 00007ffc0586fab0 R15: 00000000013dc9a0 Allocated by task 9700: save_stack+0x19/0x80 __kasan_kmalloc.constprop.7+0xa0/0xd0 mlx5_ib_counter_alloc_stats+0xd1/0x1d0 [mlx5_ib] rdma_counter_alloc+0x16d/0x3f0 [ib_core] rdma_counter_bind_qpn_alloc+0x216/0x4e0 [ib_core] nldev_stat_set_doit+0x8c2/0xb10 [ib_core] rdma_nl_rcv_msg+0x3d2/0x730 [ib_core] rdma_nl_rcv+0x2a8/0x400 [ib_core] netlink_unicast+0x448/0x620 netlink_sendmsg+0x731/0xd10 sock_sendmsg+0xb1/0xf0 __sys_sendto+0x25d/0x2c0 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x95/0x490 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 18d422ce8ccf ("IB/mlx5: Add counter_alloc_stats() and counter_update_stats() support") Link: https://lore.kernel.org/r/20200305124052.196688-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..ffa7c2100edb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5722,9 +5722,10 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - /* Q counters are in the beginning of all counters */ return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } From 7aefa6237cfe4a6fcf06a8656eee988b36f8fefc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 9 Mar 2020 16:05:15 -0300 Subject: [PATCH 04/12] RDMA/nl: Do not permit empty devices names during RDMA_NLDEV_CMD_NEWLINK/SET Empty device names cannot be added to sysfs and crash with: kobject: (00000000f9de3792): attempted to be registered with empty name! WARNING: CPU: 1 PID: 10856 at lib/kobject.c:234 kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 10856 Comm: syz-executor459 Not tainted 5.6.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 panic+0x2e3/0x75c kernel/panic.c:221 __warn.cold+0x2f/0x3e kernel/panic.c:582 report_bug+0x289/0x300 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:174 [inline] fixup_bug arch/x86/kernel/traps.c:169 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234 Code: 7a ca ca f9 e9 f0 f8 ff ff 4c 89 f7 e8 cd ca ca f9 e9 95 f9 ff ff e8 13 25 8c f9 4c 89 e6 48 c7 c7 a0 08 1a 89 e8 a3 76 5c f9 <0f> 0b 41 bd ea ff ff ff e9 52 ff ff ff e8 f2 24 8c f9 0f 0b e8 eb RSP: 0018:ffffc90002006eb0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff815eae46 RDI: fffff52000400dc8 RBP: ffffc90002006f08 R08: ffff8880972ac500 R09: ffffed1015d26659 R10: ffffed1015d26658 R11: ffff8880ae9332c7 R12: ffff888093034668 R13: 0000000000000000 R14: ffff8880a69d7600 R15: 0000000000000001 kobject_add_varg lib/kobject.c:390 [inline] kobject_add+0x150/0x1c0 lib/kobject.c:442 device_add+0x3be/0x1d00 drivers/base/core.c:2412 ib_register_device drivers/infiniband/core/device.c:1371 [inline] ib_register_device+0x93e/0xe40 drivers/infiniband/core/device.c:1343 rxe_register_device+0x52e/0x655 drivers/infiniband/sw/rxe/rxe_verbs.c:1231 rxe_add+0x122b/0x1661 drivers/infiniband/sw/rxe/rxe.c:302 rxe_net_add+0x91/0xf0 drivers/infiniband/sw/rxe/rxe_net.c:539 rxe_newlink+0x39/0x90 drivers/infiniband/sw/rxe/rxe.c:318 nldev_newlink+0x28a/0x430 drivers/infiniband/core/nldev.c:1538 rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline] rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0x5d9/0x980 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 ____sys_sendmsg+0x753/0x880 net/socket.c:2343 ___sys_sendmsg+0x100/0x170 net/socket.c:2397 __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 __do_sys_sendmsg net/socket.c:2439 [inline] __se_sys_sendmsg net/socket.c:2437 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Prevent empty names when checking the name provided from userspace during newlink and rename. Fixes: 3856ec4b93c9 ("RDMA/core: Add RDMA_NLDEV_CMD_NEWLINK/DELLINK support") Fixes: 05d940d3a3ec ("RDMA/nldev: Allow IB device rename through RDMA netlink") Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200309191648.GA30852@ziepe.ca Reported-and-tested-by: syzbot+da615ac67d4dbea32cbc@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e0b0a91da696..9eec26d10d7b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -918,6 +918,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); + if (strlen(name) == 0) { + err = -EINVAL; + goto done; + } err = ib_device_rename(device, name); goto done; } @@ -1514,7 +1518,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); - if (strchr(ibdev_name, '%')) + if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); From f2f2b3bbf0d9f8d090b9a019679223b2bd1c66c4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 9 Mar 2020 16:32:00 -0300 Subject: [PATCH 05/12] RDMA/core: Fix missing error check on dev_set_name() If name memory allocation fails the name will be left empty and device_add_one() will crash: kobject: (0000000004952746): attempted to be registered with empty name! WARNING: CPU: 0 PID: 329 at lib/kobject.c:234 kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 329 Comm: syz-executor.5 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 panic+0x2e3/0x75c kernel/panic.c:221 __warn.cold+0x2f/0x3e kernel/panic.c:582 report_bug+0x289/0x300 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:174 [inline] fixup_bug arch/x86/kernel/traps.c:169 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:kobject_add_internal+0x7ac/0x9a0 lib/kobject.c:234 Code: 1a 98 ca f9 e9 f0 f8 ff ff 4c 89 f7 e8 6d 98 ca f9 e9 95 f9 ff ff e8 c3 f0 8b f9 4c 89 e6 48 c7 c7 a0 0e 1a 89 e8 e3 41 5c f9 <0f> 0b 41 bd ea ff ff ff e9 52 ff ff ff e8 a2 f0 8b f9 0f 0b e8 9b RSP: 0018:ffffc90005b27908 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815eae46 RDI: fffff52000b64f13 RBP: ffffc90005b27960 R08: ffff88805aeba480 R09: ffffed1015d06659 R10: ffffed1015d06658 R11: ffff8880ae8332c7 R12: ffff8880a37fd000 R13: 0000000000000000 R14: ffff888096691780 R15: 0000000000000001 kobject_add_varg lib/kobject.c:390 [inline] kobject_add+0x150/0x1c0 lib/kobject.c:442 device_add+0x3be/0x1d00 drivers/base/core.c:2412 add_one_compat_dev drivers/infiniband/core/device.c:901 [inline] add_one_compat_dev+0x46a/0x7e0 drivers/infiniband/core/device.c:857 rdma_dev_init_net+0x2eb/0x490 drivers/infiniband/core/device.c:1120 ops_init+0xb3/0x420 net/core/net_namespace.c:137 setup_net+0x2d5/0x8b0 net/core/net_namespace.c:327 copy_net_ns+0x29e/0x5a0 net/core/net_namespace.c:468 create_new_namespaces+0x403/0xb50 kernel/nsproxy.c:108 unshare_nsproxy_namespaces+0xc2/0x200 kernel/nsproxy.c:229 ksys_unshare+0x444/0x980 kernel/fork.c:2955 __do_sys_unshare kernel/fork.c:3023 [inline] __se_sys_unshare kernel/fork.c:3021 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3021 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: https://lore.kernel.org/r/20200309193200.GA10633@ziepe.ca Cc: stable@kernel.org Fixes: 4e0f7b907072 ("RDMA/core: Implement compat device/sysfs tree in net namespace") Reported-by: syzbot+ab4dae63f7d310641ded@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f6c255202d7f..d0b3d35ad3e4 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -896,7 +896,9 @@ static int add_one_compat_dev(struct ib_device *device, cdev->dev.parent = device->dev.parent; rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); cdev->dev.release = compatdev_release; - dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + if (ret) + goto add_err; ret = device_add(&cdev->dev); if (ret) From 5bdfa854013ce4193de0d097931fd841382c76a7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 09:53:39 +0200 Subject: [PATCH 06/12] RDMA/mad: Do not crash if the rdma device does not have a umad interface Non-IB devices do not have a umad interface and the client_data will be left set to NULL. In this case calling get_nl_info() will try to kref a NULL cdev causing a crash: general protection fault, probably for non-canonical address 0xdffffc00000000ba: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000005d0-0x00000000000005d7] CPU: 0 PID: 20851 Comm: syz-executor.0 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:kobject_get+0x35/0x150 lib/kobject.c:640 Code: 53 e8 3f b0 8b f9 4d 85 e4 0f 84 a2 00 00 00 e8 31 b0 8b f9 49 8d 7c 24 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f b6 04 02 48 89 fa +83 e2 07 38 d0 7f 08 84 c0 0f 85 eb 00 00 00 RSP: 0018:ffffc9000946f1a0 EFLAGS: 00010203 RAX: dffffc0000000000 RBX: ffffffff85bdbbb0 RCX: ffffc9000bf22000 RDX: 00000000000000ba RSI: ffffffff87e9d78f RDI: 00000000000005d4 RBP: ffffc9000946f1b8 R08: ffff8880581a6440 R09: ffff8880581a6cd0 R10: fffffbfff154b838 R11: ffffffff8aa5c1c7 R12: 0000000000000598 R13: 0000000000000000 R14: ffffc9000946f278 R15: ffff88805cb0c4d0 FS: 00007faa9e8af700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b30121000 CR3: 000000004515d000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: get_device+0x25/0x40 drivers/base/core.c:2574 __ib_get_client_nl_info+0x205/0x2e0 drivers/infiniband/core/device.c:1861 ib_get_client_nl_info+0x35/0x180 drivers/infiniband/core/device.c:1881 nldev_get_chardev+0x575/0xac0 drivers/infiniband/core/nldev.c:1621 rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline] rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0x5d9/0x980 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 ____sys_sendmsg+0x753/0x880 net/socket.c:2343 ___sys_sendmsg+0x100/0x170 net/socket.c:2397 __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 __do_sys_sendmsg net/socket.c:2439 [inline] __se_sys_sendmsg net/socket.c:2437 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Cc: stable@kernel.org Fixes: 8f71bb0030b8 ("RDMA: Report available cdevs through RDMA_NLDEV_CMD_GET_CHARDEV") Link: https://lore.kernel.org/r/20200310075339.238090-1-leon@kernel.org Reported-by: syzbot+46fe08363dbba223dec5@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/user_mad.c | 33 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1235ffb2389b..da229eab5903 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1129,17 +1129,30 @@ static const struct file_operations umad_sm_fops = { .llseek = no_llseek, }; +static struct ib_umad_port *get_port(struct ib_device *ibdev, + struct ib_umad_device *umad_dev, + unsigned int port) +{ + if (!umad_dev) + return ERR_PTR(-EOPNOTSUPP); + if (!rdma_is_port_valid(ibdev, port)) + return ERR_PTR(-EINVAL); + if (!rdma_cap_ib_mad(ibdev, port)) + return ERR_PTR(-EOPNOTSUPP); + + return &umad_dev->ports[port - rdma_start_port(ibdev)]; +} + static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { - struct ib_umad_device *umad_dev = client_data; + struct ib_umad_port *port = get_port(ibdev, client_data, res->port); - if (!rdma_is_port_valid(ibdev, res->port)) - return -EINVAL; + if (IS_ERR(port)) + return PTR_ERR(port); res->abi = IB_USER_MAD_ABI_VERSION; - res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev; - + res->cdev = &port->dev; return 0; } @@ -1154,15 +1167,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad"); static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { - struct ib_umad_device *umad_dev = - ib_get_client_data(ibdev, &umad_client); + struct ib_umad_port *port = get_port(ibdev, client_data, res->port); - if (!rdma_is_port_valid(ibdev, res->port)) - return -EINVAL; + if (IS_ERR(port)) + return PTR_ERR(port); res->abi = IB_USER_MAD_ABI_VERSION; - res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev; - + res->cdev = &port->sm_dev; return 0; } From 941224e09483ea3428ffc6402de56a4a2e2cb6da Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 13 Mar 2020 08:39:57 -0400 Subject: [PATCH 07/12] IB/rdmavt: Free kernel completion queue when done When a kernel ULP requests the rdmavt to create a completion queue, it allocated the queue and set cq->kqueue to point to it. However, when the completion queue is destroyed, cq->queue is freed instead, leading to a memory leak: https://lore.kernel.org/r/215235485.15264050.1583334487658.JavaMail.zimbra@redhat.com unreferenced object 0xffffc90006639000 (size 12288): comm "kworker/u128:0", pid 8, jiffies 4295777598 (age 589.085s) hex dump (first 32 bytes): 4d 00 00 00 4d 00 00 00 00 c0 08 ac 8b 88 ff ff M...M........... 00 00 00 00 80 00 00 00 00 00 00 00 10 00 00 00 ................ backtrace: [<0000000035a3d625>] __vmalloc_node_range+0x361/0x720 [<000000002942ce4f>] __vmalloc_node.constprop.30+0x63/0xb0 [<00000000f228f784>] rvt_create_cq+0x98a/0xd80 [rdmavt] [<00000000b84aec66>] __ib_alloc_cq_user+0x281/0x1260 [ib_core] [<00000000ef3764be>] nvme_rdma_cm_handler+0xdb7/0x1b80 [nvme_rdma] [<00000000936b401c>] cma_cm_event_handler+0xb7/0x550 [rdma_cm] [<00000000d9c40b7b>] addr_handler+0x195/0x310 [rdma_cm] [<00000000c7398a03>] process_one_req+0xdd/0x600 [ib_core] [<000000004d29675b>] process_one_work+0x920/0x1740 [<00000000efedcdb5>] worker_thread+0x87/0xb40 [<000000005688b340>] kthread+0x327/0x3f0 [<0000000043a168d6>] ret_from_fork+0x3a/0x50 This patch fixes the issue by freeing cq->kqueue instead. Fixes: 239b0e52d8aa ("IB/hfi1: Move rvt_cq_wc struct into uapi directory") Link: https://lore.kernel.org/r/20200313123957.14343.43879.stgit@awfm-01.aw.intel.com Cc: # 5.4.x Reported-by: Yi Zhang Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 13d7f66eadab..5724cbbe38b1 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -327,7 +327,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else - vfree(cq->queue); + vfree(cq->kqueue); } /** From 9a293d1e21a6461a11b4217b155bf445e57f4131 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 20 Mar 2020 16:02:10 -0400 Subject: [PATCH 08/12] IB/hfi1: Ensure pq is not left on waitlist The following warning can occur when a pq is left on the dmawait list and the pq is then freed: WARNING: CPU: 47 PID: 3546 at lib/list_debug.c:29 __list_add+0x65/0xc0 list_add corruption. next->prev should be prev (ffff939228da1880), but was ffff939cabb52230. (next=ffff939cabb52230). Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd ast ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev drm_panel_orientation_quirks i2c_i801 mei_me lpc_ich mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci libahci i2c_algo_bit dca libata ptp pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 47 PID: 3546 Comm: wrf.exe Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.41.1.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_fmt+0x5f/0x80 [] ? ___slab_alloc+0x24e/0x4f0 [] __list_add+0x65/0xc0 [] defer_packet_queue+0x145/0x1a0 [hfi1] [] sdma_check_progress+0x67/0xa0 [hfi1] [] sdma_send_txlist+0x432/0x550 [hfi1] [] ? kmem_cache_alloc+0x179/0x1f0 [] ? user_sdma_send_pkts+0xc3/0x1990 [hfi1] [] user_sdma_send_pkts+0x158a/0x1990 [hfi1] [] ? try_to_del_timer_sync+0x5e/0x90 [] ? __check_object_size+0x1ca/0x250 [] hfi1_user_sdma_process_request+0xd66/0x1280 [hfi1] [] hfi1_aio_write+0xca/0x120 [hfi1] [] do_sync_readv_writev+0x7b/0xd0 [] do_readv_writev+0xce/0x260 [] ? pick_next_task_fair+0x5f/0x1b0 [] ? sched_clock_cpu+0x85/0xc0 [] ? __schedule+0x13a/0x860 [] vfs_writev+0x35/0x60 [] SyS_writev+0x7f/0x110 [] system_call_fastpath+0x22/0x27 The issue happens when wait_event_interruptible_timeout() returns a value <= 0. In that case, the pq is left on the list. The code continues sending packets and potentially can complete the current request with the pq still on the dmawait list provided no descriptor shortage is seen. If the pq is torn down in that state, the sdma interrupt handler could find the now freed pq on the list with list corruption or memory corruption resulting. Fix by adding a flush routine to ensure that the pq is never on a list after processing a request. A follow-up patch series will address issues with seqlock surfaced in: https://lore.kernel.org/r/20200320003129.GP20941@ziepe.ca The seqlock use for sdma will then be converted to a spin lock since the list_empty() doesn't need the protection afforded by the sequence lock currently in use. Fixes: a0d406934a46 ("staging/rdma/hfi1: Add page lock limit check for SDMA requests") Link: https://lore.kernel.org/r/20200320200200.23203.37777.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_sdma.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c2f0d9ba93de..13e4203497b3 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -141,6 +141,7 @@ static int defer_packet_queue( */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); if (list_empty(&pq->busy.list)) { + pq->busy.lock = &sde->waitlock; iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); } @@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); + pq->busy.lock = NULL; xchg(&pq->state, SDMA_PKT_Q_ACTIVE); wake_up(&wait->wait_dma); }; @@ -256,6 +258,21 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, return ret; } +static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq) +{ + unsigned long flags; + seqlock_t *lock = pq->busy.lock; + + if (!lock) + return; + write_seqlock_irqsave(lock, flags); + if (!list_empty(&pq->busy.list)) { + list_del_init(&pq->busy.list); + pq->busy.lock = NULL; + } + write_sequnlock_irqrestore(lock, flags); +} + int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { @@ -281,6 +298,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); + flush_pq_iowait(pq); kfree(pq); } else { spin_unlock(&fd->pq_rcu_lock); @@ -587,11 +605,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) goto free_req; - wait_event_interruptible_timeout( + if (wait_event_interruptible_timeout( pq->busy.wait_dma, - (pq->state == SDMA_PKT_Q_ACTIVE), + pq->state == SDMA_PKT_Q_ACTIVE, msecs_to_jiffies( - SDMA_IOWAIT_TIMEOUT)); + SDMA_IOWAIT_TIMEOUT)) <= 0) + flush_pq_iowait(pq); } } *count += idx; From 1fa70778742d148ceaebae39c97683ef1607b482 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 12 Mar 2020 10:36:58 +0200 Subject: [PATCH 09/12] MAINTAINERS: Clean RXE section and add Zhu as RXE maintainer Zhu Yanjun contributed many patches to RXE and expressed genuine interest in improve RXE even more. Let's add him as a maintainer. Link: https://lore.kernel.org/r/20200312083658.29603-1-leon@kernel.org Signed-off-by: Leon Romanovsky Acked-by: Moni Shoua Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 33642f4003a3..ad18383a9ab9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15419,11 +15419,9 @@ F: drivers/infiniband/sw/siw/ F: include/uapi/rdma/siw-abi.h SOFT-ROCE DRIVER (rxe) -M: Moni Shoua +M: Zhu Yanjun L: linux-rdma@vger.kernel.org S: Supported -W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home -Q: http://patchwork.kernel.org/project/linux-rdma/list/ F: drivers/infiniband/sw/rxe/ F: include/uapi/rdma/rdma_user_rxe.h From 2d47fbacf2725a67869f4d3634c2415e7dfab2f4 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 13 Mar 2020 08:47:05 -0400 Subject: [PATCH 10/12] RDMA/core: Ensure security pkey modify is not lost The following modify sequence (loosely based on ipoib) will lose a pkey modifcation: - Modify (pkey index, port) - Modify (new pkey index, NO port) After the first modify, the qp_pps list will have saved the pkey and the unit on the main list. During the second modify, get_new_pps() will fetch the port from qp_pps and read the new pkey index from qp_attr->pkey_index. The state will still be zero, or IB_PORT_PKEY_NOT_VALID. Because of the invalid state, the new values will never replace the one in the qp pps list, losing the new pkey. This happens because the following if statements will never correct the state because the first term will be false. If the code had been executed, it would incorrectly overwrite valid values. if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) new_pps->main.state = IB_PORT_PKEY_VALID; } Fix by joining the two if statements with an or test to see if qp_pps is non-NULL and in the correct state. Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Link: https://lore.kernel.org/r/20200313124704.14982.55907.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/security.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 2d5608315dc8..75e7ec017836 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -349,16 +349,11 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, else if (qp_pps) new_pps->main.pkey_index = qp_pps->main.pkey_index; - if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) + if (((qp_attr_mask & IB_QP_PKEY_INDEX) && + (qp_attr_mask & IB_QP_PORT)) || + (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)) new_pps->main.state = IB_PORT_PKEY_VALID; - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { - new_pps->main.port_num = qp_pps->main.port_num; - new_pps->main.pkey_index = qp_pps->main.pkey_index; - if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) - new_pps->main.state = IB_PORT_PKEY_VALID; - } - if (qp_attr_mask & IB_QP_ALT_PATH) { new_pps->alt.port_num = qp_attr->alt_port_num; new_pps->alt.pkey_index = qp_attr->alt_pkey_index; From 950bf4f17725556bbc773a5b71e88a6c14c9ff25 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Mar 2020 11:16:40 +0200 Subject: [PATCH 11/12] RDMA/mlx5: Fix access to wrong pointer while performing flush due to error The main difference between send and receive SW completions is related to separate treatment of WQ queue. For receive completions, the initial index to be flushed is stored in "tail", while for send completions, it is in deleted "last_poll". CPU: 54 PID: 53405 Comm: kworker/u161:0 Kdump: loaded Tainted: G OE --------- -t - 4.18.0-147.el8.ppc64le #1 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] NIP: c000003c7c00a000 LR: c00800000e586af4 CTR: c000003c7c00a000 REGS: c0000036cc9db940 TRAP: 0400 Tainted: G OE --------- -t - (4.18.0-147.el8.ppc64le) MSR: 9000000010009033 CR: 24004488 XER: 20040000 CFAR: c00800000e586af0 IRQMASK: 0 GPR00: c00800000e586ab4 c0000036cc9dbbc0 c00800000e5f1a00 c0000037d8433800 GPR04: c000003895a26800 c0000037293f2000 0000000000000201 0000000000000011 GPR08: c000003895a26c80 c000003c7c00a000 0000000000000000 c00800000ed30438 GPR12: c000003c7c00a000 c000003fff684b80 c00000000017c388 c00000396ec4be40 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: c00000000151e498 0000000000000010 c000003895a26848 0000000000000010 GPR24: 0000000000000010 0000000000010000 c000003895a26800 0000000000000000 GPR28: 0000000000000010 c0000037d8433800 c000003895a26c80 c000003895a26800 NIP [c000003c7c00a000] 0xc000003c7c00a000 LR [c00800000e586af4] __ib_process_cq+0xec/0x1b0 [ib_core] Call Trace: [c0000036cc9dbbc0] [c00800000e586ab4] __ib_process_cq+0xac/0x1b0 [ib_core] (unreliable) [c0000036cc9dbc40] [c00800000e586c88] ib_cq_poll_work+0x40/0xb0 [ib_core] [c0000036cc9dbc70] [c000000000171f44] process_one_work+0x2f4/0x5c0 [c0000036cc9dbd10] [c000000000172a0c] worker_thread+0xcc/0x760 [c0000036cc9dbdc0] [c00000000017c52c] kthread+0x1ac/0x1c0 [c0000036cc9dbe30] [c00000000000b75c] ret_from_kernel_thread+0x5c/0x80 Fixes: 8e3b68830186 ("RDMA/mlx5: Delete unreachable handle_atomic code by simplifying SW completion") Link: https://lore.kernel.org/r/20200318091640.44069-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 367a71bc5f4b..3dec3de903b7 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + u16 idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, } static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, - int *npolled, int is_send) + int *npolled, bool is_send) { struct mlx5_ib_wq *wq; unsigned int cur; @@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, return; for (i = 0; i < cur && np < num_entries; i++) { - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + unsigned int idx; + + idx = (is_send) ? wq->last_poll : wq->tail; + idx &= (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; wc->status = IB_WC_WR_FLUSH_ERR; wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; wq->tail++; + if (is_send) + wq->last_poll = wq->w_list[idx].next; np++; wc->qp = &qp->ibqp; wc++; @@ -473,6 +495,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bb78142bca5e..f3bdbd5e5096 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -288,6 +288,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; + u16 last_poll; void *cur_edge; }; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 957f3a52589b..c2967982f02c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3775,6 +3775,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.cur_post = 0; if (qp->sq.wqe_cnt) qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + qp->sq.last_poll = 0; qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } From ba80013fba656b9830ef45cd40a6a1e44707f47a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 22 Mar 2020 14:49:06 +0200 Subject: [PATCH 12/12] RDMA/mlx5: Block delay drop to unprivileged users It has been discovered that this feature can globally block the RX port, so it should be allowed for highly privileged users only. Fixes: 03404e8ae652("IB/mlx5: Add support to dropless RQ") Link: https://lore.kernel.org/r/20200322124906.1173790-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c2967982f02c..8fe149e808af 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -6205,6 +6205,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO) && + init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) + return ERR_PTR(-EPERM); + dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: