RDMA first rc pull for v6.7

Primarily rtrs and irdma fixes:
 
 - Fix uninitialized value in ib_get_eth_speed()
 
 - Fix hns refusing to work if userspace doesn't select the correct
   congestion control algorithm
 
 - Several irdma fixes - unreliable Send Queue Drain, use after free, 64k
   page size bugs, device removal races
 
 - Several rtrs bug fixes - crashes, memory leaks, use after free, bad
   credit accounting, bogus WARN_ON
 
 - Typos and a MAINTAINERS update
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCZXMjNwAKCRCFwuHvBreF
 YdnyAP9h6dQG+KgIl26q7Bk1AmsmoseZ1HmeGRacdHsW0qgXPwD/XRX/IgNpl465
 fdt6Qh877LBEjlMFvyMxFTWG7v1ixwY=
 =Tn3a
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Primarily rtrs and irdma fixes:

   - Fix uninitialized value in ib_get_eth_speed()

   - Fix hns refusing to work if userspace doesn't select the correct
     congestion control algorithm

   - Several irdma fixes - unreliable Send Queue Drain, use after free,
     64k page size bugs, device removal races

   - Several rtrs bug fixes - crashes, memory leaks, use after free, bad
     credit accounting, bogus WARN_ON

   - Typos and a MAINTAINERS update"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/irdma: Avoid free the non-cqp_request scratch
  RDMA/irdma: Fix support for 64k pages
  RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned
  RDMA/core: Fix umem iterator when PAGE_SIZE is greater than HCA pgsz
  RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info()
  RDMA/bnxt_re: Correct module description string
  RDMA/rtrs-clt: Remove the warnings for req in_use check
  RDMA/rtrs-clt: Fix the max_send_wr setting
  RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight
  RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true
  RDMA/rtrs-srv: Check return values while processing info request
  RDMA/rtrs-clt: Start hb after path_up
  RDMA/rtrs-srv: Do not unconditionally enable irq
  MAINTAINERS: Add Chengchang Tang as Hisilicon RoCE maintainer
  RDMA/irdma: Add wait for suspend on SQD
  RDMA/irdma: Do not modify to SQD on error
  RDMA/hns: Fix unnecessary err return when using invalid congest control algorithm
  RDMA/core: Fix uninit-value access in ib_get_eth_speed()
This commit is contained in:
Linus Torvalds 2023-12-08 12:27:11 -08:00
commit 8aa74869d2
14 changed files with 90 additions and 44 deletions

View File

@ -9573,6 +9573,7 @@ F: drivers/crypto/hisilicon/sgl.c
F: include/linux/hisi_acc_qm.h
HISILICON ROCE DRIVER
M: Chengchang Tang <tangchengchang@huawei.com>
M: Junxian Huang <huangjunxian6@hisilicon.com>
L: linux-rdma@vger.kernel.org
S: Maintained

View File

@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
return page_size;
}
/* rdma_for_each_block() has a bug if the page size is smaller than the
* page size used to build the umem. For now prevent smaller page sizes
* from being returned.
*/
pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
/* The best result is the smallest page size that results in the minimum
* number of required pages. Compute the largest page size that could
* work based on VA address bits that don't change.

View File

@ -1971,7 +1971,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
int rc;
u32 netdev_speed;
struct net_device *netdev;
struct ethtool_link_ksettings lksettings;
struct ethtool_link_ksettings lksettings = {};
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
return -EINVAL;

View File

@ -71,7 +71,7 @@ static char version[] =
BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
MODULE_DESCRIPTION(BNXT_RE_DESC);
MODULE_LICENSE("Dual BSD/GPL");
/* globals */

View File

@ -4760,10 +4760,15 @@ static int check_cong_type(struct ib_qp *ibqp,
cong_alg->wnd_mode_sel = WND_LIMIT;
break;
default:
ibdev_err(&hr_dev->ib_dev,
"error type(%u) for congestion selection.\n",
hr_dev->caps.cong_type);
return -EINVAL;
ibdev_warn(&hr_dev->ib_dev,
"invalid type(%u) for congestion selection.\n",
hr_dev->caps.cong_type);
hr_dev->caps.cong_type = CONG_TYPE_DCQCN;
cong_alg->alg_sel = CONG_DCQCN;
cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
cong_alg->dip_vld = DIP_INVALID;
cong_alg->wnd_mode_sel = WND_LIMIT;
break;
}
return 0;

View File

@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
break;
case IRDMA_AE_QP_SUSPEND_COMPLETE:
if (iwqp->iwdev->vsi.tc_change_pending) {
atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs);
if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
wake_up(&iwqp->iwdev->suspend_wq);
}
if (iwqp->suspend_pending) {
iwqp->suspend_pending = false;
wake_up(&iwqp->iwdev->suspend_wq);
}
break;
@ -581,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf)
struct irdma_cqp *cqp = &rf->cqp;
int status = 0;
if (rf->cqp_cmpl_wq)
destroy_workqueue(rf->cqp_cmpl_wq);
status = irdma_sc_cqp_destroy(dev->cqp);
if (status)
ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
@ -748,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf)
struct irdma_ccq *ccq = &rf->ccq;
int status = 0;
if (rf->cqp_cmpl_wq)
destroy_workqueue(rf->cqp_cmpl_wq);
if (!rf->reset)
status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
if (status)
@ -1180,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
int status;
struct irdma_ceq_init_info info = {};
struct irdma_sc_dev *dev = &rf->sc_dev;
u64 scratch;
u32 ceq_size;
info.ceq_id = ceq_id;
@ -1201,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
iwceq->sc_ceq.ceq_id = ceq_id;
info.dev = dev;
info.vsi = vsi;
scratch = (uintptr_t)&rf->cqp.sc_cqp;
status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
if (!status) {
if (dev->ceq_valid)
status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
IRDMA_OP_CEQ_CREATE);
else
status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch);
status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
}
if (status) {

View File

@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev)
/* Wait for all qp's to suspend */
wait_event_timeout(iwdev->suspend_wq,
!atomic_read(&iwdev->vsi.qp_suspend_reqs),
IRDMA_EVENT_TIMEOUT);
msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
irdma_ws_reset(&iwdev->vsi);
}

View File

@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define MAX_DPC_ITERATIONS 128
#define IRDMA_EVENT_TIMEOUT 50000
#define IRDMA_EVENT_TIMEOUT_MS 5000
#define IRDMA_VCHNL_EVENT_TIMEOUT 100000
#define IRDMA_RST_TIMEOUT_HZ 4

View File

@ -1157,6 +1157,21 @@ static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio)
return prio;
}
/**
 * irdma_wait_for_suspend - wait for a pending QP suspend to complete
 * @iwqp: qp whose suspend_pending flag is waited on
 *
 * Sleeps on the device-wide suspend_wq until @iwqp->suspend_pending is
 * cleared or IRDMA_EVENT_TIMEOUT_MS milliseconds have elapsed.
 *
 * Return: 0 if suspend_pending was cleared in time, -EBUSY on timeout.
 */
static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
{
/* wait_event_timeout() evaluates to 0 only if the condition is still
 * false once the timeout has elapsed.
 */
if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
!iwqp->suspend_pending,
msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
/* Timed out: clear the flag here so it is not left set. */
iwqp->suspend_pending = false;
ibdev_warn(&iwqp->iwdev->ibdev,
"modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
iwqp->ibqp.qp_num, iwqp->last_aeq);
return -EBUSY;
}
return 0;
}
/**
* irdma_modify_qp_roce - modify qp request
* @ibqp: qp's pointer for modify
@ -1420,17 +1435,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
info.next_iwarp_state = IRDMA_QP_STATE_SQD;
issue_modify_qp = 1;
iwqp->suspend_pending = true;
break;
case IB_QPS_SQE:
case IB_QPS_ERR:
case IB_QPS_RESET:
if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) {
spin_unlock_irqrestore(&iwqp->lock, flags);
info.next_iwarp_state = IRDMA_QP_STATE_SQD;
irdma_hw_modify_qp(iwdev, iwqp, &info, true);
spin_lock_irqsave(&iwqp->lock, flags);
}
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
spin_unlock_irqrestore(&iwqp->lock, flags);
if (udata && udata->inlen) {
@ -1467,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ctx_info->rem_endpoint_idx = udp_info->arp_idx;
if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
return -EINVAL;
if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
ret = irdma_wait_for_suspend(iwqp);
if (ret)
return ret;
}
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->iwarp_state == info.curr_iwarp_state) {
iwqp->iwarp_state = info.next_iwarp_state;
@ -2900,7 +2914,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
iwmr->type = reg_type;
pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE;
iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
if (unlikely(!iwmr->page_size)) {
@ -2932,6 +2946,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
int err;
u8 lvl;
/* iWarp: Catch page not starting on OS page boundary */
if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
ib_umem_offset(iwmr->region))
return -EINVAL;
total = req.sq_pages + req.rq_pages + 1;
if (total > iwmr->page_cnt)
return -EINVAL;

View File

@ -198,6 +198,7 @@ struct irdma_qp {
u8 flush_issued : 1;
u8 sig_all : 1;
u8 pau_mode : 1;
u8 suspend_pending : 1;
u8 rsvd : 1;
u8 iwarp_state;
u16 term_sq_flush_code;

View File

@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
struct rtrs_clt_path *clt_path;
int err;
if (WARN_ON(!req->in_use))
if (!req->in_use)
return;
if (WARN_ON(!req->con))
return;
@ -1699,7 +1699,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
clt_path->s.dev_ref++;
max_send_wr = min_t(int, wr_limit,
/* QD * (REQ + RSP + FR REGS or INVS) + drain */
clt_path->queue_depth * 3 + 1);
clt_path->queue_depth * 4 + 1);
max_recv_wr = min_t(int, wr_limit,
clt_path->queue_depth * 3 + 1);
max_send_sge = 2;
@ -2350,8 +2350,6 @@ static int init_conns(struct rtrs_clt_path *clt_path)
if (err)
goto destroy;
rtrs_start_hb(&clt_path->s);
return 0;
destroy:
@ -2625,6 +2623,7 @@ static int init_path(struct rtrs_clt_path *clt_path)
goto out;
}
rtrs_clt_path_up(clt_path);
rtrs_start_hb(&clt_path->s);
out:
mutex_unlock(&clt_path->init_mutex);

View File

@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
{
enum rtrs_srv_state old_state;
bool changed = false;
unsigned long flags;
spin_lock_irq(&srv_path->state_lock);
spin_lock_irqsave(&srv_path->state_lock, flags);
old_state = srv_path->state;
switch (new_state) {
case RTRS_SRV_CONNECTED:
@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
}
if (changed)
srv_path->state = new_state;
spin_unlock_irq(&srv_path->state_lock);
spin_unlock_irqrestore(&srv_path->state_lock, flags);
return changed;
}
@ -550,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
struct rtrs_srv_mr *srv_mr;
srv_mr = &srv_path->mrs[i];
rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
if (always_invalidate)
rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
ib_dereg_mr(srv_mr->mr);
ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl,
srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
@ -709,20 +713,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(wc->opcode != IB_WC_SEND);
}
static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
{
struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_srv_ctx *ctx = srv->ctx;
int up;
int up, ret = 0;
mutex_lock(&srv->paths_ev_mutex);
up = ++srv->paths_up;
if (up == 1)
ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
mutex_unlock(&srv->paths_ev_mutex);
/* Mark session as established */
srv_path->established = true;
if (!ret)
srv_path->established = true;
return ret;
}
static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path)
@ -851,7 +858,12 @@ static int process_info_req(struct rtrs_srv_con *con,
goto iu_free;
kobject_get(&srv_path->kobj);
get_device(&srv_path->srv->dev);
rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
if (!err) {
rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err);
goto iu_free;
}
rtrs_srv_start_hb(srv_path);
/*
@ -860,7 +872,11 @@ static int process_info_req(struct rtrs_srv_con *con,
* all connections are successfully established. Thus, simply notify
* listener with a proper event if we are the first path.
*/
rtrs_srv_path_up(srv_path);
err = rtrs_srv_path_up(srv_path);
if (err) {
rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err);
goto iu_free;
}
ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
tx_iu->dma_addr,
@ -1516,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work)
srv_path = container_of(work, typeof(*srv_path), close_work);
rtrs_srv_destroy_path_files(srv_path);
rtrs_srv_stop_hb(srv_path);
for (i = 0; i < srv_path->s.con_num; i++) {
@ -1536,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work)
/* Wait for all completion */
wait_for_completion(&srv_path->complete_done);
rtrs_srv_destroy_path_files(srv_path);
/* Notify upper layer if we are the last path */
rtrs_srv_path_down(srv_path);

View File

@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
{
__rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
umem->sgt_append.sgt.nents, pgsz);
biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
}
/*
 * Step a umem block iterator, bounded by the block count stored in
 * biter->__sg_numblocks.
 *
 * Returns false as soon as __rdma_block_iter_next() reports no further block,
 * or when __sg_numblocks was already zero before this call.
 */
static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
{
/* Post-decrement: the count is tested first, then reduced; it is only
 * touched when __rdma_block_iter_next() returned true (short-circuit &&).
 */
return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
}
/**
@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
*/
#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
__rdma_block_iter_next(biter);)
__rdma_umem_block_iter_next(biter);)
#ifdef CONFIG_INFINIBAND_USER_MEM

View File

@ -2850,6 +2850,7 @@ struct ib_block_iter {
/* internal states */
struct scatterlist *__sg; /* sg holding the current aligned block */
dma_addr_t __dma_addr; /* unaligned DMA address of this block */
size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */
unsigned int __sg_nents; /* number of SG entries */
unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
unsigned int __pg_bit; /* alignment of current block */