From 476b7c7e00ec3d909bacaeaf1dc53eb2b2d0c41d Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Sat, 4 Nov 2023 08:56:43 +0100 Subject: [PATCH 01/66] RDMA/siw: Use ib_umem_get() to pin user pages Abandon siw private code to pin user pages during user memory registration, but use ib_umem_get() instead. This will help maintaining the driver in case of changes to the memory subsystem. Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20231104075643.195186-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw.h | 3 +- drivers/infiniband/sw/siw/siw_mem.c | 109 ++++++++++---------------- drivers/infiniband/sw/siw/siw_mem.h | 5 +- drivers/infiniband/sw/siw/siw_verbs.c | 19 +---- 4 files changed, 47 insertions(+), 89 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index cec5cccd2e75..849e496c7e67 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -121,11 +121,10 @@ struct siw_page_chunk { }; struct siw_umem { + struct ib_umem *base_mem; struct siw_page_chunk *page_chunk; int num_pages; - bool writable; u64 fp_addr; /* First page base address */ - struct mm_struct *owning_mm; }; struct siw_pble { diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index c5f7f1669d09..2110ceb0603c 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -60,28 +61,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) return NULL; } -static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages, - bool dirty) +void siw_umem_release(struct siw_umem *umem) { - unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty); -} - -void siw_umem_release(struct siw_umem *umem, bool dirty) -{ - struct mm_struct *mm_s = umem->owning_mm; int i, num_pages = umem->num_pages; - for (i = 0; num_pages; i++) { - int to_free = min_t(int, PAGES_PER_CHUNK, num_pages); + if (umem->base_mem) + ib_umem_release(umem->base_mem); - siw_free_plist(&umem->page_chunk[i], to_free, - umem->writable && dirty); + for (i = 0; num_pages > 0; i++) { kfree(umem->page_chunk[i].plist); - num_pages -= to_free; + num_pages -= PAGES_PER_CHUNK; } - atomic64_sub(umem->num_pages, &mm_s->pinned_vm); - - mmdrop(mm_s); kfree(umem->page_chunk); kfree(umem); } @@ -145,7 +135,7 @@ void siw_free_mem(struct kref *ref) if (!mem->is_mw && mem->mem_obj) { if (mem->is_pbl == 0) - siw_umem_release(mem->umem, true); + siw_umem_release(mem->umem); else kfree(mem->pbl); } @@ -362,18 +352,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf) return pbl; } -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) +struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start, + u64 len, int rights) { struct siw_umem *umem; - struct mm_struct *mm_s; + struct ib_umem *base_mem; + struct sg_page_iter sg_iter; + struct sg_table *sgt; u64 first_page_va; - unsigned long mlock_limit; - unsigned int foll_flags = FOLL_LONGTERM; int num_pages, num_chunks, i, rv = 0; - if (!can_do_mlock()) - return ERR_PTR(-EPERM); - if (!len) return ERR_PTR(-EINVAL); @@ -385,65 +373,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) if (!umem) return ERR_PTR(-ENOMEM); - mm_s = current->mm; - umem->owning_mm = mm_s; - umem->writable = writable; - - mmgrab(mm_s); - - if (writable) - foll_flags |= FOLL_WRITE; - - mmap_read_lock(mm_s); - - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) { - rv = -ENOMEM; - goto out_sem_up; - } - umem->fp_addr = first_page_va; - umem->page_chunk = kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL); if (!umem->page_chunk) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } - for (i = 0; num_pages; i++) { + base_mem = ib_umem_get(base_dev, start, len, rights); + if (IS_ERR(base_mem)) { + rv = PTR_ERR(base_mem); + siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv); + goto err_out; + } + umem->fp_addr = first_page_va; + umem->base_mem = base_mem; + + sgt = &base_mem->sgt_append.sgt; + __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0); + + if (!__sg_page_iter_next(&sg_iter)) { + rv = -EINVAL; + goto err_out; + } + for (i = 0; num_pages > 0; i++) { int nents = min_t(int, num_pages, PAGES_PER_CHUNK); struct page **plist = kcalloc(nents, sizeof(struct page *), GFP_KERNEL); if (!plist) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } umem->page_chunk[i].plist = plist; - while (nents) { - rv = pin_user_pages(first_page_va, nents, foll_flags, - plist); - if (rv < 0) - goto out_sem_up; - - umem->num_pages += rv; - first_page_va += rv * PAGE_SIZE; - plist += rv; - nents -= rv; - num_pages -= rv; + while (nents--) { + *plist = sg_page_iter_page(&sg_iter); + umem->num_pages++; + num_pages--; + plist++; + if (!__sg_page_iter_next(&sg_iter)) + break; } } -out_sem_up: - mmap_read_unlock(mm_s); - - if (rv > 0) - return umem; - - /* Adjust accounting for pages not pinned */ - if (num_pages) - atomic64_sub(num_pages, &mm_s->pinned_vm); - - siw_umem_release(umem, false); + return umem; +err_out: + siw_umem_release(umem); return ERR_PTR(rv); } diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index a2835284fe5b..e74cfcd6dbc1 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -6,8 +6,9 @@ #ifndef _SIW_MEM_H #define _SIW_MEM_H -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); -void siw_umem_release(struct siw_umem *umem, bool dirty); +struct siw_umem *siw_umem_get(struct ib_device *base_dave, u64 start, + u64 len, int rights); +void siw_umem_release(struct siw_umem *umem); struct siw_pbl *siw_pbl_alloc(u32 num_buf); dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index c5c27db9c2fe..27f7dda89e49 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1321,8 +1321,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, struct siw_umem *umem = NULL; struct siw_ureq_reg_mr ureq; struct siw_device *sdev = to_siw_dev(pd->device); - - unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", @@ -1338,20 +1336,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, rv = -EINVAL; goto err_out; } - if (mem_limit != RLIM_INFINITY) { - unsigned long num_pages = - (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT; - mem_limit >>= PAGE_SHIFT; - - if (num_pages > mem_limit - current->mm->locked_vm) { - siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n", - num_pages, mem_limit, - current->mm->locked_vm); - rv = -ENOMEM; - goto err_out; - } - } - umem = siw_umem_get(start, len, ib_access_writable(rights)); + umem = siw_umem_get(pd->device, start, len, rights); if (IS_ERR(umem)) { rv = PTR_ERR(umem); siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); @@ -1404,7 +1389,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, kfree_rcu(mr, rcu); } else { if (umem) - siw_umem_release(umem, false); + siw_umem_release(umem); } return ERR_PTR(rv); } From c170d4ff21a8a525d782e3d1bc58d9538d95afe6 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 2 Nov 2023 20:13:09 +0100 Subject: [PATCH 02/66] RDMA/hfi1: Copy userspace arrays safely Currently, memdup_user() is utilized at two positions to copy userspace arrays. This is done without overflow checks. Use the new wrapper memdup_array_user() to copy the arrays more safely. Suggested-by: Dave Airlie Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20231102191308.52046-2-pstanner@redhat.com Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 4 ++-- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6419872f95cf..cf2d29098406 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -491,8 +491,8 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, if (unlikely(tinfo->tidcnt > fd->tid_used)) return -EINVAL; - tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist), - sizeof(tidinfo[0]) * tinfo->tidcnt); + tidinfo = memdup_array_user(u64_to_user_ptr(tinfo->tidlist), + tinfo->tidcnt, sizeof(tidinfo[0])); if (IS_ERR(tidinfo)) return PTR_ERR(tidinfo); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6a4aa59c0cbc..b72625283fcf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -494,8 +494,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * equal to the pkt count. However, there is no way to * tell at this point. */ - tmp = memdup_user(iovec[idx].iov_base, - ntids * sizeof(*req->tids)); + tmp = memdup_array_user(iovec[idx].iov_base, + ntids, sizeof(*req->tids)); if (IS_ERR(tmp)) { ret = PTR_ERR(tmp); SDMA_DBG(req, "Failed to copy %d TIDs (%d)", From 3a4304d82695015d0703ee0c3331458d22e3ba7c Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Mon, 23 Oct 2023 07:03:22 -0700 Subject: [PATCH 03/66] RDMA/bnxt_re: Refactor the queue index update The queue index wrap around logic is based on power of 2 size depth. All queues are created with power of 2 depth. This increases the memory usage by the driver. This change is required for the next patches that avoids the power of 2 depth requirement for each of the queues. Update the function that increments producer index and consumer index during wrap around. Also, changes the index handling across multiple functions. Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1698069803-1787-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 138 ++++++++++----------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 20 ++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 17 ++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 4 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.h | 37 ++++-- 6 files changed, 123 insertions(+), 95 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index abbabea7f5fa..b821c37dffd9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -237,18 +237,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; int budget = nq->budget; - u32 sw_cons, raw_cons; uintptr_t q_handle; u16 type; spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ - raw_cons = hwq->cons; while (budget--) { - sw_cons = HWQ_CMP(raw_cons, hwq); nq_ptr = (struct nq_base **)hwq->pbl_ptr; - nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; - if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)]; + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) break; /* @@ -276,7 +273,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) default: break; } - raw_cons++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); } spin_unlock_bh(&hwq->lock); } @@ -302,18 +300,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) struct bnxt_qplib_hwq *hwq = &nq->hwq; struct bnxt_qplib_cq *cq; int budget = nq->budget; - u32 sw_cons, raw_cons; struct nq_base *nqe; uintptr_t q_handle; + u32 hw_polled = 0; u16 type; spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ - raw_cons = hwq->cons; while (budget--) { - sw_cons = HWQ_CMP(raw_cons, hwq); - nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); - if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL); + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) break; /* @@ -372,12 +368,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) "nqe with type = 0x%x not handled\n", type); break; } - raw_cons++; + hw_polled++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); } - if (hwq->cons != raw_cons) { - hwq->cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); - } spin_unlock_bh(&hwq->lock); } @@ -505,6 +501,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) pdev = nq->pdev; nq_db = &nq->nq_db; + nq_db->dbinfo.flags = 0; nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION; nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id); if (!nq_db->reg.bar_base) { @@ -649,7 +646,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, rc = -ENOMEM; goto fail; } - + srq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_SRQ, sizeof(req)); @@ -703,13 +700,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - u32 sw_prod, sw_cons, count = 0; + u32 count; - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq); - - count = sw_prod > sw_cons ? sw_prod - sw_cons : - srq_hwq->max_elements - sw_cons + sw_prod; + count = __bnxt_qplib_get_avail(srq_hwq); if (count > srq->threshold) { srq->arm_req = false; bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); @@ -761,7 +754,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; struct rq_wqe *srqe; struct sq_sge *hw_sge; - u32 sw_prod, sw_cons, count = 0; + u32 count = 0; int i, next; spin_lock(&srq_hwq->lock); @@ -775,8 +768,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq->start_idx = srq->swq[next].next_idx; spin_unlock(&srq_hwq->lock); - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL); + srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL); memset(srqe, 0, srq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)srqe->data; @@ -792,17 +784,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srqe->wr_id[0] = cpu_to_le32((u32)next); srq->swq[next].wr_id = wqe->wr_id; - srq_hwq->prod++; + bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot); spin_lock(&srq_hwq->lock); - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - /* retaining srq_hwq->cons for this logic - * actually the lock is only required to - * read srq_hwq->cons. - */ - sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq); - count = sw_prod > sw_cons ? sw_prod - sw_cons : - srq_hwq->max_elements - sw_cons + sw_prod; + count = __bnxt_qplib_get_avail(srq_hwq); spin_unlock(&srq_hwq->lock); /* Ring DB */ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); @@ -849,6 +834,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; int rc; + sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP1, sizeof(req)); @@ -885,6 +871,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { + rq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); @@ -992,6 +979,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; u16 nsge; + sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP, sizeof(req)); @@ -1040,6 +1028,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (!qp->srq) { + rq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); @@ -1454,12 +1443,15 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; + u32 peek_flags, peek_cons; struct cq_base *hw_cqe; int i; + peek_flags = cq->dbinfo.flags; + peek_cons = cq_hwq->cons; for (i = 0; i < cq_hwq->max_elements; i++) { - hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL); - if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) + hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL); + if (!CQE_CMP_VALID(hw_cqe, peek_flags)) continue; /* * The valid test of the entry must be done first before @@ -1489,6 +1481,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) default: break; } + bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons, + 1, &peek_flags); } } @@ -1961,7 +1955,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: bnxt_qplib_swq_mod_start(sq, wqe_idx); - bnxt_qplib_hwq_incr_prod(hwq, swq->slots); + bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots); qp->wqe_cnt++; done: if (sch_handler) { @@ -2049,7 +2043,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: bnxt_qplib_swq_mod_start(rq, wqe_idx); - bnxt_qplib_hwq_incr_prod(hwq, swq->slots); + bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots); done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); @@ -2086,6 +2080,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) return -EINVAL; } + cq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.depth = cq->max_wqe; hwq_attr.stride = sizeof(struct cq_base); @@ -2101,7 +2096,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); - req.cq_size = cpu_to_le32(cq->hwq.max_elements); + req.cq_size = cpu_to_le32(cq->max_wqe); pbl = &cq->hwq.pbl[PBL_LVL_0]; pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << CMDQ_CREATE_CQ_PG_SIZE_SFT); @@ -2144,6 +2139,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res, { bnxt_qplib_free_hwq(res, &cq->hwq); memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq)); + /* Reset only the cons bit in the flags */ + cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT); } int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq, @@ -2240,7 +2237,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe++; (*budget)--; skip_compl: - bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[last].slots, &sq->dbinfo.flags); sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; @@ -2287,7 +2285,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + rq->swq[last].slots, &rq->dbinfo.flags); rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; @@ -2316,7 +2315,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { - u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; + u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags; struct bnxt_qplib_q *sq = &qp->sq; struct cq_req *peek_req_hwcqe; struct bnxt_qplib_qp *peek_qp; @@ -2347,16 +2346,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, } if (sq->condition) { /* Peek at the completions */ - peek_raw_cq_cons = cq->hwq.cons; + peek_flags = cq->dbinfo.flags; peek_sw_cq_cons = cq_cons; i = cq->hwq.max_elements; while (i--) { - peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq); peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq, peek_sw_cq_cons, NULL); /* If the next hwcqe is VALID */ - if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, - cq->hwq.max_elements)) { + if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) { /* * The valid test of the entry must be done first before * reading any further. @@ -2399,8 +2396,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, rc = -EINVAL; goto out; } - peek_sw_cq_cons++; - peek_raw_cq_cons++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, + &peek_sw_cq_cons, + 1, &peek_flags); } dev_err(&cq->hwq.pdev->dev, "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", @@ -2487,7 +2485,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } } skip: - bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + swq->slots, &sq->dbinfo.flags); sq->swq_last = swq->next_idx; if (sq->single) break; @@ -2514,7 +2513,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag) srq->swq[srq->last_idx].next_idx = (int)tag; srq->last_idx = (int)tag; srq->swq[srq->last_idx].next_idx = -1; - srq->hwq.cons++; /* Support for SRQE counter */ + bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons, + srq->dbinfo.max_slot, &srq->dbinfo.flags); spin_unlock(&srq->hwq.lock); } @@ -2583,7 +2583,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2669,7 +2670,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2686,14 +2688,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) { struct cq_base *hw_cqe; - u32 sw_cons, raw_cons; bool rc = true; - raw_cons = cq->hwq.cons; - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit. If the CQE is valid, return false */ - rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); + rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags); return rc; } @@ -2775,7 +2774,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2848,7 +2848,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe++; (*budget)--; } - bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[swq_last].slots, &sq->dbinfo.flags); sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; @@ -2933,19 +2934,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { struct cq_base *hw_cqe; - u32 sw_cons, raw_cons; int budget, rc = 0; + u32 hw_polled = 0; u8 type; - raw_cons = cq->hwq.cons; budget = num_cqes; while (budget) { - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit */ - if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) + if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags)) break; /* @@ -2960,7 +2959,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, rc = bnxt_qplib_cq_process_req(cq, (struct cq_req *)hw_cqe, &cqe, &budget, - sw_cons, lib_qp); + cq->hwq.cons, lib_qp); break; case CQ_BASE_CQE_TYPE_RES_RC: rc = bnxt_qplib_cq_process_res_rc(cq, @@ -3006,12 +3005,13 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, dev_err(&cq->hwq.pdev->dev, "process_cqe error rc = 0x%x\n", rc); } - raw_cons++; + hw_polled++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons, + 1, &cq->dbinfo.flags); + } - if (cq->hwq.cons != raw_cons) { - cq->hwq.cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ); - } exit: return num_cqes - budget; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 404b851091ca..23c27cb42978 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -348,9 +348,21 @@ struct bnxt_qplib_qp { #define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG) #define ROCE_CQE_CMP_V 0 -#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define CQE_CMP_VALID(hdr, pass) \ (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) + +static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq) +{ + int cons, prod, avail; + + cons = hwq->cons; + prod = hwq->prod; + avail = cons - prod; + if (cons <= prod) + avail += hwq->depth; + return avail; +} static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, u8 slots) @@ -443,9 +455,9 @@ struct bnxt_qplib_cq { #define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG) #define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG) -#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define NQE_CMP_VALID(hdr, pass) \ (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) #define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index e47b4ca64d33..15e6d2b80c70 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -734,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; struct creq_base *creqe; - u32 sw_cons, raw_cons; unsigned long flags; u32 num_wakeup = 0; + u32 hw_polled = 0; /* Service the CREQ until budget is over */ spin_lock_irqsave(&hwq->lock, flags); - raw_cons = hwq->cons; while (budget > 0) { - sw_cons = HWQ_CMP(raw_cons, hwq); - creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); - if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements)) + creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL); + if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags)) break; /* The valid test of the entry must be done first before * reading any further. @@ -775,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) type); break; } - raw_cons++; budget--; + hw_polled++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &creq->creq_db.dbinfo.flags); } - if (hwq->cons != raw_cons) { - hwq->cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); - } spin_unlock_irqrestore(&hwq->lock, flags); if (num_wakeup) wake_up_nr(&rcfw->cmdq.waitq, num_wakeup); @@ -1113,6 +1111,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt) pdev = rcfw->pdev; creq_db = &rcfw->creq.creq_db; + creq_db->dbinfo.flags = 0; creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION; creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id); if (!creq_db->reg.bar_id) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 7b31bee3e000..45996e60a0d0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe { /* Allocate 1 per QP for async error notification for now */ #define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024) #define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */ -#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define CREQ_CMP_VALID(hdr, pass) \ (!!((hdr)->v & CREQ_BASE_V) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) #define CREQ_ENTRY_POLL_BUDGET 0x100 /* HWQ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 157db6b7e119..ae2bde34e785 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -343,7 +343,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, hwq->cons = 0; hwq->pdev = pdev; hwq->depth = hwq_attr->depth; - hwq->max_elements = depth; + hwq->max_elements = hwq->depth; hwq->element_size = stride; hwq->qe_ppg = pg_size / stride; /* For direct access to the elements */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 5949f004f785..3e3383b8a913 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,6 +186,14 @@ struct bnxt_qplib_db_info { struct bnxt_qplib_hwq *hwq; u32 xid; u32 max_slot; + u32 flags; +}; + +enum bnxt_qplib_db_info_flags_mask { + BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT = 0x0UL, + BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT = 0x1UL, + BNXT_QPLIB_FLAG_EPOCH_CONS_MASK = 0x1UL, + BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL, }; /* Tables */ @@ -396,24 +404,34 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res); int bnxt_qplib_determine_atomics(struct pci_dev *dev); -static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) +static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo, + struct bnxt_qplib_hwq *hwq, u32 cnt) { - hwq->prod = (hwq->prod + cnt) % hwq->depth; + /* move prod and update toggle/epoch if wrap around */ + hwq->prod += cnt; + if (hwq->prod >= hwq->depth) { + hwq->prod %= hwq->depth; + dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT; + } } -static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, - u32 cnt) +static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt, + u32 *dbinfo_flags) { - hwq->cons = (hwq->cons + cnt) % hwq->depth; + /* move cons and update toggle/epoch if wrap around */ + *cons += cnt; + if (*cons >= max_elements) { + *cons %= max_elements; + *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT; + } } static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, bool arm) { - u32 key; + u32 key = 0; - key = info->hwq->cons & (info->hwq->max_elements - 1); - key |= (CMPL_DOORBELL_IDX_VALID | + key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID | (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK)); if (!arm) key |= CMPL_DOORBELL_MASK; @@ -427,8 +445,7 @@ static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info, key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; key <<= 32; - key |= (info->hwq->cons & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key |= (info->hwq->cons & DBC_DBC_INDEX_MASK); writeq(key, info->db); } From 48f996d4adf15a0a0af8b8184d3ec6042a684ea4 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Mon, 23 Oct 2023 07:03:23 -0700 Subject: [PATCH 04/66] RDMA/bnxt_re: Remove roundup_pow_of_two depth for all hardware queue resources Rounding up the queue depth to power of two is not a hardware requirement. In order to optimize the per connection memory usage, removing drivers implementation which round up to the queue depths to the power of 2. Implements a mask to maintain backward compatibility with older library. Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1698069803-1787-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 59 ++++++++++++++++-------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 7 +++ include/uapi/rdma/bnxt_re-abi.h | 9 ++++ 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index faa88d12ee86..b2467de721dc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1184,7 +1184,8 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp } static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1213,7 +1214,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ - entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); + entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); rq->q_full_delta = 0; rq->sg_info.pgsize = PAGE_SIZE; @@ -1243,7 +1244,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1272,7 +1273,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, /* Allocate 128 + 1 more than what's provided */ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? 0 : BNXT_QPLIB_RESERVED_QP_WRS; - entries = roundup_pow_of_two(entries + diff + 1); + entries = bnxt_re_init_depth(entries + diff + 1, uctx); sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); sq->q_full_delta = diff + 1; /* @@ -1288,7 +1289,8 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1300,7 +1302,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, dev_attr = &rdev->dev_attr; if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qplqp->sq.q_full_delta = qplqp->sq.max_wqe - @@ -1338,6 +1340,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_re_ucontext *uctx; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_re_cq *cq; @@ -1347,6 +1350,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); /* Setup misc params */ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); qplqp->pd = &pd->qplib_pd; @@ -1388,18 +1392,18 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } /* Setup RQ/SRQ */ - rc = bnxt_re_init_rq_attr(qp, init_attr); + rc = bnxt_re_init_rq_attr(qp, init_attr, uctx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, uctx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) - bnxt_re_adjust_gsi_sq_attr(qp, init_attr); + bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx); if (udata) /* This will update DPI and qp_handle */ rc = bnxt_re_init_user_qp(rdev, pd, qp, udata); @@ -1715,6 +1719,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_nq *nq = NULL; + struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; struct bnxt_re_pd *pd; @@ -1739,13 +1744,14 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto exit; } + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); srq->rdev = rdev; srq->qplib_srq.pd = &pd->qplib_pd; srq->qplib_srq.dpi = &rdev->dpi_privileged; /* Allocate 1 more than what's provided so posting max doesn't * mean empty */ - entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); + entries = bnxt_re_init_depth(srq_init_attr->attr.max_wr + 1, uctx); if (entries > dev_attr->max_srq_wqes + 1) entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; @@ -2103,6 +2109,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic; } if (qp_attr_mask & IB_QP_CAP) { + struct bnxt_re_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); + qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE | CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE | @@ -2119,7 +2128,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, "Create QP failed - max exceeded"); return -EINVAL; } - entries = roundup_pow_of_two(qp_attr->cap.max_send_wr); + entries = bnxt_re_init_depth(qp_attr->cap.max_send_wr, uctx); qp->qplib_qp.sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe - @@ -2132,7 +2141,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp->qplib_qp.sq.q_full_delta -= 1; qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge; if (qp->qplib_qp.rq.max_wqe) { - entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr); + entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx); qp->qplib_qp.rq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe - @@ -2920,9 +2929,11 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev); + struct bnxt_re_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); + struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; int rc, entries; int cqe = attr->cqe; struct bnxt_qplib_nq *nq = NULL; @@ -2941,7 +2952,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->rdev = rdev; cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq); - entries = roundup_pow_of_two(cqe + 1); + entries = bnxt_re_init_depth(cqe + 1, uctx); if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; @@ -2949,8 +2960,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT; if (udata) { struct bnxt_re_cq_req req; - struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; goto fail; @@ -3072,12 +3081,11 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) return -EINVAL; } - entries = roundup_pow_of_two(cqe + 1); + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); + entries = bnxt_re_init_depth(cqe + 1, uctx); if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; - uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, - ib_uctx); /* uverbs consumer */ if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; @@ -4108,6 +4116,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_user_mmap_entry *entry; struct bnxt_re_uctx_resp resp = {}; + struct bnxt_re_uctx_req ureq = {}; u32 chip_met_rev_num = 0; int rc; @@ -4157,6 +4166,16 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) if (rdev->pacing.dbr_pacing) resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED; + if (udata->inlen >= sizeof(ureq)) { + rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); + if (rc) + goto cfail; + if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) { + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED; + uctx->cmask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED; + } + } + rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { ibdev_err(ibdev, "Failed to copy user context"); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 84715b7e7a4e..98baea98fc17 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -140,6 +140,7 @@ struct bnxt_re_ucontext { void *shpg; spinlock_t sh_lock; /* protect shpg */ struct rdma_user_mmap_entry *shpage_mmap; + u64 cmask; }; enum bnxt_re_mmap_flag { @@ -167,6 +168,12 @@ static inline u16 bnxt_re_get_rwqe_size(int nsge) return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } +static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx) +{ + return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CMASK_POW2_DISABLED) ? + ent : roundup_pow_of_two(ent) : ent; +} + int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 6e7c67a0cca3..a1b896d6d940 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -54,6 +54,7 @@ enum { BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL, BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL, BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL, + BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL, }; enum bnxt_re_wqe_mode { @@ -62,6 +63,14 @@ enum bnxt_re_wqe_mode { BNXT_QPLIB_WQE_MODE_INVALID = 0x02, }; +enum { + BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01, +}; + +struct bnxt_re_uctx_req { + __aligned_u64 comp_mask; +}; + struct bnxt_re_uctx_resp { __u32 dev_id; __u32 max_qp; From 9aac6c05a56289be7bb2ad4b6c34486dfa2d31eb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Oct 2023 21:58:39 -0700 Subject: [PATCH 05/66] RDMA/siw: Use crypto_shash_digest() in siw_qp_prepare_tx() Simplify siw_qp_prepare_tx() by using crypto_shash_digest() instead of an init+update+final sequence. This should also improve performance. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20231029045839.154071-1-ebiggers@kernel.org Acked-by: Bernard Metzler Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_tx.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index b2c06100cf01..64c113dc8a99 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -249,14 +249,10 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) /* * Do complete CRC if enabled and short packet */ - if (c_tx->mpa_crc_hd) { - crypto_shash_init(c_tx->mpa_crc_hd); - if (crypto_shash_update(c_tx->mpa_crc_hd, - (u8 *)&c_tx->pkt, - c_tx->ctrl_len)) - return -EINVAL; - crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc); - } + if (c_tx->mpa_crc_hd && + crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, + c_tx->ctrl_len, (u8 *)crc) != 0) + return -EINVAL; c_tx->ctrl_len += MPA_CRC_SIZE; return PKT_COMPLETE; From 057a30168175048be9e9b30f0cafd26f5043eb07 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Oct 2023 21:57:56 -0700 Subject: [PATCH 06/66] RDMA/irdma: Use crypto_shash_digest() in irdma_ieq_check_mpacrc() Simplify irdma_ieq_check_mpacrc() by using crypto_shash_digest() instead of an init+update+final sequence. This should also improve performance. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20231029045756.153943-1-ebiggers@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/utils.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 916bfe2a91eb..0422787592d8 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -1393,17 +1393,12 @@ int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val) { u32 crc = 0; - int ret; - int ret_code = 0; - crypto_shash_init(desc); - ret = crypto_shash_update(desc, addr, len); - if (!ret) - crypto_shash_final(desc, (u8 *)&crc); + crypto_shash_digest(desc, addr, len, (u8 *)&crc); if (crc != val) - ret_code = -EINVAL; + return -EINVAL; - return ret_code; + return 0; } /** From b9a85e5eec126d6ae6c362f94b447c223e8fe6e4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Nov 2023 11:28:02 +0200 Subject: [PATCH 07/66] RDMA/usnic: Silence uninitialized symbol smatch warnings The patch 1da177e4c3f4: "Linux-2.6.12-rc2" from Apr 16, 2005 (linux-next), leads to the following Smatch static checker warning: drivers/infiniband/hw/mthca/mthca_cmd.c:644 mthca_SYS_EN() error: uninitialized symbol 'out'. drivers/infiniband/hw/mthca/mthca_cmd.c 636 int mthca_SYS_EN(struct mthca_dev *dev) 637 { 638 u64 out; 639 int ret; 640 641 ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D); We pass out here and it gets used without being initialized. err = mthca_cmd_post(dev, in_param, out_param ? *out_param : 0, ^^^^^^^^^^ in_modifier, op_modifier, op, context->token, 1); It's the same in mthca_cmd_wait() and mthca_cmd_poll(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/533bc3df-8078-4397-b93d-d1f6cec9b636@moroto.mountain Link: https://lore.kernel.org/r/c559cb7113158c02d75401ac162652072ef1b5f0.1699867650.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f330ce895d88..8fe0cef7e2be 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -635,7 +635,7 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) int mthca_SYS_EN(struct mthca_dev *dev) { - u64 out; + u64 out = 0; int ret; ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D); @@ -1955,7 +1955,7 @@ int mthca_WRITE_MGM(struct mthca_dev *dev, int index, int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, u16 *hash) { - u64 imm; + u64 imm = 0; int err; err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index b54bc8865dae..1ab268b77096 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -382,7 +382,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, struct mthca_init_hca_param *init_hca, u64 icm_size) { - u64 aux_pages; + u64 aux_pages = 0; int err; err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages); From 3a179fe34acbd55eb287377811b05fbeb749e52c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:10 +0800 Subject: [PATCH 08/66] RDMA/siw: Introduce siw_get_page Add the wrapper function to get either pbl page or umem page. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-2-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_tx.c | 31 +++++++++++---------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 64c113dc8a99..86186cd3cca4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -34,6 +34,15 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) return NULL; } +static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge, + unsigned long offset, int *pbl_idx) +{ + if (!mem->is_pbl) + return siw_get_upage(mem->umem, sge->laddr + offset); + else + return siw_get_pblpage(mem, sge->laddr + offset, pbl_idx); +} + /* * Copy short payload at provided destination payload address */ @@ -67,11 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) char *buffer; int pbl_idx = 0; - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, sge->laddr); - else - p = siw_get_pblpage(mem, sge->laddr, &pbl_idx); - + p = siw_get_page(mem, sge, 0, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -85,13 +90,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) memcpy(paddr, buffer + off, part); kunmap_local(buffer); - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, - sge->laddr + part); - else - p = siw_get_pblpage(mem, - sge->laddr + part, - &pbl_idx); + p = siw_get_page(mem, sge, part, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -498,13 +497,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!is_kva) { struct page *p; - if (mem->is_pbl) - p = siw_get_pblpage( - mem, sge->laddr + sge_off, - &pbl_idx); - else - p = siw_get_upage(mem->umem, - sge->laddr + sge_off); + p = siw_get_page(mem, sge, sge_off, &pbl_idx); if (unlikely(!p)) { siw_unmap_pages(iov, kmap_mask, seg); wqe->processed -= c_tx->bytes_unsent; From a2b64565e8ea9530cce8e1c528f53ca888c0a45b Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:11 +0800 Subject: [PATCH 09/66] RDMA/siw: Introduce siw_update_skb_rcvd There are some places share the same logic, factor a common helper for it. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-3-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_rx.c | 31 +++++++++++---------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 33e0fdb362ff..10805a7d0487 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -881,6 +881,13 @@ int siw_proc_rresp(struct siw_qp *qp) return rv; } +static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length) +{ + srx->skb_offset += length; + srx->skb_new -= length; + srx->skb_copied += length; +} + int siw_proc_terminate(struct siw_qp *qp) { struct siw_rx_stream *srx = &qp->rx_stream; @@ -925,9 +932,7 @@ int siw_proc_terminate(struct siw_qp *qp) goto out; infop += to_copy; - srx->skb_offset += to_copy; - srx->skb_new -= to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -949,9 +954,7 @@ int siw_proc_terminate(struct siw_qp *qp) term->flag_m ? "valid" : "invalid"); } out: - srx->skb_new -= to_copy; - srx->skb_offset += to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -970,9 +973,7 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - srx->skb_new -= avail; - srx->skb_offset += avail; - srx->skb_copied += avail; + siw_update_skb_rcvd(srx, avail); srx->fpdu_part_rem -= avail; if (srx->fpdu_part_rem) @@ -1023,12 +1024,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < MIN_DDP_HDR) return -EAGAIN; @@ -1091,12 +1088,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < hdrlen) return -EAGAIN; } From 2109ddf032ebc57e0cd43e4378474ea6f2a378c2 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:12 +0800 Subject: [PATCH 10/66] RDMA/siw: Use iov.iov_len in kernel_sendmsg We can pass iov.iov_len here. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-4-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 86186cd3cca4..838123c621e2 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -292,8 +292,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent, .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent }; - int rv = kernel_sendmsg(s, &msg, &iov, 1, - c_tx->ctrl_len - c_tx->ctrl_sent); + int rv = kernel_sendmsg(s, &msg, &iov, 1, iov.iov_len); if (rv >= 0) { c_tx->ctrl_sent += rv; From d248960941b71ebc1b62bd3241744f8a5eadc3d7 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:13 +0800 Subject: [PATCH 11/66] RDMA/siw: Remove goto lable in siw_mmap Let's remove it since the failure case only falls through to the useless label. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-5-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_verbs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 27f7dda89e49..aad0a7d8789f 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -66,12 +66,9 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) entry = to_siw_mmap_entry(rdma_entry); rv = remap_vmalloc_range(vma, entry->address, 0); - if (rv) { + if (rv) pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, size); - goto out; - } -out: rdma_user_mmap_entry_put(rdma_entry); return rv; From 659da08ed83a67aec9d106e44d2d88c5963fb85a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:14 +0800 Subject: [PATCH 12/66] RDMA/siw: Remove rcu from siw_qp Remove it since it is not used. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-6-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 849e496c7e67..b36d1ec25327 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -465,7 +465,6 @@ struct siw_qp { } term_info; struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ - struct rcu_head rcu; }; /* helper macros */ From 065186d228c5280d0390fde0e7ddba998e66f047 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:15 +0800 Subject: [PATCH 13/66] RDMA/siw: No need to check term_info.valid before call siw_send_terminate Remove the redundate checking since siw_send_terminate check it inside. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-7-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7de651cb44e8..5e6e57153611 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -393,8 +393,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) } siw_dbg_cep(cep, "immediate close, state %d\n", cep->state); - if (qp->term_info.valid) - siw_send_terminate(qp); + siw_send_terminate(qp); if (cep->cm_id) { switch (cep->state) { @@ -1061,7 +1060,7 @@ static void siw_cm_work_handler(struct work_struct *w) /* * QP scheduled LLP close */ - if (cep->qp && cep->qp->term_info.valid) + if (cep->qp) siw_send_terminate(cep->qp); if (cep->cm_id) From 60d2136db8780b080bb6d5c7bdb0522b49c4eac6 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:16 +0800 Subject: [PATCH 14/66] RDMA/siw: Factor out siw_rx_data helper Remove the redundant code given they share the same logic. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-8-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_rx.c | 53 ++++++++++----------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 10805a7d0487..ed4fc39718b4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -405,6 +405,20 @@ static struct siw_wqe *siw_rqe_get(struct siw_qp *qp) return wqe; } +static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx, + unsigned int *pbl_idx, u64 addr, int bytes) +{ + int rv; + + if (mem_p->mem_obj == NULL) + rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes); + else if (!mem_p->is_pbl) + rv = siw_rx_umem(srx, mem_p->umem, addr, bytes); + else + rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes); + return rv; +} + /* * siw_proc_send: * @@ -485,17 +499,8 @@ int siw_proc_send(struct siw_qp *qp) break; } mem_p = *mem; - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + frx->sge_off), - sge_bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, - sge->laddr + frx->sge_off, sge_bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + frx->sge_off, sge_bytes); - + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + frx->sge_off, sge_bytes); if (unlikely(rv != sge_bytes)) { wqe->processed += rcvd_bytes; @@ -598,17 +603,8 @@ int siw_proc_write(struct siw_qp *qp) return -EINVAL; } - if (mem->mem_obj == NULL) - rv = siw_rx_kva(srx, - (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); - else if (!mem->is_pbl) - rv = siw_rx_umem(srx, mem->umem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - + rv = siw_rx_data(mem, srx, &frx->pbl_idx, + srx->ddp_to + srx->fpdu_part_rcvd, bytes); if (unlikely(rv != bytes)) { siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, @@ -849,17 +845,8 @@ int siw_proc_rresp(struct siw_qp *qp) mem_p = *mem; bytes = min(srx->fpdu_part_rem, srx->skb_new); - - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + wqe->processed), - bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, - bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + wqe->processed, bytes); + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + wqe->processed, bytes); if (rv != bytes) { wqe->wc_status = SIW_WC_GENERAL_ERR; rv = -EINVAL; From 6a343cc3bf2626bc0c487bf2a72c90b4519c3da4 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:17 +0800 Subject: [PATCH 15/66] RDMA/siw: Introduce SIW_STAG_MAX_INDEX Add the macro to remove magic number in the code. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-9-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_mem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index 2110ceb0603c..dcb963607c8b 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -14,18 +14,20 @@ #include "siw.h" #include "siw_mem.h" +/* Stag lookup is based on its index part only (24 bits). */ +#define SIW_STAG_MAX_INDEX 0x00ffffff + /* - * Stag lookup is based on its index part only (24 bits). * The code avoids special Stag of zero and tries to randomize * STag values between 1 and SIW_STAG_MAX_INDEX. */ int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) { - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, GFP_KERNEL) < 0) @@ -81,7 +83,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL); - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; if (!mem) @@ -97,7 +99,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, kref_init(&mem->ref); get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, GFP_KERNEL) < 0) { From 25680c1f2614756463040f2dc03b3ec611ae3bfe Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:18 +0800 Subject: [PATCH 16/66] RDMA/siw: Add one parameter to siw_destroy_cpulist With that we can reuse it in siw_init_cpulist. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-10-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_main.c | 30 +++++++++++++--------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 1ab62982df74..61ad8ca3d1a2 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -109,6 +109,17 @@ static struct { int num_nodes; } siw_cpu_info; +static void siw_destroy_cpulist(int number) +{ + int i = 0; + + while (i < number) + kfree(siw_cpu_info.tx_valid_cpus[i++]); + + kfree(siw_cpu_info.tx_valid_cpus); + siw_cpu_info.tx_valid_cpus = NULL; +} + static int siw_init_cpulist(void) { int i, num_nodes = nr_node_ids; @@ -138,24 +149,11 @@ static int siw_init_cpulist(void) out_err: siw_cpu_info.num_nodes = 0; - while (--i >= 0) - kfree(siw_cpu_info.tx_valid_cpus[i]); - kfree(siw_cpu_info.tx_valid_cpus); - siw_cpu_info.tx_valid_cpus = NULL; + siw_destroy_cpulist(i); return -ENOMEM; } -static void siw_destroy_cpulist(void) -{ - int i = 0; - - while (i < siw_cpu_info.num_nodes) - kfree(siw_cpu_info.tx_valid_cpus[i++]); - - kfree(siw_cpu_info.tx_valid_cpus); -} - /* * Choose CPU with least number of active QP's from NUMA node of * TX interface. @@ -558,7 +556,7 @@ static __init int siw_init_module(void) pr_info("SoftIWARP attach failed. Error: %d\n", rv); siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); return rv; } @@ -573,7 +571,7 @@ static void __exit siw_exit_module(void) siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); From b5c91543204c345f1b28af573854c4b7e699cc91 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:19 +0800 Subject: [PATCH 17/66] RDMA/siw: Introduce siw_cep_set_free_and_put Add the helper which can be used in some places. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-11-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 31 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 5e6e57153611..d72c9fab01d9 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -444,6 +444,12 @@ void siw_cep_put(struct siw_cep *cep) kref_put(&cep->ref, __siw_cep_dealloc); } +static void siw_cep_set_free_and_put(struct siw_cep *cep) +{ + siw_cep_set_free(cep); + siw_cep_put(cep); +} + void siw_cep_get(struct siw_cep *cep) { kref_get(&cep->ref); @@ -1514,9 +1520,7 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } else if (s) { sock_release(s); @@ -1564,16 +1568,14 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return -ECONNRESET; } qp = siw_qp_id2obj(sdev, params->qpn); if (!qp) { WARN(1, "[QP %d] does not exist\n", params->qpn); - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return -EINVAL; } @@ -1719,8 +1721,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->qp = NULL; siw_qp_put(qp); - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return rv; } @@ -1743,8 +1744,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - siw_cep_set_free(cep); - siw_cep_put(cep); /* put last reference */ + siw_cep_set_free_and_put(cep); /* put last reference */ return -ECONNRESET; } @@ -1761,8 +1761,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return 0; } @@ -1897,8 +1896,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) siw_socket_disassoc(s); cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } sock_release(s); @@ -1932,8 +1930,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) cep->sock = NULL; } cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } } From 08456d4db73bbb3fcaa0d63252ea2399c65297a4 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:20 +0800 Subject: [PATCH 18/66] RDMA/siw: Introduce siw_free_cm_id Factor out a helper to simplify code. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310091656.JlrmcNXB-lkp@intel.com/ Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-12-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index d72c9fab01d9..36fd459b136c 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -364,6 +364,15 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, return id->event_handler(id, &event); } +static void siw_free_cm_id(struct siw_cep *cep) +{ + if (!cep->cm_id) + return; + + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; +} + /* * siw_qp_cm_drop() * @@ -415,8 +424,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) default: break; } - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } cep->state = SIW_EPSTATE_CLOSED; @@ -1180,8 +1188,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->sock = NULL; } if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } } @@ -1710,10 +1717,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->state = SIW_EPSTATE_CLOSED; - if (cep->cm_id) { - cep->cm_id->rem_ref(id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (qp->cep) { siw_cep_put(cep); qp->cep = NULL; @@ -1888,10 +1892,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) if (cep) { siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); cep->sock = NULL; siw_socket_disassoc(s); cep->state = SIW_EPSTATE_CLOSED; @@ -1920,10 +1921,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (cep->sock) { siw_socket_disassoc(cep->sock); sock_release(cep->sock); From 77b59bd932a026b64303d313d966decb0e9225fa Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:21 +0800 Subject: [PATCH 19/66] RDMA/siw: Cleanup siw_accept With the initialization of rv and the two added label, we can simplifiy code a bit. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-13-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 41 ++++++++++-------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 36fd459b136c..5e9a591eec58 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1558,7 +1558,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct siw_cep *cep = (struct siw_cep *)id->provider_data; struct siw_qp *qp; struct siw_qp_attrs qp_attrs; - int rv, max_priv_data = MPA_MAX_PRIVDATA; + int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA; bool wait_for_peer_rts = false; siw_cep_set_inuse(cep); @@ -1574,24 +1574,17 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - - siw_cep_set_free_and_put(cep); - - return -ECONNRESET; + rv = -ECONNRESET; + goto free_cep; } qp = siw_qp_id2obj(sdev, params->qpn); if (!qp) { WARN(1, "[QP %d] does not exist\n", params->qpn); - siw_cep_set_free_and_put(cep); - - return -EINVAL; + goto free_cep; } down_write(&qp->state_lock); - if (qp->attrs.state > SIW_QP_STATE_RTR) { - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; - } + if (qp->attrs.state > SIW_QP_STATE_RTR) + goto error_unlock; siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { @@ -1605,9 +1598,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) "[QP %u]: ord %d (max %d), ird %d (max %d)\n", qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) max_priv_data -= sizeof(struct mpa_v2_data); @@ -1617,9 +1608,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep, "[QP %u]: private data length: %d (max %d)\n", qp_id(qp), params->private_data_len, max_priv_data); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) { if (params->ord > cep->ord) { @@ -1628,9 +1617,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) } else { cep->ird = params->ird; cep->ord = params->ord; - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (params->ird < cep->ird) { @@ -1639,8 +1626,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird = cep->ird; else { rv = -ENOMEM; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (cep->mpa.v2_ctrl.ord & @@ -1687,7 +1673,6 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA); up_write(&qp->state_lock); - if (rv) goto error; @@ -1710,6 +1695,9 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cep_set_free(cep); return 0; + +error_unlock: + up_write(&qp->state_lock); error: siw_socket_disassoc(cep->sock); sock_release(cep->sock); @@ -1724,9 +1712,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) } cep->qp = NULL; siw_qp_put(qp); - +free_cep: siw_cep_set_free_and_put(cep); - return rv; } From a410a7327870264da2a1a3eed704380053ffdf9f Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:22 +0800 Subject: [PATCH 20/66] RDMA/siw: Remove siw_sk_save_upcalls Let's move it into siw_sk_assign_cm_upcalls, then we only need to get sk_callback_lock once. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-14-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 5e9a591eec58..5c0f9f8bf3db 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -40,16 +40,6 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, int status); static void siw_sk_assign_cm_upcalls(struct sock *sk) -{ - write_lock_bh(&sk->sk_callback_lock); - sk->sk_state_change = siw_cm_llp_state_change; - sk->sk_data_ready = siw_cm_llp_data_ready; - sk->sk_write_space = siw_cm_llp_write_space; - sk->sk_error_report = siw_cm_llp_error_report; - write_unlock_bh(&sk->sk_callback_lock); -} - -static void siw_sk_save_upcalls(struct sock *sk) { struct siw_cep *cep = sk_to_cep(sk); @@ -58,6 +48,11 @@ static void siw_sk_save_upcalls(struct sock *sk) cep->sk_data_ready = sk->sk_data_ready; cep->sk_write_space = sk->sk_write_space; cep->sk_error_report = sk->sk_error_report; + + sk->sk_state_change = siw_cm_llp_state_change; + sk->sk_data_ready = siw_cm_llp_data_ready; + sk->sk_write_space = siw_cm_llp_write_space; + sk->sk_error_report = siw_cm_llp_error_report; write_unlock_bh(&sk->sk_callback_lock); } @@ -156,7 +151,6 @@ static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) siw_cep_get(cep); s->sk->sk_user_data = cep; - siw_sk_save_upcalls(s->sk); siw_sk_assign_cm_upcalls(s->sk); } From 3beced14d1998c8e0c5d3d78b2dd86255365e705 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:23 +0800 Subject: [PATCH 21/66] RDMA/siw: Fix typo Replace ORRQ with ORQ. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-15-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 26e3904d2f41..da92cfa2073d 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1183,7 +1183,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, /* * siw_sq_flush() * - * Flush SQ and ORRQ entries to CQ. + * Flush SQ and ORQ entries to CQ. * * Must be called with QP state write lock held. * Therefore, SQ and ORQ lock must not be taken. From 788bbf4c2fc6e0c35bae9ed5068f484272539d3e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:24 +0800 Subject: [PATCH 22/66] RDMA/siw: Only check attrs->cap.max_send_wr in siw_create_qp We can just check max_send_wr here given both max_send_wr and max_recv_wr are defined as u32 type, and we also need to ensure num_sqe (derived from max_send_wr) shouldn't be zero. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-16-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_verbs.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index aad0a7d8789f..dca6a155523d 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -333,11 +333,10 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, goto err_atomic; } /* - * NOTE: we allow for zero element SQ and RQ WQE's SGL's - * but not for a QP unable to hold any WQE (SQ + RQ) + * NOTE: we don't allow for a QP unable to hold any SQ WQE */ - if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) { - siw_dbg(base_dev, "QP must have send or receive queue\n"); + if (attrs->cap.max_send_wr == 0) { + siw_dbg(base_dev, "QP must have send queue\n"); rv = -EINVAL; goto err_atomic; } @@ -357,21 +356,14 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (rv) goto err_atomic; - num_sqe = attrs->cap.max_send_wr; - num_rqe = attrs->cap.max_recv_wr; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - if (num_sqe) - num_sqe = roundup_pow_of_two(num_sqe); - else { - /* Zero sized SQ is not supported */ - rv = -EINVAL; - goto err_out_xa; - } + num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); + num_rqe = attrs->cap.max_recv_wr; if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); From d9a5b48681315a5881cea24bc8f384da02828e88 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:25 +0800 Subject: [PATCH 23/66] RDMA/siw: Introduce siw_destroy_cep_sock Add one helper to simplify code a bit. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310091735.oG7bTvLR-lkp@intel.com/` Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-17-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 5c0f9f8bf3db..86323918a570 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -367,6 +367,15 @@ static void siw_free_cm_id(struct siw_cep *cep) cep->cm_id = NULL; } +static void siw_destroy_cep_sock(struct siw_cep *cep) +{ + if (cep->sock) { + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } +} + /* * siw_qp_cm_drop() * @@ -423,14 +432,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) } cep->state = SIW_EPSTATE_CLOSED; - if (cep->sock) { - siw_socket_disassoc(cep->sock); - /* - * Immediately close socket - */ - sock_release(cep->sock); - cep->sock = NULL; - } + siw_destroy_cep_sock(cep); if (cep->qp) { cep->qp = NULL; siw_qp_put(qp); @@ -1693,9 +1695,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) error_unlock: up_write(&qp->state_lock); error: - siw_socket_disassoc(cep->sock); - sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; @@ -1740,9 +1740,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ siw_send_mpareqrep(cep, pdata, pd_len); } - siw_socket_disassoc(cep->sock); - sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; From 79844118d6c1cd48bd2cfacbab67592e6bf32fe8 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 13 Nov 2023 19:57:26 +0800 Subject: [PATCH 24/66] RDMA/siw: Update comments for siw_qp_sq_process There is no siw_sq_work_handler in code, change it with siw_tx_thread since siw_run_sq -> siw_sq_resume -> siw_qp_sq_process. Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20231113115726.12762-18-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 838123c621e2..64ad9e0895bd 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -997,13 +997,12 @@ static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) * MPA FPDUs, each containing a DDP segment. * * SQ processing may occur in user context as a result of posting - * new WQE's or from siw_sq_work_handler() context. Processing in + * new WQE's or from siw_tx_thread context. Processing in * user context is limited to non-kernel verbs users. * * SQ processing may get paused anytime, possibly in the middle of a WR * or FPDU, if insufficient send space is available. SQ processing - * gets resumed from siw_sq_work_handler(), if send space becomes - * available again. + * gets resumed from siw_tx_thread, if send space becomes available again. * * Must be called with the QP state read-locked. * From f45b83ad39f8033e717b1eee57e81811113d5a84 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 14 Nov 2023 20:34:47 +0800 Subject: [PATCH 25/66] RDMA/hns: Fix inappropriate err code for unsupported operations EOPNOTSUPP is more situable than EINVAL for allocating XRCD while XRC is not supported and unsupported resizing SRQ. Fixes: 32548870d438 ("RDMA/hns: Add support for XRC on HIP09") Fixes: 221109e64316 ("RDMA/hns: Add interception for resizing SRQs") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231114123449.1106162-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0cd2612a4987..c01307be06ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5666,7 +5666,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, /* Resizing SRQs is not supported yet */ if (srq_attr_mask & IB_SRQ_MAX_WR) - return -EINVAL; + return -EOPNOTSUPP; if (srq_attr_mask & IB_SRQ_LIMIT) { if (srq_attr->srq_limit > srq->wqe_cnt) diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 783e71852c50..bd1fe89ca205 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -150,7 +150,7 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata) int ret; if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) - return -EINVAL; + return -EOPNOTSUPP; ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn); if (ret) From ca7ad04cd5d2f8070cd34c2c428cea36de516afc Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 14 Nov 2023 20:34:48 +0800 Subject: [PATCH 26/66] RDMA/hns: Add debugfs to hns RoCE Add debugfs to hns RoCE. This patch only adds an empty directory "hns_roce" to debugs root directory. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231114123449.1106162-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/Makefile | 3 +- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 63 ++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_debugfs.h | 27 +++++++++ drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 + drivers/infiniband/hw/hns/hns_roce_main.c | 3 + 6 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/hns/hns_roce_debugfs.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_debugfs.h diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index a7d259238305..be1e1cdbcfa8 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ + hns_roce_debugfs.o ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c new file mode 100644 index 000000000000..79825570cc35 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2023 Hisilicon Limited. + */ + +#include +#include + +#include "hns_roce_device.h" + +static struct dentry *hns_roce_dbgfs_root; + +static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f) +{ + struct hns_debugfs_seqfile *seqfile = inode->i_private; + + return single_open(f, seqfile->read, seqfile->data); +} + +static const struct file_operations hns_debugfs_seqfile_fops = { + .owner = THIS_MODULE, + .open = hns_debugfs_seqfile_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq, + const char *name, struct dentry *parent, + int (*read_fn)(struct seq_file *, void *), + void *data) +{ + debugfs_create_file(name, 0400, parent, seq, &hns_debugfs_seqfile_fops); + + seq->read = read_fn; + seq->data = data; +} + +/* debugfs for device */ +void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs; + + dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), + hns_roce_dbgfs_root); +} + +void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev) +{ + debugfs_remove_recursive(hr_dev->dbgfs.root); +} + +/* debugfs for hns module */ +void hns_roce_init_debugfs(void) +{ + hns_roce_dbgfs_root = debugfs_create_dir("hns_roce", NULL); +} + +void hns_roce_cleanup_debugfs(void) +{ + debugfs_remove_recursive(hns_roce_dbgfs_root); + hns_roce_dbgfs_root = NULL; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h new file mode 100644 index 000000000000..ece71fe6730c --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2023 Hisilicon Limited. + */ + +#ifndef __HNS_ROCE_DEBUGFS_H +#define __HNS_ROCE_DEBUGFS_H + +/* debugfs seqfile */ +struct hns_debugfs_seqfile { + int (*read)(struct seq_file *seq, void *data); + void *data; +}; + +/* Debugfs for device */ +struct hns_roce_dev_debugfs { + struct dentry *root; +}; + +struct hns_roce_dev; + +void hns_roce_init_debugfs(void); +void hns_roce_cleanup_debugfs(void); +void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev); +void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1627f3b0ef28..a847cdfb4ad6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -35,6 +35,7 @@ #include #include +#include "hns_roce_debugfs.h" #define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP09 0x30 @@ -979,6 +980,7 @@ struct hns_roce_dev { u32 is_vf; u32 cong_algo_tmpl_id; u64 dwqe_page; + struct hns_roce_dev_debugfs dbgfs; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c01307be06ab..8a5432bb0e85 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6958,12 +6958,14 @@ static struct hnae3_client hns_roce_hw_v2_client = { static int __init hns_roce_hw_v2_init(void) { + hns_roce_init_debugfs(); return hnae3_register_client(&hns_roce_hw_v2_client); } static void __exit hns_roce_hw_v2_exit(void) { hnae3_unregister_client(&hns_roce_hw_v2_client); + hns_roce_cleanup_debugfs(); } module_init(hns_roce_hw_v2_init); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a4a10a4e1aab..c9f93ba522c9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1079,6 +1079,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (ret) goto error_failed_register_device; + hns_roce_register_debugfs(hr_dev); + return 0; error_failed_register_device: @@ -1108,6 +1110,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) void hns_roce_exit(struct hns_roce_dev *hr_dev) { + hns_roce_unregister_debugfs(hr_dev); hns_roce_unregister_device(hr_dev); if (hr_dev->hw->hw_exit) From eb7854d63db543caeb8cadb5c3ae5296d0cb7b8f Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 14 Nov 2023 20:34:49 +0800 Subject: [PATCH 27/66] RDMA/hns: Support SW stats with debugfs Support SW stats with debugfs. Query output: $ cat /sys/kernel/debug/hns_roce/hns_0/sw_stat/sw_stat aeqe --- 3341 ceqe --- 0 cmds --- 6764 cmds_err --- 0 posted_mbx --- 3344 polled_mbx --- 3 mbx_event --- 3341 qp_create_err --- 0 qp_modify_err --- 0 cq_create_err --- 0 cq_modify_err --- 0 srq_create_err --- 0 srq_modify_err --- 0 xrcd_alloc_err --- 0 mr_reg_err --- 0 mr_rereg_err --- 0 ah_create_err --- 0 mmap_err --- 0 uctx_alloc_err --- 0 Signed-off-by: Junxian Huang Signed-off-by: Chengchang Tang Link: https://lore.kernel.org/r/20231114123449.1106162-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_ah.c | 6 ++- drivers/infiniband/hw/hns/hns_roce_cmd.c | 19 +++++++- drivers/infiniband/hw/hns/hns_roce_cq.c | 17 ++++--- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 47 ++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_debugfs.h | 6 +++ drivers/infiniband/hw/hns/hns_roce_device.h | 24 ++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 47 ++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_main.c | 45 ++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_mr.c | 26 ++++++++--- drivers/infiniband/hw/hns/hns_roce_pd.c | 14 +++--- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 +++- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 ++- 12 files changed, 221 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 3df032ddda18..fbf046982374 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -92,11 +92,15 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, &ah->av.vlan_id, NULL); if (ret) - return ret; + goto err_out; ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; } +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]); + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 864413607571..873e8a69a1b9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -41,7 +41,15 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, struct hns_roce_mbox_msg *mbox_msg) { - return hr_dev->hw->post_mbox(hr_dev, mbox_msg); + int ret; + + ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg); + if (ret) + return ret; + + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]); + + return 0; } /* this should be called with "poll_sem" */ @@ -58,7 +66,13 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, return ret; } - return hr_dev->hw->poll_mbox_done(hr_dev); + ret = hr_dev->hw->poll_mbox_done(hr_dev); + if (ret) + return ret; + + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]); + + return 0; } static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, @@ -89,6 +103,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO); context->out_param = out_param; complete(&context->done); + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]); } static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 736dc2f993b4..1b6d16af8c12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -363,29 +363,31 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct hns_roce_ib_create_cq ucmd = {}; int ret; - if (attr->flags) - return -EOPNOTSUPP; + if (attr->flags) { + ret = -EOPNOTSUPP; + goto err_out; + } ret = verify_cq_create_attr(hr_dev, attr); if (ret) - return ret; + goto err_out; if (udata) { ret = get_cq_ucmd(hr_cq, udata, &ucmd); if (ret) - return ret; + goto err_out; } set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); ret = set_cqe_size(hr_cq, udata, &ucmd); if (ret) - return ret; + goto err_out; ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); - return ret; + goto err_out; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); @@ -430,6 +432,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, free_cq_db(hr_dev, hr_cq, udata); err_cq_buf: free_cq_buf(hr_dev, hr_cq); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]); + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 79825570cc35..e8febb40f645 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -36,6 +36,51 @@ static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq, seq->data = data; } +static const char * const sw_stat_info[] = { + [HNS_ROCE_DFX_AEQE_CNT] = "aeqe", + [HNS_ROCE_DFX_CEQE_CNT] = "ceqe", + [HNS_ROCE_DFX_CMDS_CNT] = "cmds", + [HNS_ROCE_DFX_CMDS_ERR_CNT] = "cmds_err", + [HNS_ROCE_DFX_MBX_POSTED_CNT] = "posted_mbx", + [HNS_ROCE_DFX_MBX_POLLED_CNT] = "polled_mbx", + [HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event", + [HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err", + [HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err", + [HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err", + [HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err", + [HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err", + [HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err", + [HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT] = "xrcd_alloc_err", + [HNS_ROCE_DFX_MR_REG_ERR_CNT] = "mr_reg_err", + [HNS_ROCE_DFX_MR_REREG_ERR_CNT] = "mr_rereg_err", + [HNS_ROCE_DFX_AH_CREATE_ERR_CNT] = "ah_create_err", + [HNS_ROCE_DFX_MMAP_ERR_CNT] = "mmap_err", + [HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT] = "uctx_alloc_err", +}; + +static int sw_stat_debugfs_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + int i; + + for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++) + seq_printf(file, "%-20s --- %lld\n", sw_stat_info[i], + atomic64_read(&hr_dev->dfx_cnt[i])); + + return 0; +} + +static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev, + struct dentry *parent) +{ + struct hns_sw_stat_debugfs *dbgfs = &hr_dev->dbgfs.sw_stat_root; + + dbgfs->root = debugfs_create_dir("sw_stat", parent); + + init_debugfs_seqfile(&dbgfs->sw_stat, "sw_stat", dbgfs->root, + sw_stat_debugfs_show, hr_dev); +} + /* debugfs for device */ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) { @@ -43,6 +88,8 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), hns_roce_dbgfs_root); + + create_sw_stat_debugfs(hr_dev, dbgfs->root); } void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h index ece71fe6730c..98e87bd3161e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.h +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h @@ -12,9 +12,15 @@ struct hns_debugfs_seqfile { void *data; }; +struct hns_sw_stat_debugfs { + struct dentry *root; + struct hns_debugfs_seqfile sw_stat; +}; + /* Debugfs for device */ struct hns_roce_dev_debugfs { struct dentry *root; + struct hns_sw_stat_debugfs sw_stat_root; }; struct hns_roce_dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a847cdfb4ad6..b1fce5ddf631 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -870,6 +870,29 @@ enum hns_roce_hw_pkt_stat_index { HNS_ROCE_HW_CNT_TOTAL }; +enum hns_roce_sw_dfx_stat_index { + HNS_ROCE_DFX_AEQE_CNT, + HNS_ROCE_DFX_CEQE_CNT, + HNS_ROCE_DFX_CMDS_CNT, + HNS_ROCE_DFX_CMDS_ERR_CNT, + HNS_ROCE_DFX_MBX_POSTED_CNT, + HNS_ROCE_DFX_MBX_POLLED_CNT, + HNS_ROCE_DFX_MBX_EVENT_CNT, + HNS_ROCE_DFX_QP_CREATE_ERR_CNT, + HNS_ROCE_DFX_QP_MODIFY_ERR_CNT, + HNS_ROCE_DFX_CQ_CREATE_ERR_CNT, + HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT, + HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT, + HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT, + HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT, + HNS_ROCE_DFX_MR_REG_ERR_CNT, + HNS_ROCE_DFX_MR_REREG_ERR_CNT, + HNS_ROCE_DFX_AH_CREATE_ERR_CNT, + HNS_ROCE_DFX_MMAP_ERR_CNT, + HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT, + HNS_ROCE_DFX_CNT_TOTAL +}; + struct hns_roce_hw { int (*cmq_init)(struct hns_roce_dev *hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); @@ -981,6 +1004,7 @@ struct hns_roce_dev { u32 cong_algo_tmpl_id; u64 dwqe_page; struct hns_roce_dev_debugfs dbgfs; + atomic64_t *dfx_cnt; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8a5432bb0e85..5d0a7a1394fc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1297,6 +1297,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, /* Write to hardware */ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head); + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); + do { if (hns_roce_cmq_csq_done(hr_dev)) break; @@ -1334,6 +1336,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, spin_unlock_bh(&csq->lock); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); + return ret; } @@ -5662,19 +5667,25 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, struct hns_roce_srq_context *srq_context; struct hns_roce_srq_context *srqc_mask; struct hns_roce_cmd_mailbox *mailbox; - int ret; + int ret = 0; /* Resizing SRQs is not supported yet */ - if (srq_attr_mask & IB_SRQ_MAX_WR) - return -EOPNOTSUPP; + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EOPNOTSUPP; + goto out; + } if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit > srq->wqe_cnt) - return -EINVAL; + if (srq_attr->srq_limit > srq->wqe_cnt) { + ret = -EINVAL; + goto out; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto out; + } srq_context = mailbox->buf; srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; @@ -5687,15 +5698,17 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "failed to handle cmd of modifying SRQ, ret = %d.\n", ret); - return ret; - } } - return 0; +out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT]); + + return ret; } static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -5739,8 +5752,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + ret = PTR_ERR_OR_ZERO(mailbox); + if (ret) + goto err_out; cq_context = mailbox->buf; cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1; @@ -5770,6 +5784,10 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) "failed to process cmd when modifying CQ, ret = %d.\n", ret); +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT]); + return ret; } @@ -6009,6 +6027,8 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = IRQ_HANDLED; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -6050,6 +6070,7 @@ static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = IRQ_HANDLED; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); ceqe = next_ceqe_sw_v2(eq); } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c9f93ba522c9..b55fe6911f9f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -361,10 +361,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_ib_alloc_ucontext ucmd = {}; - int ret; + int ret = -EAGAIN; if (!hr_dev->active) - return -EAGAIN; + goto error_out; resp.qp_tab_size = hr_dev->caps.num_qps; resp.srq_tab_size = hr_dev->caps.num_srqs; @@ -372,7 +372,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) - return ret; + goto error_out; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS; @@ -396,7 +396,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) - goto error_fail_uar_alloc; + goto error_out; ret = hns_roce_alloc_uar_entry(uctx); if (ret) @@ -423,7 +423,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, error_fail_uar_entry: ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); -error_fail_uar_alloc: +error_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]); + return ret; } @@ -439,6 +441,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); struct rdma_user_mmap_entry *rdma_entry; struct hns_user_mmap_entry *entry; phys_addr_t pfn; @@ -446,8 +449,10 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) int ret; rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); - if (!rdma_entry) + if (!rdma_entry) { + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); return -EINVAL; + } entry = to_hns_mmap(rdma_entry); pfn = entry->address >> PAGE_SHIFT; @@ -467,6 +472,9 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) out: rdma_user_mmap_entry_put(rdma_entry); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); + return ret; } @@ -1009,6 +1017,21 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); } +static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev) +{ + hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t), + GFP_KERNEL); + if (!hr_dev->dfx_cnt) + return -ENOMEM; + + return 0; +} + +static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev) +{ + kvfree(hr_dev->dfx_cnt); +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; @@ -1016,11 +1039,15 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) hr_dev->is_reset = false; + ret = hns_roce_alloc_dfx_cnt(hr_dev); + if (ret) + return ret; + if (hr_dev->hw->cmq_init) { ret = hr_dev->hw->cmq_init(hr_dev); if (ret) { dev_err(dev, "init RoCE Command Queue failed!\n"); - return ret; + goto error_failed_alloc_dfx_cnt; } } @@ -1105,6 +1132,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); +error_failed_alloc_dfx_cnt: + hns_roce_dealloc_dfx_cnt(hr_dev); + return ret; } @@ -1125,6 +1155,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) hns_roce_cmd_cleanup(hr_dev); if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); + hns_roce_dealloc_dfx_cnt(hr_dev); } MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 14376490ac22..d68074b6ca17 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -228,8 +228,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ret; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); + if (!mr) { + ret = -ENOMEM; + goto err_out; + } mr->iova = virt_addr; mr->size = length; @@ -259,6 +261,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, free_mr_key(hr_dev, mr); err_alloc_mr: kfree(mr); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]); + return ERR_PTR(ret); } @@ -274,12 +279,15 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, unsigned long mtpt_idx; int ret; - if (!mr->enabled) - return ERR_PTR(-EINVAL); + if (!mr->enabled) { + ret = -EINVAL; + goto err_out; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return ERR_CAST(mailbox); + ret = PTR_ERR_OR_ZERO(mailbox); + if (ret) + goto err_out; mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); @@ -331,8 +339,12 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) +err_out: + if (ret) { + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]); return ERR_PTR(ret); + } + return NULL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index bd1fe89ca205..d35cf59d0f43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -149,14 +149,18 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata) struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd); int ret; - if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) - return -EOPNOTSUPP; + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) { + ret = -EOPNOTSUPP; + goto err_out; + } ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn); - if (ret) - return ret; - return 0; +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]); + + return ret; } int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 828b58534aa9..31b147210688 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1216,7 +1216,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); if (ret) - return ret; + goto err_out; if (init_attr->qp_type == IB_QPT_XRC_TGT) hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn; @@ -1231,6 +1231,10 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]); + return ret; } @@ -1366,6 +1370,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, out: mutex_unlock(&hr_qp->mutex); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 4e2d1c8e164a..4abae9477854 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -475,11 +475,11 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = set_srq_param(srq, init_attr, udata); if (ret) - return ret; + goto err_out; ret = alloc_srq_buf(hr_dev, srq, udata); if (ret) - return ret; + goto err_out; ret = alloc_srq_db(hr_dev, srq, udata, &resp); if (ret) @@ -517,6 +517,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, free_srq_db(hr_dev, srq, udata); err_srq_buf: free_srq_buf(hr_dev, srq); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]); return ret; } From 0529e26d8b7b51da99c9d7234700f4041d20c0e4 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 15 Nov 2023 16:27:41 +0100 Subject: [PATCH 28/66] RDMA/rtrs-clt: Add warning logs for RDMA events Some RDMA CM events can trigger connection close or recovery for a certain rtrs_clt_path. Such close/recovery triggers should happen after an appropriate log message, since they can lead to IO failures. Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231115152749.424301-2-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 07261523c554..984a4a1db3c8 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2025,6 +2025,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, /* * Device removal is a special case. Queue close and return 0. */ + rtrs_wrn_rl(s, "CM event: %s, status: %d\n", rdma_event_msg(ev->event), + ev->status); rtrs_clt_close_conns(clt_path, false); return 0; default: From e76f514dc9fdacf8522e6efcabf883514e5299e5 Mon Sep 17 00:00:00 2001 From: Supriti Singh Date: Mon, 20 Nov 2023 16:41:45 +0100 Subject: [PATCH 29/66] RDMA/rtrs-clt: Use %pe to print errors While printing error, replace %ld by %pe. %pe prints a string whereas %ld would print an error code. Signed-off-by: Supriti Singh Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Signed-off-by: Md Haris Iqbal Link: https://lore.kernel.org/r/20231120154146.920486-9-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 984a4a1db3c8..f7a5ec396554 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1391,9 +1391,9 @@ static int alloc_path_reqs(struct rtrs_clt_path *clt_path) clt_path->max_pages_per_mr); if (IS_ERR(req->mr)) { err = PTR_ERR(req->mr); + pr_err("Failed to alloc clt_path->max_pages_per_mr %d: %pe\n", + clt_path->max_pages_per_mr, req->mr); req->mr = NULL; - pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n", - clt_path->max_pages_per_mr); goto out; } @@ -2060,10 +2060,8 @@ static int create_cm(struct rtrs_clt_con *con) clt_path->s.dst_addr.ss_family == AF_IB ? RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { - err = PTR_ERR(cm_id); - rtrs_err(s, "Failed to create CM ID, err: %d\n", err); - - return err; + rtrs_err(s, "Failed to create CM ID, err: %pe\n", cm_id); + return PTR_ERR(cm_id); } con->c.cm_id = cm_id; con->cm_err = 0; From 640233258e5b61fed10b382af691b6a852f00392 Mon Sep 17 00:00:00 2001 From: Supriti Singh Date: Mon, 20 Nov 2023 16:41:46 +0100 Subject: [PATCH 30/66] RDMA/rtrs: Use %pe to print errors While printing error, replace %ld by %pe. %pe prints a string whereas %ld would print an error code. Signed-off-by: Supriti Singh Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Signed-off-by: Md Haris Iqbal Link: https://lore.kernel.org/r/20231120154146.920486-10-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index d80edfffd2e4..4e17d546d4cc 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -242,8 +242,8 @@ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); if (IS_ERR(cq)) { - rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n", - PTR_ERR(cq)); + rtrs_err(con->path, "Creating completion queue failed, errno: %pe\n", + cq); return PTR_ERR(cq); } con->cq = cq; From 753fff78f430704548f45eda52d6d55371a52c0f Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Tue, 21 Nov 2023 14:03:15 +0100 Subject: [PATCH 31/66] RDMA/IPoIB: Fix error code return in ipoib_mcast_join Return the error code in case of ib_sa_join_multicast fail. Signed-off-by: Jack Wang Link: https://lore.kernel.org/r/20231121130316.126364-2-jinpu.wang@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 5b3154503bf4..9e6967a40042 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -546,6 +546,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) spin_unlock_irq(&priv->lock); complete(&mcast->done); spin_lock_irq(&priv->lock); + return ret; } return 0; } From 50af5d12f7e24b85fc10270d7700f4aa1b20b8e4 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Tue, 21 Nov 2023 14:03:16 +0100 Subject: [PATCH 32/66] RDMA/IPoIB: Add tx timeout work to recover queue stop situation As we sometime run into TX timeout from IPoIB, queue seems stopped and can't recover. Diff with Mellanox OFED show Mellanox driver has timeout work to recover in such case. Add TX timeout work/NAPI work to recover such case. Also increase the watchdog_timeo to 10 seconds, so more tolerant to error. Signed-off-by: Jack Wang Link: https://lore.kernel.org/r/20231121130316.126364-3-jinpu.wang@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 +++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 26 +++++++++++++++++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 +++++++++++++++++++++-- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 35e9c8a330e2..963e936da5e3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -351,10 +351,12 @@ struct ipoib_dev_priv { struct workqueue_struct *wq; struct delayed_work mcast_task; struct work_struct carrier_on_task; + struct work_struct reschedule_napi_work; struct work_struct flush_light; struct work_struct flush_normal; struct work_struct flush_heavy; struct work_struct restart_task; + struct work_struct tx_timeout_work; struct delayed_work ah_reap_task; struct delayed_work neigh_reap_task; struct ib_device *ca; @@ -499,6 +501,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ib_ah *address, u32 dqpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_napi_schedule_work(struct work_struct *work); struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); @@ -510,6 +513,7 @@ void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); +void ipoib_ib_tx_timeout_work(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 7f84d9866cef..5cde275daa94 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -531,11 +531,35 @@ void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr) napi_schedule(&priv->recv_napi); } +/* The function will force napi_schedule */ +void ipoib_napi_schedule_work(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, reschedule_napi_work); + bool ret; + + do { + ret = napi_schedule(&priv->send_napi); + if (!ret) + msleep(3); + } while (!ret && netif_queue_stopped(priv->dev) && + test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)); +} + void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr) { struct ipoib_dev_priv *priv = ctx_ptr; + bool ret; - napi_schedule(&priv->send_napi); + ret = napi_schedule(&priv->send_napi); + /* + * if the queue is closed the driver must be able to schedule napi, + * otherwise we can end with closed queue forever, because no new + * packets to send and napi callback might not get new event after + * its re-arm of the napi. + */ + if (!ret && netif_queue_stopped(priv->dev)) + schedule_work(&priv->reschedule_napi_work); } static inline int post_send(struct ipoib_dev_priv *priv, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 967004ccad98..7a5be705d718 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1200,7 +1200,34 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) netif_queue_stopped(dev), priv->tx_head, priv->tx_tail, priv->global_tx_head, priv->global_tx_tail); - /* XXX reset QP, etc. */ + + schedule_work(&priv->tx_timeout_work); +} + +void ipoib_ib_tx_timeout_work(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, + struct ipoib_dev_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + + if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + goto unlock; + + ipoib_stop(priv->dev); + err = ipoib_open(priv->dev); + if (err) { + ipoib_warn(priv, "ipoib_open failed recovering from a tx_timeout, err(%d).\n", + err); + goto unlock; + } + + netif_tx_wake_all_queues(priv->dev); +unlock: + rtnl_unlock(); + } static int ipoib_hard_header(struct sk_buff *skb, @@ -2112,7 +2139,7 @@ void ipoib_setup_common(struct net_device *dev) ipoib_set_ethtool_ops(dev); - dev->watchdog_timeo = HZ; + dev->watchdog_timeo = 10 * HZ; dev->flags |= IFF_BROADCAST | IFF_MULTICAST; @@ -2150,10 +2177,12 @@ static void ipoib_build_priv(struct net_device *dev) INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); + INIT_WORK(&priv->reschedule_napi_work, ipoib_napi_schedule_work); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); + INIT_WORK(&priv->tx_timeout_work, ipoib_ib_tx_timeout_work); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } From 84de14baf81675a98dc0fe617a93eacf9a417de3 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 3 Dec 2023 17:26:52 +0800 Subject: [PATCH 33/66] RDMA/siw: Move tx_cpu ahead We can reduce one cacheline for the usage of struct siw_qp. Before, /* size: 1928, cachelines: 31, members: 38 */ /* sum members: 1920, holes: 2, sum holes: 8 */ /* paddings: 4, sum paddings: 13 */ /* forced alignments: 3 */ after /* size: 1920, cachelines: 30, members: 38 */ /* paddings: 4, sum paddings: 13 */ /* forced alignments: 3 */ Link: https://lore.kernel.org/r/20231203092655.28102-2-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index b36d1ec25327..d14bb965af75 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -417,10 +417,10 @@ struct siw_iwarp_tx { struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; + int tx_cpu; struct kref ref; struct completion qp_free; struct list_head devq; - int tx_cpu; struct siw_qp_attrs attrs; struct siw_cep *cep; From 51ac45a6636221d94e0b840cb8dd3d08b3622d64 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 3 Dec 2023 17:26:53 +0800 Subject: [PATCH 34/66] RDMA/siw: Reduce memory usage of struct siw_rx_stream We can reduce the memory of the struct by move some of it's member. Before, /* size: 144, cachelines: 3, members: 17 */ /* sum members: 124, holes: 3, sum holes: 12 */ /* sum bitfield members: 7 bits (0 bytes) */ /* padding: 7 */ /* bit_padding: 1 bits */ After /* size: 128, cachelines: 2, members: 17 */ /* padding: 3 */ /* bit_padding: 1 bits */ Link: https://lore.kernel.org/r/20231203092655.28102-3-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index d14bb965af75..2edba2a864bb 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -288,10 +288,11 @@ struct siw_rx_stream { int skb_offset; /* offset in skb */ int skb_copied; /* processed bytes in skb */ + enum siw_rx_state state; + union iwarp_hdr hdr; struct mpa_trailer trailer; - - enum siw_rx_state state; + struct shash_desc *mpa_crc_hd; /* * For each FPDU, main RX loop runs through 3 stages: @@ -313,7 +314,6 @@ struct siw_rx_stream { u64 ddp_to; u32 inval_stag; /* Stag to be invalidated */ - struct shash_desc *mpa_crc_hd; u8 rx_suspend : 1; u8 pad : 2; /* # of pad bytes expected */ u8 rdmap_op : 4; /* opcode of current frame */ From 0b988c1bee28b48f7e4f264ff17d0d3e8817aa69 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 3 Dec 2023 17:26:54 +0800 Subject: [PATCH 35/66] RDMA/siw: Set qp_state in siw_query_qp Run test_query_rc_qp against siw failed since siw didn't set qp_state accordingly. To address it, introduce siw_qp_state_to_ib_qp_state which convert SIW_QP_STATE_IDLE to IB_QPS_INIT which is similar as in cxgb4. rdma-core# ./build/bin/run_tests.py --dev siw0 tests.test_qp.QPTest.test_query_rc_qp -v test_query_rc_qp (tests.test_qp.QPTest) Queries an RC QP after creation. Verifies that its properties are as ... FAIL ====================================================================== FAIL: test_query_rc_qp (tests.test_qp.QPTest) Queries an RC QP after creation. Verifies that its properties are as ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/gjiang/rdma-core/tests/test_qp.py", line 284, in test_query_rc_qp self.query_qp_common_test(e.IBV_QPT_RC) File "/home/gjiang/rdma-core/tests/test_qp.py", line 265, in query_qp_common_test self.verify_qp_attrs(caps, e.IBV_QPS_INIT, qp_init_attr, qp_attr) File "/home/gjiang/rdma-core/tests/test_qp.py", line 239, in verify_qp_attrs self.assertEqual(state, attr.qp_state) AssertionError: != 0 ---------------------------------------------------------------------- Ran 1 test in 0.057s FAILED (failures=1) Link: https://lore.kernel.org/r/20231203092655.28102-4-guoqing.jiang@linux.dev Signed-off-by: Guoqing Jiang Acked-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index dca6a155523d..ecf0444666b4 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -19,6 +19,15 @@ #include "siw_verbs.h" #include "siw_mem.h" +static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { + [SIW_QP_STATE_IDLE] = IB_QPS_INIT, + [SIW_QP_STATE_RTR] = IB_QPS_RTR, + [SIW_QP_STATE_RTS] = IB_QPS_RTS, + [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, + [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, + [SIW_QP_STATE_ERROR] = IB_QPS_ERR +}; + static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = SIW_QP_STATE_IDLE, [IB_QPS_INIT] = SIW_QP_STATE_IDLE, @@ -504,6 +513,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, } else { return -EINVAL; } + qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; From b7a2768a1cc3de8947b1fe4b1084de2f5464c606 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 3 Dec 2023 17:26:55 +0800 Subject: [PATCH 36/66] RDMA/siw: Call orq_get_current if possible Use orq_get_current() in siw_orq_empty(). Link: https://lore.kernel.org/r/20231203092655.28102-5-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Guoqing Jiang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 2edba2a864bb..75253f2b3e3d 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -657,7 +657,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) static inline int siw_orq_empty(struct siw_qp *qp) { - return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0; + return orq_get_current(qp)->flags == 0 ? 1 : 0; } static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp) From 95f6b40082aaf37fd0553828982402af36f81685 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 7 Dec 2023 19:42:27 +0800 Subject: [PATCH 37/66] RDMA/hns: Rename the interrupts Now, different devices may have the same interrupt name, which makes it difficult for users to distinguish between these interrupts. Modify the naming style to be consistent with our network devices. Before: "hns-aeq-0" "hns-ceq-0" ... Now: "hns-0000:35:00.0-aeq-0" "hns-0000:35:00.0-ceq-0" ... Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5d0a7a1394fc..4258b6daaded 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6473,15 +6473,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, /* irq contains: abnormal + AEQ + CEQ */ for (j = 0; j < other_num; j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-abn-%d", j); + "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j); for (j = other_num; j < (other_num + aeq_num); j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-aeq-%d", j - other_num); + "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num); for (j = (other_num + aeq_num); j < irq_num; j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-ceq-%d", j - other_num - aeq_num); + "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev), + j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { if (j < other_num) From d3f4020a213e1cb125eed2363fca372a23f7de7a Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Thu, 7 Dec 2023 19:42:28 +0800 Subject: [PATCH 38/66] RDMA/hns: Response dmac to userspace While creating AH, dmac is already resolved in kernel. Response dmac to userspace so that userspace doesn't need to resolve dmac repeatedly. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_ah.c | 7 +++++++ include/uapi/rdma/hns-abi.h | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index fbf046982374..b4209b6aed8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -57,6 +57,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ib_create_ah_resp resp = {}; struct hns_roce_ah *ah = to_hr_ah(ibah); int ret = 0; u32 max_sl; @@ -97,6 +98,12 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; } + if (udata) { + memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + } + err_out: if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index ce0f37f83416..c996e151081e 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -125,4 +125,9 @@ struct hns_roce_ib_alloc_pd_resp { __u32 pdn; }; +struct hns_roce_ib_create_ah_resp { + __u8 dmac[6]; + __u8 reserved[2]; +}; + #endif /* HNS_ABI_USER_H */ From 7243396aaf12385ba514764b6401bcd15e1a52c7 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Thu, 7 Dec 2023 19:42:29 +0800 Subject: [PATCH 39/66] RDMA/hns: Add a max length of gid table IB-core and rdma-core restrict the sgid_index specified by users, which is uint8_t/u8 data type, to only be within the range of 0-255, so it's meaningless to support excessively large gid_table_len. On the other hand, ib-core creates as many sysfs gid files as gid_table_len, most of which are not only useless because of the reason above, but also greatly increase the traversal time of the sysfs gid files for applications. This patch limits the maximum length of gid table to 256. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4258b6daaded..93a71db527d8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2060,6 +2060,7 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) /* Apply all loaded caps before setting to hardware */ static void apply_func_caps(struct hns_roce_dev *hr_dev) { +#define MAX_GID_TBL_LEN 256 struct hns_roce_caps *caps = &hr_dev->caps; struct hns_roce_v2_priv *priv = hr_dev->priv; @@ -2095,8 +2096,14 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; - caps->gid_table_len[0] = caps->gmv_bt_num * - (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); + + /* It's meaningless to support excessively large gid_table_len, + * as the type of sgid_index in kernel struct ib_global_route + * and userspace struct ibv_global_route are u8/uint8_t (0-255). + */ + caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN, + caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz)); caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / caps->gmv_entry_sz); From f31683a5227b1a0febae8e2a85d7fdf9514c10f1 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 7 Dec 2023 19:42:30 +0800 Subject: [PATCH 40/66] RDMA/hns: Remove unnecessary checks for NULL in mtr_alloc_bufs() ib_umem_get() never return NULL. Fixes: 3c873161a0d7 ("RDMA/hns: Add support for addressing when hopnum is 0") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index d68074b6ca17..91cd580480fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -686,7 +686,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); - if (IS_ERR_OR_NULL(mtr->umem)) { + if (IS_ERR(mtr->umem)) { ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; From 288f535951aa81ed674f5e5477ab11b9d9351b8c Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 7 Dec 2023 19:42:31 +0800 Subject: [PATCH 41/66] RDMA/hns: Fix memory leak in free_mr_init() When a reserved QP fails to be created, the memory of the remaining created reserved QPs is leaked. Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-6-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 93a71db527d8..b8dde04bd573 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2710,6 +2710,10 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) return 0; create_failed_qp: + for (i--; i >= 0; i--) { + hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL); + kfree(free_mr->rsv_qp[i]); + } hns_roce_destroy_cq(cq, NULL); kfree(cq); From 1801d87b3598b173bce3fbf15c5517796f38db96 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:35 -0800 Subject: [PATCH 42/66] RDMA/bnxt_re: Support new 5760X P7 devices Add basic support for 5760X P7 devices. Add new chip revisions. The first version support is similar to the existing P5 adapters. Extend the current support for P5 adapters to P7 also. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 +++++----- drivers/infiniband/hw/bnxt_re/main.c | 14 +++++++------- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.h | 20 +++++++++++++++++--- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 6 +++--- 8 files changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 93572405d6fa..128651c01595 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -371,7 +371,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, } done: - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; } @@ -381,7 +381,7 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); int num_counters = 0; - if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) num_counters = BNXT_RE_NUM_EXT_COUNTERS; else num_counters = BNXT_RE_NUM_STD_COUNTERS; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b2467de721dc..e7ef099c3edd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1023,7 +1023,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? @@ -1234,7 +1234,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; if (qplqp->rq.max_sge > dev_attr->max_qp_sges) qplqp->rq.max_sge = dev_attr->max_qp_sges; @@ -1301,7 +1301,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); @@ -1328,7 +1328,7 @@ static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, goto out; } - if (bnxt_qplib_is_chip_gen_p5(chip_ctx) && + if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) && init_attr->qp_type == IB_QPT_GSI) qptype = CMDQ_CREATE_QP_TYPE_GSI; out: @@ -1527,7 +1527,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, goto fail; if (qp_init_attr->qp_type == IB_QPT_GSI && - !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) { + !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) { rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr); if (rc == -ENODEV) goto qp_destroy; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f79369c8360a..09c0b2e62cd9 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -128,7 +128,7 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) struct bnxt_qplib_chip_ctx *cctx; cctx = rdev->chip_ctx; - cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? mode : BNXT_QPLIB_WQE_MODE_STATIC; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), @@ -215,7 +215,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, attr->max_srq); ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = rdev->dev_attr.tqm_alloc_reqs[i]; @@ -264,7 +264,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); bnxt_re_limit_pf_res(rdev); - num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; if (num_vfs) bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); @@ -276,7 +276,7 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return; rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, &rdev->qplib_ctx); @@ -1216,7 +1216,7 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, #define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : BNXT_RE_GEN_P5_PF_NQ_DB) : rdev->en_dev->msix_entries[indx].db_offset; @@ -1681,7 +1681,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, - bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)); + bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate QPLIB context: %#x\n", rc); @@ -1804,7 +1804,7 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) return; /* Currently enabling only for GenP5 adapters */ - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) return; if (enable) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b821c37dffd9..1b7e9506b457 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -991,7 +991,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ if (qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); } @@ -1605,7 +1605,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & SQ_PSN_SEARCH_NEXT_PSN_MASK); - if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 15e6d2b80c70..403b6797d9c2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -852,7 +852,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, */ if (is_virtfn) goto skip_ctx_setup; - if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) goto config_vf_res; lvl = ctx->qpc_tbl.level; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index ae2bde34e785..dfc943fab87b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -805,7 +805,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, dpit = &res->dpi_tbl; reg = &dpit->wcreg; - if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { /* Offest should come from L2 driver */ dbr_offset = dev_attr->l2_db_size; dpit->ucreg.offset = dbr_offset; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 3e3383b8a913..397846bc8712 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -44,6 +44,9 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_57508 0x1750 #define CHIP_NUM_57504 0x1751 #define CHIP_NUM_57502 0x1752 +#define CHIP_NUM_58818 0xd818 +#define CHIP_NUM_57608 0x1760 + struct bnxt_qplib_drv_modes { u8 wqe_mode; @@ -296,6 +299,12 @@ struct bnxt_qplib_res { struct bnxt_qplib_db_pacing_data *pacing_data; }; +static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_58818 || + cctx->chip_num == CHIP_NUM_57608); +} + static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { return (cctx->chip_num == CHIP_NUM_57508 || @@ -303,15 +312,20 @@ static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) cctx->chip_num == CHIP_NUM_57502); } +static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx); +} + static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) { - return bnxt_qplib_is_chip_gen_p5(res->cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL; } static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) { - return bnxt_qplib_is_chip_gen_p5(cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(cctx) ? RING_ALLOC_REQ_RING_TYPE_NQ : RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } @@ -488,7 +502,7 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, u32 type; type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - if (bnxt_qplib_is_chip_gen_p5(cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(cctx)) bnxt_qplib_ring_db(info, type); else bnxt_qplib_ring_db32(info, arm); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index a27b68515164..c580bf78d4c1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -59,7 +59,7 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) { u16 pcie_ctl2 = 0; - if (!bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) return false; pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2); @@ -133,7 +133,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * reporting the max number */ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; - attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? + attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); @@ -934,7 +934,7 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, req->inactivity_th = cpu_to_le16(cc_param->inact_th); /* For chip gen P5 onwards fill extended cmd and header */ - if (bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { struct roce_tlv *hdr; u32 payload; u32 chunks; From a62d685814416647fbb28b3eb2617744adef2d4f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:36 -0800 Subject: [PATCH 43/66] RDMA/bnxt_re: Update the BAR offsets Update the BAR offsets for handling GenP7 adapters. Use the values populated by L2 driver for getting the Doorbell offsets. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 21 +++++++-------------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 5 +++-- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 09c0b2e62cd9..b7134d510f52 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -107,8 +107,11 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ - res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : - BNXT_QPLIB_DBR_PF_DB_OFFSET; + if (bnxt_qplib_is_chip_gen_p7(cctx)) + res->dpi_tbl.ucreg.offset = offset; + else + res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : + BNXT_QPLIB_DBR_PF_DB_OFFSET; res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size @@ -1212,16 +1215,6 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } -#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 -#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 -static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) -{ - return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? - (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : - BNXT_RE_GEN_P5_PF_NQ_DB) : - rdev->en_dev->msix_entries[indx].db_offset; -} - static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; @@ -1242,7 +1235,7 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) bnxt_qplib_init_res(&rdev->qplib_res); for (i = 1; i < rdev->num_msix ; i++) { - db_offt = bnxt_re_get_nqdb_offset(rdev, i); + db_offt = rdev->en_dev->msix_entries[i].db_offset; rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], i - 1, rdev->en_dev->msix_entries[i].vector, db_offt, &bnxt_re_cqn_handler, @@ -1653,7 +1646,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); + db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index c580bf78d4c1..8beeedd15061 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -151,8 +151,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_srq_sges = sb->max_srq_sge; attr->max_pkey = 1; attr->max_inline_data = le32_to_cpu(sb->max_inline_data); - attr->l2_db_size = (sb->l2_db_space_size + 1) * - (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); + if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) + attr->l2_db_size = (sb->l2_db_space_size + 1) * + (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); From 880a5dd1880a296575e92dec9816a7f35a7011d1 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:37 -0800 Subject: [PATCH 44/66] RDMA/bnxt_re: Update the HW interface definitions Adds HW interface definitions to support the new chip revision. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 67 ++++++++++++++++++++---- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 84b5acd7f7a2..605c9463c408 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -555,7 +555,12 @@ struct cmdq_modify_qp { __le16 flags; __le16 cookie; u8 resp_size; - u8 reserved8; + u8 qp_type; + #define CMDQ_MODIFY_QP_QP_TYPE_RC 0x2UL + #define CMDQ_MODIFY_QP_QP_TYPE_UD 0x4UL + #define CMDQ_MODIFY_QP_QP_TYPE_RAW_ETHERTYPE 0x6UL + #define CMDQ_MODIFY_QP_QP_TYPE_GSI 0x7UL + #define CMDQ_MODIFY_QP_QP_TYPE_LAST CMDQ_MODIFY_QP_QP_TYPE_GSI __le64 resp_addr; __le32 modify_mask; #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL @@ -611,14 +616,12 @@ struct cmdq_modify_qp { #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6) #define CMDQ_MODIFY_QP_NETWORK_TYPE_LAST CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 u8 access; - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK \ - 0xffUL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT \ - 0 - #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK 0xffUL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT 0 + #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL __le16 pkey; __le32 qkey; __le32 dgid[4]; @@ -673,6 +676,13 @@ struct cmdq_modify_qp { #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13 __le64 irrq_addr; __le64 orrq_addr; + __le32 ext_modify_mask; + #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL + #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL + __le32 ext_stats_ctx_id; + __le16 schq_id; + __le16 unused_0; + __le32 reserved32; }; /* creq_modify_qp_resp (size:128b/16B) */ @@ -3075,6 +3085,17 @@ struct sq_psn_search_ext { __le32 reserved32; }; +/* sq_msn_search (size:64b/8B) */ +struct sq_msn_search { + __le64 start_idx_next_psn_start_psn; + #define SQ_MSN_SEARCH_START_PSN_MASK 0xffffffUL + #define SQ_MSN_SEARCH_START_PSN_SFT 0 + #define SQ_MSN_SEARCH_NEXT_PSN_MASK 0xffffff000000ULL + #define SQ_MSN_SEARCH_NEXT_PSN_SFT 24 + #define SQ_MSN_SEARCH_START_IDX_MASK 0xffff000000000000ULL + #define SQ_MSN_SEARCH_START_IDX_SFT 48 +}; + /* sq_send (size:1024b/128B) */ struct sq_send { u8 wqe_type; @@ -3763,13 +3784,35 @@ struct cq_base { #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1) #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1) #define CQ_BASE_CQE_TYPE_RES_UD_CFA (0x4UL << 1) + #define CQ_BASE_CQE_TYPE_REQ_V3 (0x8UL << 1) + #define CQ_BASE_CQE_TYPE_RES_RC_V3 (0x9UL << 1) + #define CQ_BASE_CQE_TYPE_RES_UD_V3 (0xaUL << 1) + #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1_V3 (0xbUL << 1) + #define CQ_BASE_CQE_TYPE_RES_UD_CFA_V3 (0xcUL << 1) #define CQ_BASE_CQE_TYPE_NO_OP (0xdUL << 1) #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1) #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1) #define CQ_BASE_CQE_TYPE_LAST CQ_BASE_CQE_TYPE_CUT_OFF u8 status; + #define CQ_BASE_STATUS_OK 0x0UL + #define CQ_BASE_STATUS_BAD_RESPONSE_ERR 0x1UL + #define CQ_BASE_STATUS_LOCAL_LENGTH_ERR 0x2UL + #define CQ_BASE_STATUS_HW_LOCAL_LENGTH_ERR 0x3UL + #define CQ_BASE_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL + #define CQ_BASE_STATUS_LOCAL_PROTECTION_ERR 0x5UL + #define CQ_BASE_STATUS_LOCAL_ACCESS_ERROR 0x6UL + #define CQ_BASE_STATUS_MEMORY_MGT_OPERATION_ERR 0x7UL + #define CQ_BASE_STATUS_REMOTE_INVALID_REQUEST_ERR 0x8UL + #define CQ_BASE_STATUS_REMOTE_ACCESS_ERR 0x9UL + #define CQ_BASE_STATUS_REMOTE_OPERATION_ERR 0xaUL + #define CQ_BASE_STATUS_RNR_NAK_RETRY_CNT_ERR 0xbUL + #define CQ_BASE_STATUS_TRANSPORT_RETRY_CNT_ERR 0xcUL + #define CQ_BASE_STATUS_WORK_REQUEST_FLUSHED_ERR 0xdUL + #define CQ_BASE_STATUS_HW_FLUSH_ERR 0xeUL + #define CQ_BASE_STATUS_OVERFLOW_ERR 0xfUL + #define CQ_BASE_STATUS_LAST CQ_BASE_STATUS_OVERFLOW_ERR __le16 reserved16; - __le32 reserved32; + __le32 opaque; }; /* cq_req (size:256b/32B) */ @@ -4384,6 +4427,8 @@ struct cq_cutoff { #define CQ_CUTOFF_CQE_TYPE_SFT 1 #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1) #define CQ_CUTOFF_CQE_TYPE_LAST CQ_CUTOFF_CQE_TYPE_CUT_OFF + #define CQ_CUTOFF_RESIZE_TOGGLE_MASK 0x60UL + #define CQ_CUTOFF_RESIZE_TOGGLE_SFT 5 u8 status; #define CQ_CUTOFF_STATUS_OK 0x0UL #define CQ_CUTOFF_STATUS_LAST CQ_CUTOFF_STATUS_OK @@ -4435,6 +4480,8 @@ struct nq_srq_event { #define NQ_SRQ_EVENT_TYPE_SFT 0 #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL #define NQ_SRQ_EVENT_TYPE_LAST NQ_SRQ_EVENT_TYPE_SRQ_EVENT + #define NQ_SRQ_EVENT_TOGGLE_MASK 0xc0UL + #define NQ_SRQ_EVENT_TOGGLE_SFT 6 u8 event; #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL #define NQ_SRQ_EVENT_EVENT_LAST NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT From 6027c20dad1ad1da45877151d8b75dc51a928ccd Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:38 -0800 Subject: [PATCH 45/66] RDMA/bnxt_re: Get the toggle bits from CQ completions Get the toggle bits from CQ completions. For older adapters these values are 0. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 6 ++++++ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 1b7e9506b457..177c6c185f0c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -330,6 +330,9 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; if (!cq) break; + cq->toggle = (le16_to_cpu(nqe->info10_type) & + NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT; + cq->dbinfo.toggle = cq->toggle; bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); spin_lock_bh(&cq->compl_lock); @@ -2124,6 +2127,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) cq->dbinfo.xid = cq->id; cq->dbinfo.db = cq->dpi->dbr; cq->dbinfo.priv_db = res->dpi_tbl.priv_db; + cq->dbinfo.flags = 0; + cq->dbinfo.toggle = 0; bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); @@ -3018,6 +3023,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { + cq->dbinfo.toggle = cq->toggle; if (arm_type) bnxt_qplib_ring_db(&cq->dbinfo, arm_type); /* Using cq->arm_state variable to track whether to issue cq handler */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 23c27cb42978..8a6bea201b29 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -418,6 +418,7 @@ struct bnxt_qplib_cq { bool resize_in_progress; struct bnxt_qplib_sg_info sg_info; u64 cq_handle; + u8 toggle; #define CQ_RESIZE_WAIT_TIME_MS 500 unsigned long flags; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 397846bc8712..7e6d9074f900 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -190,6 +190,7 @@ struct bnxt_qplib_db_info { u32 xid; u32 max_slot; u32 flags; + u8 toggle; }; enum bnxt_qplib_db_info_flags_mask { From cdae3936b2fe7f943e8b5b46f3b3d8446aeb2e0c Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:39 -0800 Subject: [PATCH 46/66] RDMA/bnxt_re: Doorbell changes Update the Doorbell routines to support the latest HW definitions. Use common routine to prepare the Doorbell key. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_res.h | 46 +++++++++++++++++------ 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 7e6d9074f900..c228870eed29 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -47,6 +47,9 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_58818 0xd818 #define CHIP_NUM_57608 0x1760 +#define BNXT_QPLIB_DBR_VALID (0x1UL << 26) +#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24 +#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25 struct bnxt_qplib_drv_modes { u8 wqe_mode; @@ -200,6 +203,11 @@ enum bnxt_qplib_db_info_flags_mask { BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL, }; +enum bnxt_qplib_db_epoch_flag_shift { + BNXT_QPLIB_DB_EPOCH_CONS_SHIFT = BNXT_QPLIB_DBR_EPOCH_SHIFT, + BNXT_QPLIB_DB_EPOCH_PROD_SHIFT = (BNXT_QPLIB_DBR_EPOCH_SHIFT - 1), +}; + /* Tables */ struct bnxt_qplib_pd_tbl { unsigned long *tbl; @@ -453,14 +461,27 @@ static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, writel(key, info->db); } +#define BNXT_QPLIB_INIT_DBHDR(xid, type, indx, toggle) \ + (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \ + (type) | BNXT_QPLIB_DBR_VALID) << 32) | (indx) | \ + (((u32)(toggle)) << (BNXT_QPLIB_DBR_TOGGLE_SHIFT))) + static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u32 indx; + u8 toggle = 0; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; - key |= (info->hwq->cons & DBC_DBC_INDEX_MASK); + if (type == DBC_DBC_TYPE_CQ_ARMALL || + type == DBC_DBC_TYPE_CQ_ARMSE) + toggle = info->toggle; + + indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) | + ((info->flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK) << + BNXT_QPLIB_DB_EPOCH_CONS_SHIFT); + + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, toggle); writeq(key, info->db); } @@ -468,10 +489,12 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u32 indx; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; - key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; + indx = (((info->hwq->prod / info->max_slot) & DBC_DBC_INDEX_MASK) | + ((info->flags & BNXT_QPLIB_FLAG_EPOCH_PROD_MASK) << + BNXT_QPLIB_DB_EPOCH_PROD_SHIFT)); + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, 0); writeq(key, info->db); } @@ -479,9 +502,12 @@ static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u8 toggle = 0; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; + if (type == DBC_DBC_TYPE_CQ_ARMENA || type == DBC_DBC_TYPE_SRQ_ARMENA) + toggle = info->toggle; + /* Index always at 0 */ + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, 0, toggle); writeq(key, info->priv_db); } @@ -490,9 +516,7 @@ static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info, { u64 key = 0; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | th; - key <<= 32; - key |= th & DBC_DBC_INDEX_MASK; + key = BNXT_QPLIB_INIT_DBHDR(info->xid, DBC_DBC_TYPE_SRQ_ARM, th, info->toggle); writeq(key, info->priv_db); } From 07f830ae4913d0b986c8c0ff88a7d597948b9bd8 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:40 -0800 Subject: [PATCH 47/66] RDMA/bnxt_re: Adds MSN table capability for Gen P7 adapters GenP7 HW expects an MSN table instead of PSN table. Check for the HW retransmission capability and populate the MSN table if HW retansmission is supported. Signed-off-by: Damodharam Ammepalli Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-7-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 67 ++++++++++++++++++++-- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 14 +++++ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 + drivers/infiniband/hw/bnxt_re/qplib_res.h | 9 +++ include/uapi/rdma/bnxt_re-abi.h | 1 + 5 files changed, 87 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 177c6c185f0c..c98e04fe2ddd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -982,6 +982,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; u16 nsge; + if (res->dattr) + qp->dev_cap_flags = res->dattr->dev_cap_flags; + sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP, @@ -997,6 +1000,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); + + if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + psn_sz = sizeof(struct sq_msn_search); + qp->msn = 0; + } } hwq_attr.res = res; @@ -1005,6 +1013,13 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); + /* Update msn tbl size */ + if (BNXT_RE_HW_RETX(qp->dev_cap_flags) && psn_sz) { + hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + qp->msn_tbl_sz = hwq_attr.aux_depth; + qp->msn = 0; + } + hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -1587,6 +1602,27 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, return NULL; } +/* Fil the MSN table into the next psn row */ +static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_msn_search *msns; + u32 start_psn, next_psn; + u16 start_idx; + + msns = (struct sq_msn_search *)swq->psn_search; + msns->start_idx_next_psn_start_psn = 0; + + start_psn = swq->start_psn; + next_psn = swq->next_psn; + start_idx = swq->slot_idx; + msns->start_idx_next_psn_start_psn |= + bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn); + qp->msn++; + qp->msn %= qp->msn_tbl_sz; +} + static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe, struct bnxt_qplib_swq *swq) @@ -1598,6 +1634,12 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (!swq->psn_search) return; + /* Handle MSN differently on cap flags */ + if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + bnxt_qplib_fill_msn_search(qp, wqe, swq); + return; + } + psns = (struct sq_psn_search *)swq->psn_search; psns = swq->psn_search; psns_ext = swq->psn_ext; @@ -1706,8 +1748,8 @@ static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, return slot; } -static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, - struct bnxt_qplib_swq *swq) +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq, bool hw_retx) { struct bnxt_qplib_hwq *hwq; u32 pg_num, pg_indx; @@ -1718,6 +1760,11 @@ static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, if (!hwq->pad_pg) return; tail = swq->slot_idx / sq->dbinfo.max_slot; + if (hw_retx) { + /* For HW retx use qp msn index */ + tail = qp->msn; + tail %= qp->msn_tbl_sz; + } pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); @@ -1742,6 +1789,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swq *swq; bool sch_handler = false; u16 wqe_sz, qdf = 0; + bool msn_update; void *base_hdr; void *ext_hdr; __le32 temp32; @@ -1769,7 +1817,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } swq = bnxt_qplib_get_swqe(sq, &wqe_idx); - bnxt_qplib_pull_psn_buff(sq, swq); + bnxt_qplib_pull_psn_buff(qp, sq, swq, BNXT_RE_HW_RETX(qp->dev_cap_flags)); idx = 0; swq->slot_idx = hwq->prod; @@ -1801,6 +1849,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, &idx); if (data_len < 0) goto queue_err; + /* Make sure we update MSN table only for wired wqes */ + msn_update = true; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: @@ -1841,6 +1891,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, SQ_SEND_DST_QP_MASK); ext_sqe->avid = cpu_to_le32(wqe->send.avid & SQ_SEND_AVID_MASK); + msn_update = false; } else { sqe->length = cpu_to_le32(data_len); if (qp->mtu) @@ -1898,7 +1949,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key); - + msn_update = false; break; } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: @@ -1930,6 +1981,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, PTU_PTE_VALID); ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); ext_sqe->va = cpu_to_le64(wqe->frmr.va); + msn_update = false; break; } @@ -1947,6 +1999,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, sqe->l_key = cpu_to_le32(wqe->bind.r_key); ext_sqe->va = cpu_to_le64(wqe->bind.va); ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); + msn_update = false; break; } default: @@ -1954,8 +2007,10 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, rc = -EINVAL; goto done; } - swq->next_psn = sq->psn & BTH_PSN_MASK; - bnxt_qplib_fill_psn_search(qp, wqe, swq); + if (!BNXT_RE_HW_RETX(qp->dev_cap_flags) || msn_update) { + swq->next_psn = sq->psn & BTH_PSN_MASK; + bnxt_qplib_fill_psn_search(qp, wqe, swq); + } queue_err: bnxt_qplib_swq_mod_start(sq, wqe_idx); bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 8a6bea201b29..967c6691e413 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -338,6 +338,9 @@ struct bnxt_qplib_qp { dma_addr_t rq_hdr_buf_map; struct list_head sq_flush; struct list_head rq_flush; + u32 msn; + u32 msn_tbl_sz; + u16 dev_cap_flags; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) @@ -627,4 +630,15 @@ static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) return size; } + +/* MSN table update inlin */ +static inline uint64_t bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn) +{ + return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) & + SQ_MSN_SEARCH_START_IDX_MASK) | + (((u64)(npsn) << SQ_MSN_SEARCH_NEXT_PSN_SFT) & + SQ_MSN_SEARCH_NEXT_PSN_MASK) | + (((start_psn) << SQ_MSN_SEARCH_START_PSN_SFT) & + SQ_MSN_SEARCH_START_PSN_MASK)); +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 403b6797d9c2..0ea7ccc70679 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -905,6 +905,8 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf); skip_ctx_setup: + if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags)) + req.flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED; req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id); bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c228870eed29..382d89fa7d16 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -539,6 +539,15 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline bool _is_hw_retx_supported(u16 dev_cap_flags) +{ + return dev_cap_flags & + (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED | + CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED); +} + +#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a)) + static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) { return cctx->modes.dbr_pacing; diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index a1b896d6d940..3342276aeac1 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -55,6 +55,7 @@ enum { BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL, BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL, BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL, + BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40, }; enum bnxt_re_wqe_mode { From 1ca51628e7303718fdabe29c7d36f582500d5cf2 Mon Sep 17 00:00:00 2001 From: Shun Hao Date: Wed, 6 Dec 2023 16:01:34 +0200 Subject: [PATCH 48/66] net/mlx5: Introduce indirect-sw-encap ICM properties Add new fields for device memory capabilities, in order to support creation of new ICM memory type of SW encap. Signed-off-by: Shun Hao Link: https://lore.kernel.org/r/107cca7dd6a932a1704abf6ebd1b801105546a8e.1701871118.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f3631425f38..02b25dc36143 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1193,7 +1193,8 @@ struct mlx5_ifc_device_mem_cap_bits { u8 log_sw_icm_alloc_granularity[0x6]; u8 log_steering_sw_icm_size[0x8]; - u8 reserved_at_120[0x18]; + u8 log_indirect_encap_sw_icm_size[0x8]; + u8 reserved_at_128[0x10]; u8 log_header_modify_pattern_sw_icm_size[0x8]; u8 header_modify_sw_icm_start_address[0x40]; @@ -1204,7 +1205,11 @@ struct mlx5_ifc_device_mem_cap_bits { u8 memic_operations[0x20]; - u8 reserved_at_220[0x5e0]; + u8 reserved_at_220[0x20]; + + u8 indirect_encap_sw_icm_start_address[0x40]; + + u8 reserved_at_280[0x580]; }; struct mlx5_ifc_device_event_cap_bits { From a429ec96c07f3020af12029acefc46f42ff5c91c Mon Sep 17 00:00:00 2001 From: Shun Hao Date: Wed, 6 Dec 2023 16:01:35 +0200 Subject: [PATCH 49/66] RDMA/mlx5: Support handling of SW encap ICM area New type for this ICM area, now the user can allocate/deallocate the new type of SW encap ICM memory, to store the encap header data which are managed by SW. Signed-off-by: Shun Hao Link: https://lore.kernel.org/r/546fe43fc700240709e30acf7713ec6834d652bd.1701871118.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/dm.c | 5 +++++ drivers/infiniband/hw/mlx5/mr.c | 1 + include/linux/mlx5/driver.h | 1 + include/uapi/rdma/mlx5_user_ioctl_verbs.h | 1 + 4 files changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index 3669c90b2dad..b4c97fb62abf 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -341,6 +341,8 @@ static enum mlx5_sw_icm_type get_icm_type(int uapi_type) return MLX5_SW_ICM_TYPE_HEADER_MODIFY; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN; + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: + return MLX5_SW_ICM_TYPE_SW_ENCAP; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: default: return MLX5_SW_ICM_TYPE_STEERING; @@ -364,6 +366,7 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, switch (type) { case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) || MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) || @@ -438,6 +441,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: return handle_alloc_dm_sw_icm(context, attr, attrs, type); default: return ERR_PTR(-EOPNOTSUPP); @@ -491,6 +495,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm)); default: return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 18e459b55746..a8ee2ca1f4a1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1347,6 +1347,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) return ERR_PTR(-EINVAL); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d2b8d4a74a30..96cb8845682d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -688,6 +688,7 @@ enum mlx5_sw_icm_type { MLX5_SW_ICM_TYPE_STEERING, MLX5_SW_ICM_TYPE_HEADER_MODIFY, MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN, + MLX5_SW_ICM_TYPE_SW_ENCAP, }; #define MLX5_MAX_RESERVED_GIDS 8 diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 7af9e09ea556..3189c7f08d17 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -64,6 +64,7 @@ enum mlx5_ib_uapi_dm_type { MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM, MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM, MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM, + MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM, }; enum mlx5_ib_uapi_devx_create_event_channel_flags { From abf8e8f29a3cb6d9c0f599d335f9ad3dcf2dcf11 Mon Sep 17 00:00:00 2001 From: Shun Hao Date: Wed, 6 Dec 2023 16:01:36 +0200 Subject: [PATCH 50/66] net/mlx5: Manage ICM type of SW encap Support allocate/deallocate the new SW encap ICM type memory. The new ICM type is used for encap context allocation managed by SW, instead FW. It can increase encap context maximum number and allocation speed Signed-off-by: Shun Hao Link: https://lore.kernel.org/r/bed5121255918eb132a1334141c76a0594df8143.1701871118.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/lib/dm.c | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 9482e51ac82a..7c5516b0a844 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -13,11 +13,13 @@ struct mlx5_dm { unsigned long *steering_sw_icm_alloc_blocks; unsigned long *header_modify_sw_icm_alloc_blocks; unsigned long *header_modify_pattern_sw_icm_alloc_blocks; + unsigned long *header_encap_sw_icm_alloc_blocks; }; struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) { u64 header_modify_pattern_icm_blocks = 0; + u64 header_sw_encap_icm_blocks = 0; u64 header_modify_icm_blocks = 0; u64 steering_icm_blocks = 0; struct mlx5_dm *dm; @@ -54,6 +56,17 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) goto err_modify_hdr; } + if (MLX5_CAP_DEV_MEM(dev, log_indirect_encap_sw_icm_size)) { + header_sw_encap_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_indirect_encap_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_encap_sw_icm_alloc_blocks = + bitmap_zalloc(header_sw_encap_icm_blocks, GFP_KERNEL); + if (!dm->header_encap_sw_icm_alloc_blocks) + goto err_pattern; + } + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); @@ -66,11 +79,14 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) dm->header_modify_pattern_sw_icm_alloc_blocks = bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL); if (!dm->header_modify_pattern_sw_icm_alloc_blocks) - goto err_pattern; + goto err_sw_encap; } return dm; +err_sw_encap: + bitmap_free(dm->header_encap_sw_icm_alloc_blocks); + err_pattern: bitmap_free(dm->header_modify_sw_icm_alloc_blocks); @@ -105,6 +121,14 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev) bitmap_free(dm->header_modify_sw_icm_alloc_blocks); } + if (dm->header_encap_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_encap_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_indirect_encap_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_encap_sw_icm_alloc_blocks); + } + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, BIT(MLX5_CAP_DEV_MEM(dev, @@ -164,6 +188,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, log_header_modify_pattern_sw_icm_size); block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_SW_ENCAP: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + indirect_encap_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_indirect_encap_sw_icm_size); + block_map = dm->header_encap_sw_icm_alloc_blocks; + break; default: return -EINVAL; } @@ -242,6 +273,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type header_modify_pattern_sw_icm_start_address); block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_SW_ENCAP: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + indirect_encap_sw_icm_start_address); + block_map = dm->header_encap_sw_icm_alloc_blocks; + break; default: return -EINVAL; } From eb524d0fd46249b0b9e5d52372dc65d8b32430c3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 6 Dec 2023 16:01:37 +0200 Subject: [PATCH 51/66] net/mlx5: E-Switch, expose eswitch manager vport Expose the ability the query the eswitch manager vport number. Next patch will utilize this capability to reveal the correct register C0 value to the users. Signed-off-by: Mark Bloch Link: https://lore.kernel.org/r/614fb0e216250e2ce3340471ec141b83ec45c7f4.1701871118.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ------- include/linux/mlx5/eswitch.h | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 37ab66e7b403..60a9a6cba0b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -616,13 +616,6 @@ static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) return esw && MLX5_ESWITCH_MANAGER(esw->dev); } -/* The returned number is valid only when the dev is eswitch manager. */ -static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) -{ - return mlx5_core_is_ecpf_esw_manager(dev) ? - MLX5_VPORT_ECPF : MLX5_VPORT_PF; -} - static inline bool mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 950d2431a53c..df73a2ccc9af 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include +#include #include #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -210,4 +211,11 @@ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; } +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #endif From d727d27db536faea7178290c677cc0567f647231 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 6 Dec 2023 16:01:38 +0200 Subject: [PATCH 52/66] RDMA/mlx5: Expose register c0 for RDMA device This patch introduces improvements for matching egress traffic sent by the local device. When applicable, all egress traffic from the local vport is now tagged with the provided value. This enhancement is particularly useful for FDB steering purposes. The primary focus of this update is facilitating the transmission of traffic from the hypervisor to a VF. To achieve this, one must initiate an SQ on the hypervisor and subsequently create a rule in the FDB that matches on the eswitch manager vport and the SQN of the aforementioned SQ. Obtaining the SQN can be had from SQ opened, and the eswitch manager vport match can be substituted with the register c0 value exposed by this patch. Signed-off-by: Mark Bloch Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/aa4120a91c98ff1c44f1213388c744d4cb0324d6.1701871118.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 24 ++++++++++++++++++++++++ include/uapi/rdma/mlx5-abi.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 650a15b6cfbc..c2b557e64290 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -818,6 +818,17 @@ static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) MLX5_REG_NODE_DESC, 0, 0); } +static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev, + struct mlx5_ib_query_device_resp *resp) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + u16 vport = mlx5_eswitch_manager_vport(mdev); + + resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw, + vport); + resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); +} + static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -1209,6 +1220,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_GEN(mdev, log_max_dci_errored_streams); } + if (offsetofend(typeof(resp), reserved) <= uhw_outlen) + resp.response_length += sizeof(resp.reserved); + + if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + resp.response_length += sizeof(resp.reg_c0); + + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_vport_match_metadata_enabled(esw)) + fill_esw_mgr_reg_c0(mdev, &resp); + } + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a96b7d2770e1..d4f6a36dffb0 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -37,6 +37,7 @@ #include #include /* For ETH_ALEN. */ #include +#include enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -275,6 +276,7 @@ struct mlx5_ib_query_device_resp { __u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */ struct mlx5_ib_dci_streams_caps dci_streams_caps; __u16 reserved; + struct mlx5_ib_uapi_reg reg_c0; }; enum mlx5_ib_create_cq_flags { From 4f973e211b3b1c6d36f7c6a19239d258856749f9 Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Tue, 12 Dec 2023 09:07:45 +0100 Subject: [PATCH 53/66] IB/ipoib: Fix mcast list locking Releasing the `priv->lock` while iterating the `priv->multicast_list` in `ipoib_mcast_join_task()` opens a window for `ipoib_mcast_dev_flush()` to remove the items while in the middle of iteration. If the mcast is removed while the lock was dropped, the for loop spins forever resulting in a hard lockup (as was reported on RHEL 4.18.0-372.75.1.el8_6 kernel): Task A (kworker/u72:2 below) | Task B (kworker/u72:0 below) -----------------------------------+----------------------------------- ipoib_mcast_join_task(work) | ipoib_ib_dev_flush_light(work) spin_lock_irq(&priv->lock) | __ipoib_ib_dev_flush(priv, ...) list_for_each_entry(mcast, | ipoib_mcast_dev_flush(dev = priv->dev) &priv->multicast_list, list) | ipoib_mcast_join(dev, mcast) | spin_unlock_irq(&priv->lock) | | spin_lock_irqsave(&priv->lock, flags) | list_for_each_entry_safe(mcast, tmcast, | &priv->multicast_list, list) | list_del(&mcast->list); | list_add_tail(&mcast->list, &remove_list) | spin_unlock_irqrestore(&priv->lock, flags) spin_lock_irq(&priv->lock) | | ipoib_mcast_remove_list(&remove_list) (Here, `mcast` is no longer on the | list_for_each_entry_safe(mcast, tmcast, `priv->multicast_list` and we keep | remove_list, list) spinning on the `remove_list` of | >>> wait_for_completion(&mcast->done) the other thread which is blocked | and the list is still valid on | it's stack.) Fix this by keeping the lock held and changing to GFP_ATOMIC to prevent eventual sleeps. Unfortunately we could not reproduce the lockup and confirm this fix but based on the code review I think this fix should address such lockups. crash> bc 31 PID: 747 TASK: ff1c6a1a007e8000 CPU: 31 COMMAND: "kworker/u72:2" -- [exception RIP: ipoib_mcast_join_task+0x1b1] RIP: ffffffffc0944ac1 RSP: ff646f199a8c7e00 RFLAGS: 00000002 RAX: 0000000000000000 RBX: ff1c6a1a04dc82f8 RCX: 0000000000000000 work (&priv->mcast_task{,.work}) RDX: ff1c6a192d60ac68 RSI: 0000000000000286 RDI: ff1c6a1a04dc8000 &mcast->list RBP: ff646f199a8c7e90 R8: ff1c699980019420 R9: ff1c6a1920c9a000 R10: ff646f199a8c7e00 R11: ff1c6a191a7d9800 R12: ff1c6a192d60ac00 mcast R13: ff1c6a1d82200000 R14: ff1c6a1a04dc8000 R15: ff1c6a1a04dc82d8 dev priv (&priv->lock) &priv->multicast_list (aka head) ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- --- #5 [ff646f199a8c7e00] ipoib_mcast_join_task+0x1b1 at ffffffffc0944ac1 [ib_ipoib] #6 [ff646f199a8c7e98] process_one_work+0x1a7 at ffffffff9bf10967 crash> rx ff646f199a8c7e68 ff646f199a8c7e68: ff1c6a1a04dc82f8 <<< work = &priv->mcast_task.work crash> list -hO ipoib_dev_priv.multicast_list ff1c6a1a04dc8000 (empty) crash> ipoib_dev_priv.mcast_task.work.func,mcast_mutex.owner.counter ff1c6a1a04dc8000 mcast_task.work.func = 0xffffffffc0944910 , mcast_mutex.owner.counter = 0xff1c69998efec000 crash> b 8 PID: 8 TASK: ff1c69998efec000 CPU: 33 COMMAND: "kworker/u72:0" -- #3 [ff646f1980153d50] wait_for_completion+0x96 at ffffffff9c7d7646 #4 [ff646f1980153d90] ipoib_mcast_remove_list+0x56 at ffffffffc0944dc6 [ib_ipoib] #5 [ff646f1980153de8] ipoib_mcast_dev_flush+0x1a7 at ffffffffc09455a7 [ib_ipoib] #6 [ff646f1980153e58] __ipoib_ib_dev_flush+0x1a4 at ffffffffc09431a4 [ib_ipoib] #7 [ff646f1980153e98] process_one_work+0x1a7 at ffffffff9bf10967 crash> rx ff646f1980153e68 ff646f1980153e68: ff1c6a1a04dc83f0 <<< work = &priv->flush_light crash> ipoib_dev_priv.flush_light.func,broadcast ff1c6a1a04dc8000 flush_light.func = 0xffffffffc0943820 , broadcast = 0x0, The mcast(s) on the `remove_list` (the remaining part of the ex `priv->multicast_list`): crash> list -s ipoib_mcast.done.done ipoib_mcast.list -H ff646f1980153e10 | paste - - ff1c6a192bd0c200 done.done = 0x0, ff1c6a192d60ac00 done.done = 0x0, Reported-by: Yuya Fujita-bishamonten Signed-off-by: Daniel Vacek Link: https://lore.kernel.org/all/20231212080746.1528802-1-neelx@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9e6967a40042..319d4288eddd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -531,21 +531,17 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } - spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, - &rec, comp_mask, GFP_KERNEL, + &rec, comp_mask, GFP_ATOMIC, ipoib_mcast_join_complete, mcast); - spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); complete(&mcast->done); - spin_lock_irq(&priv->lock); return ret; } return 0; From 9b0a7a2cb87d9c430a3588d7d2b6e471200b86ad Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 13 Dec 2023 22:31:23 -0800 Subject: [PATCH 54/66] RDMA/bnxt_re: Add UAPI to share a page with user space Gen P7 adapters require to share a toggle value for CQ and SRQ. This is received by the driver as part of interrupt notifications and needs to be shared with the user space. Add a new UAPI infrastructure to get the shared page for CQ and SRQ. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1702535484-26844-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 105 +++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 1 + include/uapi/rdma/bnxt_re-abi.h | 26 ++++++ 3 files changed, 132 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e7ef099c3edd..758ea02e1d13 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -567,6 +567,7 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset, case BNXT_RE_MMAP_WC_DB: case BNXT_RE_MMAP_DBR_BAR: case BNXT_RE_MMAP_DBR_PAGE: + case BNXT_RE_MMAP_TOGGLE_PAGE: ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx, &entry->rdma_entry, PAGE_SIZE); break; @@ -4254,6 +4255,7 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) rdma_entry); break; case BNXT_RE_MMAP_DBR_PAGE: + case BNXT_RE_MMAP_TOGGLE_PAGE: /* Driver doesn't expect write access for user space */ if (vma->vm_flags & VM_WRITE) return -EFAULT; @@ -4430,8 +4432,111 @@ DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV); DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV, &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV)); +/* Toggle MEM */ +static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_TOGGLE_MEM_HANDLE); + enum bnxt_re_mmap_flag mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE; + enum bnxt_re_get_toggle_mem_type res_type; + struct bnxt_re_user_mmap_entry *entry; + struct bnxt_re_ucontext *uctx; + struct ib_ucontext *ib_uctx; + struct bnxt_re_dev *rdev; + u64 mem_offset; + u64 addr = 0; + u32 length; + u32 offset; + int err; + + ib_uctx = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ib_uctx)) + return PTR_ERR(ib_uctx); + + err = uverbs_get_const(&res_type, attrs, BNXT_RE_TOGGLE_MEM_TYPE); + if (err) + return err; + + uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); + rdev = uctx->rdev; + + switch (res_type) { + case BNXT_RE_CQ_TOGGLE_MEM: + case BNXT_RE_SRQ_TOGGLE_MEM: + break; + + default: + return -EOPNOTSUPP; + } + + entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mem_offset); + if (!entry) + return -ENOMEM; + + uobj->object = entry; + uverbs_finalize_uobj_create(attrs, BNXT_RE_TOGGLE_MEM_HANDLE); + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_PAGE, + &mem_offset, sizeof(mem_offset)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, + &offset, sizeof(length)); + if (err) + return err; + + return 0; +} + +static int get_toggle_mem_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_user_mmap_entry *entry = uobject->object; + + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM, + UVERBS_ATTR_IDR(BNXT_RE_TOGGLE_MEM_HANDLE, + BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(BNXT_RE_TOGGLE_MEM_TYPE, + enum bnxt_re_get_toggle_mem_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(BNXT_RE_TOGGLE_MEM_RES_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_PAGE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM, + UVERBS_ATTR_IDR(BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE, + BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_TYPE_ALLOC_IDR(get_toggle_mem_obj_cleanup), + &UVERBS_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM), + &UVERBS_METHOD(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM)); + const struct uapi_definition bnxt_re_uapi_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_GET_TOGGLE_MEM), {} }; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 98baea98fc17..da3fe018f255 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -149,6 +149,7 @@ enum bnxt_re_mmap_flag { BNXT_RE_MMAP_WC_DB, BNXT_RE_MMAP_DBR_PAGE, BNXT_RE_MMAP_DBR_BAR, + BNXT_RE_MMAP_TOGGLE_PAGE, }; struct bnxt_re_user_mmap_entry { diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 3342276aeac1..9b9eb10cb5e5 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -143,6 +143,7 @@ enum bnxt_re_shpg_offt { enum bnxt_re_objects { BNXT_RE_OBJECT_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT), BNXT_RE_OBJECT_NOTIFY_DRV, + BNXT_RE_OBJECT_GET_TOGGLE_MEM, }; enum bnxt_re_alloc_page_type { @@ -171,4 +172,29 @@ enum bnxt_re_alloc_page_methods { enum bnxt_re_notify_drv_methods { BNXT_RE_METHOD_NOTIFY_DRV = (1U << UVERBS_ID_NS_SHIFT), }; + +/* Toggle mem */ + +enum bnxt_re_get_toggle_mem_type { + BNXT_RE_CQ_TOGGLE_MEM = 0, + BNXT_RE_SRQ_TOGGLE_MEM, +}; + +enum bnxt_re_var_toggle_mem_attrs { + BNXT_RE_TOGGLE_MEM_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + BNXT_RE_TOGGLE_MEM_TYPE, + BNXT_RE_TOGGLE_MEM_RES_ID, + BNXT_RE_TOGGLE_MEM_MMAP_PAGE, + BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, + BNXT_RE_TOGGLE_MEM_MMAP_LENGTH, +}; + +enum bnxt_re_toggle_mem_attrs { + BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum bnxt_re_toggle_mem_methods { + BNXT_RE_METHOD_GET_TOGGLE_MEM = (1U << UVERBS_ID_NS_SHIFT), + BNXT_RE_METHOD_RELEASE_TOGGLE_MEM, +}; #endif /* __BNXT_RE_UVERBS_ABI_H__*/ From e275919d96693c5ca964b20d73a33d52a7e57f04 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 13 Dec 2023 22:31:24 -0800 Subject: [PATCH 55/66] RDMA/bnxt_re: Share a page to expose per CQ info with userspace Gen P7 adapters needs to share a toggle bits information received in kernel driver with the user space. User space needs this info during the request notify call back to arm the CQ. User space application can get this page using the UAPI routines. Library will mmap this page and get the toggle bits to be used in the next ARM Doorbell. Uses a hash list to map the CQ structure from the CQ ID. CQ structure is retrieved from the hash list while the library calls the UAPI routine to get the toggle page mapping. Currently the full page is mapped per CQ. This can be optimized to enable multiple CQs from the same application share the same page and different offsets in the page. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1702535484-26844-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 3 ++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 59 ++++++++++++++++++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 + drivers/infiniband/hw/bnxt_re/main.c | 10 +++- drivers/infiniband/hw/bnxt_re/qplib_res.h | 6 +++ include/uapi/rdma/bnxt_re-abi.h | 5 ++ 6 files changed, 78 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 9fd9849ebdd1..9dca451ed522 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -41,6 +41,7 @@ #define __BNXT_RE_H__ #include #include "hw_counters.h" +#include #define ROCE_DRV_MODULE_NAME "bnxt_re" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" @@ -135,6 +136,7 @@ struct bnxt_re_pacing { #define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 +#define MAX_CQ_HASH_BITS (16) struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; @@ -189,6 +191,7 @@ struct bnxt_re_dev { struct bnxt_re_pacing pacing; struct work_struct dbq_fifo_check_work; struct delayed_work dbq_pacing_work; + DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS); }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 758ea02e1d13..7213dc7574d0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "bnxt_ulp.h" @@ -2910,14 +2911,20 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, /* Completion Queues */ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { - struct bnxt_re_cq *cq; + struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq; struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); rdev = cq->rdev; nq = cq->qplib_cq.nq; + cctx = rdev->chip_ctx; + if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) { + free_page((unsigned long)cq->uctx_cq_page); + hash_del(&cq->hash_entry); + } bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); ib_umem_release(cq->umem); @@ -2935,10 +2942,11 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - int rc, entries; - int cqe = attr->cqe; + struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq = NULL; + int rc = -ENOMEM, entries; unsigned int nq_alloc_cnt; + int cqe = attr->cqe; u32 active_cqs; if (attr->flags) @@ -2951,6 +2959,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->rdev = rdev; + cctx = rdev->chip_ctx; cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq); entries = bnxt_re_init_depth(cqe + 1, uctx); @@ -3012,22 +3021,32 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&cq->cq_lock); if (udata) { - struct bnxt_re_cq_resp resp; + struct bnxt_re_cq_resp resp = {}; + if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) { + hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id); + /* Allocate a page */ + cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!cq->uctx_cq_page) + goto c2fail; + resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT; + } resp.cqid = cq->qplib_cq.id; resp.tail = cq->qplib_cq.hwq.cons; resp.phase = cq->qplib_cq.period; resp.rsvd = 0; - rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to copy CQ udata"); bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); - goto c2fail; + goto free_mem; } } return 0; +free_mem: + free_page((unsigned long)cq->uctx_cq_page); c2fail: ib_umem_release(cq->umem); fail: @@ -4214,6 +4233,19 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) } } +static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id) +{ + struct bnxt_re_cq *cq = NULL, *tmp_cq; + + hash_for_each_possible(rdev->cq_hash, tmp_cq, hash_entry, cq_id) { + if (tmp_cq->qplib_cq.id == cq_id) { + cq = tmp_cq; + break; + } + } + return cq; +} + /* Helper function to mmap the virtual memory from user app */ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) { @@ -4442,10 +4474,12 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bund struct bnxt_re_ucontext *uctx; struct ib_ucontext *ib_uctx; struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; u64 mem_offset; u64 addr = 0; u32 length; u32 offset; + u32 cq_id; int err; ib_uctx = ib_uverbs_get_ucontext(attrs); @@ -4461,6 +4495,19 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bund switch (res_type) { case BNXT_RE_CQ_TOGGLE_MEM: + err = uverbs_copy_from(&cq_id, attrs, BNXT_RE_TOGGLE_MEM_RES_ID); + if (err) + return err; + + cq = bnxt_re_search_for_cq(rdev, cq_id); + if (!cq) + return -EINVAL; + + length = PAGE_SIZE; + addr = (u64)cq->uctx_cq_page; + mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE; + offset = 0; + break; case BNXT_RE_SRQ_TOGGLE_MEM: break; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index da3fe018f255..b267d6d5975f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -108,6 +108,8 @@ struct bnxt_re_cq { struct ib_umem *umem; struct ib_umem *resize_umem; int resize_cqe; + void *uctx_cq_page; + struct hlist_node hash_entry; }; struct bnxt_re_mr { diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 7f4f6db1392c..eb03ebad2e5a 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "bnxt_ulp.h" #include "roce_hsi.h" @@ -136,6 +137,8 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), "Failed to query hwrm qcaps\n"); + if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) + cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT; } static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) @@ -1206,9 +1209,13 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, { struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, qplib_cq); + u32 *cq_ptr; if (cq->ib_cq.comp_handler) { - /* Lock comp_handler? */ + if (cq->uctx_cq_page) { + cq_ptr = (u32 *)cq->uctx_cq_page; + *cq_ptr = cq->qplib_cq.toggle; + } (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); } @@ -1730,6 +1737,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) */ bnxt_re_vf_res_config(rdev); } + hash_init(rdev->cq_hash); return 0; free_sctx: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 382d89fa7d16..61628f7f1253 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -55,6 +55,12 @@ struct bnxt_qplib_drv_modes { u8 wqe_mode; bool db_push; bool dbr_pacing; + u32 toggle_bits; +}; + +enum bnxt_re_toggle_modes { + BNXT_QPLIB_CQ_TOGGLE_BIT = 0x1, + BNXT_QPLIB_SRQ_TOGGLE_BIT = 0x2, }; struct bnxt_qplib_chip_ctx { diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 9b9eb10cb5e5..c0c34aca90ec 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -102,11 +102,16 @@ struct bnxt_re_cq_req { __aligned_u64 cq_handle; }; +enum bnxt_re_cq_mask { + BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT = 0x1, +}; + struct bnxt_re_cq_resp { __u32 cqid; __u32 tail; __u32 phase; __u32 rsvd; + __aligned_u64 comp_mask; }; struct bnxt_re_resize_cq_req { From 9248f363d0791a548a9c7711365b8be4c70bd375 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 19 Dec 2023 04:11:40 -0800 Subject: [PATCH 56/66] RDMA/bnxt_re: Fix the offset for GenP7 adapters for user applications User Doorbell page indexes start at an offset for GenP7 adapters. Fix the offset that will be used for user doorbell page indexes. Fixes: a62d68581441 ("RDMA/bnxt_re: Update the BAR offsets") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1702987900-5363-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index eb03ebad2e5a..f022c922fae5 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -108,12 +108,14 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ - if (bnxt_qplib_is_chip_gen_p7(cctx)) + if (bnxt_qplib_is_chip_gen_p7(cctx)) { res->dpi_tbl.ucreg.offset = offset; - else + res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; + } else { res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : BNXT_QPLIB_DBR_PF_DB_OFFSET; - res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + } /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size * is equal to the DB-Bar actual size. This indicates that L2 From 82a8903a9f9f3ff31027b9a0b92f7505f981f09c Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 19 Dec 2023 20:31:57 -0800 Subject: [PATCH 57/66] RDMA/bnxt_re: Fix the sparse warnings Fix the following warnings reported drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:909:27: warning: invalid assignment: |= drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:909:27: left side has type restricted __le16 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:909:27: right side has type unsigned long ... drivers/infiniband/hw/bnxt_re/qplib_fp.c:1620:44: warning: invalid assignment: |= drivers/infiniband/hw/bnxt_re/qplib_fp.c:1620:44: left side has type restricted __le64 drivers/infiniband/hw/bnxt_re/qplib_fp.c:1620:44: right side has type unsigned long long Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312200537.HoNqPL5L-lkp@intel.com/ Fixes: 07f830ae4913 ("RDMA/bnxt_re: Adds MSN table capability for Gen P7 adapters") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1703046717-8914-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 967c6691e413..7fd4506b3584 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -632,7 +632,7 @@ static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) } /* MSN table update inlin */ -static inline uint64_t bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn) +static inline __le64 bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn) { return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) & SQ_MSN_SEARCH_START_IDX_MASK) | diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 0ea7ccc70679..3ffaef0c2651 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -906,7 +906,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, skip_ctx_setup: if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags)) - req.flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED; + req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED); req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id); bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); From a7f0636d223ca9d074eb5b4ae71425e082c5c1e1 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 15 Dec 2023 18:04:13 -0800 Subject: [PATCH 58/66] RDMA/mana_ib: register RDMA device with GDMA Software client needs to register with the RDMA management interface on the SoC to access more features, including querying device capabilities and RC queue pair. Signed-off-by: Long Li Link: https://lore.kernel.org/r/1702692255-23640-2-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/device.c | 24 +++++++++++++++---- drivers/infiniband/hw/mana/main.c | 4 ++-- drivers/infiniband/hw/mana/qp.c | 15 ++++++------ .../net/ethernet/microsoft/mana/gdma_main.c | 5 ++++ include/net/mana/gdma.h | 4 ++++ 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index d4541b8707e4..fe025e13a45c 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -68,7 +68,6 @@ static int mana_ib_probe(struct auxiliary_device *adev, ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev, mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); - dev->gdma_dev = mdev; dev->ib_dev.node_type = RDMA_NODE_IB_CA; /* @@ -78,16 +77,28 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = mdev->gdma_context->dev; + ret = mana_gd_register_device(&mdev->gdma_context->mana_ib); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register device, ret %d", + ret); + goto free_ib_device; + } + dev->gdma_dev = &mdev->gdma_context->mana_ib; + ret = ib_register_device(&dev->ib_dev, "mana_%d", mdev->gdma_context->dev); - if (ret) { - ib_dealloc_device(&dev->ib_dev); - return ret; - } + if (ret) + goto deregister_device; dev_set_drvdata(&adev->dev, dev); return 0; + +deregister_device: + mana_gd_deregister_device(dev->gdma_dev); +free_ib_device: + ib_dealloc_device(&dev->ib_dev); + return ret; } static void mana_ib_remove(struct auxiliary_device *adev) @@ -95,6 +106,9 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); + + mana_gd_deregister_device(dev->gdma_dev); + ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 7be4c3adb4e2..53730306ed9b 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -8,7 +8,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, u32 port) { - struct gdma_dev *gd = dev->gdma_dev; + struct gdma_dev *gd = &dev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct net_device *ndev; struct mana_context *mc; @@ -31,7 +31,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd, u32 doorbell_id) { - struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_dev *mdev = &dev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 4b3b5b274e84..ae45d28eef5e 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,8 +21,8 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, u32 req_buf_size; int i, err; - mdev = dev->gdma_dev; - gc = mdev->gdma_context; + gc = dev->gdma_dev->gdma_context; + mdev = &gc->mana; req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE; @@ -102,20 +102,21 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; struct mana_ib_create_qp_rss_resp resp = {}; struct mana_ib_create_qp_rss ucmd = {}; - struct gdma_dev *gd = mdev->gdma_dev; mana_handle_t *mana_ind_table; struct mana_port_context *mpc; + unsigned int ind_tbl_size; struct mana_context *mc; struct net_device *ndev; struct mana_ib_cq *cq; struct mana_ib_wq *wq; - unsigned int ind_tbl_size; + struct gdma_dev *gd; struct ib_cq *ibcq; struct ib_wq *ibwq; int i = 0; u32 port; int ret; + gd = &mdev->gdma_dev->gdma_context->mana; mc = gd->driver_data; if (!udata || udata->inlen < sizeof(ucmd)) @@ -266,8 +267,8 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, struct mana_ib_ucontext *mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext); + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct mana_ib_create_qp_resp resp = {}; - struct gdma_dev *gd = mdev->gdma_dev; struct mana_ib_create_qp ucmd = {}; struct mana_obj_spec wq_spec = {}; struct mana_obj_spec cq_spec = {}; @@ -437,7 +438,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, { struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); - struct gdma_dev *gd = mdev->gdma_dev; + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; @@ -464,7 +465,7 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) { struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); - struct gdma_dev *gd = mdev->gdma_dev; + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct ib_pd *ibpd = qp->ibqp.pd; struct mana_port_context *mpc; struct mana_context *mc; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 6367de0c2c2e..e6e71e3c357c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -158,6 +158,9 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) if (dev_type == GDMA_DEVICE_MANA) { gc->mana.gdma_context = gc; gc->mana.dev_id = dev; + } else if (dev_type == GDMA_DEVICE_MANA_IB) { + gc->mana_ib.dev_id = dev; + gc->mana_ib.gdma_context = gc; } } @@ -971,6 +974,7 @@ int mana_gd_register_device(struct gdma_dev *gd) return 0; } +EXPORT_SYMBOL_NS(mana_gd_register_device, NET_MANA); int mana_gd_deregister_device(struct gdma_dev *gd) { @@ -1001,6 +1005,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd) return err; } +EXPORT_SYMBOL_NS(mana_gd_deregister_device, NET_MANA); u32 mana_gd_wq_avail_space(struct gdma_queue *wq) { diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 88b6ef7ce1a6..000f0d7670f7 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -66,6 +66,7 @@ enum { GDMA_DEVICE_NONE = 0, GDMA_DEVICE_HWC = 1, GDMA_DEVICE_MANA = 2, + GDMA_DEVICE_MANA_IB = 3, }; struct gdma_resource { @@ -387,6 +388,9 @@ struct gdma_context { /* Azure network adapter */ struct gdma_dev mana; + + /* Azure RDMA adapter */ + struct gdma_dev mana_ib; }; #define MAX_NUM_GDMA_DEVICES 4 From 2c20e20b22d9fc64072e3445ae3ca244cbd523a2 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 15 Dec 2023 18:04:14 -0800 Subject: [PATCH 59/66] RDMA/mana_ib: query device capabilities With RDMA device registered, use it to query on hardware capabilities and cache this information for future query requests to the driver. Signed-off-by: Long Li Link: https://lore.kernel.org/r/1702692255-23640-3-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/cq.c | 2 +- drivers/infiniband/hw/mana/device.c | 7 ++++ drivers/infiniband/hw/mana/main.c | 63 ++++++++++++++++++++++------ drivers/infiniband/hw/mana/mana_ib.h | 50 ++++++++++++++++++++++ drivers/infiniband/hw/mana/qp.c | 4 +- include/net/mana/gdma.h | 1 + 6 files changed, 112 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..09a2c263e39b 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -26,7 +26,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return err; } - if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cqe > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); return -EINVAL; } diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index fe025e13a45c..6fa902ee80a6 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -85,6 +85,13 @@ static int mana_ib_probe(struct auxiliary_device *adev, } dev->gdma_dev = &mdev->gdma_context->mana_ib; + ret = mana_ib_gd_query_adapter_caps(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", + ret); + goto deregister_device; + } + ret = ib_register_device(&dev->ib_dev, "mana_%d", mdev->gdma_context->dev); if (ret) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 53730306ed9b..faca092456fa 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -486,20 +486,17 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { - props->max_qp = MANA_MAX_NUM_QUEUES; - props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE; - - /* - * max_cqe could be potentially much bigger. - * As this version of driver only support RAW QP, set it to the same - * value as max_qp_wr - */ - props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE; + struct mana_ib_dev *dev = container_of(ibdev, + struct mana_ib_dev, ib_dev); + props->max_qp = dev->adapter_caps.max_qp_count; + props->max_qp_wr = dev->adapter_caps.max_qp_wr; + props->max_cq = dev->adapter_caps.max_cq_count; + props->max_cqe = dev->adapter_caps.max_qp_wr; + props->max_mr = dev->adapter_caps.max_mr_count; props->max_mr_size = MANA_IB_MAX_MR_SIZE; - props->max_mr = MANA_IB_MAX_MR; - props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES; - props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES; + props->max_send_sge = dev->adapter_caps.max_send_sge_count; + props->max_recv_sge = dev->adapter_caps.max_recv_sge_count; return 0; } @@ -521,3 +518,45 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { } + +int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) +{ + struct mana_ib_adapter_caps *caps = &dev->adapter_caps; + struct mana_ib_query_adapter_caps_resp resp = {}; + struct mana_ib_query_adapter_caps_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), + sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.dev_id = dev->gdma_dev->dev_id; + + err = mana_gd_send_request(dev->gdma_dev->gdma_context, sizeof(req), + &req, sizeof(resp), &resp); + + if (err) { + ibdev_err(&dev->ib_dev, + "Failed to query adapter caps err %d", err); + return err; + } + + caps->max_sq_id = resp.max_sq_id; + caps->max_rq_id = resp.max_rq_id; + caps->max_cq_id = resp.max_cq_id; + caps->max_qp_count = resp.max_qp_count; + caps->max_cq_count = resp.max_cq_count; + caps->max_mr_count = resp.max_mr_count; + caps->max_pd_count = resp.max_pd_count; + caps->max_inbound_read_limit = resp.max_inbound_read_limit; + caps->max_outbound_read_limit = resp.max_outbound_read_limit; + caps->mw_count = resp.mw_count; + caps->max_srq_count = resp.max_srq_count; + caps->max_qp_wr = min_t(u32, + resp.max_requester_sq_size / GDMA_MAX_SQE_SIZE, + resp.max_requester_rq_size / GDMA_MAX_RQE_SIZE); + caps->max_inline_data_size = resp.max_inline_data_size; + caps->max_send_sge_count = resp.max_send_sge_count; + caps->max_recv_sge_count = resp.max_recv_sge_count; + + return 0; +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 502cc8672eef..3329eaacc94e 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -27,9 +27,28 @@ */ #define MANA_IB_MAX_MR 0xFFFFFFu +struct mana_ib_adapter_caps { + u32 max_sq_id; + u32 max_rq_id; + u32 max_cq_id; + u32 max_qp_count; + u32 max_cq_count; + u32 max_mr_count; + u32 max_pd_count; + u32 max_inbound_read_limit; + u32 max_outbound_read_limit; + u32 mw_count; + u32 max_srq_count; + u32 max_qp_wr; + u32 max_send_sge_count; + u32 max_recv_sge_count; + u32 max_inline_data_size; +}; + struct mana_ib_dev { struct ib_device ib_dev; struct gdma_dev *gdma_dev; + struct mana_ib_adapter_caps adapter_caps; }; struct mana_ib_wq { @@ -92,6 +111,36 @@ struct mana_ib_rwq_ind_table { struct ib_rwq_ind_table ib_ind_table; }; +enum mana_ib_command_code { + MANA_IB_GET_ADAPTER_CAP = 0x30001, +}; + +struct mana_ib_query_adapter_caps_req { + struct gdma_req_hdr hdr; +}; /*HW Data */ + +struct mana_ib_query_adapter_caps_resp { + struct gdma_resp_hdr hdr; + u32 max_sq_id; + u32 max_rq_id; + u32 max_cq_id; + u32 max_qp_count; + u32 max_cq_count; + u32 max_mr_count; + u32 max_pd_count; + u32 max_inbound_read_limit; + u32 max_outbound_read_limit; + u32 mw_count; + u32 max_srq_count; + u32 max_requester_sq_size; + u32 max_responder_sq_size; + u32 max_requester_rq_size; + u32 max_responder_rq_size; + u32 max_send_sge_count; + u32 max_recv_sge_count; + u32 max_inline_data_size; +}; /* HW Data */ + int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, mana_handle_t *gdma_region); @@ -159,4 +208,5 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); +int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev); #endif diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ae45d28eef5e..4667b18ec1dd 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -130,7 +130,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, return ret; } - if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cap.max_recv_wr > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(&mdev->ib_dev, "Requested max_recv_wr %d exceeding limit\n", attr->cap.max_recv_wr); @@ -296,7 +296,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, if (port < 1 || port > mc->num_ports) return -EINVAL; - if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(&mdev->ib_dev, "Requested max_send_wr %d exceeding limit\n", attr->cap.max_send_wr); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 000f0d7670f7..797971e2d5a5 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -150,6 +150,7 @@ struct gdma_general_req { #define GDMA_MESSAGE_V1 1 #define GDMA_MESSAGE_V2 2 +#define GDMA_MESSAGE_V3 3 struct gdma_general_resp { struct gdma_resp_hdr hdr; From c15d7802a42402a87880a17eee89ff023e49ecc0 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 15 Dec 2023 18:04:15 -0800 Subject: [PATCH 60/66] RDMA/mana_ib: Add CQ interrupt support for RAW QP At probing time, the MANA core code allocates EQs for supporting interrupts on Ethernet queues. The same interrupt mechanisum is used by RAW QP. Use the same EQs for delivering interrupts on the CQ for the RAW QP. Signed-off-by: Long Li Link: https://lore.kernel.org/r/1702692255-23640-4-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/cq.c | 32 +++++++++++- drivers/infiniband/hw/mana/mana_ib.h | 3 ++ drivers/infiniband/hw/mana/qp.c | 74 +++++++++++++++++++++++++--- 3 files changed, 102 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index 09a2c263e39b..83ebd070535a 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_device *ibdev = ibcq->device; struct mana_ib_create_cq ucmd = {}; struct mana_ib_dev *mdev; + struct gdma_context *gc; int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; if (udata->inlen < sizeof(ucmd)) return -EINVAL; + if (attr->comp_vector > gc->max_num_queues) + return -EINVAL; + + cq->comp_vector = attr->comp_vector; + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); if (err) { ibdev_dbg(ibdev, @@ -56,6 +63,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* * The CQ ID is not known at this time. The ID is generated at create_qp */ + cq->id = INVALID_QUEUE_ID; return 0; @@ -69,11 +77,33 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); struct ib_device *ibdev = ibcq->device; struct mana_ib_dev *mdev; + struct gdma_context *gc; + int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); + if (err) { + ibdev_dbg(ibdev, + "Failed to destroy dma region, %d\n", err); + return err; + } + + if (cq->id != INVALID_QUEUE_ID) { + kfree(gc->cq_table[cq->id]); + gc->cq_table[cq->id] = NULL; + } - mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); ib_umem_release(cq->umem); return 0; } + +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) +{ + struct mana_ib_cq *cq = ctx; + + if (cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 3329eaacc94e..6bdc0f5498d5 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -86,6 +86,7 @@ struct mana_ib_cq { int cqe; u64 gdma_region; u64 id; + u32 comp_vector; }; struct mana_ib_qp { @@ -209,4 +210,6 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev); + +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq); #endif diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 4667b18ec1dd..21ac9fcadf3f 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -102,21 +102,26 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; struct mana_ib_create_qp_rss_resp resp = {}; struct mana_ib_create_qp_rss ucmd = {}; + struct gdma_queue **gdma_cq_allocated; mana_handle_t *mana_ind_table; struct mana_port_context *mpc; + struct gdma_queue *gdma_cq; unsigned int ind_tbl_size; struct mana_context *mc; struct net_device *ndev; + struct gdma_context *gc; struct mana_ib_cq *cq; struct mana_ib_wq *wq; struct gdma_dev *gd; + struct mana_eq *eq; struct ib_cq *ibcq; struct ib_wq *ibwq; int i = 0; u32 port; int ret; - gd = &mdev->gdma_dev->gdma_context->mana; + gc = mdev->gdma_dev->gdma_context; + gd = &gc->mana; mc = gd->driver_data; if (!udata || udata->inlen < sizeof(ucmd)) @@ -179,6 +184,13 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } + gdma_cq_allocated = kcalloc(ind_tbl_size, sizeof(*gdma_cq_allocated), + GFP_KERNEL); + if (!gdma_cq_allocated) { + ret = -ENOMEM; + goto fail; + } + qp->port = port; for (i = 0; i < ind_tbl_size; i++) { @@ -197,12 +209,16 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, cq_spec.gdma_region = cq->gdma_region; cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - cq_spec.attached_eq = GDMA_CQ_NO_EQ; + eq = &mc->eqs[cq->comp_vector % gc->max_num_queues]; + cq_spec.attached_eq = eq->eq->id; ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) + if (ret) { + /* Do cleanup starting with index i-1 */ + i--; goto fail; + } /* The GDMA regions are now owned by the WQ object */ wq->gdma_region = GDMA_INVALID_DMA_REGION; @@ -219,6 +235,21 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, resp.entries[i].wqid = wq->id; mana_ind_table[i] = wq->rx_object; + + /* Create CQ table entry */ + WARN_ON(gc->cq_table[cq->id]); + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (!gdma_cq) { + ret = -ENOMEM; + goto fail; + } + gdma_cq_allocated[i] = gdma_cq; + + gdma_cq->cq.context = cq; + gdma_cq->type = GDMA_CQ; + gdma_cq->cq.callback = mana_ib_cq_handler; + gdma_cq->id = cq->id; + gc->cq_table[cq->id] = gdma_cq; } resp.num_entries = i; @@ -238,6 +269,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } + kfree(gdma_cq_allocated); kfree(mana_ind_table); return 0; @@ -245,10 +277,17 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i]; + ibcq = ibwq->cq; wq = container_of(ibwq, struct mana_ib_wq, ibwq); + cq = container_of(ibcq, struct mana_ib_cq, ibcq); + + gc->cq_table[cq->id] = NULL; + kfree(gdma_cq_allocated[i]); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); } + kfree(gdma_cq_allocated); kfree(mana_ind_table); return ret; @@ -270,14 +309,17 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct mana_ib_create_qp_resp resp = {}; struct mana_ib_create_qp ucmd = {}; + struct gdma_queue *gdma_cq = NULL; struct mana_obj_spec wq_spec = {}; struct mana_obj_spec cq_spec = {}; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; struct ib_umem *umem; - int err; + struct mana_eq *eq; + int eq_vec; u32 port; + int err; mc = gd->driver_data; @@ -354,7 +396,9 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, cq_spec.gdma_region = send_cq->gdma_region; cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - cq_spec.attached_eq = GDMA_CQ_NO_EQ; + eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues; + eq = &mc->eqs[eq_vec]; + cq_spec.attached_eq = eq->eq->id; err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, &cq_spec, &qp->tx_object); @@ -372,6 +416,20 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, qp->sq_id = wq_spec.queue_index; send_cq->id = cq_spec.queue_index; + /* Create CQ table entry */ + WARN_ON(gd->gdma_context->cq_table[send_cq->id]); + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (!gdma_cq) { + err = -ENOMEM; + goto err_destroy_wq_obj; + } + + gdma_cq->cq.context = send_cq; + gdma_cq->type = GDMA_CQ; + gdma_cq->cq.callback = mana_ib_cq_handler; + gdma_cq->id = send_cq->id; + gd->gdma_context->cq_table[send_cq->id] = gdma_cq; + ibdev_dbg(&mdev->ib_dev, "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, qp->tx_object, qp->sq_id, send_cq->id); @@ -385,11 +443,15 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, ibdev_dbg(&mdev->ib_dev, "Failed copy udata for create qp-raw, %d\n", err); - goto err_destroy_wq_obj; + goto err_release_gdma_cq; } return 0; +err_release_gdma_cq: + kfree(gdma_cq); + gd->gdma_context->cq_table[send_cq->id] = NULL; + err_destroy_wq_obj: mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); From d42fafb895246e3f5a5e0f83e7485167fa651f5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Dec 2023 15:46:23 -0800 Subject: [PATCH 61/66] IB/iser: iscsi_iser.h: fix kernel-doc warning and spellos Drop one kernel-doc comment to prevent a warning: iscsi_iser.h:313: warning: Excess struct member 'mr' description in 'iser_device' and spell 2 words correctly (buffer and deferred). Signed-off-by: Randy Dunlap Cc: Sagi Grimberg Cc: Max Gurtovoy Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Link: https://lore.kernel.org/r/20231222234623.25231-1-rdunlap@infradead.org Reviewed-by: Sagi Grimberg Acked-by: Max Gurtovoy Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/iser/iscsi_iser.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index dee8c97ff056..7450584dd934 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -182,7 +182,7 @@ enum iser_data_dir { * * @sg: pointer to the sg list * @size: num entries of this sg - * @data_len: total beffer byte len + * @data_len: total buffer byte len * @dma_nents: returned by dma_map_sg */ struct iser_data_buf { @@ -299,7 +299,6 @@ struct ib_conn; * * @ib_device: RDMA device * @pd: Protection Domain for this device - * @mr: Global DMA memory region * @event_handler: IB events handle routine * @ig_list: entry in devices list * @refcount: Reference counter, dominated by open iser connections @@ -389,7 +388,7 @@ struct ib_conn { * to max number of post recvs * @max_cmds: maximum cmds allowed for this connection * @name: connection peer portal - * @release_work: deffered work for release job + * @release_work: deferred work for release job * @state_mutex: protects iser onnection state * @stop_completion: conn_stop completion * @ib_completion: RDMA cleanup completion From 68cf9d82f75c07d4117bca8129a770efa9d89f62 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Wed, 27 Dec 2023 16:47:59 +0800 Subject: [PATCH 62/66] RDMA/erdma: Introduce dma pool for hardware responses of CMDQ requests Hardware response, such as the result of query statistics, may be too long to be directly accommodated within the CQE structure. To address this, we introduce a DMA pool to hold the hardware's responses of CMDQ requests. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20231227084800.99091-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma.h | 2 ++ drivers/infiniband/hw/erdma/erdma_hw.h | 2 ++ drivers/infiniband/hw/erdma/erdma_main.c | 24 ++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index f190111840e9..5df401a30cb9 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -212,6 +212,8 @@ struct erdma_dev { atomic_t num_ctx; struct list_head cep_list; + + struct dma_pool *resp_pool; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 9d316fdc6f9a..4baabf1f2b08 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -357,6 +357,8 @@ struct erdma_cmdq_reflush_req { u32 rq_pi; }; +#define ERDMA_HW_RESP_SIZE 256 + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 0880c79a978c..e4df5bf89cd0 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -172,14 +172,30 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; + dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev, + ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, + 0); + if (!dev->resp_pool) + return -ENOMEM; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ERDMA_PCI_WIDTH)); if (ret) - return ret; + goto destroy_pool; dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; + +destroy_pool: + dma_pool_destroy(dev->resp_pool); + + return ret; +} + +static void erdma_device_uninit(struct erdma_dev *dev) +{ + dma_pool_destroy(dev->resp_pool); } static void erdma_hw_reset(struct erdma_dev *dev) @@ -273,7 +289,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_request_vectors(dev); if (err) - goto err_iounmap_func_bar; + goto err_uninit_device; err = erdma_comm_irq_init(dev); if (err) @@ -314,6 +330,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) err_free_vectors: pci_free_irq_vectors(dev->pdev); +err_uninit_device: + erdma_device_uninit(dev); + err_iounmap_func_bar: devm_iounmap(&pdev->dev, dev->func_bar); @@ -339,6 +358,7 @@ static void erdma_remove_dev(struct pci_dev *pdev) erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); pci_free_irq_vectors(dev->pdev); + erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); From 63a43a675cb90e8a56e9119fff68292561204b27 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Wed, 27 Dec 2023 16:48:00 +0800 Subject: [PATCH 63/66] RDMA/erdma: Add hardware statistics support First, we add a new command to query hardware statistics, and then implement two functions: ib_device_ops.alloc_hw_port_stats and ib_device_ops.get_hw_stats to allow rdma tool can get the statistics of erdma device. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20231227084800.99091-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 37 ++++++++++ drivers/infiniband/hw/erdma/erdma_main.c | 2 + drivers/infiniband/hw/erdma/erdma_verbs.c | 90 +++++++++++++++++++++++ drivers/infiniband/hw/erdma/erdma_verbs.h | 4 + 4 files changed, 133 insertions(+) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 4baabf1f2b08..ed8e3940cf4c 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -146,6 +146,7 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, CMDQ_OPCODE_CONF_MTU = 3, + CMDQ_OPCODE_GET_STATS = 4, CMDQ_OPCODE_CONF_DEVICE = 5, CMDQ_OPCODE_ALLOC_DB = 8, CMDQ_OPCODE_FREE_DB = 9, @@ -359,6 +360,42 @@ struct erdma_cmdq_reflush_req { #define ERDMA_HW_RESP_SIZE 256 +struct erdma_cmdq_query_req { + u64 hdr; + u32 rsvd; + u32 index; + + u64 target_addr; + u32 target_length; +}; + +#define ERDMA_HW_RESP_MAGIC 0x5566 + +struct erdma_cmdq_query_resp_hdr { + u16 magic; + u8 ver; + u8 length; + + u32 index; + u32 rsvd[2]; +}; + +struct erdma_cmdq_query_stats_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u64 tx_req_cnt; + u64 tx_packets_cnt; + u64 tx_bytes_cnt; + u64 tx_drop_packets_cnt; + u64 tx_bps_meter_drop_packets_cnt; + u64 tx_pps_meter_drop_packets_cnt; + u64 rx_packets_cnt; + u64 rx_bytes_cnt; + u64 rx_drop_packets_cnt; + u64 rx_bps_meter_drop_packets_cnt; + u64 rx_pps_meter_drop_packets_cnt; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index e4df5bf89cd0..472939172f0c 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -468,6 +468,7 @@ static const struct ib_device_ops erdma_device_ops = { .driver_id = RDMA_DRIVER_ERDMA, .uverbs_abi_ver = ERDMA_ABI_VERSION, + .alloc_hw_port_stats = erdma_alloc_hw_port_stats, .alloc_mr = erdma_ib_alloc_mr, .alloc_pd = erdma_alloc_pd, .alloc_ucontext = erdma_alloc_ucontext, @@ -479,6 +480,7 @@ static const struct ib_device_ops erdma_device_ops = { .destroy_cq = erdma_destroy_cq, .destroy_qp = erdma_destroy_qp, .get_dma_mr = erdma_get_dma_mr, + .get_hw_stats = erdma_get_hw_stats, .get_port_immutable = erdma_get_port_immutable, .iw_accept = erdma_accept, .iw_add_ref = erdma_qp_get_ref, diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index c317947563fb..23dfc01603f8 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1708,3 +1708,93 @@ void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) ib_dispatch_event(&event); } + +enum counters { + ERDMA_STATS_TX_REQS_CNT, + ERDMA_STATS_TX_PACKETS_CNT, + ERDMA_STATS_TX_BYTES_CNT, + ERDMA_STATS_TX_DISABLE_DROP_CNT, + ERDMA_STATS_TX_BPS_METER_DROP_CNT, + ERDMA_STATS_TX_PPS_METER_DROP_CNT, + + ERDMA_STATS_RX_PACKETS_CNT, + ERDMA_STATS_RX_BYTES_CNT, + ERDMA_STATS_RX_DISABLE_DROP_CNT, + ERDMA_STATS_RX_BPS_METER_DROP_CNT, + ERDMA_STATS_RX_PPS_METER_DROP_CNT, + + ERDMA_STATS_MAX +}; + +static const struct rdma_stat_desc erdma_descs[] = { + [ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt", + [ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt", + [ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt", + [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt", + [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt", + [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt", + [ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt", + [ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt", + [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt", + [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt", + [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt", +}; + +struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, + u32 port_num) +{ + return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int erdma_query_hw_stats(struct erdma_dev *dev, + struct rdma_hw_stats *stats) +{ + struct erdma_cmdq_query_stats_resp *resp; + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_GET_STATS); + + resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!resp) + return -ENOMEM; + + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) { + err = -EINVAL; + goto out; + } + + memcpy(&stats->value[0], &resp->tx_req_cnt, + sizeof(u64) * stats->num_counters); + +out: + dma_pool_free(dev->resp_pool, resp, dma_addr); + + return err; +} + +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port, int index) +{ + struct erdma_dev *dev = to_edev(ibdev); + int ret; + + if (port == 0) + return 0; + + ret = erdma_query_hw_stats(dev, stats); + if (ret) + return ret; + + return stats->num_counters; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index eb9c0f92fb6f..db6018529ccc 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -361,5 +361,9 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); +struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, + u32 port_num); +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port, int index); #endif From 2f1888281e67205bd80d3e8f54dbd519a9653f26 Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Tue, 19 Dec 2023 09:23:11 +0200 Subject: [PATCH 64/66] IB/iser: Prevent invalidating wrong MR The iser_reg_resources structure has two pointers to MR but only one mr_valid field. The implementation assumes that we use only *sig_mr when pi_enable is true. Otherwise, we use only *mr. However, it is only sometimes correct. Read commands without protection information occur even when pi_enble is true. For example, the following SCSI commands have a Data-In buffer but never have protection information: READ CAPACITY (16), INQUIRY, MODE SENSE(6), MAINTENANCE IN. So, we use *sig_mr for some SCSI commands and *mr for the other SCSI commands. In most cases, it works fine because the remote invalidation is applied. However, there are two cases when the remote invalidation is not applicable. 1. Small write commands when all data is sent as an immediate. 2. The target does not support the remote invalidation feature. The lazy invalidation is used if the remote invalidation is impossible. Since, at the lazy invalidation, we always invalidate the MR we want to use, the wrong MR may be invalidated. To fix the issue, we need a field per MR that indicates the MR needs invalidation. Since the ib_mr structure already has such a field, let's use ib_mr.need_inval instead of iser_reg_resources.mr_valid. Fixes: b76a439982f8 ("IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover") Link: https://lore.kernel.org/r/20231219072311.40989-1-sergeygo@nvidia.com Acked-by: Max Gurtovoy Signed-off-by: Sergey Gorenko Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 -- drivers/infiniband/ulp/iser/iser_initiator.c | 5 ++++- drivers/infiniband/ulp/iser/iser_memory.c | 8 ++++---- drivers/infiniband/ulp/iser/iser_verbs.c | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 7450584dd934..68429a5f796d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -316,12 +316,10 @@ struct iser_device { * * @mr: memory region * @sig_mr: signature memory region - * @mr_valid: is mr valid indicator */ struct iser_reg_resources { struct ib_mr *mr; struct ib_mr *sig_mr; - u8 mr_valid:1; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 39ea73f69016..f5f090dc4f1e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -581,7 +581,10 @@ static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) return -EINVAL; } - desc->rsc.mr_valid = 0; + if (desc->sig_protected) + desc->rsc.sig_mr->need_inval = false; + else + desc->rsc.mr->need_inval = false; return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 29ae2c6a250a..6efcb79c8efe 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -264,7 +264,7 @@ static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (rsc->mr_valid) + if (rsc->sig_mr->need_inval) iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -288,7 +288,7 @@ static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, wr->access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - rsc->mr_valid = 1; + rsc->sig_mr->need_inval = true; sig_reg->sge.lkey = mr->lkey; sig_reg->rkey = mr->rkey; @@ -313,7 +313,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_reg_wr *wr = &tx_desc->reg_wr; int n; - if (rsc->mr_valid) + if (rsc->mr->need_inval) iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -336,7 +336,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; - rsc->mr_valid = 1; + rsc->mr->need_inval = true; reg->sge.lkey = mr->lkey; reg->rkey = mr->rkey; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95b8eebf7e04..6801b70dc9e0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -129,7 +129,6 @@ iser_create_fastreg_desc(struct iser_device *device, goto err_alloc_mr_integrity; } } - desc->rsc.mr_valid = 0; return desc; From 2307157c85096026043ba11f9ad8393c31515c45 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 4 Jan 2024 09:51:55 +0000 Subject: [PATCH 65/66] RDMA/efa: Add EFA query MR support Add EFA driver uapi definitions and register a new query MR method that currently returns the physical interconnects the device is using to reach the MR. Update admin definitions and efa-abi accordingly. Reviewed-by: Anas Mousa Reviewed-by: Firas Jahjah Signed-off-by: Michael Margolin Link: https://lore.kernel.org/r/20240104095155.10676-1-mrgolin@amazon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa.h | 12 +++- .../infiniband/hw/efa/efa_admin_cmds_defs.h | 33 ++++++++- drivers/infiniband/hw/efa/efa_com_cmd.c | 11 ++- drivers/infiniband/hw/efa/efa_com_cmd.h | 12 +++- drivers/infiniband/hw/efa/efa_main.c | 7 +- drivers/infiniband/hw/efa/efa_verbs.c | 71 ++++++++++++++++++- include/uapi/rdma/efa-abi.h | 21 +++++- 7 files changed, 160 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 7352a1f5d811..e2bdec32ae80 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -80,9 +80,19 @@ struct efa_pd { u16 pdn; }; +struct efa_mr_interconnect_info { + u16 recv_ic_id; + u16 rdma_read_ic_id; + u16 rdma_recv_ic_id; + u8 recv_ic_id_valid : 1; + u8 rdma_read_ic_id_valid : 1; + u8 rdma_recv_ic_id_valid : 1; +}; + struct efa_mr { struct ib_mr ibmr; struct ib_umem *umem; + struct efa_mr_interconnect_info ic_info; }; struct efa_cq { diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 9c65bd27bae0..7377c8a9f4d5 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -415,6 +415,32 @@ struct efa_admin_reg_mr_resp { * memory region */ u32 r_key; + + /* + * Mask indicating which fields have valid values + * 0 : recv_ic_id + * 1 : rdma_read_ic_id + * 2 : rdma_recv_ic_id + */ + u8 validity; + + /* + * Physical interconnect used by the device to reach the MR for receive + * operation + */ + u8 recv_ic_id; + + /* + * Physical interconnect used by the device to reach the MR for RDMA + * read operation + */ + u8 rdma_read_ic_id; + + /* + * Physical interconnect used by the device to reach the MR for RDMA + * write receive + */ + u8 rdma_recv_ic_id; }; struct efa_admin_dereg_mr_cmd { @@ -999,6 +1025,11 @@ struct efa_admin_host_info { #define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1) #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) +/* reg_mr_resp */ +#define EFA_ADMIN_REG_MR_RESP_RECV_IC_ID_MASK BIT(0) +#define EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID_MASK BIT(1) +#define EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID_MASK BIT(2) + /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 576811885d59..d3398c7b0bd0 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -270,6 +270,15 @@ int efa_com_register_mr(struct efa_com_dev *edev, result->l_key = cmd_completion.l_key; result->r_key = cmd_completion.r_key; + result->ic_info.recv_ic_id = cmd_completion.recv_ic_id; + result->ic_info.rdma_read_ic_id = cmd_completion.rdma_read_ic_id; + result->ic_info.rdma_recv_ic_id = cmd_completion.rdma_recv_ic_id; + result->ic_info.recv_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RECV_IC_ID); + result->ic_info.rdma_read_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID); + result->ic_info.rdma_recv_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index fc97f37bb39b..720a99ba0f7d 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -199,6 +199,15 @@ struct efa_com_reg_mr_params { u8 indirect; }; +struct efa_com_mr_interconnect_info { + u16 recv_ic_id; + u16 rdma_read_ic_id; + u16 rdma_recv_ic_id; + u8 recv_ic_id_valid : 1; + u8 rdma_read_ic_id_valid : 1; + u8 rdma_recv_ic_id_valid : 1; +}; + struct efa_com_reg_mr_result { /* * To be used in conjunction with local buffers references in SQ and @@ -210,6 +219,7 @@ struct efa_com_reg_mr_result { * accessed memory region */ u32 r_key; + struct efa_com_mr_interconnect_info ic_info; }; struct efa_com_dereg_mr_params { diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 15ee92081118..7b1910a86216 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -9,6 +9,7 @@ #include #include +#include #include "efa.h" @@ -36,6 +37,8 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) +extern const struct uapi_definition efa_uapi_defs[]; + /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -432,6 +435,8 @@ static int efa_ib_device_add(struct efa_dev *dev) ib_set_device_ops(&dev->ibdev, &efa_dev_ops); + dev->ibdev.driver_def = efa_uapi_defs; + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) goto err_destroy_eqs; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 0f8ca99d0827..2f412db2edcd 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -13,6 +13,9 @@ #include #include #include +#define UVERBS_MODULE_NAME efa_ib +#include +#include #include "efa.h" #include "efa_io_defs.h" @@ -1653,6 +1656,12 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, mr->ibmr.lkey = result.l_key; mr->ibmr.rkey = result.r_key; mr->ibmr.length = length; + mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id; + mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id; + mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id; + mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid; + mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid; + mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); return 0; @@ -1735,6 +1744,39 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, return ERR_PTR(err); } +static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs) +{ + struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE); + struct efa_mr *mr = to_emr(ibmr); + u16 ic_id_validity = 0; + int ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, + &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, + &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, + &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id)); + if (ret) + return ret; + + if (mr->ic_info.recv_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID; + if (mr->ic_info.rdma_read_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID; + if (mr->ic_info.rdma_recv_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID; + + return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, + &ic_id_validity, sizeof(ic_id_validity)); +} + int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibmr->device); @@ -2157,3 +2199,30 @@ enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, return IB_LINK_LAYER_UNSPECIFIED; } +DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY, + UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +ADD_UVERBS_METHODS(efa_mr, + UVERBS_OBJECT_MR, + &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY)); + +const struct uapi_definition efa_uapi_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, + &efa_mr), + {}, +}; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index d94c32f28804..701e2d567e41 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef EFA_ABI_USER_H #define EFA_ABI_USER_H #include +#include /* * Increment this value if any changes that break userspace ABI @@ -134,4 +135,22 @@ struct efa_ibv_ex_query_device_resp { __u32 device_caps; }; +enum { + EFA_QUERY_MR_VALIDITY_RECV_IC_ID = 1 << 0, + EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID = 1 << 1, + EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID = 1 << 2, +}; + +enum efa_query_mr_attrs { + EFA_IB_ATTR_QUERY_MR_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, + EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, + EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, + EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, +}; + +enum efa_mr_methods { + EFA_IB_METHOD_MR_QUERY = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif /* EFA_ABI_USER_H */ From d24b923f1d696ddacb09f0f2d1b1f4f045cfe65e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jan 2024 12:06:11 +0300 Subject: [PATCH 66/66] RDMA/bnxt_re: Fix error code in bnxt_re_create_cq() Return -ENOMEM if get_zeroed_page() fails. Don't return success. Fixes: e275919d9669 ("RDMA/bnxt_re: Share a page to expose per CQ info with userspace") Link: https://lore.kernel.org/r/d714306e-b7d7-4e89-b973-a9ff0f260c78@moroto.mountain Signed-off-by: Dan Carpenter Acked-by: Selvin Xavier Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 7213dc7574d0..824349659d69 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2944,9 +2944,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq = NULL; - int rc = -ENOMEM, entries; unsigned int nq_alloc_cnt; int cqe = attr->cqe; + int rc, entries; u32 active_cqs; if (attr->flags) @@ -3027,8 +3027,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id); /* Allocate a page */ cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!cq->uctx_cq_page) + if (!cq->uctx_cq_page) { + rc = -ENOMEM; goto c2fail; + } resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT; } resp.cqid = cq->qplib_cq.id;