Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git

Stephen Rothwell 2024-12-20 11:48:37 +11:00
commit eb65cde519
22 changed files with 1226 additions and 546 deletions

View File

@ -37,18 +37,9 @@
*
*/
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <rdma/ib_addr.h>
#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"

View File

@ -52,8 +52,6 @@
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>
#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"

View File

@ -79,17 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL");
/* globals */
static DEFINE_MUTEX(bnxt_re_mutex);
static void bnxt_re_stop_irq(void *handle);
static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
static int bnxt_re_netdev_event(struct notifier_block *notifier,
unsigned long event, void *ptr);
static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset);
static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable);
static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *cctx;
@ -302,16 +297,6 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
&rdev->qplib_ctx);
}
static void bnxt_re_shutdown(struct auxiliary_device *adev)
{
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
struct bnxt_re_dev *rdev;
rdev = en_info->rdev;
ib_unregister_device(&rdev->ibdev);
bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
}
static void bnxt_re_stop_irq(void *handle)
{
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
@ -2123,6 +2108,30 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
return rc;
}
static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
struct bnxt_qplib_cc_param cc_param = {};
/* Do not enable congestion control on VFs */
if (rdev->is_virtfn)
return;
/* Currently enabling only for GenP5 adapters */
if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
return;
if (enable) {
cc_param.enable = 1;
cc_param.tos_ecn = 1;
}
cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
}
static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
struct bnxt_re_en_dev_info *en_info,
struct auxiliary_device *adev)
@ -2192,30 +2201,6 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
return rc;
}
static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
struct bnxt_qplib_cc_param cc_param = {};
/* Do not enable congestion control on VFs */
if (rdev->is_virtfn)
return;
/* Currently enabling only for GenP5 adapters */
if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
return;
if (enable) {
cc_param.enable = 1;
cc_param.tos_ecn = 1;
}
cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
}
/*
* "Notifier chain callback can be invoked for the same chain from
* different CPUs at the same time".
@ -2241,13 +2226,12 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
real_dev = netdev;
if (real_dev != netdev)
goto exit;
return NOTIFY_DONE;
rdev = bnxt_re_from_netdev(real_dev);
if (!rdev)
return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
case NETDEV_DOWN:
@ -2261,7 +2245,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
break;
}
ib_device_put(&rdev->ibdev);
exit:
return NOTIFY_DONE;
}
@ -2322,13 +2306,9 @@ static int bnxt_re_probe(struct auxiliary_device *adev,
rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
if (rc)
goto err;
mutex_unlock(&bnxt_re_mutex);
return 0;
kfree(en_info);
err:
mutex_unlock(&bnxt_re_mutex);
kfree(en_info);
return rc;
}
@ -2381,6 +2361,16 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
return 0;
}
static void bnxt_re_shutdown(struct auxiliary_device *adev)
{
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
struct bnxt_re_dev *rdev;
rdev = en_info->rdev;
ib_unregister_device(&rdev->ibdev);
bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
}
static const struct auxiliary_device_id bnxt_re_id_table[] = {
{ .name = BNXT_ADEV_NAME ".rdma", },
{},

View File

@ -1114,8 +1114,10 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
* The math here assumes sizeof cpl_pass_accept_req >= sizeof
* cpl_rx_pkt.
*/
skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
skb = alloc_skb(size_add(gl->tot_len,
sizeof(struct cpl_pass_accept_req) +
sizeof(struct rss_header)) - pktshift,
GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
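
The rewritten allocation above replaces a plain addition with size_add() so the length arithmetic feeding alloc_skb() cannot wrap on overflow. A rough userspace sketch of the behaviour being relied on (the real helper lives in <linux/overflow.h> and saturates to SIZE_MAX, which any allocator then rejects); the name sketch_size_add is invented for illustration:

#include <stddef.h>
#include <stdint.h>

/* Illustration only: saturating add, mirroring what the hunk above assumes. */
static size_t sketch_size_add(size_t a, size_t b)
{
	if (a > SIZE_MAX - b)
		return SIZE_MAX;	/* saturate instead of wrapping */
	return a + b;
}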

View File

@ -5,7 +5,7 @@ config INFINIBAND_ERDMA
depends on INFINIBAND_ADDR_TRANS
depends on INFINIBAND_USER_ACCESS
help
This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
This is a RDMA driver for Alibaba Elastic RDMA Adapter(ERDMA),
which supports RDMA features in Alibaba cloud environment.
To compile this driver as module, choose M here. The module will be

View File

@ -16,7 +16,7 @@
#include "erdma_hw.h"
#define DRV_MODULE_NAME "erdma"
#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack"
struct erdma_eq {
void *qbuf;
@ -148,6 +148,8 @@ struct erdma_devattr {
u32 max_mr;
u32 max_pd;
u32 max_mw;
u32 max_gid;
u32 max_ah;
u32 local_dma_key;
};
@ -177,7 +179,8 @@ struct erdma_resource_cb {
enum {
ERDMA_RES_TYPE_PD = 0,
ERDMA_RES_TYPE_STAG_IDX = 1,
ERDMA_RES_CNT = 2,
ERDMA_RES_TYPE_AH = 2,
ERDMA_RES_CNT = 3,
};
struct erdma_dev {
@ -215,6 +218,7 @@ struct erdma_dev {
struct dma_pool *db_pool;
struct dma_pool *resp_pool;
enum erdma_proto_type proto;
};
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)

View File

@ -567,7 +567,8 @@ static int erdma_proc_mpareq(struct erdma_cep *cep)
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
struct erdma_qp_attrs qp_attrs;
enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
struct erdma_mod_qp_params_iwarp params;
struct erdma_qp *qp = cep->qp;
struct mpa_rr *rep;
int ret;
@ -597,26 +598,29 @@ static int erdma_proc_mpareply(struct erdma_cep *cep)
return -EINVAL;
}
memset(&qp_attrs, 0, sizeof(qp_attrs));
qp_attrs.irq_size = cep->ird;
qp_attrs.orq_size = cep->ord;
qp_attrs.state = ERDMA_QP_STATE_RTS;
memset(&params, 0, sizeof(params));
params.state = ERDMA_QPS_IWARP_RTS;
params.irq_size = cep->ird;
params.orq_size = cep->ord;
down_write(&qp->state_lock);
if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto out_err;
}
qp->attrs.qp_type = ERDMA_QP_ACTIVE;
if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
qp->attrs.cc = COMPROMISE_CC;
to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
ERDMA_QPA_IWARP_ORD;
ret = erdma_modify_qp_internal(qp, &qp_attrs,
ERDMA_QP_ATTR_STATE |
ERDMA_QP_ATTR_LLP_HANDLE |
ERDMA_QP_ATTR_MPA);
params.qp_type = ERDMA_QP_ACTIVE;
if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
to_modify_attrs |= ERDMA_QPA_IWARP_CC;
params.cc = COMPROMISE_CC;
}
ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);
up_write(&qp->state_lock);
@ -722,7 +726,7 @@ static int erdma_newconn_connected(struct erdma_cep *cep)
__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
@ -1126,10 +1130,11 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
struct erdma_dev *dev = to_edev(id->device);
struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
struct erdma_mod_qp_params_iwarp mod_qp_params;
enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
struct erdma_dev *dev = to_edev(id->device);
struct erdma_qp *qp;
struct erdma_qp_attrs qp_attrs;
int ret;
erdma_cep_set_inuse(cep);
@ -1156,7 +1161,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
erdma_qp_get(qp);
down_write(&qp->state_lock);
if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto error;
@ -1181,11 +1186,11 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->cm_id = id;
id->add_ref(id);
memset(&qp_attrs, 0, sizeof(qp_attrs));
qp_attrs.orq_size = params->ord;
qp_attrs.irq_size = params->ird;
memset(&mod_qp_params, 0, sizeof(mod_qp_params));
qp_attrs.state = ERDMA_QP_STATE_RTS;
mod_qp_params.irq_size = params->ird;
mod_qp_params.orq_size = params->ord;
mod_qp_params.state = ERDMA_QPS_IWARP_RTS;
/* Associate QP with CEP */
erdma_cep_get(cep);
@ -1194,19 +1199,21 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->state = ERDMA_EPSTATE_RDMA_MODE;
qp->attrs.qp_type = ERDMA_QP_PASSIVE;
qp->attrs.pd_len = params->private_data_len;
mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
mod_qp_params.pd_len = params->private_data_len;
if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
qp->attrs.cc = COMPROMISE_CC;
to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
ERDMA_QPA_IWARP_MPA;
if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
to_modify_attrs |= ERDMA_QPA_IWARP_CC;
mod_qp_params.cc = COMPROMISE_CC;
}
/* move to rts */
ret = erdma_modify_qp_internal(qp, &qp_attrs,
ERDMA_QP_ATTR_STATE |
ERDMA_QP_ATTR_ORD |
ERDMA_QP_ATTR_LLP_HANDLE |
ERDMA_QP_ATTR_IRD |
ERDMA_QP_ATTR_MPA);
ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);
up_write(&qp->state_lock);
if (ret)
@ -1214,7 +1221,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->mpa.ext_data.bits = 0;
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
ret = erdma_send_mpareqrep(cep, params->private_data,
params->private_data_len);

View File

@ -105,6 +105,22 @@ static const struct {
{ ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
};
static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc)
{
u32 ud_info;
wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
ud_info = be32_to_cpu(cqe->ud.info);
wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info);
if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4)
wc->network_hdr_type = RDMA_NETWORK_IPV4;
else
wc->network_hdr_type = RDMA_NETWORK_IPV6;
wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info);
wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info);
wc->pkey_index = 0;
}
#define ERDMA_POLLCQ_NO_QP 1
static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
@ -168,6 +184,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
}
if (erdma_device_rocev2(dev) &&
(qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI))
erdma_process_ud_cqe(cqe, wc);
if (syndrome >= ERDMA_NUM_WC_STATUS)
syndrome = ERDMA_WC_GENERAL_ERR;
@ -201,3 +221,48 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return npolled;
}
void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn)
{
struct erdma_cq *cq = to_ecq(ibcq);
struct erdma_cqe *cqe, *dst_cqe;
u32 prev_cq_ci, cur_cq_ci;
u32 ncqe = 0, nqp_cqe = 0;
unsigned long flags;
u8 owner;
spin_lock_irqsave(&cq->kern_cq.lock, flags);
prev_cq_ci = cq->kern_cq.ci;
while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) {
++cq->kern_cq.ci;
++ncqe;
}
while (ncqe > 0) {
cur_cq_ci = prev_cq_ci + ncqe - 1;
cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth,
CQE_SHIFT);
if (be32_to_cpu(cqe->qpn) == qpn) {
++nqp_cqe;
} else if (nqp_cqe) {
dst_cqe = get_queue_entry(cq->kern_cq.qbuf,
cur_cq_ci + nqp_cqe,
cq->depth, CQE_SHIFT);
owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
be32_to_cpu(dst_cqe->hdr));
cqe->hdr = cpu_to_be32(
(be32_to_cpu(cqe->hdr) &
~ERDMA_CQE_HDR_OWNER_MASK) |
FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner));
memcpy(dst_cqe, cqe, sizeof(*cqe));
}
--ncqe;
}
cq->kern_cq.ci = prev_cq_ci + nqp_cqe;
spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
}
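
erdma_remove_cqes_of_qp() above compacts the CQ ring in place: it walks the valid CQEs from newest to oldest, counts the entries owned by the QP being torn down, shifts every other entry toward the newer end by that count while preserving the destination slot's owner bit, and finally advances the consumer index past the slots freed at the old end. A simplified, hardware-free sketch of the same stable-removal idea (plain array, no owner bits, invented names):

#include <stdint.h>
#include <string.h>

struct fake_cqe {
	uint32_t qpn;
	uint32_t payload;
};

/* Drop every entry belonging to 'qpn', keeping the order of the rest.
 * Returns how many entries survive. */
static unsigned int sketch_remove_cqes(struct fake_cqe *cqe, unsigned int n,
				       uint32_t qpn)
{
	unsigned int kept = 0;

	for (unsigned int i = 0; i < n; i++) {
		if (cqe[i].qpn == qpn)
			continue;		/* entry of the dying QP */
		if (kept != i)
			memcpy(&cqe[kept], &cqe[i], sizeof(*cqe));
		kept++;
	}
	return kept;
}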

View File

@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/if_ether.h>
/* PCIe device related definition. */
#define ERDMA_PCI_WIDTH 64
@ -21,8 +22,21 @@
#define ERDMA_NUM_MSIX_VEC 32U
#define ERDMA_MSIX_VECTOR_CMDQ 0
/* RoCEv2 related */
#define ERDMA_ROCEV2_GID_SIZE 16
#define ERDMA_MAX_PKEYS 1
#define ERDMA_DEFAULT_PKEY 0xFFFF
/* erdma device protocol type */
enum erdma_proto_type {
ERDMA_PROTO_IWARP = 0,
ERDMA_PROTO_ROCEV2 = 1,
ERDMA_PROTO_COUNT = 2,
};
/* PCIe Bar0 Registers. */
#define ERDMA_REGS_VERSION_REG 0x0
#define ERDMA_REGS_DEV_PROTO_REG 0xC
#define ERDMA_REGS_DEV_CTRL_REG 0x10
#define ERDMA_REGS_DEV_ST_REG 0x14
#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
@ -136,7 +150,11 @@ enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_DESTROY_CQ = 5,
CMDQ_OPCODE_REFLUSH = 6,
CMDQ_OPCODE_REG_MR = 8,
CMDQ_OPCODE_DEREG_MR = 9
CMDQ_OPCODE_DEREG_MR = 9,
CMDQ_OPCODE_SET_GID = 14,
CMDQ_OPCODE_CREATE_AH = 15,
CMDQ_OPCODE_DESTROY_AH = 16,
CMDQ_OPCODE_QUERY_QP = 17,
};
enum CMDQ_COMMON_OPCODE {
@ -284,6 +302,36 @@ struct erdma_cmdq_dereg_mr_req {
u32 cfg;
};
/* create_av cfg0 */
#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0)
#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20)
struct erdma_av_cfg {
u32 cfg0;
u8 traffic_class;
u8 hop_limit;
u8 sl;
u8 rsvd;
u16 udp_sport;
u16 sgid_index;
u8 dmac[ETH_ALEN];
u8 padding[2];
u8 dgid[ERDMA_ROCEV2_GID_SIZE];
};
struct erdma_cmdq_create_ah_req {
u64 hdr;
u32 pdn;
u32 ahn;
struct erdma_av_cfg av_cfg;
};
struct erdma_cmdq_destroy_ah_req {
u64 hdr;
u32 pdn;
u32 ahn;
};
/* modify qp cfg */
#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
@ -301,6 +349,36 @@ struct erdma_cmdq_modify_qp_req {
u32 recv_nxt;
};
/* modify qp cfg1 for roce device */
#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0)
struct erdma_cmdq_mod_qp_req_rocev2 {
u64 hdr;
u32 cfg0;
u32 cfg1;
u32 attr_mask;
u32 qkey;
u32 rq_psn;
u32 sq_psn;
struct erdma_av_cfg av_cfg;
};
/* query qp response mask */
#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0)
#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24)
#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48)
#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56)
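
These masks describe how the single 64-bit query-QP response word is packed; erdma_query_qp() later unpacks it with FIELD_GET(). A plain-shift sketch of the same layout (names invented, semantics as implied by the GENMASK_ULL() definitions above):

#include <stdint.h>

/* bits 0..23: SQ PSN, bits 24..47: RQ PSN, bits 48..55: QP state, bit 56: SQ draining */
static void sketch_decode_query_qp_resp(uint64_t resp, uint32_t *sq_psn,
					uint32_t *rq_psn, uint8_t *state,
					uint8_t *sq_draining)
{
	*sq_psn      = resp & 0xFFFFFFu;
	*rq_psn      = (resp >> 24) & 0xFFFFFFu;
	*state       = (resp >> 48) & 0xFFu;
	*sq_draining = (resp >> 56) & 0x1u;
}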
struct erdma_cmdq_query_qp_req_rocev2 {
u64 hdr;
u32 qpn;
};
enum erdma_qp_type {
ERDMA_QPT_RC = 0,
ERDMA_QPT_UD = 1,
};
/* create qp cfg0 */
#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
@ -309,6 +387,9 @@ struct erdma_cmdq_modify_qp_req {
#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
/* create qp cfg2 */
#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0)
/* create qp cqn_mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
@ -342,6 +423,7 @@ struct erdma_cmdq_create_qp_req {
u64 rq_mtt_entry[3];
u32 db_cfg;
u32 cfg2;
};
struct erdma_cmdq_destroy_qp_req {
@ -394,10 +476,33 @@ struct erdma_cmdq_query_stats_resp {
u64 rx_pps_meter_drop_packets_cnt;
};
enum erdma_network_type {
ERDMA_NETWORK_TYPE_IPV4 = 0,
ERDMA_NETWORK_TYPE_IPV6 = 1,
};
enum erdma_set_gid_op {
ERDMA_SET_GID_OP_ADD = 0,
ERDMA_SET_GID_OP_DEL = 1,
};
/* set gid cfg */
#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0)
#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16)
#define ERDMA_CMD_SET_GID_OP_MASK BIT(31)
struct erdma_cmdq_set_gid_req {
u64 hdr;
u32 cfg;
u8 gid[ERDMA_ROCEV2_GID_SIZE];
};
/* cap qword 0 definition */
#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48)
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
/* cap qword 1 definition */
@ -426,6 +531,10 @@ enum {
#define ERDMA_CQE_QTYPE_RQ 1
#define ERDMA_CQE_QTYPE_CMDQ 2
#define ERDMA_CQE_NTYPE_MASK BIT(31)
#define ERDMA_CQE_SL_MASK GENMASK(27, 20)
#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0)
struct erdma_cqe {
__be32 hdr;
__be32 qe_idx;
@ -435,7 +544,16 @@ struct erdma_cqe {
__be32 inv_rkey;
};
__be32 size;
__be32 rsvd[3];
union {
struct {
__be32 rsvd[3];
} rc;
struct {
__be32 rsvd[2];
__be32 info;
} ud;
};
};
struct erdma_sge {
@ -487,7 +605,7 @@ struct erdma_write_sqe {
struct erdma_sge sgl[];
};
struct erdma_send_sqe {
struct erdma_send_sqe_rc {
__le64 hdr;
union {
__be32 imm_data;
@ -498,6 +616,17 @@ struct erdma_send_sqe {
struct erdma_sge sgl[];
};
struct erdma_send_sqe_ud {
__le64 hdr;
__be32 imm_data;
__le32 length;
__le32 qkey;
__le32 dst_qpn;
__le32 ahn;
__le32 rsvd;
struct erdma_sge sgl[];
};
struct erdma_readreq_sqe {
__le64 hdr;
__le32 invalid_stag;

View File

@ -172,6 +172,8 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG);
dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
0);
@ -398,6 +400,8 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0);
dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0);
dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
@ -415,6 +419,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah;
erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_QUERY_FW_INFO);
@ -474,6 +479,26 @@ static void erdma_res_cb_free(struct erdma_dev *dev)
bitmap_free(dev->res_cb[i].bitmap);
}
static const struct ib_device_ops erdma_device_ops_rocev2 = {
.get_link_layer = erdma_get_link_layer,
.add_gid = erdma_add_gid,
.del_gid = erdma_del_gid,
.query_pkey = erdma_query_pkey,
.create_ah = erdma_create_ah,
.destroy_ah = erdma_destroy_ah,
};
static const struct ib_device_ops erdma_device_ops_iwarp = {
.iw_accept = erdma_accept,
.iw_add_ref = erdma_qp_get_ref,
.iw_connect = erdma_connect,
.iw_create_listen = erdma_create_listen,
.iw_destroy_listen = erdma_destroy_listen,
.iw_get_qp = erdma_get_ibqp,
.iw_reject = erdma_reject,
.iw_rem_ref = erdma_qp_put_ref,
};
static const struct ib_device_ops erdma_device_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_ERDMA,
@ -494,18 +519,9 @@ static const struct ib_device_ops erdma_device_ops = {
.get_dma_mr = erdma_get_dma_mr,
.get_hw_stats = erdma_get_hw_stats,
.get_port_immutable = erdma_get_port_immutable,
.iw_accept = erdma_accept,
.iw_add_ref = erdma_qp_get_ref,
.iw_connect = erdma_connect,
.iw_create_listen = erdma_create_listen,
.iw_destroy_listen = erdma_destroy_listen,
.iw_get_qp = erdma_get_ibqp,
.iw_reject = erdma_reject,
.iw_rem_ref = erdma_qp_put_ref,
.map_mr_sg = erdma_map_mr_sg,
.mmap = erdma_mmap,
.mmap_free = erdma_mmap_free,
.modify_qp = erdma_modify_qp,
.post_recv = erdma_post_recv,
.post_send = erdma_post_send,
.poll_cq = erdma_poll_cq,
@ -515,6 +531,7 @@ static const struct ib_device_ops erdma_device_ops = {
.query_qp = erdma_query_qp,
.req_notify_cq = erdma_req_notify_cq,
.reg_user_mr = erdma_reg_user_mr,
.modify_qp = erdma_modify_qp,
INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
@ -537,7 +554,14 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
if (ret)
return ret;
ibdev->node_type = RDMA_NODE_RNIC;
if (erdma_device_iwarp(dev)) {
ibdev->node_type = RDMA_NODE_RNIC;
ib_set_device_ops(ibdev, &erdma_device_ops_iwarp);
} else {
ibdev->node_type = RDMA_NODE_IB_CA;
ib_set_device_ops(ibdev, &erdma_device_ops_rocev2);
}
memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
/*

View File

@ -11,20 +11,20 @@
void erdma_qp_llp_close(struct erdma_qp *qp)
{
struct erdma_qp_attrs qp_attrs;
struct erdma_mod_qp_params_iwarp params;
down_write(&qp->state_lock);
switch (qp->attrs.state) {
case ERDMA_QP_STATE_RTS:
case ERDMA_QP_STATE_RTR:
case ERDMA_QP_STATE_IDLE:
case ERDMA_QP_STATE_TERMINATE:
qp_attrs.state = ERDMA_QP_STATE_CLOSING;
erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
switch (qp->attrs.iwarp.state) {
case ERDMA_QPS_IWARP_RTS:
case ERDMA_QPS_IWARP_RTR:
case ERDMA_QPS_IWARP_IDLE:
case ERDMA_QPS_IWARP_TERMINATE:
params.state = ERDMA_QPS_IWARP_CLOSING;
erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
break;
case ERDMA_QP_STATE_CLOSING:
qp->attrs.state = ERDMA_QP_STATE_IDLE;
case ERDMA_QPS_IWARP_CLOSING:
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
break;
default:
break;
@ -48,9 +48,10 @@ struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
return NULL;
}
static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask)
static int
erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
struct erdma_mod_qp_params_iwarp *params,
enum erdma_qpa_mask_iwarp mask)
{
int ret;
struct erdma_dev *dev = qp->dev;
@ -59,12 +60,15 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
struct erdma_cep *cep = qp->cep;
struct sockaddr_storage local_addr, remote_addr;
if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
return -EINVAL;
if (!(mask & ERDMA_QP_ATTR_MPA))
if (!(mask & ERDMA_QPA_IWARP_MPA))
return -EINVAL;
if (!(mask & ERDMA_QPA_IWARP_CC))
params->cc = qp->attrs.cc;
ret = getname_local(cep->sock, &local_addr);
if (ret < 0)
return ret;
@ -73,18 +77,16 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
if (ret < 0)
return ret;
qp->attrs.state = ERDMA_QP_STATE_RTS;
tp = tcp_sk(qp->cep->sock->sk);
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
req.dport = to_sockaddr_in(remote_addr).sin_port;
@ -92,33 +94,55 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
req.send_nxt = tp->snd_nxt;
/* rsvd tcp seq for mpa-rsp in server. */
if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
if (params->qp_type == ERDMA_QP_PASSIVE)
req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
req.recv_nxt = tp->rcv_nxt;
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
if (ret)
return ret;
if (mask & ERDMA_QPA_IWARP_IRD)
qp->attrs.irq_size = params->irq_size;
if (mask & ERDMA_QPA_IWARP_ORD)
qp->attrs.orq_size = params->orq_size;
if (mask & ERDMA_QPA_IWARP_CC)
qp->attrs.cc = params->cc;
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;
return 0;
}
static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask)
static int
erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
struct erdma_mod_qp_params_iwarp *params,
enum erdma_qpa_mask_iwarp mask)
{
struct erdma_dev *dev = qp->dev;
struct erdma_cmdq_modify_qp_req req;
qp->attrs.state = attrs->state;
int ret;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
if (ret)
return ret;
qp->attrs.iwarp.state = params->state;
return 0;
}
int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask)
int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
struct erdma_mod_qp_params_iwarp *params,
int mask)
{
bool need_reflush = false;
int drop_conn, ret = 0;
@ -126,31 +150,31 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
if (!mask)
return 0;
if (!(mask & ERDMA_QP_ATTR_STATE))
if (!(mask & ERDMA_QPA_IWARP_STATE))
return 0;
switch (qp->attrs.state) {
case ERDMA_QP_STATE_IDLE:
case ERDMA_QP_STATE_RTR:
if (attrs->state == ERDMA_QP_STATE_RTS) {
ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
qp->attrs.state = ERDMA_QP_STATE_ERROR;
switch (qp->attrs.iwarp.state) {
case ERDMA_QPS_IWARP_IDLE:
case ERDMA_QPS_IWARP_RTR:
if (params->state == ERDMA_QPS_IWARP_RTS) {
ret = erdma_modify_qp_state_to_rts(qp, params, mask);
} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
need_reflush = true;
if (qp->cep) {
erdma_cep_put(qp->cep);
qp->cep = NULL;
}
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
ret = erdma_modify_qp_state_to_stop(qp, params, mask);
}
break;
case ERDMA_QP_STATE_RTS:
case ERDMA_QPS_IWARP_RTS:
drop_conn = 0;
if (attrs->state == ERDMA_QP_STATE_CLOSING ||
attrs->state == ERDMA_QP_STATE_TERMINATE ||
attrs->state == ERDMA_QP_STATE_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
if (params->state == ERDMA_QPS_IWARP_CLOSING ||
params->state == ERDMA_QPS_IWARP_TERMINATE ||
params->state == ERDMA_QPS_IWARP_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, params, mask);
drop_conn = 1;
need_reflush = true;
}
@ -159,17 +183,17 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
erdma_qp_cm_drop(qp);
break;
case ERDMA_QP_STATE_TERMINATE:
if (attrs->state == ERDMA_QP_STATE_ERROR)
qp->attrs.state = ERDMA_QP_STATE_ERROR;
case ERDMA_QPS_IWARP_TERMINATE:
if (params->state == ERDMA_QPS_IWARP_ERROR)
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
break;
case ERDMA_QP_STATE_CLOSING:
if (attrs->state == ERDMA_QP_STATE_IDLE) {
qp->attrs.state = ERDMA_QP_STATE_IDLE;
} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
qp->attrs.state = ERDMA_QP_STATE_ERROR;
} else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
case ERDMA_QPS_IWARP_CLOSING:
if (params->state == ERDMA_QPS_IWARP_IDLE) {
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, params, mask);
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
} else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
return -ECONNABORTED;
}
break;
@ -186,6 +210,98 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
return ret;
}
static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
struct erdma_mod_qp_params_rocev2 *params,
enum erdma_qpa_mask_rocev2 attr_mask)
{
struct erdma_cmdq_mod_qp_req_rocev2 req;
memset(&req, 0, sizeof(req));
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
params->state);
if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
params->dst_qpn);
if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
req.qkey = params->qkey;
if (attr_mask & ERDMA_QPA_ROCEV2_AV)
erdma_set_av_cfg(&req.av_cfg, &params->av);
if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
req.sq_psn = params->sq_psn;
if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
req.rq_psn = params->rq_psn;
req.attr_mask = attr_mask;
return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
NULL);
}
static void erdma_reset_qp(struct erdma_qp *qp)
{
qp->kern_qp.sq_pi = 0;
qp->kern_qp.sq_ci = 0;
qp->kern_qp.rq_pi = 0;
qp->kern_qp.rq_ci = 0;
memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
if (qp->rcq != qp->scq)
erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
}
int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
struct erdma_mod_qp_params_rocev2 *params,
int attr_mask)
{
struct erdma_dev *dev = to_edev(qp->ibqp.device);
int ret;
ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
if (ret)
return ret;
if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
qp->attrs.rocev2.state = params->state;
if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
qp->attrs.rocev2.qkey = params->qkey;
if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
qp->attrs.rocev2.dst_qpn = params->dst_qpn;
if (attr_mask & ERDMA_QPA_ROCEV2_AV)
memcpy(&qp->attrs.rocev2.av, &params->av,
sizeof(struct erdma_av));
if (rdma_is_kernel_res(&qp->ibqp.res) &&
params->state == ERDMA_QPS_ROCEV2_RESET)
erdma_reset_qp(qp);
if (rdma_is_kernel_res(&qp->ibqp.res) &&
params->state == ERDMA_QPS_ROCEV2_ERROR) {
qp->flags |= ERDMA_QP_IN_FLUSHING;
mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
usecs_to_jiffies(100));
}
return 0;
}
static void erdma_qp_safe_free(struct kref *ref)
{
struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
@ -282,17 +398,57 @@ static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
return 0;
}
static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
const struct ib_send_wr *wr, u32 *hw_op)
{
u32 op = ERDMA_OP_SEND;
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
op = ERDMA_OP_SEND_WITH_IMM;
sqe->imm_data = wr->ex.imm_data;
} else if (wr->opcode == IB_WR_SEND_WITH_INV) {
op = ERDMA_OP_SEND_WITH_INV;
sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
}
*hw_op = op;
}
static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
const struct ib_send_wr *wr, u32 *hw_op)
{
const struct ib_ud_wr *uwr = ud_wr(wr);
struct erdma_ah *ah = to_eah(uwr->ah);
u32 op = ERDMA_OP_SEND;
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
op = ERDMA_OP_SEND_WITH_IMM;
sqe->imm_data = wr->ex.imm_data;
}
*hw_op = op;
sqe->ahn = cpu_to_le32(ah->ahn);
sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
/* Not allowed to send control qkey */
if (uwr->remote_qkey & 0x80000000)
sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
else
sqe->qkey = cpu_to_le32(uwr->remote_qkey);
}
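
The qkey selection in init_send_sqe_ud() follows the usual IB verbs convention: a work-request qkey with its most significant bit set is a "control qkey" and is never placed on the wire; the QP's own qkey is used instead. A one-liner restating the rule (hypothetical helper, userspace types):

#include <stdint.h>

/* If bit 31 of the WR qkey is set, fall back to the QP's local qkey. */
static uint32_t sketch_pick_qkey(uint32_t wr_qkey, uint32_t qp_qkey)
{
	return (wr_qkey & 0x80000000u) ? qp_qkey : wr_qkey;
}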
static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
const struct ib_send_wr *send_wr)
{
u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
u32 idx = *pi & (qp->attrs.sq_size - 1);
enum ib_wr_opcode op = send_wr->opcode;
struct erdma_send_sqe_rc *rc_send_sqe;
struct erdma_send_sqe_ud *ud_send_sqe;
struct erdma_atomic_sqe *atomic_sqe;
struct erdma_readreq_sqe *read_sqe;
struct erdma_reg_mr_sqe *regmr_sge;
struct erdma_write_sqe *write_sqe;
struct erdma_send_sqe *send_sqe;
struct ib_rdma_wr *rdma_wr;
struct erdma_sge *sge;
__le32 *length_field;
@ -301,6 +457,10 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
u32 attrs;
int ret;
if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
send_wr->opcode != IB_WR_SEND_WITH_IMM)
return -EINVAL;
entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
SQEBB_SHIFT);
@ -374,21 +534,20 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
send_sqe = (struct erdma_send_sqe *)entry;
hw_op = ERDMA_OP_SEND;
if (op == IB_WR_SEND_WITH_IMM) {
hw_op = ERDMA_OP_SEND_WITH_IMM;
send_sqe->imm_data = send_wr->ex.imm_data;
} else if (op == IB_WR_SEND_WITH_INV) {
hw_op = ERDMA_OP_SEND_WITH_INV;
send_sqe->invalid_stag =
cpu_to_le32(send_wr->ex.invalidate_rkey);
if (qp->ibqp.qp_type == IB_QPT_RC) {
rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
length_field = &rc_send_sqe->length;
wqe_size = sizeof(struct erdma_send_sqe_rc);
} else {
ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
length_field = &ud_send_sqe->length;
wqe_size = sizeof(struct erdma_send_sqe_ud);
}
wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
length_field = &send_sqe->length;
wqe_size = sizeof(struct erdma_send_sqe);
sgl_offset = wqe_size;
sgl_offset = wqe_size;
wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
break;
case IB_WR_REG_MR:
wqe_hdr |=

View File

@ -55,6 +55,13 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
ilog2(qp->attrs.rq_size)) |
FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
if (qp->ibqp.qp_type == IB_QPT_RC)
req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
ERDMA_QPT_RC);
else
req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
ERDMA_QPT_UD);
if (rdma_is_kernel_res(&qp->ibqp.res)) {
u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
@ -121,8 +128,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
&resp1);
if (!err)
qp->attrs.cookie =
if (!err && erdma_device_iwarp(dev))
qp->attrs.iwarp.cookie =
FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
return err;
@ -336,6 +343,11 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
if (erdma_device_rocev2(dev)) {
attr->max_pkeys = ERDMA_MAX_PKEYS;
attr->max_ah = dev->attrs.max_ah;
}
if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
attr->atomic_cap = IB_ATOMIC_GLOB;
@ -367,7 +379,14 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
memset(attr, 0, sizeof(*attr));
attr->gid_tbl_len = 1;
if (erdma_device_iwarp(dev)) {
attr->gid_tbl_len = 1;
} else {
attr->gid_tbl_len = dev->attrs.max_gid;
attr->ip_gids = true;
attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
}
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
attr->max_msg_sz = -1;
@ -395,8 +414,18 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
struct ib_port_immutable *port_immutable)
{
port_immutable->gid_tbl_len = 1;
port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
struct erdma_dev *dev = to_edev(ibdev);
if (erdma_device_iwarp(dev)) {
port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
port_immutable->gid_tbl_len = 1;
} else {
port_immutable->core_cap_flags =
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
port_immutable->gid_tbl_len = dev->attrs.max_gid;
port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
}
return 0;
}
@ -459,7 +488,11 @@ static int erdma_qp_validate_cap(struct erdma_dev *dev,
static int erdma_qp_validate_attr(struct erdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
if (attrs->qp_type != IB_QPT_RC)
if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
return -EOPNOTSUPP;
if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
return -EOPNOTSUPP;
if (attrs->srq)
@ -937,7 +970,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
udata, struct erdma_ucontext, ibucontext);
struct erdma_ureq_create_qp ureq;
struct erdma_uresp_create_qp uresp;
int ret;
void *old_entry;
int ret = 0;
ret = erdma_qp_validate_cap(dev, attrs);
if (ret)
@ -956,9 +990,16 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
kref_init(&qp->ref);
init_completion(&qp->safe_free);
ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
XA_LIMIT(1, dev->attrs.max_qp - 1),
&dev->next_alloc_qpn, GFP_KERNEL);
if (qp->ibqp.qp_type == IB_QPT_GSI) {
old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
if (xa_is_err(old_entry))
ret = xa_err(old_entry);
} else {
ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
XA_LIMIT(1, dev->attrs.max_qp - 1),
&dev->next_alloc_qpn, GFP_KERNEL);
}
if (ret < 0) {
ret = -ENOMEM;
goto err_out;
@ -995,7 +1036,12 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.max_send_sge = attrs->cap.max_send_sge;
qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
qp->attrs.state = ERDMA_QP_STATE_IDLE;
if (erdma_device_iwarp(qp->dev))
qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
else
qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
ret = create_qp_cmd(uctx, qp);
@ -1269,13 +1315,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct erdma_dev *dev = to_edev(ibqp->device);
struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
udata, struct erdma_ucontext, ibucontext);
struct erdma_qp_attrs qp_attrs;
int err;
struct erdma_cmdq_destroy_qp_req req;
union erdma_mod_qp_params params;
int err;
down_write(&qp->state_lock);
qp_attrs.state = ERDMA_QP_STATE_ERROR;
erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
if (erdma_device_iwarp(dev)) {
params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
erdma_modify_qp_state_iwarp(qp, &params.iwarp,
ERDMA_QPA_IWARP_STATE);
} else {
params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
erdma_modify_qp_state_rocev2(qp, &params.rocev2,
ERDMA_QPA_ROCEV2_STATE);
}
up_write(&qp->state_lock);
cancel_delayed_work_sync(&qp->reflush_dwork);
@ -1506,69 +1559,248 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
atomic_dec(&dev->num_ctx);
}
static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
struct erdma_av *av, u16 sport)
{
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
av->port = rdma_ah_get_port_num(ah_attr);
av->sgid_index = grh->sgid_index;
av->hop_limit = grh->hop_limit;
av->traffic_class = grh->traffic_class;
av->sl = rdma_ah_get_sl(ah_attr);
av->flow_label = grh->flow_label;
av->udp_sport = sport;
ether_addr_copy(av->dmac, ah_attr->roce.dmac);
memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
av->ntype = ERDMA_NETWORK_TYPE_IPV4;
else
av->ntype = ERDMA_NETWORK_TYPE_IPV6;
}
static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
{
ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
rdma_ah_set_sl(ah_attr, av->sl);
rdma_ah_set_port_num(ah_attr, av->port);
rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
av->hop_limit, av->traffic_class);
rdma_ah_set_dgid_raw(ah_attr, av->dgid);
}
static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
[ERDMA_PROTO_IWARP] = {
[IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
[IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
[IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
[IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
[IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
[IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
[IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
},
[ERDMA_PROTO_ROCEV2] = {
[IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
[IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
[IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
[IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
[IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
[IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
[IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
},
};
static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
[ERDMA_PROTO_IWARP] = {
[ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
[ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
[ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
[ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
[ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
[ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
},
[ERDMA_PROTO_ROCEV2] = {
[ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
[ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
[ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
[ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
[ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
[ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
[ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
},
};
static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
{
return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
}
static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
{
return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
}
static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
{
return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
}
static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
{
return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
}
static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
enum ib_qp_state cur_state, nxt_state;
struct erdma_dev *dev = qp->dev;
int ret = -EINVAL;
if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
ret = -EOPNOTSUPP;
goto out;
}
if ((attr_mask & IB_QP_PORT) &&
!rdma_is_port_valid(&dev->ibdev, attr->port_num))
goto out;
if (erdma_device_rocev2(dev)) {
cur_state = (attr_mask & IB_QP_CUR_STATE) ?
attr->cur_qp_state :
rocev2_to_ib_qps(qp->attrs.rocev2.state);
nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
cur_state;
if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
attr_mask))
goto out;
if ((attr_mask & IB_QP_AV) &&
erdma_check_gid_attr(
rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
goto out;
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= ERDMA_MAX_PKEYS)
goto out;
}
return 0;
out:
return ret;
}
static void erdma_init_mod_qp_params_rocev2(
struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
{
enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
enum erdma_qps_rocev2 cur_state, nxt_state;
u16 udp_sport;
if (ib_attr_mask & IB_QP_CUR_STATE)
cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
else
cur_state = qp->attrs.rocev2.state;
if (ib_attr_mask & IB_QP_STATE)
nxt_state = ib_to_rocev2_qps(attr->qp_state);
else
nxt_state = cur_state;
to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
params->state = nxt_state;
if (ib_attr_mask & IB_QP_QKEY) {
to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
params->qkey = attr->qkey;
}
if (ib_attr_mask & IB_QP_SQ_PSN) {
to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
params->sq_psn = attr->sq_psn;
}
if (ib_attr_mask & IB_QP_RQ_PSN) {
to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
params->rq_psn = attr->rq_psn;
}
if (ib_attr_mask & IB_QP_DEST_QPN) {
to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
params->dst_qpn = attr->dest_qp_num;
}
if (ib_attr_mask & IB_QP_AV) {
to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
QP_ID(qp), params->dst_qpn);
erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
}
*erdma_attr_mask = to_modify_attrs;
}
int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
struct erdma_qp_attrs new_attrs;
enum erdma_qp_attr_mask erdma_attr_mask = 0;
struct erdma_qp *qp = to_eqp(ibqp);
int ret = 0;
if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
memset(&new_attrs, 0, sizeof(new_attrs));
if (attr_mask & IB_QP_STATE) {
new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
}
union erdma_mod_qp_params params;
int ret = 0, erdma_attr_mask = 0;
down_write(&qp->state_lock);
ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
ret = erdma_check_qp_attrs(qp, attr, attr_mask);
if (ret)
goto out;
if (erdma_device_iwarp(qp->dev)) {
if (attr_mask & IB_QP_STATE) {
erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
}
ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
erdma_attr_mask);
} else {
erdma_init_mod_qp_params_rocev2(
qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
erdma_attr_mask);
}
out:
up_write(&qp->state_lock);
return ret;
}
static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
{
switch (qp->attrs.state) {
case ERDMA_QP_STATE_IDLE:
return IB_QPS_INIT;
case ERDMA_QP_STATE_RTR:
return IB_QPS_RTR;
case ERDMA_QP_STATE_RTS:
return IB_QPS_RTS;
case ERDMA_QP_STATE_CLOSING:
return IB_QPS_ERR;
case ERDMA_QP_STATE_TERMINATE:
return IB_QPS_ERR;
case ERDMA_QP_STATE_ERROR:
return IB_QPS_ERR;
default:
return IB_QPS_ERR;
}
if (erdma_device_iwarp(qp->dev))
return iwarp_to_ib_qps(qp->attrs.iwarp.state);
else
return rocev2_to_ib_qps(qp->attrs.rocev2.state);
}
int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
struct erdma_cmdq_query_qp_req_rocev2 req;
struct erdma_dev *dev;
struct erdma_qp *qp;
u64 resp;
int ret;
if (ibqp && qp_attr && qp_init_attr) {
qp = to_eqp(ibqp);
@ -1595,8 +1827,37 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
qp_attr->qp_state = query_qp_state(qp);
qp_attr->cur_qp_state = query_qp_state(qp);
if (erdma_device_rocev2(dev)) {
/* Query hardware to get some attributes */
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_QUERY_QP);
req.qpn = QP_ID(qp);
ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp,
NULL);
if (ret)
return ret;
qp_attr->sq_psn =
FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp);
qp_attr->rq_psn =
FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp);
qp_attr->qp_state = rocev2_to_ib_qps(
FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp));
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->sq_draining = FIELD_GET(
ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp);
qp_attr->pkey_index = 0;
qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
if (qp->ibqp.qp_type == IB_QPT_RC)
erdma_av_to_attr(&qp->attrs.rocev2.av,
&qp_attr->ah_attr);
} else {
qp_attr->qp_state = query_qp_state(qp);
qp_attr->cur_qp_state = qp_attr->qp_state;
}
return 0;
}
@ -1839,3 +2100,156 @@ int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
const union ib_gid *gid)
{
struct erdma_cmdq_set_gid_req req;
u8 ntype;
req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
if (op == ERDMA_SET_GID_OP_ADD) {
if (ipv6_addr_v4mapped((struct in6_addr *)gid))
ntype = ERDMA_NETWORK_TYPE_IPV4;
else
ntype = ERDMA_NETWORK_TYPE_IPV6;
req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
}
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_SET_GID);
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}
int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct erdma_dev *dev = to_edev(attr->device);
int ret;
ret = erdma_check_gid_attr(attr);
if (ret)
return ret;
return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
&attr->gid);
}
int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
attr->index, NULL);
}
int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
{
if (index >= ERDMA_MAX_PKEYS)
return -EINVAL;
*pkey = ERDMA_DEFAULT_PKEY;
return 0;
}
void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
{
av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
av_cfg->traffic_class = av->traffic_class;
av_cfg->hop_limit = av->hop_limit;
av_cfg->sl = av->sl;
av_cfg->udp_sport = av->udp_sport;
av_cfg->sgid_index = av->sgid_index;
ether_addr_copy(av_cfg->dmac, av->dmac);
memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
}
int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
const struct ib_global_route *grh =
rdma_ah_read_grh(init_attr->ah_attr);
struct erdma_dev *dev = to_edev(ibah->device);
struct erdma_pd *pd = to_epd(ibah->pd);
struct erdma_ah *ah = to_eah(ibah);
struct erdma_cmdq_create_ah_req req;
u32 udp_sport;
int ret;
ret = erdma_check_gid_attr(grh->sgid_attr);
if (ret)
return ret;
ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
if (ret < 0)
return ret;
ah->ahn = ret;
if (grh->flow_label)
udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
else
udp_sport =
IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_CREATE_AH);
req.pdn = pd->pdn;
req.ahn = ah->ahn;
erdma_set_av_cfg(&req.av_cfg, &ah->av);
ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
if (ret) {
erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
return ret;
}
return 0;
}
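
On the UDP source port picked in erdma_create_ah() above: RoCEv2 packets are expected to use source ports in the range starting at IB_ROCE_UDP_ENCAP_VALID_PORT_MIN, so both branches land in that window — either a hash of the GRH flow label via rdma_flow_label_to_udp_sport(), or the AH number folded into the low 14 bits. A small sketch of the fallback branch, assuming the customary 0xC000 floor (names invented):

#include <stdint.h>

#define SKETCH_ROCE_UDP_SPORT_MIN 0xC000u	/* assumed valid-port floor */

/* Spread AHs across the 0xC000..0xFFFF source-port window. */
static uint16_t sketch_ah_udp_sport(uint32_t ahn)
{
	return (uint16_t)(SKETCH_ROCE_UDP_SPORT_MIN + (ahn & 0x3FFFu));
}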
int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct erdma_dev *dev = to_edev(ibah->device);
struct erdma_pd *pd = to_epd(ibah->pd);
struct erdma_ah *ah = to_eah(ibah);
struct erdma_cmdq_destroy_ah_req req;
int ret;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_DESTROY_AH);
req.pdn = pd->pdn;
req.ahn = ah->ahn;
ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
if (ret)
return ret;
erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
return 0;
}
int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct erdma_ah *ah = to_eah(ibah);
memset(ah_attr, 0, sizeof(*ah_attr));
erdma_av_to_attr(&ah->av, ah_attr);
return 0;
}

View File

@ -136,6 +136,25 @@ struct erdma_user_dbrecords_page {
int refcnt;
};
struct erdma_av {
u8 port;
u8 hop_limit;
u8 traffic_class;
u8 sl;
u8 sgid_index;
u16 udp_sport;
u32 flow_label;
u8 dmac[ETH_ALEN];
u8 dgid[ERDMA_ROCEV2_GID_SIZE];
enum erdma_network_type ntype;
};
struct erdma_ah {
struct ib_ah ibah;
struct erdma_av av;
u32 ahn;
};
struct erdma_uqp {
struct erdma_mem sq_mem;
struct erdma_mem rq_mem;
@ -176,33 +195,91 @@ struct erdma_kqp {
u8 sig_all;
};
enum erdma_qp_state {
ERDMA_QP_STATE_IDLE = 0,
ERDMA_QP_STATE_RTR = 1,
ERDMA_QP_STATE_RTS = 2,
ERDMA_QP_STATE_CLOSING = 3,
ERDMA_QP_STATE_TERMINATE = 4,
ERDMA_QP_STATE_ERROR = 5,
ERDMA_QP_STATE_UNDEF = 7,
ERDMA_QP_STATE_COUNT = 8
enum erdma_qps_iwarp {
ERDMA_QPS_IWARP_IDLE = 0,
ERDMA_QPS_IWARP_RTR = 1,
ERDMA_QPS_IWARP_RTS = 2,
ERDMA_QPS_IWARP_CLOSING = 3,
ERDMA_QPS_IWARP_TERMINATE = 4,
ERDMA_QPS_IWARP_ERROR = 5,
ERDMA_QPS_IWARP_UNDEF = 6,
ERDMA_QPS_IWARP_COUNT = 7,
};
enum erdma_qp_attr_mask {
ERDMA_QP_ATTR_STATE = (1 << 0),
ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2),
ERDMA_QP_ATTR_ORD = (1 << 3),
ERDMA_QP_ATTR_IRD = (1 << 4),
ERDMA_QP_ATTR_SQ_SIZE = (1 << 5),
ERDMA_QP_ATTR_RQ_SIZE = (1 << 6),
ERDMA_QP_ATTR_MPA = (1 << 7)
enum erdma_qpa_mask_iwarp {
ERDMA_QPA_IWARP_STATE = (1 << 0),
ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2),
ERDMA_QPA_IWARP_ORD = (1 << 3),
ERDMA_QPA_IWARP_IRD = (1 << 4),
ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5),
ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6),
ERDMA_QPA_IWARP_MPA = (1 << 7),
ERDMA_QPA_IWARP_CC = (1 << 8),
};
enum erdma_qps_rocev2 {
ERDMA_QPS_ROCEV2_RESET = 0,
ERDMA_QPS_ROCEV2_INIT = 1,
ERDMA_QPS_ROCEV2_RTR = 2,
ERDMA_QPS_ROCEV2_RTS = 3,
ERDMA_QPS_ROCEV2_SQD = 4,
ERDMA_QPS_ROCEV2_SQE = 5,
ERDMA_QPS_ROCEV2_ERROR = 6,
ERDMA_QPS_ROCEV2_COUNT = 7,
};
enum erdma_qpa_mask_rocev2 {
ERDMA_QPA_ROCEV2_STATE = (1 << 0),
ERDMA_QPA_ROCEV2_QKEY = (1 << 1),
ERDMA_QPA_ROCEV2_AV = (1 << 2),
ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3),
ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4),
ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5),
};
enum erdma_qp_flags {
ERDMA_QP_IN_FLUSHING = (1 << 0),
};
#define ERDMA_QP_ACTIVE 0
#define ERDMA_QP_PASSIVE 1
struct erdma_mod_qp_params_iwarp {
enum erdma_qps_iwarp state;
enum erdma_cc_alg cc;
u8 qp_type;
u8 pd_len;
u32 irq_size;
u32 orq_size;
};
struct erdma_qp_attrs_iwarp {
enum erdma_qps_iwarp state;
u32 cookie;
};
struct erdma_mod_qp_params_rocev2 {
enum erdma_qps_rocev2 state;
u32 qkey;
u32 sq_psn;
u32 rq_psn;
u32 dst_qpn;
struct erdma_av av;
};
union erdma_mod_qp_params {
struct erdma_mod_qp_params_iwarp iwarp;
struct erdma_mod_qp_params_rocev2 rocev2;
};
struct erdma_qp_attrs_rocev2 {
enum erdma_qps_rocev2 state;
u32 qkey;
u32 dst_qpn;
struct erdma_av av;
};
struct erdma_qp_attrs {
enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
u32 sq_size;
u32 rq_size;
@ -210,11 +287,10 @@ struct erdma_qp_attrs {
u32 irq_size;
u32 max_send_sge;
u32 max_recv_sge;
u32 cookie;
#define ERDMA_QP_ACTIVE 0
#define ERDMA_QP_PASSIVE 1
u8 qp_type;
u8 pd_len;
union {
struct erdma_qp_attrs_iwarp iwarp;
struct erdma_qp_attrs_rocev2 rocev2;
};
};
struct erdma_qp {
@ -286,11 +362,25 @@ static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
void erdma_qp_get(struct erdma_qp *qp);
void erdma_qp_put(struct erdma_qp *qp);
int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask);
int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
struct erdma_mod_qp_params_iwarp *params,
int mask);
int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
struct erdma_mod_qp_params_rocev2 *params,
int attr_mask);
void erdma_qp_llp_close(struct erdma_qp *qp);
void erdma_qp_cm_drop(struct erdma_qp *qp);
static inline bool erdma_device_iwarp(struct erdma_dev *dev)
{
return dev->proto == ERDMA_PROTO_IWARP;
}
static inline bool erdma_device_rocev2(struct erdma_dev *dev)
{
return dev->proto == ERDMA_PROTO_ROCEV2;
}
static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
{
return container_of(ibctx, struct erdma_ucontext, ibucontext);
@ -316,6 +406,21 @@ static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
return container_of(ibcq, struct erdma_cq, ibcq);
}
static inline struct erdma_ah *to_eah(struct ib_ah *ibah)
{
return container_of(ibah, struct erdma_ah, ibah);
}
static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr)
{
u8 ntype = rdma_gid_attr_network_type(attr);
if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6)
return -EINVAL;
return 0;
}
static inline struct erdma_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *ibmmap)
{
@ -360,6 +465,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
@ -370,5 +476,15 @@ struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
u32 port_num);
int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
u32 port, int index);
enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev,
u32 port_num);
int erdma_add_gid(const struct ib_gid_attr *attr, void **context);
int erdma_del_gid(const struct ib_gid_attr *attr, void **context);
int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av);
int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
#endif
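The new RoCEv2-only verbs declared above (GID table, P_Key, AH handling, link layer) would typically be wired in through an ib_device_ops table registered only when the device runs RoCEv2. The sketch below is an assumption about how that registration might look; the table and helper names are illustrative, not the driver's actual code.

static const struct ib_device_ops example_rocev2_ops = {
        .add_gid        = erdma_add_gid,
        .del_gid        = erdma_del_gid,
        .query_pkey     = erdma_query_pkey,
        .get_link_layer = erdma_get_link_layer,
        .create_ah      = erdma_create_ah,
        .destroy_ah     = erdma_destroy_ah,
        .query_ah       = erdma_query_ah,
};

static void example_set_proto_ops(struct erdma_dev *dev)
{
        /* Only a RoCEv2 device exposes GID/P_Key/AH verbs. */
        if (erdma_device_rocev2(dev))
                ib_set_device_ops(&dev->ibdev, &example_rocev2_ops);
}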

View File

@ -150,8 +150,12 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
return PTR_ERR(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (shift < 0) {
err = shift;
goto err_buf;
}
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;

View File

@ -351,7 +351,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
struct mlx4_port_gid_table *port_gid_table;
int ret = 0;
int hw_update = 0;
struct gid_entry *gids;
struct gid_entry *gids = NULL;
if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
@ -389,10 +389,10 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
}
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
if (gids)
ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
kfree(gids);
}
kfree(gids);
return ret;
}
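The gids = NULL initialization above is what lets the function drop the conditional around the final kfree(): kfree(NULL) is defined to be a no-op, so a single unconditional cleanup path is safe whether or not the copy was ever allocated. A generic illustration of the pattern (not driver code):

static int example_conditional_alloc(bool need_copy)
{
        u32 *copy = NULL;
        int ret = 0;

        if (need_copy) {
                copy = kmalloc(sizeof(*copy), GFP_KERNEL);
                if (!copy)
                        ret = -ENOMEM;
        }

        /* kfree(NULL) is a no-op, so no conditional free is needed. */
        kfree(copy);
        return ret;
}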

View File

@ -667,6 +667,9 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
/* 4k - 4G */
#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12))
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@ -936,8 +939,19 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
{
return 0;
}
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start,
int *num_of_mtts)
{
unsigned long pg_sz;
pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start);
if (!pg_sz)
return -EOPNOTSUPP;
*num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz);
return order_base_2(pg_sz);
}
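The inline helper now leans on the RDMA core: ib_umem_find_best_pgsz() picks the largest block size allowed by MLX4_PAGE_SIZE_SUPPORTED that fits the umem layout, and the helper returns its log2 as the MTT page shift or a negative errno when nothing fits. A sketch of the calling convention, mirroring the CQ/QP/MR call sites updated in this series (the function name is illustrative):

static int example_init_mtt(struct mlx4_ib_dev *dev, struct ib_umem *umem,
                            u64 start, struct mlx4_mtt *mtt)
{
        int shift, n;

        shift = mlx4_ib_umem_calc_optimal_mtt_size(umem, start, &n);
        if (shift < 0)
                return shift;   /* e.g. -EOPNOTSUPP: no supported page size */

        /* n DMA blocks of (1 << shift) bytes back the region. */
        return mlx4_mtt_init(dev->dev, n, shift, mtt);
}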
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);

View File

@ -87,286 +87,20 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
enum {
MLX4_MAX_MTT_SHIFT = 31
};
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
struct mlx4_mtt *mtt,
u64 mtt_size, u64 mtt_shift, u64 len,
u64 cur_start_addr, u64 *pages,
int *start_index, int *npages)
{
u64 cur_end_addr = cur_start_addr + len;
u64 cur_end_addr_aligned = 0;
u64 mtt_entries;
int err = 0;
int k;
len += (cur_start_addr & (mtt_size - 1ULL));
cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
len += (cur_end_addr_aligned - cur_end_addr);
if (len & (mtt_size - 1ULL)) {
pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
len, mtt_size);
return -EINVAL;
}
mtt_entries = (len >> mtt_shift);
/*
* Align the MTT start address to the mtt_size.
* Required to handle cases when the MR starts in the middle of an MTT
* record. Was not required in old code since the physical addresses
* provided by the dma subsystem were page aligned, which was also the
* MTT size.
*/
cur_start_addr = round_down(cur_start_addr, mtt_size);
/* A new block is started ... */
for (k = 0; k < mtt_entries; ++k) {
pages[*npages] = cur_start_addr + (mtt_size * k);
(*npages)++;
/*
* Be friendly to mlx4_write_mtt() and pass it chunks of
* appropriate size.
*/
if (*npages == PAGE_SIZE / sizeof(u64)) {
err = mlx4_write_mtt(dev->dev, mtt, *start_index,
*npages, pages);
if (err)
return err;
(*start_index) += *npages;
*npages = 0;
}
}
return 0;
}
static inline u64 alignment_of(u64 ptr)
{
return ilog2(ptr & (~(ptr - 1)));
}
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
u64 current_block_end,
u64 block_shift)
{
/* Check whether the alignment of the new block is aligned as well as
* the previous block.
* Block address must start with zeros till size of entity_size.
*/
if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
/*
* It is not as well aligned as the previous block-reduce the
* mtt size accordingly. Here we take the last right bit which
* is 1.
*/
block_shift = alignment_of(next_block_start);
/*
* Check whether the alignment of the end of previous block - is it
* aligned as well as the start of the block
*/
if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
/*
* It is not as well aligned as the start of the block -
* reduce the mtt size accordingly.
*/
block_shift = alignment_of(current_block_end);
return block_shift;
}
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
u64 len = 0;
int err = 0;
u64 mtt_size;
u64 cur_start_addr = 0;
u64 mtt_shift;
int start_index = 0;
int npages = 0;
struct scatterlist *sg;
int i;
struct ib_block_iter biter;
int err, i = 0;
u64 addr;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
mtt_shift = mtt->page_shift;
mtt_size = 1ULL << mtt_shift;
for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
if (cur_start_addr + len == sg_dma_address(sg)) {
/* still the same block */
len += sg_dma_len(sg);
continue;
}
/*
* A new block is started ...
* If len is malaligned, write an extra mtt entry to cover the
* misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
mtt_shift, len,
cur_start_addr,
pages, &start_index,
&npages);
rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
addr = rdma_block_iter_dma_address(&biter);
err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
if (err)
goto out;
cur_start_addr = sg_dma_address(sg);
len = sg_dma_len(sg);
return err;
}
/* Handle the last block */
if (len > 0) {
/*
* If len is malaligned, write an extra mtt entry to cover
* the misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
mtt_shift, len,
cur_start_addr, pages,
&start_index, &npages);
if (err)
goto out;
}
if (npages)
err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
out:
free_page((unsigned long) pages);
return err;
}
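Because the hunk above interleaves the removed scatterlist walk with its replacement, here is the rewritten loop in isolation, as a sketch assuming the same mlx4_write_mtt() semantics: the umem is walked in blocks of the MTT page size and one DMA address is programmed per entry, which removes the temporary page array and the manual block-merging logic.

static int example_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                             struct ib_umem *umem)
{
        struct ib_block_iter biter;
        int err, i = 0;
        u64 addr;

        /* Walk the umem in MTT-page-sized DMA blocks. */
        rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
                addr = rdma_block_iter_dma_address(&biter);
                err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
                if (err)
                        return err;
        }
        return 0;
}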
/*
* Calculate optimal mtt size based on contiguous pages.
* Function will return also the number of pages that are not aligned to the
* calculated mtt_size to be added to total number of pages. For that we should
* check the first chunk length & last chunk length and if not aligned to
* mtt_size we should increment the non_aligned_pages number. All chunks in the
* middle already handled as part of mtt shift calculation for both their start
* & end addresses.
*/
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts)
{
u64 block_shift = MLX4_MAX_MTT_SHIFT;
u64 min_shift = PAGE_SHIFT;
u64 last_block_aligned_end = 0;
u64 current_block_start = 0;
u64 first_block_start = 0;
u64 current_block_len = 0;
u64 last_block_end = 0;
struct scatterlist *sg;
u64 current_block_end;
u64 misalignment_bits;
u64 next_block_start;
u64 total_len = 0;
int i;
*num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
/*
* Initialization - save the first chunk start as the
* current_block_start - block means contiguous pages.
*/
if (current_block_len == 0 && current_block_start == 0) {
current_block_start = sg_dma_address(sg);
first_block_start = current_block_start;
/*
* Find the bits that are different between the physical
* address and the virtual address for the start of the
* MR.
* umem_get aligned the start_va to a page boundary.
* Therefore, we need to align the start va to the same
* boundary.
* misalignment_bits is needed to handle the case of a
* single memory region. In this case, the rest of the
* logic will not reduce the block size. If we use a
* block size which is bigger than the alignment of the
* misalignment bits, we might use the virtual page
* number instead of the physical page number, resulting
* in access to the wrong data.
*/
misalignment_bits =
(start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
current_block_start;
block_shift = min(alignment_of(misalignment_bits),
block_shift);
}
/*
* Go over the scatter entries and check if they continue the
* previous scatter entry.
*/
next_block_start = sg_dma_address(sg);
current_block_end = current_block_start + current_block_len;
/* If we have a split (non-contig.) between two blocks */
if (current_block_end != next_block_start) {
block_shift = mlx4_ib_umem_calc_block_mtt
(next_block_start,
current_block_end,
block_shift);
/*
* If we reached the minimum shift for 4k page we stop
* the loop.
*/
if (block_shift <= min_shift)
goto end;
/*
* If not saved yet we are in first block - we save the
* length of first block to calculate the
* non_aligned_pages number at the end.
*/
total_len += current_block_len;
/* Start a new block */
current_block_start = next_block_start;
current_block_len = sg_dma_len(sg);
continue;
}
/* The scatter entry is another part of the current block,
* increase the block size.
* An entry in the scatter can be larger than 4k (page) as of
* dma mapping which merge some blocks together.
*/
current_block_len += sg_dma_len(sg);
}
/* Account for the last block in the total len */
total_len += current_block_len;
/* Add to the first block the misalignment that it suffers from. */
total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
last_block_end = current_block_start + current_block_len;
last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
total_len += (last_block_aligned_end - last_block_end);
if (total_len & ((1ULL << block_shift) - 1ULL))
pr_warn("misaligned total length detected (%llu, %llu)!",
total_len, block_shift);
*num_of_mtts = total_len >> block_shift;
end:
if (block_shift < min_shift) {
/*
* If shift is less than the min we set a warning and return the
* min shift.
*/
pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
block_shift = min_shift;
}
return block_shift;
return 0;
}
static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
@ -424,6 +158,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
if (shift < 0) {
err = shift;
goto err_umem;
}
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);

View File

@ -925,8 +925,12 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (shift < 0) {
err = shift;
goto err_buf;
}
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
@ -1108,8 +1112,12 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (shift < 0) {
err = shift;
goto err_buf;
}
err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;

View File

@ -669,6 +669,12 @@ struct mlx5_ib_mkey {
#define mlx5_update_odp_stats(mr, counter_name, value) \
atomic64_add(value, &((mr)->odp_stats.counter_name))
#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \
do { \
mlx5_update_odp_stats(mr, counter_name, value); \
atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \
} while (0)
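The do { ... } while (0) wrapper keeps the two-statement macro safe inside unbraced if bodies, and the ## pasting derives the companion counter name from the one passed in. An illustrative expansion (the helper is hypothetical): with counter_name == faults, the call below adds "pages" to odp_stats.faults and bumps odp_stats.faults_handled by one for the handled event.

static void example_record_fault(struct mlx5_ib_mr *mr, int pages)
{
        /* Updates both mr->odp_stats.faults and mr->odp_stats.faults_handled. */
        mlx5_update_odp_stats_with_handled(mr, faults, pages);
}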
struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_ib_mkey mmkey;

View File

@ -313,7 +313,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5_update_odp_stats(mr, invalidations, invalidations);
mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
/*
* We are now sure that the device will not access the
@ -997,7 +997,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
if (ret < 0)
goto end;
mlx5_update_odp_stats(mr, faults, ret);
mlx5_update_odp_stats_with_handled(mr, faults, ret);
npages += ret;
ret = 0;
@ -1529,7 +1529,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
goto err;
}
mlx5_update_odp_stats(mr, faults, ret);
mlx5_update_odp_stats_with_handled(mr, faults, ret);
mlx5r_deref_odp_mkey(mmkey);
if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)

View File

@ -95,10 +95,19 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
atomic64_read(&mr->odp_stats.faults)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(
msg, "page_faults_handled",
atomic64_read(&mr->odp_stats.faults_handled)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(
msg, "page_invalidations_handled",
atomic64_read(&mr->odp_stats.invalidations_handled)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
atomic64_read(&mr->odp_stats.prefetch)))
goto err_table;

View File

@ -2256,7 +2256,9 @@ struct rdma_netdev_alloc_params {
struct ib_odp_counters {
atomic64_t faults;
atomic64_t faults_handled;
atomic64_t invalidations;
atomic64_t invalidations_handled;
atomic64_t prefetch;
};
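With the paired fields above, the base counter accumulates whatever per-event value the caller passes to the update macro, while the *_handled twin counts how many events were processed. A hedged sketch of reading them back (helper name illustrative):

static void example_dump_odp(const struct ib_odp_counters *odp)
{
        pr_info("faults: %lld total over %lld handled events\n",
                atomic64_read(&odp->faults),
                atomic64_read(&odp->faults_handled));
        pr_info("invalidations: %lld total over %lld handled events\n",
                atomic64_read(&odp->invalidations),
                atomic64_read(&odp->invalidations_handled));
}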