v5.19 pull request

Small collection of incremental improvement patches:
 
 - Minor code cleanup patches, comment improvements, etc from static tools
 
 - Clean up some of the kernel caps, reducing the historical stealth uAPI
   leftovers
 
 - Bug fixes and minor changes for rdmavt, hns, rxe, irdma
 
 - Remove unimplemented cruft from rxe
 
 - Reorganize UMR QP code in mlx5 to avoid going through the IB verbs layer
 
 - flush_workqueue(system_unbound_wq) removal
 
 - Ensure rxe waits for objects to be unused before allowing the core to
   free them
 
 - Several rc quality bug fixes for hfi1
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCYo+NxgAKCRCFwuHvBreF
 YbqSAQDJ+QolaATUvOQUPLbuLopUCJLe95VS15Kl3SNXiVUUFAEA8DLL1s6+WShd
 AgypUxGHipx5BAytrn45/WiwuDeEbQ8=
 =jgTl
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "Small collection of incremental improvement patches:

   - Minor code cleanup patches, comment improvements, etc from static
     tools

   - Clean up some of the kernel caps, reducing the historical stealth
     uAPI leftovers

   - Bug fixes and minor changes for rdmavt, hns, rxe, irdma

   - Remove unimplemented cruft from rxe

   - Reorganize UMR QP code in mlx5 to avoid going through the IB verbs
     layer

   - flush_workqueue(system_unbound_wq) removal

   - Ensure rxe waits for objects to be unused before allowing the core
     to free them

   - Several rc quality bug fixes for hfi1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (67 commits)
  RDMA/rtrs-clt: Fix one kernel-doc comment
  RDMA/hfi1: Remove all traces of diagpkt support
  RDMA/hfi1: Consolidate software versions
  RDMA/hfi1: Remove pointless driver version
  RDMA/hfi1: Fix potential integer multiplication overflow errors
  RDMA/hfi1: Prevent panic when SDMA is disabled
  RDMA/hfi1: Prevent use of lock before it is initialized
  RDMA/rxe: Fix an error handling path in rxe_get_mcg()
  IB/core: Fix typo in comment
  RDMA/core: Fix typo in comment
  IB/hf1: Fix typo in comment
  IB/qib: Fix typo in comment
  IB/iser: Fix typo in comment
  RDMA/mlx4: Avoid flush_scheduled_work() usage
  IB/isert: Avoid flush_scheduled_work() usage
  RDMA/mlx5: Remove duplicate pointer assignment in mlx5_ib_alloc_implicit_mr()
  RDMA/qedr: Remove unnecessary synchronize_irq() before free_irq()
  RDMA/hns: Use hr_reg_read() instead of remaining roce_get_xxx()
  RDMA/hns: Use hr_reg_xxx() instead of remaining roce_set_xxx()
  RDMA/irdma: Add SW mechanism to generate completions on error
  ...
Linus Torvalds 2022-05-26 21:08:40 -07:00
commit 780d8ce716
88 changed files with 1960 additions and 1994 deletions

View File

@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@ -2751,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
static int __init ib_core_init(void)
{
int ret;
int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!ib_unreg_wq)
goto err;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
}
if (!ib_comp_wq)
goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
if (!ib_comp_unbound_wq) {
ret = -ENOMEM;
if (!ib_comp_unbound_wq)
goto err_comp;
}
ret = class_register(&ib_class);
if (ret) {
@ -2831,6 +2833,8 @@ static int __init ib_core_init(void)
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
err_unbound:
destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@ -2852,7 +2856,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
flush_workqueue(system_unbound_wq);
destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}
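
The hunks above replace flush_workqueue(system_unbound_wq) with a subsystem-private unbound workqueue: unregistration work is queued on ib_unreg_wq, and cleanup drains only that queue via destroy_workqueue(). A minimal sketch of the same pattern, using illustrative example_* names rather than the ib_core ones:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_unreg_wq;

static void example_unreg_fn(struct work_struct *work)
{
	/* deferred teardown would run here */
}

static DECLARE_WORK(example_unreg_work, example_unreg_fn);

static int __init example_init(void)
{
	example_unreg_wq = alloc_workqueue("example-unreg-wq", WQ_UNBOUND,
					   WQ_UNBOUND_MAX_ACTIVE);
	if (!example_unreg_wq)
		return -ENOMEM;

	queue_work(example_unreg_wq, &example_unreg_work);
	return 0;
}

static void __exit example_exit(void)
{
	/* destroy_workqueue() drains pending work before freeing the queue,
	 * so no flush of the shared system_unbound_wq is needed.
	 */
	destroy_workqueue(example_unreg_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");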

View File

@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}

View File

@ -1034,10 +1034,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
unsigned long flags;
struct ib_sa_query *query;
struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
@ -1045,20 +1044,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_for_each_entry(query, &ib_nl_request_list, list) {
list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
if (nlh->nlmsg_seq == query->seq) {
found = !ib_sa_query_cancelled(query);
if (found)
list_del(&query->list);
if (nlh->nlmsg_seq == iter->seq) {
if (!ib_sa_query_cancelled(iter)) {
list_del(&iter->list);
query = iter;
}
break;
}
}
if (!found) {
if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
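
The sa_query hunk above stops using the list_for_each_entry() cursor after the loop: once the loop runs to completion, the cursor no longer points at a real element, so the result is carried in a separate pointer that stays NULL unless a match was found. A minimal sketch of that pattern (illustrative types, not the sa.c code):

#include <linux/list.h>
#include <linux/types.h>

struct item {
	struct list_head list;
	u32 seq;
};

static struct item *find_item(struct list_head *head, u32 seq)
{
	struct item *found = NULL, *iter;

	list_for_each_entry(iter, head, list) {
		if (iter->seq == seq) {
			found = iter;
			break;
		}
	}

	/* 'iter' must not be dereferenced here; 'found' is NULL or valid */
	return found;
}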

View File

@ -455,7 +455,7 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
break;
}
}
/* upon sucesss lock should stay on hold for the callee */
/* upon success lock should stay on hold for the callee */
if (!ret)
ret = dma_index - start_idx;
else

View File

@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;

View File

@ -281,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
}
rdma_restrack_add(&pd->res);
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@ -308,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->__internal_mr = mr;
if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@ -2131,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mr;
if (access_flags & IB_ACCESS_ON_DEMAND) {
if (!(pd->device->attrs.device_cap_flags &
IB_DEVICE_ON_DEMAND_PAGING)) {
if (!(pd->device->attrs.kernel_cap_flags &
IBK_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
return ERR_PTR(-EINVAL);
}
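
Several hunks in this series follow the same capability split: kernel-only capability bits move out of the uAPI-visible device_cap_flags into the new kernel_cap_flags field with IBK_* names. A minimal sketch of the two sides of that split, with illustrative example_* helpers (the IBK_* and IB_DEVICE_* flags are the ones visible in the diffs above):

#include <rdma/ib_verbs.h>

/* provider side: report uAPI-visible and kernel-only caps separately */
static void example_fill_attrs(struct ib_device_attr *attr)
{
	attr->device_cap_flags = IB_DEVICE_MEM_WINDOW;
	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY | IBK_ON_DEMAND_PAGING;
}

/* consumer side: in-kernel checks now test kernel_cap_flags */
static bool example_supports_odp(struct ib_device *dev)
{
	return dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING;
}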

View File

@ -146,13 +146,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SHUTDOWN_PORT
| IB_DEVICE_SYS_IMAGE_GUID
| IB_DEVICE_LOCAL_DMA_LKEY
| IB_DEVICE_RESIZE_MAX_WR
| IB_DEVICE_PORT_ACTIVE_EVENT
| IB_DEVICE_N_NOTIFY_CQ
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_MEM_MGT_EXTENSIONS;
ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
ib_attr->max_send_sge = dev_attr->max_qp_sges;
ib_attr->max_recv_sge = dev_attr->max_qp_sges;
ib_attr->max_sge_rd = dev_attr->max_qp_sges;

View File

@ -314,7 +314,6 @@ enum db_state {
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
u32 device_cap_flags;
struct xarray cqs;
struct xarray qps;
struct xarray mrs;

View File

@ -269,7 +269,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
dev->rdev.lldi.ports[0]->dev_addr);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
props->device_cap_flags = dev->device_cap_flags;
props->device_cap_flags = IB_DEVICE_MEM_WINDOW;
props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
if (fastreg_support)
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
@ -529,9 +532,6 @@ void c4iw_register_device(struct work_struct *work)
pr_debug("c4iw_dev %p\n", dev);
addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
dev->rdev.lldi.ports[0]->dev_addr);
dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
if (fastreg_support)
dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
dev->ibdev.local_dma_lkey = 0;
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);

View File

@ -137,61 +137,6 @@
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
HFI1_USER_SWMINOR)
#ifndef HFI1_KERN_TYPE
#define HFI1_KERN_TYPE 0
#endif
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
* part of a Intel release, or from the driver from openfabrics.org,
* kernel.org, or a standard distribution, for support reasons.
* The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of hfi1_base_info, so the user code can in turn
* check for compatibility with the kernel.
*/
#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
/*
* Define the driver version number. This is something that refers only
* to the driver itself, not the software interfaces it supports.
*/
#ifndef HFI1_DRIVER_VERSION_BASE
#define HFI1_DRIVER_VERSION_BASE "0.9-294"
#endif
/* create the final driver version string */
#ifdef HFI1_IDSTR
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
#else
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
#endif
/*
* Diagnostics can send a packet by writing the following
* struct to the diag packet special file.
*
* This allows a custom PBC qword, so that special modes and deliberate
* changes to CRCs can be used.
*/
#define _DIAG_PKT_VERS 1
struct diag_pkt {
__u16 version; /* structure version */
__u16 unit; /* which device */
__u16 sw_index; /* send sw index to use */
__u16 len; /* data length, in bytes */
__u16 port; /* port number */
__u16 unused;
__u32 flags; /* call flags */
__u64 data; /* user data pointer */
__u64 pbc; /* PBC for the packet */
};
/* diag_pkt flags */
#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.

View File

@ -29,12 +29,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
/*
* The size has to be longer than this string, so we can append
* board/chip information to it in the initialization code.
*/
const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
DEFINE_MUTEX(hfi1_mutex); /* general driver use */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;

View File

@ -72,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size,
* is in the EFIVAR_FS code and may not be compiled in.
* However, even that is insufficient since it does not cover
* EFI_BUFFER_TOO_SMALL which could be an important return.
* For now, just split out succces or not found.
* For now, just split out success or not found.
*/
ret = status == EFI_SUCCESS ? 0 :
status == EFI_NOT_FOUND ? -ENOENT :

View File

@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long dim = from->nr_segs;
int idx;
if (!HFI1_CAP_IS_KSET(SDMA))
return -EINVAL;
idx = srcu_read_lock(&fd->pq_srcu);
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
if (!cq || !pq) {
@ -1220,7 +1222,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
binfo.sw_version = HFI1_KERN_SWVERSION;
binfo.sw_version = HFI1_USER_SWVERSION;
binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*

View File

@ -489,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
u32 max_pkt_time;
u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).
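
The chip.c hunk above widens max_pkt_time as part of the "Fix potential integer multiplication overflow errors" change: with 32-bit operands the product wraps before it is ever stored, regardless of the destination type, so an operand of the arithmetic has to be widened as well. A standalone illustration (not the hfi1 computation; the 805 constant is only borrowed from the comment above):

#include <linux/math64.h>
#include <linux/types.h>

static u64 example_egress_cycles(u32 len_bytes, u32 rate_mbps)
{
	/* BAD: the multiplication is done in 32 bits and can wrap:
	 *	u32 cycles = len_bytes * 8 * 805 / rate_mbps;
	 */

	/* OK: widening one operand forces 64-bit arithmetic */
	return div_u64((u64)len_bytes * 8 * 805, rate_mbps);
}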

View File

@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
kvfree(sde->tx_ring);
sde->tx_ring = NULL;
}
spin_lock_irq(&dd->sde_map_lock);
sdma_map_free(rcu_access_pointer(dd->sdma_map));
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
if (rcu_access_pointer(dd->sdma_map)) {
spin_lock_irq(&dd->sde_map_lock);
sdma_map_free(rcu_access_pointer(dd->sdma_map));
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
}
kfree(dd->per_sdma);
dd->per_sdma = NULL;
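
The sdma_clean() hunk above skips the lock and the synchronize_rcu() when the SDMA map was never published (the "Prevent use of lock before it is initialized" fix). A minimal sketch of that RCU teardown pattern, with illustrative types:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_ctx {
	spinlock_t map_lock;
	struct example_map __rcu *map;
};

static void example_clean(struct example_ctx *c)
{
	struct example_map *map;

	/* nothing was ever published, so the lock may not even be valid */
	if (!rcu_access_pointer(c->map))
		return;

	spin_lock_irq(&c->map_lock);
	map = rcu_dereference_protected(c->map, lockdep_is_held(&c->map_lock));
	RCU_INIT_POINTER(c->map, NULL);
	spin_unlock_irq(&c->map_lock);

	synchronize_rcu();	/* wait for readers before freeing */
	kfree(map);
}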

View File

@ -1300,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
IB_DEVICE_MEM_MGT_EXTENSIONS |
IB_DEVICE_RDMA_NETDEV_OPA;
IB_DEVICE_MEM_MGT_EXTENSIONS;
rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;

View File

@ -106,16 +106,6 @@ enum {
SERV_TYPE_XRC = 5,
};
enum hns_roce_qp_state {
HNS_ROCE_QP_STATE_RST,
HNS_ROCE_QP_STATE_INIT,
HNS_ROCE_QP_STATE_RTR,
HNS_ROCE_QP_STATE_RTS,
HNS_ROCE_QP_STATE_SQD,
HNS_ROCE_QP_STATE_ERR,
HNS_ROCE_QP_NUM_STATE,
};
enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01,
HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02,
@ -139,8 +129,6 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
@ -535,6 +523,11 @@ struct hns_roce_cmd_context {
u16 busy;
};
enum hns_roce_cmdq_state {
HNS_ROCE_CMDQ_STATE_NORMAL,
HNS_ROCE_CMDQ_STATE_FATAL_ERR,
};
struct hns_roce_cmdq {
struct dma_pool *pool;
struct semaphore poll_sem;
@ -554,6 +547,7 @@ struct hns_roce_cmdq {
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
enum hns_roce_cmdq_state state;
};
struct hns_roce_cmd_mailbox {
@ -657,6 +651,11 @@ struct hns_roce_ceqe {
__le32 rsv[15];
};
#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
struct hns_roce_aeqe {
__le32 asyn;
union {
@ -676,6 +675,13 @@ struct hns_roce_aeqe {
__le32 rsv[12];
};
#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
void __iomem *db_reg;
@ -725,7 +731,6 @@ struct hns_roce_caps {
u32 num_pi_qps;
u32 reserved_qps;
int num_qpc_timer;
int num_cqc_timer;
u32 num_srqs;
u32 max_wqes;
u32 max_srq_wrs;
@ -1191,7 +1196,6 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,

View File

@ -149,8 +149,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
aseg->cmp_data = 0;
}
roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@ -271,8 +270,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
roce_set_bit(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
for (i = 0; i < wr->num_sge; i++) {
memcpy(dseg, ((void *)wr->sg_list[i].addr),
@ -280,17 +278,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += wr->sg_list[i].length;
}
} else {
roce_set_bit(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
if (ret)
return ret;
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
curr_idx - *sge_idx);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
}
*sge_idx = curr_idx;
@ -309,12 +303,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
int j = 0;
int i;
roce_set_field(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
(*sge_ind) & (qp->sge.sge_cnt - 1));
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
!!(wr->send_flags & IB_SEND_INLINE));
if (wr->send_flags & IB_SEND_INLINE)
return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
@ -339,9 +331,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
valid_num_sge - HNS_ROCE_SGE_IN_WQE);
}
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
return 0;
}
@ -412,8 +402,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
ud_sq_wqe->immtdata = get_immtdata(wr);
roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return 0;
}
@ -424,21 +413,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
struct ib_device *ib_dev = ah->ibah.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
return -EINVAL;
roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@ -448,10 +431,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
return 0;
roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
ah->av.vlan_en);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
return 0;
}
@ -476,27 +457,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
!!(wr->send_flags & IB_SEND_SIGNALED));
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
!!(wr->send_flags & IB_SEND_SOLICITED));
roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
roce_set_field(ud_sq_wqe->byte_20,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
curr_idx & (qp->sge.sge_cnt - 1));
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
curr_idx & (qp->sge.sge_cnt - 1));
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
ret = fill_ud_av(ud_sq_wqe, ah);
if (ret)
@ -516,8 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
return 0;
}
@ -553,7 +525,7 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
ret = -EOPNOTSUPP;
break;
case IB_WR_LOCAL_INV:
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO);
fallthrough;
case IB_WR_SEND_WITH_INV:
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
@ -565,11 +537,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
if (unlikely(ret))
return ret;
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return ret;
}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
@ -590,13 +562,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
(wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@ -616,8 +588,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
return ret;
}
@ -682,14 +653,11 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
/* All kinds of DirectWQE have the same header field layout */
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S,
qp->sl >> HNS_ROCE_SL_SHIFT);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
qp->sl >> HNS_ROCE_SL_SHIFT);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
}
@ -1265,6 +1233,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
return tail == priv->cmq.csq.head;
}
static void update_cmdq_status(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
}
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@ -1296,7 +1274,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
} while (++timeout < priv->cmq.tx_timeout);
if (hns_roce_cmq_csq_done(hr_dev)) {
for (ret = 0, i = 0; i < num; i++) {
ret = 0;
for (i = 0; i < num; i++) {
/* check the result of hardware write back */
desc[i] = csq->desc[tail++];
if (tail == csq->desc_num)
@ -1318,6 +1297,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
csq->head, tail);
csq->head = tail;
update_cmdq_status(hr_dev);
ret = -EAGAIN;
}
@ -1332,6 +1313,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
bool busy;
int ret;
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return -EIO;
if (!v2_chk_mbox_is_avail(hr_dev, &busy))
return busy ? -EBUSY : 0;
@ -1499,7 +1483,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
if (ret)
continue;
if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) {
if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
if (vf_id == 0)
hr_dev->is_reset = true;
return;
@ -1510,7 +1494,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
}
static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
{
enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
struct hns_roce_cmq_desc desc[2];
@ -1521,17 +1505,29 @@ static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
hns_roce_cmq_send(hr_dev, desc, 2);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
{
int ret;
int i;
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return;
for (i = hr_dev->func_num - 1; i >= 0; i--) {
__hns_roce_function_clear(hr_dev, i);
if (i != 0)
hns_roce_free_vf_resource(hr_dev, i);
if (i == 0)
continue;
ret = hns_roce_free_vf_resource(hr_dev, i);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to free vf resource, vf_id = %d, ret = %d.\n",
i, ret);
}
}
@ -1757,17 +1753,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
swt = (struct hns_roce_vf_switch *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@ -1947,7 +1942,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@ -2243,7 +2238,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
@ -2270,87 +2264,39 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
ctx_hop_num = resp_b->ctx_hop_num;
pbl_hop_num = resp_b->pbl_hop_num;
caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds,
V2_QUERY_PF_CAPS_C_NUM_PDS_M,
V2_QUERY_PF_CAPS_C_NUM_PDS_S);
caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
HNS_ROCE_CAP_FLAGS_EX_SHIFT;
caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
V2_QUERY_PF_CAPS_C_NUM_CQS_M,
V2_QUERY_PF_CAPS_C_NUM_CQS_S);
caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs,
V2_QUERY_PF_CAPS_C_MAX_GID_M,
V2_QUERY_PF_CAPS_C_MAX_GID_S);
caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth,
V2_QUERY_PF_CAPS_C_CQ_DEPTH_M,
V2_QUERY_PF_CAPS_C_CQ_DEPTH_S);
caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws,
V2_QUERY_PF_CAPS_C_NUM_MRWS_M,
V2_QUERY_PF_CAPS_C_NUM_MRWS_S);
caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps,
V2_QUERY_PF_CAPS_C_NUM_QPS_M,
V2_QUERY_PF_CAPS_C_NUM_QPS_S);
caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps,
V2_QUERY_PF_CAPS_C_MAX_ORD_M,
V2_QUERY_PF_CAPS_C_MAX_ORD_S);
caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_NUM_SRQS_M,
V2_QUERY_PF_CAPS_D_NUM_SRQS_S);
caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_CONG_TYPE_M,
V2_QUERY_PF_CAPS_D_CONG_TYPE_S);
caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth,
V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M,
V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S);
caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth,
V2_QUERY_PF_CAPS_D_NUM_CEQS_M,
V2_QUERY_PF_CAPS_D_NUM_CEQS_S);
caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M,
V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S);
caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M,
V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S);
caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M,
V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S);
caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds,
V2_QUERY_PF_CAPS_D_RSV_PDS_M,
V2_QUERY_PF_CAPS_D_RSV_PDS_S);
caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds,
V2_QUERY_PF_CAPS_D_NUM_UARS_M,
V2_QUERY_PF_CAPS_D_NUM_UARS_S);
caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps,
V2_QUERY_PF_CAPS_D_RSV_QPS_M,
V2_QUERY_PF_CAPS_D_RSV_QPS_S);
caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps,
V2_QUERY_PF_CAPS_D_RSV_UARS_M,
V2_QUERY_PF_CAPS_D_RSV_UARS_S);
caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
V2_QUERY_PF_CAPS_E_RSV_MRWS_M,
V2_QUERY_PF_CAPS_E_RSV_MRWS_S);
caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M,
V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S);
caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs,
V2_QUERY_PF_CAPS_E_RSV_CQS_M,
V2_QUERY_PF_CAPS_E_RSV_CQS_S);
caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs,
V2_QUERY_PF_CAPS_E_RSV_SRQS_M,
V2_QUERY_PF_CAPS_E_RSV_SRQS_S);
caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey,
V2_QUERY_PF_CAPS_E_RSV_LKEYS_M,
V2_QUERY_PF_CAPS_E_RSV_LKEYS_S);
caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
@ -2365,15 +2311,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqe_hop_num = pbl_hop_num;
caps->srqwqe_hop_num = pbl_hop_num;
caps->idx_hop_num = pbl_hop_num;
caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S);
caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S);
caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
return 0;
}
@ -3000,6 +2940,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
mb_st = (struct hns_roce_mbox_status *)desc.data;
end = msecs_to_jiffies(timeout) + jiffies;
while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return -EIO;
status = 0;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
true);
@ -3103,10 +3046,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
CFG_SGID_TB_TABLE_IDX_S, gid_index);
roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
copy_gid(&sgid_tb->vf_sgid_l, gid);
@ -3141,19 +3082,14 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev,
copy_gid(&tb_a->vf_sgid_l, gid);
roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M,
CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type);
roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S,
vlan_id < VLAN_CFI_MASK);
roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M,
CFG_GMV_TB_VF_VLAN_ID_S, vlan_id);
hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M,
CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]);
roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M,
CFG_GMV_TB_SGID_IDX_S, gid_index);
hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
@ -3202,10 +3138,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
reg_smac_l = *(u32 *)(&addr[0]);
reg_smac_h = *(u16 *)(&addr[4]);
roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
CFG_SMAC_TB_IDX_S, phy_port);
roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@ -3234,21 +3168,15 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
roce_set_field(mpt_entry->byte_48_mode_ba,
V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(pbl_ba >> 3));
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
@ -3257,7 +3185,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
int ret;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
@ -3296,9 +3223,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret;
return set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
@ -3309,24 +3234,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
u32 mr_access_flags = mr->access;
int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
V2_MPT_BYTE_8_BIND_EN_S,
hr_reg_write(mpt_entry, MPT_BIND_EN,
(mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
V2_MPT_BYTE_8_ATOMIC_EN_S,
hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
hr_reg_write(mpt_entry, MPT_RR_EN,
mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
hr_reg_write(mpt_entry, MPT_RW_EN,
mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
hr_reg_write(mpt_entry, MPT_LW_EN,
mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
}
@ -3357,37 +3277,28 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
return -ENOBUFS;
}
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
hr_reg_enable(mpt_entry, MPT_RA_EN);
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
hr_reg_enable(mpt_entry, MPT_FRE);
hr_reg_clear(mpt_entry, MPT_MR_MW);
hr_reg_enable(mpt_entry, MPT_BPD);
hr_reg_clear(mpt_entry, MPT_PA);
hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
return 0;
}
@ -3399,36 +3310,29 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mw->pdn);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
mw->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1);
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
hr_reg_enable(mpt_entry, MPT_LW_EN);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
hr_reg_enable(mpt_entry, MPT_MR_MW);
hr_reg_enable(mpt_entry, MPT_BPD);
hr_reg_clear(mpt_entry, MPT_PA);
hr_reg_write(mpt_entry, MPT_BQP,
mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
mpt_entry->lkey = cpu_to_le32(mw->rkey);
hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
mw->pbl_hop_num);
hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
return 0;
}
@ -4966,9 +4870,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
if (ret)
return ret;
if (gid_attr)
is_udp = (gid_attr->gid_type ==
IB_GID_TYPE_ROCE_UDP_ENCAP);
is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
}
/* Only HIP08 needs to set the vlan_en bits in QPC */
@ -5949,7 +5851,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
return (hr_reg_read(aeqe, AEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
@ -5969,15 +5871,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
event_type = roce_get_field(aeqe->asyn,
HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
sub_type = roce_get_field(aeqe->asyn,
HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
queue_num = roce_get_field(aeqe->event.queue_event.num,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@ -6037,8 +5933,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
return (hr_reg_read(ceqe, CEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
@ -6054,8 +5950,7 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M,
HNS_ROCE_V2_CEQE_COMP_CQN_S);
cqn = hr_reg_read(ceqe, CEQE_CQN);
hns_roce_cq_completion(hr_dev, cqn);
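
Most of the hns hunks above are mechanical conversions from per-word roce_set_field()/roce_get_field() shift/mask pairs to hr_reg_write()/hr_reg_read() with FIELD_LOC() definitions, where a field is named by its absolute bit range inside the structure. A simplified illustration of the idea (a sketch, not the driver's actual helpers; it assumes the field does not cross a 32-bit word boundary):

#include <linux/bits.h>
#include <linux/types.h>
#include <asm/byteorder.h>

#define EX_FIELD_LOC(h, l) (h), (l)

/* indirection so the FIELD_LOC pair expands into separate arguments */
#define ex_reg_write(ptr, field, val) _ex_reg_write(ptr, field, val)

#define _ex_reg_write(ptr, h, l, val)					\
	do {								\
		__le32 *_word = (__le32 *)(ptr) + (l) / 32;		\
		u32 _mask = GENMASK((h) % 32, (l) % 32);		\
		u32 _old = le32_to_cpu(*_word);				\
									\
		_old &= ~_mask;						\
		_old |= ((u32)(val) << ((l) % 32)) & _mask;		\
		*_word = cpu_to_le32(_old);				\
	} while (0)

/* e.g. a field spanning bits 127..120 of a WQE, written in one call */
#define EX_WQE_SGE_NUM	EX_FIELD_LOC(127, 120)
/* usage: ex_reg_write(wqe, EX_WQE_SGE_NUM, valid_num_sge); */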

View File

@ -41,7 +41,7 @@
#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
#define HNS_ROCE_V2_MAX_SRQ_SGE 64
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64
@ -303,33 +303,6 @@ struct hns_roce_v2_cq_context {
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
#define V2_CQC_BYTE_4_ARM_ST_S 6
#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6)
#define V2_CQC_BYTE_4_CEQN_S 15
#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15)
#define V2_CQC_BYTE_8_CQN_S 0
#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30
#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30)
#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0
#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0)
#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0
#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0)
#define V2_CQC_BYTE_52_CQE_CNT_S 0
#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)
#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0
#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0)
#define V2_CQC_BYTE_56_CQ_PERIOD_S 16
#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16)
#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
@ -788,12 +761,15 @@ struct hns_roce_v2_mpt_entry {
#define MPT_LKEY MPT_FIELD_LOC(223, 192)
#define MPT_VA MPT_FIELD_LOC(287, 224)
#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
#define MPT_PA0 MPT_FIELD_LOC(441, 384)
#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
#define MPT_PA1 MPT_FIELD_LOC(505, 448)
#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
@ -899,48 +875,24 @@ struct hns_roce_v2_ud_send_wqe {
u8 dgid[GID_LEN_V2];
};
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
struct hns_roce_v2_rc_send_wqe {
__le32 byte_4;
@ -955,42 +907,23 @@ struct hns_roce_v2_rc_send_wqe {
__le64 va;
};
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9
#define V2_RC_SEND_WQE_BYTE_4_SO_S 10
#define V2_RC_SEND_WQE_BYTE_4_SE_S 11
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24
#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
@ -1033,7 +966,10 @@ struct hns_roce_func_clear {
__le32 rsv[4];
};
#define FUNC_CLEAR_RST_FUN_DONE_S 0
#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
/* Each physical function manages up to 248 virtual functions, it takes up to
* 100ms for each function to execute clear. If an abnormal reset occurs, it is
* executed twice at most, so it takes up to 249 * 2 * 100ms.
@ -1112,12 +1048,12 @@ struct hns_roce_vf_switch {
__le32 resv3;
};
#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
struct hns_roce_post_mbox {
__le32 in_param_l;
@ -1180,11 +1116,10 @@ struct hns_roce_cfg_sgid_tb {
__le32 vf_sgid_type_rsv;
};
#define CFG_SGID_TB_TABLE_IDX_S 0
#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
#define CFG_SGID_TB_VF_SGID_TYPE_S 0
#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
struct hns_roce_cfg_smac_tb {
__le32 tb_idx_rsv;
@ -1192,11 +1127,11 @@ struct hns_roce_cfg_smac_tb {
__le32 vf_smac_h_rsv;
__le32 rsv[3];
};
#define CFG_SMAC_TB_IDX_S 0
#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
#define CFG_SMAC_TB_VF_SMAC_H_S 0
#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
struct hns_roce_cfg_gmv_tb_a {
__le32 vf_sgid_l;
@ -1207,16 +1142,11 @@ struct hns_roce_cfg_gmv_tb_a {
__le32 resv;
};
#define CFG_GMV_TB_SGID_IDX_S 0
#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0)
#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
#define CFG_GMV_TB_VF_SGID_TYPE_S 0
#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0)
#define CFG_GMV_TB_VF_VLAN_EN_S 2
#define CFG_GMV_TB_VF_VLAN_ID_S 16
#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16)
#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
struct hns_roce_cfg_gmv_tb_b {
__le32 vf_smac_l;
@ -1225,8 +1155,10 @@ struct hns_roce_cfg_gmv_tb_b {
__le32 resv[3];
};
#define CFG_GMV_TB_SMAC_H_S 0
#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0)
#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
struct hns_roce_query_pf_caps_a {
@ -1278,29 +1210,17 @@ struct hns_roce_query_pf_caps_c {
__le16 rq_depth;
};
#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0)
#define PF_CAPS_C_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20
#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20)
#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20
#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20)
#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0)
#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20
#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20)
#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
struct hns_roce_query_pf_caps_d {
__le32 wq_hop_num_max_srqs;
@ -1311,20 +1231,26 @@ struct hns_roce_query_pf_caps_d {
__le32 num_uars_rsv_pds;
__le32 rsv_uars_rsv_qps;
};
#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
#define PF_CAPS_D_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22
#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22)
#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24
#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24)
#define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26
#define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26)
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
struct hns_roce_congestion_algorithm {
u8 alg_sel;
@ -1333,33 +1259,6 @@ struct hns_roce_congestion_algorithm {
u8 wnd_mode_sel;
};
#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0)
#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22
#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22)
#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0)
#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22
#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22)
#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24
#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24)
#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0
#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20
#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20)
#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0
#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20
#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20)
struct hns_roce_query_pf_caps_e {
__le32 chunk_size_shift_rsv_mrws;
__le32 rsv_cqs;
@ -1371,20 +1270,14 @@ struct hns_roce_query_pf_caps_e {
__le16 aeq_period;
};
#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0)
#define PF_CAPS_E_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20
#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20)
#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0)
#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
struct hns_roce_cmq_req {
__le32 data[6];
@ -1485,9 +1378,6 @@ struct hns_roce_dip {
#define HNS_ROCE_EQ_INIT_CONS_IDX 0
#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
@ -1544,18 +1434,6 @@ struct hns_roce_eq_context {
#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
#define MAX_SERVICE_LEVEL 0x7
struct hns_roce_wqe_atomic_seg {


@ -737,7 +737,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
hr_dev->caps.num_cqc_timer, 1);
hr_dev->caps.cqc_timer_bt_num, 1);
if (ret) {
dev_err(dev,
"Failed to init CQC timer memory, aborting.\n");


@ -340,7 +340,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
int ret = 0;
if (hr_dev->hw->dereg_mr)
hr_dev->hw->dereg_mr(hr_dev);
@ -348,7 +347,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
hns_roce_mr_free(hr_dev, mr);
kfree(mr);
return ret;
return 0;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,


@ -243,26 +243,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
return 0;
}
enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
{
switch (state) {
case IB_QPS_RESET:
return HNS_ROCE_QP_STATE_RST;
case IB_QPS_INIT:
return HNS_ROCE_QP_STATE_INIT;
case IB_QPS_RTR:
return HNS_ROCE_QP_STATE_RTR;
case IB_QPS_RTS:
return HNS_ROCE_QP_STATE_RTS;
case IB_QPS_SQD:
return HNS_ROCE_QP_STATE_SQD;
case IB_QPS_ERR:
return HNS_ROCE_QP_STATE_ERR;
default:
return HNS_ROCE_QP_NUM_STATE;
}
}
static void add_qp_to_list(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_cq *send_cq, struct ib_cq *recv_cq)


@ -13,61 +13,40 @@ static int hns_roce_fill_cq(struct sk_buff *msg,
struct hns_roce_v2_cq_context *context)
{
if (rdma_nl_put_driver_u32(msg, "state",
roce_get_field(context->byte_4_pg_ceqn,
V2_CQC_BYTE_4_ARM_ST_M,
V2_CQC_BYTE_4_ARM_ST_S)))
hr_reg_read(context, CQC_ARM_ST)))
goto err;
if (rdma_nl_put_driver_u32(msg, "ceqn",
roce_get_field(context->byte_4_pg_ceqn,
V2_CQC_BYTE_4_CEQN_M,
V2_CQC_BYTE_4_CEQN_S)))
hr_reg_read(context, CQC_CEQN)))
goto err;
if (rdma_nl_put_driver_u32(msg, "cqn",
roce_get_field(context->byte_8_cqn,
V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S)))
hr_reg_read(context, CQC_CQN)))
goto err;
if (rdma_nl_put_driver_u32(msg, "hopnum",
roce_get_field(context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_HOP_NUM_M,
V2_CQC_BYTE_16_CQE_HOP_NUM_S)))
hr_reg_read(context, CQC_CQE_HOP_NUM)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "pi",
roce_get_field(context->byte_28_cq_pi,
V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M,
V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S)))
if (rdma_nl_put_driver_u32(msg, "pi",
hr_reg_read(context, CQC_CQ_PRODUCER_IDX)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "ci",
roce_get_field(context->byte_32_cq_ci,
V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M,
V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S)))
if (rdma_nl_put_driver_u32(msg, "ci",
hr_reg_read(context, CQC_CQ_CONSUMER_IDX)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "coalesce",
roce_get_field(context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_MAX_CNT_M,
V2_CQC_BYTE_56_CQ_MAX_CNT_S)))
if (rdma_nl_put_driver_u32(msg, "coalesce",
hr_reg_read(context, CQC_CQ_MAX_CNT)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "period",
roce_get_field(context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_PERIOD_M,
V2_CQC_BYTE_56_CQ_PERIOD_S)))
if (rdma_nl_put_driver_u32(msg, "period",
hr_reg_read(context, CQC_CQ_PERIOD)))
goto err;
if (rdma_nl_put_driver_u32(msg, "cnt",
roce_get_field(context->byte_52_cqe_cnt,
V2_CQC_BYTE_52_CQE_CNT_M,
V2_CQC_BYTE_52_CQE_CNT_S)))
hr_reg_read(context, CQC_CQE_CNT)))
goto err;
return 0;
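
The hunks above replace the per-field shift/mask macro pairs (*_S/*_M plus roce_get_field()) with single FIELD_LOC(high, low) bit-range descriptors that are decoded by hr_reg_read(). Below is a minimal userspace model of that pattern; the reg_read() helper and its layout assumptions (fields never cross a 32-bit word) are illustrative only and are not the driver's actual hr_reg_*() implementation, which also handles little-endian conversion.

#include <stdint.h>
#include <stdio.h>

/* Describe a field once by its absolute bit range inside the command block. */
#define FIELD_LOC(h, l)   (((uint64_t)(h) << 32) | (l))

/* Hypothetical decoder: assumes the field does not span a 32-bit word. */
static uint32_t reg_read(const uint32_t *block, uint64_t loc)
{
        uint32_t lo = (uint32_t)loc;
        uint32_t hi = (uint32_t)(loc >> 32);
        uint32_t word = lo / 32;
        uint32_t shift = lo % 32;
        uint32_t width = hi - lo + 1;
        uint32_t mask = width == 32 ? 0xffffffffu : ((1u << width) - 1);

        return (block[word] >> shift) & mask;
}

int main(void)
{
        /* e.g. CFG_SGID_TB_VF_SGID_TYPE is declared above as bits 161..160,
         * i.e. bits 1..0 of 32-bit word 5 of the command block. */
        uint32_t cmd[8] = { 0 };

        cmd[5] = 0x2;
        printf("sgid_type = %u\n", reg_read(cmd, FIELD_LOC(161, 160)));
        return 0;
}

One descriptor per field replaces two macros and keeps the mask and shift derivable from a single source of truth, which is what the hr_reg_xxx() conversion in these hunks achieves.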


@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
struct irdma_cq *cq = iwcq->back_cq;
if (!cq->user_mode)
cq->armed = false;
atomic_set(&cq->armed, 0);
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
@ -1827,10 +1827,6 @@ int irdma_rt_init_hw(struct irdma_device *iwdev,
rf->rsrc_created = true;
}
iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
irdma_alloc_set_mac(iwdev);
irdma_add_ip(iwdev);
@ -2693,24 +2689,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
info.sq = flush_mask & IRDMA_FLUSH_SQ;
info.rq = flush_mask & IRDMA_FLUSH_RQ;
if (flush_mask & IRDMA_REFLUSH) {
if (info.sq)
iwqp->sc_qp.flush_sq = false;
if (info.rq)
iwqp->sc_qp.flush_rq = false;
}
/* Generate userflush errors in CQE */
info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.sq_minor_code = FLUSH_GENERAL_ERR;
info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.rq_minor_code = FLUSH_GENERAL_ERR;
info.userflushcode = true;
if (flush_code) {
if (info.sq && iwqp->sc_qp.sq_flush_code)
info.sq_minor_code = flush_code;
if (info.rq && iwqp->sc_qp.rq_flush_code)
info.rq_minor_code = flush_code;
if (flush_mask & IRDMA_REFLUSH) {
if (info.sq)
iwqp->sc_qp.flush_sq = false;
if (info.rq)
iwqp->sc_qp.flush_rq = false;
} else {
if (flush_code) {
if (info.sq && iwqp->sc_qp.sq_flush_code)
info.sq_minor_code = flush_code;
if (info.rq && iwqp->sc_qp.rq_flush_code)
info.rq_minor_code = flush_code;
}
if (!iwqp->user_mode)
queue_delayed_work(iwqp->iwdev->cleanup_wq,
&iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
/* Issue flush */


@ -338,7 +338,6 @@ struct irdma_device {
u32 roce_ackcreds;
u32 vendor_id;
u32 vendor_part_id;
u32 device_cap_flags;
u32 push_mode;
u32 rcv_wnd;
u16 mac_ip_table_idx;


@ -191,7 +191,6 @@ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev,
static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
u32 *wqe_idx)
{
__le64 *wqe = NULL;
int ret_code = 0;
*wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
@ -199,11 +198,9 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
qp->swqe_polarity = !qp->swqe_polarity;
IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code);
if (ret_code)
return wqe;
return NULL;
wqe = qp->sq_base[*wqe_idx].elem;
return wqe;
return qp->sq_base[*wqe_idx].elem;
}
/**


@ -2495,3 +2495,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
return polarity != ukcq->polarity;
}
void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
{
struct irdma_cmpl_gen *cmpl_node;
struct list_head *tmp_node, *list_node;
list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
list_del(&cmpl_node->list);
kfree(cmpl_node);
}
}
int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
{
struct irdma_cmpl_gen *cmpl;
if (list_empty(&iwcq->cmpl_generated))
return -ENOENT;
cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
list_del(&cmpl->list);
memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
kfree(cmpl);
ibdev_dbg(iwcq->ibcq.device,
"VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
__func__, cq_poll_info->qp_id, cq_poll_info->op_type,
cq_poll_info->wr_id);
return 0;
}
/**
* irdma_set_cpi_common_values - fill in values for polling info struct
* @cpi: resulting structure of cq_poll_info type
* @qp: QPair
* @qp_num: id of the QP
*/
static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
struct irdma_qp_uk *qp, u32 qp_num)
{
cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
cpi->error = true;
cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
cpi->minor_err = FLUSH_GENERAL_ERR;
cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
cpi->qp_id = qp_num;
}
static inline void irdma_comp_handler(struct irdma_cq *cq)
{
if (!cq->ibcq.comp_handler)
return;
if (atomic_cmpxchg(&cq->armed, 1, 0))
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
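
The cq->armed flag changes from a plain bool to an atomic_t, and irdma_comp_handler() only invokes the completion callback if it can atomically flip armed from 1 to 0, so one arm produces at most one callback even when the hardware event path and the software flush path race. A standalone sketch of that arm/fire discipline using C11 atomics follows; the function names are invented and this is not the driver's API.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int armed = 0;

static void request_notify(void)            /* req_notify analogue */
{
        int expected = 0;

        /* Arm only if not already armed (mirrors atomic_cmpxchg(&cq->armed, 0, 1)). */
        atomic_compare_exchange_strong(&armed, &expected, 1);
}

static void completion_event(void)          /* comp_handler analogue */
{
        int expected = 1;

        /* Fire the callback only if we are the one who disarmed the CQ. */
        if (atomic_compare_exchange_strong(&armed, &expected, 0))
                printf("callback invoked\n");
        else
                printf("suppressed: CQ was not armed\n");
}

int main(void)
{
        request_notify();
        completion_event();     /* fires once */
        completion_event();     /* suppressed until re-armed */
        return 0;
}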
void irdma_generate_flush_completions(struct irdma_qp *iwqp)
{
struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
struct irdma_ring *sq_ring = &qp->sq_ring;
struct irdma_ring *rq_ring = &qp->rq_ring;
struct irdma_cmpl_gen *cmpl;
__le64 *sw_wqe;
u64 wqe_qword;
u32 wqe_idx;
bool compl_generated = false;
unsigned long flags1;
spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
if (irdma_cq_empty(iwqp->iwscq)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
while (IRDMA_RING_MORE_WORK(*sq_ring)) {
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
return;
}
wqe_idx = sq_ring->tail;
irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
/* remove the SQ WR by moving SQ tail*/
IRDMA_RING_SET_TAIL(*sq_ring,
sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
ibdev_dbg(iwqp->iwscq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id);
list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
} else {
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
if (irdma_cq_empty(iwqp->iwrcq)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
while (IRDMA_RING_MORE_WORK(*rq_ring)) {
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
return;
}
wqe_idx = rq_ring->tail;
irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
/* remove the RQ WR by moving RQ tail */
IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
ibdev_dbg(iwqp->iwrcq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id,
wqe_idx);
list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
} else {
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
}
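
irdma_generate_flush_completions() walks each work ring from tail to head, fabricates one "flushed" completion per outstanding WR, queues it on the CQ's cmpl_generated list and advances the tail, so a QP moved to error still reports every posted WR. A simplified userspace model of that drain is sketched below; the types, ring size and lack of locking are all assumptions for illustration.

#include <stdio.h>
#include <stdint.h>

#define RING_SIZE 8

struct wr   { uint64_t wr_id; };
struct cmpl { uint64_t wr_id; int flushed; };

static struct wr ring[RING_SIZE];
static unsigned int head, tail;            /* head: next free slot, tail: oldest pending */

static struct cmpl generated[RING_SIZE];
static unsigned int ngenerated;

/* Drain every WR still sitting between tail and head as a flushed completion. */
static void generate_flush_completions(void)
{
        while (tail != head) {
                generated[ngenerated].wr_id = ring[tail % RING_SIZE].wr_id;
                generated[ngenerated].flushed = 1;
                ngenerated++;
                tail++;                    /* "remove the WR by moving the tail" */
        }
}

int main(void)
{
        ring[head++ % RING_SIZE].wr_id = 0x100;
        ring[head++ % RING_SIZE].wr_id = 0x101;

        generate_flush_completions();

        for (unsigned int i = 0; i < ngenerated; i++)
                printf("flushed completion for wr_id=0x%llx\n",
                       (unsigned long long)generated[i].wr_id);
        return 0;
}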


@ -25,7 +25,9 @@ static int irdma_query_device(struct ib_device *ibdev,
iwdev->netdev->dev_addr);
props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
irdma_fw_minor_ver(&rf->sc_dev);
props->device_cap_flags = iwdev->device_cap_flags;
props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
props->vendor_id = pcidev->vendor;
props->vendor_part_id = pcidev->device;
@ -533,6 +535,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
irdma_modify_qp_to_err(&iwqp->sc_qp);
if (!iwqp->user_mode)
cancel_delayed_work_sync(&iwqp->dwork_flush);
irdma_qp_rem_ref(&iwqp->ibqp);
wait_for_completion(&iwqp->free_qp);
irdma_free_lsmm_rsrc(iwqp);
@ -788,6 +793,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
return 0;
}
static void irdma_flush_worker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
irdma_generate_flush_completions(iwqp);
}
/**
* irdma_create_qp - create qp
* @ibqp: ptr of qp
@ -907,6 +920,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
irdma_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
}
@ -1398,11 +1412,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (iwqp->ibqp_state > IB_QPS_RTS &&
!iwqp->flush_issued) {
iwqp->flush_issued = 1;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
IRDMA_FLUSH_RQ |
IRDMA_FLUSH_WAIT);
iwqp->flush_issued = 1;
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
}
@ -1755,6 +1769,8 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
unsigned long flags;
spin_lock_irqsave(&iwcq->lock, flags);
if (!list_empty(&iwcq->cmpl_generated))
irdma_remove_cmpls_list(iwcq);
if (!list_empty(&iwcq->resize_list))
irdma_process_resize_list(iwcq, iwdev, NULL);
spin_unlock_irqrestore(&iwcq->lock, flags);
@ -1959,6 +1975,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
cq->back_cq = iwcq;
spin_lock_init(&iwcq->lock);
INIT_LIST_HEAD(&iwcq->resize_list);
INIT_LIST_HEAD(&iwcq->cmpl_generated);
info.dev = dev;
ukinfo->cq_size = max(entries, 4);
ukinfo->cq_id = cq_num;
@ -3044,15 +3061,12 @@ static int irdma_post_send(struct ib_qp *ibqp,
unsigned long flags;
bool inv_stag;
struct irdma_ah *ah;
bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
dev = &iwqp->iwdev->rf->sc_dev;
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->flush_issued && ukqp->sq_flush_complete)
reflush = true;
while (ib_wr) {
memset(&info, 0, sizeof(info));
inv_stag = false;
@ -3202,15 +3216,14 @@ static int irdma_post_send(struct ib_qp *ibqp,
ib_wr = ib_wr->next;
}
if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
irdma_uk_qp_post_wr(ukqp);
if (!iwqp->flush_issued) {
if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
irdma_uk_qp_post_wr(ukqp);
spin_unlock_irqrestore(&iwqp->lock, flags);
} else if (reflush) {
ukqp->sq_flush_complete = false;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
if (err)
*bad_wr = ib_wr;
@ -3233,14 +3246,11 @@ static int irdma_post_recv(struct ib_qp *ibqp,
struct irdma_post_rq_info post_recv = {};
unsigned long flags;
int err = 0;
bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->flush_issued && ukqp->rq_flush_complete)
reflush = true;
while (ib_wr) {
post_recv.num_sges = ib_wr->num_sge;
post_recv.wr_id = ib_wr->wr_id;
@ -3256,13 +3266,10 @@ static int irdma_post_recv(struct ib_qp *ibqp,
}
out:
if (reflush) {
ukqp->rq_flush_complete = false;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
}
spin_unlock_irqrestore(&iwqp->lock, flags);
if (iwqp->flush_issued)
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
if (err)
*bad_wr = ib_wr;
@ -3474,6 +3481,11 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
/* check the current CQ for new cqes */
while (npolled < num_entries) {
ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
if (ret == -ENOENT) {
ret = irdma_generated_cmpls(iwcq, cur_cqe);
if (!ret)
irdma_process_cqe(entry + npolled, cur_cqe);
}
if (!ret) {
++npolled;
cq_new_cqe = true;
@ -3555,13 +3567,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
promo_event = true;
if (!iwcq->armed || promo_event) {
iwcq->armed = true;
if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
iwcq->last_notify = cq_notify;
irdma_uk_cq_request_notification(ukcq, cq_notify);
}
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
(!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
ret = 1;
spin_unlock_irqrestore(&iwcq->lock, flags);
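
With the cmpl_generated list in place, __irdma_poll_cq() treats -ENOENT from the hardware poll as "check the software list", and req_notify reports missed events when either the hardware ring or the generated list is non-empty. A condensed sketch of that two-source poll follows; the counters and helper names are invented and stand in for the driver's data structures.

#include <errno.h>
#include <stdio.h>

/* Hypothetical sources: a hardware CQ and a software-generated list. */
static int hw_pending;      /* completions still in the hardware ring   */
static int sw_pending;      /* completions fabricated by the flush path */

static int poll_hw(void) { return hw_pending ? (hw_pending--, 0) : -ENOENT; }
static int poll_sw(void) { return sw_pending ? (sw_pending--, 0) : -ENOENT; }

/* Poll the hardware first; fall back to software-generated completions. */
static int poll_one(void)
{
        int ret = poll_hw();

        if (ret == -ENOENT)
                ret = poll_sw();
        return ret;
}

int main(void)
{
        hw_pending = 1;
        sw_pending = 2;

        while (poll_one() == 0)
                printf("completion delivered\n");
        return 0;
}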


@ -4,6 +4,7 @@
#define IRDMA_VERBS_H
#define IRDMA_MAX_SAVED_PHY_PGADDR 4
#define IRDMA_FLUSH_DELAY_MS 20
#define IRDMA_PKEY_TBL_SZ 1
#define IRDMA_DEFAULT_PKEY 0xFFFF
@ -115,7 +116,7 @@ struct irdma_cq {
u16 cq_size;
u16 cq_num;
bool user_mode;
bool armed;
atomic_t armed;
enum irdma_cmpl_notify last_notify;
u32 polled_cmpls;
u32 cq_mem_size;
@ -126,6 +127,12 @@ struct irdma_cq {
struct irdma_pbl *iwpbl_shadow;
struct list_head resize_list;
struct irdma_cq_poll_info cur_cqe;
struct list_head cmpl_generated;
};
struct irdma_cmpl_gen {
struct list_head list;
struct irdma_cq_poll_info cpi;
};
struct disconn_work {
@ -166,6 +173,7 @@ struct irdma_qp {
refcount_t refcnt;
struct iw_cm_id *cm_id;
struct irdma_cm_node *cm_node;
struct delayed_work dwork_flush;
struct ib_mr *lsmm_mr;
atomic_t hw_mod_qp_pend;
enum ib_qp_state ibqp_state;
@ -229,4 +237,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev);
void irdma_ib_unregister_device(struct irdma_device *iwdev);
void irdma_ib_dealloc_device(struct ib_device *ibdev);
void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
void irdma_generate_flush_completions(struct irdma_qp *iwqp);
void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
#endif /* IRDMA_VERBS_H */


@ -80,6 +80,7 @@ struct cm_req_msg {
union ib_gid primary_path_sgid;
};
static struct workqueue_struct *cm_wq;
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
@ -288,10 +289,10 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
} else if (id->scheduled_delete) {
/* Adjust timeout if already scheduled */
mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
@ -370,7 +371,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
ret = xa_err(item);
else
/* If a retry, adjust delayed work */
mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
goto err_or_exists;
}
xa_unlock(&sriov->xa_rej_tmout);
@ -393,7 +394,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
return xa_err(old);
}
schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
return 0;
@ -500,7 +501,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_lock(&sriov->xa_rej_tmout);
xa_for_each(&sriov->xa_rej_tmout, id, item) {
if (slave < 0 || slave == item->slave) {
mod_delayed_work(system_wq, &item->timeout, 0);
mod_delayed_work(cm_wq, &item->timeout, 0);
flush_needed = true;
++cnt;
}
@ -508,7 +509,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_unlock(&sriov->xa_rej_tmout);
if (flush_needed) {
flush_scheduled_work();
flush_workqueue(cm_wq);
pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
cnt, slave);
}
@ -540,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
spin_unlock(&sriov->id_map_lock);
if (need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
flush_workqueue(cm_wq); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
@ -587,3 +588,17 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
rej_tmout_xa_cleanup(sriov, slave);
}
int mlx4_ib_cm_init(void)
{
cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0);
if (!cm_wq)
return -ENOMEM;
return 0;
}
void mlx4_ib_cm_destroy(void)
{
destroy_workqueue(cm_wq);
}
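
The mlx4 CM code stops using the shared system workqueue (schedule_delayed_work()/flush_scheduled_work()) and allocates its own cm_wq, so cleanup can flush exactly this driver's delayed work and nothing else. A minimal module-style sketch of that pattern using the standard workqueue API is below; the queue name and the work handler are made up for illustration.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_wq;

static void demo_timeout_fn(struct work_struct *work)
{
        /* timed-out entry cleanup would go here */
}
static DECLARE_DELAYED_WORK(demo_timeout, demo_timeout_fn);

static int __init demo_init(void)
{
        demo_wq = alloc_workqueue("demo_cm_wq", 0, 0);
        if (!demo_wq)
                return -ENOMEM;

        /* queue on the private queue instead of schedule_delayed_work() */
        queue_delayed_work(demo_wq, &demo_timeout, msecs_to_jiffies(100));
        return 0;
}

static void __exit demo_exit(void)
{
        /* flushing the private queue waits only for this driver's work */
        flush_workqueue(demo_wq);
        destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");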


@ -479,8 +479,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
IB_DEVICE_RC_RNR_NAK_GEN;
props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@ -494,9 +494,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
props->device_cap_flags |= IB_DEVICE_UD_TSO;
props->kernel_cap_flags |= IBK_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
@ -3307,10 +3307,14 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
err = mlx4_ib_mcg_init();
err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
err = mlx4_ib_mcg_init();
if (err)
goto clean_cm;
err = mlx4_register_interface(&mlx4_ib_interface);
if (err)
goto clean_mcg;
@ -3320,6 +3324,9 @@ static int __init mlx4_ib_init(void)
clean_mcg:
mlx4_ib_mcg_destroy();
clean_cm:
mlx4_ib_cm_destroy();
clean_wq:
destroy_workqueue(wq);
return err;
@ -3329,6 +3336,7 @@ static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
mlx4_ib_cm_destroy();
destroy_workqueue(wq);
}


@ -937,4 +937,7 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);
#endif /* MLX4_IB_H */


@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \
restrack.o \
srq.o \
srq_cmd.o \
umr.o \
wr.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o


@ -1095,11 +1095,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_egress) {
err = -EINVAL;
goto free;
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
struct mlx5_ib_mcounters *mcounters;


@ -41,6 +41,7 @@
#include "wr.h"
#include "restrack.h"
#include "counters.h"
#include "umr.h"
#include <rdma/uverbs_std_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@ -854,13 +855,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
props->kernel_cap_flags |= IBK_SG_GAPS_REG;
}
/* IB_WR_REG_MR always requires changing the entity size with UMR */
if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@ -869,7 +870,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
if (MLX5_CAP_ETH(mdev, csum_cap)) {
@ -916,7 +917,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
props->kernel_cap_flags |= IBK_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
@ -992,7 +993,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
if (!uhw) {
/* ODP for kernel QPs is not implemented for receive
@ -1013,11 +1014,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (mlx5_core_is_vf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET && raw_support) {
@ -4008,12 +4006,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
if (dev->umrc.qp)
ib_destroy_qp(dev->umrc.qp);
if (dev->umrc.cq)
ib_free_cq(dev->umrc.cq);
if (dev->umrc.pd)
ib_dealloc_pd(dev->umrc.pd);
mlx5r_umr_resource_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@ -4021,112 +4014,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
enum {
MAX_UMR_WR = 128,
};
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr *init_attr = NULL;
struct ib_qp_attr *attr = NULL;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
if (!attr || !init_attr) {
ret = -ENOMEM;
goto error_0;
}
ret = mlx5r_umr_resource_init(dev);
if (ret)
return ret;
pd = ib_alloc_pd(&dev->ib_dev, 0);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
ret = PTR_ERR(pd);
goto error_0;
}
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto error_2;
}
init_attr->send_cq = cq;
init_attr->recv_cq = cq;
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->cap.max_send_wr = MAX_UMR_WR;
init_attr->cap.max_send_sge = 1;
init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
init_attr->port_num = 1;
qp = ib_create_qp(pd, init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto error_3;
}
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
ret = ib_modify_qp(qp, attr,
IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTR;
attr->path_mtu = IB_MTU_256;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
goto error_4;
}
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
ret = mlx5_mr_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
goto error_4;
mlx5r_umr_resource_cleanup(dev);
}
kfree(attr);
kfree(init_attr);
return 0;
error_4:
ib_destroy_qp(qp);
dev->umrc.qp = NULL;
error_3:
ib_free_cq(cq);
dev->umrc.cq = NULL;
error_2:
ib_dealloc_pd(pd);
dev->umrc.pd = NULL;
error_0:
kfree(attr);
kfree(init_attr);
return ret;
}


@ -291,16 +291,9 @@ struct mlx5_ib_flow_db {
};
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
* enum ib_qp_type for low-level driver
*/
#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
* IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
@ -311,9 +304,6 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
@ -539,24 +529,6 @@ struct mlx5_ib_cq_buf {
int nent;
};
struct mlx5_umr_wr {
struct ib_send_wr wr;
u64 virt_addr;
u64 offset;
struct ib_pd *pd;
unsigned int page_shift;
unsigned int xlt_size;
u64 length;
int access_flags;
u32 mkey;
u8 ignore_free_state:1;
};
static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct mlx5_umr_wr, wr);
}
enum mlx5_ib_cq_pr_flags {
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
@ -1291,9 +1263,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
@ -1472,9 +1441,6 @@ static inline int is_qp1(enum ib_qp_type qp_type)
return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
static inline u32 check_cq_create_flags(u32 flags)
{
/*
@ -1546,59 +1512,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
size_t length)
{
/*
* umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
* always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
* MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
* can never be enabled without this capability. Simplify this weird
* quirky hardware by just saying it can't use PAS lists with UMR at
* all.
*/
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
/*
* length is the size of the MR in bytes when mlx5_ib_update_xlt() is
* used.
*/
if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
return false;
return true;
}
/*
* true if an existing MR can be reconfigured to new access_flags using UMR.
* Older HW cannot use UMR to update certain elements of the MKC. See
* umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask()
*/
static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
unsigned int current_access_flags,
unsigned int target_access_flags)
{
unsigned int diffs = current_access_flags ^ target_access_flags;
if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
struct mlx5_ib_mkey *mmkey)
{


@ -44,13 +44,7 @@
#include <rdma/ib_verbs.h>
#include "dm.h"
#include "mlx5_ib.h"
/*
* We can't use an array for xlt_emergency_page because dma_map_single doesn't
* work on kernel modules memory
*/
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
#include "umr.h"
enum {
MAX_PENDING_REG_MR = 8,
@ -128,11 +122,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
@ -600,7 +589,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5_ib_mr *mr;
/* Matches access in alloc_cache_mr() */
if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
return ERR_PTR(-EOPNOTSUPP);
spin_lock_irq(&ent->lock);
@ -741,7 +730,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
!dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
mlx5_ib_can_load_pas_with_umr(dev, 0))
mlx5r_umr_can_load_pas(dev, 0))
ent->limit = dev->mdev->profile.mr_cache[i].limit;
else
ent->limit = 0;
@ -848,49 +837,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
context->status = wc->status;
complete(&context->done);
}
static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
context->cqe.done = mlx5_ib_umr_done;
context->status = -1;
init_completion(&context->done);
}
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
struct mlx5_umr_wr *umrwr)
{
struct umr_common *umrc = &dev->umrc;
const struct ib_send_wr *bad;
int err;
struct mlx5_ib_umr_context umr_context;
mlx5_ib_init_umr_context(&umr_context);
umrwr->wr.wr_cqe = &umr_context.cqe;
down(&umrc->sem);
err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
if (err) {
mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
} else {
wait_for_completion(&umr_context.done);
if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "reg umr failed (%u)\n",
umr_context.status);
err = -EFAULT;
}
}
up(&umrc->sem);
return err;
}
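
The removed mlx5_ib_post_send_wait() is the classic synchronous-posting pattern: attach a CQE done callback that signals a struct completion, throttle concurrent WRs with a semaphore, post, then sleep until the CQE arrives. The equivalent logic now lives behind the mlx5r_umr_* helpers added in umr.c by this series. A condensed kernel-style sketch of the pattern itself follows; the names are hypothetical and error handling is trimmed.

#include <linux/kernel.h>
#include <linux/completion.h>
#include <linux/semaphore.h>
#include <rdma/ib_verbs.h>

struct sync_ctx {
        struct ib_cqe cqe;
        enum ib_wc_status status;
        struct completion done;
};

static void sync_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct sync_ctx *ctx = container_of(wc->wr_cqe, struct sync_ctx, cqe);

        ctx->status = wc->status;
        complete(&ctx->done);
}

/* Post one WR and sleep until its completion is reaped. */
static int post_send_wait(struct ib_qp *qp, struct semaphore *sem,
                          struct ib_send_wr *wr)
{
        const struct ib_send_wr *bad;
        struct sync_ctx ctx;
        int err;

        ctx.cqe.done = sync_done;
        init_completion(&ctx.done);
        wr->wr_cqe = &ctx.cqe;

        down(sem);                      /* cap the number of outstanding WRs */
        err = ib_post_send(qp, wr, &bad);
        if (!err) {
                wait_for_completion(&ctx.done);
                if (ctx.status != IB_WC_SUCCESS)
                        err = -EFAULT;
        }
        up(sem);
        return err;
}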
static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
unsigned int order)
{
@ -949,7 +895,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
* cache then synchronously create an uncached one.
*/
if (!ent || ent->limit == 0 ||
!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
!mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false);
mutex_unlock(&dev->slow_path_mutex);
@ -968,289 +914,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
* Allocate a temporary buffer to hold the per-page information to transfer to
* HW. For efficiency this should be as large as it can be, but buffer
* allocation failure is not allowed, so try smaller sizes.
*/
static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
const size_t xlt_chunk_align =
MLX5_UMR_MTT_ALIGNMENT / ent_size;
size_t size;
void *res = NULL;
static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
/*
* MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
* allocation can't trigger any kind of reclaim.
*/
might_sleep();
gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
* that, but don't try hard to create one. This max is about 1M, so a
* free x86 huge page will satisfy it.
*/
size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
MLX5_MAX_UMR_CHUNK);
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
if (size > MLX5_SPARE_UMR_CHUNK) {
size = MLX5_SPARE_UMR_CHUNK;
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
}
*nents = PAGE_SIZE / ent_size;
res = (void *)__get_free_page(gfp_mask);
if (res)
return res;
mutex_lock(&xlt_emergency_page_mutex);
memset(xlt_emergency_page, 0, PAGE_SIZE);
return xlt_emergency_page;
}
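
The removed mlx5_ib_alloc_xlt() above implements a "prefer big, but must not fail" ladder: try a large high-order allocation with __GFP_NORETRY, fall back to a smaller chunk, then a single page, and as a last resort hand back a mutex-protected preallocated emergency page. A stripped-down sketch of that ladder is below; the sizes, names, and the single fallback step are simplifications, not the driver's exact code.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/string.h>

static DEFINE_MUTEX(spare_lock);
static void *spare_page;                /* assumed preallocated at module init */

/* Try big, then small, then the serialized emergency page; never fails. */
static void *alloc_xlt_buf(size_t *len, gfp_t gfp)
{
        void *buf;

        gfp |= __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

        buf = (void *)__get_free_pages(gfp, get_order(*len));
        if (buf)
                return buf;

        *len = PAGE_SIZE;               /* shrink the request and retry */
        buf = (void *)__get_free_page(gfp);
        if (buf)
                return buf;

        mutex_lock(&spare_lock);        /* released by the matching free path */
        memset(spare_page, 0, PAGE_SIZE);
        return spare_page;
}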
static void mlx5_ib_free_xlt(void *xlt, size_t length)
{
if (xlt == xlt_emergency_page) {
mutex_unlock(&xlt_emergency_page_mutex);
return;
}
free_pages((unsigned long)xlt, get_order(length));
}
/*
* Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
* submission.
*/
static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
struct mlx5_umr_wr *wr, struct ib_sge *sg,
size_t nents, size_t ent_size,
unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
dma_addr_t dma;
void *xlt;
xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
GFP_KERNEL);
sg->length = nents * ent_size;
dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
mlx5_ib_free_xlt(xlt, sg->length);
return NULL;
}
sg->addr = dma;
sg->lkey = dev->umrc.pd->local_dma_lkey;
memset(wr, 0, sizeof(*wr));
wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
wr->wr.sg_list = sg;
wr->wr.num_sge = 1;
wr->wr.opcode = MLX5_IB_WR_UMR;
wr->pd = mr->ibmr.pd;
wr->mkey = mr->mmkey.key;
wr->length = mr->ibmr.length;
wr->virt_addr = mr->ibmr.iova;
wr->access_flags = mr->access_flags;
wr->page_shift = mr->page_shift;
wr->xlt_size = sg->length;
return xlt;
}
static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
struct ib_sge *sg)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
mlx5_ib_free_xlt(xlt, sg->length);
}
static unsigned int xlt_wr_final_send_flags(unsigned int flags)
{
unsigned int res = 0;
if (flags & MLX5_IB_UPD_XLT_ENABLE)
res |= MLX5_IB_SEND_UMR_ENABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
if (flags & MLX5_IB_UPD_XLT_ADDR)
res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
return res;
}
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
void *xlt;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter;
size_t size_to_map = 0;
size_t orig_sg_length;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
if (WARN_ON(!mr->umem->is_odp))
return -EINVAL;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
if (idx & page_mask) {
npages += idx & page_mask;
idx &= ~page_mask;
}
pages_to_map = ALIGN(npages, page_align);
xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
if (!xlt)
return -ENOMEM;
pages_iter = sg.length / desc_size;
orig_sg_length = sg.length;
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
wr.page_shift = page_shift;
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map)
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.offset = idx * desc_size;
wr.xlt_size = sg.length;
err = mlx5_ib_post_send_wait(dev, &wr);
}
sg.length = orig_sg_length;
mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
return err;
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct ib_block_iter biter;
struct mlx5_mtt *cur_mtt;
struct mlx5_umr_wr wr;
size_t orig_sg_length;
struct mlx5_mtt *mtt;
size_t final_size;
struct ib_sge sg;
int err = 0;
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
ib_umem_num_dma_blocks(mr->umem,
1 << mr->page_shift),
sizeof(*mtt), flags);
if (!mtt)
return -ENOMEM;
orig_sg_length = sg.length;
cur_mtt = mtt;
rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
mr->umem->sgt_append.sgt.nents,
BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
err = mlx5_ib_post_send_wait(dev, &wr);
if (err)
goto err;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
wr.offset += sg.length;
cur_mtt = mtt;
}
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
}
final_size = (void *)cur_mtt - (void *)mtt;
sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
memset(cur_mtt, 0, sg.length - final_size);
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.xlt_size = sg.length;
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
err = mlx5_ib_post_send_wait(dev, &wr);
err:
sg.length = orig_sg_length;
mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
return err;
}
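
mlx5_ib_update_mr_pas(), moved out of mr.c here, streams an arbitrarily long list of page addresses through one fixed-size DMA buffer: entries are packed until the buffer is full, the buffer is posted, and the loop continues, with the final partial buffer zero-padded to the required alignment. A userspace model of that batching follows; the buffer size, alignment and flush hook are invented for illustration.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define BUF_ENTRIES 4          /* pretend the DMA buffer holds 4 entries */
#define ALIGN_ENTRIES 2        /* pretend HW wants multiples of 2 entries */

static uint64_t buf[BUF_ENTRIES];

static void flush(size_t nent)          /* stands in for posting the UMR WR */
{
        printf("posting %zu entries\n", nent);
}

static void update_pas(const uint64_t *pages, size_t npages)
{
        size_t used = 0;

        for (size_t i = 0; i < npages; i++) {
                if (used == BUF_ENTRIES) {      /* buffer full: post and reuse */
                        flush(used);
                        used = 0;
                }
                buf[used++] = pages[i];
        }

        /* zero-pad the tail up to the alignment, then post the remainder */
        size_t padded = (used + ALIGN_ENTRIES - 1) / ALIGN_ENTRIES * ALIGN_ENTRIES;
        memset(&buf[used], 0, (padded - used) * sizeof(buf[0]));
        flush(padded);
}

int main(void)
{
        uint64_t pages[7] = { 1, 2, 3, 4, 5, 6, 7 };

        update_pas(pages, 7);   /* prints "posting 4 entries" twice */
        return 0;
}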
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
@ -1441,7 +1104,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
bool xlt_with_umr;
int err;
xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
} else {
@ -1467,7 +1130,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
@ -1504,7 +1167,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
}
/* ODP requires xlt update via umr to work. */
if (!mlx5_ib_can_load_pas_with_umr(dev, length))
if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
@ -1566,7 +1229,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
if (!umem_dmabuf->sgt)
return;
mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}
@ -1594,7 +1257,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
offset, virt_addr, length, fd, access_flags);
/* dmabuf requires xlt update via umr to work. */
if (!mlx5_ib_can_load_pas_with_umr(dev, length))
if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
@ -1631,31 +1294,6 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return ERR_PTR(err);
}
/**
* revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
* and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
static int revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
umrwr.pd = mr_to_mdev(mr)->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
}
/*
* True if the change in access flags can be done via UMR, only some access
* flags can be updated.
@ -1669,32 +1307,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
return false;
return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
target_access_flags);
}
static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
struct mlx5_umr_wr umrwr = {
.wr = {
.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
.opcode = MLX5_IB_WR_UMR,
},
.mkey = mr->mmkey.key,
.pd = pd,
.access_flags = access_flags,
};
int err;
err = mlx5_ib_post_send_wait(dev, &umrwr);
if (err)
return err;
mr->access_flags = access_flags;
return 0;
return mlx5r_umr_can_reconfig(dev, current_access_flags,
target_access_flags);
}
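
can_use_umr_rereg_access() decides the fast path by XOR-ing the current and requested access flags and rejecting any difference outside the small set UMR may touch; the device-capability part of the check is delegated to mlx5r_umr_can_reconfig(). The same test in isolation looks like the sketch below; the flag values are illustrative and are not the kernel's IB_ACCESS_* encoding.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag bits, not the real IB_ACCESS_* values. */
#define ACC_LOCAL_WRITE   (1u << 0)
#define ACC_REMOTE_WRITE  (1u << 1)
#define ACC_REMOTE_READ   (1u << 2)
#define ACC_REMOTE_ATOMIC (1u << 3)

#define UMR_UPDATABLE (ACC_LOCAL_WRITE | ACC_REMOTE_WRITE | ACC_REMOTE_READ)

static bool can_update_with_umr(unsigned int cur, unsigned int target)
{
        unsigned int diffs = cur ^ target;      /* only the bits that change matter */

        return (diffs & ~UMR_UPDATABLE) == 0;   /* no change outside the allowed set */
}

int main(void)
{
        printf("%d\n", can_update_with_umr(ACC_LOCAL_WRITE,
                                           ACC_LOCAL_WRITE | ACC_REMOTE_READ));   /* 1 */
        printf("%d\n", can_update_with_umr(ACC_LOCAL_WRITE,
                                           ACC_LOCAL_WRITE | ACC_REMOTE_ATOMIC)); /* 0 */
        return 0;
}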
static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
@ -1707,7 +1321,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
/* We only track the allocated sizes of MRs from the cache */
if (!mr->cache_ent)
return false;
if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
*page_size =
@ -1732,7 +1346,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
* with it. This ensure the change is atomic relative to any use of the
* MR.
*/
err = revoke_mr(mr);
err = mlx5r_umr_revoke_mr(mr);
if (err)
return err;
@ -1750,7 +1364,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
mr->ibmr.length = new_umem->length;
mr->page_shift = order_base_2(page_size);
mr->umem = new_umem;
err = mlx5_ib_update_mr_pas(mr, upd_flags);
err = mlx5r_umr_update_mr_pas(mr, upd_flags);
if (err) {
/*
* The MR is revoked at this point so there is no issue to free
@ -1797,7 +1411,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/* Fast path for PD/access change */
if (can_use_umr_rereg_access(dev, mr->access_flags,
new_access_flags)) {
err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
err = mlx5r_umr_rereg_pd_access(mr, new_pd,
new_access_flags);
if (err)
return ERR_PTR(err);
return NULL;
@ -1810,7 +1425,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
* Only one active MR can refer to a umem at one time, revoke
* the old MR before assigning the umem to the new one.
*/
err = revoke_mr(mr);
err = mlx5r_umr_revoke_mr(mr);
if (err)
return ERR_PTR(err);
umem = mr->umem;
@ -1955,7 +1570,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
/* Stop DMA */
if (mr->cache_ent) {
if (revoke_mr(mr)) {
if (mlx5r_umr_revoke_mr(mr)) {
spin_lock_irq(&mr->cache_ent->lock);
mr->cache_ent->total_mrs--;
spin_unlock_irq(&mr->cache_ent->lock);


@ -38,6 +38,7 @@
#include "mlx5_ib.h"
#include "cmd.h"
#include "umr.h"
#include "qp.h"
#include <linux/mlx5/eq.h>
@ -117,7 +118,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*
* xa_store()
* mutex_lock(umem_mutex)
* mlx5_ib_update_xlt()
* mlx5r_umr_update_xlt()
* mutex_unlock(umem_mutex)
* destroy lkey
*
@ -198,9 +199,9 @@ static void free_implicit_child_mr_work(struct work_struct *work)
mlx5r_deref_wait_odp_mkey(&mr->mmkey);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT,
1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr->parent,
ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
@ -282,19 +283,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5_update_odp_stats(mr, invalidations, invalidations);
@ -323,8 +324,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
memset(caps, 0, sizeof(*caps));
if (!MLX5_CAP_GEN(dev->mdev, pg) ||
!mlx5_ib_can_load_pas_with_umr(dev, 0))
if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@ -442,11 +442,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
*/
refcount_set(&mr->mmkey.usecount, 2);
err = mlx5_ib_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
if (err) {
ret = ERR_PTR(err);
goto out_mr;
@ -487,8 +487,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
if (!mlx5_ib_can_load_pas_with_umr(dev,
MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
@ -510,16 +509,15 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->ibmr.lkey = imr->mmkey.key;
imr->ibmr.rkey = imr->mmkey.key;
imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
xa_init(&imr->implicit_children);
err = mlx5_ib_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
if (err)
goto out_mr;
@ -582,7 +580,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
* No need to check whether the MTTs really belong to this MR, since
* ib_umem_odp_map_dma_and_lock already checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
@ -680,9 +678,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
* next pagefault handler will see the new information.
*/
mutex_lock(&odp_imr->umem_mutex);
err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
if (err) {
mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
@ -716,7 +714,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
err = mlx5_ib_update_mr_pas(mr, xlt_flags);
err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);

View File

@ -40,6 +40,7 @@
#include "ib_rep.h"
#include "counters.h"
#include "cmd.h"
#include "umr.h"
#include "qp.h"
#include "wr.h"

View File

@ -0,0 +1,700 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"
/*
* We can't use an array for xlt_emergency_page because dma_map_single doesn't
* work on kernel module memory
*/
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW;
if (MLX5_CAP_GEN(dev->mdev, atomic))
result |= MLX5_MKEY_MASK_A;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_A &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return -EPERM;
return 0;
}
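
Read together, the mask helpers above simply OR per-field bits into a 64-bit mkey mask, and umr_check_mkey_mask() then rejects any bit the device reports it cannot modify. A standalone sketch of that compose-then-validate shape, with invented bit names and a fake capability struct (nothing here is mlx5 API):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins for MLX5_MKEY_MASK_* bits and device capability flags. */
#define MASK_FREE      (1ULL << 0)
#define MASK_PAGE_SIZE (1ULL << 1)
#define MASK_ATOMIC    (1ULL << 2)

struct fake_caps {
	int modify_entity_size_disabled;
	int modify_atomic_disabled;
};

static int check_mkey_mask(const struct fake_caps *caps, uint64_t mask)
{
	if ((mask & MASK_PAGE_SIZE) && caps->modify_entity_size_disabled)
		return -EPERM;
	if ((mask & MASK_ATOMIC) && caps->modify_atomic_disabled)
		return -EPERM;
	return 0;
}

int main(void)
{
	struct fake_caps caps = { .modify_atomic_disabled = 1 };
	uint64_t mask = MASK_FREE | MASK_ATOMIC;	/* composed like get_umr_*_mask() */

	printf("%d\n", check_mkey_mask(&caps, mask));		      /* -EPERM: atomic bit refused */
	printf("%d\n", check_mkey_mask(&caps, mask & ~MASK_ATOMIC)); /* 0 */
	return 0;
}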
enum {
MAX_UMR_WR = 128,
};
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
struct ib_qp_attr attr = {};
int ret;
attr.qp_state = IB_QPS_INIT;
attr.port_num = 1;
ret = ib_modify_qp(qp, &attr,
IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
return ret;
}
return 0;
}
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr init_attr = {};
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
pd = ib_alloc_pd(&dev->ib_dev, 0);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
return PTR_ERR(pd);
}
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto destroy_pd;
}
init_attr.send_cq = cq;
init_attr.recv_cq = cq;
init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr.cap.max_send_wr = MAX_UMR_WR;
init_attr.cap.max_send_sge = 1;
init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
init_attr.port_num = 1;
qp = ib_create_qp(pd, &init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto destroy_cq;
}
ret = mlx5r_umr_qp_rst2rts(dev, qp);
if (ret)
goto destroy_qp;
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
return 0;
destroy_qp:
ib_destroy_qp(qp);
destroy_cq:
ib_free_cq(cq);
destroy_pd:
ib_dealloc_pd(pd);
return ret;
}
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
ib_dealloc_pd(dev->umrc.pd);
}
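
mlx5r_umr_resource_init() and mlx5r_umr_resource_cleanup() follow the usual allocate-in-order / unwind-in-reverse pattern. A generic userspace sketch of that error-handling shape (alloc_res() and free_res() are placeholders, not kernel calls):

#include <stdio.h>
#include <stdlib.h>

/* Placeholders standing in for ib_alloc_pd() / ib_alloc_cq() / ib_create_qp(). */
static void *alloc_res(void) { return malloc(16); }
static void free_res(void *p) { free(p); }

static int resources_init(void **pd, void **cq, void **qp)
{
	*pd = alloc_res();
	if (!*pd)
		return -1;
	*cq = alloc_res();
	if (!*cq)
		goto destroy_pd;
	*qp = alloc_res();
	if (!*qp)
		goto destroy_cq;
	return 0;			/* success: caller owns all three */

destroy_cq:
	free_res(*cq);
destroy_pd:
	free_res(*pd);
	return -1;			/* failure: nothing leaks */
}

int main(void)
{
	void *pd, *cq, *qp;

	if (!resources_init(&pd, &cq, &qp))
		printf("pd, cq and qp created\n");
	return 0;
}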
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
unsigned int wqe_size =
with_data ? sizeof(struct mlx5r_umr_wqe) :
sizeof(struct mlx5r_umr_wqe) -
sizeof(struct mlx5_wqe_data_seg);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_ctrl_seg *ctrl;
union {
struct ib_cqe *ib_cqe;
u64 wr_id;
} id;
void *cur_edge, *seg;
unsigned long flags;
unsigned int idx;
int size, err;
if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;
spin_lock_irqsave(&qp->sq.lock, flags);
err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
cpu_to_be32(mkey), false, false);
if (WARN_ON(err))
goto out;
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
out:
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
context->status = wc->status;
complete(&context->done);
}
static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
context->cqe.done = mlx5r_umr_done;
init_completion(&context->done);
}
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5r_umr_context umr_context;
int err;
err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
if (WARN_ON(err))
return err;
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
with_data);
if (err)
mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
else {
wait_for_completion(&umr_context.done);
if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "reg umr failed (%u)\n",
umr_context.status);
err = -EFAULT;
}
}
up(&umrc->sem);
return err;
}
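
The post-and-wait helper above bounds the number of in-flight UMR WQEs with a semaphore (initialized to MAX_UMR_WR) and sleeps on a completion that the CQ handler fires. A minimal pthread analogue of that throttle-post-wait pattern (all names invented, the "CQ handler" is just a thread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

#define MAX_INFLIGHT 128		/* analogous to MAX_UMR_WR */

struct completion {			/* analogous to struct completion + wc status */
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
	int status;
};

static sem_t slots;			/* analogous to umrc->sem */

static void *cq_handler(void *arg)	/* stands in for the CQ completion callback */
{
	struct completion *c = arg;

	pthread_mutex_lock(&c->lock);
	c->status = 0;			/* "IB_WC_SUCCESS" */
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
	return NULL;
}

static int post_send_wait(void)
{
	struct completion c = { PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER, 0, 0 };
	pthread_t t;

	sem_wait(&slots);		/* throttle: at most MAX_INFLIGHT posted at once */
	pthread_create(&t, NULL, cq_handler, &c);	/* "post" the WQE */

	pthread_mutex_lock(&c.lock);	/* wait_for_completion() */
	while (!c.done)
		pthread_cond_wait(&c.cond, &c.lock);
	pthread_mutex_unlock(&c.lock);

	pthread_join(t, NULL);
	sem_post(&slots);
	return c.status;
}

int main(void)
{
	sem_init(&slots, 0, MAX_INFLIGHT);
	printf("status %d\n", post_send_wait());
	sem_destroy(&slots);
	return 0;
}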
/**
* mlx5r_umr_revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
* and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *seg,
unsigned int access_flags)
{
MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
MLX5_SET(mkc, seg, relaxed_ordering_write,
!!(access_flags & IB_ACCESS_RELAXED_ORDERING));
MLX5_SET(mkc, seg, relaxed_ordering_read,
!!(access_flags & IB_ACCESS_RELAXED_ORDERING));
}
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
int err;
wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
if (err)
return err;
mr->access_flags = access_flags;
return 0;
}
#define MLX5_MAX_UMR_CHUNK \
((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
* Allocate a temporary buffer to hold the per-page information to transfer to
* HW. For efficiency this should be as large as it can be, but buffer
* allocation failure is not allowed, so try smaller sizes.
*/
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
size_t size;
void *res = NULL;
static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
/*
* MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context, just that the
* allocation can't trigger any kind of reclaim.
*/
might_sleep();
gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
* that, but don't try hard to create one. This max is about 1M, so a
* free x86 huge page will satisfy it.
*/
size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
MLX5_MAX_UMR_CHUNK);
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
if (size > MLX5_SPARE_UMR_CHUNK) {
size = MLX5_SPARE_UMR_CHUNK;
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
}
*nents = PAGE_SIZE / ent_size;
res = (void *)__get_free_page(gfp_mask);
if (res)
return res;
mutex_lock(&xlt_emergency_page_mutex);
memset(xlt_emergency_page, 0, PAGE_SIZE);
return xlt_emergency_page;
}
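
mlx5r_umr_alloc_xlt() tries progressively smaller buffers and, because failure is not allowed, finally hands out a shared emergency page. A userspace sketch of that descending-size fallback (sizes and names are illustrative only, malloc stands in for __get_free_pages()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CHUNK   (1 << 20)		/* "as large as it can be", about 1M */
#define SPARE_CHUNK (1 << 16)		/* middle fallback, like MLX5_SPARE_UMR_CHUNK */
#define PAGE_SZ     4096

static char emergency_page[PAGE_SZ];	/* last resort, like xlt_emergency_page */

static void *alloc_xlt(size_t *len)
{
	size_t sizes[] = { MAX_CHUNK, SPARE_CHUNK, PAGE_SZ };
	void *buf;

	for (size_t i = 0; i < 3; i++) {
		buf = malloc(sizes[i]);
		if (buf) {
			*len = sizes[i];
			return buf;
		}
	}
	/* allocation failure is not allowed: fall back to the emergency page */
	memset(emergency_page, 0, PAGE_SZ);
	*len = PAGE_SZ;
	return emergency_page;
}

int main(void)
{
	size_t len;
	void *buf = alloc_xlt(&len);

	printf("got %zu bytes\n", len);
	if (buf != emergency_page)
		free(buf);
	return 0;
}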
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
if (xlt == xlt_emergency_page) {
mutex_unlock(&xlt_emergency_page_mutex);
return;
}
free_pages((unsigned long)xlt, get_order(length));
}
static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
struct ib_sge *sg)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
mlx5r_umr_free_xlt(xlt, sg->length);
}
/*
* Create an XLT buffer ready for submission.
*/
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
size_t nents, size_t ent_size,
unsigned int flags)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_addr_t dma;
void *xlt;
xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
GFP_KERNEL);
sg->length = nents * ent_size;
dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
mlx5r_umr_free_xlt(xlt, sg->length);
return NULL;
}
sg->addr = dma;
sg->lkey = dev->umrc.pd->local_dma_lkey;
return xlt;
}
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
unsigned int flags, struct ib_sge *sg)
{
if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
/* fail if free */
ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
else
/* fail if not free */
ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
ctrl_seg->xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}
static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *mkey_seg,
struct mlx5_ib_mr *mr,
unsigned int page_shift)
{
mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}
static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
struct ib_sge *sg)
{
data_seg->byte_count = cpu_to_be32(sg->length);
data_seg->lkey = cpu_to_be32(sg->lkey);
data_seg->addr = cpu_to_be64(sg->addr);
}
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
u64 offset)
{
u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
struct mlx5r_umr_wqe *wqe,
struct mlx5_ib_mr *mr, struct ib_sge *sg,
unsigned int flags)
{
bool update_pd_access, update_translation;
if (flags & MLX5_IB_UPD_XLT_ENABLE)
wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
flags & MLX5_IB_UPD_XLT_PD ||
flags & MLX5_IB_UPD_XLT_ACCESS;
if (update_pd_access) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
}
update_translation =
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
}
wqe->ctrl_seg.xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
struct mlx5_mtt *mtt;
size_t final_size;
struct ib_sge sg;
u64 offset = 0;
int err = 0;
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
mtt = mlx5r_umr_create_xlt(
dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
sizeof(*mtt), flags);
if (!mtt)
return -ENOMEM;
orig_sg_length = sg.length;
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
cur_mtt = mtt;
rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
mr->umem->sgt_append.sgt.nents,
BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
true);
if (err)
goto err;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
cur_mtt = mtt;
}
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
}
final_size = (void *)cur_mtt - (void *)mtt;
sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
memset(cur_mtt, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
err:
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
return err;
}
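
The loop in mlx5r_umr_update_mr_pas() fills a fixed staging buffer with MTT entries, posts a UMR WQE whenever the buffer fills, and finally zero-pads the last partial batch up to the alignment before posting it. A plain-C sketch of that fill/flush shape (the entries and flush() below are invented):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUF_ENTRIES 8			/* staging capacity, i.e. sg.length / sizeof(mtt) */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

static void flush(const uint64_t *buf, size_t n, uint64_t off)
{
	printf("flush %zu entries at offset %llu\n", n, (unsigned long long)off);
}

int main(void)
{
	uint64_t buf[BUF_ENTRIES];
	size_t cur = 0;
	uint64_t offset = 0;
	const size_t total = 19;	/* pretend the MR spans 19 pages */

	for (size_t i = 0; i < total; i++) {
		if (cur == BUF_ENTRIES) {		/* buffer full: post it and restart */
			flush(buf, cur, offset);
			offset += cur;
			cur = 0;
		}
		buf[cur++] = (0x1000 * i) | 1;		/* "dma address | PRESENT" */
	}
	/* zero-pad the tail so the posted length stays aligned, then post it */
	size_t padded = ALIGN_UP(cur, 4);
	memset(&buf[cur], 0, (padded - cur) * sizeof(buf[0]));
	flush(buf, padded, offset);
	return 0;
}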
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
const int page_mask = page_align - 1;
struct mlx5r_umr_wqe wqe = {};
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t size_to_map = 0;
size_t orig_sg_length;
size_t pages_iter;
struct ib_sge sg;
int err = 0;
void *xlt;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
if (WARN_ON(!mr->umem->is_odp))
return -EINVAL;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
if (idx & page_mask) {
npages += idx & page_mask;
idx &= ~page_mask;
}
pages_to_map = ALIGN(npages, page_align);
xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
if (!xlt)
return -ENOMEM;
pages_iter = sg.length / desc_size;
orig_sg_length = sg.length;
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map)
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
}
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
return err;
}
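
Because UMR copies translation entries in MLX5_UMR_MTT_ALIGNMENT-sized units, mlx5r_umr_update_xlt() rounds the starting index down to that granularity and grows the entry count to compensate. The arithmetic in isolation, with illustrative values:

#include <stdio.h>

int main(void)
{
	const int desc_size = 8;			/* e.g. sizeof(struct mlx5_mtt) */
	const int page_align = 64 / desc_size;		/* alignment unit / descriptor size */
	const int page_mask = page_align - 1;
	unsigned long idx = 21;				/* requested first entry */
	int npages = 5;					/* requested entry count */

	if (idx & page_mask) {
		npages += idx & page_mask;		/* cover the entries we rounded past */
		idx &= ~(unsigned long)page_mask;	/* round the start down to the unit */
	}
	printf("idx=%lu npages=%d\n", idx, npages);	/* idx=16 npages=10 */
	return 0;
}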

View File

@ -0,0 +1,97 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
#ifndef _MLX5_IB_UMR_H
#define _MLX5_IB_UMR_H
#include "mlx5_ib.h"
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
size_t length)
{
/*
* umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
* always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
* MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
* can never be enabled without this capability. Simplify this weird
* quirky hardware by just saying it can't use PAS lists with UMR at
* all.
*/
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
/*
* length is the size of the MR in bytes when mlx5_ib_update_xlt() is
* used.
*/
if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
return false;
return true;
}
/*
* true if an existing MR can be reconfigured to new access_flags using UMR.
* Older HW cannot use UMR to update certain elements of the MKC. See
* get_umr_update_access_mask() and umr_check_mkey_mask()
*/
static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
unsigned int current_access_flags,
unsigned int target_access_flags)
{
unsigned int diffs = current_access_flags ^ target_access_flags;
if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
{
return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
MLX5_IB_UMR_OCTOWORD;
}
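
mlx5r_umr_get_xlt_octo() converts a byte length into 16-byte octowords after rounding up to the 64-byte XLT alignment. A quick standalone check of that formula:

#include <stdio.h>

#define UMR_OCTOWORD      16
#define UMR_XLT_ALIGNMENT 64
#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static unsigned long long get_xlt_octo(unsigned long long bytes)
{
	return ALIGN_UP(bytes, UMR_XLT_ALIGNMENT) / UMR_OCTOWORD;
}

int main(void)
{
	/* 1 byte rounds up to one 64-byte unit = 4 octowords; 128 bytes = 8 */
	printf("%llu %llu %llu\n", get_xlt_octo(1), get_xlt_octo(64), get_xlt_octo(128));
	return 0;
}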
struct mlx5r_umr_context {
struct ib_cqe cqe;
enum ib_wc_status status;
struct completion done;
};
struct mlx5r_umr_wqe {
struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
struct mlx5_mkey_seg mkey_seg;
struct mlx5_wqe_data_seg data_seg;
};
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
#endif /* _MLX5_IB_UMR_H */

View File

@ -7,6 +7,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
#include "wr.h"
#include "umr.h"
static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND] = MLX5_OPCODE_SEND,
@ -25,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = {
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
};
/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
* next nearby edge and get new address translation for current WQE position.
* @sq - SQ buffer.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @cur_edge: Updated current edge.
*/
static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
u32 wqe_sz, void **cur_edge)
{
u32 idx;
if (likely(*seg != *cur_edge))
return;
idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
*cur_edge = get_sq_edge(sq, idx);
*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
}
/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
* pointers. At the end @seg is aligned to 16B regardless the copied size.
* @sq - SQ buffer.
* @cur_edge: Updated current edge.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @src: Pointer to copy from.
* @n: Number of bytes to copy.
*/
static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
void **seg, u32 *wqe_sz, const void *src,
size_t n)
{
while (likely(n)) {
size_t leftlen = *cur_edge - *seg;
size_t copysz = min_t(size_t, leftlen, n);
size_t stride;
memcpy(*seg, src, copysz);
n -= copysz;
src += copysz;
stride = !n ? ALIGN(copysz, 16) : copysz;
*seg += stride;
*wqe_sz += stride >> 4;
handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
}
}
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
struct ib_cq *ib_cq)
int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
unsigned int cur;
@ -122,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
/* memcpy_send_wqe should get a 16B align address. Hence, we
* first copy up to the current edge and then, if needed,
* continue to memcpy_send_wqe.
/* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
* we first copy up to the current edge and then, if needed,
* continue to mlx5r_memcpy_send_wqe.
*/
copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
left);
@ -138,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
left);
mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
pdata, left);
}
return;
@ -165,12 +115,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
static u64 get_xlt_octo(u64 bytes)
{
return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
MLX5_IB_UMR_OCTOWORD;
}
static __be64 frwr_mkey_mask(bool atomic)
{
u64 result;
@ -222,7 +166,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask(atomic);
}
@ -233,134 +177,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(int atomic,
int relaxed_ordering_write,
int relaxed_ordering_read)
{
u64 result;
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW;
if (atomic)
result |= MLX5_MKEY_MASK_A;
if (relaxed_ordering_write)
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
if (relaxed_ordering_read)
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_A &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return -EPERM;
return 0;
}
static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
struct mlx5_wqe_umr_ctrl_seg *umr,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(umr, 0, sizeof(*umr));
if (!umrwr->ignore_free_state) {
if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
/* fail if free */
umr->flags = MLX5_UMR_CHECK_FREE;
else
/* fail if not free */
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
}
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
u64 offset = get_xlt_octo(umrwr->offset);
umr->xlt_offset = cpu_to_be16(offset & 0xffff);
umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
umr->mkey_mask |= get_umr_update_access_mask(
!!(MLX5_CAP_GEN(dev->mdev, atomic)),
!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
umr->mkey_mask |= get_umr_enable_mr_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
umr->mkey_mask |= get_umr_disable_mr_mask();
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
}
static u8 get_umr_flags(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
@ -398,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
seg->status = MLX5_MKEY_STATUS_FREE;
}
static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *seg,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
MLX5_SET(mkc, seg, free, 1);
MLX5_SET(mkc, seg, a,
!!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, seg, rw,
!!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
MLX5_SET(mkc, seg, relaxed_ordering_write,
!!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
MLX5_SET(mkc, seg, relaxed_ordering_read,
!!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (umrwr->pd)
MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
MLX5_SET(mkc, seg, length64, 1);
MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
MLX5_SET64(mkc, seg, len, umrwr->length);
MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
MLX5_SET(mkc, seg, qpn, 0xffffff);
MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
}
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
@ -760,7 +539,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
@ -770,7 +549,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
@ -870,7 +649,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
* Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
* kernel ULPs are not aware of it, so we don't set it here.
*/
if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
mlx5_ib_warn(
to_mdev(qp->ibqp.device),
"Fast update for MR access flags is not possible\n");
@ -899,8 +678,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
mr_list_size);
mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
mr_list_size);
*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
@ -942,23 +721,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
}
}
static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
const struct ib_send_wr *wr, unsigned int *idx,
int *size, void **cur_edge, int nreq,
bool send_signaled, bool solicited)
int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
int *size, void **cur_edge, int nreq, __be32 general_id,
bool send_signaled, bool solicited)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
(*ctrl)->general_id = general_id;
(*ctrl)->fm_ce_se = qp->sq_signal_bits |
(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
@ -972,16 +750,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
const struct ib_send_wr *wr, unsigned int *idx, int *size,
void **cur_edge, int nreq)
{
return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge,
unsigned int idx, u64 wr_id, int nreq, u8 fence,
u32 mlx5_opcode)
void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge, unsigned int idx,
u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
{
u8 opmod = 0;
@ -1045,8 +821,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
/*
* SET_PSV WQEs are not signaled and solicited on error.
*/
err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
false, true);
err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
send_ieth(wr), false, true);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@ -1057,8 +833,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_warn(dev, "\n");
goto out;
}
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
next_fence, MLX5_OPCODE_SET_PSV);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, next_fence, MLX5_OPCODE_SET_PSV);
out:
return err;
@ -1098,8 +874,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
if (unlikely(err))
goto out;
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, fence, MLX5_OPCODE_UMR);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
if (unlikely(err)) {
@ -1130,8 +906,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
mlx5_ib_warn(dev, "\n");
goto out;
}
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
fence, MLX5_OPCODE_UMR);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, fence, MLX5_OPCODE_UMR);
sig_attrs = mr->ibmr.sig_attrs;
err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
@ -1246,33 +1022,30 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
}
}
static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
const struct ib_send_wr *wr,
struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
int *size, void **cur_edge, unsigned int idx)
void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
struct mlx5_wqe_ctrl_seg *ctrl)
{
int err = 0;
struct mlx5_bf *bf = &qp->bf;
if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
err = -EINVAL;
mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
goto out;
}
qp->sq.head += nreq;
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
err = set_reg_umr_segment(dev, *seg, wr);
if (unlikely(err))
goto out;
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_reg_mkey_segment(dev, *seg, wr);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
out:
return err;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell.
*/
wmb();
mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
bf->offset ^= bf->buf_size;
}
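
mlx5r_ring_db() depends on two write barriers: the descriptors must be visible before the doorbell record is updated, and the record before the doorbell register write. A userspace analogue of the first ordering using C11 release/acquire (purely illustrative; the real code uses wmb() and MMIO, not atomics):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int descriptor[4];			/* pretend WQE contents */
static atomic_int doorbell;			/* pretend doorbell record */

static void *producer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 4; i++)
		descriptor[i] = i + 1;		/* write the descriptor first */
	/* release: everything above is visible before the doorbell store */
	atomic_store_explicit(&doorbell, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	/* acquire: once the doorbell is seen, the descriptor writes are visible */
	while (!atomic_load_explicit(&doorbell, memory_order_acquire))
		;
	printf("descriptor[3] = %d\n", descriptor[3]);	/* always 4 */
	pthread_join(t, NULL);
	return 0;
}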
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@ -1283,7 +1056,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
int size;
unsigned long flags;
@ -1305,8 +1077,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
bf = &qp->bf;
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@ -1384,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_QPT_UD:
handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
break;
case MLX5_IB_QPT_REG_UMR:
err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
&size, &cur_edge, idx);
if (unlikely(err))
goto out;
break;
default:
break;
@ -1418,35 +1182,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
qp->next_fence = next_fence;
finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
fence, mlx5_ib_opcode[wr->opcode]);
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
nreq, fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell.
*/
wmb();
mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
bf->offset ^= bf->buf_size;
}
if (likely(nreq))
mlx5r_ring_db(qp, nreq, ctrl);
spin_unlock_irqrestore(&qp->sq.lock, flags);
@ -1486,7 +1231,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;

View File

@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
return fragment_end + MLX5_SEND_WQE_BB;
}
/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
* next nearby edge and get new address translation for current WQE position.
* @sq: SQ buffer.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @cur_edge: Updated current edge.
*/
static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
u32 wqe_sz, void **cur_edge)
{
u32 idx;
if (likely(*seg != *cur_edge))
return;
idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
*cur_edge = get_sq_edge(sq, idx);
*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
}
/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant
* WQ's pointers. At the end @seg is aligned to 16B regardless of the copied size.
* @sq: SQ buffer.
* @cur_edge: Updated current edge.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @src: Pointer to copy from.
* @n: Number of bytes to copy.
*/
static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
void **seg, u32 *wqe_sz,
const void *src, size_t n)
{
while (likely(n)) {
size_t leftlen = *cur_edge - *seg;
size_t copysz = min_t(size_t, leftlen, n);
size_t stride;
memcpy(*seg, src, copysz);
n -= copysz;
src += copysz;
stride = !n ? ALIGN(copysz, 16) : copysz;
*seg += stride;
*wqe_sz += stride >> 4;
handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
}
}
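
mlx5r_memcpy_send_wqe() copies a payload into a send queue laid out in fixed-size fragments, hopping to the next fragment whenever the write pointer hits the current edge and leaving it 16-byte aligned at the end. A simplified flat model of that loop (fragment size and layout invented for the sketch):

#include <stdio.h>
#include <string.h>

#define FRAG_SZ 32			/* stand-in for one SQ fragment */
#define NFRAGS   4

static char sq[NFRAGS][FRAG_SZ];	/* the "fragmented" send queue */

/* Copy n bytes starting at fragment *f, offset *off, hopping over edges. */
static void copy_to_wqe(size_t *f, size_t *off, const char *src, size_t n)
{
	while (n) {
		size_t left = FRAG_SZ - *off;		/* room before the edge */
		size_t chunk = n < left ? n : left;

		memcpy(&sq[*f][*off], src, chunk);
		src += chunk;
		n -= chunk;
		/* mid-copy advance by the raw chunk, final advance aligned to 16B */
		*off += n ? chunk : ((chunk + 15) & ~(size_t)15);
		if (*off >= FRAG_SZ) {			/* reached the edge: next fragment */
			*f = (*f + 1) % NFRAGS;
			*off = 0;
		}
	}
}

int main(void)
{
	char payload[50];
	size_t f = 0, off = 0;

	memset(payload, 'x', sizeof(payload));
	copy_to_wqe(&f, &off, payload, sizeof(payload));
	printf("ended in fragment %zu at offset %zu\n", f, off);	/* fragment 2, offset 0 */
	return 0;
}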
int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq);
int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
int *size, void **cur_edge, int nreq, __be32 general_id,
bool send_signaled, bool solicited);
void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge, unsigned int idx,
u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode);
void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
struct mlx5_wqe_ctrl_seg *ctrl);
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr, bool drain);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,

View File

@ -90,8 +90,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
attr->max_send_sge = dev->attr.max_send_sge;
attr->max_recv_sge = dev->attr.max_recv_sge;
attr->max_sge_rd = dev->attr.max_rdma_sge;

View File

@ -500,7 +500,6 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
if (dev->int_info.msix_cnt) {
idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
vector = dev->int_info.msix[idx].vector;
synchronize_irq(vector);
free_irq(vector, &dev->cnq_array[i]);
}
}

View File

@ -134,7 +134,8 @@ int qedr_query_device(struct ib_device *ibdev,
attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
if (!rdma_protocol_iwarp(&dev->ibdev, 1))
attr->device_cap_flags |= IB_DEVICE_XRC;

View File

@ -678,7 +678,7 @@ struct qib_pportdata {
/* Observers. Not to be taken lightly, possibly not to ship. */
/*
* If a diag read or write is to (bottom <= offset <= top),
* the "hoook" is called, allowing, e.g. shadows to be
* the "hook" is called, allowing, e.g. shadows to be
* updated in sync with the driver. struct diag_observer
* is the "visible" part.
*/

View File

@ -534,6 +534,11 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
struct usnic_ib_vf *vf;
enum usnic_vnic_res_type res_type;
if (!device_iommu_mapped(&pdev->dev)) {
usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
return -EPERM;
}
vf = kzalloc(sizeof(*vf), GFP_KERNEL);
if (!vf)
return -ENOMEM;
@ -642,12 +647,6 @@ static int __init usnic_ib_init(void)
printk_once(KERN_INFO "%s", usnic_version);
err = usnic_uiom_init(DRV_NAME);
if (err) {
usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
err = pci_register_driver(&usnic_ib_pci_driver);
if (err) {
usnic_err("Unable to register with PCI\n");

View File

@ -305,7 +305,8 @@ int usnic_ib_query_device(struct ib_device *ibdev,
props->max_qp = qp_per_vf *
kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
IB_DEVICE_SYS_IMAGE_GUID;
props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
@ -442,7 +443,7 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct usnic_ib_pd *pd = to_upd(ibpd);
pd->umem_pd = usnic_uiom_alloc_pd();
pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent);
if (IS_ERR(pd->umem_pd))
return PTR_ERR(pd->umem_pd);
@ -706,4 +707,3 @@ int usnic_ib_mmap(struct ib_ucontext *context,
usnic_err("No VF %u found\n", vfid);
return -EINVAL;
}

View File

@ -40,7 +40,6 @@
#include <linux/iommu.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include "usnic_log.h"
@ -439,7 +438,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr)
__usnic_uiom_release_tail(uiomr);
}
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev)
{
struct usnic_uiom_pd *pd;
void *domain;
@ -448,7 +447,7 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
if (!pd)
return ERR_PTR(-ENOMEM);
pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
pd->domain = domain = iommu_domain_alloc(dev->bus);
if (!domain) {
usnic_err("Failed to allocate IOMMU domain");
kfree(pd);
@ -556,13 +555,3 @@ void usnic_uiom_free_dev_list(struct device **devs)
{
kfree(devs);
}
int usnic_uiom_init(char *drv_name)
{
if (!iommu_present(&pci_bus_type)) {
usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
return -EPERM;
}
return 0;
}

View File

@ -80,7 +80,7 @@ struct usnic_uiom_chunk {
struct scatterlist page_list[];
};
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev);
void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
@ -91,5 +91,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int access, int dmasync);
void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr);
int usnic_uiom_init(char *drv_name);
#endif /* USNIC_UIOM_H_ */

View File

@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
EXPORT_SYMBOL(rvt_qp_iter);
/*
* This should be called with s_lock held.
* This should be called with s_lock and r_lock held.
*/
void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
@ -3134,7 +3134,9 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
rvp->n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
spin_unlock(&sqp->r_lock);
if (local_ops) {
atomic_dec(&sqp->local_ops_pending);
local_ops = 0;
@ -3188,7 +3190,9 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
spin_unlock_irqrestore(&qp->r_lock, flags);
serr_no_r_lock:
spin_lock_irqsave(&sqp->s_lock, flags);
spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
spin_unlock(&sqp->r_lock);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe;

View File

@ -46,6 +46,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_qp = RXE_MAX_QP;
rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG;
rxe->attr.max_send_sge = RXE_MAX_SGE;
rxe->attr.max_recv_sge = RXE_MAX_SGE;
rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;

View File

@ -562,7 +562,8 @@ int rxe_completer(void *arg)
enum comp_state state;
int ret = 0;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
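
The guard just added makes the completer bail out when it cannot take a reference, i.e. when the QP's refcount has already dropped to zero (rxe_get() here is presumably a get-unless-zero style helper). The underlying "increment unless zero" primitive can be sketched with C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

/* Take a reference only if the object is still live (count > 0). */
static int get_unless_zero(atomic_int *refcount)
{
	int old = atomic_load(refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(refcount, &old, old + 1))
			return 1;	/* got a reference */
	}
	return 0;			/* object already on its way out */
}

int main(void)
{
	atomic_int live = 1, dead = 0;

	printf("live: %d dead: %d\n",
	       get_unless_zero(&live), get_unless_zero(&dead));	/* 1 0 */
	return 0;
}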
if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
qp->req.state == QP_STATE_RESET) {

View File

@ -37,7 +37,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
void rxe_cq_disable(struct rxe_cq *cq);
void rxe_cq_cleanup(struct rxe_pool_elem *arg);
void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
@ -81,7 +81,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_elem *arg);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
@ -89,7 +89,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw);
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
void rxe_mw_cleanup(struct rxe_pool_elem *arg);
void rxe_mw_cleanup(struct rxe_pool_elem *elem);
/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
@ -114,7 +114,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
void rxe_qp_error(struct rxe_qp *qp);
int rxe_qp_chk_destroy(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
void rxe_qp_cleanup(struct rxe_pool_elem *elem);
static inline int qp_num(struct rxe_qp *qp)
@ -159,18 +158,16 @@ void retransmit_timer(struct timer_list *t);
void rnr_nak_timer(struct timer_list *t);
/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init);
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init, struct ib_udata *udata,
struct rxe_create_srq_resp __user *uresp);
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata);
void rxe_srq_cleanup(struct rxe_pool_elem *elem);
void rxe_dealloc(struct ib_device *ib_dev);

View File

@ -206,8 +206,10 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
/* speculative alloc of new mcg */
mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
if (!mcg)
return ERR_PTR(-ENOMEM);
if (!mcg) {
err = -ENOMEM;
goto err_dec;
}
spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just added it */

View File

@ -683,14 +683,10 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct rxe_mr *mr = to_rmr(ibmr);
if (atomic_read(&mr->num_mw) > 0) {
pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
__func__);
/* See IBA 10.6.7.2.6 */
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
}
mr->state = RXE_MR_STATE_INVALID;
rxe_put(mr_pd(mr));
rxe_put(mr);
return 0;
@ -700,6 +696,8 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
rxe_put(mr_pd(mr));
ib_umem_release(mr->umem);
if (mr->cur_map_set)

View File

@ -3,6 +3,14 @@
* Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
*/
/*
* The rdma_rxe driver supports type 1 or type 2B memory windows.
* Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by
* ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw()
* but bound by bind_mw work requests. The ibv_bind_mw() call is converted
* by libibverbs to a bind_mw work request.
*/
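
For context, the user-facing side of the window types described in this new comment looks roughly like the libibverbs fragment below: a type 1 MW is allocated and bound with ibv_bind_mw(), while a type 2 MW would be bound by posting an IBV_WR_BIND_MW work request. This is a sketch, not code from the patch, and it assumes an already-created pd, qp, and an MR registered with IBV_ACCESS_MW_BIND.

#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdio.h>

/* Bind a type 1 memory window over part of an existing MR. */
static int bind_type1_mw(struct ibv_pd *pd, struct ibv_qp *qp,
			 struct ibv_mr *mr, void *addr, size_t len)
{
	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
	struct ibv_mw_bind bind = {
		.wr_id = 1,
		.send_flags = IBV_SEND_SIGNALED,
		.bind_info = {
			.mr = mr,
			.addr = (uint64_t)(uintptr_t)addr,
			.length = len,
			.mw_access_flags = IBV_ACCESS_REMOTE_READ |
					   IBV_ACCESS_REMOTE_WRITE,
		},
	};

	if (!mw)
		return -1;
	if (ibv_bind_mw(qp, mw, &bind)) {	/* type 2 would use an IBV_WR_BIND_MW WR instead */
		ibv_dealloc_mw(mw);
		return -1;
	}
	printf("bound mw, rkey 0x%x\n", mw->rkey);
	return 0;
}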
#include "rxe.h"
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
@ -28,40 +36,11 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
return 0;
}
static void rxe_do_dealloc_mw(struct rxe_mw *mw)
{
if (mw->mr) {
struct rxe_mr *mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
rxe_put(mr);
}
if (mw->qp) {
struct rxe_qp *qp = mw->qp;
mw->qp = NULL;
rxe_put(qp);
}
mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MW_STATE_INVALID;
}
int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);
struct rxe_pd *pd = to_rpd(ibmw->pd);
spin_lock_bh(&mw->lock);
rxe_do_dealloc_mw(mw);
spin_unlock_bh(&mw->lock);
rxe_put(mw);
rxe_put(pd);
return 0;
}
@ -328,3 +307,31 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
return mw;
}
void rxe_mw_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mw *mw = container_of(elem, typeof(*mw), elem);
struct rxe_pd *pd = to_rpd(mw->ibmw.pd);
rxe_put(pd);
if (mw->mr) {
struct rxe_mr *mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
rxe_put(mr);
}
if (mw->qp) {
struct rxe_qp *qp = mw->qp;
mw->qp = NULL;
rxe_put(qp);
}
mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MW_STATE_INVALID;
}

View File

@ -29,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND] = {
.name = "IB_WR_SEND",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
@ -39,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND_WITH_IMM] = {
.name = "IB_WR_SEND_WITH_IMM",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,

View File

@ -50,9 +50,7 @@ enum rxe_device_param {
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
| IB_DEVICE_ALLOW_USER_UNREG
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2A
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +

View File

@ -13,7 +13,6 @@ static const struct rxe_type_info {
size_t size;
size_t elem_offset;
void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
u32 min_index;
u32 max_index;
u32 max_elem;
@ -46,6 +45,7 @@ static const struct rxe_type_info {
.name = "srq",
.size = sizeof(struct rxe_srq),
.elem_offset = offsetof(struct rxe_srq, elem),
.cleanup = rxe_srq_cleanup,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
.max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
@ -73,7 +73,6 @@ static const struct rxe_type_info {
.size = sizeof(struct rxe_mr),
.elem_offset = offsetof(struct rxe_mr, elem),
.cleanup = rxe_mr_cleanup,
.flags = RXE_POOL_ALLOC,
.min_index = RXE_MIN_MR_INDEX,
.max_index = RXE_MAX_MR_INDEX,
.max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
@ -82,6 +81,7 @@ static const struct rxe_type_info {
.name = "mw",
.size = sizeof(struct rxe_mw),
.elem_offset = offsetof(struct rxe_mw, elem),
.cleanup = rxe_mw_cleanup,
.min_index = RXE_MIN_MW_INDEX,
.max_index = RXE_MAX_MW_INDEX,
.max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
@ -101,7 +101,6 @@ void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
pool->max_elem = info->max_elem;
pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN);
pool->elem_offset = info->elem_offset;
pool->flags = info->flags;
pool->cleanup = info->cleanup;
atomic_set(&pool->num_elem, 0);
@ -122,7 +121,7 @@ void *rxe_alloc(struct rxe_pool *pool)
void *obj;
int err;
if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC)))
if (WARN_ON(!(pool->type == RXE_TYPE_MR)))
return NULL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
@ -156,7 +155,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
{
int err;
if (WARN_ON(pool->flags & RXE_POOL_ALLOC))
if (WARN_ON(pool->type == RXE_TYPE_MR))
return -EINVAL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
@ -206,7 +205,7 @@ static void rxe_elem_release(struct kref *kref)
if (pool->cleanup)
pool->cleanup(elem);
if (pool->flags & RXE_POOL_ALLOC)
if (pool->type == RXE_TYPE_MR)
kfree(elem->obj);
atomic_dec(&pool->num_elem);

View File

@ -7,10 +7,6 @@
#ifndef RXE_POOL_H
#define RXE_POOL_H
enum rxe_pool_flags {
RXE_POOL_ALLOC = BIT(1),
};
enum rxe_elem_type {
RXE_TYPE_UC,
RXE_TYPE_PD,
@ -35,7 +31,6 @@ struct rxe_pool {
struct rxe_dev *rxe;
const char *name;
void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
enum rxe_elem_type type;
unsigned int max_elem;

View File

@ -63,7 +63,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
int port_num = init->port_num;
switch (init->qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RC:
case IB_QPT_UC:
@ -81,7 +80,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
goto err1;
if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_warn("invalid port = %d\n", port_num);
goto err1;
@ -89,11 +88,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
port = &rxe->port;
if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
pr_warn("SMI QP exists for port %d\n", port_num);
goto err1;
}
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
pr_warn("GSI QP exists for port %d\n", port_num);
goto err1;
@ -167,12 +161,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
port = &rxe->port;
switch (init->qp_type) {
case IB_QPT_SMI:
qp->ibqp.qp_num = 0;
port->qp_smi_index = qpn;
qp->attr.port_num = init->port_num;
break;
case IB_QPT_GSI:
qp->ibqp.qp_num = 1;
port->qp_gsi_index = qpn;
@ -334,6 +322,9 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
qp->scq = scq;
qp->srq = srq;
atomic_inc(&rcq->num_wq);
atomic_inc(&scq->num_wq);
rxe_qp_init_misc(rxe, qp, init);
err = rxe_qp_init_req(rxe, qp, init, udata, uresp);
@ -353,6 +344,9 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
rxe_queue_cleanup(qp->sq.queue);
qp->sq.queue = NULL;
err1:
atomic_dec(&rcq->num_wq);
atomic_dec(&scq->num_wq);
qp->pd = NULL;
qp->rcq = NULL;
qp->scq = NULL;
@ -777,9 +771,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
return 0;
}
/* called by the destroy qp verb */
void rxe_qp_destroy(struct rxe_qp *qp)
/* called when the last reference to the qp is dropped */
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
qp->valid = 0;
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
@ -798,12 +794,6 @@ void rxe_qp_destroy(struct rxe_qp *qp)
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
}
}
/* called when the last reference to the qp is dropped */
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
@ -814,10 +804,14 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
atomic_dec(&qp->scq->num_wq);
if (qp->scq)
rxe_put(qp->scq);
atomic_dec(&qp->rcq->num_wq);
if (qp->rcq)
rxe_put(qp->rcq);
if (qp->pd)
rxe_put(qp->pd);


@ -34,7 +34,6 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
}
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
if (unlikely(pkt_type != IB_OPCODE_UD)) {
pr_warn_ratelimited("bad qp type\n");


@ -33,8 +33,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp,
} else {
advance_dma_data(&wqe->dma, to_send);
}
if (mask & WR_WRITE_MASK)
wqe->iova += qp->mtu;
}
}
@ -308,7 +306,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
case IB_QPT_UC:
return next_opcode_uc(qp, opcode, fits);
case IB_QPT_SMI:
case IB_QPT_UD:
case IB_QPT_GSI:
switch (opcode) {
@ -414,8 +411,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_ATMETH_MASK) {
atmeth_set_va(pkt, wqe->iova);
if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
opcode == IB_OPCODE_RD_COMPARE_SWAP) {
if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
} else {
@ -437,7 +433,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, u32 paylen)
struct sk_buff *skb, u32 payload)
{
int err;
@ -449,19 +445,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
memcpy(payload_addr(pkt), tmp, paylen);
memcpy(payload_addr(pkt), tmp, payload);
wqe->dma.resid -= paylen;
wqe->dma.sge_offset += paylen;
wqe->dma.resid -= payload;
wqe->dma.sge_offset += payload;
} else {
err = copy_data(qp->pd, 0, &wqe->dma,
payload_addr(pkt), paylen,
payload_addr(pkt), payload,
RXE_FROM_MR_OBJ);
if (err)
return err;
}
if (bth_pad(pkt)) {
u8 *pad = payload_addr(pkt) + paylen;
u8 *pad = payload_addr(pkt) + payload;
memset(pad, 0, bth_pad(pkt));
}
@ -527,8 +523,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
qp->req.psn = rollback_psn;
}
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt)
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@ -611,7 +606,8 @@ int rxe_requester(void *arg)
struct rxe_ah *ah;
struct rxe_av *av;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@ -661,7 +657,7 @@ int rxe_requester(void *arg)
opcode = next_opcode(qp, wqe, wqe->wr.opcode);
if (unlikely(opcode < 0)) {
wqe->status = IB_WC_LOC_QP_OP_ERR;
goto exit;
goto err;
}
mask = rxe_opcode[opcode].mask;
@ -755,7 +751,7 @@ int rxe_requester(void *arg)
goto err;
}
update_state(qp, wqe, &pkt);
update_state(qp, &pkt);
goto next_wqe;
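Both tasklets now check the value returned by rxe_get() and bail out with -EAGAIN when the QP's reference count has already hit zero, part of the series that makes rxe wait for objects to become unused before the core frees them. A userspace sketch of that conditional-get pattern, assuming the kernel side is backed by something like kref_get_unless_zero() (not shown in these hunks):

#include <stdatomic.h>
#include <stdbool.h>
#include <errno.h>

struct qp { atomic_int ref; };

/* succeed only while the count is non-zero; never resurrect a dying object */
static bool qp_get(struct qp *qp)
{
	int old = atomic_load(&qp->ref);

	while (old != 0)
		if (atomic_compare_exchange_weak(&qp->ref, &old, old + 1))
			return true;
	return false;
}

static int requester(struct qp *qp)
{
	if (!qp_get(qp))
		return -EAGAIN;
	/* ... process the send queue ... */
	atomic_fetch_sub(&qp->ref, 1);
	return 0;
}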


@ -277,7 +277,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
break;
@ -577,8 +576,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
qp->resp.atomic_orig = *vaddr;
if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
if (*vaddr == atmeth_comp(pkt))
*vaddr = atmeth_swap_add(pkt);
} else {
@ -834,7 +832,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
if (pkt->mask & RXE_SEND_MASK) {
if (qp_type(qp) == IB_QPT_UD ||
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
if (skb->protocol == htons(ETH_P_IP)) {
memset(&hdr.reserved, 0,
@ -1265,7 +1262,8 @@ int rxe_responder(void *arg)
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;


@ -6,64 +6,34 @@
#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
{
if (srq && srq->error) {
pr_warn("srq in error state\n");
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq && srq->limit && (attr->max_wr < srq->limit)) {
pr_warn("max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
pr_warn("srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
}
if (attr->max_sge > rxe->attr.max_srq_sge) {
pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
if (mask == IB_SRQ_INIT_MASK) {
if (attr->max_sge > rxe->attr.max_srq_sge) {
pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
if (attr->max_sge < RXE_MIN_SRQ_SGE)
attr->max_sge = RXE_MIN_SRQ_SGE;
}
if (attr->max_sge < RXE_MIN_SRQ_SGE)
attr->max_sge = RXE_MIN_SRQ_SGE;
return 0;
@ -93,8 +63,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
spin_lock_init(&srq->rq.consumer_lock);
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr,
srq_wqe_size, type);
q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
pr_warn("unable to allocate queue for srq\n");
return -ENOMEM;
@ -121,6 +90,57 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
return 0;
}
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
if (srq->error) {
pr_warn("srq in error state\n");
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq->limit && (attr->max_wr < srq->limit)) {
pr_warn("max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
}
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
pr_warn("srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
}
}
return 0;
err1:
return -EINVAL;
}
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata)
@ -154,3 +174,14 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
srq->rq.queue = NULL;
return err;
}
void rxe_srq_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_srq *srq = container_of(elem, typeof(*srq), elem);
if (srq->pd)
rxe_put(srq->pd);
if (srq->rq.queue)
rxe_queue_cleanup(srq->rq.queue);
}


@ -7,8 +7,8 @@
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"
@ -286,36 +286,34 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_create_srq_resp __user *uresp = NULL;
if (init->srq_type != IB_SRQT_BASIC)
return -EOPNOTSUPP;
if (udata) {
if (udata->outlen < sizeof(*uresp))
return -EINVAL;
uresp = udata->outbuf;
}
err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
if (init->srq_type != IB_SRQT_BASIC)
return -EOPNOTSUPP;
err = rxe_srq_chk_init(rxe, init);
if (err)
goto err1;
return err;
err = rxe_add_to_pool(&rxe->srq_pool, srq);
if (err)
goto err1;
return err;
rxe_get(pd);
srq->pd = pd;
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
goto err2;
goto err_put;
return 0;
err2:
rxe_put(pd);
err_put:
rxe_put(srq);
err1:
return err;
}
@ -339,16 +337,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
if (err)
goto err1;
return err;
err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
if (err)
goto err1;
return err;
return 0;
err1:
return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@ -368,10 +362,6 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
if (srq->rq.queue)
rxe_queue_cleanup(srq->rq.queue);
rxe_put(srq->pd);
rxe_put(srq);
return 0;
}
@ -495,7 +485,6 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;
rxe_qp_destroy(qp);
rxe_put(qp);
return 0;
}
@ -536,7 +525,6 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
wr->send_flags = ibwr->send_flags;
if (qp_type(qp) == IB_QPT_UD ||
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
struct ib_ah *ibah = ud_wr(ibwr)->ah;
@ -807,6 +795,12 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
/* See IBA C11-17: The CI shall return an error if this Verb is
* invoked while a Work Queue is still associated with the CQ.
*/
if (atomic_read(&cq->num_wq))
return -EINVAL;
rxe_cq_disable(cq);
rxe_put(cq);
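The IBA C11-17 check in rxe_destroy_cq() works because rxe_qp_from_init() increments num_wq on both the send and receive CQs, the error path and rxe_qp_do_cleanup() decrement it, and destroy refuses to proceed while any QP is still attached. A compilable userspace sketch of that invariant:

#include <stdatomic.h>
#include <errno.h>

struct cq { atomic_int num_wq; };

/* QP create: record that both CQs now back a work queue */
static void qp_attach(struct cq *scq, struct cq *rcq)
{
	atomic_fetch_add(&rcq->num_wq, 1);
	atomic_fetch_add(&scq->num_wq, 1);
}

/* QP cleanup (or a failed create): release the bookkeeping */
static void qp_detach(struct cq *scq, struct cq *rcq)
{
	atomic_fetch_sub(&scq->num_wq, 1);
	atomic_fetch_sub(&rcq->num_wq, 1);
}

/* destroy verb: fail while a work queue is still associated (IBA C11-17) */
static int cq_destroy(struct cq *cq)
{
	if (atomic_load(&cq->num_wq))
		return -EINVAL;
	return 0;
}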


@ -67,6 +67,7 @@ struct rxe_cq {
bool is_dying;
bool is_user;
struct tasklet_struct comp_task;
atomic_t num_wq;
};
enum wqe_state {
@ -373,7 +374,6 @@ struct rxe_port {
spinlock_t port_lock; /* guard port */
unsigned int mtu_cap;
/* special QPs */
u32 qp_smi_index;
u32 qp_gsi_index;
};
@ -394,7 +394,6 @@ struct rxe_dev {
struct rxe_pool cq_pool;
struct rxe_pool mr_pool;
struct rxe_pool mw_pool;
struct rxe_pool mc_grp_pool;
/* multicast support */
spinlock_t mcg_lock;


@ -119,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev)
* <linux/if_arp.h> for type identifiers.
*/
if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 ||
netdev->type == ARPHRD_NONE ||
(netdev->type == ARPHRD_LOOPBACK && loopback_enabled))
return 1;
@ -315,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sdev->netdev = netdev;
if (netdev->type != ARPHRD_LOOPBACK) {
if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) {
addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
netdev->dev_addr);
} else {
/*
* The loopback device does not have a HW address,
* This device does not have a HW address,
* but connection mangagement lib expects gid != 0
*/
size_t len = min_t(size_t, strlen(base_dev->name), 6);


@ -132,8 +132,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
/* Revisit atomic caps if RFC 7306 gets supported */
attr->atomic_cap = 0;
attr->device_cap_flags =
IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG;
attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG;
attr->max_cq = sdev->attrs.max_cq;
attr->max_cqe = sdev->attrs.max_cqe;
attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;


@ -411,6 +411,7 @@ struct ipoib_dev_priv {
struct dentry *path_dentry;
#endif
u64 hca_caps;
u64 kernel_caps;
struct ipoib_ethtool_st ethtool;
unsigned int max_send_sge;
const struct net_device_ops *rn_ops;


@ -1850,11 +1850,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev)
static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
{
priv->hca_caps = priv->ca->attrs.device_cap_flags;
priv->kernel_caps = priv->ca->attrs.kernel_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
if (priv->kernel_caps & IBK_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
@ -2201,7 +2202,7 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
priv->rn_ops = dev->netdev_ops;
if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;


@ -197,16 +197,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
if (priv->kernel_caps & IBK_UD_TSO)
init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA)
if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA)
init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE;
priv->qp = ib_create_qp(priv->pd, &init_attr);


@ -650,7 +650,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
SHOST_DIX_GUARD_CRC);
}
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
shost->virt_boundary_mask = SZ_4K - 1;
if (iscsi_host_add(shost, ib_dev->dev.parent)) {


@ -363,7 +363,7 @@ struct iser_fr_pool {
* @cq: Connection completion queue
* @cq_size: The number of max outstanding completions
* @device: reference to iser device
* @fr_pool: connection fast registration poool
* @fr_pool: connection fast registration pool
* @pi_support: Indicate device T10-PI support
* @reg_cqe: completion handler
*/


@ -115,7 +115,7 @@ iser_create_fastreg_desc(struct iser_device *device,
if (!desc)
return ERR_PTR(-ENOMEM);
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
@ -517,7 +517,7 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn,
* (head and tail) for a single page worth data, so one additional
* entry is required.
*/
if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (attr->kernel_cap_flags & IBK_SG_GAPS_REG)
reserved_mr_pages = 0;
else
reserved_mr_pages = 1;
@ -562,8 +562,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)) {
if (!(device->ib_device->attrs.kernel_cap_flags &
IBK_INTEGRITY_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
dev_name(&ib_conn->device->ib_device->dev));


@ -42,6 +42,7 @@ MODULE_PARM_DESC(sg_tablesize,
static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);
static struct workqueue_struct *isert_login_wq;
static struct workqueue_struct *isert_comp_wq;
static struct workqueue_struct *isert_release_wq;
@ -230,7 +231,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER)
device->pi_capable = true;
else
device->pi_capable = false;
@ -1017,7 +1018,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
complete(&isert_conn->login_comp);
return;
}
schedule_delayed_work(&conn->login_work, 0);
queue_delayed_work(isert_login_wq, &conn->login_work, 0);
}
static struct iscsit_cmd
@ -2348,9 +2349,9 @@ isert_get_login_rx(struct iscsit_conn *conn, struct iscsi_login *login)
/*
* For login requests after the first PDU, isert_rx_login_req() will
* kick schedule_delayed_work(&conn->login_work) as the packet is
* received, which turns this callback from iscsi_target_do_login_rx()
* into a NOP.
* kick queue_delayed_work(isert_login_wq, &conn->login_work) as
* the packet is received, which turns this callback from
* iscsi_target_do_login_rx() into a NOP.
*/
if (!login->first_request)
return 0;
@ -2606,20 +2607,23 @@ static struct iscsit_transport iser_target_transport = {
static int __init isert_init(void)
{
int ret;
isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0);
if (!isert_login_wq) {
isert_err("Unable to allocate isert_login_wq\n");
return -ENOMEM;
}
isert_comp_wq = alloc_workqueue("isert_comp_wq",
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!isert_comp_wq) {
isert_err("Unable to allocate isert_comp_wq\n");
return -ENOMEM;
goto destroy_login_wq;
}
isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!isert_release_wq) {
isert_err("Unable to allocate isert_release_wq\n");
ret = -ENOMEM;
goto destroy_comp_wq;
}
@ -2630,17 +2634,20 @@ static int __init isert_init(void)
destroy_comp_wq:
destroy_workqueue(isert_comp_wq);
destroy_login_wq:
destroy_workqueue(isert_login_wq);
return ret;
return -ENOMEM;
}
static void __exit isert_exit(void)
{
flush_scheduled_work();
flush_workqueue(isert_login_wq);
destroy_workqueue(isert_release_wq);
destroy_workqueue(isert_comp_wq);
iscsit_unregister_transport(&iser_target_transport);
isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
destroy_workqueue(isert_login_wq);
}
MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
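The isert hunks replace uses of the system workqueue with a driver-owned isert_login_wq, so the exit path can flush exactly the login work this module queued instead of calling flush_scheduled_work(). A minimal module sketch of that lifecycle; every name here is invented for illustration:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_login_wq;
static struct delayed_work example_login_work;

static void example_login_fn(struct work_struct *work)
{
	/* handle a deferred login PDU here */
}

static int __init example_init(void)
{
	example_login_wq = alloc_workqueue("example_login_wq", 0, 0);
	if (!example_login_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&example_login_work, example_login_fn);
	queue_delayed_work(example_login_wq, &example_login_work, 0);
	return 0;
}

static void __exit example_exit(void)
{
	/* flush only the work this module owns, then tear the queue down */
	flush_workqueue(example_login_wq);
	destroy_workqueue(example_login_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");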


@ -2785,7 +2785,7 @@ static void free_clt(struct rtrs_clt_sess *clt)
/**
* rtrs_clt_open() - Open a path to an RTRS server
* @ops: holds the link event callback and the private pointer.
* @sessname: name of the session
* @pathname: name of the path to an RTRS server
* @paths: Paths to be established defined by their src and dst addresses
* @paths_num: Number of elements in the @paths array
* @port: port to be used by the RTRS session


@ -430,7 +430,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
@ -3650,7 +3650,7 @@ static ssize_t add_target_store(struct device *dev,
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
target = host_to_target(target_host);
@ -3706,8 +3706,8 @@ static ssize_t add_target_store(struct device *dev,
}
if (srp_dev->use_fast_reg) {
bool gaps_reg = (ibdev->attrs.device_cap_flags &
IB_DEVICE_SG_GAPS_REG);
bool gaps_reg = ibdev->attrs.kernel_cap_flags &
IBK_SG_GAPS_REG;
max_sectors_per_mr = srp_dev->max_pages_per_mr <<
(ilog2(srp_dev->mr_page_size) - 9);


@ -867,8 +867,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
/* T10-PI support */
if (ctrl->device->dev->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)
if (ctrl->device->dev->attrs.kernel_cap_flags &
IBK_INTEGRITY_HANDOVER)
pi_capable = true;
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,


@ -1221,8 +1221,8 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
ndev->inline_data_size = nport->inline_data_size;
ndev->inline_page_count = inline_page_count;
if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)) {
if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags &
IBK_INTEGRITY_HANDOVER)) {
pr_warn("T10-PI is not supported by device %s. Disabling it\n",
cm_id->device->name);
nport->pi_enable = false;


@ -649,7 +649,7 @@ static int smbd_ia_open(
smbd_max_frmr_depth,
info->id->device->attrs.max_fast_reg_page_list_len);
info->mr_type = IB_MR_TYPE_MEM_REG;
if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
info->mr_type = IB_MR_TYPE_SG_GAPS;
info->pd = ib_alloc_pd(info->id->device, 0);


@ -220,32 +220,24 @@ enum rdma_link_layer {
};
enum ib_device_cap_flags {
IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
IB_DEVICE_RAW_MULTI = (1 << 3),
IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
/* Not in use, former INIT_TYPE = (1 << 9),*/
IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
IB_DEVICE_SRQ_RESIZE = (1 << 13),
IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR,
IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR,
IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR,
IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI,
IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG,
IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT,
IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD,
IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT,
/* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */
IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT,
IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID,
IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN,
IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE,
IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ,
/*
* This device supports a per-device lkey or stag that can be
* used without performing a memory registration for the local
* memory. Note that ULPs should never check this flag, but
* instead of use the local_dma_lkey flag in the ib_pd structure,
* which will always contain a usable lkey.
*/
IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
/* Reserved, old SEND_W_INV = (1 << 16),*/
IB_DEVICE_MEM_WINDOW = (1 << 17),
/* Reserved, old SEND_W_INV = 1 << 16,*/
IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW,
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
@ -253,9 +245,8 @@ enum ib_device_cap_flags {
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
IB_DEVICE_UD_IP_CSUM = (1 << 18),
IB_DEVICE_UD_TSO = (1 << 19),
IB_DEVICE_XRC = (1 << 20),
IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM,
IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC,
/*
* This device supports the IB "base memory management extension",
@ -266,31 +257,53 @@ enum ib_device_cap_flags {
* IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
* stag.
*/
IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
IB_DEVICE_RC_IP_CSUM = (1 << 25),
IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS,
IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A,
IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B,
IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM,
/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
IB_DEVICE_RAW_IP_CSUM = (1 << 26),
/*
* Devices should set IB_DEVICE_CROSS_CHANNEL if they
* support execution of WQEs that involve synchronization
* of I/O operations with single completion queue managed
* by hardware.
*/
IB_DEVICE_CROSS_CHANNEL = (1 << 27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM,
IB_DEVICE_MANAGED_FLOW_STEERING =
IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING,
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35),
IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS,
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
IB_DEVICE_PCI_WRITE_END_PADDING =
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
};
enum ib_kernel_cap_flags {
/*
* This device supports a per-device lkey or stag that can be
* used without performing a memory registration for the local
* memory. Note that ULPs should never check this flag, but
* instead of use the local_dma_lkey flag in the ib_pd structure,
* which will always contain a usable lkey.
*/
IBK_LOCAL_DMA_LKEY = 1 << 0,
/* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */
IBK_INTEGRITY_HANDOVER = 1 << 1,
/* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */
IBK_ON_DEMAND_PAGING = 1 << 2,
/* IB_MR_TYPE_SG_GAPS is supported */
IBK_SG_GAPS_REG = 1 << 3,
/* Driver supports RDMA_NLDEV_CMD_DELLINK */
IBK_ALLOW_USER_UNREG = 1 << 4,
/* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */
IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5,
/* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */
IBK_UD_TSO = 1 << 6,
/* iopib will use the device ops:
* get_vf_config
* get_vf_guid
* get_vf_stats
* set_vf_guid
* set_vf_link_state
*/
IBK_VIRTUAL_FUNCTION = 1 << 7,
/* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */
IBK_RDMA_NETDEV_OPA = 1 << 8,
};
enum ib_atomic_cap {
@ -389,6 +402,7 @@ struct ib_device_attr {
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
u64 kernel_cap_flags;
int max_send_sge;
int max_recv_sge;
int max_sge_rd;
@ -564,7 +578,7 @@ struct rdma_stat_desc {
/**
* struct rdma_hw_stats
* @lock - Mutex to protect parallel write access to lifespan and values
* of counters, which are 64bits and not guaranteeed to be written
* of counters, which are 64bits and not guaranteed to be written
* atomicaly on 32bits systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
@ -1621,19 +1635,23 @@ struct ib_srq {
};
enum ib_raw_packet_caps {
/* Strip cvlan from incoming packet and report it in the matching work
/*
* Strip cvlan from incoming packet and report it in the matching work
* completion is supported.
*/
IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
/* Scatter FCS field of an incoming packet to host memory is supported.
IB_RAW_PACKET_CAP_CVLAN_STRIPPING =
IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING,
/*
* Scatter FCS field of an incoming packet to host memory is supported.
*/
IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS,
/* Checksum offloads are supported (for both send and receive). */
IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
/* When a packet is received for an RQ with no receive WQEs, the
IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM,
/*
* When a packet is received for an RQ with no receive WQEs, the
* packet processing is delayed.
*/
IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3),
IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP,
};
enum ib_wq_type {
@ -4304,7 +4322,7 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
return -EINVAL;
if (flags & IB_ACCESS_ON_DEMAND &&
!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
!(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
return -EINVAL;
return 0;
}


@ -90,8 +90,7 @@ struct opa_vnic_stats {
static inline bool rdma_cap_opa_vnic(struct ib_device *device)
{
return !!(device->attrs.device_cap_flags &
IB_DEVICE_RDMA_NETDEV_OPA);
return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA);
}
#endif /* _OPA_VNIC_H */


@ -1298,4 +1298,46 @@ struct ib_uverbs_ex_modify_cq {
#define IB_DEVICE_NAME_MAX 64
/*
* bits 9, 15, 16, 19, 22, 27, 30, 31, 32, 33, 35 and 37 may be set by old
* kernels and should not be used.
*/
enum ib_uverbs_device_cap_flags {
IB_UVERBS_DEVICE_RESIZE_MAX_WR = 1 << 0,
IB_UVERBS_DEVICE_BAD_PKEY_CNTR = 1 << 1,
IB_UVERBS_DEVICE_BAD_QKEY_CNTR = 1 << 2,
IB_UVERBS_DEVICE_RAW_MULTI = 1 << 3,
IB_UVERBS_DEVICE_AUTO_PATH_MIG = 1 << 4,
IB_UVERBS_DEVICE_CHANGE_PHY_PORT = 1 << 5,
IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE = 1 << 6,
IB_UVERBS_DEVICE_CURR_QP_STATE_MOD = 1 << 7,
IB_UVERBS_DEVICE_SHUTDOWN_PORT = 1 << 8,
/* IB_UVERBS_DEVICE_INIT_TYPE = 1 << 9, (not in use) */
IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT = 1 << 10,
IB_UVERBS_DEVICE_SYS_IMAGE_GUID = 1 << 11,
IB_UVERBS_DEVICE_RC_RNR_NAK_GEN = 1 << 12,
IB_UVERBS_DEVICE_SRQ_RESIZE = 1 << 13,
IB_UVERBS_DEVICE_N_NOTIFY_CQ = 1 << 14,
IB_UVERBS_DEVICE_MEM_WINDOW = 1 << 17,
IB_UVERBS_DEVICE_UD_IP_CSUM = 1 << 18,
IB_UVERBS_DEVICE_XRC = 1 << 20,
IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS = 1 << 21,
IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A = 1 << 23,
IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B = 1 << 24,
IB_UVERBS_DEVICE_RC_IP_CSUM = 1 << 25,
/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_IP_CSUM. */
IB_UVERBS_DEVICE_RAW_IP_CSUM = 1 << 26,
IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING = 1 << 29,
/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */
IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34,
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36,
};
enum ib_uverbs_raw_packet_caps {
IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING = 1 << 0,
IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS = 1 << 1,
IB_UVERBS_RAW_PACKET_CAP_IP_CSUM = 1 << 2,
IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP = 1 << 3,
};
#endif /* IB_USER_VERBS_H */
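With the flag values promoted into the uAPI header, userspace can test the bits returned by a device query against named constants instead of magic numbers. A small libibverbs example, under the assumption that the installed kernel headers export rdma/ib_user_verbs.h alongside infiniband/verbs.h:

#include <stdio.h>
#include <infiniband/verbs.h>
#include <rdma/ib_user_verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_device_attr attr;
	struct ibv_context *ctx;

	if (!list || !list[0])
		return 1;
	ctx = ibv_open_device(list[0]);
	if (!ctx || ibv_query_device(ctx, &attr))
		return 1;

	/* check a capability bit by its uAPI name */
	if (attr.device_cap_flags & IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS)
		printf("memory management extensions supported\n");

	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}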


@ -154,8 +154,8 @@ static int rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
rds_ibdev->odp_capable =
!!(device->attrs.device_cap_flags &
IB_DEVICE_ON_DEMAND_PAGING) &&
!!(device->attrs.kernel_cap_flags &
IBK_ON_DEMAND_PAGING) &&
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
IB_ODP_SUPPORT_WRITE) &&
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &


@ -195,7 +195,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
ep->re_attr.cap.max_recv_sge = 1;
ep->re_mrtype = IB_MR_TYPE_MEM_REG;
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG)
ep->re_mrtype = IB_MR_TYPE_SG_GAPS;
/* Quirk: Some devices advertise a large max_fast_reg_page_list_len