RDMA v6.13 merge window pull request

Several fixes scattered across the drivers and a few new features:
 
 - Minor updates and bug fixes to hfi1, efa, ipoib, bnxt, hns
 
 - Force disassociate the userspace FD when hns does an async reset
 
 - bnxt new features for optimized modify QP to skip certain states, CQ
   coalescing, better debug dumping
 
 - mlx5 new data placement ordering feature
 
 - Faster destruction of mlx5 devx HW objects
 
 - Improvements to RDMA CM mad handling
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCZz4ENwAKCRCFwuHvBreF
 YQYQAP9R54r5J1Iylg+zqhCc+e/9oveuuZbfLvy/EJiEpmdprQEAgPs1RrB0z7U6
 1xrVStUKNPhGd5XeVVZGkIV0zYv6Tw4=
 =V5xI
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "Seveal fixes scattered across the drivers and a few new features:

   - Minor updates and bug fixes to hfi1, efa, ipoib, bnxt, hns

   - Force disassociate the userspace FD when hns does an async reset

   - bnxt new features for optimized modify QP to skip certain states,
     CQ coalescing, better debug dumping

   - mlx5 new data placement ordering feature

   - Faster destruction of mlx5 devx HW objects

   - Improvements to RDMA CM mad handling"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (51 commits)
  RDMA/bnxt_re: Correct the sequence of device suspend
  RDMA/bnxt_re: Use the default mode of congestion control
  RDMA/bnxt_re: Support different traffic class
  IB/cm: Rework sending DREQ when destroying a cm_id
  IB/cm: Do not hold reference on cm_id unless needed
  IB/cm: Explicitly mark if a response MAD is a retransmission
  RDMA/mlx5: Move events notifier registration to be after device registration
  RDMA/bnxt_re: Cache MSIx info to a local structure
  RDMA/bnxt_re: Refurbish CQ to NQ hash calculation
  RDMA/bnxt_re: Refactor NQ allocation
  RDMA/bnxt_re: Fail probe early when not enough MSI-x vectors are reserved
  RDMA/hns: Fix different dgids mapping to the same dip_idx
  RDMA/bnxt_re: Add set_func_resources support for P5/P7 adapters
  RDMA/bnxt_re: Enhance RoCE SRIOV resource configuration design
  bnxt_en: Add support for RoCE sriov configuration
  RDMA/hns: Fix NULL pointer dereference in hns_roce_map_mr_sg()
  RDMA/hns: Fix out-of-order issue of requester when setting FENCE
  RDMA/nldev: Add IB device and net device rename events
  RDMA/mlx5: Add implementation for ufile_hw_cleanup device operation
  RDMA/core: Move ib_uverbs_file struct to uverbs_types.h
  ...
Linus Torvalds 2024-11-22 20:03:57 -08:00
commit 2a163a4cea
63 changed files with 1989 additions and 511 deletions


@ -35,6 +35,8 @@ MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_NO_QP] = "no QP",
[IB_CM_REJ_NO_EEC] = "no EEC",
@ -93,8 +95,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
struct cm_work *work);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param);
static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
const void *private_data, u8 private_data_len);
static void cm_issue_dreq(struct cm_id_private *cm_id_priv);
static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
void *private_data, u8 private_data_len);
static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
@ -307,12 +308,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
goto out;
}
/* Timeout set by caller if response is expected. */
m->ah = ah;
m->retries = cm_id_priv->max_cm_retries;
refcount_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
out:
spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
@ -321,16 +317,13 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
struct cm_id_private *cm_id_priv = msg->context[0];
if (msg->ah)
rdma_destroy_ah(msg->ah, 0);
cm_deref_id(cm_id_priv);
ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
{
struct ib_mad_send_buf *msg;
@ -339,7 +332,15 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
msg = cm_alloc_msg(cm_id_priv);
if (IS_ERR(msg))
return msg;
cm_id_priv->msg = msg;
refcount_inc(&cm_id_priv->refcount);
msg->context[0] = cm_id_priv;
msg->context[1] = (void *) (unsigned long) state;
msg->retries = cm_id_priv->max_cm_retries;
msg->timeout_ms = cm_id_priv->timeout_ms;
return msg;
}
@ -358,13 +359,20 @@ static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc)
static struct ib_mad_send_buf *
cm_alloc_response_msg_no_ah(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
bool direct_retry)
{
return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
struct ib_mad_send_buf *m;
m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC, IB_MGMT_BASE_VERSION);
if (!IS_ERR(m))
m->context[0] = direct_retry ? CM_DIRECT_RETRY_CTX : NULL;
return m;
}
static int cm_create_response_msg_ah(struct cm_port *port,
@ -384,12 +392,13 @@ static int cm_create_response_msg_ah(struct cm_port *port,
static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
bool direct_retry,
struct ib_mad_send_buf **msg)
{
struct ib_mad_send_buf *m;
int ret;
m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
m = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry);
if (IS_ERR(m))
return PTR_ERR(m);
@ -403,13 +412,6 @@ static int cm_alloc_response_msg(struct cm_port *port,
return 0;
}
static void cm_free_response_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
rdma_destroy_ah(msg->ah, 0);
ib_free_send_mad(msg);
}
static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
void *data;
@ -1109,7 +1111,8 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
cm_id->state = IB_CM_IDLE;
break;
}
cm_send_dreq_locked(cm_id_priv, NULL, 0);
cm_issue_dreq(cm_id_priv);
cm_enter_timewait(cm_id_priv);
goto retest;
case IB_CM_DREQ_SENT:
ib_cancel_mad(cm_id_priv->msg);
@ -1557,7 +1560,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
if (param->alternate_path)
cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
msg = cm_alloc_priv_msg(cm_id_priv);
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
if (IS_ERR(msg)) {
ret = PTR_ERR(msg);
goto out_unlock;
@ -1566,8 +1569,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
@ -1598,7 +1599,7 @@ static int cm_issue_rej(struct cm_port *port,
struct cm_rej_msg *rej_msg, *rcv_msg;
int ret;
ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
ret = cm_alloc_response_msg(port, mad_recv_wc, false, &msg);
if (ret)
return ret;
@ -1624,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port,
IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_response_msg(msg);
cm_free_msg(msg);
return ret;
}
@ -1951,7 +1952,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irq(&cm_id_priv->lock);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
return;
@ -1980,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work,
return;
unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_response_msg(msg);
free: cm_free_msg(msg);
}
static struct cm_id_private *cm_match_req(struct cm_work *work,
@ -2294,7 +2295,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
goto out;
}
msg = cm_alloc_priv_msg(cm_id_priv);
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
if (IS_ERR(msg)) {
ret = PTR_ERR(msg);
goto out;
@ -2302,8 +2303,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
@ -2444,7 +2443,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
atomic_long_inc(
&work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
goto deref;
@ -2469,7 +2468,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto deref;
unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_response_msg(msg);
free: cm_free_msg(msg);
deref: cm_deref_id(cm_id_priv);
}
@ -2653,47 +2652,23 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
private_data_len);
}
static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
const void *private_data, u8 private_data_len)
static void cm_issue_dreq(struct cm_id_private *cm_id_priv)
{
struct ib_mad_send_buf *msg;
int ret;
lockdep_assert_held(&cm_id_priv->lock);
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
msg = cm_alloc_msg(cm_id_priv);
if (IS_ERR(msg))
return;
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
trace_icm_dreq_skipped(&cm_id_priv->id);
return -EINVAL;
}
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->msg);
msg = cm_alloc_priv_msg(cm_id_priv);
if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
return PTR_ERR(msg);
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, NULL, 0);
trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
cm_free_priv_msg(msg);
return ret;
}
cm_id_priv->id.state = IB_CM_DREQ_SENT;
return 0;
if (ret)
cm_free_msg(msg);
}
int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
@ -2701,11 +2676,44 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
{
struct cm_id_private *cm_id_priv =
container_of(cm_id, struct cm_id_private, id);
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len);
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
trace_icm_dreq_skipped(&cm_id_priv->id);
ret = -EINVAL;
goto unlock;
}
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->msg);
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT);
if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
ret = PTR_ERR(msg);
goto unlock;
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
cm_free_priv_msg(msg);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_SENT;
unlock:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
@ -2791,7 +2799,7 @@ static int cm_issue_drep(struct cm_port *port,
struct cm_drep_msg *drep_msg;
int ret;
ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
ret = cm_alloc_response_msg(port, mad_recv_wc, true, &msg);
if (ret)
return ret;
@ -2809,7 +2817,7 @@ static int cm_issue_drep(struct cm_port *port,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_response_msg(msg);
cm_free_msg(msg);
return ret;
}
@ -2856,7 +2864,8 @@ static int cm_dreq_handler(struct cm_work *work)
case IB_CM_TIMEWAIT:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
[CM_DREQ_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
true);
if (IS_ERR(msg))
goto unlock;
@ -2867,7 +2876,7 @@ static int cm_dreq_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
cm_free_response_msg(msg);
cm_free_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@ -3361,7 +3370,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_MRA_LAP_SENT:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
[CM_LAP_COUNTER]);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
true);
if (IS_ERR(msg))
goto unlock;
@ -3374,7 +3384,7 @@ static int cm_lap_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL))
cm_free_response_msg(msg);
cm_free_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@ -3513,7 +3523,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
goto out_unlock;
}
msg = cm_alloc_priv_msg(cm_id_priv);
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT);
if (IS_ERR(msg)) {
ret = PTR_ERR(msg);
goto out_unlock;
@ -3521,8 +3531,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
@ -3768,17 +3776,17 @@ static int cm_sidr_rep_handler(struct cm_work *work)
static void cm_process_send_error(struct cm_id_private *cm_id_priv,
struct ib_mad_send_buf *msg,
enum ib_cm_state state,
enum ib_wc_status wc_status)
{
enum ib_cm_state state = (unsigned long) msg->context[1];
struct ib_cm_event cm_event = {};
int ret;
/* Discard old sends or ones without a response. */
/* Discard old sends. */
spin_lock_irq(&cm_id_priv->lock);
if (msg != cm_id_priv->msg) {
spin_unlock_irq(&cm_id_priv->lock);
cm_free_msg(msg);
cm_free_priv_msg(msg);
return;
}
cm_free_priv_msg(msg);
@ -3826,9 +3834,7 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
struct cm_id_private *cm_id_priv = msg->context[0];
enum ib_cm_state state =
(enum ib_cm_state)(unsigned long)msg->context[1];
struct cm_id_private *cm_id_priv;
struct cm_port *port;
u16 attr_index;
@ -3836,13 +3842,12 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
attr_index = be16_to_cpu(((struct ib_mad_hdr *)
msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
/*
* If the send was in response to a received message (context[0] is not
* set to a cm_id), and is not a REJ, then it is a send that was
* manually retried.
*/
if (!cm_id_priv && (attr_index != CM_REJ_COUNTER))
if (msg->context[0] == CM_DIRECT_RETRY_CTX) {
msg->retries = 1;
cm_id_priv = NULL;
} else {
cm_id_priv = msg->context[0];
}
atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
if (msg->retries)
@ -3850,10 +3855,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
&port->counters[CM_XMIT_RETRIES][attr_index]);
if (cm_id_priv)
cm_process_send_error(cm_id_priv, msg, state,
mad_send_wc->status);
cm_process_send_error(cm_id_priv, msg, mad_send_wc->status);
else
cm_free_response_msg(msg);
cm_free_msg(msg);
}
static void cm_work_handler(struct work_struct *_work)


@ -437,6 +437,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
client->rename(ibdev, client_data);
}
up_read(&ibdev->client_data_rwsem);
rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT);
up_read(&devices_rwsem);
return 0;
}
@ -2759,6 +2760,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
@ -2852,6 +2854,40 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
},
};
static int ib_netdevice_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct net_device *ib_ndev;
struct ib_device *ibdev;
u32 port;
switch (event) {
case NETDEV_CHANGENAME:
ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
if (!ibdev)
return NOTIFY_DONE;
rdma_for_each_port(ibdev, port) {
ib_ndev = ib_device_get_netdev(ibdev, port);
if (ndev == ib_ndev)
rdma_nl_notify_event(ibdev, port,
RDMA_NETDEV_RENAME_EVENT);
dev_put(ib_ndev);
}
ib_device_put(ibdev);
break;
default:
break;
}
return NOTIFY_DONE;
}
static struct notifier_block nb_netdevice = {
.notifier_call = ib_netdevice_event,
};
static int __init ib_core_init(void)
{
int ret = -ENOMEM;
@ -2923,6 +2959,8 @@ static int __init ib_core_init(void)
goto err_parent;
}
register_netdevice_notifier(&nb_netdevice);
return 0;
err_parent:
@ -2952,6 +2990,7 @@ static int __init ib_core_init(void)
static void __exit ib_core_cleanup(void)
{
unregister_netdevice_notifier(&nb_netdevice);
roce_gid_mgmt_cleanup();
rdma_nl_unregister(RDMA_NL_LS);
nldev_exit();


@ -2729,6 +2729,25 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
},
};
static int fill_mon_netdev_rename(struct sk_buff *msg,
struct ib_device *device, u32 port,
const struct net *net)
{
struct net_device *netdev = ib_device_get_netdev(device, port);
int ret = 0;
if (!netdev || !net_eq(dev_net(netdev), net))
goto out;
ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
if (ret)
goto out;
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
out:
dev_put(netdev);
return ret;
}
static int fill_mon_netdev_association(struct sk_buff *msg,
struct ib_device *device, u32 port,
const struct net *net)
@ -2793,6 +2812,18 @@ static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
"Failed to send RDMA monitor netdev detach event: port %d\n",
port_num);
break;
case RDMA_RENAME_EVENT:
dev_warn_ratelimited(&device->dev,
"Failed to send RDMA monitor rename device event\n");
break;
case RDMA_NETDEV_RENAME_EVENT:
netdev = ib_device_get_netdev(device, port_num);
dev_warn_ratelimited(&device->dev,
"Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
port_num, netdev->ifindex);
dev_put(netdev);
break;
default:
break;
}
@ -2822,14 +2853,19 @@ int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
switch (type) {
case RDMA_REGISTER_EVENT:
case RDMA_UNREGISTER_EVENT:
case RDMA_RENAME_EVENT:
ret = fill_nldev_handle(skb, device);
if (ret)
goto err_free;
break;
case RDMA_NETDEV_ATTACH_EVENT:
case RDMA_NETDEV_DETACH_EVENT:
ret = fill_mon_netdev_association(skb, device,
port_num, net);
ret = fill_mon_netdev_association(skb, device, port_num, net);
if (ret)
goto err_free;
break;
case RDMA_NETDEV_RENAME_EVENT:
ret = fill_mon_netdev_rename(skb, device, port_num, net);
if (ret)
goto err_free;
break;


@ -58,8 +58,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject)
}
EXPORT_SYMBOL(uverbs_uobject_put);
static int uverbs_try_lock_object(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
int uverbs_try_lock_object(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
{
/*
* When a shared access is required, we use a positive counter. Each
@ -84,6 +84,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
}
return 0;
}
EXPORT_SYMBOL(uverbs_try_lock_object);
static void assert_uverbs_usecnt(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
@ -880,9 +881,14 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
struct uverbs_attr_bundle attrs = { .ufile = ufile };
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *ib_dev = ucontext->device;
struct ib_uobject *obj, *next_obj;
int ret = -EINVAL;
struct uverbs_attr_bundle attrs = { .ufile = ufile };
if (ib_dev->ops.ufile_hw_cleanup)
ib_dev->ops.ufile_hw_cleanup(ufile);
/*
* This shouldn't run while executing other commands on this

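For orientation, a minimal sketch of how a driver would opt in to the new ufile_hw_cleanup hook wired up above; the handler name and body are hypothetical, and the void-returning signature is assumed from the call site in __uverbs_cleanup_ufile().

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>

/* Hypothetical driver handler: batch-destroy this file's HW objects in
 * one pass before the generic per-uobject teardown walks ufile->uobjects.
 */
static void example_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
	/* driver-specific bulk cleanup would go here */
}

static const struct ib_device_ops example_dev_ops = {
	/* ... other ops ... */
	.ufile_hw_cleanup = example_ufile_hw_cleanup,
};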

@ -515,6 +515,27 @@ void rdma_roce_rescan_device(struct ib_device *ib_dev)
}
EXPORT_SYMBOL(rdma_roce_rescan_device);
/**
* rdma_roce_rescan_port - Rescan all of the network devices in the system
* and add their gids if relevant to the port of the RoCE device.
*
* @ib_dev: IB device
* @port: Port number
*/
void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port)
{
struct net_device *ndev = NULL;
if (rdma_protocol_roce(ib_dev, port)) {
ndev = ib_device_get_netdev(ib_dev, port);
if (!ndev)
return;
enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev);
dev_put(ndev);
}
}
EXPORT_SYMBOL(rdma_roce_rescan_port);
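As a usage illustration only: a driver that has just recovered a RoCE port could repopulate its GID table with the newly exported helper; everything here except rdma_roce_rescan_port() itself is a hypothetical sketch.

#include <rdma/ib_verbs.h>

/* Hypothetical recovery path in a RoCE driver; the prototype for
 * rdma_roce_rescan_port() is assumed to live next to
 * rdma_roce_rescan_device() in the rdma headers.
 */
static void example_port_recovered(struct ib_device *ibdev, u32 port)
{
	/* Re-enumerate system netdevs and re-add the GIDs that belong to
	 * this port (a no-op for non-RoCE ports, as the helper checks).
	 */
	rdma_roce_rescan_port(ibdev, port);
}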
static void callback_for_addr_gid_device_scan(struct ib_device *device,
u32 port,
struct net_device *rdma_ndev,
@ -575,16 +596,17 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u32 port,
}
}
static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *event_ndev)
void roce_del_all_netdev_gids(struct ib_device *ib_dev,
u32 port, struct net_device *ndev)
{
ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev);
}
EXPORT_SYMBOL(roce_del_all_netdev_gids);
static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids);
}
static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,


@ -133,35 +133,6 @@ struct ib_uverbs_completion_event_file {
struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_file {
struct kref ref;
struct ib_uverbs_device *device;
struct mutex ucontext_lock;
/*
* ucontext must be accessed via ib_uverbs_get_ucontext() or with
* ucontext_lock held
*/
struct ib_ucontext *ucontext;
struct ib_uverbs_async_event_file *default_async_file;
struct list_head list;
/*
* To access the uobjects list hw_destroy_rwsem must be held for write
* OR hw_destroy_rwsem held for read AND uobjects_lock held.
* hw_destroy_rwsem should be called across any destruction of the HW
* object of an associated uobject.
*/
struct rw_semaphore hw_destroy_rwsem;
spinlock_t uobjects_lock;
struct list_head uobjects;
struct mutex umap_lock;
struct list_head umaps;
struct page *disassociate_page;
struct xarray idr;
};
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;


@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev;
static DEFINE_IDA(uverbs_ida);
static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
static struct ib_client uverbs_client;
static char *uverbs_devnode(const struct device *dev, umode_t *mode)
{
@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref)
if (file->disassociate_page)
__free_pages(file->disassociate_page, 0);
mutex_destroy(&file->disassociation_lock);
mutex_destroy(&file->umap_lock);
mutex_destroy(&file->ucontext_lock);
kfree(file);
@ -698,8 +700,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
ret = PTR_ERR(ucontext);
goto out;
}
mutex_lock(&file->disassociation_lock);
vma->vm_ops = &rdma_umap_ops;
ret = ucontext->device->ops.mmap(ucontext, vma);
mutex_unlock(&file->disassociation_lock);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
@ -721,6 +728,8 @@ static void rdma_umap_open(struct vm_area_struct *vma)
/* We are racing with disassociation */
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
goto out_zap;
mutex_lock(&ufile->disassociation_lock);
/*
* Disassociation already completed, the VMA should already be zapped.
*/
@ -732,10 +741,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
goto out_unlock;
rdma_umap_priv_init(priv, vma, opriv->entry);
mutex_unlock(&ufile->disassociation_lock);
up_read(&ufile->hw_destroy_rwsem);
return;
out_unlock:
mutex_unlock(&ufile->disassociation_lock);
up_read(&ufile->hw_destroy_rwsem);
out_zap:
/*
@ -819,7 +830,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
lockdep_assert_held(&ufile->hw_destroy_rwsem);
mutex_lock(&ufile->disassociation_lock);
while (1) {
struct mm_struct *mm = NULL;
@ -845,8 +856,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
break;
}
mutex_unlock(&ufile->umap_lock);
if (!mm)
if (!mm) {
mutex_unlock(&ufile->disassociation_lock);
return;
}
/*
* The umap_lock is nested under mmap_lock since it used within
@ -876,8 +889,32 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
mmap_read_unlock(mm);
mmput(mm);
}
mutex_unlock(&ufile->disassociation_lock);
}
/**
* rdma_user_mmap_disassociate() - Revoke mmaps for a device
* @device: device to revoke
*
* This function should be called by drivers that need to disable mmaps for the
* device, for instance because it is going to be reset.
*/
void rdma_user_mmap_disassociate(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev =
ib_get_client_data(device, &uverbs_client);
struct ib_uverbs_file *ufile;
mutex_lock(&uverbs_dev->lists_mutex);
list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
if (ufile->ucontext)
uverbs_user_mmap_disassociate(ufile);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
EXPORT_SYMBOL(rdma_user_mmap_disassociate);
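To show the intended caller side of the new export (per the kdoc above, a driver about to reset its hardware, such as hns during an async reset), a minimal hypothetical sketch; only rdma_user_mmap_disassociate() comes from this patch.

#include <rdma/ib_verbs.h>

/* Hypothetical reset path: revoke all user mmaps so userspace cannot
 * touch stale doorbell/BAR pages while the device is down.
 */
static void example_prepare_async_reset(struct ib_device *ibdev)
{
	rdma_user_mmap_disassociate(ibdev);
}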
/*
* ib_uverbs_open() does not need the BKL:
*
@ -947,6 +984,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->umap_lock);
INIT_LIST_HEAD(&file->umaps);
mutex_init(&file->disassociation_lock);
filp->private_data = file;
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);


@ -4,4 +4,5 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
bnxt_re-y := main.o ib_verbs.o \
qplib_res.o qplib_rcfw.o \
qplib_sp.o qplib_fp.o hw_counters.o
qplib_sp.o qplib_fp.o hw_counters.o \
debugfs.o


@ -154,8 +154,25 @@ struct bnxt_re_pacing {
#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
#define BNXT_RE_MIN_MSIX 2
#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX
struct bnxt_re_nq_record {
struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
int num_msix;
/* serialize NQ access */
struct mutex load_lock;
};
#define MAX_CQ_HASH_BITS (16)
#define MAX_SRQ_HASH_BITS (16)
static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
{
return (chip_num == CHIP_NUM_58818 ||
chip_num == CHIP_NUM_57608);
}
struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
@ -174,27 +191,24 @@ struct bnxt_re_dev {
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
int num_msix;
int id;
struct delayed_work worker;
u8 cur_prio_map;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
/* RCFW Channel */
struct bnxt_qplib_rcfw rcfw;
/* NQ */
struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX];
/* NQ record */
struct bnxt_re_nq_record *nqr;
/* Device Resources */
struct bnxt_qplib_dev_attr dev_attr;
struct bnxt_qplib_ctx qplib_ctx;
struct bnxt_qplib_res qplib_res;
struct bnxt_qplib_dpi dpi_privileged;
struct bnxt_qplib_cq_coal_param cq_coalescing;
struct mutex qp_lock; /* protect qp list */
struct list_head qp_list;
@ -213,6 +227,8 @@ struct bnxt_re_dev {
struct delayed_work dbq_pacing_work;
DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS);
DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
struct dentry *dbg_root;
struct dentry *qp_debugfs;
};
#define to_bnxt_re_dev(ptr, member) \
@ -239,4 +255,23 @@ static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev)
rdev->qplib_res.pacing_data->dev_err_state =
test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
}
static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev)
{
if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ||
rdev->rcfw.res->cctx->hwrm_intf_ver < HWRM_VERSION_READ_CTX)
return -EOPNOTSUPP;
return 0;
}
#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5 1088
#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5 128
#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5 128
#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5 192
#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 1088
#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 192
#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192
#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192
#endif


@ -0,0 +1,138 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2024, Broadcom. All rights reserved. The term
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
*
* Description: Debugfs component of the bnxt_re driver
*/
#include <linux/debugfs.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
#include "qplib_rcfw.h"
#include "bnxt_re.h"
#include "ib_verbs.h"
#include "debugfs.h"
static struct dentry *bnxt_re_debugfs_root;
static inline const char *bnxt_re_qp_state_str(u8 state)
{
switch (state) {
case CMDQ_MODIFY_QP_NEW_STATE_RESET:
return "RST";
case CMDQ_MODIFY_QP_NEW_STATE_INIT:
return "INIT";
case CMDQ_MODIFY_QP_NEW_STATE_RTR:
return "RTR";
case CMDQ_MODIFY_QP_NEW_STATE_RTS:
return "RTS";
case CMDQ_MODIFY_QP_NEW_STATE_SQE:
return "SQER";
case CMDQ_MODIFY_QP_NEW_STATE_SQD:
return "SQD";
case CMDQ_MODIFY_QP_NEW_STATE_ERR:
return "ERR";
default:
return "Invalid QP state";
}
}
static inline const char *bnxt_re_qp_type_str(u8 type)
{
switch (type) {
case CMDQ_CREATE_QP1_TYPE_GSI: return "QP1";
case CMDQ_CREATE_QP_TYPE_GSI: return "QP1";
case CMDQ_CREATE_QP_TYPE_RC: return "RC";
case CMDQ_CREATE_QP_TYPE_UD: return "UD";
case CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
default: return "Invalid transport type";
}
}
static ssize_t qp_info_read(struct file *filep,
char __user *buffer,
size_t count, loff_t *ppos)
{
struct bnxt_re_qp *qp = filep->private_data;
char *buf;
int len;
if (*ppos)
return 0;
buf = kasprintf(GFP_KERNEL,
"QPN\t\t: %d\n"
"transport\t: %s\n"
"state\t\t: %s\n"
"mtu\t\t: %d\n"
"timeout\t\t: %d\n"
"remote QPN\t: %d\n",
qp->qplib_qp.id,
bnxt_re_qp_type_str(qp->qplib_qp.type),
bnxt_re_qp_state_str(qp->qplib_qp.state),
qp->qplib_qp.mtu,
qp->qplib_qp.timeout,
qp->qplib_qp.dest_qpn);
if (!buf)
return -ENOMEM;
if (count < strlen(buf)) {
kfree(buf);
return -ENOSPC;
}
len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
kfree(buf);
return len;
}
static const struct file_operations debugfs_qp_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = qp_info_read,
};
void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
{
char resn[32];
sprintf(resn, "0x%x", qp->qplib_qp.id);
qp->dentry = debugfs_create_file(resn, 0400, rdev->qp_debugfs, qp, &debugfs_qp_fops);
}
void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
{
debugfs_remove(qp->dentry);
}
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
}
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
debugfs_remove_recursive(rdev->qp_debugfs);
debugfs_remove_recursive(rdev->dbg_root);
rdev->dbg_root = NULL;
}
void bnxt_re_register_debugfs(void)
{
bnxt_re_debugfs_root = debugfs_create_dir("bnxt_re", NULL);
}
void bnxt_re_unregister_debugfs(void)
{
debugfs_remove(bnxt_re_debugfs_root);
}
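As a usage illustration (not part of the diff): given the layout created above — a bnxt_re/ root, one directory per PCI device, a QPs/ subdirectory, and one 0x<qp id> file per QP — the per-QP dump can be read with ordinary file I/O. The PCI address and QP id below are made-up example values, and the default debugfs mount point is assumed.

/* Hypothetical userspace reader; adjust the path to the actual PCI
 * device name, QP id, and debugfs mount point.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/bnxt_re/0000:3b:00.0/QPs/0x1", "r");
	char line[256];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* QPN, transport, state, mtu, timeout, remote QPN */
	fclose(f);
	return 0;
}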


@ -0,0 +1,21 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* Copyright (c) 2024, Broadcom. All rights reserved. The term
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
*
* Description: Debugfs header
*/
#ifndef __BNXT_RE_DEBUGFS__
#define __BNXT_RE_DEBUGFS__
void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);
#endif


@ -62,6 +62,7 @@
#include "bnxt_re.h"
#include "ib_verbs.h"
#include "debugfs.h"
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_std_types.h>
@ -94,9 +95,9 @@ static int __from_ib_access_flags(int iflags)
return qflags;
};
static enum ib_access_flags __to_ib_access_flags(int qflags)
static int __to_ib_access_flags(int qflags)
{
enum ib_access_flags iflags = 0;
int iflags = 0;
if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
iflags |= IB_ACCESS_LOCAL_WRITE;
@ -113,7 +114,49 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
iflags |= IB_ACCESS_ON_DEMAND;
return iflags;
};
}
static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags)
{
u8 qflags = 0;
if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
/* For Wh+ */
return (u8)__from_ib_access_flags(iflags);
/* For P5, P7 and later chips */
if (iflags & IB_ACCESS_LOCAL_WRITE)
qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
if (iflags & IB_ACCESS_REMOTE_WRITE)
qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
if (iflags & IB_ACCESS_REMOTE_READ)
qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
if (iflags & IB_ACCESS_REMOTE_ATOMIC)
qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC;
return qflags;
}
static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
{
int iflags = 0;
if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
/* For Wh+ */
return __to_ib_access_flags(qflags);
/* For P5, P7 and later chips */
if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE)
iflags |= IB_ACCESS_LOCAL_WRITE;
if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE)
iflags |= IB_ACCESS_REMOTE_WRITE;
if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ)
iflags |= IB_ACCESS_REMOTE_READ;
if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC)
iflags |= IB_ACCESS_REMOTE_ATOMIC;
return iflags;
}
static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
struct bnxt_qplib_mrw *qplib_mr)
@ -211,6 +254,22 @@ int bnxt_re_query_device(struct ib_device *ibdev,
return 0;
}
int bnxt_re_modify_device(struct ib_device *ibdev,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask);
if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX);
return 0;
}
/* Port */
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr)
@ -939,6 +998,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
atomic_dec(&rdev->stats.res.ud_qp_count);
bnxt_re_debug_rem_qpinfo(rdev, qp);
ib_umem_release(qp->rumem);
ib_umem_release(qp->sumem);
@ -1622,6 +1683,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
if (active_qps > rdev->stats.res.ud_qp_watermark)
rdev->stats.res.ud_qp_watermark = active_qps;
}
bnxt_re_debug_add_qpinfo(rdev, qp);
return 0;
qp_destroy:
@ -1814,8 +1876,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
nq = &rdev->nq[0];
srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
nq = &rdev->nqr->nq[0];
if (udata) {
rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@ -2041,12 +2103,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
qp->qplib_qp.access =
__from_ib_access_flags(qp_attr->qp_access_flags);
__qp_access_flags_from_ib(qp->qplib_qp.cctx,
qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
/* Temp: Set all params on QP as of now */
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@ -2080,7 +2140,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.ah.sgid_index = ctx->idx;
qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
qp->qplib_qp.ah.hop_limit = grh->hop_limit;
qp->qplib_qp.ah.traffic_class = grh->traffic_class;
qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2;
qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
ether_addr_copy(qp->qplib_qp.ah.dmac,
qp_attr->ah_attr.roce.dmac);
@ -2251,7 +2311,8 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
qp_attr->qp_access_flags = __qp_access_flags_to_ib(qp->qplib_qp.cctx,
qplib_qp->access);
qp_attr->pkey_index = qplib_qp->pkey_index;
qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
@ -2972,6 +3033,28 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
return rc;
}
static struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
{
int min, indx;
mutex_lock(&rdev->nqr->load_lock);
for (indx = 0, min = 0; indx < (rdev->nqr->num_msix - 1); indx++) {
if (rdev->nqr->nq[min].load > rdev->nqr->nq[indx].load)
min = indx;
}
rdev->nqr->nq[min].load++;
mutex_unlock(&rdev->nqr->load_lock);
return &rdev->nqr->nq[min];
}
static void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
{
mutex_lock(&rdev->nqr->load_lock);
nq->load--;
mutex_unlock(&rdev->nqr->load_lock);
}
/* Completion Queues */
int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
@ -2990,6 +3073,8 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
hash_del(&cq->hash_entry);
}
bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
bnxt_re_put_nq(rdev, nq);
ib_umem_release(cq->umem);
atomic_dec(&rdev->stats.res.cq_count);
@ -3008,8 +3093,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_nq *nq = NULL;
unsigned int nq_alloc_cnt;
int cqe = attr->cqe;
int rc, entries;
u32 active_cqs;
@ -3060,15 +3143,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->qplib_cq.dpi = &rdev->dpi_privileged;
}
/*
* Allocating the NQ in a round robin fashion. nq_alloc_cnt is a
* used for getting the NQ index.
*/
nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt);
nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)];
cq->qplib_cq.max_wqe = entries;
cq->qplib_cq.cnq_hw_ring_id = nq->ring_id;
cq->qplib_cq.nq = nq;
cq->qplib_cq.coalescing = &rdev->cq_coalescing;
cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id;
rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
@ -3078,7 +3156,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->ib_cq.cqe = entries;
cq->cq_period = cq->qplib_cq.period;
nq->budget++;
active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
if (active_cqs > rdev->stats.res.cq_watermark)
@ -3633,7 +3710,7 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
wc->byte_len = orig_cqe->length;
wc->qp = &gsi_qp->ib_qp;
wc->ex.imm_data = cpu_to_be32(le32_to_cpu(orig_cqe->immdata));
wc->ex.imm_data = cpu_to_be32(orig_cqe->immdata);
wc->src_qp = orig_cqe->src_qp;
memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
@ -3778,7 +3855,10 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
(unsigned long)(cqe->qp_handle),
struct bnxt_re_qp, qplib_qp);
wc->qp = &qp->ib_qp;
wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immdata));
if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
wc->ex.imm_data = cpu_to_be32(cqe->immdata);
else
wc->ex.invalidate_rkey = cqe->invrkey;
wc->src_qp = cqe->src_qp;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->port_num = 1;


@ -95,6 +95,7 @@ struct bnxt_re_qp {
struct ib_ud_header qp1_hdr;
struct bnxt_re_cq *scq;
struct bnxt_re_cq *rcq;
struct dentry *dentry;
};
struct bnxt_re_cq {
@ -196,6 +197,9 @@ static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
int bnxt_re_modify_device(struct ib_device *ibdev,
int device_modify_mask,
struct ib_device_modify *device_modify);
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,


@ -67,6 +67,7 @@
#include <rdma/bnxt_re-abi.h>
#include "bnxt.h"
#include "hw_counters.h"
#include "debugfs.h"
static char version[] =
BNXT_RE_DESC "\n";
@ -183,6 +184,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
rdev->rcfw.res = &rdev->qplib_res;
rdev->qplib_res.dattr = &rdev->dev_attr;
rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
rdev->qplib_res.en_dev = en_dev;
bnxt_re_set_drv_mode(rdev);
@ -287,12 +289,17 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
{
/*
* Use the total VF count since the actual VF count may not be
* available at this point.
*/
rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
bnxt_re_set_resource_limits(rdev);
bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
&rdev->qplib_ctx);
}
if (!rdev->num_vfs)
return;
bnxt_re_set_resource_limits(rdev);
bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
&rdev->qplib_ctx);
}
static void bnxt_re_shutdown(struct auxiliary_device *adev)
@ -316,8 +323,8 @@ static void bnxt_re_stop_irq(void *handle)
rdev = en_info->rdev;
rcfw = &rdev->rcfw;
for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
nq = &rdev->nq[indx - 1];
for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
nq = &rdev->nqr->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
}
@ -334,7 +341,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
int indx, rc;
rdev = en_info->rdev;
msix_ent = rdev->en_dev->msix_entries;
msix_ent = rdev->nqr->msix_entries;
rcfw = &rdev->rcfw;
if (!ent) {
/* Not setting the f/w timeout bit in rcfw.
@ -349,8 +356,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
/* Vectors may change after restart, so update with new vectors
* in device structure.
*/
for (indx = 0; indx < rdev->num_msix; indx++)
rdev->en_dev->msix_entries[indx].vector = ent[indx].vector;
for (indx = 0; indx < rdev->nqr->num_msix; indx++)
rdev->nqr->msix_entries[indx].vector = ent[indx].vector;
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
false);
@ -358,8 +365,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
return;
}
for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
nq = &rdev->nq[indx - 1];
for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) {
nq = &rdev->nqr->nq[indx - 1];
rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
msix_ent[indx].vector, false);
if (rc) {
@ -873,6 +880,253 @@ static const struct attribute_group bnxt_re_dev_attr_group = {
.attrs = bnxt_re_attributes,
};
static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
{
struct bnxt_qplib_hwq *mr_hwq;
struct nlattr *table_attr;
struct bnxt_re_mr *mr;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
if (!table_attr)
return -EMSGSIZE;
mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
mr_hwq = &mr->qplib_mr.hwq;
if (rdma_nl_put_driver_u32(msg, "page_size",
mr_hwq->qe_ppg * mr_hwq->element_size))
goto err;
if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements))
goto err;
if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size))
goto err;
if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq))
goto err;
if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va))
goto err;
nla_nest_end(msg, table_attr);
return 0;
err:
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
{
struct bnxt_re_dev *rdev;
struct bnxt_re_mr *mr;
int err, len;
void *data;
mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
rdev = mr->rdev;
err = bnxt_re_read_context_allowed(rdev);
if (err)
return err;
len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 :
BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5;
data = kzalloc(len, GFP_KERNEL);
if (!data)
return -ENOMEM;
err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW,
mr->qplib_mr.lkey, len, data);
if (!err)
err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
kfree(data);
return err;
}
static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
{
struct bnxt_qplib_hwq *cq_hwq;
struct nlattr *table_attr;
struct bnxt_re_cq *cq;
cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
cq_hwq = &cq->qplib_cq.hwq;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
if (!table_attr)
return -EMSGSIZE;
if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth))
goto err;
if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements))
goto err;
if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size))
goto err;
if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe))
goto err;
nla_nest_end(msg, table_attr);
return 0;
err:
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
{
struct bnxt_re_dev *rdev;
struct bnxt_re_cq *cq;
int err, len;
void *data;
cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
rdev = cq->rdev;
err = bnxt_re_read_context_allowed(rdev);
if (err)
return err;
len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 :
BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5;
data = kzalloc(len, GFP_KERNEL);
if (!data)
return -ENOMEM;
err = bnxt_qplib_read_context(&rdev->rcfw,
CMDQ_READ_CONTEXT_TYPE_CQ,
cq->qplib_cq.id, len, data);
if (!err)
err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
kfree(data);
return err;
}
static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
{
struct bnxt_qplib_qp *qplib_qp;
struct nlattr *table_attr;
struct bnxt_re_qp *qp;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
if (!table_attr)
return -EMSGSIZE;
qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
qplib_qp = &qp->qplib_qp;
if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe))
goto err;
if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge))
goto err;
if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size))
goto err;
if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start))
goto err;
if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last))
goto err;
if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe))
goto err;
if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge))
goto err;
if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size))
goto err;
if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start))
goto err;
if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last))
goto err;
if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout))
goto err;
nla_nest_end(msg, table_attr);
return 0;
err:
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev);
int err, len;
void *data;
err = bnxt_re_read_context_allowed(rdev);
if (err)
return err;
len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
data = kzalloc(len, GFP_KERNEL);
if (!data)
return -ENOMEM;
err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC,
ibqp->qp_num, len, data);
if (!err)
err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
kfree(data);
return err;
}
static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
{
struct nlattr *table_attr;
struct bnxt_re_srq *srq;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
if (!table_attr)
return -EMSGSIZE;
srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size))
goto err;
if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe))
goto err;
if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
goto err;
nla_nest_end(msg, table_attr);
return 0;
err:
nla_nest_cancel(msg, table_attr);
return -EMSGSIZE;
}
static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
{
struct bnxt_re_dev *rdev;
struct bnxt_re_srq *srq;
int err, len;
void *data;
srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
rdev = srq->rdev;
err = bnxt_re_read_context_allowed(rdev);
if (err)
return err;
len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 :
BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5;
data = kzalloc(len, GFP_KERNEL);
if (!data)
return -ENOMEM;
err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ,
srq->qplib_srq.id, len, data);
if (!err)
err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
kfree(data);
return err;
}
static const struct ib_device_ops bnxt_re_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_BNXT_RE,
@ -914,6 +1168,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_srq_recv = bnxt_re_post_srq_recv,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
.modify_device = bnxt_re_modify_device,
.query_pkey = bnxt_re_query_pkey,
.query_port = bnxt_re_query_port,
.query_qp = bnxt_re_query_qp,
@ -930,6 +1185,17 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};
static const struct ib_device_ops restrack_ops = {
.fill_res_cq_entry = bnxt_re_fill_res_cq_entry,
.fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw,
.fill_res_qp_entry = bnxt_re_fill_res_qp_entry,
.fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw,
.fill_res_mr_entry = bnxt_re_fill_res_mr_entry,
.fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw,
.fill_res_srq_entry = bnxt_re_fill_res_srq_entry,
.fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw,
};
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
struct ib_device *ibdev = &rdev->ibdev;
@ -943,7 +1209,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
ibdev->num_comp_vectors = rdev->num_msix - 1;
ibdev->num_comp_vectors = rdev->nqr->num_msix - 1;
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
@ -951,6 +1217,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->driver_def = bnxt_re_uapi_defs;
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
ib_set_device_ops(ibdev, &restrack_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
if (ret)
return ret;
@ -990,6 +1257,15 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
} else {
rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
}
rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
return rdev;
}
@ -1276,8 +1552,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
for (i = 1; i < rdev->num_msix; i++)
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
for (i = 1; i < rdev->nqr->num_msix; i++)
bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@ -1291,10 +1567,12 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
bnxt_qplib_init_res(&rdev->qplib_res);
for (i = 1; i < rdev->num_msix ; i++) {
db_offt = rdev->en_dev->msix_entries[i].db_offset;
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->en_dev->msix_entries[i].vector,
mutex_init(&rdev->nqr->load_lock);
for (i = 1; i < rdev->nqr->num_msix ; i++) {
db_offt = rdev->nqr->msix_entries[i].db_offset;
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1],
i - 1, rdev->nqr->msix_entries[i].vector,
db_offt, &bnxt_re_cqn_handler,
&bnxt_re_srqn_handler);
if (rc) {
@ -1307,20 +1585,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
return 0;
fail:
for (i = num_vec_enabled; i >= 0; i--)
bnxt_qplib_disable_nq(&rdev->nq[i]);
bnxt_qplib_disable_nq(&rdev->nqr->nq[i]);
return rc;
}
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_nq *nq;
u8 type;
int i;
for (i = 0; i < rdev->num_msix - 1; i++) {
for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
bnxt_qplib_free_nq(&rdev->nq[i]);
rdev->nq[i].res = NULL;
nq = &rdev->nqr->nq[i];
bnxt_re_net_ring_free(rdev, nq->ring_id, type);
bnxt_qplib_free_nq(nq);
nq->res = NULL;
}
}
@ -1362,12 +1642,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc)
goto dealloc_res;
for (i = 0; i < rdev->num_msix - 1; i++) {
for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
struct bnxt_qplib_nq *nq;
nq = &rdev->nq[i];
nq = &rdev->nqr->nq[i];
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq);
if (rc) {
ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
i, rc);
@ -1375,17 +1655,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
}
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count;
rattr.type = type;
rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx;
rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate NQ fw id with rc = 0x%x",
rc);
bnxt_qplib_free_nq(&rdev->nq[i]);
bnxt_qplib_free_nq(nq);
goto free_nq;
}
num_vec_created++;
@ -1394,8 +1674,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
free_nq:
for (i = num_vec_created - 1; i >= 0; i--) {
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
bnxt_qplib_free_nq(&rdev->nq[i]);
bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type);
bnxt_qplib_free_nq(&rdev->nqr->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->dpi_privileged);
@ -1590,11 +1870,28 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev)
{
rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
if (!rdev->nqr)
return -ENOMEM;
return 0;
}
static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
{
kfree(rdev->nqr);
rdev->nqr = NULL;
}
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
u8 type;
int rc;
bnxt_re_debugfs_rem_pdev(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work_sync(&rdev->worker);
@ -1617,11 +1914,12 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
rdev->num_msix = 0;
rdev->nqr->num_msix = 0;
if (rdev->pacing.dbr_pacing)
bnxt_re_deinitialize_dbr_pacing(rdev);
bnxt_re_free_nqr_mem(rdev);
bnxt_re_destroy_chip_ctx(rdev);
if (op_type == BNXT_RE_COMPLETE_REMOVE) {
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
@ -1659,6 +1957,17 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) {
ibdev_err(&rdev->ibdev,
"RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
rdev->en_dev->ulp_tbl->msix_requested);
bnxt_unregister_dev(rdev->en_dev);
clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
return -EINVAL;
}
ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
rdev->en_dev->ulp_tbl->msix_requested);
rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
bnxt_unregister_dev(rdev->en_dev);
@ -1667,19 +1976,20 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
return -EINVAL;
}
rc = bnxt_re_alloc_nqr_mem(rdev);
if (rc) {
bnxt_re_destroy_chip_ctx(rdev);
bnxt_unregister_dev(rdev->en_dev);
clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
return rc;
}
rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries,
sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix);
/* Check whether VF or PF */
bnxt_re_get_sriov_func_type(rdev);
if (!rdev->en_dev->ulp_tbl->msix_requested) {
ibdev_err(&rdev->ibdev,
"Failed to get MSI-X vectors: %#x\n", rc);
rc = -EINVAL;
goto fail;
}
ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
rdev->en_dev->ulp_tbl->msix_requested);
rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
bnxt_re_query_hwrm_intf_version(rdev);
/* Establish RCFW Communication Channel to initialize the context
@ -1701,14 +2011,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
rattr.type = type;
rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector;
db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
vid, db_offt,
&bnxt_re_aeq_handler);
@ -1785,16 +2095,16 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
/*
* Use the total VF count since the actual VF count may not be
* available at this point.
*/
bnxt_re_vf_res_config(rdev);
if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
bnxt_re_vf_res_config(rdev);
}
hash_init(rdev->cq_hash);
if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
hash_init(rdev->srq_hash);
bnxt_re_debugfs_add_pdev(rdev);
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
@ -1896,11 +2206,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
if (enable) {
cc_param.enable = 1;
cc_param.cc_mode = CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE;
cc_param.tos_ecn = 1;
}
cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
@ -2033,12 +2342,6 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
rdev = en_info->rdev;
en_dev = en_info->en_dev;
mutex_lock(&bnxt_re_mutex);
/* L2 driver may invoke this callback during device error/crash or device
* reset. Current RoCE driver doesn't recover the device in case of
* error. Handle the error by dispatching fatal events to all qps
* ie. by calling bnxt_re_dev_stop and release the MSIx vectors as
* L2 driver want to modify the MSIx table.
*/
ibdev_info(&rdev->ibdev, "Handle device suspend call");
/* Check the current device state from bnxt_en_dev and move the
@ -2046,17 +2349,12 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
* This prevents more commands to HW during clean-up,
* in case the device is already in error.
*/
if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state))
if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) {
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
bnxt_re_dev_stop(rdev);
bnxt_re_stop_irq(adev);
/* Move the device states to detached and avoid sending any more
* commands to HW
*/
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
wake_up_all(&rdev->rcfw.cmdq.waitq);
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
wake_up_all(&rdev->rcfw.cmdq.waitq);
bnxt_re_dev_stop(rdev);
}
if (rdev->pacing.dbr_pacing)
bnxt_re_set_pacing_dev_state(rdev);
@ -2075,13 +2373,6 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
struct bnxt_re_dev *rdev;
mutex_lock(&bnxt_re_mutex);
/* L2 driver may invoke this callback during device recovery, resume.
* reset. Current RoCE driver doesn't recover the device in case of
* error. Handle the error by dispatching fatal events to all qps
* ie. by calling bnxt_re_dev_stop and release the MSIx vectors as
* L2 driver want to modify the MSIx table.
*/
bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
rdev = en_info->rdev;
ibdev_info(&rdev->ibdev, "Device resume completed");
@ -2112,18 +2403,24 @@ static int __init bnxt_re_mod_init(void)
int rc;
pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
bnxt_re_register_debugfs();
rc = auxiliary_driver_register(&bnxt_re_driver);
if (rc) {
pr_err("%s: Failed to register auxiliary driver\n",
ROCE_DRV_MODULE_NAME);
return rc;
goto err_debug;
}
return 0;
err_debug:
bnxt_re_unregister_debugfs();
return rc;
}
static void __exit bnxt_re_mod_exit(void)
{
auxiliary_driver_unregister(&bnxt_re_driver);
bnxt_re_unregister_debugfs();
}
module_init(bnxt_re_mod_init);


@ -556,6 +556,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
nq->pdev = pdev;
nq->cqn_handler = cqn_handler;
nq->srqn_handler = srqn_handler;
nq->load = 0;
/* Have a task to schedule CQ notifiers in post send case */
nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
@ -1282,12 +1283,47 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
}
}
static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp,
struct cmdq_modify_qp *req)
{
u32 mandatory_flags = 0;
if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) {
if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq)
req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED);
mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
}
if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD ||
qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI)
mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
qp->modify_flags |= mandatory_flags;
req->qp_type = qp->type;
}
static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
{
if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) ||
(qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS))
return true;
return false;
}
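
The two transitions above are the only ones the firmware can fast-path; when it does, the driver supplies a few mandatory masks itself. A minimal user-space sketch of that gating, with placeholder state and mask values (the real CMDQ_MODIFY_QP_* encodings differ):

#include <stdbool.h>
#include <stdio.h>

/* Placeholder states -- the CMDQ_MODIFY_QP_NEW_STATE_* encoding differs. */
enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS };

/* Illustrative mask bits modeled after CMDQ_MODIFY_QP_MODIFY_MASK_*. */
#define MASK_ACCESS (1u << 0)
#define MASK_PKEY   (1u << 1)

static bool is_optimized_transition(enum qp_state cur, enum qp_state next)
{
	return (cur == QPS_INIT && next == QPS_RTR) ||
	       (cur == QPS_RTR && next == QPS_RTS);
}

/* RC QPs always need ACCESS; INIT->RTR additionally needs PKEY. */
static unsigned int mandatory_masks(bool is_rc, enum qp_state cur, enum qp_state next)
{
	unsigned int m = 0;

	if (is_rc)
		m |= MASK_ACCESS;
	if (cur == QPS_INIT && next == QPS_RTR)
		m |= MASK_PKEY;
	return m;
}

int main(void)
{
	if (is_optimized_transition(QPS_INIT, QPS_RTR))
		printf("INIT->RTR: extra masks 0x%x\n",
		       mandatory_masks(true, QPS_INIT, QPS_RTR));
	return 0;
}
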
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_modify_qp_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_modify_qp req = {};
u16 vlan_pcp_vlan_dei_vlan_id;
u32 temp32[4];
u32 bmask;
int rc;
@ -1298,6 +1334,12 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* Filter out the qp_attr_mask based on the state->new transition */
__filter_modify_flags(qp);
if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
/* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */
if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) &&
is_optimized_state_transition(qp))
bnxt_set_mandatory_attributes(qp, &req);
}
bmask = qp->modify_flags;
req.modify_mask = cpu_to_le32(qp->modify_flags);
req.qp_cid = cpu_to_le32(qp->id);
@ -1378,7 +1420,16 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) {
vlan_pcp_vlan_dei_vlan_id =
((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id <<
CMDQ_MODIFY_QP_VLAN_ID_SFT) &
CMDQ_MODIFY_QP_VLAN_ID_MASK);
vlan_pcp_vlan_dei_vlan_id |=
((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) &
CMDQ_MODIFY_QP_VLAN_PCP_MASK);
req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id);
}
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
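
The new VLAN handling folds the VLAN ID from the SGID table entry and the service level (as PCP) into one 16-bit field. A standalone sketch of that packing, assuming the conventional 802.1Q TCI layout (VID in bits 11:0, PCP in bits 15:13); the actual CMDQ_MODIFY_QP_VLAN_* shift and mask values live in the firmware header and are authoritative:

#include <stdint.h>
#include <stdio.h>

/* Assumed 802.1Q-style layout, not the CMDQ constants themselves. */
#define VLAN_ID_MASK  0x0fffu   /* bits 11:0 */
#define VLAN_PCP_SFT  13        /* bits 15:13 */

static uint16_t pack_vlan_pcp_vlan_id(uint16_t vlan_id, uint8_t sl)
{
	return (vlan_id & VLAN_ID_MASK) | (uint16_t)((sl & 0x7) << VLAN_PCP_SFT);
}

int main(void)
{
	/* VLAN 100 with service level 3 mapped to the PCP bits. */
	printf("vlan_pcp_vlan_dei_vlan_id = 0x%04x\n",
	       pack_vlan_pcp_vlan_id(100, 3));
	return 0;
}
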
@ -2151,6 +2202,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_create_cq req = {};
struct bnxt_qplib_pbl *pbl;
u32 coalescing = 0;
u32 pg_sz_lvl;
int rc;
@ -2177,6 +2229,25 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->max_wqe);
if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) {
req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
coalescing |= ((cq->coalescing->buf_maxtime <<
CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
CMDQ_CREATE_CQ_BUF_MAXTIME_MASK);
coalescing |= ((cq->coalescing->normal_maxbuf <<
CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK);
coalescing |= ((cq->coalescing->during_maxbuf <<
CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
CMDQ_CREATE_CQ_DURING_MAXBUF_MASK);
if (cq->coalescing->en_ring_idle_mode)
coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
else
coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
req.coalescing = cpu_to_le32(coalescing);
}
pbl = &cq->hwq.pbl[PBL_LVL_0];
pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
CMDQ_CREATE_CQ_PG_SIZE_SFT);
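
The coalescing word built above packs four sub-fields into one 32-bit value. A standalone sketch using the CMDQ_CREATE_CQ_* field layout from the firmware header and the P5 defaults (buf_maxtime 0x1, normal/during maxbuf 0x1, ring idle mode enabled); the helper name is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Field layout from cmdq_create_cq: bits 8:0, 13:9, 18:14, 19. */
#define CQ_BUF_MAXTIME_MASK     0x1ffu
#define CQ_BUF_MAXTIME_SFT      0
#define CQ_NORMAL_MAXBUF_MASK   0x3e00u
#define CQ_NORMAL_MAXBUF_SFT    9
#define CQ_DURING_MAXBUF_MASK   0x7c000u
#define CQ_DURING_MAXBUF_SFT    14
#define CQ_ENABLE_RING_IDLE     0x80000u

static uint32_t pack_cq_coalescing(uint16_t buf_maxtime, uint8_t normal_maxbuf,
				   uint8_t during_maxbuf, int ring_idle)
{
	uint32_t w = 0;

	w |= ((uint32_t)buf_maxtime << CQ_BUF_MAXTIME_SFT) & CQ_BUF_MAXTIME_MASK;
	w |= ((uint32_t)normal_maxbuf << CQ_NORMAL_MAXBUF_SFT) & CQ_NORMAL_MAXBUF_MASK;
	w |= ((uint32_t)during_maxbuf << CQ_DURING_MAXBUF_SFT) & CQ_DURING_MAXBUF_MASK;
	if (ring_idle)
		w |= CQ_ENABLE_RING_IDLE;
	return w;
}

int main(void)
{
	/* P5 defaults: prints 0x84201. */
	printf("coalescing = 0x%x\n", pack_cq_coalescing(0x1, 0x1, 0x1, 1));
	return 0;
}
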


@ -383,6 +383,25 @@ static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
return avail <= slots;
}
/* CQ coalescing parameters */
struct bnxt_qplib_cq_coal_param {
u16 buf_maxtime;
u8 normal_maxbuf;
u8 during_maxbuf;
u8 en_ring_idle_mode;
};
#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8
#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8
#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1
#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1
#define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1
#define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf
#define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f
#define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f
#define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1
struct bnxt_qplib_cqe {
u8 status;
u8 type;
@ -391,7 +410,7 @@ struct bnxt_qplib_cqe {
u16 cfa_meta;
u64 wr_id;
union {
__le32 immdata;
u32 immdata;
u32 invrkey;
};
u64 qp_handle;
@ -445,6 +464,7 @@ struct bnxt_qplib_cq {
*/
spinlock_t flush_lock; /* QP flush management */
u16 cnq_events;
struct bnxt_qplib_cq_coal_param *coalescing;
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
@ -499,6 +519,7 @@ struct bnxt_qplib_nq {
struct tasklet_struct nq_tasklet;
bool requested;
int budget;
u32 load;
cqn_handler_t cqn_handler;
srqn_handler_t srqn_handler;


@ -831,6 +831,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct creq_initialize_fw_resp resp = {};
struct cmdq_initialize_fw req = {};
struct bnxt_qplib_cmdqmsg msg = {};
u16 flags = 0;
u8 pgsz, lvl;
int rc;
@ -849,10 +850,8 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
* shall setup this area for VF. Skipping the
* HW programming
*/
if (is_virtfn)
if (is_virtfn || bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
goto skip_ctx_setup;
if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
goto config_vf_res;
lvl = ctx->qpc_tbl.level;
pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
@ -896,16 +895,14 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
config_vf_res:
req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
skip_ctx_setup:
if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)
flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT;
req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);


@ -131,6 +131,8 @@ static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
#define RCFW_CMD_IS_BLOCKING 0x8000
#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL
/* HWRM version 1.10.3.18 */
#define HWRM_VERSION_READ_CTX 0x1000A00030012
/* Crsq buf is 1024-Byte */
struct bnxt_qplib_crsbe {


@ -39,6 +39,8 @@
#ifndef __BNXT_QPLIB_RES_H__
#define __BNXT_QPLIB_RES_H__
#include "bnxt_ulp.h"
extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_57508 0x1750
@ -302,6 +304,7 @@ struct bnxt_qplib_res {
struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_dev_attr *dattr;
struct net_device *netdev;
struct bnxt_en_dev *en_dev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
/* To protect the pd table bit map */
@ -576,4 +579,14 @@ static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
}
static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
{
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
}
static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
{
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
}
#endif /* __BNXT_QPLIB_RES_H__ */


@ -981,3 +981,38 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
return rc;
}
int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 res_type,
u32 xid, u32 resp_size, void *resp_va)
{
struct creq_read_context resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_read_context req = {};
struct bnxt_qplib_rcfw_sbuf sbuf;
int rc;
sbuf.size = resp_size;
sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
&sbuf.dma_addr, GFP_KERNEL);
if (!sbuf.sb)
return -ENOMEM;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_READ_CONTEXT, sizeof(req));
req.resp_addr = cpu_to_le64(sbuf.dma_addr);
req.resp_size = resp_size / BNXT_QPLIB_CMDQE_UNITS;
req.xid = cpu_to_le32(xid);
req.type = res_type;
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto free_mem;
memcpy(resp_va, sbuf.sb, resp_size);
free_mem:
dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
return rc;
}


@ -353,6 +353,8 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
struct bnxt_qplib_ext_stat *estat);
int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
u32 resp_size, void *resp_va);
#define BNXT_VAR_MAX_WQE 4352
#define BNXT_VAR_MAX_SLOT_ALIGN 256


@ -216,6 +216,8 @@ struct cmdq_initialize_fw {
__le16 flags;
#define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
#define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
#define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
#define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@ -559,6 +561,7 @@ struct cmdq_modify_qp {
#define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP
u8 cmd_size;
__le16 flags;
#define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL
__le16 cookie;
u8 resp_size;
u8 qp_type;
@ -1137,6 +1140,7 @@ struct cmdq_create_cq {
#define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
#define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
#define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
#define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@ -1169,7 +1173,18 @@ struct cmdq_create_cq {
__le32 cq_size;
__le64 pbl;
__le16 steering_tag;
u8 reserved48[6];
u8 reserved48[2];
__le32 coalescing;
#define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL
#define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0
#define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL
#define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9
#define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL
#define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14
#define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL
#define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL
#define CMDQ_CREATE_CQ_UNUSED12_SFT 20
__le64 reserved64;
};
/* creq_create_cq_resp (size:128b/16B) */
@ -2251,6 +2266,46 @@ struct creq_set_func_resources_resp {
u8 reserved48[6];
};
/* cmdq_read_context (size:192b/24B) */
struct cmdq_read_context {
u8 opcode;
#define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
#define CMDQ_READ_CONTEXT_OPCODE_LAST CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT
u8 cmd_size;
__le16 flags;
__le16 cookie;
u8 resp_size;
u8 reserved8;
__le64 resp_addr;
__le32 xid;
u8 type;
#define CMDQ_READ_CONTEXT_TYPE_QPC 0x0UL
#define CMDQ_READ_CONTEXT_TYPE_CQ 0x1UL
#define CMDQ_READ_CONTEXT_TYPE_MRW 0x2UL
#define CMDQ_READ_CONTEXT_TYPE_SRQ 0x3UL
#define CMDQ_READ_CONTEXT_TYPE_LAST CMDQ_READ_CONTEXT_TYPE_SRQ
u8 unused_0[3];
};
/* creq_read_context (size:128b/16B) */
struct creq_read_context {
u8 type;
#define CREQ_READ_CONTEXT_TYPE_MASK 0x3fUL
#define CREQ_READ_CONTEXT_TYPE_SFT 0
#define CREQ_READ_CONTEXT_TYPE_QP_EVENT 0x38UL
#define CREQ_READ_CONTEXT_TYPE_LAST CREQ_READ_CONTEXT_TYPE_QP_EVENT
u8 status;
__le16 cookie;
__le32 reserved32;
u8 v;
#define CREQ_READ_CONTEXT_V 0x1UL
u8 event;
#define CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 0x85UL
#define CREQ_READ_CONTEXT_EVENT_LAST CREQ_READ_CONTEXT_EVENT_READ_CONTEXT
__le16 reserved16;
__le32 reserved_32;
};
/* cmdq_map_tc_to_cos (size:192b/24B) */
struct cmdq_map_tc_to_cos {
u8 opcode;


@ -30,7 +30,8 @@ enum efa_admin_aq_opcode {
EFA_ADMIN_DEALLOC_UAR = 17,
EFA_ADMIN_CREATE_EQ = 18,
EFA_ADMIN_DESTROY_EQ = 19,
EFA_ADMIN_MAX_OPCODE = 19,
EFA_ADMIN_ALLOC_MR = 20,
EFA_ADMIN_MAX_OPCODE = 20,
};
enum efa_admin_aq_feature_id {
@ -150,8 +151,11 @@ struct efa_admin_create_qp_cmd {
/* UAR number */
u16 uar;
/* Requested service level for the QP, 0 is the default SL */
u8 sl;
/* MBZ */
u16 reserved;
u8 reserved;
/* MBZ */
u32 reserved2;
@ -459,6 +463,41 @@ struct efa_admin_dereg_mr_resp {
struct efa_admin_acq_common_desc acq_common_desc;
};
/*
* Allocation of MemoryRegion, required for QP working with Virtual
* Addresses in kernel verbs semantics, ready for fast registration use.
*/
struct efa_admin_alloc_mr_cmd {
/* Common Admin Queue descriptor */
struct efa_admin_aq_common_desc aq_common_desc;
/* Protection Domain */
u16 pd;
/* MBZ */
u16 reserved1;
/* Maximum number of pages this MR supports. */
u32 max_pages;
};
struct efa_admin_alloc_mr_resp {
/* Common Admin Queue completion descriptor */
struct efa_admin_acq_common_desc acq_common_desc;
/*
* L_Key, to be used in conjunction with local buffer references in
* SQ and RQ WQE, or with virtual RQ/CQ rings
*/
u32 l_key;
/*
* R_Key, to be used in RDMA messages to refer to remotely accessed
* memory region
*/
u32 r_key;
};
struct efa_admin_create_cq_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
@ -483,8 +522,8 @@ struct efa_admin_create_cq_cmd {
*/
u8 cq_caps_2;
/* completion queue depth in # of entries. must be power of 2 */
u16 cq_depth;
/* Sub completion queue depth in # of entries. must be power of 2 */
u16 sub_cq_depth;
/* EQ number assigned to this cq */
u16 eqn;
@ -519,8 +558,8 @@ struct efa_admin_create_cq_resp {
u16 cq_idx;
/* actual cq depth in number of entries */
u16 cq_actual_depth;
/* actual sub cq depth in number of entries */
u16 sub_cq_actual_depth;
/* CQ doorbell address, as offset to PCIe DB BAR */
u32 db_offset;
@ -578,6 +617,8 @@ struct efa_admin_basic_stats {
u64 rx_pkts;
u64 rx_drops;
u64 qkey_viol;
};
struct efa_admin_messages_stats {
@ -677,6 +718,15 @@ struct efa_admin_feature_device_attr_desc {
/* Unique global ID for an EFA device */
u64 guid;
/* The device maximum link speed in Gbit/sec */
u16 max_link_speed_gbps;
/* MBZ */
u16 reserved0;
/* MBZ */
u32 reserved1;
};
struct efa_admin_feature_queue_attr_desc {
@ -1057,7 +1107,6 @@ struct efa_admin_host_info {
/* create_eq_cmd */
#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0)
/* host_info */


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_H_
@ -96,7 +96,7 @@ struct efa_admin_acq_entry {
struct efa_admin_aenq_common_desc {
u16 group;
u16 syndrom;
u16 syndrome;
/*
* 0 : phase


@ -31,6 +31,7 @@ int efa_com_create_qp(struct efa_com_dev *edev,
create_qp_cmd.qp_alloc_size.recv_queue_depth =
params->rq_depth;
create_qp_cmd.uar = params->uarn;
create_qp_cmd.sl = params->sl;
if (params->unsolicited_write_recv)
EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
@ -163,7 +164,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
EFA_SET(&create_cmd.cq_caps_2,
EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
params->entry_size_in_bytes / 4);
create_cmd.cq_depth = params->cq_depth;
create_cmd.sub_cq_depth = params->sub_cq_depth;
create_cmd.num_sub_cqs = params->num_sub_cqs;
create_cmd.uar = params->uarn;
if (params->interrupt_mode_enabled) {
@ -191,7 +192,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
}
result->cq_idx = cmd_completion.cq_idx;
result->actual_depth = params->cq_depth;
result->actual_depth = params->sub_cq_depth;
result->db_off = cmd_completion.db_offset;
result->db_valid = EFA_GET(&cmd_completion.flags,
EFA_ADMIN_CREATE_CQ_RESP_DB_VALID);
@ -466,6 +467,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_rdma_size = resp.u.device_attr.max_rdma_size;
result->device_caps = resp.u.device_attr.device_caps;
result->guid = resp.u.device_attr.guid;
result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps;
if (result->admin_api_version < 1) {
ibdev_err_ratelimited(


@ -27,6 +27,7 @@ struct efa_com_create_qp_params {
u16 pd;
u16 uarn;
u8 qp_type;
u8 sl;
u8 unsolicited_write_recv : 1;
};
@ -71,7 +72,7 @@ struct efa_com_create_cq_params {
/* cq physical base address in OS memory */
dma_addr_t dma_addr;
/* completion queue depth in # of entries */
u16 cq_depth;
u16 sub_cq_depth;
u16 num_sub_cqs;
u16 uarn;
u16 eqn;
@ -141,6 +142,7 @@ struct efa_com_get_device_attr_result {
u16 max_wr_rdma_sge;
u16 max_tx_batch;
u16 min_sq_depth;
u16 max_link_speed_gbps;
u8 db_bar;
};


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_IO_H_
@ -10,6 +10,7 @@
#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
enum efa_io_queue_type {
/* send queue (of a QP) */
@ -25,6 +26,10 @@ enum efa_io_send_op_type {
EFA_IO_RDMA_READ = 1,
/* RDMA write */
EFA_IO_RDMA_WRITE = 2,
/* Fast MR registration */
EFA_IO_FAST_REG = 3,
/* Fast MR invalidation */
EFA_IO_FAST_INV = 4,
};
enum efa_io_comp_status {
@ -34,15 +39,15 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_FLUSHED = 1,
/* Internal QP error */
EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
/* Bad operation type */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3,
/* Unsupported operation */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3,
/* Bad AH */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
/* LKEY not registered or does not match IOVA */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
/* Message too long */
EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
/* Destination ENI is down or does not run EFA */
/* RKEY not registered or does not match remote IOVA */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
/* Connection was reset by remote side */
EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
@ -54,8 +59,17 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
/* Unexpected status returned by responder */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
/* Unresponsive remote - detected locally */
/* Unresponsive remote - was previously responsive */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
/* No valid AH at remote side (required for RDMA operations) */
EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
/* Unreachable remote - never received a response */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
};
enum efa_io_frwr_pbl_mode {
EFA_IO_FRWR_INLINE_PBL = 0,
EFA_IO_FRWR_DIRECT_PBL = 1,
};
struct efa_io_tx_meta_desc {
@ -95,13 +109,13 @@ struct efa_io_tx_meta_desc {
/*
* If inline_msg bit is set, length of inline message in bytes,
* otherwise length of SGL (number of buffers).
* otherwise length of SGL (number of buffers).
*/
u16 length;
/*
* immediate data: if has_imm is set, then this field is included
* within Tx message and reported in remote Rx completion.
* immediate data: if has_imm is set, then this field is included within
* Tx message and reported in remote Rx completion.
*/
u32 immediate_data;
@ -158,6 +172,63 @@ struct efa_io_rdma_req {
struct efa_io_tx_buf_desc local_mem[1];
};
struct efa_io_fast_mr_reg_req {
/* Updated local key of the MR after lkey/rkey increment */
u32 lkey;
/*
* permissions
* 0 : local_write_enable - Local write permissions:
* must be set for RQ buffers and buffers posted for
* RDMA Read requests
* 1 : remote_write_enable - Remote write
* permissions: must be set to enable RDMA write to
* the region
* 2 : remote_read_enable - Remote read permissions:
* must be set to enable RDMA read from the region
* 7:3 : reserved2 - MBZ
*/
u8 permissions;
/*
* control flags
* 4:0 : phys_page_size_shift - page size is (1 <<
* phys_page_size_shift)
* 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode
* 7 : reserved - MBZ
*/
u8 flags;
/* MBZ */
u8 reserved[2];
/* IO Virtual Address associated with this MR */
u64 iova;
/* Memory region length, in bytes */
u64 mr_length;
/* Physical Buffer List, each element is page-aligned. */
union {
/*
* Inline array of physical page addresses (optimization
* for short region activation).
*/
u64 inline_array[1];
/* points to PBL (Currently only direct) */
u64 dma_addr;
} pbl;
};
struct efa_io_fast_mr_inv_req {
/* Local key of the MR to invalidate */
u32 lkey;
/* MBZ */
u8 reserved[28];
};
/*
* Tx WQE, composed of tx meta descriptors followed by either tx buffer
* descriptors or inline data
@ -174,6 +245,12 @@ struct efa_io_tx_wqe {
/* RDMA local and remote memory addresses */
struct efa_io_rdma_req rdma_req;
/* Fast registration */
struct efa_io_fast_mr_reg_req reg_mr_req;
/* Fast invalidation */
struct efa_io_fast_mr_inv_req inv_mr_req;
} data;
};
@ -208,7 +285,7 @@ struct efa_io_rx_desc {
struct efa_io_cdesc_common {
/*
* verbs-generated request ID, as provided in the completed tx or rx
* descriptor.
* descriptor.
*/
u16 req_id;
@ -221,7 +298,8 @@ struct efa_io_cdesc_common {
* 3 : has_imm - indicates that immediate data is
* present - for RX completions only
* 6:4 : op_type - enum efa_io_send_op_type
* 7 : reserved31 - MBZ
* 7 : unsolicited - indicates that there is no
* matching request - for RDMA with imm. RX only
*/
u8 flags;
@ -291,6 +369,13 @@ struct efa_io_rx_cdesc_ex {
/* tx_buf_desc */
#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
/* fast_mr_reg_req */
#define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0)
#define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1)
#define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2)
#define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
#define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5)
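
A standalone sketch of filling the permissions and flags bytes of efa_io_fast_mr_reg_req from these bit definitions; the helpers and the 4 KB page-size choice are illustrative, not part of the driver:

#include <stdint.h>
#include <stdio.h>

/* Bit positions taken from the fast_mr_reg_req masks above. */
#define FRWR_LOCAL_WRITE     (1u << 0)
#define FRWR_REMOTE_WRITE    (1u << 1)
#define FRWR_REMOTE_READ     (1u << 2)
#define FRWR_PAGE_SHIFT_MASK 0x1fu   /* bits 4:0 */
#define FRWR_PBL_MODE_SFT    5       /* bits 6:5, enum efa_io_frwr_pbl_mode */

static uint8_t frwr_permissions(int lwrite, int rwrite, int rread)
{
	uint8_t p = 0;

	if (lwrite)
		p |= FRWR_LOCAL_WRITE;
	if (rwrite)
		p |= FRWR_REMOTE_WRITE;
	if (rread)
		p |= FRWR_REMOTE_READ;
	return p;
}

static uint8_t frwr_flags(unsigned int page_shift, unsigned int pbl_mode)
{
	return (uint8_t)((page_shift & FRWR_PAGE_SHIFT_MASK) |
			 (pbl_mode << FRWR_PBL_MODE_SFT));
}

int main(void)
{
	/* 4 KB pages (shift 12), inline PBL (EFA_IO_FRWR_INLINE_PBL == 0). */
	printf("permissions=0x%02x flags=0x%02x\n",
	       frwr_permissions(1, 1, 1), frwr_flags(12, 0));
	return 0;
}
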
/* rx_desc */
#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
@ -301,5 +386,6 @@ struct efa_io_rx_cdesc_ex {
#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)
#define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7)
#endif /* _EFA_IO_H_ */

View File

@ -85,6 +85,8 @@ static const struct rdma_stat_desc efa_port_stats_descs[] = {
EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};
#define EFA_DEFAULT_LINK_SPEED_GBPS 100
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
@ -277,10 +279,47 @@ int efa_query_device(struct ib_device *ibdev,
return 0;
}
static void efa_link_gbps_to_speed_and_width(u16 gbps,
enum ib_port_speed *speed,
enum ib_port_width *width)
{
if (gbps >= 400) {
*width = IB_WIDTH_8X;
*speed = IB_SPEED_HDR;
} else if (gbps >= 200) {
*width = IB_WIDTH_4X;
*speed = IB_SPEED_HDR;
} else if (gbps >= 120) {
*width = IB_WIDTH_12X;
*speed = IB_SPEED_FDR10;
} else if (gbps >= 100) {
*width = IB_WIDTH_4X;
*speed = IB_SPEED_EDR;
} else if (gbps >= 60) {
*width = IB_WIDTH_12X;
*speed = IB_SPEED_DDR;
} else if (gbps >= 50) {
*width = IB_WIDTH_1X;
*speed = IB_SPEED_HDR;
} else if (gbps >= 40) {
*width = IB_WIDTH_4X;
*speed = IB_SPEED_FDR10;
} else if (gbps >= 30) {
*width = IB_WIDTH_12X;
*speed = IB_SPEED_SDR;
} else {
*width = IB_WIDTH_1X;
*speed = IB_SPEED_EDR;
}
}
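
A quick user-space check of the mapping above: every advertised (speed, width) pair multiplies out to at least the threshold it is chosen for. The per-lane Gbit/s figures are the customary IB rates and are an assumption here, not driver data:

#include <stdio.h>

struct map_entry {
	unsigned int min_gbps;  /* threshold in the driver's if/else chain */
	double lane_gbps;       /* assumed per-lane rate for the chosen speed */
	unsigned int width;     /* lanes */
	const char *name;
};

/* Mirrors efa_link_gbps_to_speed_and_width(), highest threshold first. */
static const struct map_entry tbl[] = {
	{ 400, 50.0,     8, "HDR x8"    },
	{ 200, 50.0,     4, "HDR x4"    },
	{ 120, 10.3125, 12, "FDR10 x12" },
	{ 100, 25.0,     4, "EDR x4"    },
	{  60,  5.0,    12, "DDR x12"   },
	{  50, 50.0,     1, "HDR x1"    },
	{  40, 10.3125,  4, "FDR10 x4"  },
	{  30,  2.5,    12, "SDR x12"   },
	{   0, 25.0,     1, "EDR x1"    },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
		printf("%-10s covers >= %3u Gbps, raw capacity %.1f Gbps\n",
		       tbl[i].name, tbl[i].min_gbps,
		       tbl[i].lane_gbps * tbl[i].width);
	return 0;
}
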
int efa_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct efa_dev *dev = to_edev(ibdev);
enum ib_port_speed link_speed;
enum ib_port_width link_width;
u16 link_gbps;
props->lmc = 1;
@ -288,8 +327,10 @@ int efa_query_port(struct ib_device *ibdev, u32 port,
props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->active_speed = IB_SPEED_EDR;
props->active_width = IB_WIDTH_4X;
link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
props->active_speed = link_speed;
props->active_width = link_width;
props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->max_msg_sz = dev->dev_attr.mtu;
@ -676,7 +717,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
goto err_out;
}
if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) {
if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
@ -732,6 +773,8 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
create_qp_params.rq_base_addr = qp->rq_dma_addr;
}
create_qp_params.sl = cmd.sl;
if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
create_qp_params.unsolicited_write_recv = true;
@ -1167,7 +1210,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
params.uarn = cq->ucontext->uarn;
params.cq_depth = entries;
params.sub_cq_depth = entries;
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;


@ -13235,7 +13235,7 @@ int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
/*
* Clear all interrupt sources on the chip.
*/
void clear_all_interrupts(struct hfi1_devdata *dd)
static void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;


@ -1404,7 +1404,6 @@ irqreturn_t receive_context_interrupt_napi(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);


@ -179,8 +179,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
hr_cq->cqn);
if (ret)
dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
ret, hr_cq->cqn);
xa_erase_irq(&cq_table->array, hr_cq->cqn);


@ -5,6 +5,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/pci.h>
#include "hns_roce_device.h"
@ -86,7 +87,7 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev),
dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
hns_roce_dbgfs_root);
create_sw_stat_debugfs(hr_dev, dbgfs->root);


@ -489,12 +489,6 @@ struct hns_roce_bank {
u32 next; /* Next ID to allocate. */
};
struct hns_roce_idx_table {
u32 *spare_idx;
u32 head;
u32 tail;
};
struct hns_roce_qp_table {
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
@ -503,7 +497,7 @@ struct hns_roce_qp_table {
struct mutex scc_mutex;
struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
struct mutex bank_mutex;
struct hns_roce_idx_table idx_table;
struct xarray dip_xa;
};
struct hns_roce_cq_table {
@ -593,6 +587,7 @@ struct hns_roce_dev;
enum {
HNS_ROCE_FLUSH_FLAG = 0,
HNS_ROCE_STOP_FLUSH_FLAG = 1,
};
struct hns_roce_work {
@ -656,6 +651,8 @@ struct hns_roce_qp {
enum hns_roce_cong_type cong_type;
u8 tc_mode;
u8 priority;
spinlock_t flush_lock;
struct hns_roce_dip *dip;
};
struct hns_roce_ib_iboe {
@ -982,8 +979,6 @@ struct hns_roce_dev {
enum hns_roce_device_state state;
struct list_head qp_list; /* list of all qps on this dev */
spinlock_t qp_list_lock; /* protect qp_list */
struct list_head dip_list; /* list of all dest ips on this dev */
spinlock_t dip_list_lock; /* protect dip_list */
struct list_head pgdir_list;
struct mutex pgdir_mutex;
@ -1289,6 +1284,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);


@ -300,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct device *dev = hr_dev->dev;
unsigned long mhop_obj = obj;
u32 l0_idx, l1_idx, l2_idx;
u32 chunk_ba_num;
@ -331,14 +331,14 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
index->buf = l0_idx;
break;
default:
ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
table->type, mhop->hop_num);
dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
table->type, mhop->hop_num);
return -EINVAL;
}
if (unlikely(index->buf >= table->num_hem)) {
ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
table->type, index->buf, table->num_hem);
dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
table->type, index->buf, table->num_hem);
return -EINVAL;
}
@ -448,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct device *dev = hr_dev->dev;
u32 step_idx;
int ret = 0;
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
if (ret) {
ibdev_err(ibdev, "set HEM step 0 failed!\n");
dev_err(dev, "set HEM step 0 failed!\n");
goto out;
}
}
@ -463,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
if (ret) {
ibdev_err(ibdev, "set HEM step 1 failed!\n");
dev_err(dev, "set HEM step 1 failed!\n");
goto out;
}
}
@ -475,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
step_idx = mhop->hop_num;
ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
if (ret)
ibdev_err(ibdev, "set HEM step last failed!\n");
dev_err(dev, "set HEM step last failed!\n");
}
out:
return ret;
@ -485,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
ibdev_err(ibdev, "calc hem config failed!\n");
dev_err(dev, "calc hem config failed!\n");
return ret;
}
@ -504,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
if (ret) {
ibdev_err(ibdev, "alloc mhop hem failed!\n");
dev_err(dev, "alloc mhop hem failed!\n");
goto out;
}
@ -512,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
if (table->type < HEM_TYPE_MTT) {
ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
if (ret) {
ibdev_err(ibdev, "set HEM address to HW failed!\n");
dev_err(dev, "set HEM address to HW failed!\n");
goto err_alloc;
}
}
@ -575,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct device *dev = hr_dev->dev;
u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
u32 step_idx;
@ -605,21 +605,21 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
if (ret)
ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n",
hop_num, ret);
dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
hop_num, ret);
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
if (ret)
ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n",
ret);
dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
ret);
}
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
if (ret)
ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n",
ret);
dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
ret);
}
}
}
@ -629,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
unsigned long obj,
int check_refcount)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
ibdev_err(ibdev, "calc hem config failed!\n");
dev_err(dev, "calc hem config failed!\n");
return;
}
@ -672,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
if (ret)
dev_warn(dev, "failed to clear HEM base address, ret = %d.\n",
ret);
dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;


@ -373,19 +373,12 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
static int check_send_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
hr_qp->state == IB_QPS_RTR)) {
ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
hr_qp->state);
hr_qp->state == IB_QPS_RTR))
return -EINVAL;
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
hr_dev->state);
else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
}
return 0;
}
@ -582,7 +575,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
@ -2560,20 +2553,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
free_link_table_buf(hr_dev, &priv->ext_llm);
}
static void free_dip_list(struct hns_roce_dev *hr_dev)
static void free_dip_entry(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dip *hr_dip;
struct hns_roce_dip *tmp;
unsigned long flags;
unsigned long idx;
spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
xa_lock(&hr_dev->qp_table.dip_xa);
list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) {
list_del(&hr_dip->node);
xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
__xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
kfree(hr_dip);
}
spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
xa_unlock(&hr_dev->qp_table.dip_xa);
}
static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
@ -2775,8 +2767,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
IB_QPS_INIT, NULL);
if (ret) {
ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
ret);
ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
ret);
return ret;
}
@ -2981,7 +2973,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
hns_roce_free_link_table(hr_dev);
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
free_dip_list(hr_dev);
free_dip_entry(hr_dev);
}
static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
@ -3421,8 +3413,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
if (ret) {
ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
ret);
ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
ret);
return ret;
}
@ -3461,9 +3453,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
ret = free_mr_post_send_lp_wqe(hr_qp);
if (ret) {
ibdev_err(ibdev,
"failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
hr_qp->qpn, ret);
ibdev_err_ratelimited(ibdev,
"failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
hr_qp->qpn, ret);
break;
}
@ -3474,16 +3466,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
while (cqe_cnt) {
npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
if (npolled < 0) {
ibdev_err(ibdev,
"failed to poll cqe for free mr, remain %d cqe.\n",
cqe_cnt);
ibdev_err_ratelimited(ibdev,
"failed to poll cqe for free mr, remain %d cqe.\n",
cqe_cnt);
goto out;
}
if (time_after(jiffies, end)) {
ibdev_err(ibdev,
"failed to poll cqe for free mr and timeout, remain %d cqe.\n",
cqe_cnt);
ibdev_err_ratelimited(ibdev,
"failed to poll cqe for free mr and timeout, remain %d cqe.\n",
cqe_cnt);
goto out;
}
cqe_cnt -= npolled;
@ -4701,26 +4693,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
return 0;
}
static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
{
struct hns_roce_dip *hr_dip;
int ret;
hr_dip = xa_load(dip_xa, qpn);
if (hr_dip)
return 0;
hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
if (!hr_dip)
return -ENOMEM;
ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
if (ret)
kfree(hr_dip);
return ret;
}
static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
u32 *dip_idx)
{
const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
u32 *head = &hr_dev->qp_table.idx_table.head;
u32 *tail = &hr_dev->qp_table.idx_table.tail;
struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_dip *hr_dip;
unsigned long flags;
unsigned long idx;
int ret = 0;
spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
if (ret)
return ret;
spare_idx[*tail] = ibqp->qp_num;
*tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
xa_lock(dip_xa);
list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
xa_for_each(dip_xa, idx, hr_dip) {
if (hr_dip->qp_cnt &&
!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
*dip_idx = hr_dip->dip_idx;
hr_dip->qp_cnt++;
hr_qp->dip = hr_dip;
goto out;
}
}
@ -4728,19 +4743,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
/* If no dgid is found, a new dip and a mapping between dgid and
* dip_idx will be created.
*/
hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC);
if (!hr_dip) {
ret = -ENOMEM;
goto out;
xa_for_each(dip_xa, idx, hr_dip) {
if (hr_dip->qp_cnt)
continue;
*dip_idx = idx;
memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
hr_dip->dip_idx = idx;
hr_dip->qp_cnt++;
hr_qp->dip = hr_dip;
break;
}
memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
hr_dip->dip_idx = *dip_idx = spare_idx[*head];
*head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
list_add_tail(&hr_dip->node, &hr_dev->dip_list);
/* This should never happen. */
if (WARN_ON_ONCE(!hr_qp->dip))
ret = -ENOSPC;
out:
spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
xa_unlock(dip_xa);
return ret;
}
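
Stripped of the xarray plumbing, the DIP reuse policy is: a live entry (qp_cnt > 0) with a matching DGID is shared, otherwise any entry with qp_cnt == 0 is rebound to the new DGID. A standalone model with a plain array standing in for dip_xa:

#include <stdio.h>
#include <string.h>

#define GID_LEN   16
#define NUM_SLOTS 8   /* stands in for the per-device dip_xa */

struct dip_entry {
	unsigned char dgid[GID_LEN];
	unsigned int qp_cnt;
};

static struct dip_entry table[NUM_SLOTS];

/* Return the dip index for dgid: reuse a live match, else claim a free slot. */
static int get_dip_idx(const unsigned char *dgid)
{
	for (int i = 0; i < NUM_SLOTS; i++)
		if (table[i].qp_cnt && !memcmp(table[i].dgid, dgid, GID_LEN)) {
			table[i].qp_cnt++;
			return i;
		}

	for (int i = 0; i < NUM_SLOTS; i++)
		if (!table[i].qp_cnt) {
			memcpy(table[i].dgid, dgid, GID_LEN);
			table[i].qp_cnt = 1;
			return i;
		}

	return -1; /* no free slot */
}

static void put_dip_idx(int idx)
{
	if (--table[idx].qp_cnt == 0)
		memset(table[idx].dgid, 0, GID_LEN);
}

int main(void)
{
	unsigned char a[GID_LEN] = { 1 }, b[GID_LEN] = { 2 };
	int i1 = get_dip_idx(a);
	int i2 = get_dip_idx(a);   /* same DGID -> same index */
	int i3 = get_dip_idx(b);   /* new DGID -> next free slot */

	printf("a -> %d, a again -> %d, b -> %d\n", i1, i2, i3);
	put_dip_idx(i1);
	return 0;
}
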
@ -5061,10 +5081,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
int ret = 0;
if (!check_qp_state(cur_state, new_state)) {
ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
if (!check_qp_state(cur_state, new_state))
return -EINVAL;
}
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
@ -5325,7 +5343,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* SW pass context to HW */
ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
goto out;
}
@ -5463,7 +5481,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret) {
ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
ibdev_err_ratelimited(ibdev,
"failed to query QPC, ret = %d.\n",
ret);
ret = -EINVAL;
goto out;
}
@ -5471,7 +5491,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
state = hr_reg_read(&context, QPC_QP_ST);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
ibdev_err(ibdev, "Illegal ib_qp_state\n");
ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
ret = -EINVAL;
goto out;
}
@ -5564,9 +5584,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
hr_qp->state, IB_QPS_RESET, udata);
if (ret)
ibdev_err(ibdev,
"failed to modify QP to RST, ret = %d.\n",
ret);
ibdev_err_ratelimited(ibdev,
"failed to modify QP to RST, ret = %d.\n",
ret);
}
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
@ -5594,17 +5614,41 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
return ret;
}
static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_dip *hr_dip = hr_qp->dip;
xa_lock(&hr_dev->qp_table.dip_xa);
hr_dip->qp_cnt--;
if (!hr_dip->qp_cnt)
memset(hr_dip->dgid, 0, GID_LEN_V2);
xa_unlock(&hr_dev->qp_table.dip_xa);
}
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
unsigned long flags;
int ret;
/* Make sure flush_cqe() is completed */
spin_lock_irqsave(&hr_qp->flush_lock, flags);
set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
flush_work(&hr_qp->flush_work.work);
if (hr_qp->cong_type == CONG_TYPE_DIP)
put_dip_ctx_idx(hr_dev, hr_qp);
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
hr_qp->qpn, ret);
ibdev_err_ratelimited(&hr_dev->ib_dev,
"failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
hr_qp->qpn, ret);
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
@ -5898,9 +5942,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to process cmd when modifying CQ, ret = %d.\n",
ret);
ibdev_err_ratelimited(&hr_dev->ib_dev,
"failed to process cmd when modifying CQ, ret = %d.\n",
ret);
err_out:
if (ret)
@ -5924,9 +5968,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
HNS_ROCE_CMD_QUERY_CQC, cqn);
if (ret) {
ibdev_err(&hr_dev->ib_dev,
"failed to process cmd when querying CQ, ret = %d.\n",
ret);
ibdev_err_ratelimited(&hr_dev->ib_dev,
"failed to process cmd when querying CQ, ret = %d.\n",
ret);
goto err_mailbox;
}
@ -5967,11 +6011,10 @@ static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
return ret;
}
static void hns_roce_irq_work_handle(struct work_struct *work)
static void dump_aeqe_log(struct hns_roce_work *irq_work)
{
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
struct hns_roce_dev *hr_dev = irq_work->hr_dev;
struct ib_device *ibdev = &hr_dev->ib_dev;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@ -6015,6 +6058,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
ibdev_warn(ibdev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_MB:
break;
case HNS_ROCE_EVENT_TYPE_FLR:
ibdev_warn(ibdev, "function level reset.\n");
break;
@ -6025,8 +6070,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
ibdev_err(ibdev, "invalid xrceth error.\n");
break;
default:
ibdev_info(ibdev, "Undefined event %d.\n",
irq_work->event_type);
break;
}
}
static void hns_roce_irq_work_handle(struct work_struct *work)
{
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
struct hns_roce_dev *hr_dev = irq_work->hr_dev;
int event_type = irq_work->event_type;
u32 queue_num = irq_work->queue_num;
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
hns_roce_qp_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
hns_roce_srq_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
hns_roce_cq_event(hr_dev, queue_num, event_type);
break;
default:
break;
}
dump_aeqe_log(irq_work);
kfree(irq_work);
}
@ -6087,14 +6170,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
irqreturn_t aeqe_found = IRQ_NONE;
int num_aeqes = 0;
int event_type;
u32 queue_num;
int sub_type;
while (aeqe) {
while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
/* Make sure we read AEQ entry after we have checked the
* ownership bit
*/
@ -6105,25 +6188,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
hns_roce_qp_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
hns_roce_srq_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
hns_roce_cq_event(hr_dev, queue_num, event_type);
hns_roce_flush_cqe(hr_dev, queue_num);
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@ -6131,12 +6201,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
aeqe->event.cmd.status,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
case HNS_ROCE_EVENT_TYPE_FLR:
break;
default:
dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
event_type, eq->eqn, eq->cons_index);
break;
}
@ -6150,6 +6215,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
++num_aeqes;
}
update_eq_db(eq);
@ -6699,6 +6765,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
int ret;
int i;
if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
return -EINVAL;
other_num = hr_dev->caps.num_other_vectors;
comp_num = hr_dev->caps.num_comp_vectors;
aeq_num = hr_dev->caps.num_aeq_vectors;
@ -7017,6 +7086,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
@ -7035,6 +7105,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
hr_dev->active = false;
hr_dev->dis_db = true;
rdma_user_mmap_disassociate(&hr_dev->ib_dev);
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;


@ -85,6 +85,11 @@
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
/* budget must be smaller than aeqe_depth to guarantee that we update
* the ci before we polled all the entries in the EQ.
*/
#define HNS_AEQ_POLLING_BUDGET 64
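
A minimal sketch of the bounded polling the comment describes: at most HNS_AEQ_POLLING_BUDGET entries are consumed per pass, then the consumer index is published so the producer cannot lap unacknowledged slots. The ring arithmetic below is simplified and the depth is arbitrary:

#include <stdio.h>

#define AEQ_DEPTH   4096
#define POLL_BUDGET 64   /* must stay below AEQ_DEPTH */

/* Consume at most POLL_BUDGET entries, then publish the consumer index. */
static unsigned int poll_aeq(unsigned int ci, unsigned int pi)
{
	unsigned int handled = 0;

	while (ci != pi && handled < POLL_BUDGET) {
		/* handle_aeqe(ci % AEQ_DEPTH); */
		ci++;
		handled++;
	}
	/* update_eq_db(ci): hardware may now reuse the freed slots. */
	return ci;
}

int main(void)
{
	unsigned int ci = 0, pi = 100;

	ci = poll_aeq(ci, pi);
	printf("consumed up to %u of %u pending entries\n", ci, pi);
	return 0;
}
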
enum {
HNS_ROCE_CMD_FLAG_IN = BIT(0),
HNS_ROCE_CMD_FLAG_OUT = BIT(1),
@ -919,6 +924,7 @@ struct hns_roce_v2_rc_send_wqe {
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
@ -1342,7 +1348,7 @@ struct hns_roce_v2_priv {
struct hns_roce_dip {
u8 dgid[GID_LEN_V2];
u32 dip_idx;
struct list_head node; /* all dips are on a list */
u32 qp_cnt;
};
struct fmea_ram_ecc {


@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
pgprot_t prot;
int ret;
if (hr_dev->dis_db) {
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
return -EPERM;
}
rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
if (!rdma_entry) {
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
@ -1130,8 +1135,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
INIT_LIST_HEAD(&hr_dev->qp_list);
spin_lock_init(&hr_dev->qp_list_lock);
INIT_LIST_HEAD(&hr_dev->dip_list);
spin_lock_init(&hr_dev->dip_list_lock);
ret = hns_roce_register_device(hr_dev);
if (ret)


@ -138,8 +138,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
ret);
ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
ret);
}
free_mr_pbl(hr_dev, mr);
@ -435,15 +435,16 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
unsigned int *sg_offset_p)
{
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_mtr *mtr = &mr->pbl_mtr;
int ret, sg_num = 0;
if (!IS_ALIGNED(*sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
ibmr->page_size < HNS_HW_PAGE_SIZE ||
ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
return sg_num;
@ -454,7 +455,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
if (!mr->page_list)
return sg_num;
sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
if (sg_num < 1) {
ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);


@ -39,6 +39,25 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
u32 qpn)
{
struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
unsigned long flags;
xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (qp)
refcount_inc(&qp->refcount);
xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
if (!qp)
dev_warn(dev, "async event for bogus QP %08x\n", qpn);
return qp;
}
static void flush_work_handle(struct work_struct *work)
{
struct hns_roce_work *flush_work = container_of(work,
@ -71,11 +90,18 @@ static void flush_work_handle(struct work_struct *work)
void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_work *flush_work = &hr_qp->flush_work;
unsigned long flags;
spin_lock_irqsave(&hr_qp->flush_lock, flags);
/* Exit directly after destroy_qp() */
if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
return;
}
flush_work->hr_dev = hr_dev;
INIT_WORK(&flush_work->work, flush_work_handle);
refcount_inc(&hr_qp->refcount);
queue_work(hr_dev->irq_workq, &flush_work->work);
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
}
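init_flush_work() above now takes flush_lock and returns immediately if the destroy path has already set HNS_ROCE_STOP_FLUSH_FLAG, so flush work can no longer be queued against a dying QP. A small pthread sketch of that stop-flag guard, with hypothetical names (not the driver code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
    pthread_mutex_t lock;
    bool stopped;        /* set by the destroy path */
    int refs;            /* pretend refcount */
};

/* returns true if work was (notionally) queued */
static bool maybe_queue_work(struct obj *o)
{
    bool queued = false;

    pthread_mutex_lock(&o->lock);
    if (!o->stopped) {
        o->refs++;       /* the queued work holds a reference */
        queued = true;   /* queue_work() would go here */
    }
    pthread_mutex_unlock(&o->lock);
    return queued;
}

static void stop_and_destroy(struct obj *o)
{
    pthread_mutex_lock(&o->lock);
    o->stopped = true;   /* later callers exit directly */
    pthread_mutex_unlock(&o->lock);
}

int main(void)
{
    struct obj o = { PTHREAD_MUTEX_INITIALIZER, false, 1 };

    printf("before stop: queued=%d\n", maybe_queue_work(&o));
    stop_and_destroy(&o);
    printf("after stop:  queued=%d\n", maybe_queue_work(&o));
    return 0;
}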
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
@ -95,29 +121,11 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
{
struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
xa_lock(&hr_dev->qp_table_xa);
qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (qp)
refcount_inc(&qp->refcount);
xa_unlock(&hr_dev->qp_table_xa);
if (!qp) {
dev_warn(dev, "async event for bogus QP %08x\n", qpn);
qp = hns_roce_qp_lookup(hr_dev, qpn);
if (!qp)
return;
}
if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
qp->state = IB_QPS_ERR;
flush_cqe(hr_dev, qp);
}
qp->event(qp, (enum hns_roce_event)event_type);
@ -125,6 +133,21 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
complete(&qp->free);
}
void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
{
struct hns_roce_qp *qp;
qp = hns_roce_qp_lookup(hr_dev, qpn);
if (!qp)
return;
qp->state = IB_QPS_ERR;
flush_cqe(hr_dev, qp);
if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
}
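hns_roce_flush_cqe() and the reworked hns_roce_qp_event() follow the same lifetime rule: the lookup takes a reference under the table lock, the caller does its work, and dropping the last reference signals the destroy path waiting on qp->free. A compact C11 sketch of that get/put pattern (illustrative only, not the driver's locking):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct qp {
    atomic_int refcount;
    bool freed;              /* stands in for complete(&qp->free) */
};

/* a real lookup would search a table under its lock; here we just
 * take a reference on the object we were handed */
static struct qp *qp_get(struct qp *qp)
{
    if (!qp)
        return NULL;
    atomic_fetch_add(&qp->refcount, 1);
    return qp;
}

static void qp_put(struct qp *qp)
{
    if (atomic_fetch_sub(&qp->refcount, 1) == 1)
        qp->freed = true;    /* last reference: wake the destroyer */
}

int main(void)
{
    struct qp qp = { .refcount = 1, .freed = false };
    struct qp *p = qp_get(&qp);

    /* ... mark the QP in error and flush its CQEs here ... */
    qp_put(p);               /* drop the lookup reference */
    qp_put(&qp);             /* drop the original reference */
    printf("freed=%d refs=%d\n", qp.freed, atomic_load(&qp.refcount));
    return 0;
}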
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
{
@ -1124,6 +1147,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_udata *udata,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_work *flush_work = &hr_qp->flush_work;
struct hns_roce_ib_create_qp_resp resp = {};
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_qp ucmd = {};
@ -1132,9 +1156,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
mutex_init(&hr_qp->mutex);
spin_lock_init(&hr_qp->sq.lock);
spin_lock_init(&hr_qp->rq.lock);
spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET;
hr_qp->flush_flag = 0;
flush_work->hr_dev = hr_dev;
INIT_WORK(&flush_work->work, flush_work_handle);
if (init_attr->create_flags)
return -EOPNOTSUPP;
@ -1546,14 +1573,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
unsigned int reserved_from_bot;
unsigned int i;
qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
sizeof(u32), GFP_KERNEL);
if (!qp_table->idx_table.spare_idx)
return -ENOMEM;
mutex_init(&qp_table->scc_mutex);
mutex_init(&qp_table->bank_mutex);
xa_init(&hr_dev->qp_table_xa);
xa_init(&qp_table->dip_xa);
reserved_from_bot = hr_dev->caps.reserved_qps;
@ -1578,7 +1601,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
xa_destroy(&hr_dev->qp_table.dip_xa);
mutex_destroy(&hr_dev->qp_table.bank_mutex);
mutex_destroy(&hr_dev->qp_table.scc_mutex);
kfree(hr_dev->qp_table.idx_table.spare_idx);
}


@ -151,8 +151,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
srq->srqn);
if (ret)
dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
xa_erase_irq(&srq_table->xa, srq->srqn);


@ -27,6 +27,19 @@ enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
DEVX_OBJ_FLAGS_DCT = 1 << 1,
DEVX_OBJ_FLAGS_CQ = 1 << 2,
DEVX_OBJ_FLAGS_HW_FREED = 1 << 3,
};
#define MAX_ASYNC_CMDS 8
struct mlx5_async_cmd {
struct ib_uobject *uobject;
void *in;
int in_size;
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
int err;
struct mlx5_async_work cb_work;
struct completion comp;
};
struct devx_async_data {
@ -1405,7 +1418,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
*/
mlx5r_deref_wait_odp_mkey(&obj->mkey);
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
if (obj->flags & DEVX_OBJ_FLAGS_HW_FREED)
ret = 0;
else if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
@ -2595,6 +2610,82 @@ void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
}
}
static void devx_async_destroy_cb(int status, struct mlx5_async_work *context)
{
struct mlx5_async_cmd *devx_out = container_of(context,
struct mlx5_async_cmd, cb_work);
struct devx_obj *obj = devx_out->uobject->object;
if (!status)
obj->flags |= DEVX_OBJ_FLAGS_HW_FREED;
complete(&devx_out->comp);
}
static void devx_async_destroy(struct mlx5_ib_dev *dev,
struct mlx5_async_cmd *cmd)
{
init_completion(&cmd->comp);
cmd->err = mlx5_cmd_exec_cb(&dev->async_ctx, cmd->in, cmd->in_size,
&cmd->out, sizeof(cmd->out),
devx_async_destroy_cb, &cmd->cb_work);
}
static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
{
if (!cmd->err)
wait_for_completion(&cmd->comp);
atomic_set(&cmd->uobject->usecnt, 0);
}
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
struct ib_uobject *uobject;
struct devx_obj *obj;
int head = 0;
int tail = 0;
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
/*
* Currently we only support QP destruction. If other objects
* are to be destroyed, type synchronization must be added to
* the cleanup algorithm and pre/post FW cleanup handled for
* the new types as needed.
*/
if (uobj_get_object_id(uobject) != MLX5_IB_OBJECT_DEVX_OBJ ||
(get_dec_obj_type(uobject->object, MLX5_EVENT_TYPE_MAX) !=
MLX5_OBJ_TYPE_QP)) {
atomic_set(&uobject->usecnt, 0);
continue;
}
obj = uobject->object;
async_cmd[tail % MAX_ASYNC_CMDS].in = obj->dinbox;
async_cmd[tail % MAX_ASYNC_CMDS].in_size = obj->dinlen;
async_cmd[tail % MAX_ASYNC_CMDS].uobject = uobject;
devx_async_destroy(dev, &async_cmd[tail % MAX_ASYNC_CMDS]);
tail++;
if (tail - head == MAX_ASYNC_CMDS) {
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
}
while (head != tail) {
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
}
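mlx5_ib_ufile_hw_cleanup() above pipelines destroy commands: tail issues into a fixed array of MAX_ASYNC_CMDS slots, and once the window is full the oldest command (head) is reaped before the next one is issued; objects whose firmware destroy succeeded are marked DEVX_OBJ_FLAGS_HW_FREED so the later per-object cleanup skips the FW call. A simplified sketch of the head/tail window, with a hypothetical issue/reap pair standing in for mlx5_cmd_exec_cb() and the completion wait:

#include <stdio.h>

#define WINDOW 8                 /* stand-in for MAX_ASYNC_CMDS */
#define NOBJS  20

struct cmd {
    int id;
};

static void issue_async(struct cmd *c, int id)
{
    c->id = id;
    printf("issue  %d\n", id);   /* fire-and-forget in the real code */
}

static void wait_done(struct cmd *c)
{
    printf("reaped %d\n", c->id); /* wait_for_completion() in the driver */
}

int main(void)
{
    struct cmd win[WINDOW];
    int head = 0, tail = 0;

    for (int i = 0; i < NOBJS; i++) {
        issue_async(&win[tail % WINDOW], i);
        tail++;
        if (tail - head == WINDOW)       /* window full: reap the oldest */
            wait_done(&win[head++ % WINDOW]);
    }
    while (head != tail)                 /* drain whatever is still in flight */
        wait_done(&win[head++ % WINDOW]);
    return 0;
}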
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{


@ -28,6 +28,7 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
@ -41,5 +42,8 @@ static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
}
static inline void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
}
#endif
#endif /* _MLX5_IB_DEVX_H */


@ -278,7 +278,13 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
goto done;
}
err = query_ib_ppcnt(mdev, mdev_port_num, 0, out_cnt, sz, 0);
if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI)
err = query_ib_ppcnt(mdev, mdev_port_num, port_num,
out_cnt, sz, 0);
else
err = query_ib_ppcnt(mdev, mdev_port_num, 0,
out_cnt, sz, 0);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
}


@ -1182,6 +1182,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) &&
(MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) ||
MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) ||
MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) ||
MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) ||
MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc)))
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP;
}
if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
@ -2997,7 +3005,6 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
int port;
int ret;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
@ -3013,10 +3020,6 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
return ret;
}
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
mutex_init(&devr->cq_lock);
mutex_init(&devr->srq_lock);
@ -3026,16 +3029,6 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
int port;
/*
* Make sure no change P_Key work items are still executing.
*
* At this stage, the mlx5_ib_event should be unregistered
* and it ensures that no new works are added.
*/
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
/* After s0/s1 init, they are not unset during the device lifetime. */
if (devr->s1) {
@ -3211,12 +3204,14 @@ static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
lag_events);
struct mlx5_core_dev *mdev = dev->mdev;
struct ib_device *ibdev = &dev->ib_dev;
struct net_device *old_ndev = NULL;
struct mlx5_ib_port *port;
struct net_device *ndev;
int i, err;
int portnum;
u32 portnum = 0;
int ret = 0;
int i;
portnum = 0;
switch (event) {
case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
ndev = data;
@ -3232,19 +3227,24 @@ static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
}
}
}
err = ib_device_set_netdev(&dev->ib_dev, ndev,
portnum + 1);
dev_put(ndev);
if (err)
return err;
/* Rescan gids after new netdev assignment */
rdma_roce_rescan_device(&dev->ib_dev);
old_ndev = ib_device_get_netdev(ibdev, portnum + 1);
ret = ib_device_set_netdev(ibdev, ndev, portnum + 1);
if (ret)
goto out;
if (old_ndev)
roce_del_all_netdev_gids(ibdev, portnum + 1,
old_ndev);
rdma_roce_rescan_port(ibdev, portnum + 1);
}
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
out:
dev_put(old_ndev);
return notifier_from_errno(ret);
}
static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
@ -4134,6 +4134,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
.ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
@ -4464,6 +4465,13 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
int port;
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
dev->mdev_events.notifier_call = mlx5_ib_event;
mlx5_notifier_register(dev->mdev, &dev->mdev_events);
@ -4474,8 +4482,14 @@ static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
int port;
mlx5r_macsec_event_unregister(dev);
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
}
void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
@ -4565,9 +4579,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_dev_res_init,
mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_odp_init_one,
mlx5_ib_odp_cleanup_one),
@ -4592,6 +4603,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
@ -4628,9 +4642,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_dev_res_init,
mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_counters_init,
mlx5_ib_counters_cleanup),
@ -4652,6 +4663,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),


@ -521,6 +521,7 @@ struct mlx5_ib_qp {
struct mlx5_bf bf;
u8 has_rq:1;
u8 is_rss:1;
u8 is_ooo_rq:1;
/* only for user space QPs. For kernel
* we have it from the bf object
@ -972,7 +973,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_QP,
MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
@ -981,6 +981,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_RESTRACK,


@ -1960,7 +1960,7 @@ static int atomic_size_to_mode(int size_mask)
}
static int get_atomic_mode(struct mlx5_ib_dev *dev,
enum ib_qp_type qp_type)
struct mlx5_ib_qp *qp)
{
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
@ -1970,7 +1970,7 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
if (!atomic)
return -EOPNOTSUPP;
if (qp_type == MLX5_IB_QPT_DCT)
if (qp->type == MLX5_IB_QPT_DCT)
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
else
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
@ -1984,6 +1984,10 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
/* OOO DP QPs do not support atomic operations larger than 8 bytes */
if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq)
atomic_mode = MLX5_ATOMIC_MODE_8B;
return atomic_mode;
}
@ -2839,6 +2843,29 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return 0;
}
static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type)
{
if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force))
return false;
switch (qp_type) {
case IB_QPT_RC:
return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc);
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT:
return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc);
case IB_QPT_UC:
return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc);
case IB_QPT_UD:
return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud);
case MLX5_IB_QPT_DCI:
case MLX5_IB_QPT_DCT:
return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc);
default:
return false;
}
}
static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
bool cond, struct mlx5_ib_qp *qp)
{
@ -3365,7 +3392,7 @@ static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
atomic_mode = get_atomic_mode(dev, qp->type);
atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
@ -4316,6 +4343,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(qpc, qpc, deth_sqpn, 1);
if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
MLX5_SET(qpc, qpc, dp_ordering_1, 1);
MLX5_SET(qpc, qpc, dp_ordering_force, 1);
}
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
@ -4531,7 +4563,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
@ -4573,6 +4605,10 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
if (qp->is_ooo_rq) {
MLX5_SET(dctc, dctc, dp_ordering_1, 1);
MLX5_SET(dctc, dctc, dp_ordering_force, 1);
}
err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
MLX5_ST_SZ_BYTES(create_dct_in), out,
@ -4676,11 +4712,16 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
min(udata->inlen, sizeof(ucmd))))
return -EFAULT;
if (ucmd.comp_mask ||
if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP ||
memchr_inv(&ucmd.burst_info.reserved, 0,
sizeof(ucmd.burst_info.reserved)))
return -EOPNOTSUPP;
if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) {
if (!get_dp_ooo_cap(dev->mdev, qp->type))
return -EOPNOTSUPP;
qp->is_ooo_rq = 1;
}
}
if (qp->type == IB_QPT_GSI)


@ -775,6 +775,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
* Yield the processor
*/
spin_lock_irqsave(&qp->state_lock, flags);
attr->cur_qp_state = qp_state(qp);
if (qp->attr.sq_draining) {
spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();


@ -663,10 +663,12 @@ int rxe_requester(struct rxe_qp *qp)
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
spin_unlock_irqrestore(&qp->state_lock, flags);
if (wqe)
if (wqe) {
wqe->status = IB_WC_WR_FLUSH_ERR;
goto err;
else
} else {
goto exit;
}
}
if (unlikely(qp_state(qp) == IB_QPS_RESET)) {


@ -128,16 +128,13 @@ static void ipoib_get_ethtool_stats(struct net_device *dev,
static void ipoib_get_strings(struct net_device __always_unused *dev,
u32 stringset, u8 *data)
{
u8 *p = data;
int i;
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) {
memcpy(p, ipoib_gstrings_stats[i].stat_string,
ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++)
ethtool_puts(&data,
ipoib_gstrings_stats[i].stat_string);
break;
default:
break;


@ -49,6 +49,7 @@
#include <linux/jhash.h>
#include <net/arp.h>
#include <net/addrconf.h>
#include <net/pkt_sched.h>
#include <linux/inetdevice.h>
#include <rdma/ib_cache.h>
@ -2145,7 +2146,7 @@ void ipoib_setup_common(struct net_device *dev)
dev->hard_header_len = IPOIB_HARD_LEN;
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
dev->features = (NETIF_F_VLAN_CHALLENGED |
NETIF_F_HIGHDMA);
netif_keep_dst(dev);


@ -164,9 +164,7 @@ static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
return;
for (i = 0; i < VNIC_STATS_LEN; i++)
memcpy(data + i * ETH_GSTRING_LEN,
vnic_gstrings_stats[i].stat_string,
ETH_GSTRING_LEN);
ethtool_puts(&data, vnic_gstrings_stats[i].stat_string);
}
/* ethtool ops */


@ -8256,6 +8256,9 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
if (flags & FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED)
bp->fw_cap |= BNXT_FW_CAP_RING_MONITOR;
if (flags & FUNC_QCFG_RESP_FLAGS_ENABLE_RDMA_SRIOV)
bp->fw_cap |= BNXT_FW_CAP_ENABLE_RDMA_SRIOV;
switch (resp->port_partition_type) {
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
@ -9422,6 +9425,9 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->flags |= BNXT_FLAG_UDP_GSO_CAP;
if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED)
bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP;
if (BNXT_PF(bp) &&
(flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED;
bp->tx_push_thresh = 0;
if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&


@ -2446,6 +2446,8 @@ struct bnxt {
#define BNXT_FW_CAP_DCBX_AGENT BIT_ULL(2)
#define BNXT_FW_CAP_NEW_RM BIT_ULL(3)
#define BNXT_FW_CAP_IF_CHANGE BIT_ULL(4)
#define BNXT_FW_CAP_ENABLE_RDMA_SRIOV BIT_ULL(5)
#define BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED BIT_ULL(6)
#define BNXT_FW_CAP_KONG_MB_CHNL BIT_ULL(7)
#define BNXT_FW_CAP_OVS_64BIT_HANDLE BIT_ULL(10)
#define BNXT_FW_CAP_TRUSTED_VF BIT_ULL(11)
@ -2492,6 +2494,10 @@ struct bnxt {
#define BNXT_SUPPORTS_QUEUE_API(bp) \
(BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \
((bp)->fw_cap & BNXT_FW_CAP_VNIC_RE_FLUSH))
#define BNXT_RDMA_SRIOV_EN(bp) \
((bp)->fw_cap & BNXT_FW_CAP_ENABLE_RDMA_SRIOV)
#define BNXT_ROCE_VF_RESC_CAP(bp) \
((bp)->fw_cap & BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED)
u32 hwrm_spec_code;
u16 hwrm_cmd_seq;


@ -520,6 +520,56 @@ static int __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
return hwrm_req_send(bp, req);
}
static void bnxt_hwrm_roce_sriov_cfg(struct bnxt *bp, int num_vfs)
{
struct hwrm_func_qcaps_output *resp;
struct hwrm_func_cfg_input *cfg_req;
struct hwrm_func_qcaps_input *req;
int rc;
rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS);
if (rc)
return;
req->fid = cpu_to_le16(0xffff);
resp = hwrm_req_hold(bp, req);
rc = hwrm_req_send(bp, req);
if (rc)
goto err;
rc = hwrm_req_init(bp, cfg_req, HWRM_FUNC_CFG);
if (rc)
goto err;
cfg_req->fid = cpu_to_le16(0xffff);
cfg_req->enables2 =
cpu_to_le32(FUNC_CFG_REQ_ENABLES2_ROCE_MAX_AV_PER_VF |
FUNC_CFG_REQ_ENABLES2_ROCE_MAX_CQ_PER_VF |
FUNC_CFG_REQ_ENABLES2_ROCE_MAX_MRW_PER_VF |
FUNC_CFG_REQ_ENABLES2_ROCE_MAX_QP_PER_VF |
FUNC_CFG_REQ_ENABLES2_ROCE_MAX_SRQ_PER_VF |
FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF);
cfg_req->roce_max_av_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_av) / num_vfs);
cfg_req->roce_max_cq_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_cq) / num_vfs);
cfg_req->roce_max_mrw_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_mrw) / num_vfs);
cfg_req->roce_max_qp_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_qp) / num_vfs);
cfg_req->roce_max_srq_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_srq) / num_vfs);
cfg_req->roce_max_gid_per_vf =
cpu_to_le32(le32_to_cpu(resp->roce_vf_max_gid) / num_vfs);
rc = hwrm_req_send(bp, cfg_req);
err:
hwrm_req_drop(bp, req);
if (rc)
netdev_err(bp->dev, "RoCE sriov configuration failed\n");
}
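bnxt_hwrm_roce_sriov_cfg() above reads the PF-wide RoCE maxima via FUNC_QCAPS and programs each VF with an even integer share (total / num_vfs) through FUNC_CFG. A trivial worked example of that split; the totals below are made up for illustration, not adapter defaults:

#include <stdio.h>

int main(void)
{
    /* illustrative PF-wide totals, not real adapter values */
    unsigned int roce_vf_max_qp = 8192, roce_vf_max_mrw = 4096;
    unsigned int num_vfs = 6;

    /* each VF gets the integer share; any remainder stays unassigned */
    printf("qp per vf:  %u\n", roce_vf_max_qp / num_vfs);   /* 1365 */
    printf("mrw per vf: %u\n", roce_vf_max_mrw / num_vfs);  /* 682 */
    return 0;
}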
/* Only called by PF to reserve resources for VFs, returns actual number of
* VFs configured, or < 0 on error.
*/
@ -759,6 +809,9 @@ int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
*num_vfs = rc;
}
if (BNXT_RDMA_SRIOV_EN(bp) && BNXT_ROCE_VF_RESC_CAP(bp))
bnxt_hwrm_roce_sriov_cfg(bp, *num_vfs);
return 0;
}


@ -414,6 +414,8 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
if (bp->flags & BNXT_FLAG_VF)
edev->flags |= BNXT_EN_FLAG_VF;
if (BNXT_ROCE_VF_RESC_CAP(bp))
edev->flags |= BNXT_EN_FLAG_ROCE_VF_RES_MGMT;
edev->chip_num = bp->chip_num;
edev->hw_ring_stats_size = bp->hw_ring_stats_size;


@ -64,6 +64,7 @@ struct bnxt_en_dev {
#define BNXT_EN_FLAG_ULP_STOPPED 0x8
#define BNXT_EN_FLAG_VF 0x10
#define BNXT_EN_VF(edev) ((edev)->flags & BNXT_EN_FLAG_VF)
#define BNXT_EN_FLAG_ROCE_VF_RES_MGMT 0x20
struct bnxt_ulp *ulp_tbl;
int l2_db_size; /* Doorbell BAR size in


@ -516,6 +516,7 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
blocking_notifier_call_chain(&dev0->priv.lag_nh,
MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
ndev);
dev_put(ndev);
}
}
@ -918,6 +919,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct lag_tracker tracker = { };
struct net_device *ndev;
bool do_bond, roce_lag;
int err;
int i;
@ -981,6 +983,16 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
return;
}
}
if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
ndev = mlx5_lag_active_backup_get_netdev(dev0);
/* Only SRIOV and RoCE LAG should have tracker->tx_type
* set, so there is no need to check the mode.
*/
blocking_notifier_call_chain(&dev0->priv.lag_nh,
MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
ndev);
dev_put(ndev);
}
} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
mlx5_modify_lag(ldev, &tracker);
} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {


@ -1872,7 +1872,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_328[0x2];
u8 relaxed_ordering_read[0x1];
u8 log_max_pd[0x5];
u8 reserved_at_330[0x5];
u8 dp_ordering_ooo_all_ud[0x1];
u8 dp_ordering_ooo_all_uc[0x1];
u8 dp_ordering_ooo_all_xrc[0x1];
u8 dp_ordering_ooo_all_dc[0x1];
u8 dp_ordering_ooo_all_rc[0x1];
u8 pcie_reset_using_hotreset_method[0x1];
u8 pci_sync_for_fw_update_with_driver_unload[0x1];
u8 vnic_env_cnt_steering_fail[0x1];
@ -2094,7 +2098,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_0[0x80];
u8 migratable[0x1];
u8 reserved_at_81[0x11];
u8 reserved_at_81[0x7];
u8 dp_ordering_force[0x1];
u8 reserved_at_89[0x9];
u8 query_vuid[0x1];
u8 reserved_at_93[0x5];
u8 umr_log_entity_size_5[0x1];
@ -3524,7 +3530,8 @@ struct mlx5_ifc_qpc_bits {
u8 latency_sensitive[0x1];
u8 reserved_at_24[0x1];
u8 drain_sigerr[0x1];
u8 reserved_at_26[0x2];
u8 reserved_at_26[0x1];
u8 dp_ordering_force[0x1];
u8 pd[0x18];
u8 mtu[0x3];
@ -3597,7 +3604,8 @@ struct mlx5_ifc_qpc_bits {
u8 rae[0x1];
u8 reserved_at_493[0x1];
u8 page_offset[0x6];
u8 reserved_at_49a[0x3];
u8 reserved_at_49a[0x2];
u8 dp_ordering_1[0x1];
u8 cd_slave_receive[0x1];
u8 cd_slave_send[0x1];
u8 cd_master[0x1];
@ -4543,7 +4551,8 @@ struct mlx5_ifc_dctc_bits {
u8 state[0x4];
u8 reserved_at_8[0x18];
u8 reserved_at_20[0x8];
u8 reserved_at_20[0x7];
u8 dp_ordering_force[0x1];
u8 user_index[0x18];
u8 reserved_at_40[0x8];
@ -4558,7 +4567,9 @@ struct mlx5_ifc_dctc_bits {
u8 latency_sensitive[0x1];
u8 rlky[0x1];
u8 free_ar[0x1];
u8 reserved_at_73[0xd];
u8 reserved_at_73[0x1];
u8 dp_ordering_1[0x1];
u8 reserved_at_75[0xb];
u8 reserved_at_80[0x8];
u8 cs_res[0x8];


@ -2675,6 +2675,12 @@ struct ib_device_ops {
*/
void (*del_sub_dev)(struct ib_device *sub_dev);
/**
* ufile_hw_cleanup - Attempt to clean up uobjects' HW resources inside
* the ufile.
*/
void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile);
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
@ -2948,6 +2954,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
size_t length, u32 min_pgoff,
u32 max_pgoff);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
void rdma_user_mmap_disassociate(struct ib_device *device);
#else
static inline void rdma_user_mmap_disassociate(struct ib_device *device)
{
}
#endif
static inline int
rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
@ -4726,6 +4740,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
* @device: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port);
void roce_del_all_netdev_gids(struct ib_device *ib_dev,
u32 port, struct net_device *ndev);
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);


@ -134,6 +134,8 @@ static inline void uverbs_uobject_get(struct ib_uobject *uobject)
}
void uverbs_uobject_put(struct ib_uobject *uobject);
int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
struct uverbs_obj_fd_type {
/*
* In fd based objects, uverbs_obj_type_ops points to generic
@ -150,6 +152,37 @@ struct uverbs_obj_fd_type {
int flags;
};
struct ib_uverbs_file {
struct kref ref;
struct ib_uverbs_device *device;
struct mutex ucontext_lock;
/*
* ucontext must be accessed via ib_uverbs_get_ucontext() or with
* ucontext_lock held
*/
struct ib_ucontext *ucontext;
struct ib_uverbs_async_event_file *default_async_file;
struct list_head list;
/*
* To access the uobjects list hw_destroy_rwsem must be held for write
* OR hw_destroy_rwsem held for read AND uobjects_lock held.
* hw_destroy_rwsem should be held across any destruction of the HW
* object of an associated uobject.
*/
struct rw_semaphore hw_destroy_rwsem;
spinlock_t uobjects_lock;
struct list_head uobjects;
struct mutex umap_lock;
struct list_head umaps;
struct page *disassociate_page;
struct xarray idr;
struct mutex disassociation_lock;
};
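The locking comment in the relocated struct ib_uverbs_file spells out a two-level rule for the uobjects list: hold hw_destroy_rwsem for write, or hold it for read together with uobjects_lock. A small pthread sketch of those two legal combinations, using a hypothetical counter in place of the list (not the uverbs implementation):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t hw_destroy_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t uobjects_lock = PTHREAD_MUTEX_INITIALIZER;
static int uobjects;             /* stands in for the uobjects list */

/* first legal way to touch the list: rwsem held for write */
static void add_uobject_exclusive(void)
{
    pthread_rwlock_wrlock(&hw_destroy_rwsem);
    uobjects++;
    pthread_rwlock_unlock(&hw_destroy_rwsem);
}

/* second legal way: rwsem held for read plus uobjects_lock */
static void add_uobject_shared(void)
{
    pthread_rwlock_rdlock(&hw_destroy_rwsem);
    pthread_mutex_lock(&uobjects_lock);
    uobjects++;
    pthread_mutex_unlock(&uobjects_lock);
    pthread_rwlock_unlock(&hw_destroy_rwsem);
}

int main(void)
{
    add_uobject_exclusive();
    add_uobject_shared();
    printf("uobjects: %d\n", uobjects);
    return 0;
}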
extern const struct uverbs_obj_type_class uverbs_idr_class;
extern const struct uverbs_obj_type_class uverbs_fd_class;
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);


@ -95,7 +95,8 @@ struct efa_ibv_create_qp {
__u32 sq_ring_size; /* bytes */
__u32 driver_qp_type;
__u16 flags;
__u8 reserved_90[6];
__u8 sl;
__u8 reserved_98[5];
};
struct efa_ibv_create_qp_resp {


@ -252,6 +252,7 @@ enum mlx5_ib_query_dev_resp_flags {
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3,
MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4,
};
enum mlx5_ib_tunnel_offloads {
@ -439,6 +440,10 @@ struct mlx5_ib_burst_info {
__u16 reserved;
};
enum mlx5_ib_modify_qp_mask {
MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0,
};
struct mlx5_ib_modify_qp {
__u32 comp_mask;
struct mlx5_ib_burst_info burst_info;


@ -638,6 +638,8 @@ enum rdma_nl_notify_event_type {
RDMA_UNREGISTER_EVENT,
RDMA_NETDEV_ATTACH_EVENT,
RDMA_NETDEV_DETACH_EVENT,
RDMA_RENAME_EVENT,
RDMA_NETDEV_RENAME_EVENT,
};
#endif /* _UAPI_RDMA_NETLINK_H */