mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 07:23:14 +00:00
RDMA subsystem updates for 5.4-rc
Various minor bug fixes posted in the last couple of weeks: - Various missed memory frees and error unwind bugs - Fix regressions in a few iwarp drivers from 5.4 patches - A few regressions added in past kernels - Squash a number of races in mlx5 ODP code -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAl2d4r8ACgkQOG33FX4g mxpnnhAAgumREC20OZh/bYzHXdfLrnskznlTrcq4SaPa4We9qu2n/wbgqxlNmuWf wDSWgYHgcNHNla4Ft+U1dmH54/3EjyG0O2fCsxWvPtmoCf2DCg9Veq2R9TqVPpxq P7RG/TxVy7RMuwc5OAGaz0JeffKff6DaZcLJhLGzF/N7whnTrAWtbOr6mjChDy3V 5wF+4dLmEb1ZIb9tmEeMFAyBEuzELpSnoXKTI23z9hOMWgUX6AOa6uxX5iMeOJlq dNmiFTzE5Q0kuayO2IR0aGw1W2rxJRxf5EJYkazBDSc7hfa6PyH8KvLY3ZOBooXi O/bwoXSG09klRdQWVj3YWGlNYF8turhore6PuQco93M1R2w5CdBcUKsQZs5JCDO2 aniSEg7VTaG9nVXvaM4xW3qzkyqGHjJdJMZbr+xn2OA39WBJrAvdUtwAzZVxYrXO Jmue0qPVkt615SF5j0ARd1Z42E0D9QTQ+ifVKdDoYeguWDiujpmNb0OyxTic1RMB a+pQNTqxYd71q0RPBIiThyEm/U+5oUK/hthvprB7jAiLPuB16Taesch5gp1MONGh R0W5Fd8zvTE/CzctD6FPJNA0sApv2/Twwx/ja6OgSgFJKHlnL+q1c8MUExHbddh1 RQ+zIld9AgQWbs2MrrwAHkKXVFY8N7/zfc17bVPamlkNUWzvMTM= =sgve -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma Pull rdma fixes from Jason Gunthorpe: "The usual collection of driver bug fixes, and a few regressions from the merge window. Nothing particularly worrisome. 
- Various missed memory frees and error unwind bugs
- Fix regressions in a few iwarp drivers from 5.4 patches
- A few regressions added in past kernels
- Squash a number of races in mlx5 ODP code"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/mlx5: Add missing synchronize_srcu() for MW cases
  RDMA/mlx5: Put live in the correct place for ODP MRs
  RDMA/mlx5: Order num_pending_prefetch properly with synchronize_srcu
  RDMA/odp: Lift umem_mutex out of ib_umem_odp_unmap_dma_pages()
  RDMA/mlx5: Fix a race with mlx5_ib_update_xlt on an implicit MR
  RDMA/mlx5: Do not allow rereg of a ODP MR
  IB/core: Fix wrong iterating on ports
  RDMA/nldev: Reshuffle the code to avoid need to rebind QP in error path
  RDMA/cxgb4: Do not dma memory off of the stack
  RDMA/cm: Fix memory leak in cm_add/remove_one
  RDMA/core: Fix an error handling path in 'res_get_common_doit()'
  RDMA/i40iw: Associate ibdev to netdev before IB device registration
  RDMA/iwcm: Fix a lock inversion issue
  RDMA/iw_cxgb4: fix SRQ access from dump_qp()
  RDMA/hfi1: Prevent memory leak in sdma_init
  RDMA/core: Fix use after free and refcnt leak on ndev in_device in iwarp_query_port
  RDMA/siw: Fix serialization issue in write_space()
  RDMA/vmw_pvrdma: Free SRQ only once
This commit is contained in:
commit
8a8c600de5
@ -4399,6 +4399,7 @@ static void cm_add_one(struct ib_device *ib_device)
|
||||
error1:
|
||||
port_modify.set_port_cap_mask = 0;
|
||||
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
|
||||
kfree(port);
|
||||
while (--i) {
|
||||
if (!rdma_cap_ib_cm(ib_device, i))
|
||||
continue;
|
||||
@ -4407,6 +4408,7 @@ static void cm_add_one(struct ib_device *ib_device)
|
||||
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
|
||||
ib_unregister_mad_agent(port->mad_agent);
|
||||
cm_remove_port_fs(port);
|
||||
kfree(port);
|
||||
}
|
||||
free:
|
||||
kfree(cm_dev);
|
||||
@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
|
||||
spin_unlock_irq(&cm.state_lock);
|
||||
ib_unregister_mad_agent(cur_mad_agent);
|
||||
cm_remove_port_fs(port);
|
||||
kfree(port);
|
||||
}
|
||||
|
||||
kfree(cm_dev);
|
||||
|
@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
|
||||
conn_id->cm_id.iw = NULL;
|
||||
cma_exch(conn_id, RDMA_CM_DESTROYING);
|
||||
mutex_unlock(&conn_id->handler_mutex);
|
||||
mutex_unlock(&listen_id->handler_mutex);
|
||||
cma_deref_id(conn_id);
|
||||
rdma_destroy_id(&conn_id->id);
|
||||
goto out;
|
||||
return ret;
|
||||
}
|
||||
|
||||
mutex_unlock(&conn_id->handler_mutex);
|
||||
|
@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device,
|
||||
if (!netdev)
|
||||
return -ENODEV;
|
||||
|
||||
dev_put(netdev);
|
||||
|
||||
port_attr->max_mtu = IB_MTU_4096;
|
||||
port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
|
||||
|
||||
@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device,
|
||||
port_attr->state = IB_PORT_DOWN;
|
||||
port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
|
||||
} else {
|
||||
inetdev = in_dev_get(netdev);
|
||||
rcu_read_lock();
|
||||
inetdev = __in_dev_get_rcu(netdev);
|
||||
|
||||
if (inetdev && inetdev->ifa_list) {
|
||||
port_attr->state = IB_PORT_ACTIVE;
|
||||
port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
|
||||
in_dev_put(inetdev);
|
||||
} else {
|
||||
port_attr->state = IB_PORT_INIT;
|
||||
port_attr->phys_state =
|
||||
IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
dev_put(netdev);
|
||||
err = device->ops.query_port(device, port_num, port_attr);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
||||
if (!msg) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
goto err_get;
|
||||
}
|
||||
|
||||
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
||||
@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
|
||||
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
|
||||
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
|
||||
ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
|
||||
if (ret)
|
||||
goto err_unbind;
|
||||
|
||||
if (fill_nldev_handle(msg, device) ||
|
||||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
|
||||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
|
||||
@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
goto err_fill;
|
||||
}
|
||||
|
||||
ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
|
||||
if (ret)
|
||||
goto err_fill;
|
||||
|
||||
nlmsg_end(msg, nlh);
|
||||
ib_device_put(device);
|
||||
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
||||
|
||||
err_fill:
|
||||
rdma_counter_bind_qpn(device, port, qpn, cntn);
|
||||
err_unbind:
|
||||
nlmsg_free(msg);
|
||||
err:
|
||||
ib_device_put(device);
|
||||
|
@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
|
||||
int ret;
|
||||
|
||||
rdma_for_each_port (dev, i) {
|
||||
is_ib = rdma_protocol_ib(dev, i++);
|
||||
is_ib = rdma_protocol_ib(dev, i);
|
||||
if (is_ib)
|
||||
break;
|
||||
}
|
||||
|
@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
|
||||
* that the hardware will not attempt to access the MR any more.
|
||||
*/
|
||||
if (!umem_odp->is_implicit_odp) {
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
|
||||
ib_umem_end(umem_odp));
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
kvfree(umem_odp->dma_list);
|
||||
kvfree(umem_odp->page_list);
|
||||
}
|
||||
@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
|
||||
u64 addr;
|
||||
struct ib_device *dev = umem_odp->umem.ibdev;
|
||||
|
||||
lockdep_assert_held(&umem_odp->umem_mutex);
|
||||
|
||||
virt = max_t(u64, virt, ib_umem_start(umem_odp));
|
||||
bound = min_t(u64, bound, ib_umem_end(umem_odp));
|
||||
/* Note that during the run of this function, the
|
||||
@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
|
||||
* faults from completion. We might be racing with other
|
||||
* invalidations, so we must make sure we free each page only
|
||||
* once. */
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
|
||||
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
if (umem_odp->page_list[idx]) {
|
||||
@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
|
||||
umem_odp->npages--;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
|
||||
|
||||
|
@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
|
||||
}
|
||||
}
|
||||
|
||||
static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
|
||||
static int dump_qp(unsigned long id, struct c4iw_qp *qp,
|
||||
struct c4iw_debugfs_data *qpd)
|
||||
{
|
||||
int space;
|
||||
int cc;
|
||||
if (id != qp->wq.sq.qid)
|
||||
return 0;
|
||||
|
||||
space = qpd->bufsize - qpd->pos - 1;
|
||||
if (space == 0)
|
||||
@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
|
||||
|
||||
xa_lock_irq(&qpd->devp->qps);
|
||||
xa_for_each(&qpd->devp->qps, index, qp)
|
||||
dump_qp(qp, qpd);
|
||||
dump_qp(index, qp, qpd);
|
||||
xa_unlock_irq(&qpd->devp->qps);
|
||||
|
||||
qpd->buf[qpd->pos++] = 0;
|
||||
|
@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
|
||||
struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
|
||||
{
|
||||
int err;
|
||||
struct fw_ri_tpte tpt;
|
||||
struct fw_ri_tpte *tpt;
|
||||
u32 stag_idx;
|
||||
static atomic_t key;
|
||||
|
||||
if (c4iw_fatal_error(rdev))
|
||||
return -EIO;
|
||||
|
||||
tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
|
||||
if (!tpt)
|
||||
return -ENOMEM;
|
||||
|
||||
stag_state = stag_state > 0;
|
||||
stag_idx = (*stag) >> 8;
|
||||
|
||||
@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
|
||||
mutex_lock(&rdev->stats.lock);
|
||||
rdev->stats.stag.fail++;
|
||||
mutex_unlock(&rdev->stats.lock);
|
||||
kfree(tpt);
|
||||
return -ENOMEM;
|
||||
}
|
||||
mutex_lock(&rdev->stats.lock);
|
||||
@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
|
||||
|
||||
/* write TPT entry */
|
||||
if (reset_tpt_entry)
|
||||
memset(&tpt, 0, sizeof(tpt));
|
||||
memset(tpt, 0, sizeof(*tpt));
|
||||
else {
|
||||
tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
|
||||
tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
|
||||
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
|
||||
FW_RI_TPTE_STAGSTATE_V(stag_state) |
|
||||
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
|
||||
tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
|
||||
tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
|
||||
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
|
||||
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
|
||||
FW_RI_VA_BASED_TO))|
|
||||
FW_RI_TPTE_PS_V(page_size));
|
||||
tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
|
||||
tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
|
||||
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
|
||||
tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
|
||||
tpt.va_hi = cpu_to_be32((u32)(to >> 32));
|
||||
tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
|
||||
tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
|
||||
tpt.len_hi = cpu_to_be32((u32)(len >> 32));
|
||||
tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
|
||||
tpt->va_hi = cpu_to_be32((u32)(to >> 32));
|
||||
tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
|
||||
tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
|
||||
tpt->len_hi = cpu_to_be32((u32)(len >> 32));
|
||||
}
|
||||
err = write_adapter_mem(rdev, stag_idx +
|
||||
(rdev->lldi.vr->stag.start >> 5),
|
||||
sizeof(tpt), &tpt, skb, wr_waitp);
|
||||
sizeof(*tpt), tpt, skb, wr_waitp);
|
||||
|
||||
if (reset_tpt_entry) {
|
||||
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
|
||||
@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
|
||||
rdev->stats.stag.cur -= 32;
|
||||
mutex_unlock(&rdev->stats.lock);
|
||||
}
|
||||
kfree(tpt);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
|
||||
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
|
||||
srq->flags = T4_SRQ_LIMIT_SUPPORT;
|
||||
|
||||
ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto err_free_queue;
|
||||
|
||||
if (udata) {
|
||||
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
|
||||
if (!srq_key_mm) {
|
||||
ret = -ENOMEM;
|
||||
goto err_remove_handle;
|
||||
goto err_free_queue;
|
||||
}
|
||||
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
|
||||
if (!srq_db_key_mm) {
|
||||
@ -2789,8 +2785,6 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
|
||||
kfree(srq_db_key_mm);
|
||||
err_free_srq_key_mm:
|
||||
kfree(srq_key_mm);
|
||||
err_remove_handle:
|
||||
xa_erase_irq(&rhp->qps, srq->wq.qid);
|
||||
err_free_queue:
|
||||
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
|
||||
srq->wr_waitp);
|
||||
@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
|
||||
rhp = srq->rhp;
|
||||
|
||||
pr_debug("%s id %d\n", __func__, srq->wq.qid);
|
||||
|
||||
xa_erase_irq(&rhp->qps, srq->wq.qid);
|
||||
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
|
||||
ibucontext);
|
||||
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
|
||||
|
@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
|
||||
}
|
||||
|
||||
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
kfree(tmp_sdma_rht);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
dd->sdma_rht = tmp_sdma_rht;
|
||||
|
||||
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
|
||||
|
@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
|
||||
return -ENOMEM;
|
||||
iwibdev = iwdev->iwibdev;
|
||||
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
|
||||
ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
|
||||
if (ret)
|
||||
goto error;
|
||||
|
@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void devx_free_indirect_mkey(struct rcu_head *rcu)
|
||||
{
|
||||
kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
|
||||
}
|
||||
|
||||
/* This function to delete from the radix tree needs to be called before
|
||||
* destroying the underlying mkey. Otherwise a race might occur in case that
|
||||
* other thread will get the same mkey before this one will be deleted,
|
||||
* in that case it will fail via inserting to the tree its own data.
|
||||
*
|
||||
* Note:
|
||||
* An error in the destroy is not expected unless there is some other indirect
|
||||
* mkey which points to this one. In a kernel cleanup flow it will be just
|
||||
* destroyed in the iterative destruction call. In a user flow, in case
|
||||
* the application didn't close in the expected order it's its own problem,
|
||||
* the mkey won't be part of the tree, in both cases the kernel is safe.
|
||||
*/
|
||||
static void devx_cleanup_mkey(struct devx_obj *obj)
|
||||
{
|
||||
xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
|
||||
mlx5_base_mkey(obj->devx_mr.mmkey.key));
|
||||
}
|
||||
|
||||
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
|
||||
struct devx_event_subscription *sub)
|
||||
{
|
||||
@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
|
||||
int ret;
|
||||
|
||||
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
|
||||
devx_cleanup_mkey(obj);
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
|
||||
/*
|
||||
* The pagefault_single_data_segment() does commands against
|
||||
* the mmkey, we must wait for that to stop before freeing the
|
||||
* mkey, as another allocation could get the same mkey #.
|
||||
*/
|
||||
xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
|
||||
mlx5_base_mkey(obj->devx_mr.mmkey.key));
|
||||
synchronize_srcu(&dev->mr_srcu);
|
||||
}
|
||||
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
|
||||
ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
|
||||
@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
|
||||
devx_cleanup_subscription(dev, sub_entry);
|
||||
mutex_unlock(&devx_event_table->event_xa_lock);
|
||||
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
|
||||
call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
|
||||
devx_free_indirect_mkey);
|
||||
return ret;
|
||||
}
|
||||
|
||||
kfree(obj);
|
||||
return ret;
|
||||
}
|
||||
@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
|
||||
&obj_id);
|
||||
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
|
||||
|
||||
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
|
||||
if (err)
|
||||
goto obj_destroy;
|
||||
|
||||
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
|
||||
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
|
||||
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
|
||||
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
|
||||
err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
|
||||
if (err)
|
||||
goto obj_destroy;
|
||||
}
|
||||
|
||||
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
|
||||
if (err)
|
||||
goto err_copy;
|
||||
|
||||
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
|
||||
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
|
||||
|
||||
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
|
||||
|
||||
return 0;
|
||||
|
||||
err_copy:
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
|
||||
devx_cleanup_mkey(obj);
|
||||
obj_destroy:
|
||||
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
|
||||
mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
|
||||
|
@ -606,7 +606,7 @@ struct mlx5_ib_mr {
|
||||
struct mlx5_ib_dev *dev;
|
||||
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
|
||||
struct mlx5_core_sig_ctx *sig;
|
||||
int live;
|
||||
unsigned int live;
|
||||
void *descs_alloc;
|
||||
int access_flags; /* Needed for rereg MR */
|
||||
|
||||
@ -639,7 +639,6 @@ struct mlx5_ib_mw {
|
||||
struct mlx5_ib_devx_mr {
|
||||
struct mlx5_core_mkey mmkey;
|
||||
int ndescs;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct mlx5_ib_umr_context {
|
||||
|
@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
|
||||
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
|
||||
}
|
||||
|
||||
static void update_odp_mr(struct mlx5_ib_mr *mr)
|
||||
{
|
||||
if (is_odp_mr(mr)) {
|
||||
/*
|
||||
* This barrier prevents the compiler from moving the
|
||||
* setting of umem->odp_data->private to point to our
|
||||
* MR, before reg_umr finished, to ensure that the MR
|
||||
* initialization have finished before starting to
|
||||
* handle invalidations.
|
||||
*/
|
||||
smp_wmb();
|
||||
to_ib_umem_odp(mr->umem)->private = mr;
|
||||
/*
|
||||
* Make sure we will see the new
|
||||
* umem->odp_data->private value in the invalidation
|
||||
* routines, before we can get page faults on the
|
||||
* MR. Page faults can happen once we put the MR in
|
||||
* the tree, below this line. Without the barrier,
|
||||
* there can be a fault handling and an invalidation
|
||||
* before umem->odp_data->private == mr is visible to
|
||||
* the invalidation handler.
|
||||
*/
|
||||
smp_wmb();
|
||||
}
|
||||
}
|
||||
|
||||
static void reg_mr_callback(int status, struct mlx5_async_work *context)
|
||||
{
|
||||
struct mlx5_ib_mr *mr =
|
||||
@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
||||
mr->umem = umem;
|
||||
set_mr_fields(dev, mr, npages, length, access_flags);
|
||||
|
||||
update_odp_mr(mr);
|
||||
|
||||
if (use_umr) {
|
||||
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
|
||||
|
||||
@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
|
||||
mr->live = 1;
|
||||
if (is_odp_mr(mr)) {
|
||||
to_ib_umem_odp(mr->umem)->private = mr;
|
||||
atomic_set(&mr->num_pending_prefetch, 0);
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
|
||||
smp_store_release(&mr->live, 1);
|
||||
|
||||
return &mr->ibmr;
|
||||
error:
|
||||
@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
|
||||
if (!mr->umem)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_odp_mr(mr))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (flags & IB_MR_REREG_TRANS) {
|
||||
addr = virt_addr;
|
||||
len = length;
|
||||
@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
|
||||
}
|
||||
|
||||
mr->allocated_from_cache = 0;
|
||||
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
|
||||
mr->live = 1;
|
||||
} else {
|
||||
/*
|
||||
* Send a UMR WQE
|
||||
@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
|
||||
|
||||
set_mr_fields(dev, mr, npages, len, access_flags);
|
||||
|
||||
update_odp_mr(mr);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
|
||||
/* Prevent new page faults and
|
||||
* prefetch requests from succeeding
|
||||
*/
|
||||
mr->live = 0;
|
||||
WRITE_ONCE(mr->live, 0);
|
||||
|
||||
/* Wait for all running page-fault handlers to finish. */
|
||||
synchronize_srcu(&dev->mr_srcu);
|
||||
|
||||
/* dequeue pending prefetch requests for the mr */
|
||||
if (atomic_read(&mr->num_pending_prefetch))
|
||||
flush_workqueue(system_unbound_wq);
|
||||
WARN_ON(atomic_read(&mr->num_pending_prefetch));
|
||||
|
||||
/* Wait for all running page-fault handlers to finish. */
|
||||
synchronize_srcu(&dev->mr_srcu);
|
||||
/* Destroy all page mappings */
|
||||
if (!umem_odp->is_implicit_odp)
|
||||
mlx5_ib_invalidate_range(umem_odp,
|
||||
@ -1987,14 +1962,25 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
|
||||
|
||||
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(mw->device);
|
||||
struct mlx5_ib_mw *mmw = to_mmw(mw);
|
||||
int err;
|
||||
|
||||
err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
|
||||
&mmw->mmkey);
|
||||
if (!err)
|
||||
kfree(mmw);
|
||||
return err;
|
||||
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
|
||||
xa_erase(&dev->mdev->priv.mkey_table,
|
||||
mlx5_base_mkey(mmw->mmkey.key));
|
||||
/*
|
||||
* pagefault_single_data_segment() may be accessing mmw under
|
||||
* SRCU if the user bound an ODP MR to this MW.
|
||||
*/
|
||||
synchronize_srcu(&dev->mr_srcu);
|
||||
}
|
||||
|
||||
err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
|
||||
if (err)
|
||||
return err;
|
||||
kfree(mmw);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
|
||||
|
@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The locking here is pretty subtle. Ideally the implicit children
|
||||
* list would be protected by the umem_mutex, however that is not
|
||||
* possible. Instead this uses a weaker update-then-lock pattern:
|
||||
*
|
||||
* srcu_read_lock()
|
||||
* <change children list>
|
||||
* mutex_lock(umem_mutex)
|
||||
* mlx5_ib_update_xlt()
|
||||
* mutex_unlock(umem_mutex)
|
||||
* destroy lkey
|
||||
*
|
||||
* i.e. any change to the children list must be followed by the locked
|
||||
* update_xlt before destroying.
|
||||
*
|
||||
* The umem_mutex provides the acquire/release semantic needed to make
|
||||
* the children list visible to a racing thread. While SRCU is not
|
||||
* technically required, using it gives consistent use of the SRCU
|
||||
* locking around the children list.
|
||||
*/
|
||||
lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
|
||||
lockdep_assert_held(&mr->dev->mr_srcu);
|
||||
|
||||
odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
|
||||
nentries * MLX5_IMR_MTT_SIZE, mr);
|
||||
|
||||
@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work)
|
||||
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
|
||||
int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
|
||||
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
|
||||
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
|
||||
int srcu_key;
|
||||
|
||||
mr->parent = NULL;
|
||||
synchronize_srcu(&mr->dev->mr_srcu);
|
||||
|
||||
ib_umem_odp_release(odp);
|
||||
if (imr->live)
|
||||
if (smp_load_acquire(&imr->live)) {
|
||||
srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
|
||||
mutex_lock(&odp_imr->umem_mutex);
|
||||
mlx5_ib_update_xlt(imr, idx, 1, 0,
|
||||
MLX5_IB_UPD_XLT_INDIRECT |
|
||||
MLX5_IB_UPD_XLT_ATOMIC);
|
||||
mutex_unlock(&odp_imr->umem_mutex);
|
||||
srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
|
||||
}
|
||||
ib_umem_odp_release(odp);
|
||||
mlx5_mr_cache_free(mr->dev, mr);
|
||||
|
||||
if (atomic_dec_and_test(&imr->num_leaf_free))
|
||||
@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
|
||||
idx - blk_start_idx + 1, 0,
|
||||
MLX5_IB_UPD_XLT_ZAP |
|
||||
MLX5_IB_UPD_XLT_ATOMIC);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
/*
|
||||
* We are now sure that the device will not access the
|
||||
* memory. We can safely unmap it, and mark it as dirty if
|
||||
@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
|
||||
|
||||
if (unlikely(!umem_odp->npages && mr->parent &&
|
||||
!umem_odp->dying)) {
|
||||
WRITE_ONCE(umem_odp->dying, 1);
|
||||
WRITE_ONCE(mr->live, 0);
|
||||
umem_odp->dying = 1;
|
||||
atomic_inc(&mr->parent->num_leaf_free);
|
||||
schedule_work(&umem_odp->work);
|
||||
}
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
|
||||
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
|
||||
@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
|
||||
mr->ibmr.lkey = mr->mmkey.key;
|
||||
mr->ibmr.rkey = mr->mmkey.key;
|
||||
|
||||
mr->live = 1;
|
||||
|
||||
mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
|
||||
mr->mmkey.key, dev->mdev, mr);
|
||||
|
||||
@ -484,6 +513,8 @@ static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
|
||||
mtt->parent = mr;
|
||||
INIT_WORK(&odp->work, mr_leaf_free_action);
|
||||
|
||||
smp_store_release(&mtt->live, 1);
|
||||
|
||||
if (!nentries)
|
||||
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
|
||||
nentries++;
|
||||
@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
|
||||
init_waitqueue_head(&imr->q_leaf_free);
|
||||
atomic_set(&imr->num_leaf_free, 0);
|
||||
atomic_set(&imr->num_pending_prefetch, 0);
|
||||
smp_store_release(&imr->live, 1);
|
||||
|
||||
return imr;
|
||||
}
|
||||
@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
|
||||
if (mr->parent != imr)
|
||||
continue;
|
||||
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
|
||||
ib_umem_end(umem_odp));
|
||||
|
||||
if (umem_odp->dying)
|
||||
if (umem_odp->dying) {
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
WRITE_ONCE(umem_odp->dying, 1);
|
||||
umem_odp->dying = 1;
|
||||
atomic_inc(&imr->num_leaf_free);
|
||||
schedule_work(&umem_odp->work);
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
}
|
||||
up_read(&per_mm->umem_rwsem);
|
||||
|
||||
@ -773,7 +809,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
|
||||
switch (mmkey->type) {
|
||||
case MLX5_MKEY_MR:
|
||||
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
|
||||
if (!mr->live || !mr->ibmr.pd) {
|
||||
if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
|
||||
mlx5_ib_dbg(dev, "got dead MR\n");
|
||||
ret = -EFAULT;
|
||||
goto srcu_unlock;
|
||||
@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
|
||||
|
||||
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
|
||||
|
||||
if (mr->ibmr.pd != pd) {
|
||||
if (!smp_load_acquire(&mr->live)) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!mr->live) {
|
||||
if (mr->ibmr.pd != pd) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
|
||||
|
||||
pvrdma_page_dir_cleanup(dev, &srq->pdir);
|
||||
|
||||
kfree(srq);
|
||||
|
||||
atomic_dec(&dev->num_srqs);
|
||||
}
|
||||
|
||||
|
@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp)
|
||||
*/
|
||||
void siw_qp_llp_write_space(struct sock *sk)
|
||||
{
|
||||
struct siw_cep *cep = sk_to_cep(sk);
|
||||
struct siw_cep *cep;
|
||||
|
||||
cep->sk_write_space(sk);
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
|
||||
if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
|
||||
(void)siw_sq_start(cep->qp);
|
||||
cep = sk_to_cep(sk);
|
||||
if (cep) {
|
||||
cep->sk_write_space(sk);
|
||||
|
||||
if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
|
||||
(void)siw_sq_start(cep->qp);
|
||||
}
|
||||
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
}
|
||||
|
||||
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
|
||||
|
@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
|
||||
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
|
||||
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
|
||||
struct xarray *mkeys = &dev->priv.mkey_table;
|
||||
struct mlx5_core_mkey *deleted_mkey;
|
||||
unsigned long flags;
|
||||
|
||||
xa_lock_irqsave(mkeys, flags);
|
||||
deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
|
||||
__xa_erase(mkeys, mlx5_base_mkey(mkey->key));
|
||||
xa_unlock_irqrestore(mkeys, flags);
|
||||
if (!deleted_mkey) {
|
||||
mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
|
||||
mlx5_base_mkey(mkey->key));
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
|
||||
MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
|
||||
|
Loading…
Reference in New Issue
Block a user