Steve Wise 00f7ec36c9 RDMA/core: Add memory management extensions support
This patch adds support for the IB "base memory management extension"
(BMME) and the equivalent iWARP operations (which the iWARP verbs
mandates all devices must implement).  The new operations are:

 - Allocate an ib_mr for use in fast register work requests.

 - Allocate/free a physical buffer lists for use in fast register work
   requests.  This allows device drivers to allocate this memory as
   needed for use in posting send requests (eg via dma_alloc_coherent).

 - New send queue work requests:
   * send with remote invalidate
   * fast register memory region
   * local invalidate memory region
   * RDMA read with invalidate local memory region (iWARP only)

Consumer interface details:

 - A new device capability flag IB_DEVICE_MEM_MGT_EXTENSIONS is added
   to indicate device support for these features.

 - New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV,
   IB_WR_RDMA_READ_WITH_INV are added.

 - A new consumer API function, ib_alloc_mr() is added to allocate
   fast register memory regions.

 - New consumer API functions, ib_alloc_fast_reg_page_list() and
   ib_free_fast_reg_page_list() are added to allocate and free
   device-specific memory for fast registration page lists.

 - A new consumer API function, ib_update_fast_reg_key(), is added to
   allow the key portion of the R_Key and L_Key of a fast registration
   MR to be updated.  Consumers call this if desired before posting
   a IB_WR_FAST_REG_MR work request.

Consumers can use this as follows:

 - MR is allocated with ib_alloc_mr().

 - Page list memory is allocated with ib_alloc_fast_reg_page_list().

 - MR R_Key/L_Key "key" field is updated with ib_update_fast_reg_key().

 - MR made VALID and bound to a specific page list via
   ib_post_send(IB_WR_FAST_REG_MR)

 - MR made INVALID via ib_post_send(IB_WR_LOCAL_INV),
   ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with
   invalidate operation.

 - MR is deallocated with ib_dereg_mr()

 - page lists dealloced via ib_free_fast_reg_page_list().

Applications can allocate a fast register MR once, and then can
repeatedly bind the MR to different physical block lists (PBLs) via
posting work requests to a send queue (SQ).  For each outstanding
MR-to-PBL binding in the SQ pipe, a fast_reg_page_list needs to be
allocated (the fast_reg_page_list is owned by the low-level driver
from the consumer posting a work request until the request completes).
Thus pipelining can be achieved while still allowing device-specific
page_list processing.

The 32-bit fast register memory key/STag is composed of a 24-bit index
and an 8-bit key.  The application can change the key each time it
fast registers thus allowing more control over the peer's use of the
key/STag (ie it can effectively be changed each time the rkey is
rebound to a page list).

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
2008-07-14 23:48:45 -07:00

2166 lines
53 KiB
C

/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/file.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include "uverbs.h"
static struct lock_class_key pd_lock_key;
static struct lock_class_key mr_lock_key;
static struct lock_class_key cq_lock_key;
static struct lock_class_key qp_lock_key;
static struct lock_class_key ah_lock_key;
static struct lock_class_key srq_lock_key;
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
(udata)->inbuf = (void __user *) (ibuf); \
(udata)->outbuf = (void __user *) (obuf); \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
} while (0)
/*
* The ib_uobject locking scheme is as follows:
*
* - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
* needs to be held during all idr operations. When an object is
* looked up, a reference must be taken on the object's kref before
* dropping this lock.
*
* - Each object also has an rwsem. This rwsem must be held for
* reading while an operation that uses the object is performed.
* For example, while registering an MR, the associated PD's
* uobject.mutex must be held for reading. The rwsem must be held
* for writing while initializing or destroying an object.
*
* - In addition, each object has a "live" flag. If this flag is not
* set, then lookups of the object will fail even if it is found in
* the idr. This handles a reader that blocks and does not acquire
* the rwsem until after the object is destroyed. The destroy
* operation will set the live flag to 0 and then drop the rwsem;
* this will allow the reader to acquire the rwsem, see that the
* live flag is 0, and then drop the rwsem and its reference to
* object. The underlying storage will not be freed until the last
* reference to the object is dropped.
*/
static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
struct ib_ucontext *context, struct lock_class_key *key)
{
uobj->user_handle = user_handle;
uobj->context = context;
kref_init(&uobj->ref);
init_rwsem(&uobj->mutex);
lockdep_set_class(&uobj->mutex, key);
uobj->live = 0;
}
static void release_uobj(struct kref *kref)
{
kfree(container_of(kref, struct ib_uobject, ref));
}
static void put_uobj(struct ib_uobject *uobj)
{
kref_put(&uobj->ref, release_uobj);
}
static void put_uobj_read(struct ib_uobject *uobj)
{
up_read(&uobj->mutex);
put_uobj(uobj);
}
static void put_uobj_write(struct ib_uobject *uobj)
{
up_write(&uobj->mutex);
put_uobj(uobj);
}
static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
{
int ret;
retry:
if (!idr_pre_get(idr, GFP_KERNEL))
return -ENOMEM;
spin_lock(&ib_uverbs_idr_lock);
ret = idr_get_new(idr, uobj, &uobj->id);
spin_unlock(&ib_uverbs_idr_lock);
if (ret == -EAGAIN)
goto retry;
return ret;
}
void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
{
spin_lock(&ib_uverbs_idr_lock);
idr_remove(idr, uobj->id);
spin_unlock(&ib_uverbs_idr_lock);
}
static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
struct ib_ucontext *context)
{
struct ib_uobject *uobj;
spin_lock(&ib_uverbs_idr_lock);
uobj = idr_find(idr, id);
if (uobj) {
if (uobj->context == context)
kref_get(&uobj->ref);
else
uobj = NULL;
}
spin_unlock(&ib_uverbs_idr_lock);
return uobj;
}
static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
struct ib_ucontext *context, int nested)
{
struct ib_uobject *uobj;
uobj = __idr_get_uobj(idr, id, context);
if (!uobj)
return NULL;
if (nested)
down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
else
down_read(&uobj->mutex);
if (!uobj->live) {
put_uobj_read(uobj);
return NULL;
}
return uobj;
}
static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
struct ib_ucontext *context)
{
struct ib_uobject *uobj;
uobj = __idr_get_uobj(idr, id, context);
if (!uobj)
return NULL;
down_write(&uobj->mutex);
if (!uobj->live) {
put_uobj_write(uobj);
return NULL;
}
return uobj;
}
static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
int nested)
{
struct ib_uobject *uobj;
uobj = idr_read_uobj(idr, id, context, nested);
return uobj ? uobj->object : NULL;
}
static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
}
static void put_pd_read(struct ib_pd *pd)
{
put_uobj_read(pd->uobject);
}
static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
{
return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
}
static void put_cq_read(struct ib_cq *cq)
{
put_uobj_read(cq->uobject);
}
static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
}
static void put_ah_read(struct ib_ah *ah)
{
put_uobj_read(ah->uobject);
}
static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
static void put_qp_read(struct ib_qp *qp)
{
put_uobj_read(qp->uobject);
}
static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
}
static void put_srq_read(struct ib_srq *srq)
{
put_uobj_read(srq->uobject);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
struct ib_device *ibdev = file->device->ib_dev;
struct ib_ucontext *ucontext;
struct file *filp;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
mutex_lock(&file->mutex);
if (file->ucontext) {
ret = -EINVAL;
goto err;
}
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(file->ucontext);
goto err;
}
ucontext->device = ibdev;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
INIT_LIST_HEAD(&ucontext->cq_list);
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_free;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_file;
}
file->async_file = filp->private_data;
INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
ib_uverbs_event_handler);
ret = ib_register_event_handler(&file->event_handler);
if (ret)
goto err_file;
kref_get(&file->async_file->ref);
kref_get(&file->ref);
file->ucontext = ucontext;
fd_install(resp.async_fd, filp);
mutex_unlock(&file->mutex);
return in_len;
err_file:
put_unused_fd(resp.async_fd);
fput(filp);
err_free:
ibdev->dealloc_ucontext(ucontext);
err:
mutex_unlock(&file->mutex);
return ret;
}
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
struct ib_device_attr attr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
ret = ib_query_device(file->device->ib_dev, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
resp.fw_ver = attr.fw_ver;
resp.node_guid = file->device->ib_dev->node_guid;
resp.sys_image_guid = attr.sys_image_guid;
resp.max_mr_size = attr.max_mr_size;
resp.page_size_cap = attr.page_size_cap;
resp.vendor_id = attr.vendor_id;
resp.vendor_part_id = attr.vendor_part_id;
resp.hw_ver = attr.hw_ver;
resp.max_qp = attr.max_qp;
resp.max_qp_wr = attr.max_qp_wr;
resp.device_cap_flags = attr.device_cap_flags;
resp.max_sge = attr.max_sge;
resp.max_sge_rd = attr.max_sge_rd;
resp.max_cq = attr.max_cq;
resp.max_cqe = attr.max_cqe;
resp.max_mr = attr.max_mr;
resp.max_pd = attr.max_pd;
resp.max_qp_rd_atom = attr.max_qp_rd_atom;
resp.max_ee_rd_atom = attr.max_ee_rd_atom;
resp.max_res_rd_atom = attr.max_res_rd_atom;
resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
resp.atomic_cap = attr.atomic_cap;
resp.max_ee = attr.max_ee;
resp.max_rdd = attr.max_rdd;
resp.max_mw = attr.max_mw;
resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
resp.max_mcast_grp = attr.max_mcast_grp;
resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
resp.max_ah = attr.max_ah;
resp.max_fmr = attr.max_fmr;
resp.max_map_per_fmr = attr.max_map_per_fmr;
resp.max_srq = attr.max_srq;
resp.max_srq_wr = attr.max_srq_wr;
resp.max_srq_sge = attr.max_srq_sge;
resp.max_pkeys = attr.max_pkeys;
resp.local_ca_ack_delay = attr.local_ca_ack_delay;
resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
return in_len;
}
ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
struct ib_port_attr attr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
resp.state = attr.state;
resp.max_mtu = attr.max_mtu;
resp.active_mtu = attr.active_mtu;
resp.gid_tbl_len = attr.gid_tbl_len;
resp.port_cap_flags = attr.port_cap_flags;
resp.max_msg_sz = attr.max_msg_sz;
resp.bad_pkey_cntr = attr.bad_pkey_cntr;
resp.qkey_viol_cntr = attr.qkey_viol_cntr;
resp.pkey_tbl_len = attr.pkey_tbl_len;
resp.lid = attr.lid;
resp.sm_lid = attr.sm_lid;
resp.lmc = attr.lmc;
resp.max_vl_num = attr.max_vl_num;
resp.sm_sl = attr.sm_sl;
resp.subnet_timeout = attr.subnet_timeout;
resp.init_type_reply = attr.init_type_reply;
resp.active_width = attr.active_width;
resp.active_speed = attr.active_speed;
resp.phys_state = attr.phys_state;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
return in_len;
}
ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_alloc_pd cmd;
struct ib_uverbs_alloc_pd_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
down_write(&uobj->mutex);
pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
file->ucontext, &udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
}
pd->device = file->device->ib_dev;
pd->uobject = uobj;
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
if (ret)
goto err_idr;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
mutex_lock(&file->mutex);
list_add_tail(&uobj->list, &file->ucontext->pd_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
up_write(&uobj->mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
err_idr:
ib_dealloc_pd(pd);
err:
put_uobj_write(uobj);
return ret;
}
ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
if (!uobj)
return -EINVAL;
ret = ib_dealloc_pd(uobj->object);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
put_uobj(uobj);
return in_len;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
/*
* Local write permission is required if remote write or
* remote atomic permission is also requested.
*/
if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
!(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
}
mr->device = pd->device;
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
if (ret)
goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
put_pd_read(pd);
mutex_lock(&file->mutex);
list_add_tail(&uobj->list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
up_write(&uobj->mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
err_unreg:
ib_dereg_mr(mr);
err_put:
put_pd_read(pd);
err_free:
put_uobj_write(uobj);
return ret;
}
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
if (!uobj)
return -EINVAL;
mr = uobj->object;
ret = ib_dereg_mr(mr);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
put_uobj(uobj);
return in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct file *filp;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
if (IS_ERR(filp))
return PTR_ERR(filp);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
put_unused_fd(resp.fd);
fput(filp);
return -EFAULT;
}
fd_install(resp.fd, filp);
return in_len;
}
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_create_cq_resp resp;
struct ib_udata udata;
struct ib_ucq_object *obj;
struct ib_uverbs_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
if (cmd.comp_vector >= file->device->num_comp_vectors)
return -EINVAL;
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
down_write(&obj->uobject.mutex);
if (cmd.comp_channel >= 0) {
ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
if (!ev_file) {
ret = -EINVAL;
goto err;
}
}
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
cmd.comp_vector,
file->ucontext, &udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
}
cq->device = file->device->ib_dev;
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
if (ret)
goto err_free;
memset(&resp, 0, sizeof resp);
resp.cq_handle = obj->uobject.id;
resp.cqe = cq->cqe;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
mutex_lock(&file->mutex);
list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
mutex_unlock(&file->mutex);
obj->uobject.live = 1;
up_write(&obj->uobject.mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
err_free:
ib_destroy_cq(cq);
err_file:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
err:
put_uobj_write(&obj->uobject);
return ret;
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp;
struct ib_udata udata;
struct ib_cq *cq;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!cq)
return -EINVAL;
ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp.cqe))
ret = -EFAULT;
out:
put_cq_read(cq);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp *resp;
struct ib_cq *cq;
struct ib_wc *wc;
int ret = 0;
int i;
int rsize;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
if (!wc)
return -ENOMEM;
rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
resp = kmalloc(rsize, GFP_KERNEL);
if (!resp) {
ret = -ENOMEM;
goto out_wc;
}
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!cq) {
ret = -EINVAL;
goto out;
}
resp->count = ib_poll_cq(cq, cmd.ne, wc);
put_cq_read(cq);
for (i = 0; i < resp->count; i++) {
resp->wc[i].wr_id = wc[i].wr_id;
resp->wc[i].status = wc[i].status;
resp->wc[i].opcode = wc[i].opcode;
resp->wc[i].vendor_err = wc[i].vendor_err;
resp->wc[i].byte_len = wc[i].byte_len;
resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
resp->wc[i].qp_num = wc[i].qp->qp_num;
resp->wc[i].src_qp = wc[i].src_qp;
resp->wc[i].wc_flags = wc[i].wc_flags;
resp->wc[i].pkey_index = wc[i].pkey_index;
resp->wc[i].slid = wc[i].slid;
resp->wc[i].sl = wc[i].sl;
resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
resp->wc[i].port_num = wc[i].port_num;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
ret = -EFAULT;
out:
kfree(resp);
out_wc:
kfree(wc);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
put_cq_read(cq);
return in_len;
}
ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
struct ib_uverbs_event_file *ev_file;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
if (!uobj)
return -EINVAL;
cq = uobj->object;
ev_file = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
ret = ib_destroy_cq(cq);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
ib_uverbs_release_ucq(file, ev_file, obj);
memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
put_uobj(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
return in_len;
}
ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_create_qp_resp resp;
struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
struct ib_qp *qp;
struct ib_qp_init_attr attr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
down_write(&obj->uevent.uobject.mutex);
srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
ret = -EINVAL;
goto err_put;
}
attr.event_handler = ib_uverbs_qp_event_handler;
attr.qp_context = file;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
attr.create_flags = 0;
attr.cap.max_send_wr = cmd.max_send_wr;
attr.cap.max_recv_wr = cmd.max_recv_wr;
attr.cap.max_send_sge = cmd.max_send_sge;
attr.cap.max_recv_sge = cmd.max_recv_sge;
attr.cap.max_inline_data = cmd.max_inline_data;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
qp = pd->device->create_qp(pd, &attr, &udata);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
qp->device = pd->device;
qp->pd = pd;
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
qp->srq = attr.srq;
qp->uobject = &obj->uevent.uobject;
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
atomic_inc(&pd->usecnt);
atomic_inc(&attr.send_cq->usecnt);
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
if (ret)
goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
resp.max_recv_sge = attr.cap.max_recv_sge;
resp.max_send_sge = attr.cap.max_send_sge;
resp.max_recv_wr = attr.cap.max_recv_wr;
resp.max_send_wr = attr.cap.max_send_wr;
resp.max_inline_data = attr.cap.max_inline_data;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
put_pd_read(pd);
put_cq_read(scq);
if (rcq != scq)
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
mutex_unlock(&file->mutex);
obj->uevent.uobject.live = 1;
up_write(&obj->uevent.uobject.mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
err_destroy:
ib_destroy_qp(qp);
err_put:
if (pd)
put_pd_read(pd);
if (scq)
put_cq_read(scq);
if (rcq && rcq != scq)
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
put_uobj_write(&obj->uevent.uobject);
return ret;
}
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
if (!attr || !init_attr) {
ret = -ENOMEM;
goto out;
}
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
put_qp_read(qp);
if (ret)
goto out;
memset(&resp, 0, sizeof resp);
resp.qp_state = attr->qp_state;
resp.cur_qp_state = attr->cur_qp_state;
resp.path_mtu = attr->path_mtu;
resp.path_mig_state = attr->path_mig_state;
resp.qkey = attr->qkey;
resp.rq_psn = attr->rq_psn;
resp.sq_psn = attr->sq_psn;
resp.dest_qp_num = attr->dest_qp_num;
resp.qp_access_flags = attr->qp_access_flags;
resp.pkey_index = attr->pkey_index;
resp.alt_pkey_index = attr->alt_pkey_index;
resp.sq_draining = attr->sq_draining;
resp.max_rd_atomic = attr->max_rd_atomic;
resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
resp.min_rnr_timer = attr->min_rnr_timer;
resp.port_num = attr->port_num;
resp.timeout = attr->timeout;
resp.retry_cnt = attr->retry_cnt;
resp.rnr_retry = attr->rnr_retry;
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
resp.dest.flow_label = attr->ah_attr.grh.flow_label;
resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
resp.dest.dlid = attr->ah_attr.dlid;
resp.dest.sl = attr->ah_attr.sl;
resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
resp.dest.static_rate = attr->ah_attr.static_rate;
resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
resp.dest.port_num = attr->ah_attr.port_num;
memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
resp.alt_dest.sl = attr->alt_ah_attr.sl;
resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
resp.max_send_sge = init_attr->cap.max_send_sge;
resp.max_recv_sge = init_attr->cap.max_recv_sge;
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
kfree(attr);
kfree(init_attr);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_modify_qp cmd;
struct ib_udata udata;
struct ib_qp *qp;
struct ib_qp_attr *attr;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr)
return -ENOMEM;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
attr->qp_state = cmd.qp_state;
attr->cur_qp_state = cmd.cur_qp_state;
attr->path_mtu = cmd.path_mtu;
attr->path_mig_state = cmd.path_mig_state;
attr->qkey = cmd.qkey;
attr->rq_psn = cmd.rq_psn;
attr->sq_psn = cmd.sq_psn;
attr->dest_qp_num = cmd.dest_qp_num;
attr->qp_access_flags = cmd.qp_access_flags;
attr->pkey_index = cmd.pkey_index;
attr->alt_pkey_index = cmd.alt_pkey_index;
attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
attr->max_rd_atomic = cmd.max_rd_atomic;
attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
attr->min_rnr_timer = cmd.min_rnr_timer;
attr->port_num = cmd.port_num;
attr->timeout = cmd.timeout;
attr->retry_cnt = cmd.retry_cnt;
attr->rnr_retry = cmd.rnr_retry;
attr->alt_port_num = cmd.alt_port_num;
attr->alt_timeout = cmd.alt_timeout;
memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
attr->ah_attr.dlid = cmd.dest.dlid;
attr->ah_attr.sl = cmd.dest.sl;
attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
attr->ah_attr.static_rate = cmd.dest.static_rate;
attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
attr->ah_attr.port_num = cmd.dest.port_num;
memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
attr->alt_ah_attr.sl = cmd.alt_dest.sl;
attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
put_qp_read(qp);
if (ret)
goto out;
ret = in_len;
out:
kfree(attr);
return ret;
}
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
struct ib_qp *qp;
struct ib_uqp_object *obj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
memset(&resp, 0, sizeof resp);
uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
if (!uobj)
return -EINVAL;
qp = uobj->object;
obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
if (!list_empty(&obj->mcast_list)) {
put_uobj_write(uobj);
return -EBUSY;
}
ret = ib_destroy_qp(qp);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
ib_uverbs_release_uevent(file, &obj->uevent);
resp.events_reported = obj->uevent.events_reported;
put_uobj(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
return in_len;
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
struct ib_uverbs_send_wr *user_wr;
struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
ssize_t ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
cmd.sge_count * sizeof (struct ib_uverbs_sge))
return -EINVAL;
if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
return -EINVAL;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
is_ud = qp->qp_type == IB_QPT_UD;
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
if (copy_from_user(user_wr,
buf + sizeof cmd + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
}
if (user_wr->num_sge + sg_ind > cmd.sge_count) {
ret = -EINVAL;
goto out_put;
}
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
if (!next) {
ret = -ENOMEM;
goto out_put;
}
if (!last)
wr = next;
else
last->next = next;
last = next;
next->next = NULL;
next->wr_id = user_wr->wr_id;
next->num_sge = user_wr->num_sge;
next->opcode = user_wr->opcode;
next->send_flags = user_wr->send_flags;
if (is_ud) {
next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
file->ucontext);
if (!next->wr.ud.ah) {
ret = -EINVAL;
goto out_put;
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
next->ex.imm_data =
(__be32 __force) user_wr->ex.imm_data;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
next->wr.rdma.remote_addr =
user_wr->wr.rdma.remote_addr;
next->wr.rdma.rkey =
user_wr->wr.rdma.rkey;
break;
case IB_WR_SEND_WITH_IMM:
next->ex.imm_data =
(__be32 __force) user_wr->ex.imm_data;
break;
case IB_WR_SEND_WITH_INV:
next->ex.invalidate_rkey =
user_wr->ex.invalidate_rkey;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
next->wr.atomic.remote_addr =
user_wr->wr.atomic.remote_addr;
next->wr.atomic.compare_add =
user_wr->wr.atomic.compare_add;
next->wr.atomic.swap = user_wr->wr.atomic.swap;
next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
break;
default:
break;
}
}
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
if (copy_from_user(next->sg_list,
buf + sizeof cmd +
cmd.wr_count * cmd.wqe_size +
sg_ind * sizeof (struct ib_sge),
next->num_sge * sizeof (struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
sg_ind += next->num_sge;
} else
next->sg_list = NULL;
}
resp.bad_wr = 0;
ret = qp->device->post_send(qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out_put:
put_qp_read(qp);
while (wr) {
if (is_ud && wr->wr.ud.ah)
put_ah_read(wr->wr.ud.ah);
next = wr->next;
kfree(wr);
wr = next;
}
out:
kfree(user_wr);
return ret ? ret : in_len;
}
static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
int in_len,
u32 wr_count,
u32 sge_count,
u32 wqe_size)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
if (in_len < wqe_size * wr_count +
sge_count * sizeof (struct ib_uverbs_sge))
return ERR_PTR(-EINVAL);
if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
return ERR_PTR(-ENOMEM);
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
if (copy_from_user(user_wr, buf + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
}
if (user_wr->num_sge + sg_ind > sge_count) {
ret = -EINVAL;
goto err;
}
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
if (!next) {
ret = -ENOMEM;
goto err;
}
if (!last)
wr = next;
else
last->next = next;
last = next;
next->next = NULL;
next->wr_id = user_wr->wr_id;
next->num_sge = user_wr->num_sge;
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
if (copy_from_user(next->sg_list,
buf + wr_count * wqe_size +
sg_ind * sizeof (struct ib_sge),
next->num_sge * sizeof (struct ib_sge))) {
ret = -EFAULT;
goto err;
}
sg_ind += next->num_sge;
} else
next->sg_list = NULL;
}
kfree(user_wr);
return wr;
err:
kfree(user_wr);
while (wr) {
next = wr->next;
kfree(wr);
wr = next;
}
return ERR_PTR(ret);
}
ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
struct ib_recv_wr *wr, *next, *bad_wr;
struct ib_qp *qp;
ssize_t ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
in_len - sizeof cmd, cmd.wr_count,
cmd.sge_count, cmd.wqe_size);
if (IS_ERR(wr))
return PTR_ERR(wr);
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp, wr, &bad_wr);
put_qp_read(qp);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
while (wr) {
next = wr->next;
kfree(wr);
wr = next;
}
return ret ? ret : in_len;
}
ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
struct ib_recv_wr *wr, *next, *bad_wr;
struct ib_srq *srq;
ssize_t ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
in_len - sizeof cmd, cmd.wr_count,
cmd.sge_count, cmd.wqe_size);
if (IS_ERR(wr))
return PTR_ERR(wr);
srq = idr_read_srq(cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
put_srq_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
while (wr) {
next = wr->next;
kfree(wr);
wr = next;
}
return ret ? ret : in_len;
}
ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
struct ib_ah_attr attr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
attr.dlid = cmd.attr.dlid;
attr.sl = cmd.attr.sl;
attr.src_path_bits = cmd.attr.src_path_bits;
attr.static_rate = cmd.attr.static_rate;
attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
attr.port_num = cmd.attr.port_num;
attr.grh.flow_label = cmd.attr.grh.flow_label;
attr.grh.sgid_index = cmd.attr.grh.sgid_index;
attr.grh.hop_limit = cmd.attr.grh.hop_limit;
attr.grh.traffic_class = cmd.attr.grh.traffic_class;
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
ah = ib_create_ah(pd, &attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
ah->uobject = uobj;
uobj->object = ah;
ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
if (ret)
goto err_destroy;
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
put_pd_read(pd);
mutex_lock(&file->mutex);
list_add_tail(&uobj->list, &file->ucontext->ah_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
up_write(&uobj->mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
err_destroy:
ib_destroy_ah(ah);
err_put:
put_pd_read(pd);
err:
put_uobj_write(uobj);
return ret;
}
ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
if (!uobj)
return -EINVAL;
ah = uobj->object;
ret = ib_destroy_ah(ah);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
put_uobj(uobj);
return in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
struct ib_uqp_object *obj;
struct ib_uverbs_mcast_entry *mcast;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
ret = 0;
goto out_put;
}
mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
if (!mcast) {
ret = -ENOMEM;
goto out_put;
}
mcast->lid = cmd.mlid;
memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
if (!ret)
list_add_tail(&mcast->list, &obj->mcast_list);
else
kfree(mcast);
out_put:
put_qp_read(qp);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
if (ret)
goto out_put;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
break;
}
out_put:
put_qp_read(qp);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_srq_resp resp;
struct ib_udata udata;
struct ib_uevent_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_srq_init_attr attr;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
down_write(&obj->uobject.mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
attr.event_handler = ib_uverbs_srq_event_handler;
attr.srq_context = file;
attr.attr.max_wr = cmd.max_wr;
attr.attr.max_sge = cmd.max_sge;
attr.attr.srq_limit = cmd.srq_limit;
obj->events_reported = 0;
INIT_LIST_HEAD(&obj->event_list);
srq = pd->device->create_srq(pd, &attr, &udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto err_put;
}
srq->device = pd->device;
srq->pd = pd;
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
obj->uobject.object = srq;
ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
if (ret)
goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uobject.id;
resp.max_wr = attr.attr.max_wr;
resp.max_sge = attr.attr.max_sge;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
put_pd_read(pd);
mutex_lock(&file->mutex);
list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
mutex_unlock(&file->mutex);
obj->uobject.live = 1;
up_write(&obj->uobject.mutex);
return in_len;
err_copy:
idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
err_destroy:
ib_destroy_srq(srq);
err_put:
put_pd_read(pd);
err:
put_uobj_write(&obj->uobject);
return ret;
}
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_modify_srq cmd;
struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
srq = idr_read_srq(cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
put_srq_read(srq);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
struct ib_srq_attr attr;
struct ib_srq *srq;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
srq = idr_read_srq(cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
put_srq_read(srq);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
resp.max_wr = attr.max_wr;
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
return in_len;
}
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
if (!uobj)
return -EINVAL;
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
ret = ib_destroy_srq(srq);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (ret)
return ret;
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
ib_uverbs_release_uevent(file, obj);
memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
put_uobj(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
return ret ? ret : in_len;
}