Main batch of InfiniBand/RDMA changes for 3.14:

  - Flow steering for InfiniBand UD traffic
  - IP-based addressing for IBoE aka RoCE
  - Pass SRP submaintainership from Dave to Bart
  - SRP transport fixes from Bart
  - Add the new Cisco usNIC low-level device driver
  - Various other fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.15 (GNU/Linux)
 
 iQIcBAABCAAGBQJS4rzgAAoJEENa44ZhAt0hOL4P/jrNK2GeXqfzWIURL5MYtG9A
 YK8xnRonlKQNo850E0WuC5wHCHqQ6Pqze+PL1rgR/MegNGrQ577qKo2eYumnMHSy
 NO2BNhHa+5cUf04dXWOeJgyMTqo7CKwO7trZ6KwD+HFBAZqLDTFHPklH0qMI2bF6
 U8HbKVslrvaDL3PywHop9Gxh9fWKY8ngw7LWPKkm5PQ0BFw8lZLOrGhWYr1MfJoY
 iptf+wqehqlO8u7khfpo8tvar0hGbRYrUanx94RU/B5FbiQN936AXURtmbM+4MDD
 o0QhzJKaaCmB1eYaeLsrEHyGcgAnifFPNzq/SLeRvL3TYIfMvTFWDECZDsdz0n5y
 YuyuIQvs3FcbP9C014e2o8SXEfdJoR4Ht6XH2+wwDCD55t66ZnBHupYiVdYEJz09
 UKBvvlY+v5cdzUmOeut21NgLHqQ/zpqWihfEFdTwXBNmKY27Ai9JILpFnrRCppYh
 mawcEPKEXX1c0Adr1bXXsFBWhONgEOQFoth4FpVK31hJ1o2F9EyTZdLObcNHWcts
 NdzOQ9S5UDcYN5OYfCM183cf/JmDBJB3Q01ms/1L2rhpLnoYA/Mj2BDzF+82FsMK
 0BrPm7vX28a1mUVgLgpEpk1VEhJUJxzrmK8xogV9dU6vyGWmXPWhmBolLbPCx3SY
 6dQ6u8v8tdl54CXStFuP
 =wAWA
 -----END PGP SIGNATURE-----

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband updates from Roland Dreier:
 "Main batch of InfiniBand/RDMA changes for 3.14:
   - Flow steering for InfiniBand UD traffic
   - IP-based addressing for IBoE aka RoCE
   - Pass SRP submaintainership from Dave to Bart
   - SRP transport fixes from Bart
   - Add the new Cisco usNIC low-level device driver
   - Various other fixes"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (75 commits)
  IB/mlx5: Verify reserved fields are cleared
  IB/mlx5: Remove old field for create mkey mailbox
  IB/mlx5: Abort driver cleanup if teardown hca fails
  IB/mlx5: Allow creation of QPs with zero-length work queues
  mlx5_core: Fix PowerPC support
  mlx5_core: Improve debugfs readability
  IB/mlx5: Add support for resize CQ
  IB/mlx5: Implement modify CQ
  IB/mlx5: Make sure doorbell record is visible before doorbell
  mlx5_core: Use mlx5 core style warning
  IB/mlx5: Clear out struct before create QP command
  mlx5_core: Fix out arg size in access_register command
  RDMA/nes: Slight optimization of Ethernet address compare
  IB/qib: Fix QP check when looping back to/from QP1
  RDMA/cxgb4: Fix gcc warning on 32-bit arch
  IB/usnic: Remove unused includes of <linux/version.h>
  RDMA/amso1100: Add check if cache memory was allocated before freeing it
  IPoIB: Report operstate consistently when brought up without a link
  IB/core: Fix unused variable warning
  RDMA/cma: Handle global/non-linklocal IPv6 addresses in cma_check_linklocal()
  ...
This commit is contained in:
Linus Torvalds 2014-01-24 17:18:32 -08:00
commit 8e585a6c4a
100 changed files with 7678 additions and 582 deletions

View File

@ -0,0 +1,7 @@
# Render rport_state_diagram.dot to SVG and PNG with Graphviz "dot".
# "all" names no file, so it is declared phony.
.PHONY: all
all: rport_state_diagram.svg rport_state_diagram.png

# Recipe lines must begin with a TAB character.
rport_state_diagram.svg: rport_state_diagram.dot
	dot -Tsvg -o $@ $<

rport_state_diagram.png: rport_state_diagram.dot
	dot -Tpng -o $@ $<

View File

@ -0,0 +1,26 @@
// Graphviz state diagram for an SRP initiator rport. Nodes are rport states;
// edge labels give the timer settings and/or the event that triggers the
// transition.
digraph srp_initiator {
// "running" and "lost" are drawn as double circles; nodes declared after the
// second "node" statement default to a plain circle.
node [shape = doublecircle]; running lost;
node [shape = circle];
// Subgraph with rank = min: asks dot to place running_rta on the minimum rank.
{
rank = min;
running_rta [ label = "running;\nreconnect\ntimer\nactive" ];
};
// Node declarations (labels override the node name where given).
running [ label = "running;\nreconnect\ntimer\nstopped" ];
blocked;
failfast [ label = "fail I/O\nfast" ];
lost;
// Transitions taken when both fast_io_fail_tmo and dev_loss_tmo are off.
running -> running_rta [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nsrp_start_tl_fail_timers()" ];
running_rta -> running [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting succeeded" ];
// Transition taken when at least one of the two timeouts is enabled (>= 0).
running -> blocked [ label = "fast_io_fail_tmo >= 0 or\ndev_loss_tmo >= 0;\nsrp_start_tl_fail_timers()" ];
// NOTE(review): the source of this edge is "running" although its label
// mentions a reconnect failure; confirm whether "running_rta" was intended.
running -> failfast [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting failed\n" ];
// Transitions out of "blocked" and "failfast".
blocked -> failfast [ label = "fast_io_fail_tmo\nexpired or\nreconnecting\nfailed" ];
blocked -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
failfast -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
blocked -> running [ label = "reconnecting\nsucceeded" ];
failfast -> failfast [ label = "reconnecting\nfailed" ];
failfast -> running [ label = "reconnecting\nsucceeded" ];
// srp_stop_rport_timers() moves either running state straight to "lost".
running -> lost [ label = "srp_stop_rport_timers()" ];
running_rta -> lost [ label = "srp_stop_rport_timers()" ];
}

View File

@ -2195,6 +2195,11 @@ M: Nishank Trivedi <nistrive@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
M: Upinder Malhi <umalhi@cisco.com>
S: Supported
F: drivers/infiniband/hw/usnic
CIRRUS LOGIC EP93XX ETHERNET DRIVER
M: Hartley Sweeten <hsweeten@visionengravers.com>
L: netdev@vger.kernel.org
@ -7528,7 +7533,7 @@ S: Maintained
F: drivers/scsi/sr*
SCSI RDMA PROTOCOL (SRP) INITIATOR
M: David Dillow <dillowda@ornl.gov>
M: Bart Van Assche <bvanassche@acm.org>
L: linux-rdma@vger.kernel.org
S: Supported
W: http://www.openfabrics.org

View File

@ -3,6 +3,8 @@ menuconfig INFINIBAND
depends on PCI || BROKEN
depends on HAS_IOMEM
depends on NET
depends on INET
depends on m || IPV6 != m
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@ -38,8 +40,7 @@ config INFINIBAND_USER_MEM
config INFINIBAND_ADDR_TRANS
bool
depends on INET
depends on !(INFINIBAND = y && IPV6 = m)
depends on INFINIBAND
default y
source "drivers/infiniband/hw/mthca/Kconfig"
@ -53,6 +54,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"

View File

@ -10,6 +10,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/
obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/

View File

@ -1,8 +1,9 @@
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o iw_cm.o $(infiniband-y)
ib_cm.o iw_cm.o ib_addr.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)

View File

@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_addr_size);
static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_addr.s_addr = fl4.saddr;
if (rt->dst.dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
/*
 * Context handed to resolve_cb() by rdma_addr_find_dmac_by_grh().
 * @addr:   where the resolved device address is stored on success
 * @comp:   completed by the callback once resolution has finished
 * @status: resolution result reported to the callback (0 on success)
 */
struct resolve_cb_context {
	struct rdma_dev_addr *addr;
	struct completion comp;
	int status;
};

/*
 * Completion callback passed to rdma_resolve_ip().
 *
 * Records @status in the context so the waiter can tell success from
 * failure, copies the resolved address only when resolution succeeded,
 * then wakes the waiter.
 */
static void resolve_cb(int status, struct sockaddr *src_addr,
		       struct rdma_dev_addr *addr, void *context)
{
	struct resolve_cb_context *ctx = context;

	if (!status)
		memcpy(ctx->addr, addr, sizeof(*ctx->addr));
	ctx->status = status;
	complete(&ctx->comp);
}

/*
 * rdma_addr_find_dmac_by_grh - resolve the destination MAC for a GID pair
 * @sgid:    source GID, converted to a socket address with rdma_gid2ip()
 * @dgid:    destination GID, converted the same way
 * @dmac:    output buffer; receives ETH_ALEN bytes of the destination address
 * @vlan_id: optional output; when non-NULL receives
 *           rdma_vlan_dev_vlan_id() of the bound net device
 *
 * Starts rdma_resolve_ip() with the file-local client and blocks until the
 * callback has run.
 *
 * Returns 0 on success; a non-zero error from rdma_gid2ip(),
 * rdma_resolve_ip() or the resolution status reported to the callback; or
 * -ENODEV when the bound device index cannot be looked up in init_net.
 */
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
			       u16 *vlan_id)
{
	int ret;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;

	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	if (ret)
		return ret;

	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
	if (ret)
		return ret;

	memset(&dev_addr, 0, sizeof(dev_addr));

	ctx.addr = &dev_addr;
	ctx.status = 0;
	init_completion(&ctx.comp);
	/* NOTE(review): 1000 is the timeout argument; unit presumably ms - confirm. */
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			      &dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	/*
	 * Propagate a failed resolution instead of handing back whatever is
	 * in dev_addr (still zeroed when nothing was resolved).
	 */
	ret = ctx.status;
	if (ret)
		return ret;

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
	dev_put(dev);
	return 0;
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
/*
 * rdma_addr_find_smac_by_sgid - look up the source MAC for a source GID
 * @sgid:    source GID, converted to a socket address with rdma_gid2ip()
 * @smac:    output buffer; receives ETH_ALEN bytes of src_dev_addr
 * @vlan_id: passed through unchanged to rdma_translate_ip() (may be NULL)
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * rdma_gid2ip() or rdma_translate_ip(); @smac is not written on failure.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	struct rdma_dev_addr dev_addr;
	int err;

	err = rdma_gid2ip(&gid_addr._sockaddr, sgid);
	if (err)
		return err;

	/* Start from a zeroed device address before translating. */
	memset(&dev_addr, 0, sizeof(dev_addr));
	err = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (err)
		return err;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return 0;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
@ -461,11 +550,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
return 0;
}
/*
 * Module exit: unregister the file-local rdma_addr client "self", then the
 * netevent notifier, and finally destroy the addr_wq workqueue.
 */
static void __exit addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}

View File

@ -47,6 +47,7 @@
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
@ -177,6 +178,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
u8 valid;
u8 smac[ETH_ALEN];
};
struct cm_work {
@ -346,6 +349,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
/*
 * ib_update_cm_av - store source MAC addresses in a CM id's address vectors
 * @id:       CM id embedded in a struct cm_id_private
 * @smac:     source MAC for the primary address vector, or NULL to leave it
 * @alt_smac: source MAC for the alternate address vector, or NULL to leave it
 *
 * Each non-NULL argument is copied into the matching smac field.
 * Always returns 0.
 */
int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
{
	struct cm_id_private *priv = container_of(id, struct cm_id_private, id);

	if (smac)
		memcpy(priv->av.smac, smac, sizeof(priv->av.smac));

	if (alt_smac)
		memcpy(priv->alt_av.smac, alt_smac, sizeof(priv->alt_av.smac));

	return 0;
}
EXPORT_SYMBOL(ib_update_cm_av);
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
struct cm_device *cm_dev;
@ -376,6 +396,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
memcpy(av->smac, path->smac, sizeof(av->smac));
av->valid = 1;
return 0;
}
@ -1554,6 +1577,9 @@ static int cm_req_handler(struct cm_work *work)
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
@ -3500,6 +3526,32 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
if (!cm_id_priv->av.valid) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return -EINVAL;
}
if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_VID;
}
if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
memcpy(qp_attr->smac, cm_id_priv->av.smac,
sizeof(qp_attr->smac));
*qp_attr_mask |= IB_QP_SMAC;
}
if (cm_id_priv->alt_av.valid) {
if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
qp_attr->alt_vlan_id =
cm_id_priv->alt_av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_ALT_VID;
}
if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
memcpy(qp_attr->alt_smac,
cm_id_priv->alt_av.smac,
sizeof(qp_attr->alt_smac));
*qp_attr_mask |= IB_QP_ALT_SMAC;
}
}
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);

View File

@ -340,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
ret = rdma_translate_ip(addr, dev_addr);
ret = rdma_translate_ip(addr, dev_addr, NULL);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@ -365,7 +365,9 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
return -EINVAL;
mutex_lock(&lock);
iboe_addr_get_sgid(dev_addr, &iboe_gid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
if (listen_id_priv &&
@ -603,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@ -625,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
qp_attr.ah_attr.grh.sgid_index, &sgid);
if (ret)
goto out;
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
== RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
== IB_LINK_LAYER_ETHERNET) {
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
if (ret)
goto out;
}
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@ -725,6 +742,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
@ -1266,6 +1284,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
u8 smac[ETH_ALEN];
u8 alt_smac[ETH_ALEN];
u8 *psmac = smac;
u8 *palt_smac = alt_smac;
int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
RDMA_TRANSPORT_IB) &&
(rdma_port_get_link_layer(cm_id->device,
ib_event->param.req_rcvd.port) ==
IB_LINK_LAYER_ETHERNET));
listen_id = cm_id->context;
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@ -1310,12 +1337,29 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
if (ret)
goto err3;
if (is_iboe) {
if (ib_event->param.req_rcvd.primary_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.primary_path->sgid,
psmac, NULL);
else
psmac = NULL;
if (ib_event->param.req_rcvd.alternate_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.alternate_path->sgid,
palt_smac, NULL);
else
palt_smac = NULL;
}
/*
* Acquire mutex to prevent user executing rdma_destroy_id()
* while we're accessing the cm_id.
*/
mutex_lock(&lock);
if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
if (is_iboe)
ib_update_cm_av(cm_id, psmac, palt_smac);
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
(conn_id->id.qp_type != IB_QPT_UD))
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
@ -1474,7 +1518,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@ -1873,7 +1917,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
u16 vid;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@ -1897,10 +1941,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
vid = rdma_vlan_dev_vlan_id(ndev);
route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
@ -2063,6 +2111,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
RDMA_CM_ADDR_RESOLVED))
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv, NULL);
@ -2072,10 +2121,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = status;
} else {
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
}
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
@ -2480,8 +2527,11 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
return 0;
sin6 = (struct sockaddr_in6 *) addr;
if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
!sin6->sin6_scope_id)
if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return 0;
if (!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
@ -2556,6 +2606,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
if (ret)
@ -2566,7 +2617,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
goto err1;
}
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
if (addr->sa_family == AF_INET)
id_priv->afonly = 1;
@ -3295,7 +3345,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -EINVAL;
goto out2;
}
iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&mc->multicast.ib->rec.port_gid);
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);

View File

@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_cleanup(void);
int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
struct ib_qp_attr *qp_attr, int *qp_attr_mask);
#endif /* _CORE_PRIV_H */

View File

@ -334,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
int ret;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
@ -350,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
ret = cm_id->device->iwcm->destroy_listen(cm_id);
cm_id->device->iwcm->destroy_listen(cm_id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:

View File

@ -42,7 +42,7 @@
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"
@ -556,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
if (force_grh) {
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
ah_attr->vlan_id = rec->vlan_id;
} else {
ah_attr->vlan_id = 0xffff;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@ -670,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
rec.vlan_id = 0xffff;
memset(rec.dmac, 0, ETH_ALEN);
memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);

View File

@ -613,6 +613,7 @@ static ssize_t show_node_type(struct device *device,
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);

View File

@ -655,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
struct rdma_dev_addr *dev_addr;
struct net_device *dev;
u16 vid = 0;
resp->num_paths = route->num_paths;
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (dev) {
vid = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
}
iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
dev_addr->dst_dev_addr, vid);
iboe_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
(union ib_gid *)&resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(0xffff);
break;
case 2:

View File

@ -40,6 +40,7 @@
#include <asm/uaccess.h>
#include "uverbs.h"
#include "core_priv.h"
struct uverbs_lock_class {
struct lock_class_key key;
@ -1961,6 +1962,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
if (qp->real_qp == qp) {
ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
if (ret)
goto out;
ret = qp->device->modify_qp(qp, attr,
modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
} else {

View File

@ -44,6 +44,9 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include "core_priv.h"
int ib_rate_to_mult(enum ib_rate rate)
{
@ -116,6 +119,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IWARP;
case RDMA_NODE_USNIC:
return RDMA_TRANSPORT_USNIC;
case RDMA_NODE_USNIC_UDP:
return RDMA_TRANSPORT_USNIC_UDP;
default:
BUG();
return 0;
@ -133,6 +138,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
case RDMA_TRANSPORT_USNIC:
case RDMA_TRANSPORT_USNIC_UDP:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
@ -192,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
u32 flow_class;
u16 gid_index;
int ret;
int is_eth = (rdma_port_get_link_layer(device, port_num) ==
IB_LINK_LAYER_ETHERNET);
memset(ah_attr, 0, sizeof *ah_attr);
if (is_eth) {
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
if (wc->wc_flags & IB_WC_WITH_SMAC &&
wc->wc_flags & IB_WC_WITH_VLAN) {
memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
ah_attr->vlan_id = wc->vlan_id;
} else {
ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
ah_attr->dmac, &ah_attr->vlan_id);
if (ret)
return ret;
}
} else {
ah_attr->vlan_id = 0xffff;
}
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
@ -476,7 +502,9 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@ -557,6 +585,12 @@ static const struct {
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
.req_param_add_eth = {
[IB_QPT_RC] = (IB_QP_SMAC),
[IB_QPT_UC] = (IB_QP_SMAC),
[IB_QPT_XRC_INI] = (IB_QP_SMAC),
[IB_QPT_XRC_TGT] = (IB_QP_SMAC)
},
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@ -576,7 +610,21 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
}
},
.opt_param_add_eth = {
[IB_QPT_RC] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_UC] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID)
}
}
},
[IB_QPS_RTR] = {
@ -779,7 +827,8 @@ static const struct {
};
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask)
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
@ -798,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if (ll == IB_LINK_LAYER_ETHERNET) {
req_param |= qp_state_table[cur_state][next_state].
req_param_add_eth[type];
opt_param |= qp_state_table[cur_state][next_state].
opt_param_add_eth[type];
}
if ((mask & req_param) != req_param)
return 0;
@ -808,10 +864,51 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
int ret = 0;
union ib_gid sgid;
if ((*qp_attr_mask & IB_QP_AV) &&
(rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) {
ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
qp_attr->ah_attr.grh.sgid_index, &sgid);
if (ret)
goto out;
if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
} else {
ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
if (ret)
goto out;
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
if (ret)
goto out;
}
*qp_attr_mask |= IB_QP_SMAC;
if (qp_attr->vlan_id < 0xFFFF)
*qp_attr_mask |= IB_QP_VID;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
/*
 * ib_modify_qp - modify a QP after resolving Ethernet L2 attributes
 * @qp:           QP to modify; the device call is made on qp->real_qp
 * @qp_attr:      new attributes
 * @qp_attr_mask: mask of attributes to apply; ib_resolve_eth_l2_attrs()
 *                may add bits to the local copy before the device call
 *
 * Returns the non-zero value from ib_resolve_eth_l2_attrs() if it fails,
 * otherwise the result of the device's modify_qp method.
 */
int ib_modify_qp(struct ib_qp *qp,
		 struct ib_qp_attr *qp_attr,
		 int qp_attr_mask)
{
	int err = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);

	if (err)
		return err;

	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);

View File

@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
* We should never get here, as the adapter should
* never send us a reply that we're not expecting.
*/
vq_repbuf_free(c2dev, host_msg);
if (reply_msg != NULL)
vq_repbuf_free(c2dev, host_msg);
pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
return;
}

View File

@ -76,7 +76,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
INIT_ULPTX_WR(req, wr_len, 0, 0);
req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
(wait ? FW_WR_COMPL(1) : 0));
req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0;
req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));

View File

@ -1329,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
if (!smi_reset2init &&
!ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
attr_mask)) {
attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
ret = -EINVAL;
ehca_err(ibqp->device,
"Invalid qp transition new_state=%x cur_state=%x "

View File

@ -463,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask))
attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {

View File

@ -1,6 +1,6 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI
depends on NETDEVICES && ETHERNET && PCI && INET
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---

View File

@ -39,25 +39,6 @@
#include "mlx4_ib.h"
/*
 * mlx4_ib_resolve_grh - derive a destination MAC from an AH's destination GID
 * @dev:      not used by this function
 * @ah_attr:  address handle attributes; grh.dgid is the GID examined
 * @mac:      output MAC address
 * @is_mcast: output; set to 1 for a multicast GID, 0 otherwise
 * @port:     not used by this function
 *
 * Returns 0 when the GID is link-local or multicast, -EINVAL otherwise
 * (in which case @mac is left untouched and *@is_mcast is 0).
 */
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
			u8 *mac, int *is_mcast, u8 port)
{
	struct in6_addr dgid;

	*is_mcast = 0;
	memcpy(&dgid, ah_attr->grh.dgid.raw, sizeof dgid);

	/* The link-local test comes first, as in the original if/else chain. */
	if (rdma_link_local_addr(&dgid)) {
		rdma_get_ll_mac(&dgid, mac);
		return 0;
	}

	if (rdma_is_multicast_addr(&dgid)) {
		rdma_get_mcast_mac(&dgid, mac);
		*is_mcast = 1;
		return 0;
	}

	return -EINVAL;
}
static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
@ -92,21 +73,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
struct in6_addr in6;
u16 vlan_tag;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
return ERR_PTR(err);
memcpy(ah->av.eth.mac, mac, 6);
err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
if (err)
return ERR_PTR(err);
vlan_tag = rdma_get_vlan_id(&sgid);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
is_mcast = 1;
rdma_get_mcast_mac(&in6, ah->av.eth.mac);
} else {
memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
}
vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));

View File

@ -798,6 +798,15 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
} else {
wc->vlan_id = 0xffff;
}
wc->wc_flags |= IB_WC_WITH_VLAN;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
}
return 0;

View File

@ -39,6 +39,8 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@ -55,6 +57,7 @@
#define DRV_RELDATE "April 4, 2008"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@ -92,21 +95,27 @@ static union ib_gid zgid;
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
int ib_num_ports = 0;
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
if (ib_num_ports || mlx4_is_mfunc(dev)) {
pr_warn("Device managed flow steering is unavailable "
"for IB ports or in multifunction env.\n");
return 0;
if (dmfs) {
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
eth_num_ports++;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
dmfs &= (!ib_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
(!eth_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
if (ib_num_ports && mlx4_is_mfunc(dev)) {
pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
dmfs = 0;
}
return 1;
}
return 0;
return dmfs;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
@ -165,7 +174,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
if (check_flow_steering_support(dev->dev))
if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
@ -787,7 +796,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
u8 mac[6];
struct net_device *ndev;
int ret = 0;
@ -801,11 +809,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_unlock(&mdev->iboe.lock);
if (ndev) {
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
rtnl_lock();
dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
ret = 1;
rtnl_unlock();
dev_put(ndev);
}
@ -819,6 +823,7 @@ struct mlx4_ib_steering {
};
static int parse_flow_attr(struct mlx4_dev *dev,
u32 qp_num,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
@ -834,6 +839,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
case IB_FLOW_SPEC_IB:
type = MLX4_NET_TRANS_RULE_ID_IB;
mlx4_spec->ib.l3_qpn =
cpu_to_be32(qp_num);
mlx4_spec->ib.qpn_mask =
cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
break;
case IB_FLOW_SPEC_IPV4:
type = MLX4_NET_TRANS_RULE_ID_IPV4;
@ -865,6 +878,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
return mlx4_hw_rule_sz(dev, type);
}
/*
 * One entry of the default flow-rule table consulted when a flow is created.
 * Each array holds up to IB_FLOW_SPEC_SUPPORT_LAYERS spec types; slots that
 * are not used are left as 0.
 */
struct default_rules {
/* spec types compared, slot by slot, against the caller's flow specs */
__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
/* spec types whose presence in the caller's flow rejects the match */
__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
/* spec types for which a default rule is generated; 0 means "no rule" */
__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
/* the entry is considered only for ports with this link layer */
__u8 link_layer;
};
/*
 * Table of default rules. Its single entry is for InfiniBand link-layer
 * ports: IB_FLOW_SPEC_IPV4 is the mandatory field, a flow carrying an
 * IB_FLOW_SPEC_ETH spec is rejected, and one IB_FLOW_SPEC_IB default rule is
 * generated on a match.
 */
static const struct default_rules default_table[] = {
{
.mandatory_fields = {IB_FLOW_SPEC_IPV4},
.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
.rules_create_list = {IB_FLOW_SPEC_IB},
.link_layer = IB_LINK_LAYER_INFINIBAND
}
};
/*
 * Pick the default_table entry that applies to a flow about to be created.
 *
 * @qp:        QP the flow belongs to; used only to query the link layer of
 *             flow_attr->port
 * @flow_attr: flow attributes, followed in memory by num_of_specs
 *             variable-sized specs (each spec's own 'size' field gives the
 *             stride to the next one)
 *
 * Entries whose link layer differs from the port's are skipped. For the first
 * entry with a matching link layer the search ends with either its index or
 * -1: -1 is returned as soon as a spec sits on the same layer as a mandatory
 * field but has a different type, or as soon as any spec's type equals one of
 * the entry's mandatory_not_fields slots.
 *
 * Returns the index into default_table, or -1 when nothing applies.
 */
static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
					 struct ib_flow_attr *flow_attr)
{
	int i, j, k;
	void *ib_flow;
	const struct default_rules *pdefault_rules = default_table;
	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);

	for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
	     pdefault_rules++) {
		/* entries written for another link layer never apply */
		if (link_layer != pdefault_rules->link_layer)
			continue;

		/* the specs start right after the attribute header */
		ib_flow = flow_attr + 1;
		/* we assume the specs are sorted */
		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
		     j < flow_attr->num_of_specs; k++) {
			union ib_flow_spec *current_flow =
				(union ib_flow_spec *)ib_flow;

			/* same layer but different type */
			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
			     (pdefault_rules->mandatory_fields[k] &
			      IB_FLOW_SPEC_LAYER_MASK)) &&
			    (current_flow->type !=
			     pdefault_rules->mandatory_fields[k]))
				goto out;

			/* same layer, try match next one */
			if (current_flow->type ==
			    pdefault_rules->mandatory_fields[k]) {
				j++;
				ib_flow +=
					((union ib_flow_spec *)ib_flow)->size;
			}
		}

		/* second pass: reject flows that carry a forbidden spec type */
		ib_flow = flow_attr + 1;
		for (j = 0; j < flow_attr->num_of_specs;
		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
				/* same layer and same type */
				if (((union ib_flow_spec *)ib_flow)->type ==
				    pdefault_rules->mandatory_not_fields[k])
					goto out;

		return i;
	}
out:
	return -1;
}
/*
 * Emit the hardware rules listed in a default_table entry.
 *
 * @mdev:           mlx4 IB device; its mlx4_dev is passed to parse_flow_attr()
 * @qp:             QP the flow belongs to (not referenced by this routine)
 * @pdefault_rules: table entry whose rules_create_list is walked
 * @mlx4_spec:      output cursor; each generated rule is written here and the
 *                  cursor is advanced by the size parse_flow_attr() reports
 *
 * Returns the total number of bytes written, or -EINVAL when the list holds
 * an unsupported spec type or parse_flow_attr() fails.
 */
static int __mlx4_ib_create_default_rules(
		struct mlx4_ib_dev *mdev,
		struct ib_qp *qp,
		const struct default_rules *pdefault_rules,
		struct _rule_hw *mlx4_spec)
{
	int size = 0;
	int i;

	for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
			sizeof(pdefault_rules->rules_create_list[0]); i++) {
		int ret;
		union ib_flow_spec ib_spec;

		/*
		 * Only type and size are filled in below; zero the rest so
		 * parse_flow_attr() never sees uninitialized stack contents.
		 */
		memset(&ib_spec, 0, sizeof(ib_spec));

		switch (pdefault_rules->rules_create_list[i]) {
		case 0:
			/* no rule */
			continue;
		case IB_FLOW_SPEC_IB:
			ib_spec.type = IB_FLOW_SPEC_IB;
			ib_spec.size = sizeof(struct ib_flow_spec_ib);
			break;
		default:
			/* invalid rule */
			return -EINVAL;
		}

		/* We must put empty rule, qpn is being ignored */
		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
				      mlx4_spec);
		if (ret < 0) {
			pr_info("invalid parsing\n");
			return -EINVAL;
		}

		mlx4_spec = (void *)mlx4_spec + ret;
		size += ret;
	}
	return size;
}
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
@ -876,6 +998,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@ -910,8 +1033,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
/* Add default flows */
default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
if (default_flow >= 0) {
ret = __mlx4_ib_create_default_rules(
mdev, qp, default_table + default_flow,
mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
}
size += ret;
}
for (i = 0; i < flow_attr->num_of_specs; i++) {
ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
@ -1025,6 +1161,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@ -1036,7 +1174,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
MLX4_PROT_IB_IPV6, &reg_id);
prot, &reg_id);
if (err)
goto err_malloc;
@ -1055,7 +1193,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
MLX4_PROT_IB_IPV6, reg_id);
prot, reg_id);
err_malloc:
kfree(ib_steering);
@ -1083,10 +1221,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
u64 reg_id = 0;
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@ -1109,7 +1248,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
MLX4_PROT_IB_IPV6, reg_id);
prot, reg_id);
if (err)
return err;
@ -1121,13 +1260,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
if (ndev) {
rtnl_lock();
dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
rtnl_unlock();
if (ndev)
dev_put(ndev);
}
list_del(&ge->list);
kfree(ge);
} else
@ -1223,20 +1357,6 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
/*
 * Build an 8-byte interface identifier from a net device's 6-byte address.
 *
 * @eui:     output, 8 bytes are written
 * @vlan_id: when below 0x1000 it is stored big-endian in bytes 3..4,
 *           otherwise bytes 3..4 are set to ff:fe
 * @dev:     net device whose dev_addr supplies bytes 0..2 and 5..7
 *
 * Bit 1 of the first byte is flipped at the end.
 */
static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
{
	const int has_vlan = vlan_id < 0x1000;

	/* the two halves of the hardware address go to either end */
	memcpy(eui, dev->dev_addr, 3);
	memcpy(eui + 5, dev->dev_addr + 3, 3);

	/* the middle two bytes carry the VLAN id or the ff:fe filler */
	eui[3] = has_vlan ? vlan_id >> 8 : 0xff;
	eui[4] = has_vlan ? vlan_id & 0xff : 0xfe;

	eui[0] ^= 2;
}
static void update_gids_task(struct work_struct *work)
{
struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
@ -1259,161 +1379,318 @@ static void update_gids_task(struct work_struct *work)
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
else {
memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
else
mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
}
mlx4_free_cmd_mailbox(dev, mailbox);
kfree(gw);
}
static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
static void reset_gids_task(struct work_struct *work)
{
struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
struct net_device *tmp;
struct update_gid_work *gw =
container_of(work, struct update_gid_work, work);
struct mlx4_cmd_mailbox *mailbox;
union ib_gid *gids;
int err;
int i;
u8 *hits;
int ret;
union ib_gid gid;
int free;
int found;
int need_update = 0;
u16 vid;
struct mlx4_dev *dev = gw->dev->dev;
work = kzalloc(sizeof *work, GFP_ATOMIC);
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("reset gid table failed\n");
goto free;
}
gids = mailbox->buf;
memcpy(gids, gw->gids, sizeof(gw->gids));
for (i = 1; i < gw->dev->num_ports + 1; i++) {
if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx4_cmd(dev, mailbox->dma,
MLX4_SET_PORT_GID_TABLE << 8 | i,
1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
pr_warn(KERN_WARNING
"set port %d command failed\n", i);
}
}
mlx4_free_cmd_mailbox(dev, mailbox);
free:
kfree(gw);
}
static int update_gid_table(struct mlx4_ib_dev *dev, int port,
union ib_gid *gid, int clear)
{
struct update_gid_work *work;
int i;
int need_update = 0;
int free = -1;
int found = -1;
int max_gids;
max_gids = dev->dev->caps.gid_table_len[port];
for (i = 0; i < max_gids; ++i) {
if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
sizeof(*gid)))
found = i;
if (clear) {
if (found >= 0) {
need_update = 1;
dev->iboe.gid_table[port - 1][found] = zgid;
break;
}
} else {
if (found >= 0)
break;
if (free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i], &zgid,
sizeof(*gid)))
free = i;
}
}
if (found == -1 && !clear && free >= 0) {
dev->iboe.gid_table[port - 1][free] = *gid;
need_update = 1;
}
if (!need_update)
return 0;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return -ENOMEM;
hits = kzalloc(128, GFP_ATOMIC);
if (!hits) {
ret = -ENOMEM;
goto out;
}
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
rcu_read_lock();
for_each_netdev_rcu(&init_net, tmp) {
if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
vid = rdma_vlan_dev_vlan_id(tmp);
mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
found = 0;
free = -1;
for (i = 0; i < 128; ++i) {
if (free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
free = i;
if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
hits[i] = 1;
found = 1;
break;
}
}
return 0;
}
if (!found) {
if (tmp == ndev &&
(memcmp(&dev->iboe.gid_table[port - 1][0],
&gid, sizeof gid) ||
!memcmp(&dev->iboe.gid_table[port - 1][0],
&zgid, sizeof gid))) {
dev->iboe.gid_table[port - 1][0] = gid;
++need_update;
hits[0] = 1;
} else if (free >= 0) {
dev->iboe.gid_table[port - 1][free] = gid;
hits[free] = 1;
++need_update;
}
}
}
}
rcu_read_unlock();
static int reset_gid_table(struct mlx4_ib_dev *dev)
{
struct update_gid_work *work;
for (i = 0; i < 128; ++i)
if (!hits[i]) {
if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
++need_update;
dev->iboe.gid_table[port - 1][i] = zgid;
}
if (need_update) {
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
} else
kfree(work);
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return -ENOMEM;
memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table));
memset(work->gids, 0, sizeof(work->gids));
INIT_WORK(&work->work, reset_gids_task);
work->dev = dev;
queue_work(wq, &work->work);
return 0;
}
kfree(hits);
static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
struct mlx4_ib_dev *ibdev, union ib_gid *gid)
{
struct mlx4_ib_iboe *iboe;
int port = 0;
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
rdma_vlan_dev_real_dev(event_netdev) :
event_netdev;
if (event != NETDEV_DOWN && event != NETDEV_UP)
return 0;
if ((real_dev != event_netdev) &&
(event == NETDEV_DOWN) &&
rdma_link_local_addr((struct in6_addr *)gid))
return 0;
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
for (port = 1; port <= MLX4_MAX_PORTS; ++port)
if ((netif_is_bond_master(real_dev) &&
(real_dev == iboe->masters[port - 1])) ||
(!netif_is_bond_master(real_dev) &&
(real_dev == iboe->netdevs[port - 1])))
update_gid_table(ibdev, port, gid,
event == NETDEV_DOWN);
spin_unlock(&iboe->lock);
return 0;
out:
kfree(work);
return ret;
}
static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
static u8 mlx4_ib_get_dev_port(struct net_device *dev,
struct mlx4_ib_dev *ibdev)
{
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGEADDR:
update_ipv6_gids(dev, port, 0);
break;
u8 port = 0;
struct mlx4_ib_iboe *iboe;
struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
rdma_vlan_dev_real_dev(dev) : dev;
case NETDEV_DOWN:
update_ipv6_gids(dev, port, 1);
dev->iboe.netdevs[port - 1] = NULL;
}
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
for (port = 1; port <= MLX4_MAX_PORTS; ++port)
if ((netif_is_bond_master(real_dev) &&
(real_dev == iboe->masters[port - 1])) ||
(!netif_is_bond_master(real_dev) &&
(real_dev == iboe->netdevs[port - 1])))
break;
spin_unlock(&iboe->lock);
if ((port == 0) || (port > MLX4_MAX_PORTS))
return 0;
else
return port;
}
static void netdev_added(struct mlx4_ib_dev *dev, int port)
{
update_ipv6_gids(dev, port, 0);
}
static void netdev_removed(struct mlx4_ib_dev *dev, int port)
{
update_ipv6_gids(dev, port, 1);
}
static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct mlx4_ib_dev *ibdev;
struct in_ifaddr *ifa = ptr;
union ib_gid gid;
struct net_device *event_netdev = ifa->ifa_dev->dev;
ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
return NOTIFY_DONE;
}
#if IS_ENABLED(CONFIG_IPV6)
/*
 * Notifier callback for IPv6 address events (registered through
 * iboe.nb_inet6).
 *
 * @this:  the nb_inet6 notifier block embedded in the mlx4 IB device
 * @event: event code, forwarded unchanged to mlx4_ib_addr_event()
 * @ptr:   the struct inet6_ifaddr the event refers to
 *
 * The IPv6 address itself is used as the GID.
 *
 * Always returns NOTIFY_DONE.
 */
static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
			       void *ptr)
{
	struct inet6_ifaddr *addr6 = ptr;
	struct mlx4_ib_dev *mdev =
		container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);

	mlx4_ib_addr_event(event, addr6->idev->dev, mdev,
			   (union ib_gid *)&addr6->addr);

	return NOTIFY_DONE;
}
#endif
/*
 * Feed the addresses configured on a net device into a port's GID table.
 *
 * @dev:   net device whose IPv4 and, when CONFIG_IPV6 is enabled, IPv6
 *         addresses are walked
 * @ibdev: mlx4 IB device passed on to update_gid_table()
 * @port:  1-based port number; 0 or anything above MLX4_MAX_PORTS makes the
 *         function return without doing anything
 *
 * Every address is added with update_gid_table(..., 0). The return value of
 * update_gid_table() is not checked here.
 */
static void mlx4_ib_get_dev_addr(struct net_device *dev,
struct mlx4_ib_dev *ibdev, u8 port)
{
struct in_device *in_dev;
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_dev *in6_dev;
union ib_gid *pgid;
struct inet6_ifaddr *ifp;
#endif
union ib_gid gid;
if ((port == 0) || (port > MLX4_MAX_PORTS))
return;
/* IPv4 gids */
in_dev = in_dev_get(dev);
if (in_dev) {
/* NOTE(review): 'ifa' is not declared in this function; presumably the for_ifa() macro provides it - confirm */
for_ifa(in_dev) {
/* store the IPv4 address in gid as a v4-mapped IPv6 address */
ipv6_addr_set_v4mapped(ifa->ifa_address,
(struct in6_addr *)&gid);
update_gid_table(ibdev, port, &gid, 0);
}
endfor_ifa(in_dev);
/* drop the reference taken by in_dev_get() */
in_dev_put(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 gids */
in6_dev = in6_dev_get(dev);
if (in6_dev) {
/* the address list is walked with in6_dev->lock read-held */
read_lock_bh(&in6_dev->lock);
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
/* the IPv6 address is used directly as the GID */
pgid = (union ib_gid *)&ifp->addr;
update_gid_table(ibdev, port, pgid, 0);
}
read_unlock_bh(&in6_dev->lock);
/* drop the reference taken by in6_dev_get() */
in6_dev_put(in6_dev);
}
#endif
}
/*
 * Rebuild the GID tables from scratch.
 *
 * The tables are first cleared with reset_gid_table(); then every net device
 * in init_net that mlx4_ib_get_dev_port() maps to a port of @ibdev has its
 * addresses added through mlx4_ib_get_dev_addr().
 *
 * @ibdev: mlx4 IB device whose GID tables are rebuilt
 *
 * Returns 0 on success or the negative error code reported by
 * reset_gid_table().
 */
static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
{
	struct net_device *dev;
	int err;

	/* hand the real error code to the caller instead of a bare -1 */
	err = reset_gid_table(ibdev);
	if (err)
		return err;

	read_lock(&dev_base_lock);
	for_each_netdev(&init_net, dev) {
		u8 port = mlx4_ib_get_dev_port(dev, ibdev);

		/* port 0 means the device does not belong to this HCA */
		if (port)
			mlx4_ib_get_dev_addr(dev, ibdev, port);
	}
	read_unlock(&dev_base_lock);

	return 0;
}
/*
 * Refresh the cached Ethernet net device and bond master of every
 * IB-transport port, all under iboe->lock.
 *
 * For each port the net device is re-read with mlx4_get_protocol_dev(). When
 * that device is a bond slave its master is looked up and cached; if the
 * cached master changed, the master's addresses are added to the port's GID
 * table through mlx4_ib_get_dev_addr().
 *
 * @ibdev: mlx4 IB device whose iboe state is refreshed
 */
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev)
{
	struct mlx4_ib_iboe *iboe;
	int port;

	iboe = &ibdev->iboe;

	spin_lock(&iboe->lock);
	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
		struct net_device *old_master = iboe->masters[port - 1];
		struct net_device *curr_master;

		iboe->netdevs[port - 1] =
			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);

		if (iboe->netdevs[port - 1] &&
		    netif_is_bond_slave(iboe->netdevs[port - 1])) {
			/*
			 * iboe->lock is a spinlock, so the sleeping rtnl
			 * mutex must not be taken here; this function is also
			 * reached from the netdevice notifier, where rtnl is
			 * expected to be held already. The RCU lookup is safe
			 * in both situations.
			 */
			rcu_read_lock();
			iboe->masters[port - 1] =
				netdev_master_upper_dev_get_rcu(
					iboe->netdevs[port - 1]);
			rcu_read_unlock();
		}
		curr_master = iboe->masters[port - 1];

		/* if bonding is used it is possible that we add it to masters
		 * only after IP address is assigned to the net bonding
		 * interface
		 */
		if (curr_master && (old_master != curr_master))
			mlx4_ib_get_dev_addr(curr_master, ibdev, port);
	}
	spin_unlock(&iboe->lock);
}
static int mlx4_ib_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlx4_ib_dev *ibdev;
struct net_device *oldnd;
struct mlx4_ib_iboe *iboe;
int port;
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
oldnd = iboe->netdevs[port - 1];
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
if (oldnd != iboe->netdevs[port - 1]) {
if (iboe->netdevs[port - 1])
netdev_added(ibdev, port);
else
netdev_removed(ibdev, port);
}
}
if (dev == iboe->netdevs[0] ||
(iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
handle_en_event(ibdev, 1, event);
else if (dev == iboe->netdevs[1]
|| (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
handle_en_event(ibdev, 2, event);
spin_unlock(&iboe->lock);
mlx4_ib_scan_netdevs(ibdev);
return NOTIFY_DONE;
}
@ -1682,6 +1959,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
@ -1710,8 +1988,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
&ibdev->steer_qpn_base);
if (err)
goto err_counter;
ibdev->ib_uc_qpns_bitmap =
kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
dev_err(&dev->pdev->dev, "bit map alloc failed\n");
goto err_steer_qp_release;
}
bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_base +
ibdev->steer_qpn_count - 1);
if (err)
goto err_steer_free_bitmap;
}
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_counter;
goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@ -1719,11 +2024,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err)
goto err_sriov;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err) {
iboe->nb.notifier_call = NULL;
goto err_notif;
}
}
if (!iboe->nb_inet.notifier_call) {
iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
err = register_inetaddr_notifier(&iboe->nb_inet);
if (err) {
iboe->nb_inet.notifier_call = NULL;
goto err_notif;
}
}
#if IS_ENABLED(CONFIG_IPV6)
if (!iboe->nb_inet6.notifier_call) {
iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
err = register_inet6addr_notifier(&iboe->nb_inet6);
if (err) {
iboe->nb_inet6.notifier_call = NULL;
goto err_notif;
}
}
#endif
mlx4_ib_scan_netdevs(ibdev);
mlx4_ib_init_gid_table(ibdev);
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@ -1749,11 +2078,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
return ibdev;
err_notif:
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
if (ibdev->iboe.nb_inet.notifier_call) {
if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet.notifier_call = NULL;
}
#if IS_ENABLED(CONFIG_IPV6)
if (ibdev->iboe.nb_inet6.notifier_call) {
if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet6.notifier_call = NULL;
}
#endif
flush_workqueue(wq);
err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
@ -1762,6 +2105,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
err_reg:
ib_unregister_device(&ibdev->ib_dev);
err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
for (; i; --i)
if (ibdev->counters[i - 1] != -1)
@ -1782,6 +2132,69 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
return NULL;
}
/*
 * Allocate a block of QP numbers from the device's steerable-QPN bitmap.
 *
 * @dev:   mlx4 IB device owning ib_uc_qpns_bitmap
 * @count: number of QPNs wanted; the region size is get_count_order(count)
 * @qpn:   on success receives steer_qpn_base plus the region's offset
 *
 * Warns (but carries on) when the bitmap has not been allocated.
 *
 * Returns 0 on success, or the negative value reported by
 * bitmap_find_free_region() when no region is free.
 */
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
{
	int slot;

	WARN_ON(!dev->ib_uc_qpns_bitmap);

	slot = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
				       dev->steer_qpn_count,
				       get_count_order(count));
	if (slot >= 0) {
		*qpn = dev->steer_qpn_base + slot;
		return 0;
	}

	return slot;
}
/*
 * Give a block of QP numbers back to the steerable-QPN bitmap.
 *
 * @dev:   mlx4 IB device owning ib_uc_qpns_bitmap
 * @qpn:   first QPN of the block; 0 makes the call a no-op
 * @count: number of QPNs in the block (converted with get_count_order())
 *
 * Nothing happens unless the device uses device-managed steering. A QPN
 * below steer_qpn_base triggers BUG_ON().
 */
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
{
	if (qpn &&
	    dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		BUG_ON(qpn < dev->steer_qpn_base);
		bitmap_release_region(dev->ib_uc_qpns_bitmap,
				      qpn - dev->steer_qpn_base,
				      get_count_order(count));
	}
}
/*
 * Attach or detach the steering rule of a QP.
 *
 * @mdev:      mlx4 IB device
 * @mqp:       QP whose rule is managed; mqp->reg_id stores the rule handle
 * @is_attach: non-zero to create the rule, zero to destroy it
 *
 * On attach a temporary flow attribute with a single, all-zero-mask
 * IB_FLOW_SPEC_IB spec is built and passed to __mlx4_ib_create_flow(); the
 * temporary is freed before returning. On detach the rule identified by
 * mqp->reg_id is destroyed.
 *
 * Returns the result of the create/destroy call, or -ENOMEM when the
 * temporary attribute cannot be allocated.
 */
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
			 int is_attach)
{
	struct ib_flow_spec_ib *spec;
	struct ib_flow_attr *attr;
	size_t bytes;
	int rc;

	if (!is_attach)
		return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);

	bytes = sizeof(struct ib_flow_attr) + sizeof(struct ib_flow_spec_ib);
	attr = kzalloc(bytes, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	attr->port = mqp->port;
	attr->num_of_specs = 1;
	attr->size = bytes;

	/* the single spec lives right behind the attribute header */
	spec = (struct ib_flow_spec_ib *)(attr + 1);
	spec->type = IB_FLOW_SPEC_IB;
	spec->size = sizeof(struct ib_flow_spec_ib);
	/* Add an empty rule for IB L2 */
	memset(&spec->mask, 0, sizeof(spec->mask));

	rc = __mlx4_ib_create_flow(&mqp->ibqp, attr,
				   IB_FLOW_DOMAIN_NIC,
				   MLX4_FS_REGULAR,
				   &mqp->reg_id);
	kfree(attr);

	return rc;
}
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
@ -1795,6 +2208,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
}
if (ibdev->iboe.nb_inet.notifier_call) {
if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet.notifier_call = NULL;
}
#if IS_ENABLED(CONFIG_IPV6)
if (ibdev->iboe.nb_inet6.notifier_call) {
if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet6.notifier_call = NULL;
}
#endif
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
if (ibdev->counters[p] != -1)

View File

@ -68,6 +68,8 @@ enum {
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS 256
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@ -153,6 +155,7 @@ struct mlx4_ib_wq {
/*
 * Driver-private QP flags. The first three reuse the values of the
 * corresponding IB_QP_CREATE_* flags; the two SR-IOV flags occupy bits 30
 * and 31.
 */
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
/* QP numbered from the steerable-QPN range (see mlx4_ib_steer_qp_alloc) */
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@ -270,6 +273,7 @@ struct mlx4_ib_qp {
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
u64 reg_id;
};
@ -428,7 +432,10 @@ struct mlx4_ib_sriov {
/* Per-device IBoE state: cached net devices, notifiers and GID tables. */
struct mlx4_ib_iboe {
/* taken around accesses to netdevs[] and masters[] */
spinlock_t lock;
/* per-port Ethernet net device, indexed by port - 1 */
struct net_device *netdevs[MLX4_MAX_PORTS];
/* per-port bond master of netdevs[], indexed by port - 1 */
struct net_device *masters[MLX4_MAX_PORTS];
/* netdevice notifier */
struct notifier_block nb;
/* IPv4 address notifier */
struct notifier_block nb_inet;
/* IPv6 address notifier */
struct notifier_block nb_inet6;
/* per-port GID table, 128 entries each */
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
@ -494,6 +501,10 @@ struct mlx4_ib_dev {
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
struct pkey_mgt pkeys;
unsigned long *ib_uc_qpns_bitmap;
int steer_qpn_count;
int steer_qpn_base;
int steering_support;
};
struct ib_event_work {
@ -675,9 +686,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@ -752,5 +760,9 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
__be64 mlx4_ib_gen_node_guid(void);
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
#endif /* MLX4_IB_H */

View File

@ -90,6 +90,21 @@ enum {
MLX4_RAW_QP_MSGMAX = 31,
};
#ifndef ETH_ALEN
#define ETH_ALEN 6
#endif
/*
 * Pack an ETH_ALEN-byte MAC address into the low bytes of a u64, first
 * address byte most significant.
 *
 * @addr: pointer to ETH_ALEN bytes
 *
 * Returns the packed value.
 */
static inline u64 mlx4_mac_to_u64(u8 *addr)
{
	const u8 *cur = addr;
	const u8 *end = addr + ETH_ALEN;
	u64 packed = 0;

	while (cur != end)
		packed = (packed << 8) | *cur++;

	return packed;
}
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
@ -716,6 +731,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (dev->steering_support ==
MLX4_STEERING_MODE_DEVICE_MANAGED)
qp->flags |= MLX4_IB_QP_NETIF;
else
goto err;
}
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
@ -765,7 +788,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->qp_type == IB_QPT_RAW_PACKET)
err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
if (qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1,
&qpn);
if (err)
goto err_proxy;
}
@ -790,8 +817,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return 0;
err_qpn:
if (!sqpn)
mlx4_qp_release_range(dev->dev, qpn, 1);
if (!sqpn) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
@ -932,8 +963,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
else
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
}
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@ -987,9 +1022,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
*/
if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF))
return ERR_PTR(-EINVAL);
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
return ERR_PTR(-EINVAL);
}
if (init_attr->create_flags &&
(udata ||
((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
@ -1144,16 +1186,15 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx4_qp_path *path, u8 port)
static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
u8 port)
{
int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
u8 mac[6];
int is_mcast;
u16 vlan_tag;
int vidx;
int smac_index;
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
@ -1188,22 +1229,27 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
if (err)
return err;
memcpy(path->dmac, mac, 6);
memcpy(path->dmac, ah->dmac, ETH_ALEN);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* use index 0 into MAC table for IBoE */
path->grh_mylmc &= 0x80;
/* find the index into MAC table for IBoE */
if (!is_zero_ether_addr((const u8 *)&smac)) {
if (mlx4_find_cached_mac(dev->dev, port, smac,
&smac_index))
return -ENOENT;
} else {
smac_index = 0;
}
vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
path->grh_mylmc &= 0x80 | smac_index;
path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
return -ENOENT;
path->vlan_index = vidx;
path->fl = 1 << 6;
path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
}
} else
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
@ -1212,6 +1258,28 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
return 0;
}
/*
 * Fill the primary path of a QP context from the QP attributes.
 *
 * @dev:          mlx4 IB device
 * @qp:           QP attributes; ah_attr, smac and vlan_id are read
 * @qp_attr_mask: vlan_id is used only when IB_QP_VID is set, otherwise
 *                0xffff is passed on
 * @path:         path structure to fill
 * @port:         port number
 *
 * Returns whatever _mlx4_set_path() returns.
 */
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
			 enum ib_qp_attr_mask qp_attr_mask,
			 struct mlx4_qp_path *path, u8 port)
{
	u16 vid = 0xffff;

	if (qp_attr_mask & IB_QP_VID)
		vid = qp->vlan_id;

	return _mlx4_set_path(dev, &qp->ah_attr,
			      mlx4_mac_to_u64((u8 *)qp->smac),
			      vid, path, port);
}
/*
 * Fill @path from the alternate address vector of @qp.
 *
 * Thin wrapper around _mlx4_set_path(): the source MAC is taken from
 * qp->alt_smac and the VLAN id from qp->alt_vlan_id when IB_QP_ALT_VID
 * is set in @qp_attr_mask; otherwise 0xffff is passed as the VLAN id.
 *
 * Returns whatever _mlx4_set_path() returns.
 */
static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
			     const struct ib_qp_attr *qp,
			     enum ib_qp_attr_mask qp_attr_mask,
			     struct mlx4_qp_path *path, u8 port)
{
	u64 src_mac = mlx4_mac_to_u64((u8 *)qp->alt_smac);
	u16 vid = 0xffff;

	if (qp_attr_mask & IB_QP_ALT_VID)
		vid = qp->alt_vlan_id;

	return _mlx4_set_path(dev, &qp->alt_ah_attr, src_mac, vid, path, port);
}
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
struct mlx4_ib_gid_entry *ge, *tmp;
@ -1235,6 +1303,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
int steer_qp = 0;
int err = -EINVAL;
context = kzalloc(sizeof *context, GFP_KERNEL);
@ -1319,6 +1388,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
} else
context->pri_path.counter_index = 0xff;
if (qp->flags & MLX4_IB_QP_NETIF) {
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@ -1329,7 +1403,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port))
goto out;
@ -1352,8 +1426,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
attr->alt_port_num))
if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path,
attr->alt_port_num))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
@ -1464,6 +1538,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
int is_eth = rdma_port_get_link_layer(
&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth) {
context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
}
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
@ -1547,9 +1632,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
qp->sq_next_wqe = 0;
if (qp->rq.wqe_cnt)
*qp->db.db = 0;
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_reg(dev, qp, 0);
}
out:
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
return err;
}
@ -1561,13 +1651,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
ll = IB_LINK_LAYER_UNSPECIFIED;
} else {
int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
ll = rdma_port_get_link_layer(&dev->ib_dev, port);
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask, ll)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@ -1784,8 +1882,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return err;
}
vlan = rdma_get_vlan_id(&sgid);
is_vlan = vlan < 0x1000;
if (ah->av.eth.vlan != 0xffff) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
@ -2762,6 +2862,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
if (qp->flags & MLX4_IB_QP_NETIF)
qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
qp_init_attr->sq_sig_type =
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

View File

@ -582,8 +582,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs)
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
goto err_alloc;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
@ -591,8 +593,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
if (!p->gid_group.attrs)
if (!p->gid_group.attrs) {
ret = -ENOMEM;
goto err_free_pkey;
}
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)

View File

@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}
/*
 * Return 1 when index @n has any bit in common with @nent, 0 otherwise.
 * copy_resize_cqes() uses the result as the owner bit of a copied CQE.
 */
static u8 sw_ownership_bit(int n, int nent)
{
	return !!(n & nent);
}
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
struct mlx5_cqe64 *cqe64;
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
!((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
return cqe;
} else {
return NULL;
}
}
static void *next_cqe_sw(struct mlx5_ib_cq *cq)
@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
qp->sq.last_poll = tail;
}
/*
 * Release the mlx5_buf embedded in @buf via mlx5_buf_free().
 * The struct mlx5_ib_cq_buf itself is not freed here.
 */
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	struct mlx5_buf *pages = &buf->buf;

	mlx5_buf_free(&dev->mdev, pages);
}
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
void *cqe;
int idx;
repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
*/
rmb();
/* TBD: resize CQ */
opcode = cqe64->op_own >> 4;
if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
if (likely(cq->resize_buf)) {
free_cq_buf(dev, &cq->buf);
cq->buf = *cq->resize_buf;
kfree(cq->resize_buf);
cq->resize_buf = NULL;
goto repoll;
} else {
mlx5_ib_warn(dev, "unexpected resize cqe\n");
}
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
wc->qp = &(*cur_qp)->ibqp;
opcode = cqe64->op_own >> 4;
switch (opcode) {
case MLX5_CQE_REQ:
wq = &(*cur_qp)->sq;
@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
return err;
buf->cqe_size = cqe_size;
buf->nent = nent;
return 0;
}
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_buf_free(&dev->mdev, &buf->buf);
}
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct ib_ucontext *context, struct mlx5_ib_cq *cq,
int entries, struct mlx5_create_cq_mbox_in **cqb,
@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < nent; i++) {
cqe = get_cqe(cq, i);
cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
cqe64->op_own = 0xf1;
for (i = 0; i < buf->nent; i++) {
cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
}
@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (err)
goto err_db;
init_cq_buf(cq, entries);
init_cq_buf(cq, &cq->buf);
*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
@ -818,12 +840,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
return -ENOSYS;
struct mlx5_modify_cq_mbox_in *in;
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
int err;
u32 fsel;
if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
return -ENOSYS;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
in->cqn = cpu_to_be32(mcq->mcq.cqn);
fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
in->ctx.cq_period = cpu_to_be16(cq_period);
in->ctx.cq_max_count = cpu_to_be16(cq_count);
in->field_select = cpu_to_be32(fsel);
err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
kfree(in);
if (err)
mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
return err;
}
/*
 * Prepare a user-space CQ resize.
 *
 * Copies the resize command from @udata, rejects commands with non-zero
 * reserved fields, pins the user buffer described by the command and
 * stores the resulting umem in cq->resize_umem.
 *
 * @entries:    number of CQEs requested for the new buffer
 * @npas:       out, filled in by mlx5_ib_cont_pages()
 * @page_shift: out, filled in by mlx5_ib_cont_pages()
 * @cqe_size:   out, CQE size taken from the user command
 *
 * Returns 0 on success, -EINVAL for a malformed command (non-zero
 * reserved fields or a buffer size that overflows), or the error from
 * ib_copy_from_udata()/ib_umem_get().
 */
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	/*
	 * cqe_size comes straight from user space; make sure the buffer
	 * length cannot wrap before it is handed to ib_umem_get().
	 */
	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
		return -EINVAL;

	/* multiply in size_t so the product is not truncated to int */
	umem = ib_umem_get(context, ucmd.buf_addr,
			   (size_t)ucmd.cqe_size * entries,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}
/*
 * Undo resize_user(): release the umem stored in cq->resize_umem.
 * The pointer field itself is left unchanged.
 */
static void un_resize_user(struct mlx5_ib_cq *cq)
{
	struct ib_umem *pending = cq->resize_umem;

	ib_umem_release(pending);
}
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
int err;
cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
init_cq_buf(cq, cq->resize_buf);
return 0;
ex:
kfree(cq->resize_buf);
return err;
}
/*
 * Undo resize_kernel(): free the backing buffer of cq->resize_buf, free
 * the descriptor itself (it was allocated with kzalloc() in
 * resize_kernel()) and clear the pointer.
 */
static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	/* free_cq_buf() only releases the pages, not the descriptor */
	kfree(cq->resize_buf);
	cq->resize_buf = NULL;
}
/*
 * Copy the software-owned CQEs of the current buffer into
 * cq->resize_buf, starting at the consumer index and stopping at the
 * first CQE whose opcode is MLX5_CQE_RESIZE_CQ.
 *
 * Each copied CQE gets its owner bit rewritten with
 * sw_ownership_bit() for its position in the new buffer.  On success
 * the consumer index is advanced by one.
 *
 * The visible caller invokes this with cq->lock held.
 *
 * Returns 0 on success, -EINVAL if the CQE sizes of the two buffers
 * differ or a CQE is not in software ownership, -ENOMEM if the walk
 * wraps around to its starting CQE without finding the resize CQE.
 */
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	/* check before doing any arithmetic on the pointer */
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		/*
		 * nent is the number of entries in the new buffer, so the
		 * index mask is nent - 1; masking with nent itself would
		 * yield only 0 or the out-of-range index nent.
		 */
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent - 1),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}
		scqe64 = ssize == 64 ? scqe : scqe + 64;

		/* walked the whole ring without seeing the resize CQE */
		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
return -ENOSYS;
struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
struct mlx5_ib_cq *cq = to_mcq(ibcq);
struct mlx5_modify_cq_mbox_in *in;
int err;
int npas;
int page_shift;
int inlen;
int uninitialized_var(cqe_size);
unsigned long flags;
if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
pr_info("Firmware does not support resize CQ\n");
return -ENOSYS;
}
if (entries < 1)
return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
if (entries > dev->mdev.caps.max_cqes + 1)
return -EINVAL;
if (entries == ibcq->cqe + 1)
return 0;
mutex_lock(&cq->resize_mutex);
if (udata) {
err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
&cqe_size);
} else {
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
npas = cq->resize_buf->buf.npages;
page_shift = cq->resize_buf->buf.page_shift;
}
}
if (err)
goto ex;
inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto ex_resize;
}
if (udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
in->pas, 0);
else
mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
MLX5_MODIFY_CQ_MASK_PG_OFFSET |
MLX5_MODIFY_CQ_MASK_PG_SIZE);
in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
in->ctx.page_offset = 0;
in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
in->cqn = cpu_to_be32(cq->mcq.cqn);
err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
if (err)
goto ex_alloc;
if (udata) {
cq->ibcq.cqe = entries - 1;
ib_umem_release(cq->buf.umem);
cq->buf.umem = cq->resize_umem;
cq->resize_umem = NULL;
} else {
struct mlx5_ib_cq_buf tbuf;
int resized = 0;
spin_lock_irqsave(&cq->lock, flags);
if (cq->resize_buf) {
err = copy_resize_cqes(cq);
if (!err) {
tbuf = cq->buf;
cq->buf = *cq->resize_buf;
kfree(cq->resize_buf);
cq->resize_buf = NULL;
resized = 1;
}
}
cq->ibcq.cqe = entries - 1;
spin_unlock_irqrestore(&cq->lock, flags);
if (resized)
free_cq_buf(dev, &tbuf);
}
mutex_unlock(&cq->resize_mutex);
mlx5_vfree(in);
return 0;
ex_alloc:
mlx5_vfree(in);
ex_resize:
if (udata)
un_resize_user(cq);
else
un_resize_kernel(dev, cq);
ex:
mutex_unlock(&cq->resize_mutex);
return err;
}
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)

View File

@ -541,6 +541,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
int gross_uuars;
int num_uars;
int uuarn;
int err;
@ -559,11 +560,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
return ERR_PTR(-EINVAL);
num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
resp.cache_line_size = L1_CACHE_BYTES;
@ -585,7 +588,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_ctx;
}
uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
sizeof(*uuari->bitmap),
GFP_KERNEL);
if (!uuari->bitmap) {
@ -595,13 +598,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
/*
* clear all fast path uuars
*/
for (i = 0; i < req.total_num_uuars; i++) {
for (i = 0; i < gross_uuars; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
set_bit(i, uuari->bitmap);
}
uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
if (!uuari->count) {
err = -ENOMEM;
goto out_bitmap;

View File

@ -195,6 +195,7 @@ struct mlx5_ib_cq_buf {
struct mlx5_buf buf;
struct ib_umem *umem;
int cqe_size;
int nent;
};
enum mlx5_ib_qp_flags {
@ -220,7 +221,7 @@ struct mlx5_ib_cq {
/* protect resize cq
*/
struct mutex resize_mutex;
struct mlx5_ib_cq_resize *resize_buf;
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
};
@ -264,7 +265,6 @@ struct mlx5_ib_mr {
enum ib_wc_status status;
struct mlx5_ib_dev *dev;
struct mlx5_create_mkey_mbox_out out;
unsigned long start;
};
struct mlx5_ib_fast_reg_page_list {

View File

@ -146,7 +146,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
spin_lock_irq(&ent->lock);
ent->pending++;
spin_unlock_irq(&ent->lock);
mr->start = jiffies;
err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
sizeof(*in), reg_mr_callback,
mr, &mr->out);

View File

@ -340,14 +340,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
/* Index at which the medium-class UUAR walk starts; always 1. */
static int first_med_uuar(void)
{
	const int first = 1;

	return first;
}
/*
 * Return the next index after @n whose position within its group of
 * four is 0 or 1, i.e. skip indices where (index % 4) has bit 1 set.
 */
static int next_uuar(int n)
{
	do {
		n++;
	} while ((n % 4) & 2);

	return n;
}
/*
 * Number of medium-class UUARs: the non-fast-path registers across all
 * UARs, minus the low-latency ones and one more.  Never negative.
 */
static int num_med_uuar(struct mlx5_uuar_info *uuari)
{
	int med = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
		  uuari->num_low_latency_uuars - 1;

	if (med < 0)
		return 0;

	return med;
}
/* Exclusive upper bound for UUAR indices: four per UAR. */
static int max_uuari(struct mlx5_uuar_info *uuari)
{
	return 4 * uuari->num_uars;
}
/*
 * Return the index of the first UUAR that follows the medium-class
 * range: start at first_med_uuar() and step over num_med_uuar()
 * entries with next_uuar().
 *
 * When there are no medium-class UUARs the result is first_med_uuar()
 * itself.  The previous open-ended loop compared a counter that started
 * at 1 against the medium count and therefore never terminated for a
 * count of 0.
 */
static int first_hi_uuar(struct mlx5_uuar_info *uuari)
{
	int med = num_med_uuar(uuari);
	int i = first_med_uuar();
	int t;

	for (t = 0; t < med; t++)
		i = next_uuar(i);

	return i;
}
static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
{
int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
int start_uuar;
int i;
start_uuar = nuuars - uuari->num_low_latency_uuars;
for (i = start_uuar; i < nuuars; i++) {
for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
if (!test_bit(i, uuari->bitmap)) {
set_bit(i, uuari->bitmap);
uuari->count[i]++;
@ -360,19 +403,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
{
int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
int minidx = 1;
int uuarn;
int end;
int minidx = first_med_uuar();
int i;
end = nuuars - uuari->num_low_latency_uuars;
for (i = 1; i < end; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
continue;
for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
if (uuari->count[i] < uuari->count[minidx])
minidx = i;
}
@ -489,12 +523,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
int page_shift;
int page_shift = 0;
int uar_index;
int npages;
u32 offset;
u32 offset = 0;
int uuarn;
int ncont;
int ncont = 0;
int err;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@ -510,11 +544,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
mlx5_ib_dbg(dev, "reverting to high latency\n");
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
mlx5_ib_dbg(dev, "reverting to medium latency\n");
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "uuar allocation failed\n");
return uuarn;
mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
mlx5_ib_dbg(dev, "reverting to high latency\n");
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
if (uuarn < 0) {
mlx5_ib_warn(dev, "uuar allocation failed\n");
return uuarn;
}
}
}
@ -525,23 +564,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_uuar;
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(qp->umem);
goto err_uuar;
if (ucmd.buf_addr && qp->buf_size) {
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(qp->umem);
goto err_uuar;
}
} else {
qp->umem = NULL;
}
mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
if (err) {
mlx5_ib_warn(dev, "bad offset\n");
goto err_umem;
if (qp->umem) {
mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
if (err) {
mlx5_ib_warn(dev, "bad offset\n");
goto err_umem;
}
mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
}
mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
*in = mlx5_vzalloc(*inlen);
@ -549,7 +594,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
if (qp->umem)
mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@ -580,7 +626,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_vfree(*in);
err_umem:
ib_umem_release(qp->umem);
if (qp->umem)
ib_umem_release(qp->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
@ -593,7 +640,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
ib_umem_release(qp->umem);
if (qp->umem)
ib_umem_release(qp->umem);
free_uuar(&context->uuari, qp->uuarn);
}
@ -1616,7 +1664,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
IB_LINK_LAYER_UNSPECIFIED))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@ -2212,6 +2261,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell */
wmb();
if (bf->need_lock)
spin_lock(&bf->lock);

View File

@ -93,6 +93,9 @@ struct mlx5_ib_create_cq_resp {
struct mlx5_ib_resize_cq {
__u64 buf_addr;
__u16 cqe_size;
__u16 reserved0;
__u32 reserved1;
};
struct mlx5_ib_create_srq {

View File

@ -860,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,

View File

@ -1354,8 +1354,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
neigh->ha, ntohl(rt->rt_gateway));
if (arpindex >= 0) {
if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
neigh->ha, ETH_ALEN)) {
if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
/* Mac address same as in nes_arp_table */
goto out;
}

View File

@ -1,6 +1,6 @@
config INFINIBAND_OCRDMA
tristate "Emulex One Connect HCA support"
depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
select NET_VENDOR_EMULEX
select BE2NET
---help---

View File

@ -423,5 +423,17 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
}
/*
 * Write the destination MAC for @ah_attr into @mac_addr.
 *
 * If the destination GID is a multicast address the MAC is derived from
 * it with rdma_get_mcast_mac(); otherwise ah_attr->dmac is copied.
 * Always returns 0.
 */
static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
		struct ib_ah_attr *ah_attr, u8 *mac_addr)
{
	struct in6_addr dgid6;

	memcpy(&dgid6, ah_attr->grh.dgid.raw, sizeof(dgid6));

	if (!rdma_is_multicast_addr(&dgid6)) {
		memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
		return 0;
	}

	rdma_get_mcast_mac(&dgid6, mac_addr);
	return 0;
}
#endif

View File

@ -49,7 +49,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
ah->sgid_index = attr->grh.sgid_index;
vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag && (vlan_tag < 0x1000)) {
@ -64,7 +64,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
eth_sz = sizeof(struct ocrdma_eth_basic);
}
memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
@ -84,6 +85,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
if (vlan_enabled)
ah->av->valid |= OCRDMA_AV_VLAN_VALID;
ah->av->valid = cpu_to_le32(ah->av->valid);
return status;
}

View File

@ -2076,23 +2076,6 @@ int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
return status;
}
int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
u8 *mac_addr)
{
struct in6_addr in6;
memcpy(&in6, dgid, sizeof in6);
if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, mac_addr);
} else if (rdma_link_local_addr(&in6)) {
rdma_get_ll_mac(&in6, mac_addr);
} else {
pr_err("%s() fail to resolve mac_addr.\n", __func__);
return -EINVAL;
}
return 0;
}
static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
struct ib_qp_attr *attrs)
@ -2126,14 +2109,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
qp->sgid_idx = ah_attr->grh.sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]);
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
/* convert them to LE format. */
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
vlan_id = rdma_get_vlan_id(&sgid);
vlan_id = ah_attr->vlan_id;
if (vlan_id && (vlan_id < 0x1000)) {
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;

View File

@ -94,7 +94,6 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);

View File

@ -67,46 +67,24 @@ void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
guid[7] = mac_addr[5];
}
static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
bool is_vlan, u16 vlan_id)
{
sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
sgid->raw[8] = mac_addr[0] ^ 2;
sgid->raw[9] = mac_addr[1];
sgid->raw[10] = mac_addr[2];
if (is_vlan) {
sgid->raw[11] = vlan_id >> 8;
sgid->raw[12] = vlan_id & 0xff;
} else {
sgid->raw[11] = 0xff;
sgid->raw[12] = 0xfe;
}
sgid->raw[13] = mac_addr[3];
sgid->raw[14] = mac_addr[4];
sgid->raw[15] = mac_addr[5];
}
static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
bool is_vlan, u16 vlan_id)
static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid)
{
int i;
union ib_gid new_sgid;
unsigned long flags;
memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
for (i = 0; i < OCRDMA_MAX_SGID; i++) {
if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
sizeof(union ib_gid))) {
/* found free entry */
memcpy(&dev->sgid_tbl[i], &new_sgid,
memcpy(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid));
spin_unlock_irqrestore(&dev->sgid_lock, flags);
return true;
} else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
} else if (!memcmp(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid))) {
/* entry already present, no addition is required. */
spin_unlock_irqrestore(&dev->sgid_lock, flags);
@ -117,20 +95,17 @@ static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return false;
}
static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
bool is_vlan, u16 vlan_id)
static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid)
{
int found = false;
int i;
union ib_gid sgid;
unsigned long flags;
ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
/* first is default sgid, which cannot be deleted. */
for (i = 1; i < OCRDMA_MAX_SGID; i++) {
if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) {
/* found matching entry */
memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
found = true;
@ -141,75 +116,18 @@ static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return found;
}
static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
static int ocrdma_addr_event(unsigned long event, struct net_device *netdev,
union ib_gid *gid)
{
/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
union ib_gid *sgid = &dev->sgid_tbl[0];
sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
ocrdma_get_guid(dev, &sgid->raw[8]);
}
#if IS_ENABLED(CONFIG_VLAN_8021Q)
static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
{
struct net_device *netdev, *tmp;
u16 vlan_id;
bool is_vlan;
netdev = dev->nic_info.netdev;
rcu_read_lock();
for_each_netdev_rcu(&init_net, tmp) {
if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
if (!netif_running(tmp) || !netif_oper_up(tmp))
continue;
if (netdev != tmp) {
vlan_id = vlan_dev_vlan_id(tmp);
is_vlan = true;
} else {
is_vlan = false;
vlan_id = 0;
tmp = netdev;
}
ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
}
}
rcu_read_unlock();
}
#else
static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
{
}
#endif /* VLAN */
static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
{
ocrdma_add_default_sgid(dev);
ocrdma_add_vlan_sgids(dev);
return 0;
}
#if IS_ENABLED(CONFIG_IPV6)
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct net_device *netdev = ifa->idev->dev;
struct ib_event gid_event;
struct ocrdma_dev *dev;
bool found = false;
bool updated = false;
bool is_vlan = false;
u16 vid = 0;
is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
if (is_vlan) {
vid = vlan_dev_vlan_id(netdev);
if (is_vlan)
netdev = vlan_dev_real_dev(netdev);
}
rcu_read_lock();
list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
@ -222,16 +140,14 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
if (!found)
return NOTIFY_DONE;
if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
return NOTIFY_DONE;
mutex_lock(&dev->dev_lock);
switch (event) {
case NETDEV_UP:
updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
updated = ocrdma_add_sgid(dev, gid);
break;
case NETDEV_DOWN:
updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
updated = ocrdma_del_sgid(dev, gid);
break;
default:
break;
@ -247,6 +163,32 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
return NOTIFY_OK;
}
static int ocrdma_inetaddr_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = ptr;
union ib_gid gid;
struct net_device *netdev = ifa->ifa_dev->dev;
ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
return ocrdma_addr_event(event, netdev, &gid);
}
/* Notifier block that routes IPv4 address events to ocrdma_inetaddr_event(). */
static struct notifier_block ocrdma_inetaddr_notifier = {
	.notifier_call = ocrdma_inetaddr_event,
};
#if IS_ENABLED(CONFIG_IPV6)
/*
 * IPv6 address notifier callback.
 *
 * @ptr is treated as a struct inet6_ifaddr. Its address is reinterpreted as
 * a GID and passed, with the owning net_device and @event, to
 * ocrdma_addr_event(), whose return value is returned.
 */
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
				  unsigned long event, void *ptr)
{
	struct inet6_ifaddr *addr_info = ptr;

	return ocrdma_addr_event(event, addr_info->idev->dev,
				 (union ib_gid *)&addr_info->addr);
}
/* Notifier block whose callback is ocrdma_inet6addr_event(). */
static struct notifier_block ocrdma_inet6addr_notifier = {
.notifier_call = ocrdma_inet6addr_event
};
@ -423,10 +365,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
status = ocrdma_build_sgid_tbl(dev);
if (status)
goto alloc_err;
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
@ -553,6 +491,10 @@ static int __init ocrdma_init_module(void)
{
int status;
status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
if (status)
return status;
#if IS_ENABLED(CONFIG_IPV6)
status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
if (status)

View File

@ -31,7 +31,7 @@
#define Bit(_b) (1 << (_b))
#define OCRDMA_GEN1_FAMILY 0xB
#define OCRDMA_GEN2_FAMILY 0x2
#define OCRDMA_GEN2_FAMILY 0x0F
#define OCRDMA_SUBSYS_ROCE 10
enum {
@ -1694,7 +1694,7 @@ struct ocrdma_grh {
u16 rsvd;
} __packed;
#define OCRDMA_AV_VALID Bit(0)
#define OCRDMA_AV_VALID Bit(7)
#define OCRDMA_AV_VLAN_VALID Bit(1)
struct ocrdma_av {

View File

@ -1326,7 +1326,8 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
IB_LINK_LAYER_ETHERNET)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,

View File

@ -585,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask))
attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {

View File

@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
struct qib_sge *sge;
struct ib_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
}
if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
IB_QPT_UD : sqp->ibqp.qp_type;
dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
IB_QPT_UD : qp->ibqp.qp_type;
if (dqptype != sqptype ||
!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;

View File

@ -0,0 +1,10 @@
config INFINIBAND_USNIC
tristate "Verbs support for Cisco VIC"
depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
select ENIC
select NET_VENDOR_CISCO
select PCI_IOV
select INFINIBAND_USER_ACCESS
---help---
This is a low-level driver for Cisco's Virtual Interface
Cards (VICs), including the VIC 1240 and 1280 cards.

View File

@ -0,0 +1,15 @@
ccflags-y := -Idrivers/net/ethernet/cisco/enic
obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o
usnic_verbs-y=\
usnic_fwd.o \
usnic_transport.o \
usnic_uiom.o \
usnic_uiom_interval_tree.o \
usnic_vnic.o \
usnic_ib_main.o \
usnic_ib_qp_grp.o \
usnic_ib_sysfs.o \
usnic_ib_verbs.o \
usnic_debugfs.o \

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_H_
#define USNIC_H_
#define DRV_NAME "usnic_verbs"
#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */
#define DRV_VERSION "1.0.3"
#define DRV_RELDATE "December 19, 2013"
#endif /* USNIC_H_ */

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_ABI_H
#define USNIC_ABI_H
/* ABI between userspace and kernel */
#define USNIC_UVERBS_ABI_VERSION 4
#define USNIC_QP_GRP_MAX_WQS 8
#define USNIC_QP_GRP_MAX_RQS 8
#define USNIC_QP_GRP_MAX_CQS 16
/* Transport selector; stored in struct usnic_transport_spec.trans_type. */
enum usnic_transport_type {
USNIC_TRANSPORT_UNKNOWN = 0,
USNIC_TRANSPORT_ROCE_CUSTOM = 1,
USNIC_TRANSPORT_IPV4_UDP = 2,
/* NOTE(review): looks like an upper-bound sentinel, not a transport */
USNIC_TRANSPORT_MAX = 3,
};
/*
 * A transport type plus one per-transport parameter held in an anonymous
 * union.
 */
struct usnic_transport_spec {
enum usnic_transport_type trans_type;
union {
/* presumably used with USNIC_TRANSPORT_ROCE_CUSTOM; verify against caller */
struct {
uint16_t port_num;
} usnic_roce;
/* presumably used with USNIC_TRANSPORT_IPV4_UDP; verify against caller */
struct {
uint32_t sock_fd;
} udp;
};
};
/* Create-QP command payload: carries only the transport specification. */
struct usnic_ib_create_qp_cmd {
struct usnic_transport_spec spec;
};
/*TODO: Future - usnic_modify_qp needs to pass in generic filters */
/* Create-QP response payload. */
struct usnic_ib_create_qp_resp {
u32 vfid;
u32 qp_grp_id;
u64 bar_bus_addr;
u32 bar_len;
/*
* WQ, RQ and CQ are explicitly specified because exposing a generic
* resources interface would expand the scope of the ABI to many files.
*/
u32 wq_cnt;
u32 rq_cnt;
u32 cq_cnt;
/* Index arrays are sized by the USNIC_QP_GRP_MAX_* limits. */
u32 wq_idx[USNIC_QP_GRP_MAX_WQS];
u32 rq_idx[USNIC_QP_GRP_MAX_RQS];
u32 cq_idx[USNIC_QP_GRP_MAX_CQS];
u32 transport;
u32 reserved[9];
};
#endif /* USNIC_ABI_H */

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_CMN_PKT_HDR_H
#define USNIC_CMN_PKT_HDR_H
#define USNIC_ROCE_ETHERTYPE (0x8915)
#define USNIC_ROCE_GRH_VER (8)
#define USNIC_PROTO_VER (1)
#define USNIC_ROCE_GRH_VER_SHIFT (4)
#endif /* USNIC_CMN_PKT_HDR_H */

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_CMN_UTIL_H
#define USNIC_CMN_UTIL_H
/*
 * Fill the 16-byte @raw_gid from the 6-byte @mac.
 *
 * Bytes 0-1 are set to fe:80, bytes 2-7 to zero, and bytes 8-15 to the MAC
 * with ff:fe inserted between its third and fourth bytes and the first MAC
 * byte XORed with 2.
 */
static inline void
usnic_mac_to_gid(const char *const mac, char *raw_gid)
{
	char *out = raw_gid;

	/* bytes 0-7: fe80 followed by six zero bytes */
	*out++ = 0xfe;
	*out++ = 0x80;
	memset(out, 0, 6);
	out += 6;

	/* bytes 8-15: interface identifier derived from the MAC */
	*out++ = mac[0] ^ 2;
	*out++ = mac[1];
	*out++ = mac[2];
	*out++ = 0xff;
	*out++ = 0xfe;
	*out++ = mac[3];
	*out++ = mac[4];
	*out = mac[5];
}
/*
 * Fill the 16-byte @raw_gid from the 6-byte @mac and the IPv4 address
 * @inaddr.
 *
 * Bytes 0-1 are fe:80, bytes 2-3 are zero, bytes 4-7 are a raw copy of
 * @inaddr, and bytes 8-15 are the MAC with ff:fe inserted between its third
 * and fourth bytes and the first MAC byte XORed with 2.
 */
static inline void
usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
{
	char *out = raw_gid;

	/* bytes 0-3: fe80 followed by two zero bytes */
	*out++ = 0xfe;
	*out++ = 0x80;
	memset(out, 0, 2);
	out += 2;

	/* bytes 4-7: the address exactly as stored in @inaddr */
	memcpy(out, &inaddr, 4);
	out += 4;

	/* bytes 8-15: interface identifier derived from the MAC */
	*out++ = mac[0] ^ 2;
	*out++ = mac[1];
	*out++ = mac[2];
	*out++ = 0xff;
	*out++ = 0xfe;
	*out++ = mac[3];
	*out++ = mac[4];
	*out = mac[5];
}
/*
 * Overwrite only bytes 8-15 of @raw_gid with an interface identifier built
 * from the 6-byte @mac: the MAC with ff:fe inserted between its third and
 * fourth bytes and its first byte XORed with 2. Bytes 0-7 are left untouched.
 */
static inline void
usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
{
	char *if_id = raw_gid + 8;

	*if_id++ = mac[0] ^ 2;
	*if_id++ = mac[1];
	*if_id++ = mac[2];
	*if_id++ = 0xff;
	*if_id++ = 0xfe;
	*if_id++ = mac[3];
	*if_id++ = mac[4];
	*if_id = mac[5];
}
#endif /* USNIC_CMN_UTIL_H */

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/debugfs.h>
#include <linux/module.h>
#include "usnic.h"
#include "usnic_log.h"
#include "usnic_debugfs.h"
#include "usnic_ib_qp_grp.h"
#include "usnic_transport.h"
static struct dentry *debugfs_root;
static struct dentry *flows_dentry;
/*
 * read() handler for the "build-info" debugfs file.
 *
 * Formats DRV_VERSION and DRV_RELDATE into a local buffer and copies it to
 * user space. Any read at a non-zero offset returns 0 (end of file).
 */
static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
						size_t count, loff_t *ppos)
{
	char text[500];
	int len;

	if (*ppos > 0)
		return 0;

	len = scnprintf(text, sizeof(text),
			"version:       %s\n"
			"build date:    %s\n",
			DRV_VERSION, DRV_RELDATE);

	return simple_read_from_buffer(data, count, ppos, text, len);
}
/* File operations for the "build-info" debugfs file (read-only text). */
static const struct file_operations usnic_debugfs_buildinfo_ops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = usnic_debugfs_buildinfo_read
};
/*
 * read() handler for a per-flow debugfs file.
 *
 * Formats the owning QP group id, the transport name and the
 * transport-specific parameter of the flow stored in f->private_data, then
 * copies the text to user space. Any read at a non-zero offset returns 0.
 *
 * The formatting budget is the size of the local buffer, not the caller's
 * @count: @count is user-controlled and may exceed the buffer, and
 * simple_read_from_buffer() already limits the copy to @count bytes.
 */
static ssize_t flowinfo_read(struct file *f, char __user *data,
				size_t count, loff_t *ppos)
{
	struct usnic_ib_qp_grp_flow *qp_flow;
	int n;
	int left;
	char *ptr;
	char buf[512];

	qp_flow = f->private_data;
	ptr = buf;
	left = sizeof(buf);

	if (*ppos > 0)
		return 0;

	/* The flow description is read under the owning QP group's lock. */
	spin_lock(&qp_flow->qp_grp->lock);
	n = scnprintf(ptr, left,
			"QP Grp ID: %d Transport: %s ",
			qp_flow->qp_grp->grp_id,
			usnic_transport_to_str(qp_flow->trans_type));
	UPDATE_PTR_LEFT(n, ptr, left);
	if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
		n = scnprintf(ptr, left, "Port_Num:%hu\n",
					qp_flow->usnic_roce.port_num);
		UPDATE_PTR_LEFT(n, ptr, left);
	} else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
		n = usnic_transport_sock_to_str(ptr, left,
				qp_flow->udp.sock);
		UPDATE_PTR_LEFT(n, ptr, left);
		n = scnprintf(ptr, left, "\n");
		UPDATE_PTR_LEFT(n, ptr, left);
	}
	spin_unlock(&qp_flow->qp_grp->lock);

	/* ptr - buf is the number of bytes formatted so far. */
	return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
}
/* File operations for the per-flow debugfs files (read-only text). */
static const struct file_operations flowinfo_ops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = flowinfo_read,
};
/*
 * Create the driver's debugfs hierarchy: a root directory named DRV_NAME, a
 * "flows" sub-directory and a "build-info" file.
 *
 * Failures are logged but not reported to the caller. On failure
 * debugfs_root is left NULL, so usnic_debugfs_exit() becomes a no-op.
 *
 * Both directories are checked with IS_ERR_OR_NULL() so that a NULL return
 * for the root is also treated as failure; otherwise the children would be
 * created with a NULL parent and never removed.
 */
void usnic_debugfs_init(void)
{
	debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
	if (IS_ERR_OR_NULL(debugfs_root)) {
		usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
		goto out_clear_root;
	}

	flows_dentry = debugfs_create_dir("flows", debugfs_root);
	if (IS_ERR_OR_NULL(flows_dentry)) {
		usnic_err("Failed to create debugfs flow dir with err %ld\n",
				PTR_ERR(flows_dentry));
		goto out_free_root;
	}

	debugfs_create_file("build-info", S_IRUGO, debugfs_root,
				NULL, &usnic_debugfs_buildinfo_ops);

	return;

out_free_root:
	debugfs_remove_recursive(debugfs_root);
out_clear_root:
	debugfs_root = NULL;
}
/*
 * Remove the driver's debugfs root directory and everything beneath it, then
 * clear debugfs_root. Does nothing when debugfs_root is NULL.
 */
void usnic_debugfs_exit(void)
{
	if (debugfs_root) {
		debugfs_remove_recursive(debugfs_root);
		debugfs_root = NULL;
	}
}
/*
 * Create a debugfs file for @qp_flow inside the "flows" directory.
 *
 * The file is named after the decimal flow id and served by flowinfo_ops,
 * with @qp_flow as its private data. Does nothing when the "flows"
 * directory is unavailable. A creation failure is logged only.
 */
void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
{
	struct dentry *entry;

	if (IS_ERR_OR_NULL(flows_dentry))
		return;

	scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
			"%u", qp_flow->flow->flow_id);

	entry = debugfs_create_file(qp_flow->dentry_name, S_IRUGO,
				    flows_dentry, qp_flow, &flowinfo_ops);
	qp_flow->dbgfs_dentry = entry;
	if (!IS_ERR_OR_NULL(entry))
		return;

	usnic_err("Failed to create dbg fs entry for flow %u\n",
			qp_flow->flow->flow_id);
}
/*
 * Remove the debugfs file of @qp_flow, if one was successfully created
 * (i.e. its dentry is neither NULL nor an error pointer).
 */
void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
{
	if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
		return;

	debugfs_remove(qp_flow->dbgfs_dentry);
}

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_DEBUGFS_H_
#define USNIC_DEBUGFS_H_
#include "usnic_ib_qp_grp.h"
void usnic_debugfs_init(void);
void usnic_debugfs_exit(void);
void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
#endif /*!USNIC_DEBUGFS_H_ */

View File

@ -0,0 +1,350 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/netdevice.h>
#include <linux/pci.h>
#include "enic_api.h"
#include "usnic_common_pkt_hdr.h"
#include "usnic_fwd.h"
#include "usnic_log.h"
/*
 * Issue device command @cmd for vnic @vnic_idx through the enic proxy API,
 * with a timeout argument of 1000, and log the outcome.
 *
 * The caller must hold ufdev->lock. Returns the status from
 * enic_api_devcmd_proxy_by_index() unchanged. ERR_EINVAL on CMD_DEL_FILTER
 * is logged at debug level as "already deleted"; any other failure is
 * logged as an error.
 */
static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
					enum vnic_devcmd_cmd cmd, u64 *a0,
					u64 *a1)
{
	struct net_device *netdev = ufdev->netdev;
	int rc;

	lockdep_assert_held(&ufdev->lock);

	rc = enic_api_devcmd_proxy_by_index(netdev, vnic_idx, cmd,
					    a0, a1, 1000);
	if (!rc) {
		usnic_dbg("Dev %s vnic idx %u cmd %u success",
				ufdev->name, vnic_idx, cmd);
		return rc;
	}

	if (rc == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
		usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
				ufdev->name, vnic_idx, cmd);
	} else {
		usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
				ufdev->name, vnic_idx, cmd,
				rc);
	}

	return rc;
}
/*
 * Locking wrapper: take ufdev->lock, run usnic_fwd_devcmd_locked() and
 * return its status.
 */
static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
				enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
{
	int rc;

	spin_lock(&ufdev->lock);
	rc = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
	spin_unlock(&ufdev->lock);

	return rc;
}
/*
 * Allocate and initialise a forwarding device for @pdev.
 *
 * The net_device is taken from the PCI driver data of @pdev and its name is
 * copied into the zero-initialised name buffer, leaving the last byte as the
 * terminator. Returns NULL on allocation failure; release the result with
 * usnic_fwd_dev_free().
 */
struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
{
	struct usnic_fwd_dev *fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);

	if (fwd) {
		fwd->pdev = pdev;
		fwd->netdev = pci_get_drvdata(pdev);
		spin_lock_init(&fwd->lock);
		strncpy(fwd->name, netdev_name(fwd->netdev),
			sizeof(fwd->name) - 1);
	}

	return fwd;
}
/* Free a forwarding device obtained from usnic_fwd_dev_alloc(). */
void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
{
kfree(ufdev);
}
/* Copy @mac into ufdev->mac while holding ufdev->lock. */
void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
{
spin_lock(&ufdev->lock);
memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
spin_unlock(&ufdev->lock);
}
/*
 * Record @inaddr as the device's IPv4 address, under ufdev->lock.
 *
 * Returns 0 if no address was set before, or -EFAULT (leaving the existing
 * address in place) if one was already recorded.
 */
int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
{
	int rc = -EFAULT;

	spin_lock(&ufdev->lock);
	if (ufdev->inaddr == 0) {
		ufdev->inaddr = inaddr;
		rc = 0;
	}
	spin_unlock(&ufdev->lock);

	return rc;
}
/* Clear the recorded IPv4 address (set it to 0) under ufdev->lock. */
void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
{
spin_lock(&ufdev->lock);
ufdev->inaddr = 0;
spin_unlock(&ufdev->lock);
}
/* Mark the link as up (link_up = 1) under ufdev->lock. */
void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
{
spin_lock(&ufdev->lock);
ufdev->link_up = 1;
spin_unlock(&ufdev->lock);
}
/* Mark the link as down (link_up = 0) under ufdev->lock. */
void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
{
spin_lock(&ufdev->lock);
ufdev->link_up = 0;
spin_unlock(&ufdev->lock);
}
/* Store @mtu in ufdev->mtu under ufdev->lock. */
void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
{
spin_lock(&ufdev->lock);
ufdev->mtu = mtu;
spin_unlock(&ufdev->lock);
}
/*
 * Report whether the forwarding device can accept flows.
 * Caller must hold ufdev->lock. Returns 0 when the link is up, -EPERM
 * otherwise.
 */
static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
{
	lockdep_assert_held(&ufdev->lock);

	return ufdev->link_up ? 0 : -EPERM;
}
/*
 * Sanity-check @filter against the state of @ufdev. Caller must hold
 * ufdev->lock.
 *
 * Only FILTER_IPV4_5TUPLE filters are checked; every other type is accepted.
 * Checks are made in this order, the first failure deciding the result:
 *   -EACCES  destination-address flag not set
 *   -EBUSY   destination-port flag not set
 *   -EINVAL  the device has no IPv4 address recorded
 *   -ERANGE  destination port is 0
 *   -EFAULT  destination address differs from ntohl() of the device address
 * Returns 0 when all checks pass.
 */
static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
					struct filter *filter)
{
	lockdep_assert_held(&ufdev->lock);

	if (filter->type != FILTER_IPV4_5TUPLE)
		return 0;

	if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
		return -EACCES;

	if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
		return -EBUSY;

	if (ufdev->inaddr == 0)
		return -EINVAL;

	if (filter->u.ipv4.dst_port == 0)
		return -ERANGE;

	if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
		return -EFAULT;

	return 0;
}
/*
 * Write two consecutive TLV records into the buffer at @tlv: a
 * CLSF_TLV_FILTER record holding a copy of @filter, immediately followed by
 * a CLSF_TLV_ACTION record holding a copy of @action.
 *
 * The caller in usnic_fwd_alloc_flow() sizes the buffer as
 * 2 * sizeof(struct filter_tlv) + sizeof(struct filter) +
 * sizeof(struct filter_action).
 */
static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
struct filter_action *action)
{
tlv->type = CLSF_TLV_FILTER;
tlv->length = sizeof(struct filter);
*((struct filter *)&tlv->val) = *filter;
/* Step past the first record's header and payload to the second record. */
tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
sizeof(struct filter));
tlv->type = CLSF_TLV_ACTION;
tlv->length = sizeof(struct filter_action);
*((struct filter_action *)&tlv->val) = *action;
}
/*
 * Install @filter with action @uaction on the forwarding device and return a
 * flow handle describing it.
 *
 * The filter and action are serialised into a DMA-coherent TLV buffer, the
 * device state and filter are validated under ufdev->lock, and
 * CMD_ADD_FILTER is issued for uaction->vnic_idx. The TLV buffer is always
 * released before returning.
 *
 * Returns the new flow on success, or an ERR_PTR():
 *   -ENOMEM  flow or TLV buffer allocation failed
 *   -EFAULT  the add-filter device command failed
 *   other    status from usnic_fwd_dev_ready_locked() or
 *            validate_filter_locked()
 * Free the flow with usnic_fwd_dealloc_flow().
 */
struct usnic_fwd_flow*
usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
struct usnic_filter_action *uaction)
{
struct filter_tlv *tlv;
struct pci_dev *pdev;
struct usnic_fwd_flow *flow;
uint64_t a0, a1;
uint64_t tlv_size;
dma_addr_t tlv_pa;
int status;
pdev = ufdev->pdev;
/* Two TLV headers plus the filter and action payloads (see fill_tlv()). */
tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
sizeof(struct filter_action));
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
if (!flow)
return ERR_PTR(-ENOMEM);
tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
if (!tlv) {
usnic_err("Failed to allocate memory\n");
status = -ENOMEM;
goto out_free_flow;
}
fill_tlv(tlv, filter, &uaction->action);
/* From here on every exit goes through out_free_tlv, which unlocks. */
spin_lock(&ufdev->lock);
status = usnic_fwd_dev_ready_locked(ufdev);
if (status) {
usnic_err("Forwarding dev %s not ready with status %d\n",
ufdev->name, status);
goto out_free_tlv;
}
status = validate_filter_locked(ufdev, filter);
if (status) {
usnic_err("Failed to validate filter with status %d\n",
status);
goto out_free_tlv;
}
/* Issue the devcmd: a0 = bus address of the TLV buffer, a1 = its size. */
a0 = tlv_pa;
a1 = tlv_size;
status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
CMD_ADD_FILTER, &a0, &a1);
if (status) {
usnic_err("VF %s Filter add failed with status:%d",
ufdev->name, status);
/* The device status is replaced by -EFAULT for the caller. */
status = -EFAULT;
goto out_free_tlv;
} else {
usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
}
/* After a successful command a0 is used as the new filter id. */
flow->flow_id = (uint32_t) a0;
flow->vnic_idx = uaction->vnic_idx;
flow->ufdev = ufdev;
out_free_tlv:
spin_unlock(&ufdev->lock);
pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
/* Success also reaches this label; only failures fall through to kfree. */
if (!status)
return flow;
out_free_flow:
kfree(flow);
return ERR_PTR(status);
}
/*
 * Remove the filter behind @flow from the device and free @flow.
 *
 * Issues CMD_DEL_FILTER with the flow id in a0. Always returns 0: a failed
 * delete is logged (at debug level for ERR_EINVAL, i.e. already deleted,
 * otherwise as an error) but is not reported to the caller. @flow is freed
 * in every case and must not be used afterwards.
 */
int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
{
	int status;
	u64 a0, a1;

	a0 = flow->flow_id;
	/*
	 * Only a0 is set for this command, but a1's address is still handed
	 * to the devcmd helper; give it a defined value rather than passing
	 * stack garbage.
	 */
	a1 = 0;

	status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
					CMD_DEL_FILTER, &a0, &a1);
	if (status) {
		if (status == ERR_EINVAL) {
			usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
					flow->flow_id, flow->vnic_idx,
					flow->ufdev->name, status);
		} else {
			usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
					flow->ufdev->name, flow->vnic_idx,
					flow->flow_id, status);
		}
		/*
		 * Log the error and fake success to the caller because if
		 * a flow fails to be deleted in the firmware, it is an
		 * unrecoverable error.
		 */
		status = 0;
	} else {
		usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
				flow->ufdev->name, flow->vnic_idx,
				flow->flow_id);
	}

	kfree(flow);
	return status;
}
/*
 * Enable queue @qp_idx on vnic @vnic_idx by issuing CMD_QP_ENABLE with
 * a0 = @qp_idx and a1 = CMD_QP_RQWQ. Logs the outcome and returns the
 * devcmd status (0 on success).
 */
int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
{
	struct net_device *pf_netdev = ufdev->netdev;
	u64 a0 = qp_idx;
	u64 a1 = CMD_QP_RQWQ;
	int status;

	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, &a0, &a1);
	if (!status) {
		usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
				netdev_name(pf_netdev),
				vnic_idx, qp_idx);
		return status;
	}

	usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
			netdev_name(pf_netdev), vnic_idx, qp_idx, status);

	return status;
}
/*
 * Disable queue @qp_idx on vnic @vnic_idx by issuing CMD_QP_DISABLE with
 * a0 = @qp_idx and a1 = CMD_QP_RQWQ. Logs the outcome and returns the
 * devcmd status (0 on success).
 */
int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
{
	struct net_device *pf_netdev = ufdev->netdev;
	u64 a0 = qp_idx;
	u64 a1 = CMD_QP_RQWQ;
	int status;

	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, &a0, &a1);
	if (!status) {
		usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
				netdev_name(pf_netdev),
				vnic_idx,
				qp_idx);
		return status;
	}

	usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
			netdev_name(pf_netdev), vnic_idx, qp_idx, status);

	return status;
}

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_FWD_H_
#define USNIC_FWD_H_
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/in.h>
#include "usnic_abi.h"
#include "usnic_common_pkt_hdr.h"
#include "vnic_devcmd.h"
/* Per-device forwarding state. */
struct usnic_fwd_dev {
struct pci_dev *pdev;
struct net_device *netdev;
/* Taken by the setters below and around device commands. */
spinlock_t lock;
/*
* The following fields can be read directly off the device.
* However, they should be set by an accessor function, except name,
* which cannot be changed.
*/
bool link_up;
char mac[ETH_ALEN];
unsigned int mtu;
__be32 inaddr;
char name[IFNAMSIZ+1];
};
/* Handle for one installed filter, as returned by usnic_fwd_alloc_flow(). */
struct usnic_fwd_flow {
uint32_t flow_id;
struct usnic_fwd_dev *ufdev;
unsigned int vnic_idx;
};
/* A filter action together with the vnic index it is issued for. */
struct usnic_filter_action {
int vnic_idx;
struct filter_action action;
};
struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
/*
* Allocate a flow on this forwarding device. Whoever calls this function
* must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
* NETDEV_DOWN is seen, the flow will no longer function and must be
* immediately freed by calling usnic_fwd_dealloc_flow().
*/
struct usnic_fwd_flow*
usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
struct usnic_filter_action *action);
int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
/*
 * Initialise @filter as a FILTER_USNIC_ID filter matching the usNIC RoCE
 * ethertype, the combined GRH/protocol version byte and @usnic_id.
 */
static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
						uint32_t usnic_id)
{
	filter->type = FILTER_USNIC_ID;

	/* Match on ethertype, usnic id and protocol version. */
	filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
				FILTER_FIELD_USNIC_ID |
				FILTER_FIELD_USNIC_PROTO;

	filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
	filter->u.usnic.usnic_id = usnic_id;
	filter->u.usnic.proto_version =
		(USNIC_ROCE_GRH_VER << USNIC_ROCE_GRH_VER_SHIFT) |
		USNIC_PROTO_VER;
}
/*
 * Initialise @filter as a FILTER_IPV4_5TUPLE filter for UDP.
 *
 * The protocol match is always enabled. The destination address and port
 * matches are enabled only when @daddr or @dport, respectively, is non-zero.
 */
static inline void usnic_fwd_init_udp_filter(struct filter *filter,
						uint32_t daddr, uint16_t dport)
{
	filter->type = FILTER_IPV4_5TUPLE;
	filter->u.ipv4.protocol = PROTO_UDP;
	filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;

	if (daddr != 0) {
		filter->u.ipv4.dst_addr = daddr;
		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
	}

	if (dport != 0) {
		filter->u.ipv4.dst_port = dport;
		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
	}
}
#endif /* !USNIC_FWD_H_ */

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_IB_H_
#define USNIC_IB_H_
#include <linux/iommu.h>
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
#include "usnic.h"
#include "usnic_abi.h"
#include "usnic_vnic.h"
#define USNIC_IB_PORT_CNT 1
#define USNIC_IB_NUM_COMP_VECTORS 1
extern unsigned int usnic_ib_share_vf;
/* Driver user context; embeds the core ib_ucontext (see to_ucontext()). */
struct usnic_ib_ucontext {
struct ib_ucontext ibucontext;
/* Protected by usnic_ib_dev->usdev_lock */
struct list_head qp_grp_list;
/* Entry in usnic_ib_dev->ctx_list */
struct list_head link;
};
/* Driver protection domain; embeds the core ib_pd (see to_upd()). */
struct usnic_ib_pd {
struct ib_pd ibpd;
struct usnic_uiom_pd *umem_pd;
};
/* Driver memory region; embeds the core ib_mr (see to_umr()). */
struct usnic_ib_mr {
struct ib_mr ibmr;
struct usnic_uiom_reg *umem;
};
/* Driver device; embeds the core ib_device (see to_usdev()). */
struct usnic_ib_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
struct net_device *netdev;
struct usnic_fwd_dev *ufdev;
struct list_head ib_dev_link;
struct list_head vf_dev_list;
/* List of usnic_ib_ucontext, linked through their ->link member */
struct list_head ctx_list;
struct mutex usdev_lock;
/* provisioning information */
struct kref vf_cnt;
unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
/* sysfs vars for QPN reporting */
struct kobject *qpn_kobj;
};
/* Per-VF state; ->pf points back at the owning usnic_ib_dev. */
struct usnic_ib_vf {
struct usnic_ib_dev *pf;
spinlock_t lock;
struct usnic_vnic *vnic;
unsigned int qp_grp_ref_cnt;
struct usnic_ib_pd *pd;
struct list_head link;
};
/* Return the usnic_ib_dev that embeds @ibdev as its ib_dev member. */
static inline
struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct usnic_ib_dev, ib_dev);
}
/* Return the usnic_ib_ucontext that embeds @ibucontext. */
static inline
struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
}
/* Return the usnic_ib_pd that embeds @ibpd. */
static inline
struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct usnic_ib_pd, ibpd);
}
/*
 * Return the usnic_ib_ucontext that embeds @ibucontext.
 * Performs the same conversion as to_ucontext().
 */
static inline
struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
}
/* Return the usnic_ib_mr that embeds @ibmr. */
static inline
struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct usnic_ib_mr, ibmr);
}
void usnic_ib_log_vf(struct usnic_ib_vf *vf);
/*
 * Bookkeeping after writing N bytes into a buffer: shrink the remaining
 * space L by N and advance the write cursor P by N. L and P are modified in
 * place; N is evaluated twice, so it must be free of side effects.
 */
#define UPDATE_PTR_LEFT(N, P, L) \
do { \
L -= (N); \
P += (N); \
} while (0)
#endif /* USNIC_IB_H_ */

View File

@ -0,0 +1,682 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author: Upinder Malhi <umalhi@cisco.com>
* Author: Anant Deepak <anadeepa@cisco.com>
* Author: Cesare Cantu' <cantuc@cisco.com>
* Author: Jeff Squyres <jsquyres@cisco.com>
* Author: Kiran Thirumalai <kithirum@cisco.com>
* Author: Xuyang Wang <xuywang@cisco.com>
* Author: Reese Faucette <rfaucett@cisco.com>
*
*/
#include <linux/module.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include "usnic_abi.h"
#include "usnic_common_util.h"
#include "usnic_ib.h"
#include "usnic_ib_qp_grp.h"
#include "usnic_log.h"
#include "usnic_fwd.h"
#include "usnic_debugfs.h"
#include "usnic_ib_verbs.h"
#include "usnic_transport.h"
#include "usnic_uiom.h"
#include "usnic_ib_sysfs.h"
unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
unsigned int usnic_ib_share_vf = 1;
static const char usnic_version[] =
DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
static LIST_HEAD(usnic_ib_ibdev_list);
/* Callback dump funcs */
/*
 * Dump callback: write "PF: <ib device name> " for the usnic_ib_vf passed as
 * @obj into @buf (at most @buf_sz bytes) and return scnprintf()'s result.
 */
static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
{
	const char *pf_name = ((struct usnic_ib_vf *)obj)->pf->ib_dev.name;

	return scnprintf(buf, buf_sz, "PF: %s ", pf_name);
}
/* End callback dump funcs */
/*
 * Dump @vf into @buf (at most @buf_sz bytes) via usnic_vnic_dump(), passing
 * usnic_ib_dump_vf_hdr() and the QP group header/row dump callbacks.
 */
static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
{
usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
usnic_ib_dump_vf_hdr,
usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
}
/* Dump @vf into a 1000-byte stack buffer and log it with usnic_dbg(). */
void usnic_ib_log_vf(struct usnic_ib_vf *vf)
{
char buf[1000];
usnic_ib_dump_vf(vf, buf, sizeof(buf));
usnic_dbg("%s\n", buf);
}
/* Start of netdev section */
/*
 * Map a netdevice notifier event code to its name for logging.
 *
 * The table is indexed directly by the event value, so it must have one
 * entry per code with no gaps. NETDEV_CHANGENAME (0x0A) is included so that
 * every later name lines up with its code. Values beyond the table yield
 * "UNKNOWN_NETDEV_EVENT".
 */
static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
{
	static const char *const event2str[] = {
		"NETDEV_NONE",			/* 0x00 */
		"NETDEV_UP",			/* 0x01 */
		"NETDEV_DOWN",			/* 0x02 */
		"NETDEV_REBOOT",		/* 0x03 */
		"NETDEV_CHANGE",		/* 0x04 */
		"NETDEV_REGISTER",		/* 0x05 */
		"NETDEV_UNREGISTER",		/* 0x06 */
		"NETDEV_CHANGEMTU",		/* 0x07 */
		"NETDEV_CHANGEADDR",		/* 0x08 */
		"NETDEV_GOING_DOWN",		/* 0x09 */
		"NETDEV_CHANGENAME",		/* 0x0A */
		"NETDEV_FEAT_CHANGE",		/* 0x0B */
		"NETDEV_BONDING_FAILOVER",	/* 0x0C */
		"NETDEV_PRE_UP",		/* 0x0D */
		"NETDEV_PRE_TYPE_CHANGE",	/* 0x0E */
		"NETDEV_POST_TYPE_CHANGE",	/* 0x0F */
		"NETDEV_POST_INIT",		/* 0x10 */
		"NETDEV_UNREGISTER_FINAL",	/* 0x11 */
		"NETDEV_RELEASE",		/* 0x12 */
		"NETDEV_NOTIFY_PEERS",		/* 0x13 */
		"NETDEV_JOIN",			/* 0x14 */
	};

	if (event >= sizeof(event2str) / sizeof(event2str[0]))
		return "UNKNOWN_NETDEV_EVENT";

	return event2str[event];
}
static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
{
struct usnic_ib_ucontext *ctx;
struct usnic_ib_qp_grp *qp_grp;
enum ib_qp_state cur_state;
int status;
BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
cur_state = qp_grp->state;
if (cur_state == IB_QPS_INIT ||
cur_state == IB_QPS_RTR ||
cur_state == IB_QPS_RTS) {
status = usnic_ib_qp_grp_modify(qp_grp,
IB_QPS_ERR,
NULL);
if (status) {
usnic_err("Failed to transistion qp grp %u from %s to %s\n",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string
(cur_state),
usnic_ib_qp_grp_state_to_string
(IB_QPS_ERR));
}
}
}
}
}
static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
unsigned long event)
{
struct net_device *netdev;
struct ib_event ib_event;
memset(&ib_event, 0, sizeof(ib_event));
mutex_lock(&us_ibdev->usdev_lock);
netdev = us_ibdev->netdev;
switch (event) {
case NETDEV_REBOOT:
usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
break;
case NETDEV_UP:
case NETDEV_DOWN:
case NETDEV_CHANGE:
if (!us_ibdev->ufdev->link_up &&
netif_carrier_ok(netdev)) {
usnic_fwd_carrier_up(us_ibdev->ufdev);
usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
ib_event.event = IB_EVENT_PORT_ACTIVE;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
} else if (us_ibdev->ufdev->link_up &&
!netif_carrier_ok(netdev)) {
usnic_fwd_carrier_down(us_ibdev->ufdev);
usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
} else {
usnic_dbg("Ignoring %s on %s\n",
usnic_ib_netdev_event_to_string(event),
us_ibdev->ib_dev.name);
}
break;
case NETDEV_CHANGEADDR:
if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
sizeof(us_ibdev->ufdev->mac))) {
usnic_dbg("Ignoring addr change on %s\n",
us_ibdev->ib_dev.name);
} else {
usnic_info(" %s old mac: %pM new mac: %pM\n",
us_ibdev->ib_dev.name,
us_ibdev->ufdev->mac,
netdev->dev_addr);
usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_GID_CHANGE;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
}
break;
case NETDEV_CHANGEMTU:
if (us_ibdev->ufdev->mtu != netdev->mtu) {
usnic_info("MTU Change on %s old: %u new: %u\n",
us_ibdev->ib_dev.name,
us_ibdev->ufdev->mtu, netdev->mtu);
usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
} else {
usnic_dbg("Ignoring MTU change on %s\n",
us_ibdev->ib_dev.name);
}
break;
default:
usnic_dbg("Ignoring event %s on %s",
usnic_ib_netdev_event_to_string(event),
us_ibdev->ib_dev.name);
}
mutex_unlock(&us_ibdev->usdev_lock);
}
/*
 * Netdevice notifier callback.  Looks up the usnic ib device whose netdev
 * matches the one the notification is about and, if one exists, forwards
 * the event to usnic_ib_handle_usdev_event().  Always returns NOTIFY_DONE.
 */
static int usnic_ib_netdevice_event(struct notifier_block *notifier,
				    unsigned long event, void *ptr)
{
	struct net_device *changed = netdev_notifier_info_to_dev(ptr);
	struct usnic_ib_dev *cur;

	mutex_lock(&usnic_ib_ibdev_list_lock);
	list_for_each_entry(cur, &usnic_ib_ibdev_list, ib_dev_link) {
		if (cur->netdev != changed)
			continue;

		/* Only the first match is handled. */
		usnic_ib_handle_usdev_event(cur, event);
		break;
	}
	mutex_unlock(&usnic_ib_ibdev_list_lock);

	return NOTIFY_DONE;
}
/*
 * Notifier block that routes netdevice events to usnic_ib_netdevice_event().
 * Registered in usnic_ib_init() and unregistered in usnic_ib_destroy().
 */
static struct notifier_block usnic_ib_netdevice_notifier = {
.notifier_call = usnic_ib_netdevice_event
};
/* End of netdev section */
/* Start of inet section */
/*
 * React to an inet address notifier @event for the netdev backing
 * @us_ibdev.  @ptr is the struct in_ifaddr the notification is about.
 *
 * NETDEV_DOWN drops the cached IP address, moves active QP groups to the
 * error state and raises IB_EVENT_GID_CHANGE on port 1.  NETDEV_UP records
 * the new address and raises IB_EVENT_GID_CHANGE.  Other events are only
 * logged.  All work is done under us_ibdev->usdev_lock.
 *
 * Always returns NOTIFY_DONE.
 */
static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
					unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;
	struct ib_event ib_event;

	/*
	 * Zero the event first, as usnic_ib_handle_usdev_event() does, so the
	 * bytes of the element union beyond port_num are not stack garbage
	 * when the event reaches registered handlers.
	 */
	memset(&ib_event, 0, sizeof(ib_event));

	mutex_lock(&us_ibdev->usdev_lock);

	switch (event) {
	case NETDEV_DOWN:
		usnic_info("%s via ip notifiers",
				usnic_ib_netdev_event_to_string(event));
		usnic_fwd_del_ipaddr(us_ibdev->ufdev);
		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
		ib_event.event = IB_EVENT_GID_CHANGE;
		ib_event.device = &us_ibdev->ib_dev;
		ib_event.element.port_num = 1;
		ib_dispatch_event(&ib_event);
		break;
	case NETDEV_UP:
		usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
		usnic_info("%s via ip notifiers: ip %pI4",
				usnic_ib_netdev_event_to_string(event),
				&us_ibdev->ufdev->inaddr);
		ib_event.event = IB_EVENT_GID_CHANGE;
		ib_event.device = &us_ibdev->ib_dev;
		ib_event.element.port_num = 1;
		ib_dispatch_event(&ib_event);
		break;
	default:
		usnic_info("Ignoring event %s on %s",
				usnic_ib_netdev_event_to_string(event),
				us_ibdev->ib_dev.name);
	}

	mutex_unlock(&us_ibdev->usdev_lock);

	return NOTIFY_DONE;
}
/*
 * Inet address notifier callback.  @ptr is a struct in_ifaddr; the netdev
 * it belongs to is matched against the registered usnic ib devices and the
 * event is forwarded to usnic_ib_handle_inet_event() for the first match.
 * Always returns NOTIFY_DONE.
 */
static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
				   unsigned long event, void *ptr)
{
	struct in_ifaddr *addr = ptr;
	struct net_device *owner = addr->ifa_dev->dev;
	struct usnic_ib_dev *cur;

	mutex_lock(&usnic_ib_ibdev_list_lock);
	list_for_each_entry(cur, &usnic_ib_ibdev_list, ib_dev_link) {
		if (cur->netdev != owner)
			continue;

		usnic_ib_handle_inet_event(cur, event, ptr);
		break;
	}
	mutex_unlock(&usnic_ib_ibdev_list_lock);

	return NOTIFY_DONE;
}
/*
 * Notifier block that routes inet address events to
 * usnic_ib_inetaddr_event().  Registered in usnic_ib_init() and
 * unregistered in usnic_ib_destroy().
 */
static struct notifier_block usnic_ib_inetaddr_notifier = {
.notifier_call = usnic_ib_inetaddr_event
};
/* End of inet section*/
/* Start of PF discovery section */
/*
 * Allocate, initialise and register the ib device that represents the PF
 * behind @dev.
 *
 * The PF's net_device is taken from the PCI driver data of @dev.  After the
 * ib device is registered, the forwarding device is seeded with the
 * netdev's MTU, MAC, carrier state and (if one is configured) first IPv4
 * address, and the node GUID is derived from the permanent MAC and that
 * address.
 *
 * Returns the new struct usnic_ib_dev on success.  On failure returns an
 * ERR_PTR() when the ib device allocation itself fails, and NULL when a
 * later step (forwarding device allocation, ib registration) fails; callers
 * must treat both as errors.
 */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
	struct usnic_ib_dev *us_ibdev;
	union ib_gid gid;
	struct in_device *in_dev;
	struct in_ifaddr *in;
	struct net_device *netdev;

	usnic_dbg("\n");
	netdev = pci_get_drvdata(dev);

	us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
	if (IS_ERR_OR_NULL(us_ibdev)) {
		usnic_err("Device %s context alloc failed\n",
				netdev_name(pci_get_drvdata(dev)));
		return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
	}

	us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
	if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
		usnic_err("Failed to alloc ufdev for %s with err %ld\n",
				pci_name(dev), PTR_ERR(us_ibdev->ufdev));
		goto err_dealloc;
	}

	mutex_init(&us_ibdev->usdev_lock);
	INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
	INIT_LIST_HEAD(&us_ibdev->ctx_list);

	us_ibdev->pdev = dev;
	us_ibdev->netdev = pci_get_drvdata(dev);
	us_ibdev->ib_dev.owner = THIS_MODULE;
	us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
	us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
	us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
	us_ibdev->ib_dev.dma_device = &dev->dev;
	us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
	strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);

	/* Verbs commands exposed to user space. */
	us_ibdev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);

	/* Verbs entry points. */
	us_ibdev->ib_dev.query_device = usnic_ib_query_device;
	us_ibdev->ib_dev.query_port = usnic_ib_query_port;
	us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
	us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
	us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
	us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
	us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
	us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
	us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
	us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
	us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
	us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
	us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
	us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
	us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
	us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
	us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
	us_ibdev->ib_dev.mmap = usnic_ib_mmap;
	us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
	us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
	us_ibdev->ib_dev.post_send = usnic_ib_post_send;
	us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
	us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
	us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
	us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;

	if (ib_register_device(&us_ibdev->ib_dev, NULL))
		goto err_fwd_dealloc;

	/* Seed the forwarding device with the netdev's current state. */
	usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
	usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr);
	if (netif_carrier_ok(us_ibdev->netdev))
		usnic_fwd_carrier_up(us_ibdev->ufdev);

	/*
	 * The netdev may have no IPv4 state attached (ip_ptr == NULL); treat
	 * that like "no address configured" instead of dereferencing NULL.
	 */
	in_dev = (struct in_device *)(netdev->ip_ptr);
	in = in_dev ? in_dev->ifa_list : NULL;
	if (in != NULL)
		usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address);

	usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr,
				us_ibdev->ufdev->inaddr, &gid.raw[0]);
	memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
		sizeof(gid.global.interface_id));
	kref_init(&us_ibdev->vf_cnt);

	usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
			us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
			us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
			us_ibdev->ufdev->mtu);
	return us_ibdev;

err_fwd_dealloc:
	usnic_fwd_dev_free(us_ibdev->ufdev);
err_dealloc:
	usnic_err("failed -- deallocing device\n");
	ib_dealloc_device(&us_ibdev->ib_dev);
	return NULL;
}
static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
{
usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
usnic_ib_sysfs_unregister_usdev(us_ibdev);
usnic_fwd_dev_free(us_ibdev->ufdev);
ib_unregister_device(&us_ibdev->ib_dev);
ib_dealloc_device(&us_ibdev->ib_dev);
}
static void usnic_ib_undiscover_pf(struct kref *kref)
{
struct usnic_ib_dev *us_ibdev, *tmp;
struct pci_dev *dev;
bool found = false;
dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
mutex_lock(&usnic_ib_ibdev_list_lock);
list_for_each_entry_safe(us_ibdev, tmp,
&usnic_ib_ibdev_list, ib_dev_link) {
if (us_ibdev->pdev == dev) {
list_del(&us_ibdev->ib_dev_link);
usnic_ib_device_remove(us_ibdev);
found = true;
break;
}
}
WARN(!found, "Failed to remove PF %s\n", pci_name(dev));
mutex_unlock(&usnic_ib_ibdev_list_lock);
}
/*
 * Find, or create on first use, the PF ib device for the VF behind @vnic.
 *
 * If the PF is already on the global list its vf_cnt reference is bumped.
 * Otherwise a new ib device is added, registered with sysfs and put on the
 * list.  Returns the PF, or an ERR_PTR() on failure (a NULL from
 * usnic_ib_device_add() is mapped to -EFAULT).
 */
static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
{
	struct pci_dev *vf_pci = usnic_vnic_get_pdev(vnic);
	struct pci_dev *parent_pci = pci_physfn(vf_pci);
	struct usnic_ib_dev *us_ibdev;
	int err;

	BUG_ON(!parent_pci);

	mutex_lock(&usnic_ib_ibdev_list_lock);

	/* Fast path: the PF was already discovered through another VF. */
	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
		if (us_ibdev->pdev != parent_pci)
			continue;

		kref_get(&us_ibdev->vf_cnt);
		goto unlock;
	}

	us_ibdev = usnic_ib_device_add(parent_pci);
	if (IS_ERR_OR_NULL(us_ibdev)) {
		if (!us_ibdev)
			us_ibdev = ERR_PTR(-EFAULT);
		goto unlock;
	}

	err = usnic_ib_sysfs_register_usdev(us_ibdev);
	if (!err) {
		list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list);
		goto unlock;
	}

	usnic_ib_device_remove(us_ibdev);
	us_ibdev = ERR_PTR(err);

unlock:
	mutex_unlock(&usnic_ib_ibdev_list_lock);
	return us_ibdev;
}
/* End of PF discovery section */
/* Start of PCI section */
/*
 * PCI IDs this driver binds to: the Cisco VIC user-space NIC function.
 * The all-zero entry terminates the table.
 */
static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
{PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
{0,}
};
static int usnic_ib_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
int err;
struct usnic_ib_dev *pf;
struct usnic_ib_vf *vf;
enum usnic_vnic_res_type res_type;
vf = kzalloc(sizeof(*vf), GFP_KERNEL);
if (!vf)
return -ENOMEM;
err = pci_enable_device(pdev);
if (err) {
usnic_err("Failed to enable %s with err %d\n",
pci_name(pdev), err);
goto out_clean_vf;
}
err = pci_request_regions(pdev, DRV_NAME);
if (err) {
usnic_err("Failed to request region for %s with err %d\n",
pci_name(pdev), err);
goto out_disable_device;
}
pci_set_master(pdev);
pci_set_drvdata(pdev, vf);
vf->vnic = usnic_vnic_alloc(pdev);
if (IS_ERR_OR_NULL(vf->vnic)) {
err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
usnic_err("Failed to alloc vnic for %s with err %d\n",
pci_name(pdev), err);
goto out_release_regions;
}
pf = usnic_ib_discover_pf(vf->vnic);
if (IS_ERR_OR_NULL(pf)) {
usnic_err("Failed to discover pf of vnic %s with err%ld\n",
pci_name(pdev), PTR_ERR(pf));
err = pf ? PTR_ERR(pf) : -EFAULT;
goto out_clean_vnic;
}
vf->pf = pf;
spin_lock_init(&vf->lock);
mutex_lock(&pf->usdev_lock);
list_add_tail(&vf->link, &pf->vf_dev_list);
/*
* Save max settings (will be same for each VF, easier to re-write than
* to say "if (!set) { set_values(); set=1; }
*/
for (res_type = USNIC_VNIC_RES_TYPE_EOL+1;
res_type < USNIC_VNIC_RES_TYPE_MAX;
res_type++) {
pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
res_type);
}
mutex_unlock(&pf->usdev_lock);
usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
pf->ib_dev.name);
usnic_ib_log_vf(vf);
return 0;
out_clean_vnic:
usnic_vnic_free(vf->vnic);
out_release_regions:
pci_set_drvdata(pdev, NULL);
pci_clear_master(pdev);
pci_release_regions(pdev);
out_disable_device:
pci_disable_device(pdev);
out_clean_vf:
kfree(vf);
return err;
}
/*
 * PCI remove for a usNIC VF: unlink the VF from its PF, drop the PF
 * reference (usnic_ib_undiscover_pf() runs when the last one goes away),
 * then release the vnic, the PCI resources and the VF itself.
 */
static void usnic_ib_pci_remove(struct pci_dev *pdev)
{
	struct usnic_ib_vf *gone = pci_get_drvdata(pdev);
	struct usnic_ib_dev *parent = gone->pf;

	mutex_lock(&parent->usdev_lock);
	list_del(&gone->link);
	mutex_unlock(&parent->usdev_lock);

	kref_put(&parent->vf_cnt, usnic_ib_undiscover_pf);

	usnic_vnic_free(gone->vnic);

	pci_set_drvdata(pdev, NULL);
	pci_clear_master(pdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);

	kfree(gone);

	usnic_info("Removed VF %s\n", pci_name(pdev));
}
/* PCI driver entry points */
/*
 * PCI driver descriptor: binds the devices listed in usnic_ib_pci_ids to the
 * probe/remove handlers above.  Registered from usnic_ib_init().
 */
static struct pci_driver usnic_ib_pci_driver = {
.name = DRV_NAME,
.id_table = usnic_ib_pci_ids,
.probe = usnic_ib_pci_probe,
.remove = usnic_ib_pci_remove,
};
/* End of PCI section */
/* Start of module section */
/*
 * Module entry point.
 *
 * Brings up, in order: the uiom layer, the PCI driver, the netdevice and
 * inet address notifiers, the transport layer and debugfs.  On any failure
 * the steps already completed are undone in reverse order.
 *
 * Returns 0 on success or the negative errno of the step that failed.
 */
static int __init usnic_ib_init(void)
{
	int err;

	printk_once(KERN_INFO "%s", usnic_version);

	err = usnic_uiom_init(DRV_NAME);
	if (err) {
		usnic_err("Unable to initalize umem with err %d\n", err);
		return err;
	}

	/*
	 * Keep the return code: without it a failed registration would fall
	 * through to "return err" with err still 0 and report success.
	 */
	err = pci_register_driver(&usnic_ib_pci_driver);
	if (err) {
		usnic_err("Unable to register with PCI\n");
		goto out_umem_fini;
	}

	err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
	if (err) {
		usnic_err("Failed to register netdev notifier\n");
		goto out_pci_unreg;
	}

	err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
	if (err) {
		usnic_err("Failed to register inet addr notifier\n");
		goto out_unreg_netdev_notifier;
	}

	err = usnic_transport_init();
	if (err) {
		usnic_err("Failed to initialize transport\n");
		goto out_unreg_inetaddr_notifier;
	}

	usnic_debugfs_init();

	return 0;

out_unreg_inetaddr_notifier:
	unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
out_unreg_netdev_notifier:
	unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
out_pci_unreg:
	pci_unregister_driver(&usnic_ib_pci_driver);
out_umem_fini:
	usnic_uiom_fini();

	return err;
}
/*
 * Module exit point: undoes usnic_ib_init() in reverse order.
 */
static void __exit usnic_ib_destroy(void)
{
	usnic_dbg("\n");

	/* Auxiliary services first ... */
	usnic_debugfs_exit();
	usnic_transport_fini();

	/* ... then stop listening for network events ... */
	unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
	unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);

	/* ... and finally drop the PCI driver and the uiom layer. */
	pci_unregister_driver(&usnic_ib_pci_driver);
	usnic_uiom_fini();
}
/* Module identification. */
MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
/* Module parameters: readable by everyone, writable by the owner. */
module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3");
MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
/* Export the PCI ID table for module autoloading. */
MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
/* Module entry and exit hooks. */
module_init(usnic_ib_init);
module_exit(usnic_ib_destroy);
/* End of module section */

View File

@ -0,0 +1,754 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include "usnic_log.h"
#include "usnic_vnic.h"
#include "usnic_fwd.h"
#include "usnic_uiom.h"
#include "usnic_debugfs.h"
#include "usnic_ib_qp_grp.h"
#include "usnic_ib_sysfs.h"
#include "usnic_transport.h"
#define DFLT_RQ_IDX 0
/*
 * Map an IB QP state onto the short label used in usNIC logs and dumps.
 * Any state without a dedicated label yields "UNKOWN STATE" (spelling kept
 * as emitted at runtime).
 */
const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
{
	const char *label = "UNKOWN STATE";

	switch (state) {
	case IB_QPS_RESET:
		label = "Rst";
		break;
	case IB_QPS_INIT:
		label = "Init";
		break;
	case IB_QPS_RTR:
		label = "RTR";
		break;
	case IB_QPS_RTS:
		label = "RTS";
		break;
	case IB_QPS_SQD:
		label = "SQD";
		break;
	case IB_QPS_SQE:
		label = "SQE";
		break;
	case IB_QPS_ERR:
		label = "ERR";
		break;
	default:
		break;
	}

	return label;
}
/*
 * Write the column header for the QP-group dump table into @buf (bounded by
 * @buf_sz).  Returns the value reported by scnprintf().
 */
int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
{
	int len;

	len = scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID");
	return len;
}
/*
 * Write one row of the QP-group dump table into @buf (bounded by @buf_sz).
 *
 * @obj is a struct usnic_ib_qp_grp, or NULL to get a row of "N/A" cells.
 * For a real group the row holds the QP number, state label, owner PID, VF
 * index and the flow id of the first flow on the group's flow list.
 *
 * NOTE(review): list_first_entry() does not yield a valid flow when
 * flows_lst is empty - confirm callers only dump groups that still have a
 * flow attached.
 */
int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
{
	struct usnic_ib_qp_grp *grp = obj;
	struct usnic_ib_qp_grp_flow *first_flow;

	if (!grp)
		return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");

	first_flow = list_first_entry(&grp->flows_lst,
				      struct usnic_ib_qp_grp_flow, link);

	return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
			 grp->ibqp.qp_num,
			 usnic_ib_qp_grp_state_to_string(grp->state),
			 grp->owner_pid,
			 usnic_vnic_get_index(grp->vf->vnic),
			 first_flow->flow->flow_id);
}
/*
 * Return the resource chunk from which the group's QP indices are derived.
 * Those indices are simply the indices of the RQs, so this is the RQ chunk.
 * Must be called with qp_grp->lock held.
 */
static struct usnic_vnic_res_chunk *
get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
{
	struct usnic_vnic_res_chunk *rq_chunk;

	lockdep_assert_held(&qp_grp->lock);

	rq_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
	return rq_chunk;
}
/*
 * Enable every QP of the group on the forwarding device.
 *
 * If one enable fails, the QPs enabled so far are disabled again and the
 * failing status is returned.  Returns 0 when all QPs were enabled.
 * Must be called with qp_grp->lock held.
 */
static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
{
	struct usnic_vnic_res_chunk *chunk;
	struct usnic_vnic_res *res;
	int vf_idx, idx, err;

	lockdep_assert_held(&qp_grp->lock);

	vf_idx = usnic_vnic_get_index(qp_grp->vf->vnic);

	chunk = get_qp_res_chunk(qp_grp);
	if (IS_ERR_OR_NULL(chunk)) {
		usnic_err("Unable to get qp res with err %ld\n",
			  PTR_ERR(chunk));
		return chunk ? PTR_ERR(chunk) : -ENOMEM;
	}

	for (idx = 0; idx < chunk->cnt; idx++) {
		res = chunk->res[idx];
		err = usnic_fwd_enable_qp(qp_grp->ufdev, vf_idx,
					  res->vnic_idx);
		if (!err)
			continue;

		usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n",
			  res->vnic_idx, qp_grp->ufdev->name,
			  vf_idx, err);

		/* Roll back the QPs that were already enabled. */
		while (--idx >= 0) {
			res = chunk->res[idx];
			usnic_fwd_disable_qp(qp_grp->ufdev, vf_idx,
					     res->vnic_idx);
		}
		return err;
	}

	return 0;
}
/*
 * Disable every QP of the group on the forwarding device.
 *
 * A failure is logged and the remaining QPs are still processed.  The value
 * returned is the status of the last disable call (0 if the chunk holds no
 * resources).  Must be called with qp_grp->lock held.
 */
static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
{
	struct usnic_vnic_res_chunk *chunk;
	struct usnic_vnic_res *res;
	int vf_idx;
	int idx = 0;
	int ret = 0;

	lockdep_assert_held(&qp_grp->lock);

	vf_idx = usnic_vnic_get_index(qp_grp->vf->vnic);

	chunk = get_qp_res_chunk(qp_grp);
	if (IS_ERR_OR_NULL(chunk)) {
		usnic_err("Unable to get qp res with err %ld\n",
			  PTR_ERR(chunk));
		return chunk ? PTR_ERR(chunk) : -ENOMEM;
	}

	while (idx < chunk->cnt) {
		res = chunk->res[idx];
		ret = usnic_fwd_disable_qp(qp_grp->ufdev, vf_idx,
					   res->vnic_idx);
		if (ret)
			usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n",
				  res->vnic_idx,
				  qp_grp->ufdev->name,
				  vf_idx, ret);
		idx++;
	}

	return ret;
}
/*
 * Fill @uaction so that matching traffic is steered to the group's default
 * RQ (entry DFLT_RQ_IDX of the RQ chunk) on the group's VF.
 * Returns 0 on success or a negative errno if the RQ chunk is unavailable.
 */
static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
			      struct usnic_filter_action *uaction)
{
	struct usnic_vnic_res_chunk *rq_chunk;

	rq_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
	if (!IS_ERR_OR_NULL(rq_chunk)) {
		uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
		uaction->action.type = FILTER_ACTION_RQ_STEERING;
		uaction->action.u.rq_idx =
			rq_chunk->res[DFLT_RQ_IDX]->vnic_idx;
		return 0;
	}

	usnic_err("Unable to get %s with err %ld\n",
		  usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
		  PTR_ERR(rq_chunk));
	return rq_chunk ? PTR_ERR(rq_chunk) : -ENOMEM;
}
static struct usnic_ib_qp_grp_flow*
create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
struct usnic_transport_spec *trans_spec)
{
uint16_t port_num;
int err;
struct filter filter;
struct usnic_filter_action uaction;
struct usnic_ib_qp_grp_flow *qp_flow;
struct usnic_fwd_flow *flow;
enum usnic_transport_type trans_type;
trans_type = trans_spec->trans_type;
port_num = trans_spec->usnic_roce.port_num;
/* Reserve Port */
port_num = usnic_transport_rsrv_port(trans_type, port_num);
if (port_num == 0)
return ERR_PTR(-EINVAL);
/* Create Flow */
usnic_fwd_init_usnic_filter(&filter, port_num);
err = init_filter_action(qp_grp, &uaction);
if (err)
goto out_unreserve_port;
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
usnic_err("Unable to alloc flow failed with err %ld\n",
PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_unreserve_port;
}
/* Create Flow Handle */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
if (IS_ERR_OR_NULL(qp_flow)) {
err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
qp_flow->trans_type = trans_type;
qp_flow->usnic_roce.port_num = port_num;
qp_flow->qp_grp = qp_grp;
return qp_flow;
out_dealloc_flow:
usnic_fwd_dealloc_flow(flow);
out_unreserve_port:
usnic_transport_unrsrv_port(trans_type, port_num);
return ERR_PTR(err);
}
static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
{
usnic_fwd_dealloc_flow(qp_flow->flow);
usnic_transport_unrsrv_port(qp_flow->trans_type,
qp_flow->usnic_roce.port_num);
kfree(qp_flow);
}
static struct usnic_ib_qp_grp_flow*
create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
struct usnic_transport_spec *trans_spec)
{
struct socket *sock;
int sock_fd;
int err;
struct filter filter;
struct usnic_filter_action uaction;
struct usnic_ib_qp_grp_flow *qp_flow;
struct usnic_fwd_flow *flow;
enum usnic_transport_type trans_type;
uint32_t addr;
uint16_t port_num;
int proto;
trans_type = trans_spec->trans_type;
sock_fd = trans_spec->udp.sock_fd;
/* Get and check socket */
sock = usnic_transport_get_socket(sock_fd);
if (IS_ERR_OR_NULL(sock))
return ERR_CAST(sock);
err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num);
if (err)
goto out_put_sock;
if (proto != IPPROTO_UDP) {
usnic_err("Protocol for fd %d is not UDP", sock_fd);
err = -EPERM;
goto out_put_sock;
}
/* Create flow */
usnic_fwd_init_udp_filter(&filter, addr, port_num);
err = init_filter_action(qp_grp, &uaction);
if (err)
goto out_put_sock;
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
usnic_err("Unable to alloc flow failed with err %ld\n",
PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_put_sock;
}
/* Create qp_flow */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
if (IS_ERR_OR_NULL(qp_flow)) {
err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
qp_flow->trans_type = trans_type;
qp_flow->udp.sock = sock;
qp_flow->qp_grp = qp_grp;
return qp_flow;
out_dealloc_flow:
usnic_fwd_dealloc_flow(flow);
out_put_sock:
usnic_transport_put_socket(sock);
return ERR_PTR(err);
}
/*
 * Undo create_udp_flow(): free the forwarding flow, drop the socket
 * reference and free the flow handle.
 */
static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
{
	struct socket *sk = qp_flow->udp.sock;

	usnic_fwd_dealloc_flow(qp_flow->flow);
	usnic_transport_put_socket(sk);

	kfree(qp_flow);
}
/*
 * Create a flow of the transport type named in @trans_spec and, on success,
 * append it to the group's flow list and expose it through debugfs.
 * Returns the flow, or whatever error value (ERR_PTR or NULL) the
 * transport-specific constructor produced; an unsupported transport yields
 * ERR_PTR(-EINVAL).
 */
static struct usnic_ib_qp_grp_flow*
create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
		    struct usnic_transport_spec *trans_spec)
{
	struct usnic_ib_qp_grp_flow *new_flow;

	switch (trans_spec->trans_type) {
	case USNIC_TRANSPORT_ROCE_CUSTOM:
		new_flow = create_roce_custom_flow(qp_grp, trans_spec);
		break;
	case USNIC_TRANSPORT_IPV4_UDP:
		new_flow = create_udp_flow(qp_grp, trans_spec);
		break;
	default:
		usnic_err("Unsupported transport %u\n",
			  trans_spec->trans_type);
		return ERR_PTR(-EINVAL);
	}

	if (IS_ERR_OR_NULL(new_flow))
		return new_flow;

	list_add_tail(&new_flow->link, &qp_grp->flows_lst);
	usnic_debugfs_flow_add(new_flow);

	return new_flow;
}
static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
{
usnic_debugfs_flow_remove(qp_flow);
list_del(&qp_flow->link);
switch (qp_flow->trans_type) {
case USNIC_TRANSPORT_ROCE_CUSTOM:
release_roce_custom_flow(qp_flow);
break;
case USNIC_TRANSPORT_IPV4_UDP:
release_udp_flow(qp_flow);
break;
default:
WARN(1, "Unsupported transport %u\n",
qp_flow->trans_type);
break;
}
}
static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
{
struct usnic_ib_qp_grp_flow *qp_flow, *tmp;
list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link)
release_and_remove_flow(qp_flow);
}
/*
 * Drive the QP group state machine from its current state to @new_state.
 *
 * @qp_grp:    group to transition
 * @new_state: requested IB QP state
 * @data:      optional struct usnic_transport_spec; only consulted when
 *             moving to IB_QPS_INIT, where it describes a flow to add
 *
 * The transition work (adding/removing flows, enabling/disabling QPs,
 * raising IB_EVENT_QP_FATAL) runs under qp_grp->lock.  Transitions not
 * listed in the switch below fail with -EINVAL.  qp_grp->state is updated
 * only when the transition reported success.
 *
 * Returns 0 on success or a negative errno.
 */
int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
enum ib_qp_state new_state,
void *data)
{
int status = 0;
int vnic_idx;
struct ib_event ib_event;
enum ib_qp_state old_state;
struct usnic_transport_spec *trans_spec;
struct usnic_ib_qp_grp_flow *qp_flow;
/* NOTE(review): the current state is sampled before qp_grp->lock is taken. */
old_state = qp_grp->state;
/* NOTE(review): vnic_idx is assigned here but not read again in this function. */
vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
trans_spec = (struct usnic_transport_spec *) data;
spin_lock(&qp_grp->lock);
switch (new_state) {
/* -> RESET: drop all flows; disable the QPs first if they may be enabled. */
case IB_QPS_RESET:
switch (old_state) {
case IB_QPS_RESET:
/* NO-OP */
break;
case IB_QPS_INIT:
release_and_remove_all_flows(qp_grp);
status = 0;
break;
case IB_QPS_RTR:
case IB_QPS_RTS:
case IB_QPS_ERR:
/* Flows are removed even if disabling reported an error. */
status = disable_qp_grp(qp_grp);
release_and_remove_all_flows(qp_grp);
break;
default:
status = -EINVAL;
}
break;
/* -> INIT: optionally add a flow, or step back down from RTR/RTS. */
case IB_QPS_INIT:
switch (old_state) {
case IB_QPS_RESET:
if (trans_spec) {
qp_flow = create_and_add_flow(qp_grp,
trans_spec);
if (IS_ERR_OR_NULL(qp_flow)) {
status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
break;
}
} else {
/*
* Optional to specify filters.
*/
status = 0;
}
break;
case IB_QPS_INIT:
if (trans_spec) {
qp_flow = create_and_add_flow(qp_grp,
trans_spec);
if (IS_ERR_OR_NULL(qp_flow)) {
status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
break;
}
} else {
/*
* Doesn't make sense to go into INIT state
* from INIT state w/o adding filters.
*/
status = -EINVAL;
}
break;
case IB_QPS_RTR:
status = disable_qp_grp(qp_grp);
break;
case IB_QPS_RTS:
status = disable_qp_grp(qp_grp);
break;
default:
status = -EINVAL;
}
break;
/* -> RTR: only reachable from INIT; this is where the QPs get enabled. */
case IB_QPS_RTR:
switch (old_state) {
case IB_QPS_INIT:
status = enable_qp_grp(qp_grp);
break;
default:
status = -EINVAL;
}
break;
/* -> RTS: only reachable from RTR. */
case IB_QPS_RTS:
switch (old_state) {
case IB_QPS_RTR:
/* NO-OP FOR NOW */
break;
default:
status = -EINVAL;
}
break;
/*
 * -> ERR: tear down whatever the old state had set up, then report
 * IB_EVENT_QP_FATAL through the QP's event handler.  The handler is
 * invoked while qp_grp->lock is still held.
 */
case IB_QPS_ERR:
ib_event.device = &qp_grp->vf->pf->ib_dev;
ib_event.element.qp = &qp_grp->ibqp;
ib_event.event = IB_EVENT_QP_FATAL;
switch (old_state) {
case IB_QPS_RESET:
qp_grp->ibqp.event_handler(&ib_event,
qp_grp->ibqp.qp_context);
break;
case IB_QPS_INIT:
release_and_remove_all_flows(qp_grp);
qp_grp->ibqp.event_handler(&ib_event,
qp_grp->ibqp.qp_context);
break;
case IB_QPS_RTR:
case IB_QPS_RTS:
/*
 * NOTE(review): if disabling fails, the flows are still removed and
 * the event is still raised, but the state below is left unchanged.
 */
status = disable_qp_grp(qp_grp);
release_and_remove_all_flows(qp_grp);
qp_grp->ibqp.event_handler(&ib_event,
qp_grp->ibqp.qp_context);
break;
default:
status = -EINVAL;
}
break;
default:
status = -EINVAL;
}
spin_unlock(&qp_grp->lock);
/* Commit the new state only on success; it is written after the unlock. */
if (!status) {
qp_grp->state = new_state;
usnic_info("Transistioned %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
} else {
usnic_err("Failed to transistion %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
}
return status;
}
/*
 * Acquire one resource chunk from @vnic for every entry of @res_spec.
 *
 * @vnic:      vnic to take the resources from
 * @res_spec:  list of (type, count) requests terminated by an entry of type
 *             USNIC_VNIC_RES_TYPE_EOL
 * @owner_obj: opaque owner passed through to usnic_vnic_get_resources()
 *
 * Returns a NULL-terminated array of chunks, one per request and in the
 * same order, or an ERR_PTR().  On failure every chunk acquired so far is
 * returned to the vnic.
 */
static struct usnic_vnic_res_chunk**
alloc_res_chunk_list(struct usnic_vnic *vnic,
			struct usnic_vnic_res_spec *res_spec, void *owner_obj)
{
	enum usnic_vnic_res_type res_type;
	struct usnic_vnic_res_chunk **res_chunk_list;
	int err, i, res_cnt, res_lst_sz;

	/* Count the requests so the array can be sized. */
	for (res_lst_sz = 0;
		res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
		res_lst_sz++) {
		/* Do Nothing */
	}

	/* One extra, zeroed slot serves as the NULL terminator. */
	res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1),
					GFP_ATOMIC);
	if (!res_chunk_list)
		return ERR_PTR(-ENOMEM);

	for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
		i++) {
		res_type = res_spec->resources[i].type;
		res_cnt = res_spec->resources[i].cnt;

		res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
					res_cnt, owner_obj);
		if (IS_ERR_OR_NULL(res_chunk_list[i])) {
			err = res_chunk_list[i] ?
					PTR_ERR(res_chunk_list[i]) : -ENOMEM;
			usnic_err("Failed to get %s from %s with err %d\n",
				usnic_vnic_res_type_to_str(res_type),
				usnic_vnic_pci_name(vnic),
				err);
			goto out_free_res;
		}
	}

	return res_chunk_list;

out_free_res:
	/*
	 * Entry i is the one that failed; release everything before it.
	 * The loop must run down to and including index 0, otherwise the
	 * first chunk is leaked.
	 */
	for (i--; i >= 0; i--)
		usnic_vnic_put_resources(res_chunk_list[i]);
	kfree(res_chunk_list);
	return ERR_PTR(err);
}
/*
 * Return every chunk of the NULL-terminated @res_chunk_list to its vnic and
 * free the array itself.
 */
static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
{
	struct usnic_vnic_res_chunk **cursor = res_chunk_list;

	while (*cursor) {
		usnic_vnic_put_resources(*cursor);
		cursor++;
	}

	kfree(res_chunk_list);
}
/*
 * Bind @qp_grp to @vf under protection domain @pd.
 *
 * The first group bound to a VF attaches the VF's PCI device to the PD's
 * uiom domain and records @pd on the VF; later groups only bump the
 * reference count (a WARN fires if they use a different PD).
 * Must be called with vf->lock held.  Returns 0 or the attach error.
 */
static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
			      struct usnic_ib_pd *pd,
			      struct usnic_ib_qp_grp *qp_grp)
{
	struct pci_dev *vf_pdev;
	int rc;

	lockdep_assert_held(&vf->lock);

	vf_pdev = usnic_vnic_get_pdev(vf->vnic);

	if (!vf->qp_grp_ref_cnt) {
		rc = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &vf_pdev->dev);
		if (rc) {
			usnic_err("Failed to attach %s to domain\n",
				  pci_name(vf_pdev));
			return rc;
		}
		vf->pd = pd;
	}

	vf->qp_grp_ref_cnt++;
	WARN_ON(vf->pd != pd);

	qp_grp->vf = vf;
	return 0;
}
static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
{
struct pci_dev *pdev;
struct usnic_ib_pd *pd;
lockdep_assert_held(&qp_grp->vf->lock);
pd = qp_grp->vf->pd;
pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
if (--qp_grp->vf->qp_grp_ref_cnt == 0) {
qp_grp->vf->pd = NULL;
usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev);
}
qp_grp->vf = NULL;
}
/*
 * Log @res_spec at debug level.  The spec is rendered into a fixed 512-byte
 * on-stack buffer first.
 */
static void log_spec(struct usnic_vnic_res_spec *res_spec)
{
	char text[512];

	usnic_vnic_spec_dump(text, sizeof(text), res_spec);
	usnic_dbg("%s\n", text);
}
/*
 * Derive the QP group id from @qp_flow and store it in *@id.
 *
 * For the custom RoCE transport the id is the reserved port number; for
 * IPv4/UDP it is the port number of the flow's socket.
 *
 * Returns 0 on success, the error from the socket address lookup, or
 * -EINVAL for an unsupported transport (in which case *@id is untouched).
 */
static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
				uint32_t *id)
{
	enum usnic_transport_type trans_type = qp_flow->trans_type;
	uint16_t port_num;
	int err;

	switch (trans_type) {
	case USNIC_TRANSPORT_ROCE_CUSTOM:
		*id = qp_flow->usnic_roce.port_num;
		break;
	case USNIC_TRANSPORT_IPV4_UDP:
		/*
		 * Fetch the 16-bit port into a local of the right width and
		 * widen it by assignment.  Writing through a (uint16_t *)
		 * cast of @id would fill only half of *id, and the wrong
		 * half on big-endian machines.
		 */
		err = usnic_transport_sock_get_addr(qp_flow->udp.sock,
							NULL, NULL,
							&port_num);
		if (err)
			return err;
		*id = port_num;
		break;
	default:
		usnic_err("Unsupported transport %u\n", trans_type);
		return -EINVAL;
	}

	return 0;
}
/*
 * Create a QP group on @vf.
 *
 * @ufdev:          forwarding device the group's flows and QPs live on
 * @vf:             VF supplying the resources; vf->lock must be held
 * @pd:             protection domain the group is bound to
 * @res_spec:       resources requested for the group
 * @transport_spec: transport and flow parameters for the initial flow
 *
 * Checks @res_spec against the minimum for the transport, acquires the
 * resources, binds the group to the VF/PD, creates the initial flow and
 * derives the group id (also used as the QP number) from that flow.  The
 * new group starts in IB_QPS_RESET and is registered with sysfs.
 *
 * Returns the group on success.  On failure returns an ERR_PTR(), except
 * for the group allocation itself failing, which returns NULL; callers must
 * treat both as errors.
 */
struct usnic_ib_qp_grp *
usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
struct usnic_ib_pd *pd,
struct usnic_vnic_res_spec *res_spec,
struct usnic_transport_spec *transport_spec)
{
struct usnic_ib_qp_grp *qp_grp;
int err;
enum usnic_transport_type transport = transport_spec->trans_type;
struct usnic_ib_qp_grp_flow *qp_flow;
lockdep_assert_held(&vf->lock);
/*
 * NOTE(review): transport indexes min_transport_spec[] without a range
 * check here - confirm callers validate trans_type first.
 */
err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
res_spec);
if (err) {
usnic_err("Spec does not meet miniumum req for transport %d\n",
transport);
log_spec(res_spec);
return ERR_PTR(err);
}
qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
if (!qp_grp) {
usnic_err("Unable to alloc qp_grp - Out of memory\n");
return NULL;
}
/* The group itself is passed as the owner of the acquired resources. */
qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
qp_grp);
if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
err = qp_grp->res_chunk_list ?
PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
/* grp_id has not been assigned yet; it is still 0 from kzalloc(). */
usnic_err("Unable to alloc res for %d with err %d\n",
qp_grp->grp_id, err);
goto out_free_qp_grp;
}
err = qp_grp_and_vf_bind(vf, pd, qp_grp);
if (err)
goto out_free_res;
INIT_LIST_HEAD(&qp_grp->flows_lst);
spin_lock_init(&qp_grp->lock);
qp_grp->ufdev = ufdev;
qp_grp->state = IB_QPS_RESET;
qp_grp->owner_pid = current->pid;
qp_flow = create_and_add_flow(qp_grp, transport_spec);
if (IS_ERR_OR_NULL(qp_flow)) {
usnic_err("Unable to create and add flow with err %ld\n",
PTR_ERR(qp_flow));
err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
goto out_qp_grp_vf_unbind;
}
/* The group id, and with it the QP number, comes from the initial flow. */
err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
if (err)
goto out_release_flow;
qp_grp->ibqp.qp_num = qp_grp->grp_id;
usnic_ib_sysfs_qpn_add(qp_grp);
return qp_grp;
/* Error unwinding: undo the steps above in reverse order. */
out_release_flow:
release_and_remove_flow(qp_flow);
out_qp_grp_vf_unbind:
qp_grp_and_vf_unbind(qp_grp);
out_free_res:
free_qp_grp_res(qp_grp->res_chunk_list);
out_free_qp_grp:
kfree(qp_grp);
return ERR_PTR(err);
}
/*
 * Destroy a QP group: release its flows, remove it from sysfs, unbind it
 * from its VF, return its resources and free it.
 *
 * The group is expected to be in IB_QPS_RESET (a WARN fires otherwise) and
 * the VF's lock must be held by the caller.
 */
void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
{
	WARN_ON(qp_grp->state != IB_QPS_RESET);
	lockdep_assert_held(&qp_grp->vf->lock);

	/* Flows and sysfs entry go first ... */
	release_and_remove_all_flows(qp_grp);
	usnic_ib_sysfs_qpn_remove(qp_grp);

	/* ... then the VF binding and the hardware resources ... */
	qp_grp_and_vf_unbind(qp_grp);
	free_qp_grp_res(qp_grp->res_chunk_list);

	/* ... and finally the group itself. */
	kfree(qp_grp);
}
struct usnic_vnic_res_chunk*
usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
enum usnic_vnic_res_type res_type)
{
int i;
for (i = 0; qp_grp->res_chunk_list[i]; i++) {
if (qp_grp->res_chunk_list[i]->type == res_type)
return qp_grp->res_chunk_list[i];
}
return ERR_PTR(-EINVAL);
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_IB_QP_GRP_H_
#define USNIC_IB_QP_GRP_H_
#include <linux/debugfs.h>
#include <rdma/ib_verbs.h>
#include "usnic_ib.h"
#include "usnic_abi.h"
#include "usnic_fwd.h"
#include "usnic_vnic.h"
/*
* The qp group struct represents all the hw resources needed to present an ib_qp
*/
struct usnic_ib_qp_grp {
/* Embedded verbs QP; to_uqp_grp() recovers the group from it. */
struct ib_qp ibqp;
/* Current QP state; a new group starts in IB_QPS_RESET. */
enum ib_qp_state state;
/* Group id; also published as ibqp.qp_num and as the sysfs entry name. */
int grp_id;
/* Forwarding device the group was created on. */
struct usnic_fwd_dev *ufdev;
/* Owning user context; set by usnic_ib_create_qp(). */
struct usnic_ib_ucontext *ctx;
/* List of struct usnic_ib_qp_grp_flow, chained through their 'link'. */
struct list_head flows_lst;
/* NULL-terminated array of resource chunks held by this group. */
struct usnic_vnic_res_chunk **res_chunk_list;
/* pid of the task that created the group. */
pid_t owner_pid;
/* VF this group is bound to. */
struct usnic_ib_vf *vf;
/* Entry in the owning ucontext's qp_grp_list. */
struct list_head link;
spinlock_t lock;
/* sysfs object added under the device's "qpn" directory. */
struct kobject kobj;
};
/*
 * One flow attached to a QP group. Flows are kept on the group's flows_lst;
 * usnic_ib_fill_create_qp_resp() reports the transport of the first one.
 */
struct usnic_ib_qp_grp_flow {
struct usnic_fwd_flow *flow;
/* Transport of this flow; presumably selects the union member below. */
enum usnic_transport_type trans_type;
union {
struct {
uint16_t port_num;
} usnic_roce;
struct {
struct socket *sock;
} udp;
};
/* Back-pointer to the owning QP group. */
struct usnic_ib_qp_grp *qp_grp;
/* Entry in qp_grp->flows_lst. */
struct list_head link;
/* Debug FS */
struct dentry *dbgfs_dentry;
char dentry_name[32];
};
/*
 * Minimum resource specification per transport, indexed by
 * enum usnic_transport_type. Each resource list is terminated by a
 * USNIC_VNIC_RES_TYPE_EOL entry. It is used both to validate a requested
 * spec when a QP group is created and as the starting spec in
 * usnic_ib_create_qp().
 *
 * NOTE(review): this is a static definition in a header, so every file
 * including it gets its own copy.
 */
static const struct
usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
{ /*USNIC_TRANSPORT_UNKNOWN*/
.resources = {
{.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
},
},
{ /*USNIC_TRANSPORT_ROCE_CUSTOM*/
.resources = {
{.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
},
},
{ /*USNIC_TRANSPORT_IPV4_UDP*/
.resources = {
{.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
{.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
},
},
};
/* Human-readable name for a QP state. */
const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
/* Dump helpers writing into buf (at most buf_sz bytes, presumably). */
int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
/*
 * Create a QP group on vf. The caller must hold vf->lock. Callers treat
 * both NULL and ERR_PTR() return values as failure.
 */
struct usnic_ib_qp_grp *
usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
struct usnic_ib_pd *pd,
struct usnic_vnic_res_spec *res_spec,
struct usnic_transport_spec *trans_spec);
/* Destroy a QP group; the caller must hold qp_grp->vf->lock. */
void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
/* Move the group to new_state; the verbs callers pass NULL for data. */
int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
enum ib_qp_state new_state,
void *data);
/* Return the group's chunk of the given type, or ERR_PTR(-EINVAL). */
struct usnic_vnic_res_chunk
*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
enum usnic_vnic_res_type type);
/* Convert an embedded struct ib_qp back to its containing QP group. */
static inline
struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
}
#endif /* USNIC_IB_QP_GRP_H_ */

View File

@ -0,0 +1,341 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include "usnic_common_util.h"
#include "usnic_ib.h"
#include "usnic_ib_qp_grp.h"
#include "usnic_vnic.h"
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
static ssize_t usnic_ib_show_fw_ver(struct device *device,
struct device_attribute *attr,
char *buf)
{
struct usnic_ib_dev *us_ibdev =
container_of(device, struct usnic_ib_dev, ib_dev.dev);
struct ethtool_drvinfo info;
mutex_lock(&us_ibdev->usdev_lock);
us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
mutex_unlock(&us_ibdev->usdev_lock);
return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
}
/*
 * sysfs "board_id": print the PCI subsystem device id of the PF.
 */
static ssize_t usnic_ib_show_board(struct device *device,
					struct device_attribute *attr,
					char *buf)
{
	unsigned short board_id;
	struct usnic_ib_dev *us_ibdev;

	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);

	/* Snapshot the id under the device mutex, format it afterwards. */
	mutex_lock(&us_ibdev->usdev_lock);
	board_id = us_ibdev->pdev->subsystem_device;
	mutex_unlock(&us_ibdev->usdev_lock);

	return scnprintf(buf, PAGE_SIZE, "%hu\n", board_id);
}
/*
* Report the configuration for this PF
*
* Output goes into the single sysfs page 'buf'. With at least one VF the
* line holds the device name, PCI bus/slot/function, netdev name, MAC and VF
* count, followed by the non-zero per-VF resource counts; otherwise it is
* "<name>: no VFs". Returns the number of bytes written.
*/
static ssize_t
usnic_ib_show_config(struct device *device, struct device_attribute *attr,
char *buf)
{
struct usnic_ib_dev *us_ibdev;
char *ptr;
unsigned left;
unsigned n;
enum usnic_vnic_res_type res_type;
us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
/* Buffer space limit is 1 page */
ptr = buf;
left = PAGE_SIZE;
mutex_lock(&us_ibdev->usdev_lock);
if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
char *busname;
/*
* bus name seems to come with annoying prefix.
* Remove it if it is predictable
*/
busname = us_ibdev->pdev->bus->name;
if (strncmp(busname, "PCI Bus ", 8) == 0)
busname += 8;
n = scnprintf(ptr, left,
"%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
us_ibdev->ib_dev.name,
busname,
PCI_SLOT(us_ibdev->pdev->devfn),
PCI_FUNC(us_ibdev->pdev->devfn),
netdev_name(us_ibdev->netdev),
us_ibdev->ufdev->mac,
atomic_read(&us_ibdev->vf_cnt.refcount));
/* Presumably advances ptr by n and shrinks left; see macro definition. */
UPDATE_PTR_LEFT(n, ptr, left);
/* Append one " <count> <type>" item per resource type with a non-zero count. */
for (res_type = USNIC_VNIC_RES_TYPE_EOL;
res_type < USNIC_VNIC_RES_TYPE_MAX;
res_type++) {
if (us_ibdev->vf_res_cnt[res_type] == 0)
continue;
/* A comma follows every item except the last resource type. */
n = scnprintf(ptr, left, " %d %s%s",
us_ibdev->vf_res_cnt[res_type],
usnic_vnic_res_type_to_str(res_type),
(res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
"," : "");
UPDATE_PTR_LEFT(n, ptr, left);
}
n = scnprintf(ptr, left, "\n");
UPDATE_PTR_LEFT(n, ptr, left);
} else {
n = scnprintf(ptr, left, "%s: no VFs\n",
us_ibdev->ib_dev.name);
UPDATE_PTR_LEFT(n, ptr, left);
}
mutex_unlock(&us_ibdev->usdev_lock);
/* Bytes written so far. */
return ptr - buf;
}
/*
 * sysfs "iface": print the name of the netdev backing this usNIC device.
 */
static ssize_t
usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct usnic_ib_dev *us_ibdev =
		container_of(device, struct usnic_ib_dev, ib_dev.dev);

	return scnprintf(buf, PAGE_SIZE, "%s\n",
				netdev_name(us_ibdev->netdev));
}
/*
 * sysfs "max_vf": print the current value of the VF reference counter.
 */
static ssize_t
usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct usnic_ib_dev *us_ibdev =
		container_of(device, struct usnic_ib_dev, ib_dev.dev);

	return scnprintf(buf, PAGE_SIZE, "%u\n",
				atomic_read(&us_ibdev->vf_cnt.refcount));
}
/*
 * sysfs "qp_per_vf": print the larger of the per-VF WQ and RQ counts.
 */
static ssize_t
usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct usnic_ib_dev *us_ibdev =
		container_of(device, struct usnic_ib_dev, ib_dev.dev);
	int per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
				us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);

	return scnprintf(buf, PAGE_SIZE, "%d\n", per_vf);
}
/*
 * sysfs "cq_per_vf": print the per-VF completion queue count.
 */
static ssize_t
usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct usnic_ib_dev *us_ibdev =
		container_of(device, struct usnic_ib_dev, ib_dev.dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n",
				us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
/*
 * Read-only (S_IRUGO) device attributes, each backed by the matching
 * usnic_ib_show_*() handler; no store callbacks are provided.
 */
static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
/* Table iterated when the attribute files are created and removed. */
static struct device_attribute *usnic_class_attributes[] = {
&dev_attr_fw_ver,
&dev_attr_board_id,
&dev_attr_config,
&dev_attr_iface,
&dev_attr_max_vf,
&dev_attr_qp_per_vf,
&dev_attr_cq_per_vf,
};
/*
 * A per-QP-group sysfs attribute: the generic attribute plus a show callback
 * that receives the QP group directly instead of the raw kobject.
 */
struct qpn_attribute {
struct attribute attr;
ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
};
/*
* Definitions for supporting QPN entries in sysfs
*/
/*
 * Generic sysfs show entry point for QPN attributes: recover the QP group
 * from its embedded kobject and the qpn_attribute from the generic
 * attribute, then dispatch to the attribute's own show callback.
 */
static ssize_t
usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct usnic_ib_qp_grp *qp_grp =
		container_of(kobj, struct usnic_ib_qp_grp, kobj);
	struct qpn_attribute *qpn_attr =
		container_of(attr, struct qpn_attribute, attr);

	return qpn_attr->show(qp_grp, buf);
}
/* QPN attributes are read-only: only a show operation is provided. */
static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
.show = usnic_ib_qpn_attr_show
};
/*
 * Define a read-only qpn_attribute named qpn_attr_<NAME>; __ATTR_RO(NAME)
 * wires it to a function called <NAME>_show.
 */
#define QPN_ATTR_RO(NAME) \
struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
/* sysfs "context": print the address of the user context owning the group. */
static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
}
/*
 * sysfs "summary": one line describing a QP group. It holds the QPN, state,
 * owner PID and VF index, followed by "<type>[<vnic idx>]" for every
 * resource in every chunk of the group. Returns the number of bytes written
 * into the page-sized buffer.
 */
static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
{
	struct usnic_vnic_res_chunk **chunk_iter;
	struct usnic_vnic_res *vnic_res;
	char *ptr = buf;
	int left = PAGE_SIZE;
	int res_idx, n;

	n = scnprintf(ptr, left,
			"QPN: %d State: (%s) PID: %u VF Idx: %hu ",
			qp_grp->ibqp.qp_num,
			usnic_ib_qp_grp_state_to_string(qp_grp->state),
			qp_grp->owner_pid,
			usnic_vnic_get_index(qp_grp->vf->vnic));
	UPDATE_PTR_LEFT(n, ptr, left);

	/* res_chunk_list is NULL-terminated. */
	for (chunk_iter = qp_grp->res_chunk_list; *chunk_iter; chunk_iter++) {
		for (res_idx = 0; res_idx < (*chunk_iter)->cnt; res_idx++) {
			vnic_res = (*chunk_iter)->res[res_idx];
			n = scnprintf(ptr, left, "%s[%d] ",
					usnic_vnic_res_type_to_str(vnic_res->type),
					vnic_res->vnic_idx);
			UPDATE_PTR_LEFT(n, ptr, left);
		}
	}

	n = scnprintf(ptr, left, "\n");
	UPDATE_PTR_LEFT(n, ptr, left);

	return ptr - buf;
}
/* Per-QP-group attributes, backed by context_show() and summary_show(). */
static QPN_ATTR_RO(context);
static QPN_ATTR_RO(summary);
/* NULL-terminated default attribute list for every QPN kobject. */
static struct attribute *usnic_ib_qpn_default_attrs[] = {
&qpn_attr_context.attr,
&qpn_attr_summary.attr,
NULL
};
/*
 * kobj_type used for the kobject embedded in struct usnic_ib_qp_grp.
 * NOTE(review): no .release callback is set here.
 */
static struct kobj_type usnic_ib_qpn_type = {
.sysfs_ops = &usnic_ib_qpn_sysfs_ops,
.default_attrs = usnic_ib_qpn_default_attrs
};
/*
 * Create the sysfs interface of a usNIC IB device: the class attribute files
 * plus the "qpn" directory under which individual QP groups are published.
 *
 * Returns 0 on success, -EINVAL if an attribute file could not be created,
 * or -ENOMEM if the "qpn" kobject could not be created. On failure every
 * attribute file created so far is removed again, so the device is left
 * without a partially populated sysfs interface.
 */
int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
{
	int i;
	int err;

	for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
		err = device_create_file(&us_ibdev->ib_dev.dev,
						usnic_class_attributes[i]);
		if (err) {
			usnic_err("Failed to create device file %d for %s with err %d\n",
					i, us_ibdev->ib_dev.name, err);
			/* Callers have always seen -EINVAL for this failure. */
			err = -EINVAL;
			goto out_remove_files;
		}
	}

	/* create kernel object for looking at individual QPs */
	kobject_get(&us_ibdev->ib_dev.dev.kobj);
	us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
			&us_ibdev->ib_dev.dev.kobj);
	if (us_ibdev->qpn_kobj == NULL) {
		kobject_put(&us_ibdev->ib_dev.dev.kobj);
		err = -ENOMEM;
		goto out_remove_files;
	}

	return 0;

out_remove_files:
	/* Files with index [0, i) were created successfully; undo them. */
	while (--i >= 0)
		device_remove_file(&us_ibdev->ib_dev.dev,
					usnic_class_attributes[i]);
	return err;
}
/*
 * Remove every class attribute file of the device and drop the reference on
 * the "qpn" kobject created by usnic_ib_sysfs_register_usdev().
 * NOTE(review): the extra kobject_get() taken on the device kobject at
 * registration is not dropped here; confirm it is released elsewhere.
 */
void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
{
int i;
for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
device_remove_file(&us_ibdev->ib_dev.dev,
usnic_class_attributes[i]);
}
kobject_put(us_ibdev->qpn_kobj);
}
/*
 * Publish a QP group in sysfs as an entry named after its group id under the
 * PF's "qpn" kobject. A reference is taken on the "qpn" kobject for the
 * lifetime of the entry; it is dropped again right away if adding the entry
 * fails. Failures are not reported to the caller.
 */
void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
{
	struct usnic_ib_dev *us_ibdev = qp_grp->vf->pf;
	struct kobject *parent;

	parent = kobject_get(us_ibdev->qpn_kobj);
	if (kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
					parent, "%d", qp_grp->grp_id))
		kobject_put(us_ibdev->qpn_kobj);
}
/*
 * Remove a QP group's sysfs entry: drop the reference on the group's own
 * kobject and the reference on the PF's "qpn" kobject that was taken in
 * usnic_ib_sysfs_qpn_add().
 */
void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
{
struct usnic_ib_dev *us_ibdev;
/* Look up the PF before putting the group's kobject. */
us_ibdev = qp_grp->vf->pf;
kobject_put(&qp_grp->kobj);
kobject_put(us_ibdev->qpn_kobj);
}

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_IB_SYSFS_H_
#define USNIC_IB_SYSFS_H_
#include "usnic_ib.h"
int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
#endif /* !USNIC_IB_SYSFS_H_ */

View File

@ -0,0 +1,765 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include "usnic_abi.h"
#include "usnic_ib.h"
#include "usnic_common_util.h"
#include "usnic_ib_qp_grp.h"
#include "usnic_fwd.h"
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_transport.h"
#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
/*
 * Store a numeric firmware version derived from fw_ver_str in *fw_ver.
 * NOTE(review): only the first character of the string is used (its char
 * value is widened to u64); the string is not parsed. Confirm this is the
 * intended encoding.
 */
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
*fw_ver = (u64) *fw_ver_str;
}
/*
 * Build the create-QP response for user space and copy it into udata.
 *
 * The response carries the VF index, the bus address and length of the VF's
 * BAR 0, the vnic indices of the group's RQ, WQ and CQ resources, and the
 * transport type of the group's first flow.
 *
 * Returns 0 on success, -EFAULT if the VF's pci device or BAR 0 cannot be
 * obtained, the lookup error if a resource chunk is missing, or the error
 * from ib_copy_to_udata().
 */
static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
struct ib_udata *udata)
{
struct usnic_ib_dev *us_ibdev;
struct usnic_ib_create_qp_resp resp;
struct pci_dev *pdev;
struct vnic_dev_bar *bar;
struct usnic_vnic_res_chunk *chunk;
struct usnic_ib_qp_grp_flow *default_flow;
int i, err;
/* Zero the whole response so unset fields are not copied out as garbage. */
memset(&resp, 0, sizeof(resp));
us_ibdev = qp_grp->vf->pf;
pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
if (!pdev) {
usnic_err("Failed to get pdev of qp_grp %d\n",
qp_grp->grp_id);
return -EFAULT;
}
bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
if (!bar) {
usnic_err("Failed to get bar0 of qp_grp %d vf %s",
qp_grp->grp_id, pci_name(pdev));
return -EFAULT;
}
resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
resp.bar_bus_addr = bar->bus_addr;
resp.bar_len = bar->len;
/* Receive queues */
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
if (IS_ERR_OR_NULL(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
qp_grp->grp_id,
PTR_ERR(chunk));
return chunk ? PTR_ERR(chunk) : -ENOMEM;
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
resp.rq_cnt = chunk->cnt;
/* NOTE(review): chunk->cnt is not checked against the size of rq_idx[]. */
for (i = 0; i < chunk->cnt; i++)
resp.rq_idx[i] = chunk->res[i]->vnic_idx;
/* Work (send) queues */
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
if (IS_ERR_OR_NULL(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
qp_grp->grp_id,
PTR_ERR(chunk));
return chunk ? PTR_ERR(chunk) : -ENOMEM;
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
resp.wq_cnt = chunk->cnt;
for (i = 0; i < chunk->cnt; i++)
resp.wq_idx[i] = chunk->res[i]->vnic_idx;
/* Completion queues */
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
if (IS_ERR_OR_NULL(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
qp_grp->grp_id,
PTR_ERR(chunk));
return chunk ? PTR_ERR(chunk) : -ENOMEM;
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
resp.cq_cnt = chunk->cnt;
for (i = 0; i < chunk->cnt; i++)
resp.cq_idx[i] = chunk->res[i]->vnic_idx;
/*
 * The first flow on the list is treated as the default one.
 * NOTE(review): assumes flows_lst is non-empty at this point.
 */
default_flow = list_first_entry(&qp_grp->flows_lst,
struct usnic_ib_qp_grp_flow, link);
resp.transport = default_flow->trans_type;
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
return err;
}
return 0;
}
/*
 * Pick a VF that can satisfy res_spec and create a QP group on it.
 *
 * The caller must hold us_ibdev->usdev_lock (enforced with BUG_ON). When
 * usnic_ib_share_vf is set, VFs already attached to the PD are tried first;
 * otherwise, or if none of them has room, a VF with no QP groups is used.
 *
 * Locking: each candidate VF is inspected under vf->lock. When a VF is
 * chosen the search loop is left with that lock still held; it is released
 * only after usnic_ib_qp_grp_create() has run.
 *
 * Returns the new QP group, NULL if the device has no VFs at all,
 * ERR_PTR(-ENOMEM) if no VF has room, or an ERR_PTR() if creation failed.
 */
static struct usnic_ib_qp_grp*
find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
struct usnic_ib_pd *pd,
struct usnic_transport_spec *trans_spec,
struct usnic_vnic_res_spec *res_spec)
{
struct usnic_ib_vf *vf;
struct usnic_vnic *vnic;
struct usnic_ib_qp_grp *qp_grp;
struct device *dev, **dev_list;
int i, found = 0;
BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
if (list_empty(&us_ibdev->vf_dev_list)) {
usnic_info("No vfs to allocate\n");
return NULL;
}
if (usnic_ib_share_vf) {
/* Try to find resources on a used vf which is in pd */
/*
 * NOTE(review): dev_list is dereferenced without an error check;
 * confirm usnic_uiom_get_dev_list() cannot return NULL/ERR_PTR.
 */
dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
for (i = 0; dev_list[i]; i++) {
dev = dev_list[i];
vf = pci_get_drvdata(to_pci_dev(dev));
spin_lock(&vf->lock);
vnic = vf->vnic;
/* A zero return from usnic_vnic_check_room() is treated as "has room". */
if (!usnic_vnic_check_room(vnic, res_spec)) {
usnic_dbg("Found used vnic %s from %s\n",
us_ibdev->ib_dev.name,
pci_name(usnic_vnic_get_pdev(
vnic)));
found = 1;
/* Leave the loop with vf->lock held. */
break;
}
spin_unlock(&vf->lock);
}
usnic_uiom_free_dev_list(dev_list);
}
if (!found) {
/* Try to find resources on an unused vf */
list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
spin_lock(&vf->lock);
vnic = vf->vnic;
if (vf->qp_grp_ref_cnt == 0 &&
usnic_vnic_check_room(vnic, res_spec) == 0) {
found = 1;
/* Leave the loop with vf->lock held. */
break;
}
spin_unlock(&vf->lock);
}
}
if (!found) {
usnic_info("No free qp grp found on %s\n",
us_ibdev->ib_dev.name);
return ERR_PTR(-ENOMEM);
}
/* vf->lock is held here, as usnic_ib_qp_grp_create() requires. */
qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
trans_spec);
spin_unlock(&vf->lock);
if (IS_ERR_OR_NULL(qp_grp)) {
usnic_err("Failed to allocate qp_grp\n");
return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
}
return qp_grp;
}
/*
 * Destroy a QP group while holding the lock of the VF it is bound to.
 * Warns if the group has not been moved to IB_QPS_RESET first.
 */
static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
{
/* Cache the VF: qp_grp is freed by usnic_ib_qp_grp_destroy(). */
struct usnic_ib_vf *vf = qp_grp->vf;
WARN_ON(qp_grp->state != IB_QPS_RESET);
spin_lock(&vf->lock);
usnic_ib_qp_grp_destroy(qp_grp);
spin_unlock(&vf->lock);
}
/*
 * Translate an Ethernet link speed into the IB width/speed pair reported to
 * verbs consumers. Speeds up to 10000 map to 1X FDR10; everything faster is
 * reported as 4X with DDR (<= 20000), QDR (<= 30000), FDR10 (<= 40000) or
 * EDR (above 40000).
 */
static void eth_speed_to_ib_speed(int speed, u8 *active_speed,
					u8 *active_width)
{
	u8 ib_width = IB_WIDTH_4X;
	u8 ib_speed;

	if (speed <= 10000) {
		/* The only case that is not reported as 4X. */
		ib_width = IB_WIDTH_1X;
		ib_speed = IB_SPEED_FDR10;
	} else if (speed <= 20000) {
		ib_speed = IB_SPEED_DDR;
	} else if (speed <= 30000) {
		ib_speed = IB_SPEED_QDR;
	} else if (speed <= 40000) {
		ib_speed = IB_SPEED_FDR10;
	} else {
		ib_speed = IB_SPEED_EDR;
	}

	*active_width = ib_width;
	*active_speed = ib_speed;
}
/*
 * Validate the user-supplied create-QP command: the transport type must lie
 * strictly between USNIC_TRANSPORT_UNKNOWN and USNIC_TRANSPORT_MAX.
 * Returns 0 if valid, -EINVAL otherwise.
 */
static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
{
	if (cmd.spec.trans_type > USNIC_TRANSPORT_UNKNOWN &&
			cmd.spec.trans_type < USNIC_TRANSPORT_MAX)
		return 0;

	return -EINVAL;
}
/* Start of ib callback functions */
/* The link layer is reported as Ethernet for every device and port. */
enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
u8 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
/*
 * Fill in the device attributes reported to the verbs layer.
 *
 * Runs under usdev_lock. The sys image GUID is taken from the interface-id
 * part of the GID built from the device MAC and IP address. max_qp and
 * max_cq are the per-VF counts multiplied by the current VF count. All
 * atomic, SRQ, multicast and FMR limits are reported as zero. Always
 * returns 0.
 */
int usnic_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
union ib_gid gid;
struct ethtool_drvinfo info;
struct ethtool_cmd cmd;
int qp_per_vf;
usnic_dbg("\n");
mutex_lock(&us_ibdev->usdev_lock);
us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
/* NOTE(review): cmd is filled in here but not used below. */
us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
memset(props, 0, sizeof(*props));
usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
&gid.raw[0]);
memcpy(&props->sys_image_guid, &gid.global.interface_id,
sizeof(gid.global.interface_id));
usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
props->vendor_id = PCI_VENDOR_ID_CISCO;
props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
props->hw_ver = us_ibdev->pdev->subsystem_device;
/* QPs per VF: the larger of the WQ and RQ counts. */
qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
props->max_qp = qp_per_vf *
atomic_read(&us_ibdev->vf_cnt.refcount);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
atomic_read(&us_ibdev->vf_cnt.refcount);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
props->max_mr = USNIC_UIOM_MAX_MR_CNT;
props->local_ca_ack_delay = 0;
props->max_pkeys = 0;
props->atomic_cap = IB_ATOMIC_NONE;
props->masked_atomic_cap = props->atomic_cap;
props->max_qp_rd_atom = 0;
props->max_qp_init_rd_atom = 0;
props->max_res_rd_atom = 0;
props->max_srq = 0;
props->max_srq_wr = 0;
props->max_srq_sge = 0;
props->max_fast_reg_page_list_len = 0;
props->max_mcast_grp = 0;
props->max_mcast_qp_attach = 0;
props->max_total_mcast_qp_attach = 0;
props->max_map_per_fmr = 0;
/* Owned by Userspace
* max_qp_wr, max_sge, max_sge_rd, max_cqe */
mutex_unlock(&us_ibdev->usdev_lock);
return 0;
}
/*
 * Fill in the port attributes reported to the verbs layer.
 *
 * Runs under usdev_lock. The port state follows the forwarding device: DOWN
 * when the link is down, INIT when the link is up but no IP address is set,
 * ACTIVE otherwise. One GID and one P_Key table entry are advertised.
 * Always returns 0.
 */
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
struct ethtool_cmd cmd;
usnic_dbg("\n");
mutex_lock(&us_ibdev->usdev_lock);
/* Link settings; cmd.speed is converted to IB speed/width below. */
us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
memset(props, 0, sizeof(*props));
props->lid = 0;
props->lmc = 1;
props->sm_lid = 0;
props->sm_sl = 0;
/*
 * phys_state uses raw numeric codes; presumably the IBA PortPhysicalState
 * values (3 = Disabled, 4 = PortConfigurationTraining, 5 = LinkUp) -
 * TODO confirm.
 */
if (!us_ibdev->ufdev->link_up) {
props->state = IB_PORT_DOWN;
props->phys_state = 3;
} else if (!us_ibdev->ufdev->inaddr) {
props->state = IB_PORT_INIT;
props->phys_state = 4;
} else {
props->state = IB_PORT_ACTIVE;
props->phys_state = 5;
}
props->port_cap_flags = 0;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
props->qkey_viol_cntr = 0;
eth_speed_to_ib_speed(cmd.speed, &props->active_speed,
&props->active_width);
props->max_mtu = IB_MTU_4096;
props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
/* Userspace will adjust for hdrs */
props->max_msg_sz = us_ibdev->ufdev->mtu;
props->max_vl_num = 1;
mutex_unlock(&us_ibdev->usdev_lock);
return 0;
}
/*
 * Report the state of a QP. Both output structures are zeroed first; the
 * current group state is then reported under the PF's usdev_lock. Only UD
 * QPs are expected: any other type is logged and yields -EINVAL. Returns 0
 * on success.
 */
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
				int qp_attr_mask,
				struct ib_qp_init_attr *qp_init_attr)
{
	struct usnic_ib_qp_grp *qp_grp;
	struct usnic_ib_vf *vf;
	int err = 0;

	usnic_dbg("\n");

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	qp_grp = to_uqp_grp(qp);
	vf = qp_grp->vf;

	mutex_lock(&vf->pf->usdev_lock);
	usnic_dbg("\n");
	qp_attr->qp_state = qp_grp->state;
	qp_attr->cur_qp_state = qp_grp->state;

	if (qp_grp->ibqp.qp_type == IB_QPT_UD) {
		qp_attr->qkey = 0;
	} else {
		usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
		err = -EINVAL;
	}

	/* Single unlock/return point for both outcomes. */
	mutex_unlock(&vf->pf->usdev_lock);
	return err;
}
/*
 * Return the port's GID, built from the device MAC and IP address.
 *
 * usnic_ib_query_port() advertises a GID table of length 1, so index 0 is
 * the only valid entry; any other index (including negative values, since
 * index is a signed int) is rejected with -EINVAL. Returns 0 on success.
 */
int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
				union ib_gid *gid)
{
	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);

	usnic_dbg("\n");

	/* gid_tbl_len is 1: only entry 0 exists. */
	if (index != 0)
		return -EINVAL;

	mutex_lock(&us_ibdev->usdev_lock);
	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
	usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
				&gid->raw[0]);
	mutex_unlock(&us_ibdev->usdev_lock);

	return 0;
}
/*
 * Return the P_Key at the given table index.
 *
 * usnic_ib_query_port() advertises a P_Key table of length 1, so index 0 is
 * the only valid entry and always holds 0xffff; any other index is rejected
 * with -EINVAL. Returns 0 on success.
 */
int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
				u16 *pkey)
{
	/* pkey_tbl_len is 1: only entry 0 exists. */
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}
/*
 * Allocate a protection domain together with its uiom PD.
 *
 * Returns the embedded ib_pd on success, ERR_PTR(-ENOMEM) if either
 * allocation yields NULL, or the error reported by usnic_uiom_alloc_pd().
 */
struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
					struct ib_ucontext *context,
					struct ib_udata *udata)
{
	struct usnic_ib_pd *pd;
	long err;

	usnic_dbg("\n");

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	pd->umem_pd = usnic_uiom_alloc_pd();
	if (IS_ERR_OR_NULL(pd->umem_pd)) {
		/* Work out the error before pd is freed. */
		err = pd->umem_pd ? PTR_ERR(pd->umem_pd) : -ENOMEM;
		kfree(pd);
		return ERR_PTR(err);
	}

	usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
			pd, context, ibdev->name);
	return &pd->ibpd;
}
int usnic_ib_dealloc_pd(struct ib_pd *pd)
{
usnic_info("freeing domain 0x%p\n", pd);
usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
kfree(pd);
return 0;
}
/*
 * Create a UD QP backed by a QP group.
 *
 * The create command is copied from udata and its transport type validated;
 * only IB_QPT_UD is accepted. Under usdev_lock, the minimum resource spec of
 * the requested transport is adjusted for one or two CQs (depending on
 * whether send and receive CQ are the same), a QP group is created on a
 * suitable VF, the response is copied back to user space, and the group is
 * linked into the user context.
 *
 * Returns the embedded ib_qp, or ERR_PTR(): -EINVAL for bad input, the
 * creation error (or -ENOMEM) if no QP group could be created, or -EBUSY if
 * the response could not be filled in.
 */
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
int err;
struct usnic_ib_dev *us_ibdev;
struct usnic_ib_qp_grp *qp_grp;
struct usnic_ib_ucontext *ucontext;
int cq_cnt;
struct usnic_vnic_res_spec res_spec;
struct usnic_ib_create_qp_cmd cmd;
struct usnic_transport_spec trans_spec;
usnic_dbg("\n");
ucontext = to_uucontext(pd->uobject->context);
us_ibdev = to_usdev(pd->device);
err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
if (err) {
usnic_err("%s: cannot copy udata for create_qp\n",
us_ibdev->ib_dev.name);
return ERR_PTR(-EINVAL);
}
err = create_qp_validate_user_data(cmd);
if (err) {
usnic_err("%s: Failed to validate user data\n",
us_ibdev->ib_dev.name);
return ERR_PTR(-EINVAL);
}
if (init_attr->qp_type != IB_QPT_UD) {
usnic_err("%s asked to make a non-UD QP: %d\n",
us_ibdev->ib_dev.name, init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
trans_spec = cmd.spec;
mutex_lock(&us_ibdev->usdev_lock);
/* One CQ if send and receive share it, two otherwise. */
cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
/* Work on a private copy of the transport's minimum spec. */
res_spec = min_transport_spec[trans_spec.trans_type];
usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
&trans_spec,
&res_spec);
if (IS_ERR_OR_NULL(qp_grp)) {
err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
goto out_release_mutex;
}
err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
if (err) {
/* NOTE(review): the original error code is replaced by -EBUSY. */
err = -EBUSY;
goto out_release_qp_grp;
}
qp_grp->ctx = ucontext;
list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
usnic_ib_log_vf(qp_grp->vf);
mutex_unlock(&us_ibdev->usdev_lock);
return &qp_grp->ibqp;
out_release_qp_grp:
qp_grp_destroy(qp_grp);
out_release_mutex:
mutex_unlock(&us_ibdev->usdev_lock);
return ERR_PTR(err);
}
/*
 * Destroy a QP: under the PF's usdev_lock, move the QP group to RESET,
 * unlink it from its user context and destroy it. A failure to reach RESET
 * is only logged; destruction proceeds regardless. Always returns 0.
 */
int usnic_ib_destroy_qp(struct ib_qp *qp)
{
struct usnic_ib_qp_grp *qp_grp;
struct usnic_ib_vf *vf;
usnic_dbg("\n");
qp_grp = to_uqp_grp(qp);
/* Cache the VF; qp_grp is freed by qp_grp_destroy() below. */
vf = qp_grp->vf;
mutex_lock(&vf->pf->usdev_lock);
if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
usnic_err("Failed to move qp grp %u to reset\n",
qp_grp->grp_id);
}
list_del(&qp_grp->link);
qp_grp_destroy(qp_grp);
mutex_unlock(&vf->pf->usdev_lock);
return 0;
}
int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct usnic_ib_qp_grp *qp_grp;
int status;
usnic_dbg("\n");
qp_grp = to_uqp_grp(ibqp);
/* TODO: Future Support All States */
mutex_lock(&qp_grp->vf->pf->usdev_lock);
if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
} else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
} else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
} else {
usnic_err("Unexpected combination mask: %u state: %u\n",
attr_mask & IB_QP_STATE, attr->qp_state);
status = -EINVAL;
}
mutex_unlock(&qp_grp->vf->pf->usdev_lock);
return status;
}
/*
 * Allocate a zeroed struct ib_cq and return it; entries, vector, context and
 * udata are not used. Returns ERR_PTR(-EBUSY) if the allocation fails.
 * NOTE(review): -EBUSY rather than -ENOMEM is reported for an allocation
 * failure; confirm this is intentional.
 */
struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
int vector, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct ib_cq *cq;
usnic_dbg("\n");
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-EBUSY);
return cq;
}
/* Free a CQ allocated by usnic_ib_create_cq(). Always returns 0. */
int usnic_ib_destroy_cq(struct ib_cq *cq)
{
usnic_dbg("\n");
kfree(cq);
return 0;
}
/*
 * Register a user memory region of 'length' bytes starting at 'start' with
 * the PD's uiom domain. virt_addr is only logged; udata is unused. The
 * lkey and rkey of the resulting MR are both 0.
 *
 * Returns the embedded ib_mr on success, ERR_PTR(-ENOMEM) if the MR cannot
 * be allocated, the error from usnic_uiom_reg_get(), or ERR_PTR(-EFAULT) if
 * that call returns NULL.
 */
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
					u64 virt_addr, int access_flags,
					struct ib_udata *udata)
{
	struct usnic_ib_mr *mr;
	int err;

	usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
			virt_addr, length);

	/* kzalloc() reports failure as NULL only, never as an ERR_PTR(). */
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
					access_flags, 0);
	if (IS_ERR_OR_NULL(mr->umem)) {
		err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
		goto err_free;
	}

	mr->ibmr.lkey = mr->ibmr.rkey = 0;
	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}
/*
 * Deregister a memory region: release its uiom registration, passing along
 * the 'closing' flag of the owning user context, then free the MR. Always
 * returns 0.
 */
int usnic_ib_dereg_mr(struct ib_mr *ibmr)
{
struct usnic_ib_mr *mr = to_umr(ibmr);
usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
kfree(mr);
return 0;
}
/*
 * Allocate a user context, initialise its (empty) QP group list and link it
 * into the device's context list under usdev_lock. udata is unused.
 * Returns the embedded ib_ucontext, or ERR_PTR(-ENOMEM).
 */
struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct usnic_ib_ucontext *context;
struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
usnic_dbg("\n");
/* Not zeroed: only qp_grp_list and link are initialised here. */
context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&context->qp_grp_list);
mutex_lock(&us_ibdev->usdev_lock);
list_add_tail(&context->link, &us_ibdev->ctx_list);
mutex_unlock(&us_ibdev->usdev_lock);
return &context->ibucontext;
}
/*
 * Unlink a user context from the device's context list and free it. The
 * context must no longer own any QP groups (enforced with BUG_ON). Always
 * returns 0.
 */
int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
usnic_dbg("\n");
mutex_lock(&us_ibdev->usdev_lock);
BUG_ON(!list_empty(&context->qp_grp_list));
list_del(&context->link);
mutex_unlock(&us_ibdev->usdev_lock);
kfree(context);
return 0;
}
/*
 * Map BAR 0 of a VF into user space.
 *
 * The page offset of the mapping request is interpreted as a VF index. The
 * calling context's QP groups are searched for one bound to that VF; the
 * requested length must equal the BAR length exactly. The mapping is marked
 * VM_IO and non-cached.
 *
 * Returns the result of remap_pfn_range(), or -EINVAL if the context owns no
 * QP group on that VF or the length does not match.
 */
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
/* NOTE(review): other callbacks use to_uucontext(); confirm both helpers are equivalent. */
struct usnic_ib_ucontext *uctx = to_ucontext(context);
struct usnic_ib_dev *us_ibdev;
struct usnic_ib_qp_grp *qp_grp;
struct usnic_ib_vf *vf;
struct vnic_dev_bar *bar;
dma_addr_t bus_addr;
unsigned int len;
unsigned int vfid;
usnic_dbg("\n");
us_ibdev = to_usdev(context->device);
vma->vm_flags |= VM_IO;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
/* The page offset carries the VF index, not a real file offset. */
vfid = vma->vm_pgoff;
usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
vma->vm_pgoff, PAGE_SHIFT, vfid);
mutex_lock(&us_ibdev->usdev_lock);
list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
vf = qp_grp->vf;
if (usnic_vnic_get_index(vf->vnic) == vfid) {
/* NOTE(review): bar is not checked for NULL here. */
bar = usnic_vnic_get_bar(vf->vnic, 0);
if ((vma->vm_end - vma->vm_start) != bar->len) {
usnic_err("Bar0 Len %lu - Request map %lu\n",
bar->len,
vma->vm_end - vma->vm_start);
mutex_unlock(&us_ibdev->usdev_lock);
return -EINVAL;
}
/* Copy what is needed so the mutex can be dropped before remapping. */
bus_addr = bar->bus_addr;
len = bar->len;
usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
&bus_addr, bar->vaddr, bar->len);
mutex_unlock(&us_ibdev->usdev_lock);
return remap_pfn_range(vma,
vma->vm_start,
bus_addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
}
}
mutex_unlock(&us_ibdev->usdev_lock);
usnic_err("No VF %u found\n", vfid);
return -EINVAL;
}
/* In ib callbacks section - Start of stub funcs */
/*
 * The callbacks below are stubs: each one only emits a debug message and
 * returns a fixed error without doing any work.
 */
/* Stub: always fails with ERR_PTR(-EPERM). */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct ib_ah_attr *ah_attr)
{
usnic_dbg("\n");
return ERR_PTR(-EPERM);
}
/* Stub: always fails with -EINVAL. */
int usnic_ib_destroy_ah(struct ib_ah *ah)
{
usnic_dbg("\n");
return -EINVAL;
}
/* Stub: always fails with -EINVAL; bad_wr is not written. */
int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
usnic_dbg("\n");
return -EINVAL;
}
/* Stub: always fails with -EINVAL; bad_wr is not written. */
int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
usnic_dbg("\n");
return -EINVAL;
}
/* Stub: always fails with -EINVAL. */
int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *wc)
{
usnic_dbg("\n");
return -EINVAL;
}
/* Stub: always fails with -EINVAL. */
int usnic_ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags)
{
usnic_dbg("\n");
return -EINVAL;
}
/* Stub: always fails with ERR_PTR(-ENOMEM). */
struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
usnic_dbg("\n");
return ERR_PTR(-ENOMEM);
}
/* In ib callbacks section - End of stub funcs */
/* End of ib callbacks section */

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/* Prototypes of the usNIC IB verb callbacks. */
#ifndef USNIC_IB_VERBS_H_
#define USNIC_IB_VERBS_H_
#include "usnic_ib.h"
/* Device, port, QP, GID and P_Key queries */
enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
u8 port_num);
int usnic_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props);
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid);
int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey);
/* Protection domain, queue pair, completion queue and memory region objects */
struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata);
int usnic_ib_dealloc_pd(struct ib_pd *pd);
struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int usnic_ib_destroy_qp(struct ib_qp *qp);
int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
int vector, struct ib_ucontext *context,
struct ib_udata *udata);
int usnic_ib_destroy_cq(struct ib_cq *cq);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int usnic_ib_dereg_mr(struct ib_mr *ibmr);
/* User context lifetime and mmap of device resources */
struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata);
int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
/* Stub verbs: their implementations unconditionally return an error */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct ib_ah_attr *ah_attr);
int usnic_ib_destroy_ah(struct ib_ah *ah);
int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *wc);
int usnic_ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags);
struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
#endif /* !USNIC_IB_VERBS_H_ */

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/* Level-filtered logging helpers for the usNIC driver. */
#ifndef USNIC_LOG_H_
#define USNIC_LOG_H_
#include "usnic.h"
/* Current verbosity threshold; defined outside this header */
extern unsigned int usnic_log_lvl;
/* Verbosity levels, in increasing order of chattiness */
#define USNIC_LOG_LVL_NONE (0)
#define USNIC_LOG_LVL_ERR (1)
#define USNIC_LOG_LVL_INFO (2)
#define USNIC_LOG_LVL_DBG (3)
/*
 * Unconditionally print a "DRV_NAME:function:line: " prefix at kernel log
 * level lvl, followed by the caller's printf-style message. The prefix and
 * the message are emitted by two separate printk() calls.
 */
#define usnic_printk(lvl, args...) \
do { \
printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
__LINE__); \
printk(args); \
} while (0)
/*
 * Debug message: printed only when usnic_log_lvl >= USNIC_LOG_LVL_DBG.
 * Note that it is emitted at KERN_INFO, not KERN_DEBUG.
 */
#define usnic_dbg(args...) \
do { \
if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
usnic_printk(KERN_INFO, args); \
} \
} while (0)
/* Informational message: printed when usnic_log_lvl >= USNIC_LOG_LVL_INFO */
#define usnic_info(args...) \
do { \
if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
usnic_printk(KERN_INFO, args); \
} \
} while (0)
/* Error message: printed at KERN_ERR when usnic_log_lvl >= USNIC_LOG_LVL_ERR */
#define usnic_err(args...) \
do { \
if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
usnic_printk(KERN_ERR, args); \
} \
} while (0)
#endif /* !USNIC_LOG_H_ */

View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/bitmap.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <net/inet_sock.h>
#include "usnic_transport.h"
#include "usnic_log.h"
/* ROCE */
/* Bitmap of reserved port numbers; allocated by usnic_transport_init() */
static unsigned long *roce_bitmap;
/* Bit index at which the next automatic port search starts */
static u16 roce_next_port = 1;
/* Size of roce_bitmap in BYTES: one bit for each of the 2^16 u16 values */
#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/)
/* Held by the reserve/unreserve paths while they touch the two items above */
static DEFINE_SPINLOCK(roce_bitmap_lock);
/*
 * Map a transport type to a constant, human-readable name for log output.
 * Any value not handled explicitly below yields "Not known".
 */
const char *usnic_transport_to_str(enum usnic_transport_type type)
{
	const char *name;

	switch (type) {
	case USNIC_TRANSPORT_UNKNOWN:
		name = "Unknown";
		break;
	case USNIC_TRANSPORT_ROCE_CUSTOM:
		name = "roce custom";
		break;
	case USNIC_TRANSPORT_IPV4_UDP:
		name = "IPv4 UDP";
		break;
	case USNIC_TRANSPORT_MAX:
		name = "Max?";
		break;
	default:
		name = "Not known";
		break;
	}

	return name;
}
/*
 * Format a socket's protocol, IPv4 address and port into buf.
 *
 * buf is zeroed first. Returns the number of characters written (excluding
 * the terminating NUL), or 0 when the socket's address cannot be obtained,
 * in which case buf is left as an empty string.
 */
int usnic_transport_sock_to_str(char *buf, int buf_sz,
					struct socket *sock)
{
	uint32_t ip;
	uint16_t l4_port;
	int l4_proto;

	memset(buf, 0, buf_sz);

	if (usnic_transport_sock_get_addr(sock, &l4_proto, &ip, &l4_port))
		return 0;

	return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
			 l4_proto, &ip, l4_port);
}
/*
 * Reserve a port number for the given transport.
 *
 * If port_num is 0 a free port is picked, searching the bitmap from
 * roce_next_port; roce_next_port then takes on a value in 1..4096. If
 * port_num is non-zero, exactly that port is reserved if it is still free.
 *
 * Only USNIC_TRANSPORT_ROCE_CUSTOM is supported.
 *
 * Returns the reserved port number, or 0 on failure (unsupported transport,
 * requested port already taken, or no free port found).
 */
u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
{
	unsigned long free_bit;

	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
		spin_lock(&roce_bitmap_lock);
		if (!port_num) {
			/*
			 * ROCE_BITMAP_SZ is a byte count, but it is used here
			 * as the number of bits to search, so only the first
			 * ROCE_BITMAP_SZ bits are candidates.
			 */
			free_bit = bitmap_find_next_zero_area(roce_bitmap,
						ROCE_BITMAP_SZ,
						roce_next_port /* start */,
						1 /* nr */,
						0 /* align */);
			/*
			 * A result at or beyond the searched size means no
			 * free bit was found; it must not be treated as a
			 * valid port or marked as reserved.
			 */
			if (free_bit >= ROCE_BITMAP_SZ) {
				usnic_err("Failed to allocate port for %s\n",
						usnic_transport_to_str(type));
				spin_unlock(&roce_bitmap_lock);
				goto out_fail;
			}
			port_num = free_bit;
			roce_next_port = (port_num & 4095) + 1;
		} else if (test_bit(port_num, roce_bitmap)) {
			usnic_err("Failed to allocate port for %s\n",
					usnic_transport_to_str(type));
			spin_unlock(&roce_bitmap_lock);
			goto out_fail;
		}
		bitmap_set(roce_bitmap, port_num, 1);
		spin_unlock(&roce_bitmap_lock);
	} else {
		usnic_err("Failed to allocate port - transport %s unsupported\n",
				usnic_transport_to_str(type));
		goto out_fail;
	}

	usnic_dbg("Allocating port %hu for %s\n", port_num,
			usnic_transport_to_str(type));
	return port_num;

out_fail:
	return 0;
}
/*
 * Release a port previously reserved with usnic_transport_rsrv_port().
 *
 * Logs an error and changes nothing if the transport is not
 * USNIC_TRANSPORT_ROCE_CUSTOM, if port_num is 0, or if the port is not
 * currently marked as reserved.
 */
void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
{
	if (type != USNIC_TRANSPORT_ROCE_CUSTOM) {
		usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
		return;
	}

	spin_lock(&roce_bitmap_lock);
	if (!port_num) {
		usnic_err("Unreserved unvalid port num 0 for %s\n",
				usnic_transport_to_str(type));
	} else if (!test_bit(port_num, roce_bitmap)) {
		usnic_err("Unreserving invalid %hu for %s\n",
				port_num,
				usnic_transport_to_str(type));
	} else {
		bitmap_clear(roce_bitmap, port_num, 1);
		usnic_dbg("Freeing port %hu for %s\n", port_num,
				usnic_transport_to_str(type));
	}
	spin_unlock(&roce_bitmap_lock);
}
/*
 * Look up the socket behind the file descriptor sock_fd.
 *
 * Returns the socket on success; the reference taken by the lookup must be
 * dropped with usnic_transport_put_socket(). Returns ERR_PTR(-ENOENT) when
 * the lookup fails.
 */
struct socket *usnic_transport_get_socket(int sock_fd)
{
	struct socket *sock;
	int err;
	/*
	 * Sized like the buffer in usnic_transport_put_socket(): 25 bytes
	 * were too few for "Proto:.. Addr:.. Port:.." and truncated the
	 * debug output.
	 */
	char buf[100];

	/* sockfd_lookup will internally do a fget */
	sock = sockfd_lookup(sock_fd, &err);
	if (!sock) {
		usnic_err("Unable to lookup socket for fd %d with err %d\n",
				sock_fd, err);
		return ERR_PTR(-ENOENT);
	}

	usnic_transport_sock_to_str(buf, sizeof(buf), sock);
	usnic_dbg("Get sock %s\n", buf);

	return sock;
}
/*
 * Log a description of the socket at debug level, then drop the reference
 * on it with sockfd_put().
 */
void usnic_transport_put_socket(struct socket *sock)
{
	char desc[100];

	usnic_transport_sock_to_str(desc, sizeof(desc), sock);
	usnic_dbg("Put sock %s\n", desc);

	sockfd_put(sock);
}
/*
 * Fetch the local address of an AF_INET socket.
 *
 * Each of proto, addr and port may be NULL if the caller is not interested
 * in that value. On success *proto receives the socket's sk_protocol, and
 * *addr / *port receive the IPv4 address and port in host byte order.
 *
 * Returns 0 on success, the error from the socket's getname() operation,
 * or -EINVAL if the socket is not AF_INET.
 */
int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
					uint32_t *addr, uint16_t *port)
{
	int len;
	int err;
	/*
	 * getname() fills in the socket's native address before the family
	 * can be checked, and that address may be larger than a
	 * struct sockaddr_in (e.g. for IPv6 or UNIX sockets). Use a buffer
	 * big enough for any address family so it cannot overrun the stack.
	 */
	struct sockaddr_storage sock_addr;
	struct sockaddr_in *sin = (struct sockaddr_in *)&sock_addr;

	err = sock->ops->getname(sock,
				(struct sockaddr *)&sock_addr,
				&len, 0);
	if (err)
		return err;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (proto)
		*proto = sock->sk->sk_protocol;
	if (port)
		*port = ntohs(sin->sin_port);
	if (addr)
		*addr = ntohl(sin->sin_addr.s_addr);

	return 0;
}
/*
 * Allocate the zero-filled port reservation bitmap and mark bit 0 as
 * permanently taken, so port 0 is never handed out.
 *
 * Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
 */
int usnic_transport_init(void)
{
	roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
	if (!roce_bitmap) {
		/* Terminate the message so it is not merged with the next log line */
		usnic_err("Failed to allocate bit map\n");
		return -ENOMEM;
	}

	/* Do not ever allocate bit 0, hence set it here */
	bitmap_set(roce_bitmap, 0, 1);
	return 0;
}
/* Free the port reservation bitmap allocated by usnic_transport_init(). */
void usnic_transport_fini(void)
{
kfree(roce_bitmap);
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_TRANSPORT_H_
#define USNIC_TRANSPORT_H_
#include "usnic_abi.h"
/* Returns a constant, human-readable name for the transport type. */
const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
/*
 * Returns number of bytes written, excluding null terminator. If
 * nothing was written, the function returns 0.
 */
int usnic_transport_sock_to_str(char *buf, int buf_sz,
struct socket *sock);
/*
 * Reserve a port. If "port_num" is set, then the function will try
 * to reserve that particular port; if it is 0, a free port is picked.
 * Returns the reserved port number, or 0 on failure.
 */
u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
/* Release a port previously reserved with usnic_transport_rsrv_port(). */
void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
/*
 * Do a fget on the socket referred to by sock_fd and return the socket.
 * Socket will not be destroyed before usnic_transport_put_socket has
 * been called.
 */
struct socket *usnic_transport_get_socket(int sock_fd);
void usnic_transport_put_socket(struct socket *sock);
/*
 * Call usnic_transport_get_socket before calling *_sock_get_addr
 */
int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
uint32_t *addr, uint16_t *port);
/* Module-level setup and teardown of the port reservation state. */
int usnic_transport_init(void);
void usnic_transport_fini(void);
#endif /* !USNIC_TRANSPORT_H_ */

View File

@ -0,0 +1,604 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include <linux/iommu.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/pci.h>
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_uiom_interval_tree.h"
/* Workqueue used for deferred locked_vm accounting; see usnic_uiom_init() */
static struct workqueue_struct *usnic_uiom_wq;
/*
 * Number of scatterlist entries that fit in one page together with the
 * struct usnic_uiom_chunk header: the space left after the header divided
 * by the size of one page_list[] element.
 */
#define USNIC_UIOM_PAGE_CHUNK \
((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\
((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
(void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
static void usnic_uiom_reg_account(struct work_struct *work)
{
struct usnic_uiom_reg *umem = container_of(work,
struct usnic_uiom_reg, work);
down_write(&umem->mm->mmap_sem);
umem->mm->locked_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
}
/*
 * IOMMU fault handler installed on each domain by usnic_uiom_alloc_pd().
 * Logs the faulting device, domain, address and flags, then returns
 * -ENOSYS. The token argument is not used.
 */
static int usnic_uiom_dma_fault(struct iommu_domain *domain,
struct device *dev,
unsigned long iova, int flags,
void *token)
{
usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
dev_name(dev),
domain, iova, flags);
return -ENOSYS;
}
/*
 * Release every page recorded in chunk_list and free the chunks.
 *
 * When dirty is non-zero each page is marked dirty before its reference is
 * dropped. The chunks are freed without being unlinked, so chunk_list must
 * not be walked again afterwards.
 */
static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
{
	struct usnic_uiom_chunk *cur, *next;
	struct scatterlist *entry;
	int idx;

	list_for_each_entry_safe(cur, next, chunk_list, list) {
		for_each_sg(cur->page_list, entry, cur->nents, idx) {
			struct page *pg = sg_page(entry);
			dma_addr_t phys = sg_phys(entry);

			if (dirty)
				set_page_dirty_lock(pg);
			put_page(pg);
			usnic_dbg("pa: %pa\n", &phys);
		}
		kfree(cur);
	}
}
/*
 * Pin the user pages backing [addr, addr + size) of the current process and
 * record them in chunk_list as a list of usnic_uiom_chunk scatterlists.
 *
 * chunk_list is (re)initialised here. On success the number of pinned pages
 * is added to current->mm->locked_vm.
 *
 * Returns 0 on success, -EPERM if the caller may not lock memory, -ENOMEM
 * on allocation failure or when RLIMIT_MEMLOCK would be exceeded without
 * CAP_IPC_LOCK, or the negative error from get_user_pages(). On failure the
 * pages already recorded in chunk_list are released again.
 */
static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
int dmasync, struct list_head *chunk_list)
{
struct page **page_list;
struct scatterlist *sg;
struct usnic_uiom_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
int off;
int i;
int flags;
dma_addr_t pa;
DEFINE_DMA_ATTRS(attrs);
/* NOTE(review): attrs is only set here and not used later in this function */
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
if (!can_do_mlock())
return -EPERM;
INIT_LIST_HEAD(chunk_list);
/* One scratch page holding the struct page pointers of each batch */
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
/* Number of pages spanned, including the partial first and last page */
npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
/* Prospective locked_vm; only committed at "out" if everything succeeded */
locked = npages + current->mm->locked_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto out;
}
/* NOTE(review): flags is computed but not used later in this function */
flags = IOMMU_READ | IOMMU_CACHE;
flags |= (writable) ? IOMMU_WRITE : 0;
cur_base = addr & PAGE_MASK;
ret = 0;
while (npages) {
/* Pin at most as many pages as page_list can hold pointers for */
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof(struct page *)),
1, !writable, page_list, NULL);
if (ret < 0)
goto out;
npages -= ret;
off = 0;
/* Split this batch into chunks of at most USNIC_UIOM_PAGE_CHUNK pages */
while (ret) {
chunk = kmalloc(sizeof(*chunk) +
sizeof(struct scatterlist) *
min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
GFP_KERNEL);
if (!chunk) {
ret = -ENOMEM;
goto out;
}
chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
sg_init_table(chunk->page_list, chunk->nents);
for_each_sg(chunk->page_list, sg, chunk->nents, i) {
sg_set_page(sg, page_list[i + off],
PAGE_SIZE, 0);
pa = sg_phys(sg);
usnic_dbg("va: 0x%lx pa: %pa\n",
cur_base + i*PAGE_SIZE, &pa);
}
cur_base += chunk->nents * PAGE_SIZE;
ret -= chunk->nents;
off += chunk->nents;
list_add_tail(&chunk->list, chunk_list);
}
ret = 0;
}
out:
/* Undo on failure; otherwise commit the new locked page count */
if (ret < 0)
usnic_uiom_put_pages(chunk_list, 0);
else
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
free_page((unsigned long) page_list);
return ret;
}
/*
 * Remove the IOMMU mappings of every interval in the list from pd's domain.
 *
 * Interval bounds are page numbers; each interval is unmapped one page at a
 * time. The interval nodes themselves are neither unlinked nor freed.
 */
static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
						struct usnic_uiom_pd *pd)
{
	struct usnic_uiom_interval_node *node, *next;
	unsigned long iova, remaining;

	list_for_each_entry_safe(node, next, intervals, link) {
		iova = node->start << PAGE_SHIFT;
		remaining = ((node->last - node->start) + 1) << PAGE_SHIFT;

		for (; remaining > 0;
		     iova += PAGE_SIZE, remaining -= PAGE_SIZE) {
			/* Workaround for RH 970401 */
			usnic_dbg("va 0x%lx size 0x%lx", iova, PAGE_SIZE);
			iommu_unmap(pd->domain, iova, PAGE_SIZE);
		}
	}
}
static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
struct usnic_uiom_reg *uiomr,
int dirty)
{
int npages;
unsigned long vpn_start, vpn_last;
struct usnic_uiom_interval_node *interval, *tmp;
int writable = 0;
LIST_HEAD(rm_intervals);
npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
vpn_last = vpn_start + npages - 1;
spin_lock(&pd->lock);
usnic_uiom_remove_interval(&pd->rb_root, vpn_start,
vpn_last, &rm_intervals);
usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
if (interval->flags & IOMMU_WRITE)
writable = 1;
list_del(&interval->link);
kfree(interval);
}
usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
spin_unlock(&pd->lock);
}
/*
 * Map the pages of uiomr that fall inside the given intervals into the
 * IOMMU domain of uiomr->pd.
 *
 * The region's chunks are walked page by page while va tracks the virtual
 * address of the current page. Within an interval, runs of physically
 * contiguous pages are coalesced into a single iommu_map() call.
 *
 * Returns 0 on success. On a mapping failure every interval in the list is
 * unmapped again and the iommu_map() error is returned.
 */
static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
struct usnic_uiom_reg *uiomr)
{
int i, err;
size_t size;
struct usnic_uiom_chunk *chunk;
struct usnic_uiom_interval_node *interval_node;
dma_addr_t pa;
/* [pa_start, pa_end] is the physically contiguous run collected so far */
dma_addr_t pa_start = 0;
dma_addr_t pa_end = 0;
/* -EINVAL doubles as the "no run started yet" marker, see WARN_ON below */
long int va_start = -EINVAL;
struct usnic_uiom_pd *pd = uiomr->pd;
long int va = uiomr->va & PAGE_MASK;
int flags = IOMMU_READ | IOMMU_CACHE;
flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
list);
list_for_each_entry(interval_node, intervals, link) {
/*
 * NOTE(review): when the loop below exits via "break", va is not advanced
 * and the next interval restarts at i = 0 of the same chunk - confirm that
 * va and page_list[i] stay in step when several intervals share a chunk.
 */
iter_chunk:
for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
pa = sg_phys(&chunk->page_list[i]);
/* Skip pages that lie before the current interval */
if ((va >> PAGE_SHIFT) < interval_node->start)
continue;
if ((va >> PAGE_SHIFT) == interval_node->start) {
/* First page of the interval */
va_start = va;
pa_start = pa;
pa_end = pa;
}
WARN_ON(va_start == -EINVAL);
if ((pa_end + PAGE_SIZE != pa) &&
(pa != pa_start)) {
/* PAs are not contiguous */
size = pa_end - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start,
size, flags);
if (err) {
usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
goto err_out;
}
/* Start a new run at the current page */
va_start = va;
pa_start = pa;
pa_end = pa;
}
if ((va >> PAGE_SHIFT) == interval_node->last) {
/* Last page of the interval */
size = pa - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start,
size, flags);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
goto err_out;
}
break;
}
/* Extend the current run by this page */
if (pa != pa_start)
pa_end += PAGE_SIZE;
}
if (i == chunk->nents) {
/*
* Hit last entry of the chunk,
* hence advance to next chunk
*/
chunk = list_first_entry(&chunk->list,
struct usnic_uiom_chunk,
list);
goto iter_chunk;
}
}
return 0;
err_out:
usnic_uiom_unmap_sorted_intervals(intervals, pd);
return err;
}
/*
 * Register the user memory range [addr, addr + size) with the PD: pin its
 * pages, map the parts not yet mapped in the PD's IOMMU domain, and record
 * the range in the PD's interval tree.
 *
 * The writable argument is overridden to 1 (see the comment below).
 *
 * Returns the new region descriptor on success, or an ERR_PTR() carrying
 * the error of the step that failed; everything done up to that point is
 * undone before returning.
 */
struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int writable, int dmasync)
{
struct usnic_uiom_reg *uiomr;
unsigned long va_base, vpn_start, vpn_last;
unsigned long npages;
int offset, err;
LIST_HEAD(sorted_diff_intervals);
/*
* Intel IOMMU map throws an error if a translation entry is
* changed from read to write. This module may not unmap
* and then remap the entry after fixing the permission
* b/c this open up a small windows where hw DMA may page fault
* Hence, make all entries to be writable.
*/
writable = 1;
/* Page-aligned base, offset into the first page, and page-number range */
va_base = addr & PAGE_MASK;
offset = addr & ~PAGE_MASK;
npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
vpn_last = vpn_start + npages - 1;
uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
if (!uiomr)
return ERR_PTR(-ENOMEM);
uiomr->va = va_base;
uiomr->offset = offset;
uiomr->length = size;
uiomr->writable = writable;
uiomr->pd = pd;
err = usnic_uiom_get_pages(addr, size, writable, dmasync,
&uiomr->chunk_list);
if (err) {
usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
goto out_free_uiomr;
}
spin_lock(&pd->lock);
/* Work out which sub-ranges are not yet covered by the PD's interval tree */
err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
(writable) ? IOMMU_WRITE : 0,
IOMMU_WRITE,
&pd->rb_root,
&sorted_diff_intervals);
if (err) {
usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
goto out_put_pages;
}
/* Map only those missing sub-ranges into the IOMMU */
err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
if (err) {
usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
goto out_put_intervals;
}
/* Record the whole range in the interval tree */
err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last,
(writable) ? IOMMU_WRITE : 0);
if (err) {
usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
goto out_unmap_intervals;
}
/* The temporary diff list is no longer needed */
usnic_uiom_put_interval_set(&sorted_diff_intervals);
spin_unlock(&pd->lock);
return uiomr;
/* Error unwinding, in reverse order of the steps above */
out_unmap_intervals:
usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
out_put_intervals:
usnic_uiom_put_interval_set(&sorted_diff_intervals);
out_put_pages:
usnic_uiom_put_pages(&uiomr->chunk_list, 0);
spin_unlock(&pd->lock);
out_free_uiomr:
kfree(uiomr);
return ERR_PTR(err);
}
/*
 * Release a region obtained from usnic_uiom_reg_get(): undo its mappings
 * and page pins, subtract its pages from the locked_vm accounting, and free
 * the descriptor.
 *
 * closing: non-zero when the caller may already hold the mm's mmap_sem; in
 * that case the semaphore is only try-locked and, if that fails, the
 * accounting and the final free are deferred to the driver's workqueue.
 */
void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
{
	struct mm_struct *mm;
	unsigned long npages;

	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);

	mm = get_task_mm(current);
	if (!mm)
		goto free_reg;

	npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;

	/*
	 * We may be called with the mm's mmap_sem already held. This
	 * can happen when a userspace munmap() is the call that drops
	 * the last reference to our file and calls our release
	 * method. If there are memory regions to destroy, we'll end
	 * up here and not be able to take the mmap_sem. In that case
	 * we defer the vm_locked accounting to the driver's workqueue.
	 */
	if (!closing) {
		down_write(&mm->mmap_sem);
	} else if (!down_write_trylock(&mm->mmap_sem)) {
		/* The work item takes over both the mm reference and uiomr */
		INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
		uiomr->mm = mm;
		uiomr->diff = npages;
		queue_work(usnic_uiom_wq, &uiomr->work);
		return;
	}

	current->mm->locked_vm -= npages;
	up_write(&mm->mmap_sem);
	mmput(mm);

free_reg:
	kfree(uiomr);
}
/*
 * Allocate a protection domain backed by a fresh IOMMU domain on the PCI
 * bus, with usnic_uiom_dma_fault() installed as its fault handler.
 *
 * Returns the new PD, or an ERR_PTR(): -ENOMEM if either allocation fails,
 * or the error encoded in the pointer returned by iommu_domain_alloc().
 */
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
{
	struct usnic_uiom_pd *pd;
	void *domain;
	long err;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	domain = iommu_domain_alloc(&pci_bus_type);
	pd->domain = domain;
	if (IS_ERR_OR_NULL(domain)) {
		usnic_err("Failed to allocate IOMMU domain with err %ld\n",
				PTR_ERR(pd->domain));
		err = domain ? PTR_ERR(domain) : -ENOMEM;
		kfree(pd);
		return ERR_PTR(err);
	}

	iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
	spin_lock_init(&pd->lock);
	INIT_LIST_HEAD(&pd->devs);

	return pd;
}
/*
 * Free the PD's IOMMU domain and the PD itself. The pd->devs list is not
 * walked here.
 */
void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
{
iommu_domain_free(pd->domain);
kfree(pd);
}
/*
 * Attach dev to the PD's IOMMU domain and add it to the PD's device list.
 *
 * The device is rejected (and detached again) if the domain does not
 * report the cache-coherency capability.
 *
 * Returns 0 on success, -ENOMEM if the list entry cannot be allocated, the
 * error from iommu_attach_device(), or -EINVAL for a non-coherent IOMMU.
 */
int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
{
	struct usnic_uiom_dev *entry;
	int rc;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;
	entry->dev = dev;

	rc = iommu_attach_device(pd->domain, dev);
	if (rc) {
		kfree(entry);
		return rc;
	}

	if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
		usnic_err("IOMMU of %s does not support cache coherency\n",
				dev_name(dev));
		iommu_detach_device(pd->domain, dev);
		kfree(entry);
		return -EINVAL;
	}

	spin_lock(&pd->lock);
	list_add_tail(&entry->link, &pd->devs);
	pd->dev_cnt++;
	spin_unlock(&pd->lock);

	return 0;
}
/*
 * Remove dev from the PD's device list, detach it from the PD's IOMMU
 * domain and free its list entry.
 *
 * Logs an error and does nothing else if dev is not on the PD's list.
 */
void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
{
	struct usnic_uiom_dev *uiom_dev;
	int found = 0;

	spin_lock(&pd->lock);
	list_for_each_entry(uiom_dev, &pd->devs, link) {
		if (uiom_dev->dev == dev) {
			found = 1;
			break;
		}
	}

	if (!found) {
		usnic_err("Unable to free dev %s - not found\n",
				dev_name(dev));
		spin_unlock(&pd->lock);
		return;
	}

	list_del(&uiom_dev->link);
	pd->dev_cnt--;
	spin_unlock(&pd->lock);

	iommu_detach_device(pd->domain, dev);

	/*
	 * The entry was allocated in usnic_uiom_attach_dev_to_pd() and the
	 * list held the only reference to it; free it now that it has been
	 * unlinked, otherwise it is leaked.
	 */
	kfree(uiom_dev);
}
struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
{
struct usnic_uiom_dev *uiom_dev;
struct device **devs;
int i = 0;
spin_lock(&pd->lock);
devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
if (!devs) {
devs = ERR_PTR(-ENOMEM);
goto out;
}
list_for_each_entry(uiom_dev, &pd->devs, link) {
devs[i++] = uiom_dev->dev;
}
out:
spin_unlock(&pd->lock);
return devs;
}
/* Free an array returned by usnic_uiom_get_dev_list(). */
void usnic_uiom_free_dev_list(struct device **devs)
{
kfree(devs);
}
/*
 * Module-level setup: verify that an IOMMU is present on the PCI bus and
 * create the workqueue (named drv_name) used for deferred accounting.
 *
 * Returns 0 on success, -EPERM if no IOMMU is present, or -ENOMEM if the
 * workqueue cannot be created.
 */
int usnic_uiom_init(char *drv_name)
{
	if (!iommu_present(&pci_bus_type)) {
		usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
		return -EPERM;
	}

	usnic_uiom_wq = create_workqueue(drv_name);
	if (usnic_uiom_wq)
		return 0;

	usnic_err("Unable to alloc wq for drv %s\n", drv_name);
	return -ENOMEM;
}
/*
 * Module-level teardown: flush any pending deferred accounting work, then
 * destroy the workqueue created by usnic_uiom_init().
 */
void usnic_uiom_fini(void)
{
flush_workqueue(usnic_uiom_wq);
destroy_workqueue(usnic_uiom_wq);
}

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/* User memory registration through the IOMMU for the usNIC driver. */
#ifndef USNIC_UIOM_H_
#define USNIC_UIOM_H_
#include <linux/list.h>
#include <linux/scatterlist.h>
#include "usnic_uiom_interval_tree.h"
#define USNIC_UIOM_READ (1)
#define USNIC_UIOM_WRITE (2)
#define USNIC_UIOM_MAX_PD_CNT (1000)
#define USNIC_UIOM_MAX_MR_CNT (1000000)
#define USNIC_UIOM_MAX_MR_SIZE (~0UL)
#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE)
/* One device attached to a PD; linked on usnic_uiom_pd.devs */
struct usnic_uiom_dev {
struct device *dev;
struct list_head link;
};
/* Protection domain: an IOMMU domain plus bookkeeping of what is mapped */
struct usnic_uiom_pd {
struct iommu_domain *domain;
/* Taken around updates of rb_root, devs and dev_cnt */
spinlock_t lock;
/* Interval tree of the page-number ranges registered in this PD */
struct rb_root rb_root;
/* List of usnic_uiom_dev entries and its length */
struct list_head devs;
int dev_cnt;
};
/* One registered user memory region */
struct usnic_uiom_reg {
struct usnic_uiom_pd *pd;
/* Page-aligned start address, requested length, offset into first page */
unsigned long va;
size_t length;
int offset;
int page_size;
int writable;
/* List of usnic_uiom_chunk entries describing the pinned pages */
struct list_head chunk_list;
/* Used only when locked_vm accounting is deferred to the workqueue */
struct work_struct work;
struct mm_struct *mm;
unsigned long diff;
};
/* A batch of pinned pages: nents scatterlist entries follow the header */
struct usnic_uiom_chunk {
struct list_head list;
int nents;
struct scatterlist page_list[0];
};
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
struct device *dev);
/* Returns an allocated array of the PD's devices; free with *_free_dev_list */
struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
void usnic_uiom_free_dev_list(struct device **devs);
/* Pin and map a user range; returns the region or an ERR_PTR() */
struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int access, int dmasync);
void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
int usnic_uiom_init(char *drv_name);
void usnic_uiom_fini(void);
#endif /* USNIC_UIOM_H_ */

View File

@ -0,0 +1,236 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/list_sort.h>
#include <linux/interval_tree_generic.h>
#include "usnic_uiom_interval_tree.h"
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \
do { \
node = usnic_uiom_interval_node_alloc(start, \
end, ref_cnt, flags); \
if (!node) { \
err = -ENOMEM; \
goto err_out; \
} \
} while (0)
#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list))
#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \
err_out, list) \
do { \
MAKE_NODE(node, start, end, \
ref_cnt, flags, err, \
err_out); \
MARK_FOR_ADD(node, list); \
} while (0)
#define FLAGS_EQUAL(flags1, flags2, mask) \
(((flags1) & (mask)) == ((flags2) & (mask)))
/*
 * Allocate a zero-filled interval node (GFP_ATOMIC) covering [start, last]
 * with the given reference count and flags.
 *
 * Returns the node, or NULL if the allocation fails.
 */
static struct usnic_uiom_interval_node*
usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
				int flags)
{
	struct usnic_uiom_interval_node *node;

	node = kzalloc(sizeof(*node), GFP_ATOMIC);
	if (node) {
		node->start = start;
		node->last = last;
		node->flags = flags;
		node->ref_cnt = ref_cnt;
	}

	return node;
}
/*
 * list_sort() comparator: orders interval nodes by ascending start.
 * Returns -1, 0 or 1; priv is unused.
 */
static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct usnic_uiom_interval_node *lhs =
		list_entry(a, struct usnic_uiom_interval_node, link);
	struct usnic_uiom_interval_node *rhs =
		list_entry(b, struct usnic_uiom_interval_node, link);

	/* Compare instead of subtracting: the keys are wider than int */
	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
/*
 * Collect every tree node that overlaps [start, last] into "list", sorted
 * by ascending start. "list" is initialised here. The nodes remain in the
 * tree; they are only chained together through their "link" member.
 */
static void
find_intervals_intersection_sorted(struct rb_root *root, unsigned long start,
					unsigned long last,
					struct list_head *list)
{
	struct usnic_uiom_interval_node *cur;

	INIT_LIST_HEAD(list);

	cur = usnic_uiom_interval_tree_iter_first(root, start, last);
	while (cur) {
		list_add_tail(&cur->link, list);
		cur = usnic_uiom_interval_tree_iter_next(cur, start, last);
	}

	list_sort(NULL, list, interval_cmp);
}
/*
 * Compute the parts of [start, last] that are not already present in the
 * tree with matching flags (compared under flag_mask).
 *
 * The result is returned in diff_set (initialised here) as newly allocated
 * nodes with ref_cnt 1 and the given flags, in ascending order. The tree
 * itself is not modified.
 *
 * Returns 0 on success or -ENOMEM, in which case diff_set is emptied again.
 */
int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
int flags, int flag_mask,
struct rb_root *root,
struct list_head *diff_set)
{
struct usnic_uiom_interval_node *interval, *tmp;
int err = 0;
/* First page number not yet accounted for */
long int pivot = start;
LIST_HEAD(intersection_set);
INIT_LIST_HEAD(diff_set);
find_intervals_intersection_sorted(root, start, last,
&intersection_set);
list_for_each_entry(interval, &intersection_set, link) {
/* Gap before this tree interval: it belongs to the diff */
if (pivot < interval->start) {
MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
1, flags, err, err_out,
diff_set);
pivot = interval->start;
}
/*
* Invariant: Set [start, pivot] is either in diff_set or root,
* but not in both.
*/
if (pivot > interval->last) {
continue;
} else if (pivot <= interval->last &&
FLAGS_EQUAL(interval->flags, flags,
flag_mask)) {
/* Already present with matching flags: skip past it */
pivot = interval->last + 1;
}
}
/* Whatever remains after the last tree interval also belongs to the diff */
if (pivot <= last)
MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
diff_set);
return 0;
err_out:
/* Reached through the goto hidden in MAKE_NODE_AND_APPEND() */
list_for_each_entry_safe(interval, tmp, diff_set, link) {
list_del(&interval->link);
kfree(interval);
}
return err;
}
/*
 * Free every node on the list. The nodes are not unlinked first, so the
 * list head must not be used again without being re-initialised.
 */
void usnic_uiom_put_interval_set(struct list_head *intervals)
{
	struct usnic_uiom_interval_node *node, *next;

	list_for_each_entry_safe(node, next, intervals, link)
		kfree(node);
}
/*
 * Add one reference on [start, last] to the tree.
 *
 * Existing nodes overlapping the range are split at the range boundaries:
 * the overlapping parts get ref_cnt + 1 and the OR of old and new flags,
 * the non-overlapping parts keep their ref_cnt and flags, and uncovered
 * gaps become new nodes with ref_cnt 1. The replacement nodes are built
 * first; the old overlapping nodes are removed and freed only once all
 * allocations have succeeded.
 *
 * Returns 0 on success or -ENOMEM, in which case the tree is unchanged.
 */
int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start,
unsigned long last, int flags)
{
struct usnic_uiom_interval_node *interval, *tmp;
unsigned long istart, ilast;
int iref_cnt, iflags;
unsigned long lpivot = start;
int err = 0;
LIST_HEAD(to_add);
LIST_HEAD(intersection_set);
find_intervals_intersection_sorted(root, start, last,
&intersection_set);
list_for_each_entry(interval, &intersection_set, link) {
/*
* Invariant - lpivot is the left edge of next interval to be
* inserted
*/
istart = interval->start;
ilast = interval->last;
iref_cnt = interval->ref_cnt;
iflags = interval->flags;
if (istart < lpivot) {
/* Existing node sticks out to the left: keep that part as is */
MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
iflags, err, err_out, &to_add);
} else if (istart > lpivot) {
/* Gap before the existing node: new, singly referenced node */
MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
err, err_out, &to_add);
lpivot = istart;
} else {
lpivot = istart;
}
if (ilast > last) {
/* Existing node sticks out to the right: split it at "last" */
MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
iflags | flags, err, err_out,
&to_add);
MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
iflags, err, err_out, &to_add);
} else {
MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
iflags | flags, err, err_out,
&to_add);
}
lpivot = ilast + 1;
}
/* Tail of the range not covered by any existing node */
if (lpivot <= last)
MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
&to_add);
/* All allocations succeeded: swap the old nodes for the new ones */
list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
usnic_uiom_interval_tree_remove(interval, root);
kfree(interval);
}
list_for_each_entry(interval, &to_add, link)
usnic_uiom_interval_tree_insert(interval, root);
return 0;
err_out:
/* Reached through the goto hidden in MAKE_NODE_AND_APPEND() */
list_for_each_entry_safe(interval, tmp, &to_add, link)
kfree(interval);
return err;
}
/*
 * Drop one reference from every node of @root that overlaps
 * [start, last].  Nodes whose count reaches zero are appended to
 * @removed and then taken out of the tree; they are not freed here.
 *
 * @removed is not initialised by this function, and every entry found on
 * it afterwards is passed to usnic_uiom_interval_tree_remove().
 */
void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start,
				unsigned long last, struct list_head *removed)
{
	struct usnic_uiom_interval_node *node;

	node = usnic_uiom_interval_tree_iter_first(root, start, last);
	while (node) {
		node->ref_cnt--;
		if (node->ref_cnt == 0)
			list_add_tail(&node->link, removed);
		node = usnic_uiom_interval_tree_iter_next(node, start, last);
	}

	/* Unlink from the tree only after the tree walk has finished. */
	list_for_each_entry(node, removed, link)
		usnic_uiom_interval_tree_remove(node, root);
}
/*
 * Instantiate the generic interval tree for struct
 * usnic_uiom_interval_node: 'rb' is the tree linkage, endpoints are
 * unsigned long, '__subtree_last' is the per-node augmented field, and
 * the generated functions carry the usnic_uiom_interval_tree prefix.
 * The storage-class argument is left empty, so the functions are not
 * static.  START and LAST are accessor macros defined elsewhere in this
 * file.
 */
INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
unsigned long, __subtree_last,
START, LAST, , usnic_uiom_interval_tree)

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_UIOM_INTERVAL_TREE_H_
#define USNIC_UIOM_INTERVAL_TREE_H_
#include <linux/rbtree.h>
/* One closed interval [start, last] stored in the usnic uiom interval tree. */
struct usnic_uiom_interval_node {
/* Linkage into the rb-tree based interval tree. */
struct rb_node rb;
/* Linkage for temporary lists (intersection, diff and removed sets). */
struct list_head link;
/* First value covered by the interval. */
unsigned long start;
/* Last value covered by the interval (inclusive). */
unsigned long last;
/* Augmented field maintained by the generated interval tree code. */
unsigned long __subtree_last;
/* Number of insertions currently covering this interval. */
unsigned int ref_cnt;
/* Flags attached to the interval; OR-ed together when ranges overlap. */
int flags;
};
/* Link {node} into the interval tree {root}. */
extern void
usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
struct rb_root *root);
/* Unlink {node} from the interval tree {root}; the node is not freed. */
extern void
usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
struct rb_root *root);
/* Start iterating over the nodes of {root} that overlap {start...last}. */
extern struct usnic_uiom_interval_node *
usnic_uiom_interval_tree_iter_first(struct rb_root *root,
unsigned long start,
unsigned long last);
/* Continue an iteration after {node}; NULL ends the iteration. */
extern struct usnic_uiom_interval_node *
usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
unsigned long start, unsigned long last);
/*
 * Inserts {start...last} into {root}. If there are overlaps,
 * nodes will be broken up and merged. Returns 0 on success or a negative
 * error code, in which case {root} is left unchanged.
 */
int usnic_uiom_insert_interval(struct rb_root *root,
unsigned long start, unsigned long last,
int flags);
/*
 * Removes {start...last} from {root}: every overlapping node loses one
 * reference, and nodes whose reference count drops to zero are taken out
 * of the tree and returned in 'removed'. The caller is responsible for
 * freeing the memory of the nodes in 'removed'.
 */
void usnic_uiom_remove_interval(struct rb_root *root,
unsigned long start, unsigned long last,
struct list_head *removed);
/*
 * Returns {start...last} - {root} (relative complement of {root} in
 * {start...last}) in diff_set, sorted in ascending order. Returns 0 on
 * success or a negative error code, in which case diff_set is left empty.
 */
int usnic_uiom_get_intervals_diff(unsigned long start,
unsigned long last, int flags,
int flag_mask,
struct rb_root *root,
struct list_head *diff_set);
/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
void usnic_uiom_put_interval_set(struct list_head *intervals);
#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */

View File

@ -0,0 +1,467 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "usnic_ib.h"
#include "vnic_resource.h"
#include "usnic_log.h"
#include "usnic_vnic.h"
/* Per-PCI-function state: the vnic device, its BARs and its resources. */
struct usnic_vnic {
/* Handle returned by vnic_dev_register(). */
struct vnic_dev *vdev;
/* One slot per PCI resource; only memory BARs are mapped. */
struct vnic_dev_bar bar[PCI_NUM_RESOURCES];
/* Resources grouped by type, indexed by enum usnic_vnic_res_type. */
struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX];
/* Protects resource ownership and the per-chunk free counts. */
spinlock_t res_lock;
};
/*
 * Map a usnic resource type onto the corresponding vnic_res_type.
 * Values at or beyond USNIC_VNIC_RES_TYPE_MAX yield RES_TYPE_MAX.
 */
static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
{
	/* Expand the resource table keeping only the vnic_res_type column. */
#define DEFINE_USNIC_VNIC_RES_AT(usnic_name, vnic_type, text, value) \
		vnic_type,
#define DEFINE_USNIC_VNIC_RES(usnic_name, vnic_type, text) \
		vnic_type,
	static enum vnic_res_type vnic_type_of[] = {
						USNIC_VNIC_RES_TYPES};
#undef DEFINE_USNIC_VNIC_RES
#undef DEFINE_USNIC_VNIC_RES_AT

	return (res_type < USNIC_VNIC_RES_TYPE_MAX) ?
			vnic_type_of[res_type] : RES_TYPE_MAX;
}
/*
 * Return the short description string of a usnic resource type, or
 * "unknown" for values at or beyond USNIC_VNIC_RES_TYPE_MAX.
 */
const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
{
	/* Expand the resource table keeping only the description column. */
#define DEFINE_USNIC_VNIC_RES_AT(usnic_name, vnic_type, text, value) \
		text,
#define DEFINE_USNIC_VNIC_RES(usnic_name, vnic_type, text) \
		text,
	static const char * const type_text[] = {
						USNIC_VNIC_RES_TYPES};
#undef DEFINE_USNIC_VNIC_RES
#undef DEFINE_USNIC_VNIC_RES_AT

	return (res_type < USNIC_VNIC_RES_TYPE_MAX) ?
			type_text[res_type] : "unknown";
}
/* Return the PCI name of the device backing @vnic. */
const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
{
	struct pci_dev *pdev = usnic_vnic_get_pdev(vnic);

	return pci_name(pdev);
}
/*
 * Format a textual description of @vnic into @buf (at most @buf_sz bytes).
 *
 * The output is a header line (VF index and BAR0 details, optionally
 * extended by @printtitle), a column-heading line (optionally extended by
 * @printcols) and one line per resource (optionally extended by
 * @printrow, which receives the resource's owner).  Any of the three
 * callbacks may be NULL.  The resource lines are produced with
 * vnic->res_lock held, so @printrow runs under that spinlock.
 *
 * Returns the sum of the lengths reported by scnprintf() and the
 * callbacks.
 */
int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
			int buf_sz,
			void *hdr_obj,
			int (*printtitle)(void *, char*, int),
			int (*printcols)(char *, int),
			int (*printrow)(void *, char *, int))
{
	struct usnic_vnic_res_chunk *cur_chunk;
	struct usnic_vnic_res *cur_res;
	struct vnic_dev_bar *bar0;
	int type_idx, res_idx;
	int used = 0;

	/* Header line. */
	bar0 = usnic_vnic_get_bar(vnic, 0);
	used += scnprintf(buf + used, buf_sz - used,
			"VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
			usnic_vnic_get_index(vnic),
			&bar0->bus_addr,
			bar0->vaddr, bar0->len);
	if (printtitle)
		used += printtitle(hdr_obj, buf + used, buf_sz - used);
	used += scnprintf(buf + used, buf_sz - used, "\n");

	/* Column headings. */
	used += scnprintf(buf + used, buf_sz - used,
			"|RES\t|CTRL_PIN\t\t|IN_USE\t");
	if (printcols)
		used += printcols(buf + used, buf_sz - used);
	used += scnprintf(buf + used, buf_sz - used, "\n");

	/* One row per resource, grouped by resource type. */
	spin_lock(&vnic->res_lock);
	for (type_idx = 0; type_idx < ARRAY_SIZE(vnic->chunks); type_idx++) {
		cur_chunk = &vnic->chunks[type_idx];
		for (res_idx = 0; res_idx < cur_chunk->cnt; res_idx++) {
			cur_res = cur_chunk->res[res_idx];
			used += scnprintf(buf + used, buf_sz - used,
					"|%s[%u]\t|0x%p\t|%u\t",
					usnic_vnic_res_type_to_str(cur_res->type),
					cur_res->vnic_idx, cur_res->ctrl,
					!!cur_res->owner);
			if (printrow)
				used += printrow(cur_res->owner, buf + used,
						buf_sz - used);
			used += scnprintf(buf + used, buf_sz - used, "\n");
		}
	}
	spin_unlock(&vnic->res_lock);

	return used;
}
/*
 * Set the count of the first entry of @spec whose type is @trgt_type.
 * Triggers WARN_ON() when @spec has no entry of that type.
 */
void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
				enum usnic_vnic_res_type trgt_type,
				u16 cnt)
{
	int slot;

	for (slot = 0; slot < USNIC_VNIC_RES_TYPE_MAX; slot++) {
		if (spec->resources[slot].type != trgt_type)
			continue;

		spec->resources[slot].cnt = cnt;
		return;
	}

	/* No entry of the requested type. */
	WARN_ON(1);
}
/*
 * Check that @res_spec provides at least what @min_spec asks for.
 *
 * For each entry of @min_spec, the entry of @res_spec carrying the same
 * resource type is looked up and its count must be no smaller than the
 * required minimum.
 *
 * Returns 0 when every minimum is met, -EINVAL when a type required by
 * @min_spec is absent from @res_spec or its count is too small.
 */
int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
					struct usnic_vnic_res_spec *res_spec)
{
	int found, i, j;

	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
		found = 0;

		/*
		 * Match by type, not by position: i walks the requirements,
		 * j walks the candidates.  The two specs need not list their
		 * resources in the same order.
		 */
		for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
			if (res_spec->resources[j].type !=
				min_spec->resources[i].type)
				continue;
			found = 1;
			if (min_spec->resources[i].cnt >
					res_spec->resources[j].cnt)
				return -EINVAL;
			break;
		}

		if (!found)
			return -EINVAL;
	}
	return 0;
}
int usnic_vnic_spec_dump(char *buf, int buf_sz,
struct usnic_vnic_res_spec *res_spec)
{
enum usnic_vnic_res_type res_type;
int res_cnt;
int i;
int offset = 0;
for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
res_type = res_spec->resources[i].type;
res_cnt = res_spec->resources[i].cnt;
offset += scnprintf(buf + offset, buf_sz - offset,
"Res: %s Cnt: %d ",
usnic_vnic_res_type_to_str(res_type),
res_cnt);
}
return offset;
}
int usnic_vnic_check_room(struct usnic_vnic *vnic,
struct usnic_vnic_res_spec *res_spec)
{
int i;
enum usnic_vnic_res_type res_type;
int res_cnt;
for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
res_type = res_spec->resources[i].type;
res_cnt = res_spec->resources[i].cnt;
if (res_type == USNIC_VNIC_RES_TYPE_EOL)
break;
if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
return -EBUSY;
}
return 0;
}
/*
 * Total number of resources of @type owned by @vnic.
 * @type is used as an array index without a range check.
 */
int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
				enum usnic_vnic_res_type type)
{
	int total = vnic->chunks[type].cnt;

	return total;
}
/*
 * Number of resources of @type on @vnic that currently have no owner.
 * @type is used as an array index without a range check, and the value
 * is read without taking vnic->res_lock.
 */
int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
				enum usnic_vnic_res_type type)
{
	int unowned = vnic->chunks[type].free_cnt;

	return unowned;
}
/*
 * Reserve @cnt resources of @type from @vnic on behalf of @owner.
 *
 * Returns a newly allocated chunk that references the reserved resources
 * (release it with usnic_vnic_put_resources()), ERR_PTR(-EINVAL) when
 * @cnt < 1, @owner is NULL or fewer than @cnt resources are free, or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 *
 * The free-count check happens before vnic->res_lock is taken; if fewer
 * than @cnt resources are found under the lock, WARN_ON() fires and the
 * chunk is returned with the smaller count.
 */
struct usnic_vnic_res_chunk *
usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
				int cnt, void *owner)
{
	struct usnic_vnic_res_chunk *pool, *grant;
	struct usnic_vnic_res *candidate;
	int idx;

	if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
		return ERR_PTR(-EINVAL);

	grant = kzalloc(sizeof(*grant), GFP_ATOMIC);
	if (!grant) {
		usnic_err("Failed to allocate chunk for %s - Out of memory\n",
				usnic_vnic_pci_name(vnic));
		return ERR_PTR(-ENOMEM);
	}

	grant->res = kzalloc(sizeof(*(grant->res))*cnt, GFP_ATOMIC);
	if (!grant->res) {
		usnic_err("Failed to allocate resources for %s. Out of memory\n",
				usnic_vnic_pci_name(vnic));
		kfree(grant);
		return ERR_PTR(-ENOMEM);
	}

	spin_lock(&vnic->res_lock);
	pool = &vnic->chunks[type];
	for (idx = 0; idx < pool->cnt; idx++) {
		if (grant->cnt >= cnt)
			break;

		candidate = pool->res[idx];
		if (candidate->owner)
			continue;

		/* Claim the first unowned resources in pool order. */
		pool->free_cnt--;
		candidate->owner = owner;
		grant->res[grant->cnt++] = candidate;
	}
	spin_unlock(&vnic->res_lock);

	grant->type = type;
	grant->vnic = vnic;
	WARN_ON(grant->cnt != cnt);

	return grant;
}
/*
 * Give back every resource referenced by @chunk: clear its owner and
 * bump the free count of its type, all under vnic->res_lock.  The chunk
 * and its pointer array are freed afterwards.
 */
void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
{
	struct usnic_vnic *vnic = chunk->vnic;
	struct usnic_vnic_res *held;
	int idx;

	spin_lock(&vnic->res_lock);
	/* Walk from the last entry down, shrinking chunk->cnt as we go. */
	for (idx = --chunk->cnt; idx >= 0; idx = --chunk->cnt) {
		held = chunk->res[idx];
		chunk->res[idx] = NULL;
		held->owner = NULL;
		vnic->chunks[held->type].free_cnt++;
	}
	spin_unlock(&vnic->res_lock);

	kfree(chunk->res);
	kfree(chunk);
}
u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
{
return usnic_vnic_get_pdev(vnic)->devfn - 1;
}
static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
enum usnic_vnic_res_type type,
struct usnic_vnic_res_chunk *chunk)
{
int cnt, err, i;
struct usnic_vnic_res *res;
cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
if (cnt < 1)
return -EINVAL;
chunk->cnt = chunk->free_cnt = cnt;
chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
if (!chunk->res)
return -ENOMEM;
for (i = 0; i < cnt; i++) {
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res) {
err = -ENOMEM;
goto fail;
}
res->type = type;
res->vnic_idx = i;
res->vnic = vnic;
res->ctrl = vnic_dev_get_res(vnic->vdev,
_to_vnic_res_type(type), i);
chunk->res[i] = res;
}
chunk->vnic = vnic;
return 0;
fail:
for (i--; i >= 0; i--)
kfree(chunk->res[i]);
kfree(chunk->res);
return err;
}
/*
 * Free the chunk->cnt resources referenced by @chunk and the pointer
 * array itself.  @chunk is not freed.
 */
static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
{
	int idx = 0;

	while (idx < chunk->cnt) {
		kfree(chunk->res[idx]);
		idx++;
	}

	kfree(chunk->res);
}
/*
 * Map the memory BARs of @pdev, register the vnic device and allocate one
 * resource chunk per usnic resource type (every type strictly between
 * USNIC_VNIC_RES_TYPE_EOL and USNIC_VNIC_RES_TYPE_MAX).
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and -ENODEV (BAR mapping failed), -EINVAL (device registration failed)
 * or the error from usnic_vnic_alloc_res_chunk() is returned.
 */
static int usnic_vnic_discover_resources(struct pci_dev *pdev,
struct usnic_vnic *vnic)
{
enum usnic_vnic_res_type res_type;
int i;
int err = 0;
/* Map every memory BAR; non-memory resources are skipped. */
for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
continue;
vnic->bar[i].len = pci_resource_len(pdev, i);
vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
if (!vnic->bar[i].vaddr) {
usnic_err("Cannot memory-map BAR %d, aborting\n",
i);
err = -ENODEV;
goto out_clean_bar;
}
vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
}
vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
ARRAY_SIZE(vnic->bar));
if (!vnic->vdev) {
usnic_err("Failed to register device %s\n",
pci_name(pdev));
err = -EINVAL;
goto out_clean_bar;
}
/* One chunk per real resource type; EOL and MAX are sentinels. */
for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
err = usnic_vnic_alloc_res_chunk(vnic, res_type,
&vnic->chunks[res_type]);
if (err) {
usnic_err("Failed to alloc res %s with err %d\n",
usnic_vnic_res_type_to_str(res_type),
err);
goto out_clean_chunks;
}
}
return 0;
out_clean_chunks:
/* Free only the chunks allocated before the one that failed. */
for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
vnic_dev_unregister(vnic->vdev);
out_clean_bar:
/*
 * Unmap the memory BARs in order and stop at the first one without a
 * mapping: that is where the mapping loop above gave up.
 */
for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
continue;
if (!vnic->bar[i].vaddr)
break;
iounmap(vnic->bar[i].vaddr);
}
return err;
}
/* Return the PCI device that the vnic device of @vnic was registered with. */
struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
{
	struct vnic_dev *vdev = vnic->vdev;

	return vnic_dev_get_pdev(vdev);
}
/*
 * Return the BAR descriptor number @bar_num of @vnic, or NULL when
 * @bar_num is outside the bar array.
 */
struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
				int bar_num)
{
	if (bar_num >= ARRAY_SIZE(vnic->bar))
		return NULL;

	return &vnic->bar[bar_num];
}
static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
{
int i;
struct pci_dev *pdev;
enum usnic_vnic_res_type res_type;
pdev = usnic_vnic_get_pdev(vnic);
for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
vnic_dev_unregister(vnic->vdev);
for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
continue;
iounmap(vnic->bar[i].vaddr);
}
}
/*
 * Allocate a usnic_vnic for the already-enabled PCI device @pdev and
 * discover its resources.
 *
 * Returns the new vnic, ERR_PTR(-EINVAL) when @pdev is not enabled,
 * ERR_PTR(-ENOMEM) when allocation fails, or the ERR_PTR-encoded error
 * from resource discovery.
 */
struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
{
	struct usnic_vnic *vnic;
	int err;

	if (!pci_is_enabled(pdev)) {
		usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
		return ERR_PTR(-EINVAL);
	}

	vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
	if (!vnic) {
		usnic_err("Failed to alloc vnic for %s - out of memory\n",
				pci_name(pdev));
		return ERR_PTR(-ENOMEM);
	}

	spin_lock_init(&vnic->res_lock);

	err = usnic_vnic_discover_resources(pdev, vnic);
	if (!err) {
		usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
		return vnic;
	}

	usnic_err("Failed to discover %s resources with err %d\n",
			pci_name(pdev), err);
	kfree(vnic);

	return ERR_PTR(err);
}
/*
 * Release everything owned by @vnic (resource chunks, vnic device
 * registration, BAR mappings) and then free @vnic itself.
 */
void usnic_vnic_free(struct usnic_vnic *vnic)
{
/* Must run before kfree(): it reads vnic->chunks, ->vdev and ->bar. */
usnic_vnic_release_resources(vnic);
kfree(vnic);
}

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
*
* This program is free software; you may redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef USNIC_VNIC_H_
#define USNIC_VNIC_H_
#include <linux/pci.h>
#include "vnic_dev.h"
/*
 * Table of usnic resource types (X-macro).  Each row is expanded through
 * DEFINE_USNIC_VNIC_RES_AT() / DEFINE_USNIC_VNIC_RES(), which the user
 * defines before naming USNIC_VNIC_RES_TYPES and undefines afterwards.
 *
 * The last row carries no line continuation, so the macro body ends
 * there regardless of what follows.
 */
/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */
#define USNIC_VNIC_RES_TYPES \
	DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
	DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
	DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
	DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
	DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
	DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")
/*
 * Expand the USNIC_VNIC_RES_TYPES table into enum constants named
 * USNIC_VNIC_RES_TYPE_<name>.  The _AT form pins a row to an explicit
 * value; the plain form takes the next value.  The table also has EOL
 * and MAX rows, so USNIC_VNIC_RES_TYPE_EOL and USNIC_VNIC_RES_TYPE_MAX
 * are ordinary members of the enum.
 */
#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
enum usnic_vnic_res_type {
USNIC_VNIC_RES_TYPES
};
/* The helper macros are only meaningful during the expansion above. */
#undef DEFINE_USNIC_VNIC_RES
#undef DEFINE_USNIC_VNIC_RES_AT
/* A single vnic resource. */
struct usnic_vnic_res {
/* Resource type. */
enum usnic_vnic_res_type type;
/* Index of this resource among the vnic's resources of the same type. */
unsigned int vnic_idx;
/* vnic this resource belongs to. */
struct usnic_vnic *vnic;
/* Control address of the resource inside the device's I/O memory. */
void __iomem *ctrl;
/* Opaque owner cookie; NULL while the resource is free. */
void *owner;
};
/* A group of resources belonging to one vnic. */
struct usnic_vnic_res_chunk {
/* Type of the resources in this chunk. */
enum usnic_vnic_res_type type;
/* Number of valid entries in res[]. */
int cnt;
/* Number of entries that currently have no owner. */
int free_cnt;
/* Array of cnt pointers to resources. */
struct usnic_vnic_res **res;
/* vnic the resources belong to. */
struct usnic_vnic *vnic;
};
/* One line of a resource specification: a resource type and a count. */
struct usnic_vnic_res_desc {
enum usnic_vnic_res_type type;
uint16_t cnt;
};
/*
 * A resource specification: up to USNIC_VNIC_RES_TYPE_MAX (type, count)
 * entries.  usnic_vnic_check_room() treats an entry of type
 * USNIC_VNIC_RES_TYPE_EOL as the end of the list.
 */
struct usnic_vnic_res_spec {
struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
};
/* Short description of a resource type; "unknown" for out-of-range values. */
const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
/* PCI name of the device backing the vnic. */
const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
/*
 * Format a description of the vnic and its resources into buf. The three
 * callbacks are optional (may be NULL) and let the caller append to the
 * title line, the column headings and each resource row. Returns the
 * number of characters produced.
 */
int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
void *hdr_obj,
int (*printtitle)(void *, char*, int),
int (*printcols)(char *, int),
int (*printrow)(void *, char *, int));
/* Set the count of the entry of spec whose type is trgt_type. */
void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
enum usnic_vnic_res_type trgt_type,
u16 cnt);
/* Returns 0 if res_spec meets min_spec, -EINVAL otherwise. */
int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
struct usnic_vnic_res_spec *res_spec);
/* Format res_spec into buf; returns the number of characters produced. */
int usnic_vnic_spec_dump(char *buf, int buf_sz,
struct usnic_vnic_res_spec *res_spec);
/* Returns 0 if the vnic has enough free resources for res_spec, else -EBUSY. */
int usnic_vnic_check_room(struct usnic_vnic *vnic,
struct usnic_vnic_res_spec *res_spec);
/* Total number of resources of the given type. */
int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
enum usnic_vnic_res_type type);
/* Number of currently unowned resources of the given type. */
int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
enum usnic_vnic_res_type type);
/*
 * Reserve cnt resources of the given type for owner. Returns a chunk to
 * be released with usnic_vnic_put_resources(), or an ERR_PTR() value.
 */
struct usnic_vnic_res_chunk *
usnic_vnic_get_resources(struct usnic_vnic *vnic,
enum usnic_vnic_res_type type,
int cnt,
void *owner);
/* Release the resources of a chunk and free the chunk. */
void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
/* PCI device backing the vnic. */
struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
/* BAR descriptor bar_num of the vnic, or NULL if bar_num is out of range. */
struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
int bar_num);
/* Create a vnic for an enabled PCI device; returns ERR_PTR() on failure. */
struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
/* Destroy a vnic created by usnic_vnic_alloc(). */
void usnic_vnic_free(struct usnic_vnic *vnic);
/* Index of the vnic (PCI devfn minus one). */
u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
#endif /*!USNIC_VNIC_H_*/

View File

@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev)
ipoib_dbg(priv, "bringing up interface\n");
netif_carrier_off(dev);
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_pkey_dev_delay_open(dev))
@ -1366,8 +1368,6 @@ void ipoib_setup(struct net_device *dev)
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
netif_carrier_off(dev);
priv->dev = dev;
spin_lock_init(&priv->lock);

View File

@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;

View File

@ -660,6 +660,7 @@ static void srp_remove_target(struct srp_target_port *target)
srp_rport_get(target->rport);
srp_remove_host(target->scsi_host);
scsi_remove_host(target->scsi_host);
srp_stop_rport_timers(target->rport);
srp_disconnect_target(target);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);

View File

@ -1371,6 +1371,15 @@ static struct mlx4_cmd_info cmd_info[] = {
.verify = NULL,
.wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
},
{
.opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
.has_inbox = false,
.has_outbox = false,
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper
},
};
static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,

View File

@ -513,6 +513,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67
#define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68
#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70
#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
@ -603,6 +604,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
if (field & 0x80)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
if (field & 0x80)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
dev_cap->fs_max_num_qp_per_entry = field;
MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
@ -860,6 +864,12 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
MLX4_PUT(outbox->buf, field,
QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
}
/* turn off ipoib managed steering for guests */
MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
field &= ~0x80;
MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
return 0;
}

View File

@ -895,6 +895,23 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
}
EXPORT_SYMBOL_GPL(mlx4_flow_detach);
/*
 * Issue the MLX4_FLOW_STEERING_IB_UC_QP_RANGE command for the QP number
 * range [min_range_qpn, max_range_qpn].
 *
 * Returns the result of mlx4_cmd().
 */
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
				      u32 max_range_qpn)
{
	/* Input parameter: min QPN in the upper 32 bits, max in the lower. */
	const u64 in_param = (((u64) min_range_qpn) << 32) |
			     (((u64) max_range_qpn) & 0xFFFFFFFF);

	return mlx4_cmd(dev, in_param, 0, 0,
			MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_protocol prot,
enum mlx4_steer_type steer)

View File

@ -1236,6 +1236,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/*
 * Command wrapper for MLX4_FLOW_STEERING_IB_UC_QP_RANGE issued on behalf
 * of a slave; same signature as the other *_wrapper functions.
 */
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);

View File

@ -123,6 +123,26 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
return err;
}
/*
 * Look up @mac in the cached MAC table of @port.
 *
 * Only entries with a non-zero reference count are considered.  On a
 * match the entry's index is stored in *@idx and 0 is returned;
 * otherwise -ENOENT is returned and *@idx is left untouched.
 */
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
{
	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
	int slot;

	for (slot = 0; slot < MLX4_MAX_MAC_NUM; slot++) {
		if (table->refs[slot] &&
		    mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[slot]))) {
			*idx = slot;
			return 0;
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];

View File

@ -3844,6 +3844,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
return err;
}
/*
 * Wrapper for the MLX4_FLOW_STEERING_IB_UC_QP_RANGE command when it is
 * issued by a slave.  The request is always refused: none of the
 * arguments is examined and -EPERM is returned.
 */
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
return -EPERM;
}
static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
{
struct res_gid *rgid;

View File

@ -201,10 +201,23 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
int type, struct mlx5_cq_modify_params *params)
struct mlx5_modify_cq_mbox_in *in, int in_sz)
{
return -ENOSYS;
struct mlx5_modify_cq_mbox_out out;
int err;
memset(&out, 0, sizeof(out));
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
if (err)
return err;
if (out.hdr.status)
return mlx5_cmd_status_to_err(&out.hdr);
return 0;
}
EXPORT_SYMBOL(mlx5_core_modify_cq);
int mlx5_init_cq_table(struct mlx5_core_dev *dev)
{

View File

@ -275,7 +275,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
}
static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
int index)
int index, int *is_str)
{
struct mlx5_query_qp_mbox_out *out;
struct mlx5_qp_context *ctx;
@ -293,19 +293,40 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
goto out;
}
*is_str = 0;
ctx = &out->ctx;
switch (index) {
case QP_PID:
param = qp->pid;
break;
case QP_STATE:
param = be32_to_cpu(ctx->flags) >> 28;
param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
*is_str = 1;
break;
case QP_XPORT:
param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
*is_str = 1;
break;
case QP_MTU:
param = ctx->mtu_msgmax >> 5;
switch (ctx->mtu_msgmax >> 5) {
case IB_MTU_256:
param = 256;
break;
case IB_MTU_512:
param = 512;
break;
case IB_MTU_1024:
param = 1024;
break;
case IB_MTU_2048:
param = 2048;
break;
case IB_MTU_4096:
param = 4096;
break;
default:
param = 0;
}
break;
case QP_N_RECV:
param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
@ -414,6 +435,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
struct mlx5_field_desc *desc;
struct mlx5_rsc_debug *d;
char tbuf[18];
int is_str = 0;
u64 field;
int ret;
@ -424,7 +446,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
d = (void *)(desc - desc->i) - sizeof(*d);
switch (d->type) {
case MLX5_DBG_RSC_QP:
field = qp_read_field(d->dev, d->object, desc->i);
field = qp_read_field(d->dev, d->object, desc->i, &is_str);
break;
case MLX5_DBG_RSC_EQ:
@ -440,7 +462,12 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
return -EINVAL;
}
ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
if (is_str)
ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field);
else
ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
if (ret > 0) {
if (copy_to_user(buf, tbuf, ret))
return -EFAULT;

View File

@ -460,7 +460,10 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
err_stop_poll:
mlx5_stop_health_poll(dev);
mlx5_cmd_teardown_hca(dev);
if (mlx5_cmd_teardown_hca(dev)) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
return err;
}
err_pagealloc_stop:
mlx5_pagealloc_stop(dev);
@ -503,7 +506,10 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
mlx5_eq_cleanup(dev);
mlx5_disable_msix(dev);
mlx5_stop_health_poll(dev);
mlx5_cmd_teardown_hca(dev);
if (mlx5_cmd_teardown_hca(dev)) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
return;
}
mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev);

View File

@ -99,7 +99,7 @@ enum {
enum {
MLX5_MAX_RECLAIM_TIME_MILI = 5000,
MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096,
MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
};
static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
@ -192,10 +192,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
struct fw_page *fp;
unsigned n;
if (list_empty(&dev->priv.free_list)) {
if (list_empty(&dev->priv.free_list))
return -ENOMEM;
mlx5_core_warn(dev, "\n");
}
fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
@ -208,7 +206,7 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
if (!fp->free_count)
list_del(&fp->list);
*addr = fp->addr + n * 4096;
*addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
return 0;
}
@ -224,14 +222,15 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
return;
}
n = (addr & ~PAGE_MASK) % 4096;
n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++;
set_bit(n, &fwp->bitmask);
if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
rb_erase(&fwp->rb_node, &dev->priv.page_root);
if (fwp->free_count != 1)
list_del(&fwp->list);
dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
DMA_BIDIRECTIONAL);
__free_page(fwp->page);
kfree(fwp);
} else if (fwp->free_count == 1) {

View File

@ -57,7 +57,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
in->arg = cpu_to_be32(arg);
in->register_id = cpu_to_be16(reg_num);
err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
sizeof(out) + size_out);
sizeof(*out) + size_out);
if (err)
goto ex2;

View File

@ -74,7 +74,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_destroy_qp_mbox_out dout;
int err;
memset(&dout, 0, sizeof(dout));
memset(&out, 0, sizeof(out));
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
@ -84,7 +84,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
}
if (out.hdr.status) {
pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
mlx5_core_warn(dev, "current num of QPs 0x%x\n",
atomic_read(&dev->num_qps));
return mlx5_cmd_status_to_err(&out.hdr);
}

View File

@ -64,10 +64,14 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
/**
* srp_tmo_valid() - check timeout combination validity
* @reconnect_delay: Reconnect delay in seconds.
* @fast_io_fail_tmo: Fast I/O fail timeout in seconds.
* @dev_loss_tmo: Device loss timeout in seconds.
*
* The combination of the timeout parameters must be such that SCSI commands
* are finished in a reasonable time. Hence do not allow the fast I/O fail
* timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
* timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT nor allow dev_loss_tmo to
* exceed that limit if failing I/O fast has been disabled. Furthermore, these
* parameters must be such that multipath can detect failed paths timely.
* Hence do not allow all three parameters to be disabled simultaneously.
*/
@ -79,6 +83,9 @@ int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo)
return -EINVAL;
if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
return -EINVAL;
if (fast_io_fail_tmo < 0 &&
dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
return -EINVAL;
if (dev_loss_tmo >= LONG_MAX / HZ)
return -EINVAL;
if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
@ -368,6 +375,7 @@ static int srp_rport_set_state(struct srp_rport *rport,
/**
* srp_reconnect_work() - reconnect and schedule a new attempt if necessary
* @work: Work structure used for scheduling this operation.
*/
static void srp_reconnect_work(struct work_struct *work)
{
@ -408,6 +416,7 @@ static void __rport_fail_io_fast(struct srp_rport *rport)
/**
* rport_fast_io_fail_timedout() - fast I/O failure timeout handler
* @work: Work structure used for scheduling this operation.
*/
static void rport_fast_io_fail_timedout(struct work_struct *work)
{
@ -426,6 +435,7 @@ static void rport_fast_io_fail_timedout(struct work_struct *work)
/**
* rport_dev_loss_timedout() - device loss timeout handler
* @work: Work structure used for scheduling this operation.
*/
static void rport_dev_loss_timedout(struct work_struct *work)
{
@ -452,42 +462,35 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
lockdep_assert_held(&rport->mutex);
if (!rport->deleted) {
delay = rport->reconnect_delay;
fast_io_fail_tmo = rport->fast_io_fail_tmo;
dev_loss_tmo = rport->dev_loss_tmo;
pr_debug("%s current state: %d\n",
dev_name(&shost->shost_gendev), rport->state);
delay = rport->reconnect_delay;
fast_io_fail_tmo = rport->fast_io_fail_tmo;
dev_loss_tmo = rport->dev_loss_tmo;
pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev),
rport->state);
if (delay > 0)
queue_delayed_work(system_long_wq,
&rport->reconnect_work,
1UL * delay * HZ);
if (fast_io_fail_tmo >= 0 &&
srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
pr_debug("%s new state: %d\n",
dev_name(&shost->shost_gendev),
rport->state);
scsi_target_block(&shost->shost_gendev);
if (rport->state == SRP_RPORT_LOST)
return;
if (delay > 0)
queue_delayed_work(system_long_wq, &rport->reconnect_work,
1UL * delay * HZ);
if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
rport->state);
scsi_target_block(&shost->shost_gendev);
if (fast_io_fail_tmo >= 0)
queue_delayed_work(system_long_wq,
&rport->fast_io_fail_work,
1UL * fast_io_fail_tmo * HZ);
}
if (dev_loss_tmo >= 0)
queue_delayed_work(system_long_wq,
&rport->dev_loss_work,
1UL * dev_loss_tmo * HZ);
} else {
pr_debug("%s has already been deleted\n",
dev_name(&shost->shost_gendev));
srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST);
scsi_target_unblock(&shost->shost_gendev,
SDEV_TRANSPORT_OFFLINE);
}
}
/**
* srp_start_tl_fail_timers() - start the transport layer failure timers
* @rport: SRP target port.
*
* Start the transport layer fast I/O failure and device loss timers. Do not
* modify a timer that was already started.
@ -502,6 +505,7 @@ EXPORT_SYMBOL(srp_start_tl_fail_timers);
/**
* scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
* @shost: SCSI host for which to count the number of scsi_request_fn() callers.
*/
static int scsi_request_fn_active(struct Scsi_Host *shost)
{
@ -522,6 +526,7 @@ static int scsi_request_fn_active(struct Scsi_Host *shost)
/**
* srp_reconnect_rport() - reconnect to an SRP target port
* @rport: SRP target port.
*
* Blocks SCSI command queueing before invoking reconnect() such that
* queuecommand() won't be invoked concurrently with reconnect() from outside
@ -556,7 +561,7 @@ int srp_reconnect_rport(struct srp_rport *rport)
scsi_target_block(&shost->shost_gendev);
while (scsi_request_fn_active(shost))
msleep(20);
res = i->f->reconnect(rport);
res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV;
pr_debug("%s (state %d): transport.reconnect() returned %d\n",
dev_name(&shost->shost_gendev), rport->state, res);
if (res == 0) {
@ -578,9 +583,9 @@ int srp_reconnect_rport(struct srp_rport *rport)
spin_unlock_irq(shost->host_lock);
} else if (rport->state == SRP_RPORT_RUNNING) {
/*
* srp_reconnect_rport() was invoked with fast_io_fail
* off. Mark the port as failed and start the TL failure
* timers if these had not yet been started.
* srp_reconnect_rport() has been invoked with fast_io_fail
* and dev_loss off. Mark the port as failed and start the TL
* failure timers if these had not yet been started.
*/
__rport_fail_io_fast(rport);
scsi_target_unblock(&shost->shost_gendev,
@ -599,6 +604,7 @@ EXPORT_SYMBOL(srp_reconnect_rport);
/**
* srp_timed_out() - SRP transport intercept of the SCSI timeout EH
* @scmd: SCSI command.
*
* If a timeout occurs while an rport is in the blocked state, ask the SCSI
* EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
@ -622,10 +628,6 @@ static void srp_rport_release(struct device *dev)
{
struct srp_rport *rport = dev_to_rport(dev);
cancel_delayed_work_sync(&rport->reconnect_work);
cancel_delayed_work_sync(&rport->fast_io_fail_work);
cancel_delayed_work_sync(&rport->dev_loss_work);
put_device(dev->parent);
kfree(rport);
}
@ -674,6 +676,7 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev)
/**
* srp_rport_get() - increment rport reference count
* @rport: SRP target port.
*/
void srp_rport_get(struct srp_rport *rport)
{
@ -683,6 +686,7 @@ EXPORT_SYMBOL(srp_rport_get);
/**
* srp_rport_put() - decrement rport reference count
* @rport: SRP target port.
*/
void srp_rport_put(struct srp_rport *rport)
{
@ -780,12 +784,6 @@ void srp_rport_del(struct srp_rport *rport)
device_del(dev);
transport_destroy_device(dev);
mutex_lock(&rport->mutex);
if (rport->state == SRP_RPORT_BLOCKED)
__rport_fail_io_fast(rport);
rport->deleted = true;
mutex_unlock(&rport->mutex);
put_device(dev);
}
EXPORT_SYMBOL_GPL(srp_rport_del);
@ -810,6 +808,27 @@ void srp_remove_host(struct Scsi_Host *shost)
}
EXPORT_SYMBOL_GPL(srp_remove_host);
/**
* srp_stop_rport_timers() - stop the transport layer recovery timers
* @rport: SRP target port.
*
* Must be called after srp_remove_host() and scsi_remove_host(). The caller
* must hold a reference on the rport (rport->dev) and on the SCSI host
* (rport->dev.parent).
*
* Requests the SRP_RPORT_LOST state while holding rport->mutex and then
* synchronously cancels the reconnect, fast I/O fail and device loss
* delayed work.
*/
void srp_stop_rport_timers(struct srp_rport *rport)
{
mutex_lock(&rport->mutex);
/* A port that is still blocked is passed to __rport_fail_io_fast() first. */
if (rport->state == SRP_RPORT_BLOCKED)
__rport_fail_io_fast(rport);
/*
* Once the state is SRP_RPORT_LOST, __srp_start_tl_fail_timers() returns
* before queueing any work and srp_reconnect_rport() yields -ENODEV
* instead of invoking the reconnect callback.
*/
srp_rport_set_state(rport, SRP_RPORT_LOST);
mutex_unlock(&rport->mutex);
/*
* NOTE(review): presumably cancelled only after dropping the mutex because
* the work handlers take rport->mutex themselves - confirm.
*/
cancel_delayed_work_sync(&rport->reconnect_work);
cancel_delayed_work_sync(&rport->fast_io_fail_work);
cancel_delayed_work_sync(&rport->dev_loss_work);
}
EXPORT_SYMBOL_GPL(srp_stop_rport_timers);
static int srp_tsk_mgmt_response(struct Scsi_Host *shost, u64 nexus, u64 tm_id,
int result)
{

View File

@ -157,6 +157,7 @@ enum {
/* register/delete flow steering network rules */
MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
MLX4_QP_FLOW_STEERING_DETACH = 0x66,
MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
};
enum {

View File

@ -34,6 +34,7 @@
#define MLX4_CQ_H
#include <linux/types.h>
#include <uapi/linux/if_ether.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@ -43,10 +44,15 @@ struct mlx4_cqe {
__be32 immed_rss_invalid;
__be32 g_mlpath_rqpn;
__be16 sl_vid;
__be16 rlid;
__be16 status;
u8 ipv6_ext_mask;
u8 badfcs_enc;
union {
struct {
__be16 rlid;
__be16 status;
u8 ipv6_ext_mask;
u8 badfcs_enc;
};
u8 smac[ETH_ALEN];
};
__be32 byte_cnt;
__be16 wqe_index;
__be16 checksum;
@ -83,6 +89,7 @@ struct mlx4_ts_cqe {
enum {
MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29,
MLX4_CQE_QPN_MASK = 0xffffff,
MLX4_CQE_VID_MASK = 0xfff,
};
enum {

View File

@ -160,7 +160,8 @@ enum {
MLX4_DEV_CAP_FLAG2_TS = 1LL << 5,
MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6,
MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7,
MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8
MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8,
MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9
};
enum {
@ -1095,6 +1096,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
u8 *pg, u16 *ratelimit);
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
@ -1144,6 +1146,9 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
u32 max_range_qpn);
cycle_t mlx4_read_clock(struct mlx4_dev *dev);
#endif /* MLX4_DEVICE_H */

View File

@ -79,15 +79,23 @@ enum {
MLX5_CQE_RESP_SEND = 2,
MLX5_CQE_RESP_SEND_IMM = 3,
MLX5_CQE_RESP_SEND_INV = 4,
MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */
MLX5_CQE_RESIZE_CQ = 5,
MLX5_CQE_REQ_ERR = 13,
MLX5_CQE_RESP_ERR = 14,
MLX5_CQE_INVALID = 15,
};
enum {
MLX5_CQ_MODIFY_RESEIZE = 0,
MLX5_CQ_MODIFY_MODER = 1,
MLX5_CQ_MODIFY_MAPPING = 2,
MLX5_CQ_MODIFY_PERIOD = 1 << 0,
MLX5_CQ_MODIFY_COUNT = 1 << 1,
MLX5_CQ_MODIFY_OVERRUN = 1 << 2,
};
enum {
MLX5_CQ_OPMOD_RESIZE = 1,
MLX5_MODIFY_CQ_MASK_LOG_SIZE = 1 << 0,
MLX5_MODIFY_CQ_MASK_PG_OFFSET = 1 << 1,
MLX5_MODIFY_CQ_MASK_PG_SIZE = 1 << 2,
};
struct mlx5_cq_modify_params {
@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
struct mlx5_query_cq_mbox_out *out);
int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
int type, struct mlx5_cq_modify_params *params);
struct mlx5_modify_cq_mbox_in *in, int in_sz);
int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);

View File

@ -104,9 +104,10 @@ enum {
};
enum {
MLX5_BF_REGS_PER_PAGE = 4,
MLX5_MAX_UAR_PAGES = 1 << 8,
MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
MLX5_BF_REGS_PER_PAGE = 4,
MLX5_MAX_UAR_PAGES = 1 << 8,
MLX5_NON_FP_BF_REGS_PER_PAGE = 2,
MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
};
enum {
@ -176,6 +177,8 @@ enum {
MLX5_DEV_CAP_FLAG_APM = 1LL << 17,
MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29,
MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30,
MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32,
MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38,
MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39,
@ -231,7 +234,8 @@ enum {
};
enum {
MLX5_ADAPTER_PAGE_SHIFT = 12
MLX5_ADAPTER_PAGE_SHIFT = 12,
MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
};
enum {
@ -697,6 +701,20 @@ struct mlx5_query_cq_mbox_out {
__be64 pas[0];
};
/*
* Input mailbox for modifying a CQ; this is the type of the "in" argument
* of mlx5_core_modify_cq().
*/
struct mlx5_modify_cq_mbox_in {
struct mlx5_inbox_hdr hdr;
__be32 cqn;
/* NOTE(review): presumably a mask of the MLX5_CQ_MODIFY_xxx / MLX5_MODIFY_CQ_MASK_xxx bits - confirm. */
__be32 field_select;
struct mlx5_cq_context ctx;
u8 rsvd[192];
/* Zero-length trailing array: entries, if any, follow the fixed part. */
__be64 pas[0];
};
struct mlx5_modify_cq_mbox_out {
struct mlx5_outbox_hdr hdr;
u8 rsvd[8];
};
struct mlx5_enable_hca_mbox_in {
struct mlx5_inbox_hdr hdr;
u8 rsvd[8];
@ -831,8 +849,8 @@ struct mlx5_create_mkey_mbox_in {
struct mlx5_mkey_seg seg;
u8 rsvd1[16];
__be32 xlat_oct_act_size;
__be32 bsf_coto_act_size;
u8 rsvd2[168];
__be32 rsvd2;
u8 rsvd3[168];
__be64 pas[0];
};
@ -871,6 +889,7 @@ struct mlx5_modify_mkey_mbox_in {
struct mlx5_modify_mkey_mbox_out {
struct mlx5_outbox_hdr hdr;
u8 rsvd[8];
};
struct mlx5_dump_mkey_mbox_in {

View File

@ -464,4 +464,49 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
/**
 * mlx5_qp_type_str() - printable name of an mlx5 QP service type
 * @type: One of the MLX5_QP_ST_* constants.
 *
 * Each name is the suffix of the matching MLX5_QP_ST_* constant.
 *
 * Return: a constant string naming @type, or "Invalid transport type" when
 * @type matches none of the cases below. The string must not be freed.
 */
static inline const char *mlx5_qp_type_str(int type)
{
	switch (type) {
	case MLX5_QP_ST_RC: return "RC";
	/* Was "C": the only case whose name did not match its constant. */
	case MLX5_QP_ST_UC: return "UC";
	case MLX5_QP_ST_UD: return "UD";
	case MLX5_QP_ST_XRC: return "XRC";
	case MLX5_QP_ST_MLX: return "MLX";
	case MLX5_QP_ST_QP0: return "QP0";
	case MLX5_QP_ST_QP1: return "QP1";
	case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
	case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6";
	case MLX5_QP_ST_SNIFFER: return "SNIFFER";
	case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR";
	case MLX5_QP_ST_PTP_1588: return "PTP_1588";
	case MLX5_QP_ST_REG_UMR: return "REG_UMR";
	default: return "Invalid transport type";
	}
}
/**
 * mlx5_qp_state_str() - printable name of an mlx5 QP state
 * @state: One of the MLX5_QP_STATE_* constants.
 *
 * Return: a constant string naming @state, or "Invalid QP state" when
 * @state matches none of the cases below.
 */
static inline const char *mlx5_qp_state_str(int state)
{
	/* Fallback used when no case below matches. */
	const char *name = "Invalid QP state";

	switch (state) {
	case MLX5_QP_STATE_RST:		name = "RST";		break;
	case MLX5_QP_STATE_INIT:	name = "INIT";		break;
	case MLX5_QP_STATE_RTR:		name = "RTR";		break;
	case MLX5_QP_STATE_RTS:		name = "RTS";		break;
	case MLX5_QP_STATE_SQER:	name = "SQER";		break;
	case MLX5_QP_STATE_SQD:		name = "SQD";		break;
	case MLX5_QP_STATE_ERR:		name = "ERR";		break;
	case MLX5_QP_STATE_SQ_DRAINING:	name = "SQ_DRAINING";	break;
	case MLX5_QP_STATE_SUSPENDED:	name = "SUSPENDED";	break;
	}

	return name;
}
#endif /* MLX5_QP_H */

View File

@ -38,10 +38,15 @@
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <net/ipv6.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <net/ipv6.h>
struct rdma_addr_client {
atomic_t refcount;
@ -72,7 +77,8 @@ struct rdma_dev_addr {
* rdma_translate_ip - Translate a local IP address to an RDMA hardware
* address.
*/
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@ -104,6 +110,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
u16 *vlan_id);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
{
return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
@ -126,41 +136,60 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
/*
 * Build a link-local GID in @gid from a 6-byte MAC address and a VLAN ID.
 *
 * Bytes 0-3 hold 0xfe800000 in big-endian order. Bytes 8-10 and 13-15 hold
 * the first and last three MAC bytes, with bit 1 of byte 8 inverted.
 * Bytes 11-12 hold the VLAN ID (high byte first) when @vid is below 0x1000,
 * and 0xff, 0xfe otherwise.
 */
static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
{
	memset(gid->raw, 0, 16);
	*((__be32 *) gid->raw) = cpu_to_be32(0xfe800000);

	/* Upper and lower halves of the MAC, split around bytes 11-12. */
	memcpy(gid->raw + 8, mac, 3);
	memcpy(gid->raw + 13, mac + 3, 3);
	gid->raw[8] ^= 2;

	gid->raw[11] = vid < 0x1000 ? vid >> 8 : 0xff;
	gid->raw[12] = vid < 0x1000 ? vid & 0xff : 0xfe;
}
/*
 * Return the VLAN ID of @dev when its priv_flags carry IFF_802_1Q_VLAN,
 * and 0xffff otherwise.
 */
static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
{
	if (dev->priv_flags & IFF_802_1Q_VLAN)
		return vlan_dev_vlan_id(dev);

	return 0xffff;
}
/*
 * Convert the IP address in @addr into a GID.
 *
 * An AF_INET address is stored as an IPv4-mapped IPv6 address; an AF_INET6
 * address is copied verbatim (16 bytes).
 *
 * Returns 0 on success or -EINVAL for any other address family, in which
 * case @gid is left untouched.
 */
static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
{
	if (addr->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)addr;

		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr,
				       (struct in6_addr *)gid);
		return 0;
	}

	if (addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

		memcpy(gid->raw, &sin6->sin6_addr, 16);
		return 0;
	}

	return -EINVAL;
}
/*
 * Convert @gid into a socket address written to @out.
 *
 * Important - @out should point to storage large enough for either a
 * sockaddr_in or a sockaddr_in6 (i.e. a union of the two): an IPv4-mapped
 * GID yields an AF_INET address built from GID bytes 12-15, anything else
 * yields an AF_INET6 address holding all 16 GID bytes.
 *
 * Always returns 0.
 */
static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
{
	struct sockaddr_in6 *sin6;

	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
		struct sockaddr_in *sin = (struct sockaddr_in *)out;

		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		memcpy(&sin->sin_addr.s_addr, gid->raw + 12, 4);
		return 0;
	}

	sin6 = (struct sockaddr_in6 *)out;
	memset(sin6, 0, sizeof(*sin6));
	sin6->sin6_family = AF_INET6;
	memcpy(&sin6->sin6_addr.s6_addr, gid->raw, 16);
	return 0;
}
static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
{
struct net_device *dev;
u16 vid = 0xffff;
struct in_device *ip4;
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (dev) {
vid = rdma_vlan_dev_vlan_id(dev);
ip4 = (struct in_device *)dev->ip_ptr;
if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
(struct in6_addr *)gid);
dev_put(dev);
}
iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid);
}
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)

View File

@ -601,4 +601,5 @@ struct ib_cm_sidr_rep_param {
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
#endif /* IB_CM_H */

View File

@ -34,6 +34,7 @@
#define IB_PACK_H
#include <rdma/ib_verbs.h>
#include <uapi/linux/if_ether.h>
enum {
IB_LRH_BYTES = 8,

View File

@ -154,6 +154,9 @@ struct ib_sa_path_rec {
u8 packet_life_time_selector;
u8 packet_life_time;
u8 preference;
u8 smac[ETH_ALEN];
u8 dmac[ETH_ALEN];
u16 vlan_id;
};
#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)

View File

@ -48,6 +48,7 @@
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <linux/atomic.h>
#include <asm/uaccess.h>
@ -69,12 +70,14 @@ enum rdma_node_type {
RDMA_NODE_IB_ROUTER,
RDMA_NODE_RNIC,
RDMA_NODE_USNIC,
RDMA_NODE_USNIC_UDP,
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_USNIC
RDMA_TRANSPORT_USNIC,
RDMA_TRANSPORT_USNIC_UDP
};
enum rdma_transport_type
@ -472,6 +475,8 @@ struct ib_ah_attr {
u8 static_rate;
u8 ah_flags;
u8 port_num;
u8 dmac[ETH_ALEN];
u16 vlan_id;
};
enum ib_wc_status {
@ -524,6 +529,8 @@ enum ib_wc_flags {
IB_WC_WITH_IMM = (1<<1),
IB_WC_WITH_INVALIDATE = (1<<2),
IB_WC_IP_CSUM_OK = (1<<3),
IB_WC_WITH_SMAC = (1<<4),
IB_WC_WITH_VLAN = (1<<5),
};
struct ib_wc {
@ -544,6 +551,8 @@ struct ib_wc {
u8 sl;
u8 dlid_path_bits;
u8 port_num; /* valid only for DR SMPs on switches */
u8 smac[ETH_ALEN];
u16 vlan_id;
};
enum ib_cq_notify_flags {
@ -633,6 +642,7 @@ enum ib_qp_type {
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
IB_QP_CREATE_NETIF_QP = 1 << 5,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@ -721,7 +731,11 @@ enum ib_qp_attr_mask {
IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
IB_QP_DEST_QPN = (1<<20)
IB_QP_DEST_QPN = (1<<20),
IB_QP_SMAC = (1<<21),
IB_QP_ALT_SMAC = (1<<22),
IB_QP_VID = (1<<23),
IB_QP_ALT_VID = (1<<24),
};
enum ib_qp_state {
@ -771,6 +785,10 @@ struct ib_qp_attr {
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
u8 smac[ETH_ALEN];
u8 alt_smac[ETH_ALEN];
u16 vlan_id;
u16 alt_vlan_id;
};
enum ib_wr_opcode {
@ -1099,13 +1117,14 @@ enum ib_flow_attr_type {
enum ib_flow_spec_type {
/* L2 headers*/
IB_FLOW_SPEC_ETH = 0x20,
IB_FLOW_SPEC_IB = 0x22,
/* L3 header*/
IB_FLOW_SPEC_IPV4 = 0x30,
/* L4 headers*/
IB_FLOW_SPEC_TCP = 0x40,
IB_FLOW_SPEC_UDP = 0x41
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
/* Flow steering rule priority is set according to its domain.
@ -1133,6 +1152,18 @@ struct ib_flow_spec_eth {
struct ib_flow_eth_filter mask;
};
/* Match fields used as both value and mask in struct ib_flow_spec_ib. */
struct ib_flow_ib_filter {
/* Stored big-endian (__be16). */
__be16 dlid;
__u8 sl;
};
/*
* Flow specification pairing an ib_flow_ib_filter value with a mask of the
* same layout; reachable as the "ib" member of union ib_flow_spec.
*/
struct ib_flow_spec_ib {
/* NOTE(review): presumably IB_FLOW_SPEC_IB for this spec - confirm. */
enum ib_flow_spec_type type;
u16 size;
struct ib_flow_ib_filter val;
struct ib_flow_ib_filter mask;
};
struct ib_flow_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
@ -1163,6 +1194,7 @@ union ib_flow_spec {
u16 size;
};
struct ib_flow_spec_eth eth;
struct ib_flow_spec_ib ib;
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
};
@ -1488,6 +1520,7 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
* @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@ -1496,7 +1529,8 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
* and that the attribute mask supplied is allowed for the transition.
*/
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask);
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll);
int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);

View File

@ -19,7 +19,7 @@ struct srp_rport_identifiers {
* @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer
* is running and I/O has been blocked.
* @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast.
* @SRP_RPORT_LOST: Device loss timer has expired; port is being removed.
* @SRP_RPORT_LOST: Port is being removed.
*/
enum srp_rport_state {
SRP_RPORT_RUNNING,
@ -29,10 +29,26 @@ enum srp_rport_state {
};
/**
* struct srp_rport
* @lld_data: LLD private data.
* @mutex: Protects against concurrent rport reconnect / fast_io_fail /
* dev_loss_tmo activity.
* struct srp_rport - SRP initiator or target port
*
* Fields that are relevant for SRP initiator and SRP target drivers:
* @dev: Device associated with this rport.
* @port_id: 16-byte port identifier.
* @roles: Role of this port - initiator or target.
*
* Fields that are only relevant for SRP initiator drivers:
* @lld_data: LLD private data.
* @mutex: Protects against concurrent rport reconnect /
* fast_io_fail / dev_loss_tmo activity.
* @state: rport state.
* @deleted: Whether or not srp_rport_del() has already been invoked.
* @reconnect_delay: Reconnect delay in seconds.
* @failed_reconnects: Number of failed reconnect attempts.
* @reconnect_work: Work structure used for scheduling reconnect attempts.
* @fast_io_fail_tmo: Fast I/O fail timeout in seconds.
* @dev_loss_tmo: Device loss timeout in seconds.
* @fast_io_fail_work: Work structure used for scheduling fast I/O fail work.
* @dev_loss_work: Work structure used for scheduling device loss work.
*/
struct srp_rport {
/* for initiator and target drivers */
@ -48,7 +64,6 @@ struct srp_rport {
struct mutex mutex;
enum srp_rport_state state;
bool deleted;
int reconnect_delay;
int failed_reconnects;
struct delayed_work reconnect_work;
@ -60,6 +75,8 @@ struct srp_rport {
/**
* struct srp_function_template
*
* Fields that are only relevant for SRP initiator drivers:
* @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
* dev_loss_tmo sysfs attribute for an rport.
* @reset_timer_if_blocked: Whether or not srp_timed_out() should reset the command
@ -71,6 +88,11 @@ struct srp_rport {
* srp_reconnect_rport().
* @terminate_rport_io: Callback function for terminating all outstanding I/O
* requests for an rport.
* @rport_delete: Callback function that deletes an rport.
*
* Fields that are only relevant for SRP target drivers:
* @tsk_mgmt_response: Callback function for sending a task management response.
* @it_nexus_response: Callback function for processing an IT nexus response.
*/
struct srp_function_template {
/* for initiator drivers */
@ -101,9 +123,11 @@ extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
extern int srp_reconnect_rport(struct srp_rport *rport);
extern void srp_start_tl_fail_timers(struct srp_rport *rport);
extern void srp_remove_host(struct Scsi_Host *);
extern void srp_stop_rport_timers(struct srp_rport *rport);
/**
* srp_chkready() - evaluate the transport layer state before I/O
* @rport: SRP target port pointer.
*
* Returns a SCSI result code that can be returned by the LLD queuecommand()
* implementation. The role of this function is similar to that of