vhost,virtio,vhost: fixes,features

Hardening work by Jason
 vdpa driver for Alibaba ENI
 Performance tweaks for virtio blk
 virtio rng rework using an internal buffer
 mac/mtu programming for mlx5 vdpa
 Misc fixes, cleanups
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmF/su8PHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpo+sIAJjBTvbET+d0KuIMt8YBGsU+NWgHaJxW06hm
 GHZzinueZYWaS20MPMYMPfcHUgigWj0kk7F0gMljG5rtDFzR85JkOi7+e5V6RVJW
 8SQc7JjA1Krde6EiPJdlv3mVkLz/5VbrGUgjAW9di/O04Xc/jAc9kmJ41wTYr0mL
 E5IsUi9QBmgHtKQ+2ofb9QEWuebJclGK+NVd3Kp08BtAQOrdn5UrIzY30/sjkZwE
 ul2ll6v/k7T3dCQbHD/wMgfFycPvoZVfxaVj+FWQnwdWkqZwzMRmpKPRes4KJfww
 Lfx1qox10mJzH7XJCs7xmOM8f7HgNNWi0gD5FxNlfkiz1AwHYOk=
 =DTXY
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "vhost and virtio fixes and features:

   - Hardening work by Jason

   - vdpa driver for Alibaba ENI

   - Performance tweaks for virtio blk

   - virtio rng rework using an internal buffer

   - mac/mtu programming for mlx5 vdpa

   - Misc fixes, cleanups"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (45 commits)
  vdpa/mlx5: Forward only packets with allowed MAC address
  vdpa/mlx5: Support configuration of MAC
  vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit
  vdpa_sim_net: Enable user to set mac address and mtu
  vdpa: Enable user to set mac and mtu of vdpa device
  vdpa: Use kernel coding style for structure comments
  vdpa: Introduce query of device config layout
  vdpa: Introduce and use vdpa device get, set config helpers
  virtio-scsi: don't let virtio core to validate used buffer length
  virtio-blk: don't let virtio core to validate used length
  virtio-net: don't let virtio core to validate used length
  virtio_ring: validate used buffer length
  virtio_blk: correct types for status handling
  virtio_blk: allow 0 as num_request_queues
  i2c: virtio: Add support for zero-length requests
  virtio-blk: fixup coccinelle warnings
  virtio_ring: fix typos in vring_desc_extra
  virtio-pci: harden INTX interrupts
  virtio_pci: harden MSI-X interrupts
  virtio_config: introduce a new .enable_cbs method
  ...
This commit is contained in:
Linus Torvalds 2021-11-03 15:00:39 -07:00
commit 43e1b12927
36 changed files with 1779 additions and 295 deletions

View File

@ -20083,6 +20083,13 @@ S: Maintained
F: drivers/i2c/busses/i2c-virtio.c
F: include/uapi/linux/virtio_i2c.h
VIRTIO PMEM DRIVER
M: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/nvdimm/virtio_pmem.c
F: drivers/nvdimm/nd_virtio.c
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>

View File

@ -371,6 +371,7 @@ config XEN_BLKDEV_BACKEND
config VIRTIO_BLK
tristate "Virtio block driver"
depends on VIRTIO
select SG_POOL
help
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.

View File

@ -24,6 +24,19 @@
/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define VIRTIO_BLK_INLINE_SG_CNT 0
#else
#define VIRTIO_BLK_INLINE_SG_CNT 2
#endif
static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
"Limit the number of request queues to use for blk device. "
"0 for no limit. "
"Values > nr_cpu_ids truncated to nr_cpu_ids.");
static int major;
static DEFINE_IDA(vd_index_ida);
@ -77,6 +90,7 @@ struct virtio_blk {
struct virtblk_req {
struct virtio_blk_outhdr out_hdr;
u8 status;
struct sg_table sg_table;
struct scatterlist sg[];
};
@ -162,12 +176,93 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
return 0;
}
static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
{
if (blk_rq_nr_phys_segments(req))
sg_free_table_chained(&vbr->sg_table,
VIRTIO_BLK_INLINE_SG_CNT);
}
static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
struct virtblk_req *vbr)
{
int err;
if (!blk_rq_nr_phys_segments(req))
return 0;
vbr->sg_table.sgl = vbr->sg;
err = sg_alloc_table_chained(&vbr->sg_table,
blk_rq_nr_phys_segments(req),
vbr->sg_table.sgl,
VIRTIO_BLK_INLINE_SG_CNT);
if (unlikely(err))
return -ENOMEM;
return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
}
static void virtblk_cleanup_cmd(struct request *req)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
kfree(bvec_virt(&req->special_vec));
}
static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
struct request *req,
struct virtblk_req *vbr)
{
bool unmap = false;
u32 type;
vbr->out_hdr.sector = 0;
switch (req_op(req)) {
case REQ_OP_READ:
type = VIRTIO_BLK_T_IN;
vbr->out_hdr.sector = cpu_to_virtio64(vdev,
blk_rq_pos(req));
break;
case REQ_OP_WRITE:
type = VIRTIO_BLK_T_OUT;
vbr->out_hdr.sector = cpu_to_virtio64(vdev,
blk_rq_pos(req));
break;
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
case REQ_OP_DISCARD:
type = VIRTIO_BLK_T_DISCARD;
break;
case REQ_OP_WRITE_ZEROES:
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
default:
WARN_ON_ONCE(1);
return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
if (virtblk_setup_discard_write_zeroes(req, unmap))
return BLK_STS_RESOURCE;
}
return 0;
}
static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
kfree(bvec_virt(&req->special_vec));
virtblk_unmap_data(req, vbr);
virtblk_cleanup_cmd(req);
blk_mq_end_request(req, virtblk_result(vbr));
}
@ -223,59 +318,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
unsigned long flags;
unsigned int num;
int qid = hctx->queue_num;
int err;
bool notify = false;
bool unmap = false;
u32 type;
blk_status_t status;
int err;
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
switch (req_op(req)) {
case REQ_OP_READ:
case REQ_OP_WRITE:
type = 0;
break;
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
case REQ_OP_DISCARD:
type = VIRTIO_BLK_T_DISCARD;
break;
case REQ_OP_WRITE_ZEROES:
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
default:
WARN_ON_ONCE(1);
return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
vbr->out_hdr.sector = type ?
0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
status = virtblk_setup_cmd(vblk->vdev, req, vbr);
if (unlikely(status))
return status;
blk_mq_start_request(req);
if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
err = virtblk_setup_discard_write_zeroes(req, unmap);
if (err)
return BLK_STS_RESOURCE;
}
num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
if (num) {
if (rq_data_dir(req) == WRITE)
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
else
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
num = virtblk_map_data(hctx, req, vbr);
if (unlikely(num < 0)) {
virtblk_cleanup_cmd(req);
return BLK_STS_RESOURCE;
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
/* Don't stop the queue if -ENOMEM: we may have failed to
@ -284,6 +346,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
if (err == -ENOSPC)
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
virtblk_unmap_data(req, vbr);
virtblk_cleanup_cmd(req);
switch (err) {
case -ENOSPC:
return BLK_STS_DEV_RESOURCE;
@ -497,8 +561,14 @@ static int init_vq(struct virtio_blk *vblk)
&num_vqs);
if (err)
num_vqs = 1;
if (!err && !num_vqs) {
dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
return -EINVAL;
}
num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
num_vqs = min_t(unsigned int,
min_not_zero(num_request_queues, nr_cpu_ids),
num_vqs);
vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
if (!vblk->vqs)
@ -624,7 +694,7 @@ cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
u8 writeback = virtblk_get_cache_mode(vblk->vdev);
BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
}
static DEVICE_ATTR_RW(cache_type);
@ -660,16 +730,6 @@ static const struct attribute_group *virtblk_attr_groups[] = {
NULL,
};
static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
struct virtio_blk *vblk = set->driver_data;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
sg_init_table(vbr->sg, vblk->sg_elems);
return 0;
}
static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
@ -682,7 +742,6 @@ static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
.map_queues = virtblk_map_queues,
};
@ -762,7 +821,7 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
vblk->tag_set.cmd_size =
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
vblk->tag_set.driver_data = vblk;
vblk->tag_set.nr_hw_queues = vblk->num_vqs;
@ -990,6 +1049,7 @@ static struct virtio_driver virtio_blk = {
.feature_table_size = ARRAY_SIZE(features),
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

View File

@ -18,13 +18,20 @@ static DEFINE_IDA(rng_index_ida);
struct virtrng_info {
struct hwrng hwrng;
struct virtqueue *vq;
struct completion have_data;
char name[25];
unsigned int data_avail;
int index;
bool busy;
bool hwrng_register_done;
bool hwrng_removed;
/* data transfer */
struct completion have_data;
unsigned int data_avail;
unsigned int data_idx;
/* minimal size returned by rng_buffer_size() */
#if SMP_CACHE_BYTES < 32
u8 data[32];
#else
u8 data[SMP_CACHE_BYTES];
#endif
};
static void random_recv_done(struct virtqueue *vq)
@ -35,54 +42,88 @@ static void random_recv_done(struct virtqueue *vq)
if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
return;
vi->data_idx = 0;
complete(&vi->have_data);
}
/* The host will fill any buffer we give it with sweet, sweet randomness. */
static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
static void request_entropy(struct virtrng_info *vi)
{
struct scatterlist sg;
sg_init_one(&sg, buf, size);
reinit_completion(&vi->have_data);
vi->data_avail = 0;
vi->data_idx = 0;
sg_init_one(&sg, vi->data, sizeof(vi->data));
/* There should always be room for one buffer. */
virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL);
virtqueue_kick(vi->vq);
}
static unsigned int copy_data(struct virtrng_info *vi, void *buf,
unsigned int size)
{
size = min_t(unsigned int, size, vi->data_avail);
memcpy(buf, vi->data + vi->data_idx, size);
vi->data_idx += size;
vi->data_avail -= size;
if (vi->data_avail == 0)
request_entropy(vi);
return size;
}
static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
{
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
unsigned int chunk;
size_t read;
if (vi->hwrng_removed)
return -ENODEV;
if (!vi->busy) {
vi->busy = true;
reinit_completion(&vi->have_data);
register_buffer(vi, buf, size);
read = 0;
/* copy available data */
if (vi->data_avail) {
chunk = copy_data(vi, buf, size);
size -= chunk;
read += chunk;
}
if (!wait)
return 0;
return read;
ret = wait_for_completion_killable(&vi->have_data);
if (ret < 0)
return ret;
/* We have already copied available entropy,
* so either size is 0 or data_avail is 0
*/
while (size != 0) {
/* data_avail is 0 but a request is pending */
ret = wait_for_completion_killable(&vi->have_data);
if (ret < 0)
return ret;
/* if vi->data_avail is 0, we have been interrupted
* by a cleanup, but buffer stays in the queue
*/
if (vi->data_avail == 0)
return read;
vi->busy = false;
chunk = copy_data(vi, buf + read, size);
size -= chunk;
read += chunk;
}
return vi->data_avail;
return read;
}
static void virtio_cleanup(struct hwrng *rng)
{
struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
if (vi->busy)
wait_for_completion(&vi->have_data);
complete(&vi->have_data);
}
static int probe_common(struct virtio_device *vdev)
@ -118,6 +159,9 @@ static int probe_common(struct virtio_device *vdev)
goto err_find;
}
/* we always have a pending entropy request */
request_entropy(vi);
return 0;
err_find:
@ -133,9 +177,9 @@ static void remove_common(struct virtio_device *vdev)
vi->hwrng_removed = true;
vi->data_avail = 0;
vi->data_idx = 0;
complete(&vi->have_data);
vdev->config->reset(vdev);
vi->busy = false;
if (vi->hwrng_register_done)
hwrng_unregister(&vi->hwrng);
vdev->config->del_vqs(vdev);

View File

@ -28,6 +28,7 @@
#include "../tty/hvc/hvc_console.h"
#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)
#define VIRTCONS_MAX_PORTS 0x8000
/*
* This is a global struct for storing common data for all the devices
@ -2036,6 +2037,14 @@ static int virtcons_probe(struct virtio_device *vdev)
virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
struct virtio_console_config, max_nr_ports,
&portdev->max_nr_ports) == 0) {
if (portdev->max_nr_ports == 0 ||
portdev->max_nr_ports > VIRTCONS_MAX_PORTS) {
dev_err(&vdev->dev,
"Invalidate max_nr_ports %d",
portdev->max_nr_ports);
err = -EINVAL;
goto free;
}
multiport = true;
}

View File

@ -62,35 +62,33 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
for (i = 0; i < num; i++) {
int outcnt = 0, incnt = 0;
/*
* We don't support 0 length messages and so filter out
* 0 length transfers by using i2c_adapter_quirks.
*/
if (!msgs[i].len)
break;
/*
* Only 7-bit mode supported for this moment. For the address
* format, Please check the Virtio I2C Specification.
*/
reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
if (msgs[i].flags & I2C_M_RD)
reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_M_RD);
if (i != num - 1)
reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr));
sgs[outcnt++] = &out_hdr;
reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
if (!reqs[i].buf)
break;
if (msgs[i].len) {
reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
if (!reqs[i].buf)
break;
sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
if (msgs[i].flags & I2C_M_RD)
sgs[outcnt + incnt++] = &msg_buf;
else
sgs[outcnt++] = &msg_buf;
if (msgs[i].flags & I2C_M_RD)
sgs[outcnt + incnt++] = &msg_buf;
else
sgs[outcnt++] = &msg_buf;
}
sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr));
sgs[outcnt + incnt++] = &in_hdr;
@ -191,7 +189,7 @@ static int virtio_i2c_setup_vqs(struct virtio_i2c *vi)
static u32 virtio_i2c_func(struct i2c_adapter *adap)
{
return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}
static struct i2c_algorithm virtio_algorithm = {
@ -199,15 +197,16 @@ static struct i2c_algorithm virtio_algorithm = {
.functionality = virtio_i2c_func,
};
static const struct i2c_adapter_quirks virtio_i2c_quirks = {
.flags = I2C_AQ_NO_ZERO_LEN,
};
static int virtio_i2c_probe(struct virtio_device *vdev)
{
struct virtio_i2c *vi;
int ret;
if (!virtio_has_feature(vdev, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST)) {
dev_err(&vdev->dev, "Zero-length request feature is mandatory\n");
return -EINVAL;
}
vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL);
if (!vi)
return -ENOMEM;
@ -225,7 +224,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev)
snprintf(vi->adap.name, sizeof(vi->adap.name),
"i2c_virtio at virtio bus %d", vdev->index);
vi->adap.algo = &virtio_algorithm;
vi->adap.quirks = &virtio_i2c_quirks;
vi->adap.dev.parent = &vdev->dev;
vi->adap.dev.of_node = vdev->dev.of_node;
i2c_set_adapdata(&vi->adap, vi);
@ -270,11 +268,17 @@ static int virtio_i2c_restore(struct virtio_device *vdev)
}
#endif
static const unsigned int features[] = {
VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
};
static struct virtio_driver virtio_i2c_driver = {
.id_table = id_table,
.probe = virtio_i2c_probe,
.remove = virtio_i2c_remove,
.driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.id_table = id_table,
.probe = virtio_i2c_probe,
.remove = virtio_i2c_remove,
.driver = {
.name = "i2c_virtio",
},
#ifdef CONFIG_PM_SLEEP

View File

@ -408,12 +408,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
* add_recvbuf_mergeable() + get_mergeable_buf_len()
*/
truesize = headroom ? PAGE_SIZE : truesize;
tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len);
tailroom = truesize - headroom;
buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
tailroom -= hdr_padded_len + len;
shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@ -3422,6 +3423,7 @@ static struct virtio_driver virtio_net_driver = {
.feature_table_size = ARRAY_SIZE(features),
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

View File

@ -978,6 +978,7 @@ static unsigned int features[] = {
static struct virtio_driver virtio_scsi_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

View File

@ -78,4 +78,12 @@ config VP_VDPA
help
This kernel module bridges virtio PCI device to vDPA bus.
config ALIBABA_ENI_VDPA
tristate "vDPA driver for Alibaba ENI"
select VIRTIO_PCI_LIB_LEGACY
depends on PCI_MSI && X86
help
VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon
virtio 0.9.5 specification.
endif # VDPA

View File

@ -5,3 +5,4 @@ obj-$(CONFIG_VDPA_USER) += vdpa_user/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
obj-$(CONFIG_VP_VDPA) += virtio_pci/
obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ALIBABA_ENI_VDPA) += eni_vdpa.o

View File

@ -0,0 +1,553 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* vDPA bridge driver for Alibaba ENI(Elastic Network Interface)
*
* Copyright (c) 2021, Alibaba Inc. All rights reserved.
* Author: Wu Zongyong <wuzongyong@linux.alibaba.com>
*
*/
#include "linux/bits.h"
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/vdpa.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_legacy.h>
#include <uapi/linux/virtio_net.h>
#define ENI_MSIX_NAME_SIZE 256
#define ENI_ERR(pdev, fmt, ...) \
dev_err(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
#define ENI_DBG(pdev, fmt, ...) \
dev_dbg(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
#define ENI_INFO(pdev, fmt, ...) \
dev_info(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
struct eni_vring {
void __iomem *notify;
char msix_name[ENI_MSIX_NAME_SIZE];
struct vdpa_callback cb;
int irq;
};
struct eni_vdpa {
struct vdpa_device vdpa;
struct virtio_pci_legacy_device ldev;
struct eni_vring *vring;
struct vdpa_callback config_cb;
char msix_name[ENI_MSIX_NAME_SIZE];
int config_irq;
int queues;
int vectors;
};
static struct eni_vdpa *vdpa_to_eni(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct eni_vdpa, vdpa);
}
static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
return &eni_vdpa->ldev;
}
static u64 eni_vdpa_get_features(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
u64 features = vp_legacy_get_features(ldev);
features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
features |= BIT_ULL(VIRTIO_F_ORDER_PLATFORM);
return features;
}
static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
if (!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) && features) {
ENI_ERR(ldev->pci_dev,
"VIRTIO_NET_F_MRG_RXBUF is not negotiated\n");
return -EINVAL;
}
vp_legacy_set_features(ldev, (u32)features);
return 0;
}
static u8 eni_vdpa_get_status(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_status(ldev);
}
static int eni_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
int irq = eni_vdpa->vring[idx].irq;
if (irq == VIRTIO_MSI_NO_VECTOR)
return -EINVAL;
return irq;
}
static void eni_vdpa_free_irq(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
struct pci_dev *pdev = ldev->pci_dev;
int i;
for (i = 0; i < eni_vdpa->queues; i++) {
if (eni_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
vp_legacy_queue_vector(ldev, i, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, eni_vdpa->vring[i].irq,
&eni_vdpa->vring[i]);
eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
}
}
if (eni_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
vp_legacy_config_vector(ldev, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, eni_vdpa->config_irq, eni_vdpa);
eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
}
if (eni_vdpa->vectors) {
pci_free_irq_vectors(pdev);
eni_vdpa->vectors = 0;
}
}
static irqreturn_t eni_vdpa_vq_handler(int irq, void *arg)
{
struct eni_vring *vring = arg;
if (vring->cb.callback)
return vring->cb.callback(vring->cb.private);
return IRQ_HANDLED;
}
static irqreturn_t eni_vdpa_config_handler(int irq, void *arg)
{
struct eni_vdpa *eni_vdpa = arg;
if (eni_vdpa->config_cb.callback)
return eni_vdpa->config_cb.callback(eni_vdpa->config_cb.private);
return IRQ_HANDLED;
}
static int eni_vdpa_request_irq(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
struct pci_dev *pdev = ldev->pci_dev;
int i, ret, irq;
int queues = eni_vdpa->queues;
int vectors = queues + 1;
ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
if (ret != vectors) {
ENI_ERR(pdev,
"failed to allocate irq vectors want %d but %d\n",
vectors, ret);
return ret;
}
eni_vdpa->vectors = vectors;
for (i = 0; i < queues; i++) {
snprintf(eni_vdpa->vring[i].msix_name, ENI_MSIX_NAME_SIZE,
"eni-vdpa[%s]-%d\n", pci_name(pdev), i);
irq = pci_irq_vector(pdev, i);
ret = devm_request_irq(&pdev->dev, irq,
eni_vdpa_vq_handler,
0, eni_vdpa->vring[i].msix_name,
&eni_vdpa->vring[i]);
if (ret) {
ENI_ERR(pdev, "failed to request irq for vq %d\n", i);
goto err;
}
vp_legacy_queue_vector(ldev, i, i);
eni_vdpa->vring[i].irq = irq;
}
snprintf(eni_vdpa->msix_name, ENI_MSIX_NAME_SIZE, "eni-vdpa[%s]-config\n",
pci_name(pdev));
irq = pci_irq_vector(pdev, queues);
ret = devm_request_irq(&pdev->dev, irq, eni_vdpa_config_handler, 0,
eni_vdpa->msix_name, eni_vdpa);
if (ret) {
ENI_ERR(pdev, "failed to request irq for config vq %d\n", i);
goto err;
}
vp_legacy_config_vector(ldev, queues);
eni_vdpa->config_irq = irq;
return 0;
err:
eni_vdpa_free_irq(eni_vdpa);
return ret;
}
static void eni_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u8 s = eni_vdpa_get_status(vdpa);
if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
!(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
eni_vdpa_request_irq(eni_vdpa);
}
vp_legacy_set_status(ldev, status);
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
(s & VIRTIO_CONFIG_S_DRIVER_OK))
eni_vdpa_free_irq(eni_vdpa);
}
static int eni_vdpa_reset(struct vdpa_device *vdpa)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u8 s = eni_vdpa_get_status(vdpa);
vp_legacy_set_status(ldev, 0);
if (s & VIRTIO_CONFIG_S_DRIVER_OK)
eni_vdpa_free_irq(eni_vdpa);
return 0;
}
static u16 eni_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_size(ldev, 0);
}
static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_size(ldev, 0);
}
static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
struct vdpa_vq_state *state)
{
return -EOPNOTSUPP;
}
static int eni_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
const struct vdpa_vq_state *state)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
const struct vdpa_vq_state_split *split = &state->split;
/* ENI is build upon virtio-pci specfication which not support
* to set state of virtqueue. But if the state is equal to the
* device initial state by chance, we can let it go.
*/
if (!vp_legacy_get_queue_enable(ldev, qid)
&& split->avail_index == 0)
return 0;
return -EOPNOTSUPP;
}
static void eni_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
struct vdpa_callback *cb)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
eni_vdpa->vring[qid].cb = *cb;
}
static void eni_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 qid,
bool ready)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
/* ENI is a legacy virtio-pci device. This is not supported
* by specification. But we can disable virtqueue by setting
* address to 0.
*/
if (!ready)
vp_legacy_set_queue_address(ldev, qid, 0);
}
static bool eni_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_enable(ldev, qid);
}
static void eni_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
u32 num)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
struct pci_dev *pdev = ldev->pci_dev;
u16 n = vp_legacy_get_queue_size(ldev, qid);
/* ENI is a legacy virtio-pci device which not allow to change
* virtqueue size. Just report a error if someone tries to
* change it.
*/
if (num != n)
ENI_ERR(pdev,
"not support to set vq %u fixed num %u to %u\n",
qid, n, num);
}
static int eni_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
u64 desc_area, u64 driver_area,
u64 device_area)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
u32 pfn = desc_area >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
vp_legacy_set_queue_address(ldev, qid, pfn);
return 0;
}
static void eni_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
iowrite16(qid, eni_vdpa->vring[qid].notify);
}
static u32 eni_vdpa_get_device_id(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return ldev->id.device;
}
static u32 eni_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return ldev->id.vendor;
}
static u32 eni_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
return VIRTIO_PCI_VRING_ALIGN;
}
static size_t eni_vdpa_get_config_size(struct vdpa_device *vdpa)
{
return sizeof(struct virtio_net_config);
}
static void eni_vdpa_get_config(struct vdpa_device *vdpa,
unsigned int offset,
void *buf, unsigned int len)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
void __iomem *ioaddr = ldev->ioaddr +
VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
offset;
u8 *p = buf;
int i;
for (i = 0; i < len; i++)
*p++ = ioread8(ioaddr + i);
}
static void eni_vdpa_set_config(struct vdpa_device *vdpa,
unsigned int offset, const void *buf,
unsigned int len)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
void __iomem *ioaddr = ldev->ioaddr +
VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
offset;
const u8 *p = buf;
int i;
for (i = 0; i < len; i++)
iowrite8(*p++, ioaddr + i);
}
static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa,
struct vdpa_callback *cb)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
eni_vdpa->config_cb = *cb;
}
static const struct vdpa_config_ops eni_vdpa_ops = {
.get_features = eni_vdpa_get_features,
.set_features = eni_vdpa_set_features,
.get_status = eni_vdpa_get_status,
.set_status = eni_vdpa_set_status,
.reset = eni_vdpa_reset,
.get_vq_num_max = eni_vdpa_get_vq_num_max,
.get_vq_num_min = eni_vdpa_get_vq_num_min,
.get_vq_state = eni_vdpa_get_vq_state,
.set_vq_state = eni_vdpa_set_vq_state,
.set_vq_cb = eni_vdpa_set_vq_cb,
.set_vq_ready = eni_vdpa_set_vq_ready,
.get_vq_ready = eni_vdpa_get_vq_ready,
.set_vq_num = eni_vdpa_set_vq_num,
.set_vq_address = eni_vdpa_set_vq_address,
.kick_vq = eni_vdpa_kick_vq,
.get_device_id = eni_vdpa_get_device_id,
.get_vendor_id = eni_vdpa_get_vendor_id,
.get_vq_align = eni_vdpa_get_vq_align,
.get_config_size = eni_vdpa_get_config_size,
.get_config = eni_vdpa_get_config,
.set_config = eni_vdpa_set_config,
.set_config_cb = eni_vdpa_set_config_cb,
.get_vq_irq = eni_vdpa_get_vq_irq,
};
static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u32 features = vp_legacy_get_features(ldev);
u16 num = 2;
if (features & BIT_ULL(VIRTIO_NET_F_MQ)) {
__virtio16 max_virtqueue_pairs;
eni_vdpa_get_config(&eni_vdpa->vdpa,
offsetof(struct virtio_net_config, max_virtqueue_pairs),
&max_virtqueue_pairs,
sizeof(max_virtqueue_pairs));
num = 2 * __virtio16_to_cpu(virtio_legacy_is_little_endian(),
max_virtqueue_pairs);
}
if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
num += 1;
return num;
}
static void eni_vdpa_free_irq_vectors(void *data)
{
pci_free_irq_vectors(data);
}
static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct eni_vdpa *eni_vdpa;
struct virtio_pci_legacy_device *ldev;
int ret, i;
ret = pcim_enable_device(pdev);
if (ret)
return ret;
eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa,
dev, &eni_vdpa_ops, NULL, false);
if (IS_ERR(eni_vdpa)) {
ENI_ERR(pdev, "failed to allocate vDPA structure\n");
return PTR_ERR(eni_vdpa);
}
ldev = &eni_vdpa->ldev;
ldev->pci_dev = pdev;
ret = vp_legacy_probe(ldev);
if (ret) {
ENI_ERR(pdev, "failed to probe legacy PCI device\n");
goto err;
}
pci_set_master(pdev);
pci_set_drvdata(pdev, eni_vdpa);
eni_vdpa->vdpa.dma_dev = &pdev->dev;
eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa);
ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev);
if (ret) {
ENI_ERR(pdev,
"failed for adding devres for freeing irq vectors\n");
goto err;
}
eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues,
sizeof(*eni_vdpa->vring),
GFP_KERNEL);
if (!eni_vdpa->vring) {
ret = -ENOMEM;
ENI_ERR(pdev, "failed to allocate virtqueues\n");
goto err;
}
for (i = 0; i < eni_vdpa->queues; i++) {
eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
eni_vdpa->vring[i].notify = ldev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
}
eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues);
if (ret) {
ENI_ERR(pdev, "failed to register to vdpa bus\n");
goto err;
}
return 0;
err:
put_device(&eni_vdpa->vdpa.dev);
return ret;
}
static void eni_vdpa_remove(struct pci_dev *pdev)
{
struct eni_vdpa *eni_vdpa = pci_get_drvdata(pdev);
vdpa_unregister_device(&eni_vdpa->vdpa);
vp_legacy_remove(&eni_vdpa->ldev);
}
static struct pci_device_id eni_pci_ids[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
VIRTIO_TRANS_ID_NET,
PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
VIRTIO_ID_NET) },
{ 0 },
};
static struct pci_driver eni_vdpa_driver = {
.name = "alibaba-eni-vdpa",
.id_table = eni_pci_ids,
.probe = eni_vdpa_probe,
.remove = eni_vdpa_remove,
};
module_pci_driver(eni_vdpa_driver);
MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba ENI vDPA driver");
MODULE_LICENSE("GPL v2");

View File

@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev)
return dev_type;
}
static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct ifcvf_adapter *adapter;

View File

@ -63,7 +63,7 @@ struct mlx5_control_vq {
unsigned short head;
};
struct mlx5_ctrl_wq_ent {
struct mlx5_vdpa_wq_ent {
struct work_struct work;
struct mlx5_vdpa_dev *mvdev;
};

View File

@ -6,6 +6,7 @@
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
@ -157,10 +158,12 @@ struct mlx5_vdpa_net {
struct mutex reslock;
struct mlx5_flow_table *rxft;
struct mlx5_fc *rx_counter;
struct mlx5_flow_handle *rx_rule;
struct mlx5_flow_handle *rx_rule_ucast;
struct mlx5_flow_handle *rx_rule_mcast;
bool setup;
u16 mtu;
u32 cur_num_vqs;
struct notifier_block nb;
struct vdpa_callback config_cb;
};
static void free_resources(struct mlx5_vdpa_net *ndev);
@ -1381,21 +1384,33 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
u8 *dmac_c;
u8 *dmac_v;
int err;
/* for now, one entry, match all, forward to tir */
ft_attr.max_fte = 1;
ft_attr.autogroup.max_num_groups = 1;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
ft_attr.max_fte = 2;
ft_attr.autogroup.max_num_groups = 2;
ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
if (!ns) {
mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
return -EOPNOTSUPP;
mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
err = -EOPNOTSUPP;
goto err_ns;
}
ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ndev->rxft))
return PTR_ERR(ndev->rxft);
if (IS_ERR(ndev->rxft)) {
err = PTR_ERR(ndev->rxft);
goto err_ns;
}
ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
if (IS_ERR(ndev->rx_counter)) {
@ -1403,37 +1418,64 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
goto err_fc;
}
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
memset(dmac_c, 0xff, ETH_ALEN);
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
ether_addr_copy(dmac_v, ndev->config.mac);
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dest[0].tir_num = ndev->res.tirn;
dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
if (IS_ERR(ndev->rx_rule)) {
err = PTR_ERR(ndev->rx_rule);
ndev->rx_rule = NULL;
goto err_rule;
ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2);
if (IS_ERR(ndev->rx_rule_ucast)) {
err = PTR_ERR(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
goto err_rule_ucast;
}
memset(dmac_c, 0, ETH_ALEN);
memset(dmac_v, 0, ETH_ALEN);
dmac_c[0] = 1;
dmac_v[0] = 1;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1);
if (IS_ERR(ndev->rx_rule_mcast)) {
err = PTR_ERR(ndev->rx_rule_mcast);
ndev->rx_rule_mcast = NULL;
goto err_rule_mcast;
}
kvfree(spec);
return 0;
err_rule:
err_rule_mcast:
mlx5_del_flow_rules(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
err_rule_ucast:
mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
mlx5_destroy_flow_table(ndev->rxft);
err_ns:
kvfree(spec);
return err;
}
static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
if (!ndev->rx_rule)
if (!ndev->rx_rule_ucast)
return;
mlx5_del_flow_rules(ndev->rx_rule);
mlx5_del_flow_rules(ndev->rx_rule_mcast);
ndev->rx_rule_mcast = NULL;
mlx5_del_flow_rules(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
mlx5_destroy_flow_table(ndev->rxft);
ndev->rx_rule = NULL;
}
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
@ -1557,14 +1599,14 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
{
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
struct virtio_net_ctrl_hdr ctrl;
struct mlx5_ctrl_wq_ent *wqent;
struct mlx5_vdpa_wq_ent *wqent;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_control_vq *cvq;
struct mlx5_vdpa_net *ndev;
size_t read, write;
int err;
wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
mvdev = wqent->mvdev;
ndev = to_mlx5_vdpa_ndev(mvdev);
cvq = &mvdev->cvq;
@ -1616,7 +1658,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
struct mlx5_ctrl_wq_ent *wqent;
struct mlx5_vdpa_wq_ent *wqent;
if (!is_index_valid(mvdev, idx))
return;
@ -1852,6 +1894,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
print_features(mvdev, ndev->mvdev.mlx_features, false);
return ndev->mvdev.mlx_features;
@ -1942,16 +1985,16 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
return err;
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
update_cvq_info(mvdev);
return err;
}
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
/* not implemented */
mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
ndev->config_cb = *cb;
}
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
@ -2192,7 +2235,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
ndev->mvdev.mlx_features = 0;
memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
ndev->mvdev.actual_features = 0;
++mvdev->generation;
@ -2404,7 +2446,84 @@ struct mlx5_vdpa_mgmtdev {
struct mlx5_vdpa_net *ndev;
};
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
int err;
MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
MLX5_SET(query_vport_state_in, in, op_mod, opmod);
MLX5_SET(query_vport_state_in, in, vport_number, vport);
if (vport)
MLX5_SET(query_vport_state_in, in, other_vport, 1);
err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
if (err)
return 0;
return MLX5_GET(query_vport_state_out, out, state);
}
static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
VPORT_STATE_UP)
return true;
return false;
}
static void update_carrier(struct work_struct *work)
{
struct mlx5_vdpa_wq_ent *wqent;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
mvdev = wqent->mvdev;
ndev = to_mlx5_vdpa_ndev(mvdev);
if (get_link_state(mvdev))
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
else
ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
if (ndev->config_cb.callback)
ndev->config_cb.callback(ndev->config_cb.private);
kfree(wqent);
}
static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
struct mlx5_eqe *eqe = param;
int ret = NOTIFY_DONE;
struct mlx5_vdpa_wq_ent *wqent;
if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
if (!wqent)
return NOTIFY_DONE;
wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work);
ret = NOTIFY_OK;
break;
default:
return NOTIFY_DONE;
}
return ret;
}
return ret;
}
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
const struct vdpa_dev_set_config *add_config)
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct virtio_net_config *config;
@ -2413,6 +2532,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
struct mlx5_vdpa_net *ndev;
struct mlx5_core_dev *mdev;
u32 max_vqs;
u16 mtu;
int err;
if (mgtdev->ndev)
@ -2440,13 +2560,24 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
init_mvqs(ndev);
mutex_init(&ndev->reslock);
config = &ndev->config;
err = query_mtu(mdev, &ndev->mtu);
err = query_mtu(mdev, &mtu);
if (err)
goto err_mtu;
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_mtu;
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
if (get_link_state(mvdev))
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
else
ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
} else {
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_mtu;
}
if (!is_zero_ether_addr(config->mac)) {
pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
@ -2473,12 +2604,14 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
if (err)
goto err_mr;
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
if (!mvdev->wq) {
err = -ENOMEM;
goto err_res2;
}
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
@ -2509,7 +2642,9 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
destroy_workqueue(mvdev->wq);
_vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
@ -2541,6 +2676,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
mgtdev->mgtdev.ops = &mdev_ops;
mgtdev->mgtdev.device = mdev->device;
mgtdev->mgtdev.id_table = id_table;
mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
mgtdev->madev = madev;
err = vdpa_mgmtdev_register(&mgtdev->mgtdev);

View File

@ -14,6 +14,7 @@
#include <uapi/linux/vdpa.h>
#include <net/genetlink.h>
#include <linux/mod_devicetable.h>
#include <linux/virtio_ids.h>
static LIST_HEAD(mdev_head);
/* A global mutex that protects vdpa management device and device level operations. */
@ -26,8 +27,16 @@ static int vdpa_dev_probe(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
const struct vdpa_config_ops *ops = vdev->config;
u32 max_num, min_num = 1;
int ret = 0;
max_num = ops->get_vq_num_max(vdev);
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdev);
if (max_num < min_num)
return -EINVAL;
if (drv && drv->probe)
ret = drv->probe(vdev);
@ -58,6 +67,7 @@ static void vdpa_release_dev(struct device *d)
ops->free(vdev);
ida_simple_remove(&vdpa_index_ida, vdev->index);
mutex_destroy(&vdev->cf_mutex);
kfree(vdev);
}
@ -119,6 +129,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
if (err)
goto err_name;
mutex_init(&vdev->cf_mutex);
device_initialize(&vdev->dev);
return vdev;
@ -289,6 +300,46 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
}
EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
/**
* vdpa_get_config - Get one or more device configuration fields.
* @vdev: vdpa device to operate on
* @offset: starting byte offset of the field
* @buf: buffer pointer to read to
* @len: length of the configuration fields in bytes
*/
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len)
{
const struct vdpa_config_ops *ops = vdev->config;
mutex_lock(&vdev->cf_mutex);
/*
* Config accesses aren't supposed to trigger before features are set.
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
vdpa_set_features(vdev, 0);
ops->get_config(vdev, offset, buf, len);
mutex_unlock(&vdev->cf_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_get_config);
/**
* vdpa_set_config - Set one or more device configuration fields.
* @vdev: vdpa device to operate on
* @offset: starting byte offset of the field
* @buf: buffer pointer to read from
* @length: length of the configuration fields in bytes
*/
void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
const void *buf, unsigned int length)
{
mutex_lock(&vdev->cf_mutex);
vdev->config->set_config(vdev, offset, buf, length);
mutex_unlock(&vdev->cf_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_set_config);
static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
const char *busname, const char *devname)
{
@ -428,9 +479,15 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
return msg->len;
}
#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
(1 << VDPA_ATTR_DEV_NET_CFG_MTU))
static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_dev_set_config config = {};
struct nlattr **nl_attrs = info->attrs;
struct vdpa_mgmt_dev *mdev;
const u8 *macaddr;
const char *name;
int err = 0;
@ -439,6 +496,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
}
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
config.net.mtu =
nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
}
/* Skip checking capability if user didn't prefer to configure any
* device networking attributes. It is likely that user might have used
* a device specific method to configure such attributes or using device
* default attributes.
*/
if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
mutex_lock(&vdpa_dev_mutex);
mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
if (IS_ERR(mdev)) {
@ -446,8 +523,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
err = PTR_ERR(mdev);
goto err;
}
if ((config.mask & mdev->config_attr_mask) != config.mask) {
NL_SET_ERR_MSG_MOD(info->extack,
"All provided attributes are not supported");
err = -EOPNOTSUPP;
goto err;
}
err = mdev->ops->dev_add(mdev, name);
err = mdev->ops->dev_add(mdev, name, &config);
err:
mutex_unlock(&vdpa_dev_mutex);
return err;
@ -492,6 +575,7 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
int flags, struct netlink_ext_ack *extack)
{
u16 max_vq_size;
u16 min_vq_size = 1;
u32 device_id;
u32 vendor_id;
void *hdr;
@ -508,6 +592,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
device_id = vdev->config->get_device_id(vdev);
vendor_id = vdev->config->get_vendor_id(vdev);
max_vq_size = vdev->config->get_vq_num_max(vdev);
if (vdev->config->get_vq_num_min)
min_vq_size = vdev->config->get_vq_num_min(vdev);
err = -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
@ -520,6 +606,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
goto msg_err;
if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
goto msg_err;
if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size))
goto msg_err;
genlmsg_end(msg, hdr);
return 0;
@ -612,10 +700,175 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba
return msg->len;
}
static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
struct sk_buff *msg, u64 features,
const struct virtio_net_config *config)
{
u16 val_u16;
if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
return 0;
val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
}
static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
{
struct virtio_net_config config = {};
u64 features;
u16 val_u16;
vdpa_get_config(vdev, 0, &config, sizeof(config));
if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
config.mac))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.status);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.mtu);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE;
features = vdev->config->get_features(vdev);
return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
}
static int
vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct netlink_ext_ack *extack)
{
u32 device_id;
void *hdr;
int err;
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET);
if (!hdr)
return -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
err = -EMSGSIZE;
goto msg_err;
}
device_id = vdev->config->get_device_id(vdev);
if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
err = -EMSGSIZE;
goto msg_err;
}
switch (device_id) {
case VIRTIO_ID_NET:
err = vdpa_dev_net_config_fill(vdev, msg);
break;
default:
err = -EOPNOTSUPP;
break;
}
if (err)
goto msg_err;
genlmsg_end(msg, hdr);
return 0;
msg_err:
genlmsg_cancel(msg, hdr);
return err;
}
static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_device *vdev;
struct sk_buff *msg;
const char *devname;
struct device *dev;
int err;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
return -EINVAL;
devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
mutex_lock(&vdpa_dev_mutex);
dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
if (!dev) {
NL_SET_ERR_MSG_MOD(info->extack, "device not found");
err = -ENODEV;
goto dev_err;
}
vdev = container_of(dev, struct vdpa_device, dev);
if (!vdev->mdev) {
NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
err = -EINVAL;
goto mdev_err;
}
err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq,
0, info->extack);
if (!err)
err = genlmsg_reply(msg, info);
mdev_err:
put_device(dev);
dev_err:
mutex_unlock(&vdpa_dev_mutex);
if (err)
nlmsg_free(msg);
return err;
}
static int vdpa_dev_config_dump(struct device *dev, void *data)
{
struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
struct vdpa_dev_dump_info *info = data;
int err;
if (!vdev->mdev)
return 0;
if (info->idx < info->start_idx) {
info->idx++;
return 0;
}
err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
info->cb->nlh->nlmsg_seq, NLM_F_MULTI,
info->cb->extack);
if (err)
return err;
info->idx++;
return 0;
}
static int
vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
{
struct vdpa_dev_dump_info info;
info.msg = msg;
info.cb = cb;
info.start_idx = cb->args[0];
info.idx = 0;
mutex_lock(&vdpa_dev_mutex);
bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump);
mutex_unlock(&vdpa_dev_mutex);
cb->args[0] = info.idx;
return msg->len;
}
static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
};
static const struct genl_ops vdpa_nl_ops[] = {
@ -643,6 +896,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
.doit = vdpa_nl_cmd_dev_get_doit,
.dumpit = vdpa_nl_cmd_dev_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_CONFIG_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_config_get_doit,
.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
},
};
static struct genl_family vdpa_nl_family __ro_after_init = {

View File

@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = {
.release = vdpasim_blk_mgmtdev_release,
};
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;

View File

@ -16,6 +16,7 @@
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/vdpa.h>
#include "vdpa_sim.h"
@ -29,12 +30,6 @@
#define VDPASIM_NET_VQ_NUM 2
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
static u8 macaddr_buf[ETH_ALEN];
static void vdpasim_net_work(struct work_struct *work)
{
struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
@ -112,9 +107,21 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
struct virtio_net_config *net_config = config;
net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
}
static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
const struct vdpa_dev_set_config *config)
{
struct virtio_net_config *vio_config = vdpasim->config;
if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
else
/* Setup default MTU to be 1500 */
vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}
static void vdpasim_net_mgmtdev_release(struct device *dev)
@ -126,7 +133,8 @@ static struct device vdpasim_net_mgmtdev = {
.release = vdpasim_net_mgmtdev_release,
};
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;
@ -146,6 +154,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
if (IS_ERR(simdev))
return PTR_ERR(simdev);
vdpasim_net_setup_config(simdev, config);
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
if (ret)
goto reg_err;
@ -179,20 +189,14 @@ static struct vdpa_mgmt_dev mgmt_dev = {
.device = &vdpasim_net_mgmtdev,
.id_table = id_table,
.ops = &vdpasim_net_mgmtdev_ops,
.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
1 << VDPA_ATTR_DEV_NET_CFG_MTU),
};
static int __init vdpasim_net_init(void)
{
int ret;
if (macaddr) {
mac_pton(macaddr, macaddr_buf);
if (!is_valid_ether_addr(macaddr_buf))
return -EADDRNOTAVAIL;
} else {
eth_random_addr(macaddr_buf);
}
ret = device_register(&vdpasim_net_mgmtdev);
if (ret)
return ret;

View File

@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
return 0;
}
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vduse_dev *dev;
int ret;

View File

@ -76,6 +76,17 @@ static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
return vp_modern_get_status(mdev);
}
static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
int irq = vp_vdpa->vring[idx].irq;
if (irq == VIRTIO_MSI_NO_VECTOR)
return -EINVAL;
return irq;
}
static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa)
{
struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
@ -427,6 +438,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = {
.get_config = vp_vdpa_get_config,
.set_config = vp_vdpa_set_config,
.set_config_cb = vp_vdpa_set_config_cb,
.get_vq_irq = vp_vdpa_get_vq_irq,
};
static void vp_vdpa_free_irq_vectors(void *data)

View File

@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
struct vhost_vdpa_config __user *c)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
u8 *buf;
@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
if (IS_ERR(buf))
return PTR_ERR(buf);
ops->set_config(vdpa, config.off, buf, config.len);
vdpa_set_config(vdpa, config.off, buf, config.len);
kvfree(buf);
return 0;

View File

@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB
PCI device with possible vendor specific extensions. Any
module that selects this module must depend on PCI.
config VIRTIO_PCI_LIB_LEGACY
tristate
help
Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device)
implementation.
This module implements the basic probe and control for devices
which are based on legacy PCI device. Any module that selects this
module must depend on PCI.
menuconfig VIRTIO_MENU
bool "Virtio drivers"
default y
@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY
bool "Support for legacy virtio draft 0.9.X and older devices"
default y
depends on VIRTIO_PCI
select VIRTIO_PCI_LIB_LEGACY
help
Virtio PCI Card 0.9.X Draft (circa 2014) and older device support.

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o
obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o
obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o

View File

@ -24,17 +24,46 @@ MODULE_PARM_DESC(force_legacy,
"Force legacy mode for transitional virtio 1 devices");
#endif
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
/* disable irq handlers */
void vp_disable_cbs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;
if (vp_dev->intx_enabled)
if (vp_dev->intx_enabled) {
/*
* The below synchronize() guarantees that any
* interrupt for this line arriving after
* synchronize_irq() has completed is guaranteed to see
* intx_soft_enabled == false.
*/
WRITE_ONCE(vp_dev->intx_soft_enabled, false);
synchronize_irq(vp_dev->pci_dev->irq);
}
for (i = 0; i < vp_dev->msix_vectors; ++i)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
disable_irq(pci_irq_vector(vp_dev->pci_dev, i));
}
/* enable irq handlers */
void vp_enable_cbs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;
if (vp_dev->intx_enabled) {
disable_irq(vp_dev->pci_dev->irq);
/*
* The above disable_irq() provides TSO ordering and
* as such promotes the below store to store-release.
*/
WRITE_ONCE(vp_dev->intx_soft_enabled, true);
enable_irq(vp_dev->pci_dev->irq);
return;
}
for (i = 0; i < vp_dev->msix_vectors; ++i)
enable_irq(pci_irq_vector(vp_dev->pci_dev, i));
}
/* the notify function used when creating a virt queue */
@ -84,6 +113,9 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
struct virtio_pci_device *vp_dev = opaque;
u8 isr;
if (!READ_ONCE(vp_dev->intx_soft_enabled))
return IRQ_NONE;
/* reading the ISR has the effect of also clearing it so it's very
* important to save off the value. */
isr = ioread8(vp_dev->isr);
@ -141,7 +173,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-config", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_config_changed, 0, vp_dev->msix_names[v],
vp_config_changed, IRQF_NO_AUTOEN,
vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
@ -160,7 +193,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-virtqueues", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_vring_interrupt, 0, vp_dev->msix_names[v],
vp_vring_interrupt, IRQF_NO_AUTOEN,
vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
@ -337,7 +371,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
vring_interrupt, 0,
vring_interrupt, IRQF_NO_AUTOEN,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err)
@ -549,6 +583,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
pci_set_master(pci_dev);
vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
rc = register_virtio_device(&vp_dev->vdev);
reg_dev = vp_dev;
if (rc)
@ -557,10 +593,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
return 0;
err_register:
if (vp_dev->ioaddr)
virtio_pci_legacy_remove(vp_dev);
if (vp_dev->is_legacy)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);
virtio_pci_modern_remove(vp_dev);
err_probe:
pci_disable_device(pci_dev);
err_enable_device:
@ -587,7 +623,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
unregister_virtio_device(&vp_dev->vdev);
if (vp_dev->ioaddr)
if (vp_dev->is_legacy)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);

View File

@ -25,6 +25,7 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_legacy.h>
#include <linux/virtio_pci_modern.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
@ -44,16 +45,14 @@ struct virtio_pci_vq_info {
struct virtio_pci_device {
struct virtio_device vdev;
struct pci_dev *pci_dev;
struct virtio_pci_legacy_device ldev;
struct virtio_pci_modern_device mdev;
/* In legacy mode, these two point to within ->legacy. */
bool is_legacy;
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* Legacy only field */
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
@ -64,6 +63,7 @@ struct virtio_pci_device {
/* MSI-X support */
int msix_enabled;
int intx_enabled;
bool intx_soft_enabled;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
@ -102,8 +102,10 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
return container_of(vdev, struct virtio_pci_device, vdev);
}
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev);
/* disable irq handlers */
void vp_disable_cbs(struct virtio_device *vdev);
/* enable irq handlers */
void vp_enable_cbs(struct virtio_device *vdev);
/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq);
/* the config->del_vqs() implementation */

View File

@ -14,6 +14,7 @@
* Michael S. Tsirkin <mst@redhat.com>
*/
#include "linux/virtio_pci_legacy.h"
#include "virtio_pci_common.h"
/* virtio config->get_features() implementation */
@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev)
/* When someone needs more than 32 feature bits, we'll need to
* steal a bit to indicate that the rest are somewhere else. */
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
return vp_legacy_get_features(&vp_dev->ldev);
}
/* virtio config->finalize_features() implementation */
@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev)
BUG_ON((u32)vdev->features != vdev->features);
/* We only support 32 feature bits. */
iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
vp_legacy_set_features(&vp_dev->ldev, vdev->features);
return 0;
}
@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
void __iomem *ioaddr = vp_dev->ldev.ioaddr +
VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
offset;
u8 *ptr = buf;
@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
void __iomem *ioaddr = vp_dev->ldev.ioaddr +
VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
offset;
const u8 *ptr = buf;
@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
return vp_legacy_get_status(&vp_dev->ldev);
}
static void vp_set_status(struct virtio_device *vdev, u8 status)
@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* We should never be setting status to 0. */
BUG_ON(status == 0);
iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
vp_legacy_set_status(&vp_dev->ldev, status);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* 0 status means a reset. */
iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
vp_legacy_set_status(&vp_dev->ldev, 0);
/* Flush out the status write, and flush in device writes,
* including MSi-X interrupts, if any. */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
vp_legacy_get_status(&vp_dev->ldev);
/* Disable VQ/configuration callbacks. */
vp_disable_cbs(vdev);
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
/* Setup the vector used for configuration events */
iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
return vp_legacy_config_vector(&vp_dev->ldev, vector);
}
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
int err;
u64 q_pfn;
/* Select the queue we're interested in */
iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
/* Check if queue is either not available or already active. */
num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
num = vp_legacy_get_queue_size(&vp_dev->ldev, index);
if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
return ERR_PTR(-ENOENT);
info->msix_vector = msix_vec;
@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
}
/* activate the queue */
iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn);
vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto out_deactivate;
@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return vq;
out_deactivate:
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, index, 0);
out_del_vq:
vring_del_virtqueue(vq);
return ERR_PTR(err);
@ -178,22 +171,21 @@ static void del_vq(struct virtio_pci_vq_info *info)
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
vp_legacy_queue_vector(&vp_dev->ldev, vq->index,
VIRTIO_MSI_NO_VECTOR);
/* Flush the write out to device */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR);
}
/* Select and deactivate the queue */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0);
vring_del_virtqueue(vq);
}
static const struct virtio_config_ops virtio_pci_config_ops = {
.enable_cbs = vp_enable_cbs,
.get = vp_get,
.set = vp_set,
.get_status = vp_get_status,
@ -211,51 +203,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
/* the PCI probing function */
int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int rc;
/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
return -ENODEV;
ldev->pci_dev = pci_dev;
if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
return -ENODEV;
}
rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
if (rc) {
rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
} else {
/*
* The virtio ring base address is expressed as a 32-bit PFN,
* with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
*/
dma_set_coherent_mask(&pci_dev->dev,
DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
}
if (rc)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
rc = vp_legacy_probe(ldev);
if (rc)
return rc;
rc = -ENOMEM;
vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
if (!vp_dev->ioaddr)
goto err_iomap;
vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR;
/* we use the subsystem vendor/device id as the virtio vendor/device
* id. this allows us to use the same PCI vendor/device id for all
* virtio devices and to identify the particular virtio driver by
* the subsystem ids */
vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
vp_dev->vdev.id.device = pci_dev->subsystem_device;
vp_dev->isr = ldev->isr;
vp_dev->vdev.id = ldev->id;
vp_dev->vdev.config = &virtio_pci_config_ops;
@ -264,16 +223,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
vp_dev->del_vq = del_vq;
return 0;
err_iomap:
pci_release_region(pci_dev, 0);
return rc;
}
void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev)
{
struct pci_dev *pci_dev = vp_dev->pci_dev;
struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
pci_iounmap(pci_dev, vp_dev->ioaddr);
pci_release_region(pci_dev, 0);
vp_legacy_remove(ldev);
}

View File

@ -0,0 +1,220 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "linux/virtio_pci.h"
#include <linux/virtio_pci_legacy.h>
#include <linux/module.h>
#include <linux/pci.h>
/*
* vp_legacy_probe: probe the legacy virtio pci device, note that the
* caller is required to enable PCI device before calling this function.
* @ldev: the legacy virtio-pci device
*
* Return 0 on succeed otherwise fail
*/
int vp_legacy_probe(struct virtio_pci_legacy_device *ldev)
{
struct pci_dev *pci_dev = ldev->pci_dev;
int rc;
/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
return -ENODEV;
if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION)
return -ENODEV;
rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
if (rc) {
rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
} else {
/*
* The virtio ring base address is expressed as a 32-bit PFN,
* with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
*/
dma_set_coherent_mask(&pci_dev->dev,
DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
}
if (rc)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
if (rc)
return rc;
ldev->ioaddr = pci_iomap(pci_dev, 0, 0);
if (!ldev->ioaddr)
goto err_iomap;
ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR;
ldev->id.vendor = pci_dev->subsystem_vendor;
ldev->id.device = pci_dev->subsystem_device;
return 0;
err_iomap:
pci_release_region(pci_dev, 0);
return rc;
}
EXPORT_SYMBOL_GPL(vp_legacy_probe);
/*
* vp_legacy_probe: remove and cleanup the legacy virtio pci device
* @ldev: the legacy virtio-pci device
*/
void vp_legacy_remove(struct virtio_pci_legacy_device *ldev)
{
struct pci_dev *pci_dev = ldev->pci_dev;
pci_iounmap(pci_dev, ldev->ioaddr);
pci_release_region(pci_dev, 0);
}
EXPORT_SYMBOL_GPL(vp_legacy_remove);
/*
* vp_legacy_get_features - get features from device
* @ldev: the legacy virtio-pci device
*
* Returns the features read from the device
*/
u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev)
{
return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_features);
/*
* vp_legacy_get_driver_features - get driver features from device
* @ldev: the legacy virtio-pci device
*
* Returns the driver features read from the device
*/
u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev)
{
return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features);
/*
* vp_legacy_set_features - set features to device
* @ldev: the legacy virtio-pci device
* @features: the features set to device
*/
void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
u32 features)
{
iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_features);
/*
* vp_legacy_get_status - get the device status
* @ldev: the legacy virtio-pci device
*
* Returns the status read from device
*/
u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev)
{
return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_status);
/*
* vp_legacy_set_status - set status to device
* @ldev: the legacy virtio-pci device
* @status: the status set to device
*/
void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
u8 status)
{
iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_status);
/*
* vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue
* @ldev: the legacy virtio-pci device
* @index: queue index
* @vector: the config vector
*
* Returns the config vector read from the device
*/
u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
u16 index, u16 vector)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
}
EXPORT_SYMBOL_GPL(vp_legacy_queue_vector);
/*
* vp_legacy_config_vector - set the vector for config interrupt
* @ldev: the legacy virtio-pci device
* @vector: the config vector
*
* Returns the config vector read from the device
*/
u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
u16 vector)
{
/* Setup the vector used for configuration events */
iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
}
EXPORT_SYMBOL_GPL(vp_legacy_config_vector);
/*
* vp_legacy_set_queue_address - set the virtqueue address
* @ldev: the legacy virtio-pci device
* @index: the queue index
* @queue_pfn: pfn of the virtqueue
*/
void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
u16 index, u32 queue_pfn)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address);
/*
* vp_legacy_get_queue_enable - enable a virtqueue
* @ldev: the legacy virtio-pci device
* @index: the queue index
*
* Returns whether a virtqueue is enabled or not
*/
bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
u16 index)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable);
/*
* vp_legacy_get_queue_size - get size for a virtqueue
* @ldev: the legacy virtio-pci device
* @index: the queue index
*
* Returns the size of the virtqueue
*/
u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
u16 index)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Legacy Virtio PCI Device");
MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
MODULE_LICENSE("GPL");

View File

@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev)
*/
while (vp_modern_get_status(mdev))
msleep(1);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
/* Disable VQ/configuration callbacks. */
vp_disable_cbs(vdev);
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
@ -380,6 +380,7 @@ static bool vp_get_shm_region(struct virtio_device *vdev,
}
static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.enable_cbs = vp_enable_cbs,
.get = NULL,
.set = NULL,
.generation = vp_generation,
@ -397,6 +398,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
};
static const struct virtio_config_ops virtio_pci_config_ops = {
.enable_cbs = vp_enable_cbs,
.get = vp_get,
.set = vp_set,
.generation = vp_generation,

View File

@ -14,6 +14,9 @@
#include <linux/spinlock.h>
#include <xen/xen.h>
static bool force_used_validation = false;
module_param(force_used_validation, bool, 0444);
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
@ -79,8 +82,8 @@ struct vring_desc_state_packed {
};
struct vring_desc_extra {
dma_addr_t addr; /* Buffer DMA addr. */
u32 len; /* Buffer length. */
dma_addr_t addr; /* Descriptor DMA addr. */
u32 len; /* Descriptor length. */
u16 flags; /* Descriptor flags. */
u16 next; /* The next desc state in a list. */
};
@ -182,6 +185,9 @@ struct vring_virtqueue {
} packed;
};
/* Per-descriptor in buffer length */
u32 *buflen;
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
unsigned int i, n, avail, descs_used, prev, err_idx;
int head;
bool indirect;
u32 buflen = 0;
START_USE(vq);
@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
indirect);
buflen += sg->length;
}
}
/* Last one doesn't continue. */
@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
else
vq->split.desc_state[head].indir_desc = ctx;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[head] = buflen;
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
BAD_RING(vq, "id %u is not a head!\n", i);
return NULL;
}
if (vq->buflen && unlikely(*len > vq->buflen[i])) {
BAD_RING(vq, "used len %d is larger than in buflen %u\n",
*len, vq->buflen[i]);
return NULL;
}
/* detach_buf_split clears data, so grab it now. */
ret = vq->split.desc_state[i].data;
@ -1050,21 +1067,24 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
}
static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
struct scatterlist *sgs[],
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
struct scatterlist *sgs[],
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
{
struct vring_packed_desc *desc;
struct scatterlist *sg;
unsigned int i, n, err_idx;
u16 head, id;
dma_addr_t addr;
u32 buflen = 0;
head = vq->packed.next_avail_idx;
desc = alloc_indirect_packed(total_sg, gfp);
if (!desc)
return -ENOMEM;
if (unlikely(vq->vq.num_free < 1)) {
pr_debug("Can't add buf len 1 - avail = 0\n");
@ -1089,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
desc[i].addr = cpu_to_le64(addr);
desc[i].len = cpu_to_le32(sg->length);
i++;
if (n >= out_sgs)
buflen += sg->length;
}
}
@ -1142,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
vq->packed.desc_state[id].indir_desc = desc;
vq->packed.desc_state[id].last = id;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[id] = buflen;
vq->num_added += 1;
pr_debug("Added buffer head %i to %p\n", head, vq);
@ -1176,6 +1202,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unsigned int i, n, c, descs_used, err_idx;
__le16 head_flags, flags;
u16 head, id, prev, curr, avail_used_flags;
int err;
u32 buflen = 0;
START_USE(vq);
@ -1191,9 +1219,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
BUG_ON(total_sg == 0);
if (virtqueue_use_indirect(_vq, total_sg))
return virtqueue_add_indirect_packed(vq, sgs, total_sg,
out_sgs, in_sgs, data, gfp);
if (virtqueue_use_indirect(_vq, total_sg)) {
err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
in_sgs, data, gfp);
if (err != -ENOMEM)
return err;
/* fall back on direct */
}
head = vq->packed.next_avail_idx;
avail_used_flags = vq->packed.avail_used_flags;
@ -1250,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
1 << VRING_PACKED_DESC_F_AVAIL |
1 << VRING_PACKED_DESC_F_USED;
}
if (n >= out_sgs)
buflen += sg->length;
}
}
@ -1269,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
vq->packed.desc_state[id].indir_desc = ctx;
vq->packed.desc_state[id].last = prev;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[id] = buflen;
/*
* A driver MUST NOT make the first descriptor in the list
* available before all subsequent descriptors comprising
@ -1455,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
BAD_RING(vq, "id %u is not a head!\n", id);
return NULL;
}
if (vq->buflen && unlikely(*len > vq->buflen[id])) {
BAD_RING(vq, "used len %d is larger than in buflen %u\n",
*len, vq->buflen[id]);
return NULL;
}
/* detach_buf_packed clears data, so grab it now. */
ret = vq->packed.desc_state[id].data;
@ -1660,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
struct vring_virtqueue *vq;
struct vring_packed_desc *ring;
struct vring_packed_desc_event *driver, *device;
struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
size_t ring_size_in_bytes, event_size_in_bytes;
@ -1749,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed(
if (!vq->packed.desc_extra)
goto err_desc_extra;
if (!drv->suppress_used_validation || force_used_validation) {
vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
GFP_KERNEL);
if (!vq->buflen)
goto err_buflen;
} else {
vq->buflen = NULL;
}
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@ -1761,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_buflen:
kfree(vq->packed.desc_extra);
err_desc_extra:
kfree(vq->packed.desc_state);
err_desc_state:
@ -2168,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *),
const char *name)
{
struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
struct vring_virtqueue *vq;
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@ -2227,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
if (!vq->split.desc_extra)
goto err_extra;
if (!drv->suppress_used_validation || force_used_validation) {
vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
GFP_KERNEL);
if (!vq->buflen)
goto err_buflen;
} else {
vq->buflen = NULL;
}
/* Put everything in free lists. */
vq->free_head = 0;
memset(vq->split.desc_state, 0, vring.num *
@ -2237,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_buflen:
kfree(vq->split.desc_extra);
err_extra:
kfree(vq->split.desc_state);
err_state:

View File

@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->set_config(vdpa, offset, buf, len);
vdpa_set_config(vdpa, offset, buf, len);
}
static u32 virtio_vdpa_generation(struct virtio_device *vdev)
@ -145,7 +144,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
/* Assume split virtqueue, switch to packed if necessary */
struct vdpa_vq_state state = {0};
unsigned long flags;
u32 align, num;
u32 align, max_num, min_num = 1;
bool may_reduce_num = true;
int err;
if (!name)
@ -163,16 +163,21 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
if (!info)
return ERR_PTR(-ENOMEM);
num = ops->get_vq_num_max(vdpa);
if (num == 0) {
max_num = ops->get_vq_num_max(vdpa);
if (max_num == 0) {
err = -ENOENT;
goto error_new_virtqueue;
}
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdpa);
may_reduce_num = (max_num == min_num) ? false : true;
/* Create the vring */
align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, num, align, vdev,
true, true, ctx,
vq = vring_create_virtqueue(index, max_num, align, vdev,
true, may_reduce_num, ctx,
virtio_vdpa_notify, callback, name);
if (!vq) {
err = -ENOMEM;

View File

@ -6,6 +6,8 @@
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/vhost_iotlb.h>
#include <linux/virtio_net.h>
#include <linux/if_ether.h>
/**
* struct vdpa_calllback - vDPA callback definition.
@ -63,6 +65,7 @@ struct vdpa_mgmt_dev;
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
* @cf_mutex: Protects get and set access to configuration layout.
* @index: device index
* @features_valid: were features initialized? for legacy guests
* @use_va: indicate whether virtual address must be used by this device
@ -74,6 +77,7 @@ struct vdpa_device {
struct device dev;
struct device *dma_dev;
const struct vdpa_config_ops *config;
struct mutex cf_mutex; /* Protects get/set config */
unsigned int index;
bool features_valid;
bool use_va;
@ -91,6 +95,14 @@ struct vdpa_iova_range {
u64 last;
};
struct vdpa_dev_set_config {
struct {
u8 mac[ETH_ALEN];
u16 mtu;
} net;
u64 mask;
};
/**
* Corresponding file area for device memory mapping
* @file: vma->vm_file for the mapping
@ -171,6 +183,9 @@ struct vdpa_map_file {
* @get_vq_num_max: Get the max size of virtqueue
* @vdev: vdpa device
* Returns u16: max size of virtqueue
* @get_vq_num_min: Get the min size of virtqueue (optional)
* @vdev: vdpa device
* Returns u16: min size of virtqueue
* @get_device_id: Get virtio device id
* @vdev: vdpa device
* Returns u32: virtio device id
@ -257,7 +272,7 @@ struct vdpa_config_ops {
struct vdpa_notification_area
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
/* vq irq is not expected to be changed once DRIVER_OK is set */
int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx);
int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx);
/* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev);
@ -266,6 +281,7 @@ struct vdpa_config_ops {
void (*set_config_cb)(struct vdpa_device *vdev,
struct vdpa_callback *cb);
u16 (*get_vq_num_max)(struct vdpa_device *vdev);
u16 (*get_vq_num_min)(struct vdpa_device *vdev);
u32 (*get_device_id)(struct vdpa_device *vdev);
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
@ -382,26 +398,16 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
return ops->set_features(vdev, features);
}
static inline void vdpa_get_config(struct vdpa_device *vdev,
unsigned int offset, void *buf,
unsigned int len)
{
const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
vdpa_set_features(vdev, 0);
ops->get_config(vdev, offset, buf, len);
}
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
const void *buf, unsigned int length);
/**
* struct vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
* @mdev: parent device to use for device addition
* @name: name of the new vdpa device
* @config: config attributes to apply to the device under creation
* Driver need to add a new device using _vdpa_register_device()
* after fully initializing the vdpa device. Driver must return 0
* on success or appropriate error code.
@ -412,14 +418,25 @@ static inline void vdpa_get_config(struct vdpa_device *vdev,
* _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config);
void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
};
/**
* struct vdpa_mgmt_dev - vdpa management device
* @device: Management parent device
* @ops: operations supported by management device
* @id_table: Pointer to device id table of supported ids
* @config_attr_mask: bit mask of attributes of type enum vdpa_attr that
* management device support during dev_add callback
* @list: list entry
*/
struct vdpa_mgmt_dev {
struct device *device;
const struct vdpa_mgmtdev_ops *ops;
const struct virtio_device_id *id_table; /* supported ids */
const struct virtio_device_id *id_table;
u64 config_attr_mask;
struct list_head list;
};

View File

@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
* @feature_table_size: number of entries in the feature table array.
* @feature_table_legacy: same as feature_table but when working in legacy mode.
* @feature_table_size_legacy: number of entries in feature table legacy array.
* @suppress_used_validation: set to not have core validate used length
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @scan: optional function to call after successful probe; intended
* for virtio-scsi to invoke a scan.
@ -168,6 +169,7 @@ struct virtio_driver {
unsigned int feature_table_size;
const unsigned int *feature_table_legacy;
unsigned int feature_table_size_legacy;
bool suppress_used_validation;
int (*validate)(struct virtio_device *dev);
int (*probe)(struct virtio_device *dev);
void (*scan)(struct virtio_device *dev);

View File

@ -23,6 +23,8 @@ struct virtio_shm_region {
* any of @get/@set, @get_status/@set_status, or @get_features/
* @finalize_features are NOT safe to be called from an atomic
* context.
* @enable_cbs: enable the callbacks
* vdev: the virtio_device
* @get: read the value of a configuration field
* vdev: the virtio_device
* offset: the offset of the configuration field
@ -75,6 +77,7 @@ struct virtio_shm_region {
*/
typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops {
void (*enable_cbs)(struct virtio_device *vdev);
void (*get)(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len);
void (*set)(struct virtio_device *vdev, unsigned offset,
@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev)
{
unsigned status = dev->config->get_status(dev);
if (dev->config->enable_cbs)
dev->config->enable_cbs(dev);
BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
}

View File

@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VIRTIO_PCI_LEGACY_H
#define _LINUX_VIRTIO_PCI_LEGACY_H
#include "linux/mod_devicetable.h"
#include <linux/pci.h>
#include <linux/virtio_pci.h>
struct virtio_pci_legacy_device {
struct pci_dev *pci_dev;
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* The IO mapping for the PCI config space (legacy mode only) */
void __iomem *ioaddr;
struct virtio_device_id id;
};
u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev);
u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev);
void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
u32 features);
u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev);
void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
u8 status);
u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
u16 idx, u16 vector);
u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
u16 vector);
void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
u16 index, u32 queue_pfn);
bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
u16 idx);
void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev,
u16 idx, u16 size);
u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
u16 idx);
int vp_legacy_probe(struct virtio_pci_legacy_device *ldev);
void vp_legacy_remove(struct virtio_pci_legacy_device *ldev);
#endif

View File

@ -17,6 +17,7 @@ enum vdpa_command {
VDPA_CMD_DEV_NEW,
VDPA_CMD_DEV_DEL,
VDPA_CMD_DEV_GET, /* can dump */
VDPA_CMD_DEV_CONFIG_GET, /* can dump */
};
enum vdpa_attr {
@ -32,6 +33,12 @@ enum vdpa_attr {
VDPA_ATTR_DEV_VENDOR_ID, /* u32 */
VDPA_ATTR_DEV_MAX_VQS, /* u32 */
VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */
VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */
VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */
VDPA_ATTR_DEV_NET_STATUS, /* u8 */
VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */
VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,

View File

@ -11,9 +11,15 @@
#include <linux/const.h>
#include <linux/types.h>
/* Virtio I2C Feature bits */
#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0
/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */
#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0)
/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */
#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1)
/**
* struct virtio_i2c_out_hdr - the virtio I2C message OUT header
* @addr: the controlled device address