From 3fcedec752108de5d99b9f0373ff880756a1e87d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 26 Oct 2010 12:25:32 +0200 Subject: [PATCH 01/11] drivers/vhost/vhost.c: delete double assignment Delete successive assignments to the same location. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression i; @@ *i = ...; i = ...; // Signed-off-by: Julia Lawall Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 94701ff3a23a..ed277276fa98 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -157,7 +157,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->avail_idx = 0; vq->last_used_idx = 0; vq->used_flags = 0; - vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; vq->vhost_hlen = 0; From 533a19b4b88fcf81da3106b94f0ac4ac8b33a248 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 6 Oct 2010 15:34:38 +0200 Subject: [PATCH 02/11] vhost: put mm after thread stop makes it possible to batch use/unuse mm Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ed277276fa98..9920bae6ee43 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -401,15 +401,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev) kfree(rcu_dereference_protected(dev->memory, lockdep_is_held(&dev->mutex))); RCU_INIT_POINTER(dev->memory, NULL); - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; - WARN_ON(!list_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; } + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; } static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) From 64e1c80748afca3b4818ebb232a9668bf529886d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 6 Oct 2010 15:34:45 +0200 Subject: [PATCH 03/11] vhost-net: batch use/unuse mm Move use/unuse mm to vhost.c which makes it possible to batch these operations. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 7 ------- drivers/vhost/vhost.c | 7 ++++++- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4b4da5b86ff9..d10da280fa0f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -142,7 +141,6 @@ static void handle_tx(struct vhost_net *net) return; } - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); @@ -207,7 +205,6 @@ static void handle_tx(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } static int peek_head_len(struct sock *sk) @@ -312,7 +309,6 @@ static void handle_rx_big(struct vhost_net *net) if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); hdr_size = vq->vhost_hlen; @@ -391,7 +387,6 @@ static void handle_rx_big(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } /* Expects to be always run from workqueue - which acts as @@ -423,7 +418,6 @@ static void handle_rx_mergeable(struct vhost_net *net) if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); vhost_hlen = vq->vhost_hlen; @@ -500,7 +494,6 @@ static void handle_rx_mergeable(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } static void handle_rx(struct vhost_net *net) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9920bae6ee43..c17c881e235a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,8 @@ static int vhost_worker(void *data) struct vhost_work *work = NULL; unsigned uninitialized_var(seq); + use_mm(dev->mm); + for (;;) { /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); @@ -191,7 +194,7 @@ static int vhost_worker(void *data) if (kthread_should_stop()) { spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); - return 0; + break; } if (!list_empty(&dev->work_list)) { work = list_first_entry(&dev->work_list, @@ -209,6 +212,8 @@ static int vhost_worker(void *data) schedule(); } + unuse_mm(dev->mm); + return 0; } /* Helper to allocate iovec buffers for all vqs. */ From dfe5ac5b18be5b10d01a17e734a9905c0def6088 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 21 Sep 2010 14:18:01 +0200 Subject: [PATCH 04/11] vhost: copy_to_user -> __copy_to_user We do access_ok checks at setup time, so we don't need to redo them on each access. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c17c881e235a..e6a093187a0e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1258,7 +1258,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx % vq->num; used = vq->used->ring + start; - if (copy_to_user(used, heads, count * sizeof *used)) { + if (__copy_to_user(used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } From 8b7347aab6865ae8a2e5a8b0f1deea12da3d3aff Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 19 Sep 2010 15:56:30 +0200 Subject: [PATCH 05/11] vhost: get/put_user -> __get/__put_user We do access_ok checks on all ring values on an ioctl, so we don't need to redo them on each access. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e6a093187a0e..a29d91c776b4 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1095,7 +1095,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) { + if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1116,8 +1116,8 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(get_user(head, - &vq->avail->ring[last_avail_idx % vq->num]))) { + if (unlikely(__get_user(head, + &vq->avail->ring[last_avail_idx % vq->num]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1216,17 +1216,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) /* The virtqueue contains a ring of used buffers. Get a pointer to the * next entry in that used ring. */ used = &vq->used->ring[vq->last_used_idx % vq->num]; - if (put_user(head, &used->id)) { + if (__put_user(head, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (put_user(len, &used->len)) { + if (__put_user(len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } /* Make sure buffer is written before we update index. */ smp_wmb(); - if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } @@ -1319,7 +1319,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (get_user(flags, &vq->avail->flags)) { + if (__get_user(flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return; } @@ -1370,7 +1370,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = get_user(avail_idx, &vq->avail->idx); + r = __get_user(avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); From bf5e0bd27f7cbaca4d52ae395bbf3715775efebd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 14 Nov 2010 17:33:25 +0200 Subject: [PATCH 06/11] vhost: remove unused include vhost.c does not need to know about sockets, don't include sock.h Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a29d91c776b4..916cdbc279e0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -30,8 +30,6 @@ #include #include -#include - #include "vhost.h" enum { From a290aec88a9c4747353ea7aa9b2569bd61297c3c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 29 Nov 2010 13:48:40 +0800 Subject: [PATCH 07/11] vhost: fix typos in comment Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 2 +- drivers/vhost/vhost.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10da280fa0f..14fc189ac0a8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -452,7 +452,7 @@ static void handle_rx_mergeable(struct vhost_net *net) move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); else /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: - * needed because sendmsg can modify msg_iov. */ + * needed because recvmsg can modify msg_iov. */ copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); msg.msg_iovlen = in; err = sock->ops->recvmsg(NULL, sock, &msg, diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 073d06ae091f..2af44b7b1f3f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -102,7 +102,7 @@ struct vhost_virtqueue { * flush the vhost_work instead of synchronize_rcu. Therefore readers do * not need to call rcu_read_lock/rcu_read_unlock: the beginning of * vhost_work execution acts instead of rcu_read_lock() and the end of - * vhost_work execution acts instead of rcu_read_lock(). + * vhost_work execution acts instead of rcu_read_unlock(). * Writers use virtqueue mutex. */ void __rcu *private_data; /* Log write descriptors */ From 3bf9be40ff76b6df136f14a497167c116b2b3c53 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Nov 2010 10:19:07 +0200 Subject: [PATCH 08/11] vhost: correctly set bits of dirty pages Fix two bugs in dirty page logging: When counting pages we should increase address by 1 instead of VHOST_PAGE_SIZE. Make log_write() correctly process requests that cross pages with write_address not starting at page boundary. Reported-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 916cdbc279e0..4c256d15c249 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -885,6 +885,7 @@ static int log_write(void __user *log_base, int r; if (!write_length) return 0; + write_length += write_address % VHOST_PAGE_SIZE; write_address /= VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; @@ -898,7 +899,7 @@ static int log_write(void __user *log_base, if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; - write_address += VHOST_PAGE_SIZE; + write_address += 1; } return r; } From 28831ee60b79bed50958c9cb0d2e76cdc98406f9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Nov 2010 10:22:10 +0200 Subject: [PATCH 09/11] vhost: better variable name in logging We really store a page offset in write_address, so rename it write_page to avoid confusion. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4c256d15c249..38244f59cdd9 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -882,15 +882,15 @@ static int set_bit_to_user(int nr, void __user *addr) static int log_write(void __user *log_base, u64 write_address, u64 write_length) { + u64 write_page = write_address / VHOST_PAGE_SIZE; int r; if (!write_length) return 0; write_length += write_address % VHOST_PAGE_SIZE; - write_address /= VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; - u64 log = base + write_address / 8; - int bit = write_address % 8; + u64 log = base + write_page / 8; + int bit = write_page % 8; if ((u64)(unsigned long)log != log) return -EFAULT; r = set_bit_to_user(bit, (void __user *)(unsigned long)log); @@ -899,7 +899,7 @@ static int log_write(void __user *log_base, if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; - write_address += 1; + write_page += 1; } return r; } From 71ccc212e5b28dfcc870b6db6589c2df264fdc6a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Nov 2010 19:09:01 +0200 Subject: [PATCH 10/11] vhost test module This adds a test module for vhost infrastructure. Intentionally not tied to kbuild to prevent people from installing and loading it accidentally. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 320 +++++++++++++++++++++++++++ drivers/vhost/test.h | 7 + tools/virtio/vhost_test/Makefile | 2 + tools/virtio/vhost_test/vhost_test.c | 1 + 4 files changed, 330 insertions(+) create mode 100644 drivers/vhost/test.c create mode 100644 drivers/vhost/test.h create mode 100644 tools/virtio/vhost_test/Makefile create mode 100644 tools/virtio/vhost_test/vhost_test.c diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c new file mode 100644 index 000000000000..099f30230d06 --- /dev/null +++ b/drivers/vhost/test.c @@ -0,0 +1,320 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * test virtio server in host kernel. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" +#include "vhost.c" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_TEST_WEIGHT 0x80000 + +enum { + VHOST_TEST_VQ = 0, + VHOST_TEST_VQ_MAX = 1, +}; + +struct vhost_test { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_TEST_VQ_MAX]; +}; + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_vq(struct vhost_test *n) +{ + struct vhost_virtqueue *vq = &n->dev.vqs[VHOST_TEST_VQ]; + unsigned out, in; + int head; + size_t len, total_len = 0; + void *private; + + private = rcu_dereference_check(vq->private_data, 1); + if (!private) + return; + + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + for (;;) { + head = vhost_get_vq_desc(&n->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* On error, stop handling until the next kick. */ + if (unlikely(head < 0)) + break; + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected 0 len for TX\n"); + break; + } + vhost_add_used_and_signal(&n->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); +} + +static void handle_vq_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_test *n = container_of(vq->dev, struct vhost_test, dev); + + handle_vq(n); +} + +static int vhost_test_open(struct inode *inode, struct file *f) +{ + struct vhost_test *n = kmalloc(sizeof *n, GFP_KERNEL); + struct vhost_dev *dev; + int r; + + if (!n) + return -ENOMEM; + + dev = &n->dev; + n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; + r = vhost_dev_init(dev, n->vqs, VHOST_TEST_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + f->private_data = n; + + return 0; +} + +static void *vhost_test_stop_vq(struct vhost_test *n, + struct vhost_virtqueue *vq) +{ + void *private; + + mutex_lock(&vq->mutex); + private = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return private; +} + +static void vhost_test_stop(struct vhost_test *n, void **privatep) +{ + *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); +} + +static void vhost_test_flush_vq(struct vhost_test *n, int index) +{ + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_test_flush(struct vhost_test *n) +{ + vhost_test_flush_vq(n, VHOST_TEST_VQ); +} + +static int vhost_test_release(struct inode *inode, struct file *f) +{ + struct vhost_test *n = f->private_data; + void *private; + + vhost_test_stop(n, &private); + vhost_test_flush(n); + vhost_dev_cleanup(&n->dev); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_test_flush(n); + kfree(n); + return 0; +} + +static long vhost_test_run(struct vhost_test *n, int test) +{ + void *priv, *oldpriv; + struct vhost_virtqueue *vq; + int r, index; + + if (test < 0 || test > 1) + return -EINVAL; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + for (index = 0; index < n->dev.nvqs; ++index) { + /* Verify that ring has been setup correctly. */ + if (!vhost_vq_access_ok(&n->vqs[index])) { + r = -EFAULT; + goto err; + } + } + + for (index = 0; index < n->dev.nvqs; ++index) { + vq = n->vqs + index; + mutex_lock(&vq->mutex); + priv = test ? n : NULL; + + /* start polling new socket */ + oldpriv = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); + rcu_assign_pointer(vq->private_data, priv); + + mutex_unlock(&vq->mutex); + + if (oldpriv) { + vhost_test_flush_vq(n, index); + } + } + + mutex_unlock(&n->dev.mutex); + return 0; + +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_test_reset_owner(struct vhost_test *n) +{ + void *priv = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_test_stop(n, &priv); + vhost_test_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + return err; +} + +static int vhost_test_set_features(struct vhost_test *n, u64 features) +{ + mutex_lock(&n->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&n->dev)) { + mutex_unlock(&n->dev.mutex); + return -EFAULT; + } + n->dev.acked_features = features; + smp_wmb(); + vhost_test_flush(n); + mutex_unlock(&n->dev.mutex); + return 0; +} + +static long vhost_test_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_test *n = f->private_data; + void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + int test; + u64 features; + int r; + switch (ioctl) { + case VHOST_TEST_RUN: + if (copy_from_user(&test, argp, sizeof test)) + return -EFAULT; + return vhost_test_run(n, test); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + if (copy_to_user(featurep, &features, sizeof features)) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, featurep, sizeof features)) + return -EFAULT; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + return vhost_test_set_features(n, features); + case VHOST_RESET_OWNER: + return vhost_test_reset_owner(n); + default: + mutex_lock(&n->dev.mutex); + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_test_flush(n); + mutex_unlock(&n->dev.mutex); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_test_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_test_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations vhost_test_fops = { + .owner = THIS_MODULE, + .release = vhost_test_release, + .unlocked_ioctl = vhost_test_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_test_compat_ioctl, +#endif + .open = vhost_test_open, + .llseek = noop_llseek, +}; + +static struct miscdevice vhost_test_misc = { + MISC_DYNAMIC_MINOR, + "vhost-test", + &vhost_test_fops, +}; + +static int vhost_test_init(void) +{ + return misc_register(&vhost_test_misc); +} +module_init(vhost_test_init); + +static void vhost_test_exit(void) +{ + misc_deregister(&vhost_test_misc); +} +module_exit(vhost_test_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel side for virtio simulator"); diff --git a/drivers/vhost/test.h b/drivers/vhost/test.h new file mode 100644 index 000000000000..1fef5df82153 --- /dev/null +++ b/drivers/vhost/test.h @@ -0,0 +1,7 @@ +#ifndef LINUX_VHOST_TEST_H +#define LINUX_VHOST_TEST_H + +/* Start a given test on the virtio null device. 0 stops all tests. */ +#define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int) + +#endif diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile new file mode 100644 index 000000000000..a1d35b81b314 --- /dev/null +++ b/tools/virtio/vhost_test/Makefile @@ -0,0 +1,2 @@ +obj-m += vhost_test.o +EXTRA_CFLAGS += -Idrivers/vhost diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c new file mode 100644 index 000000000000..18735189e62b --- /dev/null +++ b/tools/virtio/vhost_test/vhost_test.c @@ -0,0 +1 @@ +#include "test.c" From 4e53f78e5b06c073a5c10814c72e98c1ca8a9f10 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Nov 2010 19:16:37 +0200 Subject: [PATCH 11/11] tools/virtio: virtio_test tool This is the userspace part of the tool: it includes a bunch of stubs for linux APIs, somewhat simular to linuxsched. This makes it possible to recompile the ring code in userspace. A small test example is implemented combining this with vhost_test module. Signed-off-by: Michael S. Tsirkin --- tools/virtio/Makefile | 12 ++ tools/virtio/linux/device.h | 2 + tools/virtio/linux/slab.h | 2 + tools/virtio/linux/virtio.h | 223 ++++++++++++++++++++++++++++++++ tools/virtio/virtio_test.c | 248 ++++++++++++++++++++++++++++++++++++ 5 files changed, 487 insertions(+) create mode 100644 tools/virtio/Makefile create mode 100644 tools/virtio/linux/device.h create mode 100644 tools/virtio/linux/slab.h create mode 100644 tools/virtio/linux/virtio.h create mode 100644 tools/virtio/virtio_test.c diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile new file mode 100644 index 000000000000..d1d442ed106a --- /dev/null +++ b/tools/virtio/Makefile @@ -0,0 +1,12 @@ +all: test mod +test: virtio_test +virtio_test: virtio_ring.o virtio_test.o +CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD +vpath %.c ../../drivers/virtio +mod: + ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test +.PHONY: all test mod clean +clean: + ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \ + vhost_test/Module.symvers vhost_test/modules.order *.d +-include *.d diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h new file mode 100644 index 000000000000..4ad7e1df0db5 --- /dev/null +++ b/tools/virtio/linux/device.h @@ -0,0 +1,2 @@ +#ifndef LINUX_DEVICE_H +#endif diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h new file mode 100644 index 000000000000..81baeac8ae40 --- /dev/null +++ b/tools/virtio/linux/slab.h @@ -0,0 +1,2 @@ +#ifndef LINUX_SLAB_H +#endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h new file mode 100644 index 000000000000..669bcdd45805 --- /dev/null +++ b/tools/virtio/linux/virtio.h @@ -0,0 +1,223 @@ +#ifndef LINUX_VIRTIO_H +#define LINUX_VIRTIO_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef unsigned long long dma_addr_t; + +struct scatterlist { + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +}; + +struct page { + unsigned long long dummy; +}; + +#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) + +/* Physical == Virtual */ +#define virt_to_phys(p) ((unsigned long)p) +#define phys_to_virt(a) ((void *)(unsigned long)(a)) +/* Page address: Virtual / 4K */ +#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \ + sizeof(struct page))) +#define offset_in_page(p) (((unsigned long)p) % 4096) +#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \ + sg->offset) +static inline void sg_mark_end(struct scatterlist *sg) +{ + /* + * Set termination bit, clear potential chain bit + */ + sg->page_link |= 0x02; + sg->page_link &= ~0x01; +} +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); + sg_mark_end(&sgl[nents - 1]); +} +static inline void sg_assign_page(struct scatterlist *sg, struct page *page) +{ + unsigned long page_link = sg->page_link & 0x3; + + /* + * In order for the low bit stealing approach to work, pages + * must be aligned at a 32-bit boundary as a minimum. + */ + BUG_ON((unsigned long) page & 0x03); + sg->page_link = page_link | (unsigned long) page; +} + +static inline void sg_set_page(struct scatterlist *sg, struct page *page, + unsigned int len, unsigned int offset) +{ + sg_assign_page(sg, page); + sg->offset = offset; + sg->length = len; +} + +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); +} + +static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) +{ + sg_init_table(sg, 1); + sg_set_buf(sg, buf, buflen); +} + +typedef __u16 u16; + +typedef enum { + GFP_KERNEL, + GFP_ATOMIC, +} gfp_t; +typedef enum { + IRQ_NONE, + IRQ_HANDLED +} irqreturn_t; + +static inline void *kmalloc(size_t s, gfp_t gfp) +{ + return malloc(s); +} + +static inline void kfree(void *p) +{ + free(p); +} + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define uninitialized_var(x) x = x + +# ifndef likely +# define likely(x) (__builtin_expect(!!(x), 1)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_expect(!!(x), 0)) +# endif + +#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#ifdef DEBUG +#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#else +#define pr_debug(format, ...) do {} while (0) +#endif +#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) + +/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ +#define list_add_tail(a, b) do {} while (0) +#define list_del(a) do {} while (0) + +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +/* TODO: Not atomic as it should be: + * we don't use this for anything important. */ +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p &= ~mask; +} + +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/* The only feature we care to support */ +#define virtio_has_feature(dev, feature) \ + test_bit((feature), (dev)->features) +/* end of stubs */ + +struct virtio_device { + void *dev; + unsigned long features[1]; +}; + +struct virtqueue { + /* TODO: commented as list macros are empty stubs for now. + * Broken but enough for virtio_ring.c + * struct list_head list; */ + void (*callback)(struct virtqueue *vq); + const char *name; + struct virtio_device *vdev; + void *priv; +}; + +#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \ + void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \ +} +#define MODULE_LICENSE(__MODULE_LICENSE_value) \ + const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value + +#define CONFIG_SMP + +#if defined(__i386__) || defined(__x86_64__) +#define barrier() asm volatile("" ::: "memory") +#define mb() __sync_synchronize() + +#define smp_mb() mb() +# define smp_rmb() barrier() +# define smp_wmb() barrier() +#else +#error Please fill in barrier macros +#endif + +/* Interfaces exported by virtio_ring. */ +int virtqueue_add_buf_gfp(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); + +static inline int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data) +{ + return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); +} + +void virtqueue_kick(struct virtqueue *vq); + +void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); + +void virtqueue_disable_cb(struct virtqueue *vq); + +bool virtqueue_enable_cb(struct virtqueue *vq); + +void *virtqueue_detach_unused_buf(struct virtqueue *vq); +struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + void *pages, + void (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name); +void vring_del_virtqueue(struct virtqueue *vq); + +#endif diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c new file mode 100644 index 000000000000..df0c6d2c3860 --- /dev/null +++ b/tools/virtio/virtio_test.c @@ -0,0 +1,248 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../drivers/vhost/test.h" + +struct vq_info { + int kick; + int call; + int num; + int idx; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct pollfd fds[1]; + struct vq_info vqs[1]; + int nvqs; + void *buf; + size_t buf_size; + struct vhost_memory *mem; +}; + +void vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + r = write(info->kick, &v, sizeof v); + assert(r == sizeof v); +} + +void vq_callback(struct virtqueue *vq) +{ +} + + +void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + unsigned long long features = dev->vdev.features[0]; + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + int r; + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); + file.fd = info->call; + r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + assert(r >= 0); +} + +static void vq_info_add(struct vdev_info *dev, int num) +{ + struct vq_info *info = &dev->vqs[dev->nvqs]; + int r; + info->idx = dev->nvqs; + info->kick = eventfd(0, EFD_NONBLOCK); + info->call = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, + vq_notify, vq_callback, "test"); + assert(info->vq); + info->vq->priv = info; + vhost_vq_setup(dev, info); + dev->fds[info->idx].fd = info->call; + dev->fds[info->idx].events = POLLIN; + dev->nvqs++; +} + +static void vdev_info_init(struct vdev_info* dev, unsigned long long features) +{ + int r; + memset(dev, 0, sizeof *dev); + dev->vdev.features[0] = features; + dev->vdev.features[1] = features >> 32; + dev->buf_size = 1024; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->control = open("/dev/vhost-test", O_RDWR); + assert(dev->control >= 0); + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); +} + +/* TODO: this is pretty bad: we get a cache line bounce + * for the wait queue on poll and another one on read, + * plus the read which is there just to clear the + * current state. */ +static void wait_for_interrupt(struct vdev_info *dev) +{ + int i; + unsigned long long val; + poll(dev->fds, dev->nvqs, -1); + for (i = 0; i < dev->nvqs; ++i) + if (dev->fds[i].revents & POLLIN) { + read(dev->fds[i].fd, &val, sizeof val); + } +} + +static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) +{ + struct scatterlist sl; + long started = 0, completed = 0; + long completed_before; + int r, test = 1; + unsigned len; + long long spurious = 0; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); + assert(r >= 0); + for (;;) { + virtqueue_disable_cb(vq->vq); + completed_before = completed; + do { + if (started < bufs) { + sg_init_one(&sl, dev->buf, dev->buf_size); + r = virtqueue_add_buf(vq->vq, &sl, 1, 0, + dev->buf + started); + if (likely(r >= 0)) { + ++started; + virtqueue_kick(vq->vq); + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (virtqueue_get_buf(vq->vq, &len)) { + ++completed; + r = 0; + } + + } while (r >= 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (completed == bufs) + break; + if (virtqueue_enable_cb(vq->vq)) { + wait_for_interrupt(dev); + } + } + test = 0; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); + assert(r >= 0); + fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); +} + +const char optstring[] = "h"; +const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + } +}; + +static void help() +{ + fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); +} + +int main(int argc, char **argv) +{ + struct vdev_info dev; + unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + int o; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'h': + help(); + goto done; + case 'i': + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); + break; + default: + assert(0); + break; + } + } + +done: + vdev_info_init(&dev, features); + vq_info_add(&dev, 256); + run_test(&dev, &dev.vqs[0], 0x100000); + return 0; +}