2007-10-22 11:03:36 +10:00
|
|
|
#ifndef _LINUX_VIRTIO_H
|
|
|
|
#define _LINUX_VIRTIO_H
|
|
|
|
/* Everything a virtio driver needs to work with any particular virtio
|
|
|
|
* implementation. */
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/scatterlist.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/mod_devicetable.h>
|
2010-04-29 17:26:37 +03:00
|
|
|
#include <linux/gfp.h>
|
2007-10-22 11:03:36 +10:00
|
|
|
|
|
|
|
/**
|
|
|
|
* virtqueue - a queue to register buffers for sending or receiving.
|
2009-06-12 22:16:35 -06:00
|
|
|
* @list: the chain of virtqueues for this device
|
2007-10-22 11:03:36 +10:00
|
|
|
* @callback: the function to call when buffers are consumed (can be NULL).
|
2009-06-12 22:16:35 -06:00
|
|
|
* @name: the name of this virtqueue (mainly for debugging)
|
2007-10-22 11:03:36 +10:00
|
|
|
* @vdev: the virtio device this queue was created for.
|
|
|
|
* @priv: a pointer for the virtqueue implementation to use.
|
|
|
|
*/
|
2009-06-12 22:16:35 -06:00
|
|
|
struct virtqueue {
|
|
|
|
struct list_head list;
|
2008-02-04 23:49:57 -05:00
|
|
|
void (*callback)(struct virtqueue *vq);
|
2009-06-12 22:16:35 -06:00
|
|
|
const char *name;
|
2007-10-22 11:03:36 +10:00
|
|
|
struct virtio_device *vdev;
|
|
|
|
void *priv;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
2010-04-12 16:19:07 +03:00
|
|
|
* operations for virtqueue
|
|
|
|
* virtqueue_add_buf: expose buffer to other end
|
2007-10-22 11:03:36 +10:00
|
|
|
* vq: the struct virtqueue we're talking about.
|
|
|
|
* sg: the description of the buffer(s).
|
|
|
|
* out_num: the number of sg readable by other side
|
|
|
|
* in_num: the number of sg which are writable (after readable ones)
|
|
|
|
* data: the token identifying the buffer.
|
2010-04-29 17:26:37 +03:00
|
|
|
* gfp: how to do memory allocations (if necessary).
|
2009-09-23 22:26:31 -06:00
|
|
|
* Returns remaining capacity of queue (sg segments) or a negative error.
|
2010-04-12 16:19:07 +03:00
|
|
|
* virtqueue_kick: update after add_buf
|
2007-10-22 11:03:36 +10:00
|
|
|
* vq: the struct virtqueue
|
|
|
|
* After one or more add_buf calls, invoke this to kick the other side.
|
2010-04-12 16:19:07 +03:00
|
|
|
* virtqueue_get_buf: get the next used buffer
|
2007-10-22 11:03:36 +10:00
|
|
|
* vq: the struct virtqueue we're talking about.
|
|
|
|
* len: the length written into the buffer
|
|
|
|
* Returns NULL or the "data" token handed to add_buf.
|
2010-04-12 16:19:07 +03:00
|
|
|
* virtqueue_disable_cb: disable callbacks
|
2008-02-04 23:49:57 -05:00
|
|
|
* vq: the struct virtqueue we're talking about.
|
2008-04-07 14:30:28 +10:00
|
|
|
* Note that this is not necessarily synchronous, hence unreliable and only
|
|
|
|
* useful as an optimization.
|
2010-04-12 16:19:07 +03:00
|
|
|
* virtqueue_enable_cb: restart callbacks after disable_cb.
|
2007-10-22 11:03:36 +10:00
|
|
|
* vq: the struct virtqueue we're talking about.
|
virtio: fix race in enable_cb
There is a race in virtio_net, dealing with disabling/enabling the callback.
I saw the following oops:
kernel BUG at /space/kvm/drivers/virtio/virtio_ring.c:218!
illegal operation: 0001 [#1] SMP
Modules linked in: sunrpc dm_mod
CPU: 2 Not tainted 2.6.25-rc1zlive-host-10623-gd358142-dirty #99
Process swapper (pid: 0, task: 000000000f85a610, ksp: 000000000f873c60)
Krnl PSW : 0404300180000000 00000000002b81a6 (vring_disable_cb+0x16/0x20)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:3 PM:0 EA:3
Krnl GPRS: 0000000000000001 0000000000000001 0000000010005800 0000000000000001
000000000f3a0900 000000000f85a610 0000000000000000 0000000000000000
0000000000000000 000000000f870000 0000000000000000 0000000000001237
000000000f3a0920 000000000010ff74 00000000002846f6 000000000fa0bcd8
Krnl Code: 00000000002b819a: a7110001 tmll %r1,1
00000000002b819e: a7840004 brc 8,2b81a6
00000000002b81a2: a7f40001 brc 15,2b81a4
>00000000002b81a6: a51b0001 oill %r1,1
00000000002b81aa: 40102000 sth %r1,0(%r2)
00000000002b81ae: 07fe bcr 15,%r14
00000000002b81b0: eb7ff0380024 stmg %r7,%r15,56(%r15)
00000000002b81b6: a7f13e00 tmll %r15,15872
Call Trace:
([<000000000fa0bcd0>] 0xfa0bcd0)
[<00000000002b8350>] vring_interrupt+0x5c/0x6c
[<000000000010ab08>] do_extint+0xb8/0xf0
[<0000000000110716>] ext_no_vtime+0x16/0x1a
[<0000000000107e72>] cpu_idle+0x1c2/0x1e0
The problem can be triggered with a high amount of host->guest traffic.
I think its the following race:
poll says netif_rx_complete
poll calls enable_cb
enable_cb opens the interrupt mask
a new packet comes, an interrupt is triggered----\
enable_cb sees that there is more work |
enable_cb disables the interrupt |
. V
. interrupt is delivered
. skb_recv_done does atomic napi test, ok
some waiting disable_cb is called->check fails->bang!
.
poll would do napi check
poll would do disable_cb
The fix is to let enable_cb not disable the interrupt again, but expect the
caller to do the cleanup if it returns false. In that case, the interrupt is
only disabled, if the napi test_set_bit was successful.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (cleaned up doco)
2008-03-14 14:17:05 +01:00
|
|
|
* This re-enables callbacks; it returns "false" if there are pending
|
|
|
|
* buffers in the queue, to detect a possible race between the driver
|
|
|
|
* checking for more work, and enabling callbacks.
|
2010-04-12 16:19:07 +03:00
|
|
|
* virtqueue_detach_unused_buf: detach first unused buffer
|
2010-01-18 19:15:23 +05:30
|
|
|
* vq: the struct virtqueue we're talking about.
|
|
|
|
* Returns NULL or the "data" token handed to add_buf
|
2007-10-22 11:03:36 +10:00
|
|
|
*
|
|
|
|
* Locking rules are straightforward: the driver is responsible for
|
2008-04-07 14:30:28 +10:00
|
|
|
* locking. No two operations may be invoked simultaneously, with the exception
|
2010-04-12 16:19:07 +03:00
|
|
|
* of virtqueue_disable_cb.
|
2007-10-22 11:03:36 +10:00
|
|
|
*
|
|
|
|
* All operations can be called in any context.
|
|
|
|
*/
|
|
|
|
|
2010-04-29 17:26:37 +03:00
|
|
|
int virtqueue_add_buf_gfp(struct virtqueue *vq,
|
|
|
|
struct scatterlist sg[],
|
|
|
|
unsigned int out_num,
|
|
|
|
unsigned int in_num,
|
|
|
|
void *data,
|
|
|
|
gfp_t gfp);
|
|
|
|
|
|
|
|
static inline int virtqueue_add_buf(struct virtqueue *vq,
|
|
|
|
struct scatterlist sg[],
|
|
|
|
unsigned int out_num,
|
|
|
|
unsigned int in_num,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
|
|
|
|
}
|
2007-10-22 11:03:36 +10:00
|
|
|
|
2010-04-12 16:19:07 +03:00
|
|
|
/* Kick the other side; invoke after one or more add_buf calls. */
void virtqueue_kick(struct virtqueue *vq);
|
2007-10-22 11:03:36 +10:00
|
|
|
|
2010-04-12 16:19:07 +03:00
|
|
|
/*
 * Get the next used buffer: returns NULL or the "data" token handed to
 * add_buf; len is for the length written into the buffer.
 */
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
|
2010-04-12 16:18:25 +03:00
|
|
|
|
2010-04-12 16:19:07 +03:00
|
|
|
/*
 * Disable callbacks.  Not necessarily synchronous, hence unreliable and
 * only useful as an optimization.
 */
void virtqueue_disable_cb(struct virtqueue *vq);
|
2010-04-12 16:18:25 +03:00
|
|
|
|
2010-04-12 16:19:07 +03:00
|
|
|
/*
 * Restart callbacks after disable_cb.  Returns "false" if there are
 * pending buffers in the queue, so the caller can detect the race between
 * checking for more work and enabling callbacks.
 */
bool virtqueue_enable_cb(struct virtqueue *vq);
|
2010-04-12 16:18:25 +03:00
|
|
|
|
2010-04-12 16:19:07 +03:00
|
|
|
/*
 * Detach the first unused buffer: returns NULL or the "data" token handed
 * to add_buf.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *vq);
|
2010-04-12 16:18:25 +03:00
|
|
|
|
2007-10-22 11:03:36 +10:00
|
|
|
/**
|
|
|
|
* virtio_device - representation of a device using virtio
|
|
|
|
* @index: unique position on the virtio bus
|
|
|
|
* @dev: underlying device.
|
|
|
|
* @id: the device type identification (used to match it with a driver).
|
|
|
|
* @config: the configuration ops for this device.
|
2009-06-12 22:16:35 -06:00
|
|
|
* @vqs: the list of virtqueues for this device.
|
2008-05-02 21:50:50 -05:00
|
|
|
* @features: the features supported by both driver and device.
|
2007-10-22 11:03:36 +10:00
|
|
|
* @priv: private pointer for the driver's use.
|
|
|
|
*/
|
2009-06-12 22:16:35 -06:00
|
|
|
struct virtio_device {
|
2007-10-22 11:03:36 +10:00
|
|
|
int index;
|
|
|
|
struct device dev;
|
|
|
|
struct virtio_device_id id;
|
|
|
|
struct virtio_config_ops *config;
|
2009-06-12 22:16:35 -06:00
|
|
|
struct list_head vqs;
|
2008-05-02 21:50:50 -05:00
|
|
|
/* Note that this is a Linux set_bit-style bitmap. */
|
|
|
|
unsigned long features[1];
|
2007-10-22 11:03:36 +10:00
|
|
|
void *priv;
|
|
|
|
};
|
|
|
|
|
2010-03-06 04:44:15 +00:00
|
|
|
/*
 * dev_to_virtio - map a struct device back to the struct virtio_device
 * that embeds it as its 'dev' member.
 * @_d: pointer to the 'dev' member of a struct virtio_device.
 *
 * The macro parameter must not be spelled 'dev': the preprocessor would
 * then also substitute the caller's expression into container_of()'s
 * member-name argument, so the macro would only compile when the argument
 * happened to be the identifier 'dev' itself.
 */
#define dev_to_virtio(_d)	container_of(_d, struct virtio_device, dev)
|
2007-10-22 11:03:36 +10:00
|
|
|
/*
 * Register @dev (implementation not visible in this header).
 * NOTE(review): the int return is presumably 0 or -errno - confirm
 * against the definition.
 */
int register_virtio_device(struct virtio_device *dev);
|
|
|
|
/*
 * Counterpart of register_virtio_device() (implementation not visible in
 * this header).
 */
void unregister_virtio_device(struct virtio_device *dev);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virtio_driver - operations for a virtio I/O driver
|
|
|
|
* @driver: underlying device driver (populate name and owner).
|
|
|
|
* @id_table: the ids serviced by this driver.
|
2008-05-02 21:50:50 -05:00
|
|
|
* @feature_table: an array of feature numbers supported by this device.
|
|
|
|
* @feature_table_size: number of entries in the feature table array.
|
2009-06-12 22:16:33 -06:00
|
|
|
* @probe: the function to call when a device is found. Returns 0 or -errno.
|
2007-10-22 11:03:36 +10:00
|
|
|
* @remove: the function when a device is removed.
|
2008-02-04 23:49:58 -05:00
|
|
|
* @config_changed: optional function to call when the device configuration
|
|
|
|
* changes; may be called in interrupt context.
|
2007-10-22 11:03:36 +10:00
|
|
|
*/
|
|
|
|
struct virtio_driver {
|
|
|
|
struct device_driver driver;
|
|
|
|
const struct virtio_device_id *id_table;
|
2008-05-02 21:50:50 -05:00
|
|
|
const unsigned int *feature_table;
|
|
|
|
unsigned int feature_table_size;
|
2007-10-22 11:03:36 +10:00
|
|
|
int (*probe)(struct virtio_device *dev);
|
|
|
|
void (*remove)(struct virtio_device *dev);
|
2008-02-04 23:49:58 -05:00
|
|
|
void (*config_changed)(struct virtio_device *dev);
|
2007-10-22 11:03:36 +10:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Register @drv (implementation not visible in this header).
 * NOTE(review): the int return is presumably 0 or -errno - confirm
 * against the definition.
 */
int register_virtio_driver(struct virtio_driver *drv);
|
|
|
|
/*
 * Counterpart of register_virtio_driver() (implementation not visible in
 * this header).
 */
void unregister_virtio_driver(struct virtio_driver *drv);
|
|
|
|
#endif /* _LINUX_VIRTIO_H */
|