mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-08 14:23:19 +00:00
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into linux-next
Conflicts: fs/ubifs/super.c Merge the upstream tree in order to resolve a conflict with the per-bdi writeback changes from the linux-2.6-block tree.
This commit is contained in:
commit
7cce2f4cb7
8
CREDITS
8
CREDITS
@ -2006,6 +2006,9 @@ E: paul@laufernet.com
|
||||
D: Soundblaster driver fixes, ISAPnP quirk
|
||||
S: California, USA
|
||||
|
||||
N: Jonathan Layes
|
||||
D: ARPD support
|
||||
|
||||
N: Tom Lees
|
||||
E: tom@lpsg.demon.co.uk
|
||||
W: http://www.lpsg.demon.co.uk/
|
||||
@ -2797,7 +2800,7 @@ D: Starter of Linux1394 effort
|
||||
S: ask per mail for current address
|
||||
|
||||
N: Nicolas Pitre
|
||||
E: nico@cam.org
|
||||
E: nico@fluxnic.net
|
||||
D: StrongARM SA1100 support integrator & hacker
|
||||
D: Xscale PXA architecture
|
||||
D: unified SMC 91C9x/91C11x ethernet driver (smc91x)
|
||||
@ -3802,6 +3805,9 @@ S: van Bronckhorststraat 12
|
||||
S: 2612 XV Delft
|
||||
S: The Netherlands
|
||||
|
||||
N: Thomas Woller
|
||||
D: CS461x Cirrus Logic sound driver
|
||||
|
||||
N: David Woodhouse
|
||||
E: dwmw2@infradead.org
|
||||
D: JFFS2 file system, Memory Technology Device subsystem,
|
||||
|
@ -82,6 +82,8 @@ block/
|
||||
- info on the Block I/O (BIO) layer.
|
||||
blockdev/
|
||||
- info on block devices & drivers
|
||||
btmrvl.txt
|
||||
- info on Marvell Bluetooth driver usage.
|
||||
cachetlb.txt
|
||||
- describes the cache/TLB flushing interfaces Linux uses.
|
||||
cdrom/
|
||||
|
@ -94,28 +94,37 @@ What: /sys/block/<disk>/queue/physical_block_size
|
||||
Date: May 2009
|
||||
Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
This is the smallest unit the storage device can write
|
||||
without resorting to read-modify-write operation. It is
|
||||
usually the same as the logical block size but may be
|
||||
bigger. One example is SATA drives with 4KB sectors
|
||||
that expose a 512-byte logical block size to the
|
||||
operating system.
|
||||
This is the smallest unit a physical storage device can
|
||||
write atomically. It is usually the same as the logical
|
||||
block size but may be bigger. One example is SATA
|
||||
drives with 4KB sectors that expose a 512-byte logical
|
||||
block size to the operating system. For stacked block
|
||||
devices the physical_block_size variable contains the
|
||||
maximum physical_block_size of the component devices.
|
||||
|
||||
What: /sys/block/<disk>/queue/minimum_io_size
|
||||
Date: April 2009
|
||||
Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
Storage devices may report a preferred minimum I/O size,
|
||||
which is the smallest request the device can perform
|
||||
without incurring a read-modify-write penalty. For disk
|
||||
drives this is often the physical block size. For RAID
|
||||
arrays it is often the stripe chunk size.
|
||||
Storage devices may report a granularity or preferred
|
||||
minimum I/O size which is the smallest request the
|
||||
device can perform without incurring a performance
|
||||
penalty. For disk drives this is often the physical
|
||||
block size. For RAID arrays it is often the stripe
|
||||
chunk size. A properly aligned multiple of
|
||||
minimum_io_size is the preferred request size for
|
||||
workloads where a high number of I/O operations is
|
||||
desired.
|
||||
|
||||
What: /sys/block/<disk>/queue/optimal_io_size
|
||||
Date: April 2009
|
||||
Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
Storage devices may report an optimal I/O size, which is
|
||||
the device's preferred unit of receiving I/O. This is
|
||||
rarely reported for disk drives. For RAID devices it is
|
||||
usually the stripe width or the internal block size.
|
||||
the device's preferred unit for sustained I/O. This is
|
||||
rarely reported for disk drives. For RAID arrays it is
|
||||
usually the stripe width or the internal track size. A
|
||||
properly aligned multiple of optimal_io_size is the
|
||||
preferred request size for workloads where sustained
|
||||
throughput is desired. If no optimal I/O size is
|
||||
reported this file contains 0.
|
||||
|
@ -84,6 +84,16 @@ Description:
|
||||
from this part of the device tree.
|
||||
Depends on CONFIG_HOTPLUG.
|
||||
|
||||
What: /sys/bus/pci/devices/.../reset
|
||||
Date: July 2009
|
||||
Contact: Michael S. Tsirkin <mst@redhat.com>
|
||||
Description:
|
||||
Some devices allow an individual function to be reset
|
||||
without affecting other functions in the same device.
|
||||
For devices that have this support, a file named reset
|
||||
will be present in sysfs. Writing 1 to this file
|
||||
will perform reset.
|
||||
|
||||
What: /sys/bus/pci/devices/.../vpd
|
||||
Date: February 2008
|
||||
Contact: Ben Hutchings <bhutchings@solarflare.com>
|
||||
|
@ -449,8 +449,8 @@ printk(KERN_INFO "i = %u\n", i);
|
||||
</para>
|
||||
|
||||
<programlisting>
|
||||
__u32 ipaddress;
|
||||
printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress));
|
||||
__be32 ipaddress;
|
||||
printk(KERN_INFO "my ip: %pI4\n", &ipaddress);
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
|
@ -25,6 +25,10 @@
|
||||
<year>2006-2008</year>
|
||||
<holder>Hans-Jürgen Koch.</holder>
|
||||
</copyright>
|
||||
<copyright>
|
||||
<year>2009</year>
|
||||
<holder>Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice>
|
||||
<para>
|
||||
@ -41,6 +45,13 @@ GPL version 2.
|
||||
</abstract>
|
||||
|
||||
<revhistory>
|
||||
<revision>
|
||||
<revnumber>0.9</revnumber>
|
||||
<date>2009-07-16</date>
|
||||
<authorinitials>mst</authorinitials>
|
||||
<revremark>Added generic pci driver
|
||||
</revremark>
|
||||
</revision>
|
||||
<revision>
|
||||
<revnumber>0.8</revnumber>
|
||||
<date>2008-12-24</date>
|
||||
@ -809,6 +820,158 @@ framework to set up sysfs files for this region. Simply leave it alone.
|
||||
|
||||
</chapter>
|
||||
|
||||
<chapter id="uio_pci_generic" xreflabel="Using Generic driver for PCI cards">
|
||||
<?dbhtml filename="uio_pci_generic.html"?>
|
||||
<title>Generic PCI UIO driver</title>
|
||||
<para>
|
||||
The generic driver is a kernel module named uio_pci_generic.
|
||||
It can work with any device compliant to PCI 2.3 (circa 2002) and
|
||||
any compliant PCI Express device. Using this, you only need to
|
||||
write the userspace driver, removing the need to write
|
||||
a hardware-specific kernel module.
|
||||
</para>
|
||||
|
||||
<sect1 id="uio_pci_generic_binding">
|
||||
<title>Making the driver recognize the device</title>
|
||||
<para>
|
||||
Since the driver does not declare any device ids, it will not get loaded
|
||||
automatically and will not automatically bind to any devices; you must load it
|
||||
and allocate id to the driver yourself. For example:
|
||||
<programlisting>
|
||||
modprobe uio_pci_generic
|
||||
echo "8086 10f5" > /sys/bus/pci/drivers/uio_pci_generic/new_id
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
If there already is a hardware specific kernel driver for your device, the
|
||||
generic driver still won't bind to it. In this case, if you want to use the
|
||||
generic driver (why would you?) you'll have to manually unbind the hardware
|
||||
specific driver and bind the generic driver, like this:
|
||||
<programlisting>
|
||||
echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
|
||||
echo -n 0000:00:19.0 > /sys/bus/pci/drivers/uio_pci_generic/bind
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
You can verify that the device has been bound to the driver
|
||||
by looking for it in sysfs, for example like the following:
|
||||
<programlisting>
|
||||
ls -l /sys/bus/pci/devices/0000:00:19.0/driver
|
||||
</programlisting>
|
||||
Which if successful should print
|
||||
<programlisting>
|
||||
.../0000:00:19.0/driver -> ../../../bus/pci/drivers/uio_pci_generic
|
||||
</programlisting>
|
||||
Note that the generic driver will not bind to old PCI 2.2 devices.
|
||||
If binding the device failed, run the following command:
|
||||
<programlisting>
|
||||
dmesg
|
||||
</programlisting>
|
||||
and look in the output for failure reasons.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="uio_pci_generic_internals">
|
||||
<title>Things to know about uio_pci_generic</title>
|
||||
<para>
|
||||
Interrupts are handled using the Interrupt Disable bit in the PCI command
|
||||
register and Interrupt Status bit in the PCI status register. All devices
|
||||
compliant to PCI 2.3 (circa 2002) and all compliant PCI Express devices should
|
||||
support these bits. uio_pci_generic detects this support, and won't bind to
|
||||
devices which do not support the Interrupt Disable Bit in the command register.
|
||||
</para>
|
||||
<para>
|
||||
On each interrupt, uio_pci_generic sets the Interrupt Disable bit.
|
||||
This prevents the device from generating further interrupts
|
||||
until the bit is cleared. The userspace driver should clear this
|
||||
bit before blocking and waiting for more interrupts.
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="uio_pci_generic_userspace">
|
||||
<title>Writing userspace driver using uio_pci_generic</title>
|
||||
<para>
|
||||
A userspace driver can use the PCI sysfs interface, or the
|
||||
libpci library that wraps it, to talk to the device and to
|
||||
re-enable interrupts by writing to the command register.
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="uio_pci_generic_example">
|
||||
<title>Example code using uio_pci_generic</title>
|
||||
<para>
|
||||
Here is some sample userspace driver code using uio_pci_generic:
|
||||
<programlisting>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
/*
 * Sample userspace driver for a device bound to uio_pci_generic.
 *
 * Opens the UIO device node and the device's PCI configuration space
 * file, then loops: re-enable the device interrupt by writing the cached
 * command register byte back with the Interrupt Disable bit cleared, and
 * block in read() on the UIO node until the next interrupt is reported.
 *
 * Returns errno once any of the calls fails.
 */
int main()
{
	int uiofd;
	int configfd;
	int err;
	int i;
	unsigned icount;
	unsigned char command_high;

	uiofd = open("/dev/uio0", O_RDONLY);
	if (uiofd < 0) {
		perror("uio open:");
		return errno;
	}
	configfd = open("/sys/class/uio/uio0/device/config", O_RDWR);
	/* Check the descriptor that was just opened, not uiofd again. */
	if (configfd < 0) {
		perror("config open:");
		return errno;
	}

	/*
	 * Read and cache command value.
	 * Config space byte 5 is the high byte of the 16-bit PCI command
	 * register (offset 4); 0x4 in that byte is bit 10, Interrupt Disable.
	 */
	err = pread(configfd, &command_high, 1, 5);
	if (err != 1) {
		perror("command config read:");
		return errno;
	}
	command_high &= ~0x4;

	for(i = 0;; ++i) {
		/* Print out a message, for debugging. */
		if (i == 0)
			fprintf(stderr, "Started uio test driver.\n");
		else
			/* icount is unsigned, so print it with %u. */
			fprintf(stderr, "Interrupts: %u\n", icount);

		/****************************************/
		/* Here we got an interrupt from the
		   device. Do something to it. */
		/****************************************/

		/* Re-enable interrupts. */
		err = pwrite(configfd, &command_high, 1, 5);
		if (err != 1) {
			perror("config write:");
			break;
		}

		/* Wait for next interrupt; the UIO node returns a 4-byte count. */
		err = read(uiofd, &icount, 4);
		if (err != 4) {
			perror("uio read:");
			break;
		}

	}
	return errno;
}
|
||||
|
||||
</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
|
||||
<appendix id="app1">
|
||||
<title>Further information</title>
|
||||
<itemizedlist>
|
||||
|
@ -4,15 +4,17 @@
|
||||
February 2, 2006
|
||||
|
||||
Current document maintainer:
|
||||
Linas Vepstas <linas@austin.ibm.com>
|
||||
Linas Vepstas <linasvepstas@gmail.com>
|
||||
updated by Richard Lary <rlary@us.ibm.com>
|
||||
and Mike Mason <mmlnx@us.ibm.com> on 27-Jul-2009
|
||||
|
||||
|
||||
Many PCI bus controllers are able to detect a variety of hardware
|
||||
PCI errors on the bus, such as parity errors on the data and address
|
||||
busses, as well as SERR and PERR errors. Some of the more advanced
|
||||
chipsets are able to deal with these errors; these include PCI-E chipsets,
|
||||
and the PCI-host bridges found on IBM Power4 and Power5-based pSeries
|
||||
boxes. A typical action taken is to disconnect the affected device,
|
||||
and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
|
||||
pSeries boxes. A typical action taken is to disconnect the affected device,
|
||||
halting all I/O to it. The goal of a disconnection is to avoid system
|
||||
corruption; for example, to halt system memory corruption due to DMA's
|
||||
to "wild" addresses. Typically, a reconnection mechanism is also
|
||||
@ -37,10 +39,11 @@ is forced by the need to handle multi-function devices, that is,
|
||||
devices that have multiple device drivers associated with them.
|
||||
In the first stage, each driver is allowed to indicate what type
|
||||
of reset it desires, the choices being a simple re-enabling of I/O
|
||||
or requesting a hard reset (a full electrical #RST of the PCI card).
|
||||
If any driver requests a full reset, that is what will be done.
|
||||
or requesting a slot reset.
|
||||
|
||||
After a full reset and/or a re-enabling of I/O, all drivers are
|
||||
If any driver requests a slot reset, that is what will be done.
|
||||
|
||||
After a reset and/or a re-enabling of I/O, all drivers are
|
||||
again notified, so that they may then perform any device setup/config
|
||||
that may be required. After these have all completed, a final
|
||||
"resume normal operations" event is sent out.
|
||||
@ -101,7 +104,7 @@ if it implements any, it must implement error_detected(). If a callback
|
||||
is not implemented, the corresponding feature is considered unsupported.
|
||||
For example, if mmio_enabled() and resume() aren't there, then it
|
||||
is assumed that the driver is not doing any direct recovery and requires
|
||||
a reset. If link_reset() is not implemented, the card is assumed as
|
||||
a slot reset. If link_reset() is not implemented, the card is assumed to
|
||||
not care about link resets. Typically a driver will want to know about
|
||||
a slot_reset().
|
||||
|
||||
@ -111,7 +114,7 @@ sequence described below.
|
||||
|
||||
STEP 0: Error Event
|
||||
-------------------
|
||||
PCI bus error is detect by the PCI hardware. On powerpc, the slot
|
||||
A PCI bus error is detected by the PCI hardware. On powerpc, the slot
|
||||
is isolated, in that all I/O is blocked: all reads return 0xffffffff,
|
||||
all writes are ignored.
|
||||
|
||||
@ -139,7 +142,7 @@ The driver must return one of the following result codes:
|
||||
a chance to extract some diagnostic information (see
|
||||
mmio_enable, below).
|
||||
- PCI_ERS_RESULT_NEED_RESET:
|
||||
Driver returns this if it can't recover without a hard
|
||||
Driver returns this if it can't recover without a
|
||||
slot reset.
|
||||
- PCI_ERS_RESULT_DISCONNECT:
|
||||
Driver returns this if it doesn't want to recover at all.
|
||||
@ -169,11 +172,11 @@ is STEP 6 (Permanent Failure).
|
||||
|
||||
>>> The current powerpc implementation doesn't much care if the device
|
||||
>>> attempts I/O at this point, or not. I/O's will fail, returning
|
||||
>>> a value of 0xff on read, and writes will be dropped. If the device
|
||||
>>> driver attempts more than 10K I/O's to a frozen adapter, it will
|
||||
>>> assume that the device driver has gone into an infinite loop, and
|
||||
>>> it will panic the kernel. There doesn't seem to be any other
|
||||
>>> way of stopping a device driver that insists on spinning on I/O.
|
||||
>>> a value of 0xff on read, and writes will be dropped. If more than
|
||||
>>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
|
||||
>>> assumes that the device driver has gone into an infinite loop
|
||||
>>> and prints an error to syslog. A reboot is then required to
|
||||
>>> get the device working again.
|
||||
|
||||
STEP 2: MMIO Enabled
|
||||
-------------------
|
||||
@ -182,15 +185,14 @@ DMA), and then calls the mmio_enabled() callback on all affected
|
||||
device drivers.
|
||||
|
||||
This is the "early recovery" call. IOs are allowed again, but DMA is
|
||||
not (hrm... to be discussed, I prefer not), with some restrictions. This
|
||||
is NOT a callback for the driver to start operations again, only to
|
||||
peek/poke at the device, extract diagnostic information, if any, and
|
||||
eventually do things like trigger a device local reset or some such,
|
||||
but not restart operations. This is callback is made if all drivers on
|
||||
a segment agree that they can try to recover and if no automatic link reset
|
||||
was performed by the HW. If the platform can't just re-enable IOs without
|
||||
a slot reset or a link reset, it wont call this callback, and instead
|
||||
will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
|
||||
not, with some restrictions. This is NOT a callback for the driver to
|
||||
start operations again, only to peek/poke at the device, extract diagnostic
|
||||
information, if any, and eventually do things like trigger a device local
|
||||
reset or some such, but not restart operations. This callback is made if
|
||||
all drivers on a segment agree that they can try to recover and if no automatic
|
||||
link reset was performed by the HW. If the platform can't just re-enable IOs
|
||||
without a slot reset or a link reset, it will not call this callback, and
|
||||
instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
|
||||
|
||||
>>> The following is proposed; no platform implements this yet:
|
||||
>>> Proposal: All I/O's should be done _synchronously_ from within
|
||||
@ -228,9 +230,6 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
|
||||
If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
|
||||
proceeds to STEP 4 (Slot Reset)
|
||||
|
||||
>>> The current powerpc implementation does not implement this callback.
|
||||
|
||||
|
||||
STEP 3: Link Reset
|
||||
------------------
|
||||
The platform resets the link, and then calls the link_reset() callback
|
||||
@ -253,16 +252,33 @@ The platform then proceeds to either STEP 4 (Slot Reset) or STEP 5
|
||||
|
||||
>>> The current powerpc implementation does not implement this callback.
|
||||
|
||||
|
||||
STEP 4: Slot Reset
|
||||
------------------
|
||||
The platform performs a soft or hard reset of the device, and then
|
||||
calls the slot_reset() callback.
|
||||
|
||||
A soft reset consists of asserting the adapter #RST line and then
|
||||
In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
|
||||
platform will perform a slot reset on the requesting PCI device(s).
|
||||
The actual steps taken by a platform to perform a slot reset
|
||||
will be platform-dependent. Upon completion of slot reset, the
|
||||
platform will call the device slot_reset() callback.
|
||||
|
||||
Powerpc platforms implement two levels of slot reset:
|
||||
soft reset(default) and fundamental(optional) reset.
|
||||
|
||||
Powerpc soft reset consists of asserting the adapter #RST line and then
|
||||
restoring the PCI BAR's and PCI configuration header to a state
|
||||
that is equivalent to what it would be after a fresh system
|
||||
power-on followed by power-on BIOS/system firmware initialization.
|
||||
Soft reset is also known as hot-reset.
|
||||
|
||||
Powerpc fundamental reset is supported by PCI Express cards only
|
||||
and results in device's state machines, hardware logic, port states and
|
||||
configuration registers being initialized to their default conditions.
|
||||
|
||||
For most PCI devices, a soft reset will be sufficient for recovery.
|
||||
Optional fundamental reset is provided to support a limited number
|
||||
of PCI Express devices for which a soft reset is not sufficient
|
||||
for recovery.
|
||||
|
||||
If the platform supports PCI hotplug, then the reset might be
|
||||
performed by toggling the slot electrical power off/on.
|
||||
|
||||
@ -274,10 +290,12 @@ may result in hung devices, kernel panics, or silent data corruption.
|
||||
|
||||
This call gives drivers the chance to re-initialize the hardware
|
||||
(re-download firmware, etc.). At this point, the driver may assume
|
||||
that he card is in a fresh state and is fully functional. In
|
||||
particular, interrupt generation should work normally.
|
||||
that the card is in a fresh state and is fully functional. The slot
|
||||
is unfrozen and the driver has full access to PCI config space,
|
||||
memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
|
||||
will also be available.
|
||||
|
||||
Drivers should not yet restart normal I/O processing operations
|
||||
Drivers should not restart normal I/O processing operations
|
||||
at this point. If all device drivers report success on this
|
||||
callback, the platform will call resume() to complete the sequence,
|
||||
and let the driver restart normal I/O processing.
|
||||
@ -302,11 +320,21 @@ driver performs device init only from PCI function 0:
|
||||
- PCI_ERS_RESULT_DISCONNECT
|
||||
Same as above.
|
||||
|
||||
Drivers for PCI Express cards that require a fundamental reset must
|
||||
set the needs_freset bit in the pci_dev structure in their probe function.
|
||||
For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
|
||||
PCI card types:
|
||||
|
||||
+ /* Set EEH reset type to fundamental if required by hba */
|
||||
+ if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
|
||||
+ pdev->needs_freset = 1;
|
||||
+
|
||||
|
||||
Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
|
||||
Failure).
|
||||
|
||||
>>> The current powerpc implementation does not currently try a
|
||||
>>> power-cycle reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
|
||||
>>> The current powerpc implementation does not try a power-cycle
|
||||
>>> reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
|
||||
>>> However, it probably should.
|
||||
|
||||
|
||||
@ -348,7 +376,7 @@ software errors.
|
||||
|
||||
Conclusion; General Remarks
|
||||
---------------------------
|
||||
The way those callbacks are called is platform policy. A platform with
|
||||
The way the callbacks are called is platform policy. A platform with
|
||||
no slot reset capability may want to just "ignore" drivers that can't
|
||||
recover (disconnect them) and try to let other cards on the same segment
|
||||
recover. Keep in mind that in most real life cases, though, there will
|
||||
@ -361,8 +389,8 @@ That is, the recovery API only requires that:
|
||||
|
||||
- There is no guarantee that interrupt delivery can proceed from any
|
||||
device on the segment starting from the error detection and until the
|
||||
resume callback is sent, at which point interrupts are expected to be
|
||||
fully operational.
|
||||
slot_reset callback is called, at which point interrupts are expected
|
||||
to be fully operational.
|
||||
|
||||
- There is no guarantee that interrupt delivery is stopped, that is,
|
||||
a driver that gets an interrupt after detecting an error, or that detects
|
||||
@ -381,16 +409,23 @@ anyway :)
|
||||
>>> Implementation details for the powerpc platform are discussed in
|
||||
>>> the file Documentation/powerpc/eeh-pci-error-recovery.txt
|
||||
|
||||
>>> As of this writing, there are six device drivers with patches
|
||||
>>> implementing error recovery. Not all of these patches are in
|
||||
>>> As of this writing, there is a growing list of device drivers with
|
||||
>>> patches implementing error recovery. Not all of these patches are in
|
||||
>>> mainline yet. These may be used as "examples":
|
||||
>>>
|
||||
>>> drivers/scsi/ipr.c
|
||||
>>> drivers/scsi/sym53cxx_2
|
||||
>>> drivers/scsi/ipr
|
||||
>>> drivers/scsi/sym53c8xx_2
|
||||
>>> drivers/scsi/qla2xxx
|
||||
>>> drivers/scsi/lpfc
|
||||
>>> drivers/net/bnx2.c
|
||||
>>> drivers/net/e100.c
|
||||
>>> drivers/net/e1000
|
||||
>>> drivers/net/e1000e
|
||||
>>> drivers/net/ixgb
|
||||
>>> drivers/net/ixgbe
|
||||
>>> drivers/net/cxgb3
|
||||
>>> drivers/net/s2io.c
|
||||
>>> drivers/net/qlge
|
||||
|
||||
The End
|
||||
-------
|
||||
|
@ -743,3 +743,80 @@ Revised:
|
||||
RCU, realtime RCU, sleepable RCU, performance.
|
||||
"
|
||||
}
|
||||
|
||||
@article{PaulEMcKenney2008RCUOSR
|
||||
,author="Paul E. McKenney and Jonathan Walpole"
|
||||
,title="Introducing technology into the {Linux} kernel: a case study"
|
||||
,Year="2008"
|
||||
,journal="SIGOPS Oper. Syst. Rev."
|
||||
,volume="42"
|
||||
,number="5"
|
||||
,pages="4--17"
|
||||
,issn="0163-5980"
|
||||
,doi={http://doi.acm.org/10.1145/1400097.1400099}
|
||||
,publisher="ACM"
|
||||
,address="New York, NY, USA"
|
||||
,annotation={
|
||||
Linux changed RCU to a far greater degree than RCU has changed Linux.
|
||||
}
|
||||
}
|
||||
|
||||
@unpublished{PaulEMcKenney2008HierarchicalRCU
|
||||
,Author="Paul E. McKenney"
|
||||
,Title="Hierarchical {RCU}"
|
||||
,month="November"
|
||||
,day="3"
|
||||
,year="2008"
|
||||
,note="Available:
|
||||
\url{http://lwn.net/Articles/305782/}
|
||||
[Viewed November 6, 2008]"
|
||||
,annotation="
|
||||
RCU with combining-tree-based grace-period detection,
|
||||
permitting it to handle thousands of CPUs.
|
||||
"
|
||||
}
|
||||
|
||||
@conference{PaulEMcKenney2009MaliciousURCU
|
||||
,Author="Paul E. McKenney"
|
||||
,Title="Using a Malicious User-Level {RCU} to Torture {RCU}-Based Algorithms"
|
||||
,Booktitle="linux.conf.au 2009"
|
||||
,month="January"
|
||||
,year="2009"
|
||||
,address="Hobart, Australia"
|
||||
,note="Available:
|
||||
\url{http://www.rdrop.com/users/paulmck/RCU/urcutorture.2009.01.22a.pdf}
|
||||
[Viewed February 2, 2009]"
|
||||
,annotation="
|
||||
Realtime RCU and torture-testing RCU uses.
|
||||
"
|
||||
}
|
||||
|
||||
@unpublished{MathieuDesnoyers2009URCU
|
||||
,Author="Mathieu Desnoyers"
|
||||
,Title="[{RFC} git tree] Userspace {RCU} (urcu) for {Linux}"
|
||||
,month="February"
|
||||
,day="5"
|
||||
,year="2009"
|
||||
,note="Available:
|
||||
\url{http://lkml.org/lkml/2009/2/5/572}
|
||||
\url{git://lttng.org/userspace-rcu.git}
|
||||
[Viewed February 20, 2009]"
|
||||
,annotation="
|
||||
Mathieu Desnoyers's user-space RCU implementation.
|
||||
git://lttng.org/userspace-rcu.git
|
||||
"
|
||||
}
|
||||
|
||||
@unpublished{PaulEMcKenney2009BloatWatchRCU
|
||||
,Author="Paul E. McKenney"
|
||||
,Title="{RCU}: The {Bloatwatch} Edition"
|
||||
,month="March"
|
||||
,day="17"
|
||||
,year="2009"
|
||||
,note="Available:
|
||||
\url{http://lwn.net/Articles/323929/}
|
||||
[Viewed March 20, 2009]"
|
||||
,annotation="
|
||||
Uniprocessor assumptions allow simplified RCU implementation.
|
||||
"
|
||||
}
|
||||
|
@ -2,14 +2,13 @@ RCU on Uniprocessor Systems
|
||||
|
||||
|
||||
A common misconception is that, on UP systems, the call_rcu() primitive
|
||||
may immediately invoke its function, and that the synchronize_rcu()
|
||||
primitive may return immediately. The basis of this misconception
|
||||
may immediately invoke its function. The basis of this misconception
|
||||
is that since there is only one CPU, it should not be necessary to
|
||||
wait for anything else to get done, since there are no other CPUs for
|
||||
anything else to be happening on. Although this approach will -sort- -of-
|
||||
work a surprising amount of the time, it is a very bad idea in general.
|
||||
This document presents three examples that demonstrate exactly how bad an
|
||||
idea this is.
|
||||
This document presents three examples that demonstrate exactly how bad
|
||||
an idea this is.
|
||||
|
||||
|
||||
Example 1: softirq Suicide
|
||||
@ -82,11 +81,18 @@ Quick Quiz #2: What locking restriction must RCU callbacks respect?
|
||||
|
||||
Summary
|
||||
|
||||
Permitting call_rcu() to immediately invoke its arguments or permitting
|
||||
synchronize_rcu() to immediately return breaks RCU, even on a UP system.
|
||||
So do not do it! Even on a UP system, the RCU infrastructure -must-
|
||||
respect grace periods, and -must- invoke callbacks from a known environment
|
||||
in which no locks are held.
|
||||
Permitting call_rcu() to immediately invoke its arguments breaks RCU,
|
||||
even on a UP system. So do not do it! Even on a UP system, the RCU
|
||||
infrastructure -must- respect grace periods, and -must- invoke callbacks
|
||||
from a known environment in which no locks are held.
|
||||
|
||||
It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
|
||||
immediately on a UP system. It is also safe for synchronize_rcu()
|
||||
to return immediately on UP systems, except when running preemptable
|
||||
RCU.
|
||||
|
||||
Quick Quiz #3: Why can't synchronize_rcu() return immediately on
|
||||
UP systems running preemptable RCU?
|
||||
|
||||
|
||||
Answer to Quick Quiz #1:
|
||||
@ -117,3 +123,13 @@ Answer to Quick Quiz #2:
|
||||
callbacks acquire locks directly. However, a great many RCU
|
||||
callbacks do acquire locks -indirectly-, for example, via
|
||||
the kfree() primitive.
|
||||
|
||||
Answer to Quick Quiz #3:
|
||||
Why can't synchronize_rcu() return immediately on UP systems
|
||||
running preemptable RCU?
|
||||
|
||||
Because some other task might have been preempted in the middle
|
||||
of an RCU read-side critical section. If synchronize_rcu()
|
||||
simply immediately returned, it would prematurely signal the
|
||||
end of the grace period, which would come as a nasty shock to
|
||||
that other thread when it started running again.
|
||||
|
@ -11,7 +11,10 @@ over a rather long period of time, but improvements are always welcome!
|
||||
structure is updated more than about 10% of the time, then
|
||||
you should strongly consider some other approach, unless
|
||||
detailed performance measurements show that RCU is nonetheless
|
||||
the right tool for the job.
|
||||
the right tool for the job. Yes, you might think of RCU
|
||||
as simply cutting overhead off of the readers and imposing it
|
||||
on the writers. That is exactly why normal uses of RCU will
|
||||
do much more reading than updating.
|
||||
|
||||
Another exception is where performance is not an issue, and RCU
|
||||
provides a simpler implementation. An example of this situation
|
||||
@ -240,10 +243,11 @@ over a rather long period of time, but improvements are always welcome!
|
||||
instead need to use synchronize_irq() or synchronize_sched().
|
||||
|
||||
12. Any lock acquired by an RCU callback must be acquired elsewhere
|
||||
with irq disabled, e.g., via spin_lock_irqsave(). Failing to
|
||||
disable irq on a given acquisition of that lock will result in
|
||||
deadlock as soon as the RCU callback happens to interrupt that
|
||||
acquisition's critical section.
|
||||
with softirq disabled, e.g., via spin_lock_irqsave(),
|
||||
spin_lock_bh(), etc. Failing to disable irq on a given
|
||||
acquisition of that lock will result in deadlock as soon as the
|
||||
RCU callback happens to interrupt that acquisition's critical
|
||||
section.
|
||||
|
||||
13. RCU callbacks can be and are executed in parallel. In many cases,
|
||||
the callback code simply wrappers around kfree(), so that this
|
||||
@ -310,3 +314,9 @@ over a rather long period of time, but improvements are always welcome!
|
||||
Because these primitives only wait for pre-existing readers,
|
||||
it is the caller's responsibility to guarantee safety to
|
||||
any subsequent readers.
|
||||
|
||||
16. The various RCU read-side primitives do -not- contain memory
|
||||
barriers. The CPU (and in some cases, the compiler) is free
|
||||
to reorder code into and out of RCU read-side critical sections.
|
||||
It is the responsibility of the RCU update-side primitives to
|
||||
deal with this.
|
||||
|
@ -36,7 +36,7 @@ o How can the updater tell when a grace period has completed
|
||||
executed in user mode, or executed in the idle loop, we can
|
||||
safely free up that item.
|
||||
|
||||
Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
|
||||
Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the
|
||||
same effect, but require that the readers manipulate CPU-local
|
||||
counters. These counters allow limited types of blocking
|
||||
within RCU read-side critical sections. SRCU also uses
|
||||
@ -79,10 +79,10 @@ o I hear that RCU is patented? What is with that?
|
||||
o I hear that RCU needs work in order to support realtime kernels?
|
||||
|
||||
This work is largely completed. Realtime-friendly RCU can be
|
||||
enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter.
|
||||
However, work is in progress for enabling priority boosting of
|
||||
preempted RCU read-side critical sections. This is needed if you
|
||||
have CPU-bound realtime threads.
|
||||
enabled via the CONFIG_TREE_PREEMPT_RCU kernel configuration
|
||||
parameter. However, work is in progress for enabling priority
|
||||
boosting of preempted RCU read-side critical sections. This is
|
||||
needed if you have CPU-bound realtime threads.
|
||||
|
||||
o Where can I find more information on RCU?
|
||||
|
||||
|
@ -170,6 +170,13 @@ module invokes call_rcu() from timers, you will need to first cancel all
|
||||
the timers, and only then invoke rcu_barrier() to wait for any remaining
|
||||
RCU callbacks to complete.
|
||||
|
||||
Of course, if your module uses call_rcu_bh(), you will need to invoke
|
||||
rcu_barrier_bh() before unloading. Similarly, if your module uses
|
||||
call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
|
||||
unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
|
||||
call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
|
||||
rcu_barrier_bh(), and rcu_barrier_sched().
|
||||
|
||||
|
||||
Implementing rcu_barrier()
|
||||
|
||||
|
@ -76,8 +76,10 @@ torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API,
|
||||
"rcu_sync" for rcu_read_lock() with synchronous reclamation,
|
||||
"rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for
|
||||
rcu_read_lock_bh() with synchronous reclamation, "srcu" for
|
||||
the "srcu_read_lock()" API, and "sched" for the use of
|
||||
preempt_disable() together with synchronize_sched().
|
||||
the "srcu_read_lock()" API, "sched" for the use of
|
||||
preempt_disable() together with synchronize_sched(),
|
||||
and "sched_expedited" for the use of preempt_disable()
|
||||
with synchronize_sched_expedited().
|
||||
|
||||
verbose Enable debug printk()s. Default is disabled.
|
||||
|
||||
@ -162,6 +164,23 @@ of the "old" and "current" counters for the corresponding CPU. The
|
||||
"idx" value maps the "old" and "current" values to the underlying array,
|
||||
and is useful for debugging.
|
||||
|
||||
Similarly, sched_expedited RCU provides the following:
|
||||
|
||||
sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319
|
||||
sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
|
||||
sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
|
||||
sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
|
||||
state: -1 / 0:0 3:0 4:0
|
||||
|
||||
As before, the first four lines are similar to those for RCU.
|
||||
The last line shows the task-migration state. The first number is
|
||||
-1 if synchronize_sched_expedited() is idle, -2 if in the process of
|
||||
posting wakeups to the migration kthreads, and N when waiting on CPU N.
|
||||
Each of the colon-separated fields following the "/" is a CPU:state pair.
|
||||
Valid states are "0" for idle, "1" for waiting for quiescent state,
|
||||
"2" for passed through quiescent state, and "3" when a race with a
|
||||
CPU-hotplug event forces use of the synchronize_sched() primitive.
|
||||
|
||||
|
||||
USAGE
|
||||
|
||||
|
@ -191,8 +191,7 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
|
||||
|
||||
The output of "cat rcu/rcudata" looks as follows:
|
||||
|
||||
rcu:
|
||||
rcu:
|
||||
rcu_sched:
|
||||
0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
|
||||
1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
|
||||
2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
|
||||
@ -306,7 +305,7 @@ comma-separated-variable spreadsheet format.
|
||||
|
||||
The output of "cat rcu/rcugp" looks as follows:
|
||||
|
||||
rcu: completed=33062 gpnum=33063
|
||||
rcu_sched: completed=33062 gpnum=33063
|
||||
rcu_bh: completed=464 gpnum=464
|
||||
|
||||
Again, this output is for both "rcu" and "rcu_bh". The fields are
|
||||
@ -413,7 +412,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
|
||||
|
||||
The output of "cat rcu/rcu_pending" looks as follows:
|
||||
|
||||
rcu:
|
||||
rcu_sched:
|
||||
0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
|
||||
1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
|
||||
2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
|
||||
|
@ -136,10 +136,10 @@ rcu_read_lock()
|
||||
Used by a reader to inform the reclaimer that the reader is
|
||||
entering an RCU read-side critical section. It is illegal
|
||||
to block while in an RCU read-side critical section, though
|
||||
kernels built with CONFIG_PREEMPT_RCU can preempt RCU read-side
|
||||
critical sections. Any RCU-protected data structure accessed
|
||||
during an RCU read-side critical section is guaranteed to remain
|
||||
unreclaimed for the full duration of that critical section.
|
||||
kernels built with CONFIG_TREE_PREEMPT_RCU can preempt RCU
|
||||
read-side critical sections. Any RCU-protected data structure
|
||||
accessed during an RCU read-side critical section is guaranteed to
|
||||
remain unreclaimed for the full duration of that critical section.
|
||||
Reference counts may be used in conjunction with RCU to maintain
|
||||
longer-term references to data structures.
|
||||
|
||||
@ -785,6 +785,7 @@ RCU pointer/list traversal:
|
||||
rcu_dereference
|
||||
list_for_each_entry_rcu
|
||||
hlist_for_each_entry_rcu
|
||||
hlist_nulls_for_each_entry_rcu
|
||||
|
||||
list_for_each_continue_rcu (to be deprecated in favor of new
|
||||
list_for_each_entry_continue_rcu)
|
||||
@ -807,19 +808,23 @@ RCU: Critical sections Grace period Barrier
|
||||
|
||||
rcu_read_lock synchronize_net rcu_barrier
|
||||
rcu_read_unlock synchronize_rcu
|
||||
synchronize_rcu_expedited
|
||||
call_rcu
|
||||
|
||||
|
||||
bh: Critical sections Grace period Barrier
|
||||
|
||||
rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
|
||||
rcu_read_unlock_bh
|
||||
rcu_read_unlock_bh synchronize_rcu_bh
|
||||
synchronize_rcu_bh_expedited
|
||||
|
||||
|
||||
sched: Critical sections Grace period Barrier
|
||||
|
||||
[preempt_disable] synchronize_sched rcu_barrier_sched
|
||||
[and friends] call_rcu_sched
|
||||
rcu_read_lock_sched synchronize_sched rcu_barrier_sched
|
||||
rcu_read_unlock_sched call_rcu_sched
|
||||
[preempt_disable] synchronize_sched_expedited
|
||||
[and friends]
|
||||
|
||||
|
||||
SRCU: Critical sections Grace period Barrier
|
||||
@ -827,6 +832,9 @@ SRCU: Critical sections Grace period Barrier
|
||||
srcu_read_lock synchronize_srcu N/A
|
||||
srcu_read_unlock
|
||||
|
||||
SRCU: Initialization/cleanup
|
||||
init_srcu_struct
|
||||
cleanup_srcu_struct
|
||||
|
||||
See the comment headers in the source code (or the docbook generated
|
||||
from them) for more information.
|
||||
|
129
Documentation/arm/OMAP/omap_pm
Normal file
129
Documentation/arm/OMAP/omap_pm
Normal file
@ -0,0 +1,129 @@
|
||||
|
||||
The OMAP PM interface
|
||||
=====================
|
||||
|
||||
This document describes the temporary OMAP PM interface. Driver
|
||||
authors use these functions to communicate minimum latency or
|
||||
throughput constraints to the kernel power management code.
|
||||
Over time, the intention is to merge features from the OMAP PM
|
||||
interface into the Linux PM QoS code.
|
||||
|
||||
Drivers need to express PM parameters which:
|
||||
|
||||
- support the range of power management parameters present in the TI SRF;
|
||||
|
||||
- separate the drivers from the underlying PM parameter
|
||||
implementation, whether it is the TI SRF or Linux PM QoS or Linux
|
||||
latency framework or something else;
|
||||
|
||||
- specify PM parameters in terms of fundamental units, such as
|
||||
latency and throughput, rather than units which are specific to OMAP
|
||||
or to particular OMAP variants;
|
||||
|
||||
- allow drivers which are shared with other architectures (e.g.,
|
||||
DaVinci) to add these constraints in a way which won't affect non-OMAP
|
||||
systems,
|
||||
|
||||
- can be implemented immediately with minimal disruption of other
|
||||
architectures.
|
||||
|
||||
|
||||
This document proposes the OMAP PM interface, including the following
|
||||
five power management functions for driver code:
|
||||
|
||||
1. Set the maximum MPU wakeup latency:
|
||||
(*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
|
||||
|
||||
2. Set the maximum device wakeup latency:
|
||||
(*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
|
||||
|
||||
3. Set the maximum system DMA transfer start latency (CORE pwrdm):
|
||||
(*pdata->set_max_sdma_lat)(struct device *dev, long t)
|
||||
|
||||
4. Set the minimum bus throughput needed by a device:
|
||||
(*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
|
||||
|
||||
5. Return the number of times the device has lost context
|
||||
(*pdata->get_dev_context_loss_count)(struct device *dev)
|
||||
|
||||
|
||||
Further documentation for all OMAP PM interface functions can be
|
||||
found in arch/arm/plat-omap/include/mach/omap-pm.h.
|
||||
|
||||
|
||||
The OMAP PM layer is intended to be temporary
|
||||
---------------------------------------------
|
||||
|
||||
The intention is that eventually the Linux PM QoS layer should support
|
||||
the range of power management features present in OMAP3. As this
|
||||
happens, existing drivers using the OMAP PM interface can be modified
|
||||
to use the Linux PM QoS code; and the OMAP PM interface can disappear.
|
||||
|
||||
|
||||
Driver usage of the OMAP PM functions
|
||||
-------------------------------------
|
||||
|
||||
As the 'pdata' in the above examples indicates, these functions are
|
||||
exposed to drivers through function pointers in driver .platform_data
|
||||
structures. The function pointers are initialized by the board-*.c
|
||||
files to point to the corresponding OMAP PM functions:
|
||||
.set_max_dev_wakeup_lat will point to
|
||||
omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
|
||||
not support these functions should leave these function pointers set
|
||||
to NULL. Drivers should use the following idiom:
|
||||
|
||||
if (pdata->set_max_dev_wakeup_lat)
|
||||
(*pdata->set_max_dev_wakeup_lat)(dev, t);
|
||||
|
||||
The most common usage of these functions will probably be to specify
|
||||
the maximum time from when an interrupt occurs, to when the device
|
||||
becomes accessible. To accomplish this, driver writers should use the
|
||||
set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
|
||||
latency, and the set_max_dev_wakeup_lat() function to constrain the
|
||||
device wakeup latency (from clk_enable() to accessibility). For
|
||||
example,
|
||||
|
||||
/* Limit MPU wakeup latency */
|
||||
if (pdata->set_max_mpu_wakeup_lat)
|
||||
(*pdata->set_max_mpu_wakeup_lat)(dev, tc);
|
||||
|
||||
/* Limit device powerdomain wakeup latency */
|
||||
if (pdata->set_max_dev_wakeup_lat)
|
||||
(*pdata->set_max_dev_wakeup_lat)(dev, td);
|
||||
|
||||
/* total wakeup latency in this example: (tc + td) */
|
||||
|
||||
The PM parameters can be overwritten by calling the function again
|
||||
with the new value. The settings can be removed by calling the
|
||||
function with a t argument of -1 (except in the case of
|
||||
set_min_bus_tput(), which should be called with an r argument of 0).
|
||||
|
||||
The fifth function above, omap_pm_get_dev_context_loss_count(),
|
||||
is intended as an optimization to allow drivers to determine whether the
|
||||
device has lost its internal context. If context has been lost, the
|
||||
driver must restore its internal context before proceeding.
|
||||
|
||||
|
||||
Other specialized interface functions
|
||||
-------------------------------------
|
||||
|
||||
The five functions listed above are intended to be usable by any
|
||||
device driver. DSPBridge and CPUFreq have a few special requirements.
|
||||
DSPBridge expresses target DSP performance levels in terms of OPP IDs.
|
||||
CPUFreq expresses target MPU performance levels in terms of MPU
|
||||
frequency. The OMAP PM interface contains functions for these
|
||||
specialized cases to convert that input information (OPPs/MPU
|
||||
frequency) into the form that the underlying power management
|
||||
implementation needs:
|
||||
|
||||
6. (*pdata->dsp_get_opp_table)(void)
|
||||
|
||||
7. (*pdata->dsp_set_min_opp)(u8 opp_id)
|
||||
|
||||
8. (*pdata->dsp_get_opp)(void)
|
||||
|
||||
9. (*pdata->cpu_get_freq_table)(void)
|
||||
|
||||
10. (*pdata->cpu_set_freq)(unsigned long f)
|
||||
|
||||
11. (*pdata->cpu_get_freq)(void)
|
@ -40,4 +40,4 @@ Notes:
|
||||
mode, the timing is off so the image is corrupted. This will be
|
||||
fixed soon.
|
||||
|
||||
Any contribution can be sent to nico@cam.org and will be greatly welcome!
|
||||
Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
|
||||
|
@ -240,7 +240,7 @@ Then, rebooting the Assabet is just a matter of waiting for the login prompt.
|
||||
|
||||
|
||||
Nicolas Pitre
|
||||
nico@cam.org
|
||||
nico@fluxnic.net
|
||||
June 12, 2001
|
||||
|
||||
|
||||
|
@ -60,7 +60,7 @@ little modifications.
|
||||
|
||||
Any contribution is welcome.
|
||||
|
||||
Please send patches to nico@cam.org
|
||||
Please send patches to nico@fluxnic.net
|
||||
|
||||
Have Fun !
|
||||
|
||||
|
@ -4,7 +4,7 @@ For more details, contact Applied Data Systems or see
|
||||
http://www.applieddata.net/products.html
|
||||
|
||||
The original Linux support for this product has been provided by
|
||||
Nicolas Pitre <nico@cam.org>. Continued development work by
|
||||
Nicolas Pitre <nico@fluxnic.net>. Continued development work by
|
||||
Woojung Huh <whuh@applieddata.net>
|
||||
|
||||
It's currently possible to mount a root filesystem via NFS providing a
|
||||
@ -94,5 +94,5 @@ Notes:
|
||||
mode, the timing is off so the image is corrupted. This will be
|
||||
fixed soon.
|
||||
|
||||
Any contribution can be sent to nico@cam.org and will be greatly welcome!
|
||||
Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
|
||||
|
||||
|
@ -4,7 +4,7 @@ For more details, contact Applied Data Systems or see
|
||||
http://www.applieddata.net/products.html
|
||||
|
||||
The original Linux support for this product has been provided by
|
||||
Nicolas Pitre <nico@cam.org>. Continued development work by
|
||||
Nicolas Pitre <nico@fluxnic.net>. Continued development work by
|
||||
Woojung Huh <whuh@applieddata.net>
|
||||
|
||||
Use 'make graphicsmaster_config' before any 'make config'.
|
||||
@ -50,4 +50,4 @@ Notes:
|
||||
mode, the timing is off so the image is corrupted. This will be
|
||||
fixed soon.
|
||||
|
||||
Any contribution can be sent to nico@cam.org and will be greatly welcome!
|
||||
Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
|
||||
|
@ -9,7 +9,7 @@ Of course Victor is using Linux as its main operating system.
|
||||
The Victor implementation for Linux is maintained by Nicolas Pitre:
|
||||
|
||||
nico@visuaide.com
|
||||
nico@cam.org
|
||||
nico@fluxnic.net
|
||||
|
||||
For any comments, please feel free to contact me through the above
|
||||
addresses.
|
||||
|
75
Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
Normal file
75
Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
Normal file
@ -0,0 +1,75 @@
|
||||
S3C24XX CPUfreq support
|
||||
=======================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The S3C24XX series support a number of power saving systems, such as
|
||||
the ability to change the core, memory and peripheral operating
|
||||
frequencies. The core control is exported via the CPUFreq driver
|
||||
which has a number of different manual or automatic controls over the
|
||||
rate the core is running at.
|
||||
|
||||
There are two forms of the driver depending on the specific CPU and
|
||||
how the clocks are arranged. The first implementation used a single
|
||||
PLL to feed the ARM, memory and peripherals via a series of dividers
|
||||
and muxes and this is the implementation that is documented here. A
|
||||
newer version where there is a separate PLL and clock divider for the
|
||||
ARM core is available as a separate driver.
|
||||
|
||||
|
||||
Layout
|
||||
------
|
||||
|
||||
The core code manages the CPU specific drivers, any data that they
|
||||
need to register and the interface to the generic drivers/cpufreq
|
||||
system. Each CPU registers a driver to control the PLL, clock dividers
|
||||
and anything else associated with it. Any board that wants to use this
|
||||
framework needs to supply at least basic details of what is required.
|
||||
|
||||
The core registers with drivers/cpufreq at init time if all the data
|
||||
necessary has been supplied.
|
||||
|
||||
|
||||
CPU support
|
||||
-----------
|
||||
|
||||
The support for each CPU depends on the facilities provided by the
|
||||
SoC and the driver as each device has different PLL and clock chains
|
||||
associated with it.
|
||||
|
||||
|
||||
Slow Mode
|
||||
---------
|
||||
|
||||
The SLOW mode where the PLL is turned off altogether and the
|
||||
system is fed by the external crystal input is currently not
|
||||
supported.
|
||||
|
||||
|
||||
sysfs
|
||||
-----
|
||||
|
||||
The core code exports extra information via sysfs in the directory
|
||||
devices/system/cpu/cpu0/arch-freq.
|
||||
|
||||
|
||||
Board Support
|
||||
-------------
|
||||
|
||||
Each board that wants to use the cpufreq code must register some basic
|
||||
information with the core driver to provide information about what the
|
||||
board requires and any restrictions being placed on it.
|
||||
|
||||
The board needs to supply information about whether it needs the IO bank
|
||||
timings changing, any maximum frequency limits and information about the
|
||||
SDRAM refresh rate.
|
||||
|
||||
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2009 Simtec Electronics
|
||||
Licensed under GPLv2
|
@ -21,6 +21,8 @@ ffff8000 ffffffff copy_user_page / clear_user_page use.
|
||||
For SA11xx and Xscale, this is used to
|
||||
setup a minicache mapping.
|
||||
|
||||
ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs.
|
||||
|
||||
ffff1000 ffff7fff Reserved.
|
||||
Platforms must not use this address range.
|
||||
|
||||
|
119
Documentation/btmrvl.txt
Normal file
119
Documentation/btmrvl.txt
Normal file
@ -0,0 +1,119 @@
|
||||
=======================================================================
|
||||
README for btmrvl driver
|
||||
=======================================================================
|
||||
|
||||
|
||||
All commands are used via debugfs interface.
|
||||
|
||||
=====================
|
||||
Set/get driver configurations:
|
||||
|
||||
Path: /debug/btmrvl/config/
|
||||
|
||||
gpiogap=[n]
|
||||
hscfgcmd
|
||||
These commands are used to configure the host sleep parameters.
|
||||
bit 7:0 -- Gap
|
||||
bit 15:8 -- GPIO
|
||||
|
||||
where GPIO is the pin number of GPIO used to wake up the host.
|
||||
It could be any valid GPIO pin# (e.g. 0-7) or 0xff (SDIO interface
|
||||
wakeup will be used instead).
|
||||
|
||||
where Gap is the gap in milliseconds between wakeup signal and
|
||||
wakeup event, or 0xff for special host sleep setting.
|
||||
|
||||
Usage:
|
||||
# Use SDIO interface to wake up the host and set GAP to 0x80:
|
||||
echo 0xff80 > /debug/btmrvl/config/gpiogap
|
||||
echo 1 > /debug/btmrvl/config/hscfgcmd
|
||||
|
||||
# Use GPIO pin #3 to wake up the host and set GAP to 0xff:
|
||||
echo 0x03ff > /debug/btmrvl/config/gpiogap
|
||||
echo 1 > /debug/btmrvl/config/hscfgcmd
|
||||
|
||||
psmode=[n]
|
||||
pscmd
|
||||
These commands are used to enable/disable auto sleep mode
|
||||
|
||||
where the option is:
|
||||
1 -- Enable auto sleep mode
|
||||
0 -- Disable auto sleep mode
|
||||
|
||||
Usage:
|
||||
# Enable auto sleep mode
|
||||
echo 1 > /debug/btmrvl/config/psmode
|
||||
echo 1 > /debug/btmrvl/config/pscmd
|
||||
|
||||
# Disable auto sleep mode
|
||||
echo 0 > /debug/btmrvl/config/psmode
|
||||
echo 1 > /debug/btmrvl/config/pscmd
|
||||
|
||||
|
||||
hsmode=[n]
|
||||
hscmd
|
||||
These commands are used to enable host sleep or wake up firmware
|
||||
|
||||
where the option is:
|
||||
1 -- Enable host sleep
|
||||
0 -- Wake up firmware
|
||||
|
||||
Usage:
|
||||
# Enable host sleep
|
||||
echo 1 > /debug/btmrvl/config/hsmode
|
||||
echo 1 > /debug/btmrvl/config/hscmd
|
||||
|
||||
# Wake up firmware
|
||||
echo 0 > /debug/btmrvl/config/hsmode
|
||||
echo 1 > /debug/btmrvl/config/hscmd
|
||||
|
||||
|
||||
======================
|
||||
Get driver status:
|
||||
|
||||
Path: /debug/btmrvl/status/
|
||||
|
||||
Usage:
|
||||
cat /debug/btmrvl/status/<args>
|
||||
|
||||
where the args are:
|
||||
|
||||
curpsmode
|
||||
This command displays current auto sleep status.
|
||||
|
||||
psstate
|
||||
This command displays the power save state.
|
||||
|
||||
hsstate
|
||||
This command displays the host sleep state.
|
||||
|
||||
txdnldrdy
|
||||
This command displays the value of Tx download ready flag.
|
||||
|
||||
|
||||
=====================
|
||||
|
||||
Use hcitool to issue raw hci command, refer to hcitool manual
|
||||
|
||||
Usage: hcitool cmd <ogf> <ocf> [Parameters]
|
||||
|
||||
Interface Control Command
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x01 0x00 --Enable All interface
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x01 0x01 --Enable Wlan interface
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x01 0x02 --Enable BT interface
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x00 0x00 --Disable All interface
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x00 0x01 --Disable Wlan interface
|
||||
hcitool cmd 0x3f 0x5b 0xf5 0x00 0x02 --Disable BT interface
|
||||
|
||||
=======================================================================
|
||||
|
||||
|
||||
SD8688 firmware:
|
||||
|
||||
/lib/firmware/sd8688_helper.bin
|
||||
/lib/firmware/sd8688.bin
|
||||
|
||||
|
||||
The images can be downloaded from:
|
||||
|
||||
git.infradead.org/users/dwmw2/linux-firmware.git/libertas/
|
@ -9,3 +9,8 @@ hostprogs-y := ucon
|
||||
always := $(hostprogs-y)
|
||||
|
||||
HOSTCFLAGS_ucon.o += -I$(objtree)/usr/include
|
||||
|
||||
all: modules
|
||||
|
||||
modules clean:
|
||||
$(MAKE) -C ../.. SUBDIRS=$(PWD) $@
|
||||
|
@ -19,6 +19,8 @@
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "cn_test: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
@ -27,18 +29,17 @@
|
||||
|
||||
#include <linux/connector.h>
|
||||
|
||||
static struct cb_id cn_test_id = { 0x123, 0x456 };
|
||||
static struct cb_id cn_test_id = { CN_NETLINK_USERS + 3, 0x456 };
|
||||
static char cn_test_name[] = "cn_test";
|
||||
static struct sock *nls;
|
||||
static struct timer_list cn_test_timer;
|
||||
|
||||
void cn_test_callback(void *data)
|
||||
static void cn_test_callback(struct cn_msg *msg)
|
||||
{
|
||||
struct cn_msg *msg = (struct cn_msg *)data;
|
||||
|
||||
printk("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
|
||||
__func__, jiffies, msg->id.idx, msg->id.val,
|
||||
msg->seq, msg->ack, msg->len, (char *)msg->data);
|
||||
pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
|
||||
__func__, jiffies, msg->id.idx, msg->id.val,
|
||||
msg->seq, msg->ack, msg->len,
|
||||
msg->len ? (char *)msg->data : "");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -63,9 +64,7 @@ static int cn_test_want_notify(void)
|
||||
|
||||
skb = alloc_skb(size, GFP_ATOMIC);
|
||||
if (!skb) {
|
||||
printk(KERN_ERR "Failed to allocate new skb with size=%u.\n",
|
||||
size);
|
||||
|
||||
pr_err("failed to allocate new skb with size=%u\n", size);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -114,12 +113,12 @@ static int cn_test_want_notify(void)
|
||||
//netlink_broadcast(nls, skb, 0, ctl->group, GFP_ATOMIC);
|
||||
netlink_unicast(nls, skb, 0, 0);
|
||||
|
||||
printk(KERN_INFO "Request was sent. Group=0x%x.\n", ctl->group);
|
||||
pr_info("request was sent: group=0x%x\n", ctl->group);
|
||||
|
||||
return 0;
|
||||
|
||||
nlmsg_failure:
|
||||
printk(KERN_ERR "Failed to send %u.%u\n", msg->seq, msg->ack);
|
||||
pr_err("failed to send %u.%u\n", msg->seq, msg->ack);
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -131,6 +130,8 @@ static void cn_test_timer_func(unsigned long __data)
|
||||
struct cn_msg *m;
|
||||
char data[32];
|
||||
|
||||
pr_debug("%s: timer fired with data %lu\n", __func__, __data);
|
||||
|
||||
m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
|
||||
if (m) {
|
||||
|
||||
@ -150,7 +151,7 @@ static void cn_test_timer_func(unsigned long __data)
|
||||
|
||||
cn_test_timer_counter++;
|
||||
|
||||
mod_timer(&cn_test_timer, jiffies + HZ);
|
||||
mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
|
||||
}
|
||||
|
||||
static int cn_test_init(void)
|
||||
@ -168,8 +169,10 @@ static int cn_test_init(void)
|
||||
}
|
||||
|
||||
setup_timer(&cn_test_timer, cn_test_timer_func, 0);
|
||||
cn_test_timer.expires = jiffies + HZ;
|
||||
add_timer(&cn_test_timer);
|
||||
mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
|
||||
|
||||
pr_info("initialized with id={%u.%u}\n",
|
||||
cn_test_id.idx, cn_test_id.val);
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -5,10 +5,10 @@ Kernel Connector.
|
||||
Kernel connector - new netlink based userspace <-> kernel space easy
|
||||
to use communication module.
|
||||
|
||||
Connector driver adds possibility to connect various agents using
|
||||
netlink based network. One must register callback and
|
||||
identifier. When driver receives special netlink message with
|
||||
appropriate identifier, appropriate callback will be called.
|
||||
The Connector driver makes it easy to connect various agents using a
|
||||
netlink based network. One must register a callback and an identifier.
|
||||
When the driver receives a special netlink message with the appropriate
|
||||
identifier, the appropriate callback will be called.
|
||||
|
||||
From the userspace point of view it's quite straightforward:
|
||||
|
||||
@ -17,10 +17,10 @@ From the userspace point of view it's quite straightforward:
|
||||
send();
|
||||
recv();
|
||||
|
||||
But if kernelspace want to use full power of such connections, driver
|
||||
writer must create special sockets, must know about struct sk_buff
|
||||
handling... Connector allows any kernelspace agents to use netlink
|
||||
based networking for inter-process communication in a significantly
|
||||
But if kernelspace wants to use the full power of such connections, the
|
||||
driver writer must create special sockets, must know about struct sk_buff
|
||||
handling, etc... The Connector driver allows any kernelspace agents to use
|
||||
netlink based networking for inter-process communication in a significantly
|
||||
easier way:
|
||||
|
||||
int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
|
||||
@ -32,15 +32,15 @@ struct cb_id
|
||||
__u32 val;
|
||||
};
|
||||
|
||||
idx and val are unique identifiers which must be registered in
|
||||
connector.h for in-kernel usage. void (*callback) (void *) - is a
|
||||
callback function which will be called when message with above idx.val
|
||||
will be received by connector core. Argument for that function must
|
||||
idx and val are unique identifiers which must be registered in the
|
||||
connector.h header for in-kernel usage. void (*callback) (void *) is a
|
||||
callback function which will be called when a message with above idx.val
|
||||
is received by the connector core. The argument for that function must
|
||||
be dereferenced to struct cn_msg *.
|
||||
|
||||
struct cn_msg
|
||||
{
|
||||
struct cb_id id;
|
||||
struct cb_id id;
|
||||
|
||||
__u32 seq;
|
||||
__u32 ack;
|
||||
@ -55,92 +55,95 @@ Connector interfaces.
|
||||
|
||||
int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
|
||||
|
||||
Registers new callback with connector core.
|
||||
Registers new callback with connector core.
|
||||
|
||||
struct cb_id *id - unique connector's user identifier.
|
||||
It must be registered in connector.h for legal in-kernel users.
|
||||
char *name - connector's callback symbolic name.
|
||||
void (*callback) (void *) - connector's callback.
|
||||
struct cb_id *id - unique connector's user identifier.
|
||||
It must be registered in connector.h for legal in-kernel users.
|
||||
char *name - connector's callback symbolic name.
|
||||
void (*callback) (void *) - connector's callback.
|
||||
Argument must be dereferenced to struct cn_msg *.
|
||||
|
||||
|
||||
void cn_del_callback(struct cb_id *id);
|
||||
|
||||
Unregisters new callback with connector core.
|
||||
Unregisters new callback with connector core.
|
||||
|
||||
struct cb_id *id - unique connector's user identifier.
|
||||
|
||||
struct cb_id *id - unique connector's user identifier.
|
||||
|
||||
int cn_netlink_send(struct cn_msg *msg, u32 __groups, int gfp_mask);
|
||||
|
||||
Sends message to the specified groups. It can be safely called from
|
||||
softirq context, but may silently fail under strong memory pressure.
|
||||
If there are no listeners for given group -ESRCH can be returned.
|
||||
Sends message to the specified groups. It can be safely called from
|
||||
softirq context, but may silently fail under strong memory pressure.
|
||||
If there are no listeners for given group -ESRCH can be returned.
|
||||
|
||||
struct cn_msg * - message header(with attached data).
|
||||
u32 __group - destination group.
|
||||
struct cn_msg * - message header(with attached data).
|
||||
u32 __group - destination group.
|
||||
If __group is zero, then appropriate group will
|
||||
be searched through all registered connector users,
|
||||
and message will be delivered to the group which was
|
||||
created for user with the same ID as in msg.
|
||||
If __group is not zero, then message will be delivered
|
||||
to the specified group.
|
||||
int gfp_mask - GFP mask.
|
||||
int gfp_mask - GFP mask.
|
||||
|
||||
Note: When registering new callback user, connector core assigns
|
||||
netlink group to the user which is equal to it's id.idx.
|
||||
Note: When registering new callback user, connector core assigns
|
||||
netlink group to the user which is equal to its id.idx.
|
||||
|
||||
/*****************************************/
|
||||
Protocol description.
|
||||
/*****************************************/
|
||||
|
||||
Current offers transport layer with fixed header. Recommended
|
||||
protocol which uses such header is following:
|
||||
The current framework offers a transport layer with fixed headers. The
|
||||
recommended protocol which uses such a header is as follows:
|
||||
|
||||
msg->seq and msg->ack are used to determine message genealogy. When
|
||||
someone sends message it puts there locally unique sequence and random
|
||||
acknowledge numbers. Sequence number may be copied into
|
||||
someone sends a message, they use a locally unique sequence and random
|
||||
acknowledge number. The sequence number may be copied into
|
||||
nlmsghdr->nlmsg_seq too.
|
||||
|
||||
Sequence number is incremented with each message to be sent.
|
||||
The sequence number is incremented with each message sent.
|
||||
|
||||
If we expect reply to our message, then sequence number in received
|
||||
message MUST be the same as in original message, and acknowledge
|
||||
number MUST be the same + 1.
|
||||
If you expect a reply to the message, then the sequence number in the
|
||||
received message MUST be the same as in the original message, and the
|
||||
acknowledge number MUST be the same + 1.
|
||||
|
||||
If we receive message and it's sequence number is not equal to one we
|
||||
are expecting, then it is new message. If we receive message and it's
|
||||
sequence number is the same as one we are expecting, but it's
|
||||
acknowledge is not equal acknowledge number in original message + 1,
|
||||
then it is new message.
|
||||
If we receive a message and its sequence number is not equal to one we
|
||||
are expecting, then it is a new message. If we receive a message and
|
||||
its sequence number is the same as one we are expecting, but its
|
||||
acknowledge is not equal to the acknowledge number in the original
|
||||
message + 1, then it is a new message.
|
||||
|
||||
Obviously, protocol header contains above id.
|
||||
Obviously, the protocol header contains the above id.
|
||||
|
||||
connector allows event notification in the following form: kernel
|
||||
The connector allows event notification in the following form: kernel
|
||||
driver or userspace process can ask connector to notify it when
|
||||
selected id's will be turned on or off(registered or unregistered it's
|
||||
callback). It is done by sending special command to connector
|
||||
driver(it also registers itself with id={-1, -1}).
|
||||
selected ids will be turned on or off (registered or unregistered its
|
||||
callback). It is done by sending a special command to the connector
|
||||
driver (it also registers itself with id={-1, -1}).
|
||||
|
||||
As example of usage Documentation/connector now contains cn_test.c -
|
||||
testing module which uses connector to request notification and to
|
||||
send messages.
|
||||
An example of this usage can be found in the cn_test.c module which
|
||||
uses the connector to request notification and to send messages.
|
||||
|
||||
/*****************************************/
|
||||
Reliability.
|
||||
/*****************************************/
|
||||
|
||||
Netlink itself is not reliable protocol, that means that messages can
|
||||
Netlink itself is not a reliable protocol. That means that messages can
|
||||
be lost due to memory pressure or process' receiving queue overflowed,
|
||||
so caller is warned must be prepared. That is why struct cn_msg [main
|
||||
connector's message header] contains u32 seq and u32 ack fields.
|
||||
so caller is warned that it must be prepared. That is why the struct
|
||||
cn_msg [main connector's message header] contains u32 seq and u32 ack
|
||||
fields.
|
||||
|
||||
/*****************************************/
|
||||
Userspace usage.
|
||||
/*****************************************/
|
||||
|
||||
2.6.14 has a new netlink socket implementation, which by default does not
|
||||
allow to send data to netlink groups other than 1.
|
||||
So, if to use netlink socket (for example using connector)
|
||||
with different group number userspace application must subscribe to
|
||||
that group. It can be achieved by following pseudocode:
|
||||
allow people to send data to netlink groups other than 1.
|
||||
So, if you wish to use a netlink socket (for example using connector)
|
||||
with a different group number, the userspace application must subscribe to
|
||||
that group first. It can be achieved by the following pseudocode:
|
||||
|
||||
s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
|
||||
|
||||
@ -160,8 +163,8 @@ if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
|
||||
}
|
||||
|
||||
Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
|
||||
option. To drop multicast subscription one should call above socket option
|
||||
with NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
|
||||
option. To drop a multicast subscription, one should call the above socket
|
||||
option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
|
||||
|
||||
2.6.14 netlink code only allows selecting a group which is less than or equal to
|
||||
the maximum group number, which is used at netlink_kernel_create() time.
|
||||
|
@ -30,18 +30,24 @@
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include <linux/connector.h>
|
||||
|
||||
#define DEBUG
|
||||
#define NETLINK_CONNECTOR 11
|
||||
|
||||
/* Hopefully your userspace connector.h matches this kernel */
|
||||
#define CN_TEST_IDX CN_NETLINK_USERS + 3
|
||||
#define CN_TEST_VAL 0x456
|
||||
|
||||
#ifdef DEBUG
|
||||
#define ulog(f, a...) fprintf(stdout, f, ##a)
|
||||
#else
|
||||
@ -83,6 +89,25 @@ static int netlink_send(int s, struct cn_msg *msg)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
printf(
|
||||
"Usage: ucon [options] [output file]\n"
|
||||
"\n"
|
||||
"\t-h\tthis help screen\n"
|
||||
"\t-s\tsend buffers to the test module\n"
|
||||
"\n"
|
||||
"The default behavior of ucon is to subscribe to the test module\n"
|
||||
"and wait for state messages. Any ones received are dumped to the\n"
|
||||
"specified output file (or stdout). The test module is assumed to\n"
|
||||
"have an id of {%u.%u}\n"
|
||||
"\n"
|
||||
"If you get no output, then verify the cn_test module id matches\n"
|
||||
"the expected id above.\n"
|
||||
, CN_TEST_IDX, CN_TEST_VAL
|
||||
);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int s;
|
||||
@ -94,17 +119,34 @@ int main(int argc, char *argv[])
|
||||
FILE *out;
|
||||
time_t tm;
|
||||
struct pollfd pfd;
|
||||
bool send_msgs = false;
|
||||
|
||||
if (argc < 2)
|
||||
out = stdout;
|
||||
else {
|
||||
out = fopen(argv[1], "a+");
|
||||
while ((s = getopt(argc, argv, "hs")) != -1) {
|
||||
switch (s) {
|
||||
case 's':
|
||||
send_msgs = true;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
usage();
|
||||
return 0;
|
||||
|
||||
default:
|
||||
/* getopt() outputs an error for us */
|
||||
usage();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc != optind) {
|
||||
out = fopen(argv[optind], "a+");
|
||||
if (!out) {
|
||||
ulog("Unable to open %s for writing: %s\n",
|
||||
argv[1], strerror(errno));
|
||||
out = stdout;
|
||||
}
|
||||
}
|
||||
} else
|
||||
out = stdout;
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
|
||||
@ -115,9 +157,11 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
l_local.nl_family = AF_NETLINK;
|
||||
l_local.nl_groups = 0x123; /* bitmask of requested groups */
|
||||
l_local.nl_groups = -1; /* bitmask of requested groups */
|
||||
l_local.nl_pid = 0;
|
||||
|
||||
ulog("subscribing to %u.%u\n", CN_TEST_IDX, CN_TEST_VAL);
|
||||
|
||||
if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
|
||||
perror("bind");
|
||||
close(s);
|
||||
@ -130,15 +174,15 @@ int main(int argc, char *argv[])
|
||||
setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on));
|
||||
}
|
||||
#endif
|
||||
if (0) {
|
||||
if (send_msgs) {
|
||||
int i, j;
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
|
||||
data = (struct cn_msg *)buf;
|
||||
|
||||
data->id.idx = 0x123;
|
||||
data->id.val = 0x456;
|
||||
data->id.idx = CN_TEST_IDX;
|
||||
data->id.val = CN_TEST_VAL;
|
||||
data->seq = seq++;
|
||||
data->ack = 0;
|
||||
data->len = 0;
|
||||
|
@ -176,7 +176,9 @@ scaling_governor, and by "echoing" the name of another
|
||||
work on some specific architectures or
|
||||
processors.
|
||||
|
||||
cpuinfo_cur_freq : Current speed of the CPU, in KHz.
|
||||
cpuinfo_cur_freq : Current frequency of the CPU as obtained from
|
||||
the hardware, in KHz. This is the frequency
|
||||
the CPU actually runs at.
|
||||
|
||||
scaling_available_frequencies : List of available frequencies, in KHz.
|
||||
|
||||
@ -196,7 +198,10 @@ related_cpus : List of CPUs that need some sort of frequency
|
||||
|
||||
scaling_driver : Hardware driver for cpufreq.
|
||||
|
||||
scaling_cur_freq : Current frequency of the CPU, in KHz.
|
||||
scaling_cur_freq : Current frequency of the CPU as determined by
|
||||
the governor and cpufreq core, in KHz. This is
|
||||
the frequency the kernel thinks the CPU runs
|
||||
at.
|
||||
|
||||
If you have selected the "userspace" governor which allows you to
|
||||
set the CPU operating frequency to a specific value, you can read out
|
||||
|
@ -152,7 +152,6 @@ piggy.gz
|
||||
piggyback
|
||||
pnmtologo
|
||||
ppc_defs.h*
|
||||
promcon_tbl.c
|
||||
pss_boot.h
|
||||
qconf
|
||||
raid6altivec*.c
|
||||
|
@ -6,6 +6,35 @@ be removed from this file.
|
||||
|
||||
---------------------------
|
||||
|
||||
What: PRISM54
|
||||
When: 2.6.34
|
||||
|
||||
Why: prism54 FullMAC PCI / Cardbus devices used to be supported only by the
|
||||
prism54 wireless driver. After Intersil stopped selling these
|
||||
devices in preference for the newer more flexible SoftMAC devices
|
||||
a SoftMAC device driver was required and prism54 did not support
|
||||
them. The p54pci driver now exists and has been present in the kernel for
|
||||
a while. This driver supports both SoftMAC devices and FullMAC devices.
|
||||
The main difference between these devices was the amount of memory which
|
||||
could be used for the firmware. The SoftMAC devices support a smaller
|
||||
amount of memory. Because of this the SoftMAC firmware fits into FullMAC
|
||||
devices' memory. p54pci supports not only PCI / Cardbus but also USB
|
||||
and SPI. Since p54pci supports all devices prism54 supports
|
||||
you will have a conflict. I'm not quite sure how distributions are
|
||||
handling this conflict right now. prism54 was kept around due to
|
||||
claims users may experience issues when using the SoftMAC driver.
|
||||
Time has passed and users have not reported issues. If you use prism54
|
||||
and for whatever reason you cannot use p54pci please let us know!
|
||||
E-mail us at: linux-wireless@vger.kernel.org
|
||||
|
||||
For more information see the p54 wiki page:
|
||||
|
||||
http://wireless.kernel.org/en/users/Drivers/p54
|
||||
|
||||
Who: Luis R. Rodriguez <lrodriguez@atheros.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: IRQF_SAMPLE_RANDOM
|
||||
Check: IRQF_SAMPLE_RANDOM
|
||||
When: July 2009
|
||||
@ -206,24 +235,6 @@ Who: Len Brown <len.brown@intel.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: libata spindown skipping and warning
|
||||
When: Dec 2008
|
||||
Why: Some halt(8) implementations synchronize caches for and spin
|
||||
down libata disks because libata didn't use to spin down disk on
|
||||
system halt (only synchronized caches).
|
||||
Spin down on system halt is now implemented. sysfs node
|
||||
/sys/class/scsi_disk/h:c:i:l/manage_start_stop is present if
|
||||
spin down support is available.
|
||||
Because issuing spin down command to an already spun down disk
|
||||
makes some disks spin up just to spin down again, libata tracks
|
||||
device spindown status to skip the extra spindown command and
|
||||
warn about it.
|
||||
This is to give userspace tools the time to get updated and will
|
||||
be removed after userspace is reasonably updated.
|
||||
Who: Tejun Heo <htejun@gmail.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i386/x86_64 bzImage symlinks
|
||||
When: April 2010
|
||||
|
||||
@ -235,31 +246,6 @@ Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
---------------------------
|
||||
|
||||
What (Why):
|
||||
- include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
|
||||
(superseded by xt_TOS/xt_tos target & match)
|
||||
|
||||
- "forwarding" header files like ipt_mac.h in
|
||||
include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/
|
||||
|
||||
- xt_CONNMARK match revision 0
|
||||
(superseded by xt_CONNMARK match revision 1)
|
||||
|
||||
- xt_MARK target revisions 0 and 1
|
||||
(superseded by xt_MARK match revision 2)
|
||||
|
||||
- xt_connmark match revision 0
|
||||
(superseded by xt_connmark match revision 1)
|
||||
|
||||
- xt_conntrack match revision 0
|
||||
(superseded by xt_conntrack match revision 1)
|
||||
|
||||
- xt_iprange match revision 0,
|
||||
include/linux/netfilter_ipv4/ipt_iprange.h
|
||||
(superseded by xt_iprange match revision 1)
|
||||
|
||||
- xt_mark match revision 0
|
||||
(superseded by xt_mark match revision 1)
|
||||
|
||||
- xt_recent: the old ipt_recent proc dir
|
||||
(superseded by /proc/net/xt_recent)
|
||||
|
||||
@ -394,15 +380,6 @@ Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
-----------------------------
|
||||
|
||||
What: obsolete generic irq defines and typedefs
|
||||
When: 2.6.30
|
||||
Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
|
||||
have been kept around for migration reasons. After more than two years
|
||||
it's time to remove them finally
|
||||
Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: fakephp and associated sysfs files in /sys/bus/pci/slots/
|
||||
When: 2011
|
||||
Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
|
||||
@ -451,16 +428,6 @@ Who: Johannes Berg <johannes@sipsolutions.net>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: CONFIG_X86_OLD_MCE
|
||||
When: 2.6.32
|
||||
Why: Remove the old legacy 32bit machine check code. This has been
|
||||
superseded by the newer machine check code from the 64bit port,
|
||||
but the old version has been kept around for easier testing. Note this
|
||||
doesn't impact the old P5 and WinChip machine check handlers.
|
||||
Who: Andi Kleen <andi@firstfloor.org>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be
|
||||
exported interface anymore.
|
||||
When: 2.6.33
|
||||
@ -468,3 +435,27 @@ Why: cpu_policy_rwsem has a new cleaner definition making it local to
|
||||
cpufreq core and contained inside cpufreq.c. Other dependent
|
||||
drivers should not use it in order to safely avoid lockdep issues.
|
||||
Who: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: sound-slot/service-* module aliases and related clutters in
|
||||
sound/sound_core.c
|
||||
When: August 2010
|
||||
Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR
|
||||
(14) and requests modules using custom sound-slot/service-*
|
||||
module aliases. The only benefit of doing this is allowing
|
||||
use of custom module aliases which might as well be considered
|
||||
a bug at this point. This preemptive claiming prevents
|
||||
alternative OSS implementations.
|
||||
|
||||
Till the feature is removed, the kernel will be requesting
|
||||
both sound-slot/service-* and the standard char-major-* module
|
||||
aliases and allow turning off the pre-claiming selectively via
|
||||
CONFIG_SOUND_OSS_CORE_PRECLAIM and soundcore.preclaim_oss
|
||||
kernel parameter.
|
||||
|
||||
After the transition phase is complete, both the custom module
|
||||
aliases and switches to disable it will go away. This removal
|
||||
will also allow making ALSA OSS emulation independent of
|
||||
sound_core. The dependency will be broken then too.
|
||||
Who: Tejun Heo <tj@kernel.org>
|
||||
|
@ -123,6 +123,9 @@ available from the same CVS repository.
|
||||
There are user and developer mailing lists available through the v9fs project
|
||||
on sourceforge (http://sourceforge.net/projects/v9fs).
|
||||
|
||||
A stand-alone version of the module (which should build for any 2.6 kernel)
|
||||
is available via (http://github.com/ericvh/9p-sac/tree/master)
|
||||
|
||||
News and other information is maintained on SWiK (http://swik.net/v9fs).
|
||||
|
||||
Bug reports may be issued through the kernel.org bugzilla
|
||||
|
@ -23,16 +23,14 @@ it does support include:
|
||||
|
||||
(*) Security (currently only AFS kaserver and KerberosIV tickets).
|
||||
|
||||
(*) File reading.
|
||||
(*) File reading and writing.
|
||||
|
||||
(*) Automounting.
|
||||
|
||||
(*) Local caching (via fscache).
|
||||
|
||||
It does not yet support the following AFS features:
|
||||
|
||||
(*) Write support.
|
||||
|
||||
(*) Local caching.
|
||||
|
||||
(*) pioctl() system call.
|
||||
|
||||
|
||||
@ -56,7 +54,7 @@ They permit the debugging messages to be turned on dynamically by manipulating
|
||||
the masks in the following files:
|
||||
|
||||
/sys/module/af_rxrpc/parameters/debug
|
||||
/sys/module/afs/parameters/debug
|
||||
/sys/module/kafs/parameters/debug
|
||||
|
||||
|
||||
=====
|
||||
@ -66,9 +64,9 @@ USAGE
|
||||
When inserting the driver modules the root cell must be specified along with a
|
||||
list of volume location server IP addresses:
|
||||
|
||||
insmod af_rxrpc.o
|
||||
insmod rxkad.o
|
||||
insmod kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
|
||||
modprobe af_rxrpc
|
||||
modprobe rxkad
|
||||
modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
|
||||
|
||||
The first module is the AF_RXRPC network protocol driver. This provides the
|
||||
RxRPC remote operation protocol and may also be accessed from userspace. See:
|
||||
@ -81,7 +79,7 @@ is the actual filesystem driver for the AFS filesystem.
|
||||
Once the module has been loaded, more cells can be added by the following
|
||||
procedure:
|
||||
|
||||
echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
|
||||
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
|
||||
|
||||
Where the parameters to the "add" command are the name of a cell and a list of
|
||||
volume location servers within that cell, with the latter separated by colons.
|
||||
@ -101,7 +99,7 @@ The name of the volume can be suffixes with ".backup" or ".readonly" to
|
||||
specify connection to only volumes of those types.
|
||||
|
||||
The name of the cell is optional, and if not given during a mount, then the
|
||||
named volume will be looked up in the cell specified during insmod.
|
||||
named volume will be looked up in the cell specified during modprobe.
|
||||
|
||||
Additional cells can be added through /proc (see later section).
|
||||
|
||||
@ -163,14 +161,14 @@ THE CELL DATABASE
|
||||
|
||||
The filesystem maintains an internal database of all the cells it knows and the
|
||||
IP addresses of the volume location servers for those cells. The cell to which
|
||||
the system belongs is added to the database when insmod is performed by the
|
||||
the system belongs is added to the database when modprobe is performed by the
|
||||
"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
|
||||
the kernel command line.
|
||||
|
||||
Further cells can be added by commands similar to the following:
|
||||
|
||||
echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells
|
||||
echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
|
||||
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
|
||||
|
||||
No other cell database operations are available at this time.
|
||||
|
||||
@ -233,7 +231,7 @@ insmod /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.91
|
||||
mount -t afs \%root.afs. /afs
|
||||
mount -t afs \%cambridge.redhat.com:root.cell. /afs/cambridge.redhat.com/
|
||||
|
||||
echo add grand.central.org 18.7.14.88:128.2.191.224 > /proc/fs/afs/cells
|
||||
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 > /proc/fs/afs/cells
|
||||
mount -t afs "#grand.central.org:root.cell." /afs/grand.central.org/
|
||||
mount -t afs "#grand.central.org:root.archive." /afs/grand.central.org/archive
|
||||
mount -t afs "#grand.central.org:root.contrib." /afs/grand.central.org/contrib
|
||||
|
@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
|
||||
mount options "ro,noload" can be used to prevent
|
||||
writes to the filesystem.
|
||||
|
||||
journal_checksum Enable checksumming of the journal transactions.
|
||||
This will allow the recovery code in e2fsck and the
|
||||
kernel to detect corruption in the kernel. It is a
|
||||
compatible change and will be ignored by older kernels.
|
||||
|
||||
journal_async_commit Commit block can be written to disk without waiting
|
||||
for descriptor blocks. If enabled older kernels cannot
|
||||
mount the device. This will enable 'journal_checksum'
|
||||
internally.
|
||||
mount the device.
|
||||
|
||||
journal=update Update the ext4 file system's journal to the current
|
||||
format.
|
||||
@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.
|
||||
|
||||
sb=n Use alternate superblock at this location.
|
||||
|
||||
quota
|
||||
noquota
|
||||
grpquota
|
||||
usrquota
|
||||
quota These options are ignored by the filesystem. They
|
||||
noquota are used only by quota tools to recognize volumes
|
||||
grpquota where quota should be turned on. See documentation
|
||||
usrquota in the quota-tools package for more details
|
||||
(http://sourceforge.net/projects/linuxquota).
|
||||
|
||||
jqfmt=<quota type> These options tell filesystem details about quota
|
||||
usrjquota=<file> so that quota information can be properly updated
|
||||
grpjquota=<file> during journal replay. They replace the above
|
||||
quota options. See documentation in the quota-tools
|
||||
package for more details
|
||||
(http://sourceforge.net/projects/linuxquota).
|
||||
|
||||
bh (*) ext4 associates buffer heads to data pages to
|
||||
nobh (a) cache disk block mapping information
|
||||
|
100
Documentation/filesystems/gfs2-uevents.txt
Normal file
100
Documentation/filesystems/gfs2-uevents.txt
Normal file
@ -0,0 +1,100 @@
|
||||
uevents and GFS2
|
||||
==================
|
||||
|
||||
During the lifetime of a GFS2 mount, a number of uevents are generated.
|
||||
This document explains what the events are and what they are used
|
||||
for (by gfs_controld in gfs2-utils).
|
||||
|
||||
A list of GFS2 uevents
|
||||
-----------------------
|
||||
|
||||
1. ADD
|
||||
|
||||
The ADD event occurs at mount time. It will always be the first
|
||||
uevent generated by the newly created filesystem. If the mount
|
||||
is successful, an ONLINE uevent will follow. If it is not successful
|
||||
then a REMOVE uevent will follow.
|
||||
|
||||
The ADD uevent has two environment variables: SPECTATOR=[0|1]
|
||||
and RDONLY=[0|1] that specify the spectator status (a read-only mount
|
||||
with no journal assigned), and read-only (with journal assigned) status
|
||||
of the filesystem respectively.
|
||||
|
||||
2. ONLINE
|
||||
|
||||
The ONLINE uevent is generated after a successful mount or remount. It
|
||||
has the same environment variables as the ADD uevent. The ONLINE
|
||||
uevent, along with the two environment variables for spectator and
|
||||
RDONLY are a relatively recent addition (2.6.32-rc+) and will not
|
||||
be generated by older kernels.
|
||||
|
||||
3. CHANGE
|
||||
|
||||
The CHANGE uevent is used in two places. One is when reporting the
|
||||
successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
|
||||
This is used as a signal by gfs_controld that it is then ok for other
|
||||
nodes in the cluster to mount the filesystem.
|
||||
|
||||
The other CHANGE uevent is used to inform of the completion
|
||||
of journal recovery for one of the filesystem's journals. It has
|
||||
two environment variables, JID= which specifies the journal id which
|
||||
has just been recovered, and RECOVERY=[Done|Failed] to indicate the
|
||||
success (or otherwise) of the operation. These uevents are generated
|
||||
for every journal recovered, whether it is during the initial mount
|
||||
process or as the result of gfs_controld requesting a specific journal
|
||||
recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file.
|
||||
|
||||
Because the CHANGE uevent was used (in early versions of gfs_controld)
|
||||
without checking the environment variables to discover the state, we
|
||||
cannot add any more functions to it without running the risk of
|
||||
someone using an older version of the user tools and breaking their
|
||||
cluster. For this reason the ONLINE uevent was used when adding a new
|
||||
uevent for a successful mount or remount.
|
||||
|
||||
4. OFFLINE
|
||||
|
||||
The OFFLINE uevent is only generated due to filesystem errors and is used
|
||||
as part of the "withdraw" mechanism. Currently this doesn't give any
|
||||
information about what the error is, which is something that needs to
|
||||
be fixed.
|
||||
|
||||
5. REMOVE
|
||||
|
||||
The REMOVE uevent is generated at the end of an unsuccessful mount
|
||||
or at the end of a umount of the filesystem. All REMOVE uevents will
|
||||
have been preceded by at least an ADD uevent for the same filesystem,
|
||||
and unlike the other uevents is generated automatically by the kernel's
|
||||
kobject subsystem.
|
||||
|
||||
|
||||
Information common to all GFS2 uevents (uevent environment variables)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
1. LOCKTABLE=
|
||||
|
||||
The LOCKTABLE is a string, as supplied on the mount command
|
||||
line (locktable=) or via fstab. It is used as a filesystem label
|
||||
as well as providing the information for a lock_dlm mount to be
|
||||
able to join the cluster.
|
||||
|
||||
2. LOCKPROTO=
|
||||
|
||||
The LOCKPROTO is a string, and its value depends on what is set
|
||||
on the mount command line, or via fstab. It will be either
|
||||
lock_nolock or lock_dlm. In the future other lock managers
|
||||
may be supported.
|
||||
|
||||
3. JOURNALID=
|
||||
|
||||
If a journal is in use by the filesystem (journals are not
|
||||
assigned for spectator mounts) then this will give the
|
||||
numeric journal id in all GFS2 uevents.
|
||||
|
||||
4. UUID=
|
||||
|
||||
With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
|
||||
into the filesystem superblock. If it exists, this will
|
||||
be included in every uevent relating to the filesystem.
|
||||
|
||||
|
||||
|
98
Documentation/filesystems/nfs.txt
Normal file
98
Documentation/filesystems/nfs.txt
Normal file
@ -0,0 +1,98 @@
|
||||
|
||||
The NFS client
|
||||
==============
|
||||
|
||||
The NFS version 2 protocol was first documented in RFC1094 (March 1989).
|
||||
Since then two more major releases of NFS have been published, with NFSv3
|
||||
being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
|
||||
2003).
|
||||
|
||||
The Linux NFS client currently supports all the above published versions,
|
||||
and work is in progress on adding support for minor version 1 of the NFSv4
|
||||
protocol.
|
||||
|
||||
The purpose of this document is to provide information on some of the
|
||||
upcall interfaces that are used in order to provide the NFS client with
|
||||
some of the information that it requires in order to fully comply with
|
||||
the NFS spec.
|
||||
|
||||
The DNS resolver
|
||||
================
|
||||
|
||||
NFSv4 allows for one server to refer the NFS client to data that has been
|
||||
migrated onto another server by means of the special "fs_locations"
|
||||
attribute. See
|
||||
http://tools.ietf.org/html/rfc3530#section-6
|
||||
and
|
||||
http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
|
||||
|
||||
The fs_locations information can take the form of either an ip address and
|
||||
a path, or a DNS hostname and a path. The latter requires the NFS client to
|
||||
do a DNS lookup in order to mount the new volume, and hence the need for an
|
||||
upcall to allow userland to provide this service.
|
||||
|
||||
Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
|
||||
/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
|
||||
|
||||
(1) The process checks the dns_resolve cache to see if it contains a
|
||||
valid entry. If so, it returns that entry and exits.
|
||||
|
||||
(2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
|
||||
(may be changed using the 'nfs.cache_getent' kernel boot parameter)
|
||||
is run, with two arguments:
|
||||
- the cache name, "dns_resolve"
|
||||
- the hostname to resolve
|
||||
|
||||
(3) After looking up the corresponding ip address, the helper script
|
||||
writes the result into the rpc_pipefs pseudo-file
|
||||
'/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
|
||||
in the following (text) format:
|
||||
|
||||
"<ip address> <hostname> <ttl>\n"
|
||||
|
||||
Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
|
||||
(ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
|
||||
<hostname> is identical to the second argument of the helper
|
||||
script, and <ttl> is the 'time to live' of this cache entry (in
|
||||
units of seconds).
|
||||
|
||||
Note: If <ip address> is invalid, say the string "0", then a negative
|
||||
entry is created, which will cause the kernel to treat the hostname
|
||||
as having no valid DNS translation.
|
||||
|
||||
|
||||
|
||||
|
||||
A basic sample /sbin/nfs_cache_getent
|
||||
=====================================
|
||||
|
||||
#!/bin/bash
|
||||
#
|
||||
ttl=600
|
||||
#
|
||||
cut=/usr/bin/cut
|
||||
getent=/usr/bin/getent
|
||||
rpc_pipefs=/var/lib/nfs/rpc_pipefs
|
||||
#
|
||||
die()
|
||||
{
|
||||
echo "Usage: $0 cache_name entry_name"
|
||||
exit 1
|
||||
}
|
||||
|
||||
[ $# -lt 2 ] && die
|
||||
cachename="$1"
|
||||
cache_path=${rpc_pipefs}/cache/${cachename}/channel
|
||||
|
||||
case "${cachename}" in
|
||||
dns_resolve)
|
||||
name="$2"
|
||||
result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
|
||||
[ -z "${result}" ] && result="0"
|
||||
;;
|
||||
*)
|
||||
die
|
||||
;;
|
||||
esac
|
||||
echo "${result} ${name} ${ttl}" >${cache_path}
|
||||
|
@ -1167,13 +1167,11 @@ CHAPTER 3: PER-PROCESS PARAMETERS
|
||||
3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
|
||||
------------------------------------------------------
|
||||
|
||||
This file can be used to adjust the score used to select which processes should
|
||||
be killed in an out-of-memory situation. The oom_adj value is a characteristic
|
||||
of the task's mm, so all threads that share an mm with pid will have the same
|
||||
oom_adj value. A high value will increase the likelihood of this process being
|
||||
killed by the oom-killer. Valid values are in the range -16 to +15 as
|
||||
explained below and a special value of -17, which disables oom-killing
|
||||
altogether for threads sharing pid's mm.
|
||||
This file can be used to adjust the score used to select which processes
|
||||
should be killed in an out-of-memory situation. Giving it a high score will
|
||||
increase the likelihood of this process being killed by the oom-killer. Valid
|
||||
values are in the range -16 to +15, plus the special value -17, which disables
|
||||
oom-killing altogether for this process.
|
||||
|
||||
The process to be killed in an out-of-memory situation is selected among all others
|
||||
based on its badness score. This value equals the original memory size of the process
|
||||
@ -1187,9 +1185,6 @@ the parent's score if they do not share the same memory. Thus forking servers
|
||||
are the prime candidates to be killed. Having only one 'hungry' child will make
|
||||
parent less preferable than the child.
|
||||
|
||||
/proc/<pid>/oom_adj cannot be changed for kthreads since they are immune from
|
||||
oom-killing already.
|
||||
|
||||
/proc/<pid>/oom_score shows process' current badness score.
|
||||
|
||||
The following heuristics are then applied:
|
||||
|
@ -46,7 +46,7 @@ better to do. The file is seekable, in that one can do something like the
|
||||
following:
|
||||
|
||||
dd if=/proc/sequence of=out1 count=1
|
||||
dd if=/proc/sequence skip=1 out=out2 count=1
|
||||
dd if=/proc/sequence skip=1 of=out2 count=1
|
||||
|
||||
Then concatenate the output files out1 and out2 and get the right
|
||||
result. Yes, it is a thoroughly useless module, but the point is to show
|
||||
|
@ -23,7 +23,8 @@ interface.
|
||||
Using sysfs
|
||||
~~~~~~~~~~~
|
||||
|
||||
sysfs is always compiled in. You can access it by doing:
|
||||
sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
|
||||
it by doing:
|
||||
|
||||
mount -t sysfs sysfs /sys
|
||||
|
||||
|
99
Documentation/flexible-arrays.txt
Normal file
99
Documentation/flexible-arrays.txt
Normal file
@ -0,0 +1,99 @@
|
||||
Using flexible arrays in the kernel
|
||||
Last updated for 2.6.31
|
||||
Jonathan Corbet <corbet@lwn.net>
|
||||
|
||||
Large contiguous memory allocations can be unreliable in the Linux kernel.
|
||||
Kernel programmers will sometimes respond to this problem by allocating
|
||||
pages with vmalloc(). This solution is not ideal, though. On 32-bit systems,
|
||||
memory from vmalloc() must be mapped into a relatively small address space;
|
||||
it's easy to run out. On SMP systems, the page table changes required by
|
||||
vmalloc() allocations can require expensive cross-processor interrupts on
|
||||
all CPUs. And, on all systems, use of space in the vmalloc() range
|
||||
increases pressure on the translation lookaside buffer (TLB), reducing the
|
||||
performance of the system.
|
||||
|
||||
In many cases, the need for memory from vmalloc() can be eliminated by
|
||||
piecing together an array from smaller parts; the flexible array library
|
||||
exists to make this task easier.
|
||||
|
||||
A flexible array holds an arbitrary (within limits) number of fixed-sized
|
||||
objects, accessed via an integer index. Sparse arrays are handled
|
||||
reasonably well. Only single-page allocations are made, so memory
|
||||
allocation failures should be relatively rare. The down sides are that the
|
||||
arrays cannot be indexed directly, individual object size cannot exceed the
|
||||
system page size, and putting data into a flexible array requires a copy
|
||||
operation. It's also worth noting that flexible arrays do no internal
|
||||
locking at all; if concurrent access to an array is possible, then the
|
||||
caller must arrange for appropriate mutual exclusion.
|
||||
|
||||
The creation of a flexible array is done with:
|
||||
|
||||
#include <linux/flex_array.h>
|
||||
|
||||
struct flex_array *flex_array_alloc(int element_size,
|
||||
unsigned int total,
|
||||
gfp_t flags);
|
||||
|
||||
The individual object size is provided by element_size, while total is the
|
||||
maximum number of objects which can be stored in the array. The flags
|
||||
argument is passed directly to the internal memory allocation calls. With
|
||||
the current code, using flags to ask for high memory is likely to lead to
|
||||
notably unpleasant side effects.
|
||||
|
||||
Storing data into a flexible array is accomplished with a call to:
|
||||
|
||||
int flex_array_put(struct flex_array *array, unsigned int element_nr,
|
||||
void *src, gfp_t flags);
|
||||
|
||||
This call will copy the data from src into the array, in the position
|
||||
indicated by element_nr (which must be less than the maximum specified when
|
||||
the array was created). If any memory allocations must be performed, flags
|
||||
will be used. The return value is zero on success, a negative error code
|
||||
otherwise.
|
||||
|
||||
There might possibly be a need to store data into a flexible array while
|
||||
running in some sort of atomic context; in this situation, sleeping in the
|
||||
memory allocator would be a bad thing. That can be avoided by using
|
||||
GFP_ATOMIC for the flags value, but, often, there is a better way. The
|
||||
trick is to ensure that any needed memory allocations are done before
|
||||
entering atomic context, using:
|
||||
|
||||
int flex_array_prealloc(struct flex_array *array, unsigned int start,
|
||||
unsigned int end, gfp_t flags);
|
||||
|
||||
This function will ensure that memory for the elements indexed in the range
|
||||
defined by start and end has been allocated. Thereafter, a
|
||||
flex_array_put() call on an element in that range is guaranteed not to
|
||||
block.
|
||||
|
||||
Getting data back out of the array is done with:
|
||||
|
||||
void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
|
||||
|
||||
The return value is a pointer to the data element, or NULL if that
|
||||
particular element has never been allocated.
|
||||
|
||||
Note that it is possible to get back a valid pointer for an element which
|
||||
has never been stored in the array. Memory for array elements is allocated
|
||||
one page at a time; a single allocation could provide memory for several
|
||||
adjacent elements. The flexible array code does not know if a specific
|
||||
element has been written; it only knows if the associated memory is
|
||||
present. So a flex_array_get() call on an element which was never stored
|
||||
in the array has the potential to return a pointer to random data. If the
|
||||
caller does not have a separate way to know which elements were actually
|
||||
stored, it might be wise, at least, to add GFP_ZERO to the flags argument
|
||||
to ensure that all elements are zeroed.
|
||||
|
||||
There is no way to remove a single element from the array. It is possible,
|
||||
though, to remove all elements with a call to:
|
||||
|
||||
void flex_array_free_parts(struct flex_array *array);
|
||||
|
||||
This call frees all elements, but leaves the array itself in place.
|
||||
Freeing the entire array is done with:
|
||||
|
||||
void flex_array_free(struct flex_array *array);
|
||||
|
||||
As of this writing, there are no users of flexible arrays in the mainline
|
||||
kernel. The functions described here are also not exported to modules;
|
||||
that will probably be fixed when somebody comes up with a need for it.
|
@ -2,11 +2,11 @@ Kernel driver pcf8591
|
||||
=====================
|
||||
|
||||
Supported chips:
|
||||
* Philips PCF8591
|
||||
* Philips/NXP PCF8591
|
||||
Prefix: 'pcf8591'
|
||||
Addresses scanned: I2C 0x48 - 0x4f
|
||||
Datasheet: Publicly available at the Philips Semiconductor website
|
||||
http://www.semiconductors.philips.com/pip/PCF8591P.html
|
||||
Datasheet: Publicly available at the NXP website
|
||||
http://www.nxp.com/pip/PCF8591_6.html
|
||||
|
||||
Authors:
|
||||
Aurelien Jarno <aurelien@aurel32.net>
|
||||
@ -16,9 +16,10 @@ Authors:
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The PCF8591 is an 8-bit A/D and D/A converter (4 analog inputs and one
|
||||
analog output) for the I2C bus produced by Philips Semiconductors. It
|
||||
is designed to provide a byte I2C interface to up to 4 separate devices.
|
||||
analog output) for the I2C bus produced by Philips Semiconductors (now NXP).
|
||||
It is designed to provide a byte I2C interface to up to 4 separate devices.
|
||||
|
||||
The PCF8591 has 4 analog inputs programmable as single-ended or
|
||||
differential inputs :
|
||||
@ -58,8 +59,8 @@ Accessing PCF8591 via /sys interface
|
||||
-------------------------------------
|
||||
|
||||
! Be careful !
|
||||
The PCF8591 is plainly impossible to detect ! Stupid chip.
|
||||
So every chip with address in the interval [48..4f] is
|
||||
The PCF8591 is plainly impossible to detect! Stupid chip.
|
||||
So every chip with address in the interval [0x48..0x4f] is
|
||||
detected as PCF8591. If you have other chips in this address
|
||||
range, the workaround is to load this module after the one
|
||||
for your others chips.
|
||||
@ -67,19 +68,20 @@ for your others chips.
|
||||
On detection (i.e. insmod, modprobe et al.), directories are being
|
||||
created for each detected PCF8591:
|
||||
|
||||
/sys/bus/devices/<0>-<1>/
|
||||
/sys/bus/i2c/devices/<0>-<1>/
|
||||
where <0> is the bus the chip was detected on (e. g. i2c-0)
|
||||
and <1> the chip address ([48..4f])
|
||||
|
||||
Inside these directories, there are such files:
|
||||
in0, in1, in2, in3, out0_enable, out0_output, name
|
||||
in0_input, in1_input, in2_input, in3_input, out0_enable, out0_output, name
|
||||
|
||||
Name contains chip name.
|
||||
|
||||
The in0, in1, in2 and in3 files are RO. Reading gives the value of the
|
||||
corresponding channel. Depending on the current analog inputs configuration,
|
||||
files in2 and/or in3 do not exist. Values range are from 0 to 255 for single
|
||||
ended inputs and -128 to +127 for differential inputs (8-bit ADC).
|
||||
The in0_input, in1_input, in2_input and in3_input files are RO. Reading gives
|
||||
the value of the corresponding channel. Depending on the current analog inputs
|
||||
configuration, files in2_input and in3_input may not exist. Values range
|
||||
from 0 to 255 for single ended inputs and -128 to +127 for differential inputs
|
||||
(8-bit ADC).
|
||||
|
||||
The out0_enable file is RW. Reading gives "1" for analog output enabled and
|
||||
"0" for analog output disabled. Writing accepts "0" and "1" accordingly.
|
||||
|
36
Documentation/hwmon/tmp421
Normal file
36
Documentation/hwmon/tmp421
Normal file
@ -0,0 +1,36 @@
|
||||
Kernel driver tmp421
|
||||
====================
|
||||
|
||||
Supported chips:
|
||||
* Texas Instruments TMP421
|
||||
Prefix: 'tmp421'
|
||||
Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f
|
||||
Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
|
||||
* Texas Instruments TMP422
|
||||
Prefix: 'tmp422'
|
||||
Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f
|
||||
Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
|
||||
* Texas Instruments TMP423
|
||||
Prefix: 'tmp423'
|
||||
Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f
|
||||
Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
|
||||
|
||||
Authors:
|
||||
Andre Prendel <andre.prendel@gmx.de>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
This driver implements support for Texas Instruments TMP421, TMP422
|
||||
and TMP423 temperature sensor chips. These chips implement one local
|
||||
and up to one (TMP421), up to two (TMP422) or up to three (TMP423)
|
||||
remote sensors. Temperature is measured in degrees Celsius. The chips
|
||||
are wired over I2C/SMBus and specified over a temperature range of -40
|
||||
to +125 degrees Celsius. Resolution for both the local and remote
|
||||
channels is 0.0625 degree C.
|
||||
|
||||
The chips support only temperature measurement. The driver exports
|
||||
the temperature values via the following sysfs files:
|
||||
|
||||
temp[1-4]_input
|
||||
temp[2-4]_fault
|
37
Documentation/hwmon/wm831x
Normal file
37
Documentation/hwmon/wm831x
Normal file
@ -0,0 +1,37 @@
|
||||
Kernel driver wm831x-hwmon
|
||||
==========================
|
||||
|
||||
Supported chips:
|
||||
* Wolfson Microelectronics WM831x PMICs
|
||||
Prefix: 'wm831x'
|
||||
Datasheet:
|
||||
http://www.wolfsonmicro.com/products/WM8310
|
||||
http://www.wolfsonmicro.com/products/WM8311
|
||||
http://www.wolfsonmicro.com/products/WM8312
|
||||
|
||||
Authors: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The WM831x series of PMICs include an AUXADC which can be used to
|
||||
monitor a range of system operating parameters, including the voltages
|
||||
of the major supplies within the system. Currently the driver provides
|
||||
reporting of all the input values but does not provide any alarms.
|
||||
|
||||
Voltage Monitoring
|
||||
------------------
|
||||
|
||||
Voltages are sampled by a 12 bit ADC. Voltages in millivolts are 1.465
|
||||
times the ADC value.
|
||||
|
||||
Temperature Monitoring
|
||||
----------------------
|
||||
|
||||
Temperatures are sampled by a 12 bit ADC. Chip and battery temperatures
|
||||
are available. The chip temperature is calculated as:
|
||||
|
||||
Degrees Celsius = (512.18 - data) / 1.0983
|
||||
|
||||
while the battery temperature calculation will depend on the NTC
|
||||
thermistor component.
|
26
Documentation/hwmon/wm8350
Normal file
26
Documentation/hwmon/wm8350
Normal file
@ -0,0 +1,26 @@
|
||||
Kernel driver wm8350-hwmon
|
||||
==========================
|
||||
|
||||
Supported chips:
|
||||
* Wolfson Microelectronics WM835x PMICs
|
||||
Prefix: 'wm8350'
|
||||
Datasheet:
|
||||
http://www.wolfsonmicro.com/products/WM8350
|
||||
http://www.wolfsonmicro.com/products/WM8351
|
||||
http://www.wolfsonmicro.com/products/WM8352
|
||||
|
||||
Authors: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The WM835x series of PMICs include an AUXADC which can be used to
|
||||
monitor a range of system operating parameters, including the voltages
|
||||
of the major supplies within the system. Currently the driver provides
|
||||
simple access to these major supplies.
|
||||
|
||||
Voltage Monitoring
|
||||
------------------
|
||||
|
||||
Voltages are sampled by a 12 bit ADC. For the internal supplies the ADC
|
||||
is referenced to the system VRTC.
|
475
Documentation/input/sentelic.txt
Normal file
475
Documentation/input/sentelic.txt
Normal file
@ -0,0 +1,475 @@
|
||||
Copyright (C) 2002-2008 Sentelic Corporation.
|
||||
Last update: Oct-31-2008
|
||||
|
||||
==============================================================================
|
||||
* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons)
|
||||
==============================================================================
|
||||
A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward
|
||||
page (5th button)
|
||||
@1. Set sample rate to 200;
|
||||
@2. Set sample rate to 200;
|
||||
@3. Set sample rate to 80;
|
||||
@4. Issue the "Get device ID" command (0xF2) and wait for the response;
|
||||
@5. FSP will respond 0x04.
|
||||
|
||||
Packet 1
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7 => Y overflow
|
||||
Bit6 => X overflow
|
||||
Bit5 => Y sign bit
|
||||
Bit4 => X sign bit
|
||||
Bit3 => 1
|
||||
Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
|
||||
Bit1 => Right Button, 1 is pressed, 0 is not pressed.
|
||||
Bit0 => Left Button, 1 is pressed, 0 is not pressed.
|
||||
Byte 2: X Movement(9-bit 2's complement integers)
|
||||
Byte 3: Y Movement(9-bit 2's complement integers)
|
||||
Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report.
|
||||
valid values, -8 ~ +7
|
||||
Bit4 => 1 = 4th mouse button is pressed, Forward one page.
|
||||
0 = 4th mouse button is not pressed.
|
||||
Bit5 => 1 = 5th mouse button is pressed, Backward one page.
|
||||
0 = 5th mouse button is not pressed.
|
||||
|
||||
B) MSID 6: Horizontal and Vertical scrolling.
|
||||
@ Set bit 1 in register 0x40 to 1
|
||||
|
||||
# FSP replaces the scrolling wheel's movement with 4 bits to show horizontal and
|
||||
vertical scrolling.
|
||||
|
||||
Packet 1
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|l|r|u|d|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7 => Y overflow
|
||||
Bit6 => X overflow
|
||||
Bit5 => Y sign bit
|
||||
Bit4 => X sign bit
|
||||
Bit3 => 1
|
||||
Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
|
||||
Bit1 => Right Button, 1 is pressed, 0 is not pressed.
|
||||
Bit0 => Left Button, 1 is pressed, 0 is not pressed.
|
||||
Byte 2: X Movement(9-bit 2's complement integers)
|
||||
Byte 3: Y Movement(9-bit 2's complement integers)
|
||||
Byte 4: Bit0 => the Vertical scrolling movement downward.
|
||||
Bit1 => the Vertical scrolling movement upward.
|
||||
Bit2 => the Horizontal scrolling movement rightward.
|
||||
Bit3 => the Horizontal scrolling movement leftward.
|
||||
Bit4 => 1 = 4th mouse button is pressed, Forward one page.
|
||||
0 = 4th mouse button is not pressed.
|
||||
Bit5 => 1 = 5th mouse button is pressed, Backward one page.
|
||||
0 = 5th mouse button is not pressed.
|
||||
|
||||
C) MSID 7:
|
||||
# FSP uses 2 packets(8 Bytes) data to represent Absolute Position
|
||||
so we have PACKET NUMBER to identify packets.
|
||||
If PACKET NUMBER is 0, the packet is Packet 1.
|
||||
If PACKET NUMBER is 1, the packet is Packet 2.
|
||||
Please count this number in program.
|
||||
|
||||
# The MSID 6 special packet will be enabled at the same time as MSID 7 is enabled.
|
||||
|
||||
==============================================================================
|
||||
* Absolute position for STL3886-G0.
|
||||
==============================================================================
|
||||
@ Set bit 2 or 3 in register 0x40 to 1
|
||||
@ Set bit 6 in register 0x40 to 1
|
||||
|
||||
Packet 1 (ABSOLUTE POSITION)
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7~Bit6 => 00, Normal data packet
|
||||
=> 01, Absolute coordinates packet
|
||||
=> 10, Notify packet
|
||||
Bit5 => valid bit
|
||||
Bit4 => 1
|
||||
Bit3 => 1
|
||||
Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
|
||||
Bit1 => Right Button, 1 is pressed, 0 is not pressed.
|
||||
Bit0 => Left Button, 1 is pressed, 0 is not pressed.
|
||||
Byte 2: X coordinate (xpos[9:2])
|
||||
Byte 3: Y coordinate (ypos[9:2])
|
||||
Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0])
|
||||
Bit3~Bit2 => X coordinate (xpos[1:0])
|
||||
Bit4 => scroll up
|
||||
Bit5 => scroll down
|
||||
Bit6 => scroll left
|
||||
Bit7 => scroll right
|
||||
|
||||
Notify Packet for G0
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7~Bit6 => 00, Normal data packet
|
||||
=> 01, Absolute coordinates packet
|
||||
=> 10, Notify packet
|
||||
Bit5 => 0
|
||||
Bit4 => 1
|
||||
Bit3 => 1
|
||||
Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
|
||||
Bit1 => Right Button, 1 is pressed, 0 is not pressed.
|
||||
Bit0 => Left Button, 1 is pressed, 0 is not pressed.
|
||||
Byte 2: Message Type => 0x5A (Enable/Disable status packet)
|
||||
Mode Type => 0xA5 (Normal/Icon mode status)
|
||||
Byte 3: Message Type => 0x00 (Disabled)
|
||||
=> 0x01 (Enabled)
|
||||
Mode Type => 0x00 (Normal)
|
||||
=> 0x01 (Icon)
|
||||
Byte 4: Bit7~Bit0 => Don't Care
|
||||
|
||||
==============================================================================
|
||||
* Absolute position for STL3888-A0.
|
||||
==============================================================================
|
||||
Packet 1 (ABSOLUTE POSITION)
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7~Bit6 => 00, Normal data packet
|
||||
=> 01, Absolute coordinates packet
|
||||
=> 10, Notify packet
|
||||
Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
|
||||
When both fingers are up, the last two reports have zero valid
|
||||
bit.
|
||||
Bit4 => arc
|
||||
Bit3 => 1
|
||||
Bit2 => Left Button, 1 is pressed, 0 is released.
|
||||
Bit1 => 0
|
||||
Bit0 => 1
|
||||
Byte 2: X coordinate (xpos[9:2])
|
||||
Byte 3: Y coordinate (ypos[9:2])
|
||||
Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0])
|
||||
Bit3~Bit2 => X coordinate (xpos[1:0])
|
||||
Bit5~Bit4 => y1_g
|
||||
Bit7~Bit6 => x1_g
|
||||
|
||||
Packet 2 (ABSOLUTE POSITION)
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7~Bit6 => 00, Normal data packet
|
||||
=> 01, Absolute coordinates packet
|
||||
=> 10, Notify packet
|
||||
Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
|
||||
When both fingers are up, the last two reports have zero valid
|
||||
bit.
|
||||
Bit4 => arc
|
||||
Bit3 => 1
|
||||
Bit2 => Right Button, 1 is pressed, 0 is released.
|
||||
Bit1 => 1
|
||||
Bit0 => 0
|
||||
Byte 2: X coordinate (xpos[9:2])
|
||||
Byte 3: Y coordinate (ypos[9:2])
|
||||
Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0])
|
||||
Bit3~Bit2 => X coordinate (xpos[1:0])
|
||||
Bit5~Bit4 => y2_g
|
||||
Bit7~Bit6 => x2_g
|
||||
|
||||
Notify Packet for STL3888-A0
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0|
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
Byte 1: Bit7~Bit6 => 00, Normal data packet
|
||||
=> 01, Absolute coordinates packet
|
||||
=> 10, Notify packet
|
||||
Bit5 => 1
|
||||
Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1):
|
||||
0: left button is generated by the on-pad command
|
||||
1: left button is generated by the external button
|
||||
Bit3 => 1
|
||||
Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
|
||||
Bit1 => Right Button, 1 is pressed, 0 is not pressed.
|
||||
Bit0 => Left Button, 1 is pressed, 0 is not pressed.
|
||||
Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode)
|
||||
Byte 3: Bit7~Bit6 => Don't care
|
||||
Bit5~Bit4 => Number of fingers
|
||||
Bit3~Bit1 => Reserved
|
||||
Bit0 => 1: enter gesture mode; 0: leaving gesture mode
|
||||
Byte 4: Bit7 => scroll right button
|
||||
Bit6 => scroll left button
|
||||
Bit5 => scroll down button
|
||||
Bit4 => scroll up button
|
||||
* Note that if gesture and additional button (Bit4~Bit7)
|
||||
happen at the same time, the button information will not
|
||||
be sent.
|
||||
Bit3~Bit0 => Reserved
|
||||
|
||||
Sample sequence of Multi-finger, Multi-coordinate mode:
|
||||
|
||||
notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1,
|
||||
abs pkt 2, ..., notify packet(valid bit == 0)
|
||||
|
||||
==============================================================================
|
||||
* FSP Enable/Disable packet
|
||||
==============================================================================
|
||||
Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
|
||||
BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
|
||||
1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | |
|
||||
|---------------| |---------------| |---------------| |---------------|
|
||||
|
||||
FSP will send out an enable/disable packet when it receives a PS/2 enable/disable
|
||||
command. The host will receive a packet in which the Middle, Right and Left buttons will
|
||||
be set. The packet only uses byte 0 and byte 1 as a pattern of the original packet.
|
||||
Ignore the other bytes of the packet.
|
||||
|
||||
Byte 1: Bit7 => 0, Y overflow
|
||||
Bit6 => 0, X overflow
|
||||
Bit5 => 0, Y sign bit
|
||||
Bit4 => 0, X sign bit
|
||||
Bit3 => 1
|
||||
Bit2 => 1, Middle Button
|
||||
Bit1 => 1, Right Button
|
||||
Bit0 => 1, Left Button
|
||||
Byte 2: Bit7~1 => (0101101b)
|
||||
Bit0 => 1 = Enable
|
||||
0 = Disable
|
||||
Byte 3: Don't care
|
||||
Byte 4: Don't care (MOUSE ID 3, 4)
|
||||
Byte 5~8: Don't care (Absolute packet)
|
||||
|
||||
==============================================================================
|
||||
* PS/2 Command Set
|
||||
==============================================================================
|
||||
|
||||
FSP supports the basic PS/2 command set and modes; refer to the following URL for
|
||||
details about PS/2 commands:
|
||||
|
||||
http://www.computer-engineering.org/index.php?title=PS/2_Mouse_Interface
|
||||
|
||||
==============================================================================
|
||||
* Programming Sequence for Determining Packet Parsing Flow
|
||||
==============================================================================
|
||||
1. Identify FSP by reading device ID(0x00) and version(0x01) register
|
||||
|
||||
2. Determine number of buttons by reading status2 (0x0b) register
|
||||
|
||||
buttons = reg[0x0b] & 0x30
|
||||
|
||||
if buttons == 0x30 or buttons == 0x20:
|
||||
# two/four buttons
|
||||
Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
|
||||
section A for packet parsing detail(ignore byte 4, bit ~ 7)
|
||||
elif buttons == 0x10:
|
||||
# 6 buttons
|
||||
Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
|
||||
section B for packet parsing detail
|
||||
elif buttons == 0x00:
|
||||
# 6 buttons
|
||||
Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
|
||||
section A for packet parsing detail
|
||||
|
||||
==============================================================================
|
||||
* Programming Sequence for Register Reading/Writing
|
||||
==============================================================================
|
||||
|
||||
Register inversion requirement:
|
||||
|
||||
The following values need to be inverted (the '~' operator in C) before being
|
||||
sent to FSP:
|
||||
|
||||
0xe9, 0xee, 0xf2 and 0xff.
|
||||
|
||||
Register swapping requirement:
|
||||
|
||||
The following values need to have their higher 4 bits and lower 4 bits
|
||||
swapped before being sent to FSP:
|
||||
|
||||
10, 20, 40, 60, 80, 100 and 200.
|
||||
|
||||
Register reading sequence:
|
||||
|
||||
1. send 0xf3 PS/2 command to FSP;
|
||||
|
||||
2. send 0x66 PS/2 command to FSP;
|
||||
|
||||
3. send 0x88 PS/2 command to FSP;
|
||||
|
||||
4. send 0xf3 PS/2 command to FSP;
|
||||
|
||||
5. if the register address to be read is not required to be
|
||||
inverted(refer to the 'Register inversion requirement' section),
|
||||
goto step 6
|
||||
|
||||
5a. send 0x68 PS/2 command to FSP;
|
||||
|
||||
5b. send the inverted register address to FSP and goto step 8;
|
||||
|
||||
6. if the register address to be read is not required to be
|
||||
swapped(refer to the 'Register swapping requirement' section),
|
||||
goto step 7
|
||||
|
||||
6a. send 0xcc PS/2 command to FSP;
|
||||
|
||||
6b. send the swapped register address to FSP and goto step 8;
|
||||
|
||||
7. send 0x66 PS/2 command to FSP;
|
||||
|
||||
7a. send the original register address to FSP and goto step 8;
|
||||
|
||||
8. send 0xe9(status request) PS/2 command to FSP;
|
||||
|
||||
9. the response read from FSP should be the requested register value.
|
||||
|
||||
Register writing sequence:
|
||||
|
||||
1. send 0xf3 PS/2 command to FSP;
|
||||
|
||||
2. if the register address to be written is not required to be
|
||||
inverted(refer to the 'Register inversion requirement' section),
|
||||
goto step 3
|
||||
|
||||
2a. send 0x74 PS/2 command to FSP;
|
||||
|
||||
2b. send the inverted register address to FSP and goto step 5;
|
||||
|
||||
3. if the register address to be written is not required to be
|
||||
swapped(refer to the 'Register swapping requirement' section),
|
||||
goto step 4
|
||||
|
||||
3a. send 0x77 PS/2 command to FSP;
|
||||
|
||||
3b. send the swapped register address to FSP and goto step 5;
|
||||
|
||||
4. send 0x55 PS/2 command to FSP;
|
||||
|
||||
4a. send the register address to FSP and goto step 5;
|
||||
|
||||
5. send 0xf3 PS/2 command to FSP;
|
||||
|
||||
6. if the register value to be written is not required to be
|
||||
inverted(refer to the 'Register inversion requirement' section),
|
||||
goto step 7
|
||||
|
||||
6a. send 0x47 PS/2 command to FSP;
|
||||
|
||||
6b. send the inverted register value to FSP and goto step 9;
|
||||
|
||||
7. if the register value to be written is not required to be
|
||||
swapped(refer to the 'Register swapping requirement' section),
|
||||
goto step 8
|
||||
|
||||
7a. send 0x44 PS/2 command to FSP;
|
||||
|
||||
7b. send the swapped register value to FSP and goto step 9;
|
||||
|
||||
8. send 0x33 PS/2 command to FSP;
|
||||
|
||||
8a. send the register value to FSP;
|
||||
|
||||
9. the register writing sequence is completed.
|
||||
|
||||
==============================================================================
|
||||
* Register Listing
|
||||
==============================================================================
|
||||
|
||||
offset width default r/w name
|
||||
0x00 bit7~bit0 0x01 RO device ID
|
||||
|
||||
0x01 bit7~bit0 0xc0 RW version ID
|
||||
|
||||
0x02 bit7~bit0 0x01 RO vendor ID
|
||||
|
||||
0x03 bit7~bit0 0x01 RO product ID
|
||||
|
||||
0x04 bit3~bit0 0x01 RW revision ID
|
||||
|
||||
0x0b RO test mode status 1
|
||||
bit3 1 RO 0: rotate 180 degree, 1: no rotation
|
||||
|
||||
bit5~bit4 RO number of buttons
|
||||
11 => 2, lbtn/rbtn
|
||||
10 => 4, lbtn/rbtn/scru/scrd
|
||||
01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr
|
||||
00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn
|
||||
|
||||
0x0f RW register file page control
|
||||
bit0 0 RW 1 to enable page 1 register files
|
||||
|
||||
0x10 RW system control 1
|
||||
bit0 1 RW Reserved, must be 1
|
||||
bit1 0 RW Reserved, must be 0
|
||||
bit4 1 RW Reserved, must be 0
|
||||
bit5 0 RW register clock gating enable
|
||||
0: read only, 1: read/write enable
|
||||
(Note that the following registers do not require clock gating to be
|
||||
enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e
|
||||
40 41 42 43.)
|
||||
|
||||
0x31 RW on-pad command detection
|
||||
bit7 0 RW on-pad command left button down tag
|
||||
enable
|
||||
0: disable, 1: enable
|
||||
|
||||
0x34 RW on-pad command control 5
|
||||
bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5
|
||||
(Note that position unit is in 0.5 scanline)
|
||||
|
||||
bit7 0 RW on-pad tap zone enable
|
||||
0: disable, 1: enable
|
||||
|
||||
0x35 RW on-pad command control 6
|
||||
bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5
|
||||
(Note that position unit is in 0.5 scanline)
|
||||
|
||||
0x36 RW on-pad command control 7
|
||||
bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5
|
||||
(Note that position unit is in 0.5 scanline)
|
||||
|
||||
0x37 RW on-pad command control 8
|
||||
bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5
|
||||
(Note that position unit is in 0.5 scanline)
|
||||
|
||||
0x40 RW system control 5
|
||||
bit1 0 RW FSP Intellimouse mode enable
|
||||
0: disable, 1: enable
|
||||
|
||||
bit2 0 RW movement + abs. coordinate mode enable
|
||||
0: disable, 1: enable
|
||||
(Note that this function has the functionality of bit 1 even when
|
||||
bit 1 is not set. However, the format is different from that of bit 1.
|
||||
In addition, when bit 1 and bit 2 are set at the same time, bit 2 will
|
||||
override bit 1.)
|
||||
|
||||
bit3 0 RW abs. coordinate only mode enable
|
||||
0: disable, 1: enable
|
||||
(Note that this function has the functionality of bit 1 even when
|
||||
bit 1 is not set. However, the format is different from that of bit 1.
|
||||
In addition, when bit 1, bit 2 and bit 3 are set at the same time,
|
||||
bit 3 will override bit 1 and 2.)
|
||||
|
||||
bit5 0 RW auto switch enable
|
||||
0: disable, 1: enable
|
||||
|
||||
bit6 0 RW G0 abs. + notify packet format enable
|
||||
0: disable, 1: enable
|
||||
(Note that the absolute/relative coordinate output still depends on
|
||||
bit 2 and 3. That is, if any of those bits is 1, the host will receive
|
||||
absolute coordinates; otherwise, host only receives packets with
|
||||
relative coordinate.)
|
||||
|
||||
0x43 RW on-pad control
|
||||
bit0 0 RW on-pad control enable
|
||||
0: disable, 1: enable
|
||||
(Note that if this bit is cleared, bit 3/5 will be ineffective)
|
||||
|
||||
bit3 0 RW on-pad fix vertical scrolling enable
|
||||
0: disable, 1: enable
|
||||
|
||||
bit5 0 RW on-pad fix horizontal scrolling enable
|
||||
0: disable, 1: enable
|
210
Documentation/intel_txt.txt
Normal file
210
Documentation/intel_txt.txt
Normal file
@ -0,0 +1,210 @@
|
||||
Intel(R) TXT Overview:
|
||||
=====================
|
||||
|
||||
Intel's technology for safer computing, Intel(R) Trusted Execution
|
||||
Technology (Intel(R) TXT), defines platform-level enhancements that
|
||||
provide the building blocks for creating trusted platforms.
|
||||
|
||||
Intel TXT was formerly known by the code name LaGrande Technology (LT).
|
||||
|
||||
Intel TXT in Brief:
|
||||
o Provides dynamic root of trust for measurement (DRTM)
|
||||
o Data protection in case of improper shutdown
|
||||
o Measurement and verification of launched environment
|
||||
|
||||
Intel TXT is part of the vPro(TM) brand and is also available on some
|
||||
non-vPro systems. It is currently available on desktop systems
|
||||
based on the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell
|
||||
Optiplex 755, HP dc7800, etc.) and mobile systems based on the GM45,
|
||||
PM45, and GS45 Express chipsets.
|
||||
|
||||
For more information, see http://www.intel.com/technology/security/.
|
||||
This site also has a link to the Intel TXT MLE Developers Manual,
|
||||
which has been updated for the new released platforms.
|
||||
|
||||
Intel TXT has been presented at various events over the past few
|
||||
years, some of which are:
|
||||
LinuxTAG 2008:
|
||||
http://www.linuxtag.org/2008/en/conf/events/vp-donnerstag/
|
||||
details.html?talkid=110
|
||||
TRUST2008:
|
||||
http://www.trust2008.eu/downloads/Keynote-Speakers/
|
||||
3_David-Grawrock_The-Front-Door-of-Trusted-Computing.pdf
|
||||
IDF 2008, Shanghai:
|
||||
http://inteldeveloperforum.com.edgesuite.net/shanghai_2008/
|
||||
aep/PROS003/index.html
|
||||
IDFs 2006, 2007 (I'm not sure if/where they are online)
|
||||
|
||||
Trusted Boot Project Overview:
|
||||
=============================
|
||||
|
||||
Trusted Boot (tboot) is an open source, pre-kernel/VMM module that
|
||||
uses Intel TXT to perform a measured and verified launch of an OS
|
||||
kernel/VMM.
|
||||
|
||||
It is hosted on SourceForge at http://sourceforge.net/projects/tboot.
|
||||
The mercurial source repo is available at http://www.bughost.org/
|
||||
repos.hg/tboot.hg.
|
||||
|
||||
Tboot currently supports launching Xen (open source VMM/hypervisor
|
||||
w/ TXT support since v3.2), and now Linux kernels.
|
||||
|
||||
|
||||
Value Proposition for Linux or "Why should you care?"
|
||||
=====================================================
|
||||
|
||||
While there are many products and technologies that attempt to
|
||||
measure or protect the integrity of a running kernel, they all
|
||||
assume the kernel is "good" to begin with. The Integrity
|
||||
Measurement Architecture (IMA) and Linux Integrity Module interface
|
||||
are examples of such solutions.
|
||||
|
||||
To get trust in the initial kernel without using Intel TXT, a
|
||||
static root of trust must be used. This bases trust in BIOS
|
||||
starting at system reset and requires measurement of all code
|
||||
executed between system reset through the completion of the kernel
|
||||
boot as well as data objects used by that code. In the case of a
|
||||
Linux kernel, this means all of BIOS, any option ROMs, the
|
||||
bootloader and the boot config. In practice, this is a lot of
|
||||
code/data, much of which is subject to change from boot to boot
|
||||
(e.g. changing NICs may change option ROMs). Without reference
|
||||
hashes, these measurement changes are difficult to assess or
|
||||
confirm as benign. This process also does not provide DMA
|
||||
protection, memory configuration/alias checks and locks, crash
|
||||
protection, or policy support.
|
||||
|
||||
By using the hardware-based root of trust that Intel TXT provides,
|
||||
many of these issues can be mitigated. Specifically: many
|
||||
pre-launch components can be removed from the trust chain, DMA
|
||||
protection is provided to all launched components, a large number
|
||||
of platform configuration checks are performed and values locked,
|
||||
protection is provided for any data in the event of an improper
|
||||
shutdown, and there is support for policy-based execution/verification.
|
||||
This provides a more stable measurement and a higher assurance of
|
||||
system configuration and initial state than would be otherwise
|
||||
possible. Since the tboot project is open source, source code for
|
||||
almost all parts of the trust chain is available (excepting SMM and
|
||||
Intel-provided firmware).
|
||||
|
||||
How Does it Work?
|
||||
=================
|
||||
|
||||
o Tboot is an executable that is launched by the bootloader as
|
||||
the "kernel" (the binary the bootloader executes).
|
||||
o It performs all of the work necessary to determine if the
|
||||
platform supports Intel TXT and, if so, executes the GETSEC[SENTER]
|
||||
processor instruction that initiates the dynamic root of trust.
|
||||
- If tboot determines that the system does not support Intel TXT
|
||||
or is not configured correctly (e.g. the SINIT AC Module was
|
||||
incorrect), it will directly launch the kernel with no changes
|
||||
to any state.
|
||||
- Tboot will output various information about its progress to the
|
||||
terminal, serial port, and/or an in-memory log; the output
|
||||
locations can be configured with a command line switch.
|
||||
o The GETSEC[SENTER] instruction will return control to tboot and
|
||||
tboot then verifies certain aspects of the environment (e.g. TPM NV
|
||||
lock, e820 table does not have invalid entries, etc.).
|
||||
o It will wake the APs from the special sleep state the GETSEC[SENTER]
|
||||
instruction had put them in and place them into a wait-for-SIPI
|
||||
state.
|
||||
- Because the processors will not respond to an INIT or SIPI when
|
||||
in the TXT environment, it is necessary to create a small VT-x
|
||||
guest for the APs. When they run in this guest, they will
|
||||
simply wait for the INIT-SIPI-SIPI sequence, which will cause
|
||||
VMEXITs, and then disable VT and jump to the SIPI vector. This
|
||||
approach seemed like a better choice than having to insert
|
||||
special code into the kernel's MP wakeup sequence.
|
||||
o Tboot then applies an (optional) user-defined launch policy to
|
||||
verify the kernel and initrd.
|
||||
- This policy is rooted in TPM NV and is described in the tboot
|
||||
project. The tboot project also contains code for tools to
|
||||
create and provision the policy.
|
||||
- Policies are completely under user control and if not present
|
||||
then any kernel will be launched.
|
||||
- Policy action is flexible and can include halting on failures
|
||||
or simply logging them and continuing.
|
||||
o Tboot adjusts the e820 table provided by the bootloader to reserve
|
||||
its own location in memory as well as to reserve certain other
|
||||
TXT-related regions.
|
||||
o As part of its launch, tboot DMA protects all of RAM (using the
|
||||
VT-d PMRs). Thus, the kernel must be booted with 'intel_iommu=on'
|
||||
in order to remove this blanket protection and use VT-d's
|
||||
page-level protection.
|
||||
o Tboot will populate a shared page with some data about itself and
|
||||
pass this to the Linux kernel as it transfers control.
|
||||
- The location of the shared page is passed via the boot_params
|
||||
struct as a physical address.
|
||||
o The kernel will look for the tboot shared page address and, if it
|
||||
exists, map it.
|
||||
o As one of the checks/protections provided by TXT, it makes a copy
|
||||
of the VT-d DMARs in a DMA-protected region of memory and verifies
|
||||
them for correctness. The VT-d code will detect if the kernel was
|
||||
launched with tboot and use this copy instead of the one in the
|
||||
ACPI table.
|
||||
o At this point, tboot and TXT are out of the picture until a
|
||||
shutdown (S<n>)
|
||||
o In order to put a system into any of the sleep states after a TXT
|
||||
launch, TXT must first be exited. This is to prevent attacks that
|
||||
attempt to crash the system to gain control on reboot and steal
|
||||
data left in memory.
|
||||
- The kernel will perform all of its sleep preparation and
|
||||
populate the shared page with the ACPI data needed to put the
|
||||
platform in the desired sleep state.
|
||||
- Then the kernel jumps into tboot via the vector specified in the
|
||||
shared page.
|
||||
- Tboot will clean up the environment and disable TXT, then use the
|
||||
kernel-provided ACPI information to actually place the platform
|
||||
into the desired sleep state.
|
||||
- In the case of S3, tboot will also register itself as the resume
|
||||
vector. This is necessary because it must re-establish the
|
||||
measured environment upon resume. Once the TXT environment
|
||||
has been restored, it will restore the TPM PCRs and then
|
||||
transfer control back to the kernel's S3 resume vector.
|
||||
In order to preserve system integrity across S3, the kernel
|
||||
provides tboot with a set of memory ranges (kernel
|
||||
code/data/bss, S3 resume code, and AP trampoline) that tboot
|
||||
will calculate a MAC (message authentication code) over and then
|
||||
seal with the TPM. On resume and once the measured environment
|
||||
has been re-established, tboot will re-calculate the MAC and
|
||||
verify it against the sealed value. Tboot's policy determines
|
||||
what happens if the verification fails.
|
||||
|
||||
That's pretty much it for TXT support.
|
||||
|
||||
|
||||
Configuring the System:
|
||||
======================
|
||||
|
||||
This code works with 32bit, 32bit PAE, and 64bit (x86_64) kernels.
|
||||
|
||||
In BIOS, the user must enable: TPM, TXT, VT-x, VT-d. Not all BIOSes
|
||||
allow these to be individually enabled/disabled and the screens in
|
||||
which to find them are BIOS-specific.
|
||||
|
||||
grub.conf needs to be modified as follows:
|
||||
title Linux 2.6.29-tip w/ tboot
|
||||
root (hd0,0)
|
||||
kernel /tboot.gz logging=serial,vga,memory
|
||||
module /vmlinuz-2.6.29-tip intel_iommu=on ro
|
||||
root=LABEL=/ rhgb console=ttyS0,115200 3
|
||||
module /initrd-2.6.29-tip.img
|
||||
module /Q35_SINIT_17.BIN
|
||||
|
||||
The kernel option for enabling Intel TXT support is found under the
|
||||
Security top-level menu and is called "Enable Intel(R) Trusted
|
||||
Execution Technology (TXT)". It is marked as EXPERIMENTAL and
|
||||
depends on the generic x86 support (to allow maximum flexibility in
|
||||
kernel build options), since the tboot code will detect whether the
|
||||
platform actually supports Intel TXT and thus whether any of the
|
||||
kernel code is executed.
|
||||
|
||||
The Q35_SINIT_17.BIN file is what Intel TXT refers to as an
|
||||
Authenticated Code Module. It is specific to the chipset in the
|
||||
system and can also be found on the Trusted Boot site. It is an
|
||||
(unencrypted) module signed by Intel that is used as part of the
|
||||
DRTM process to verify and configure the system. It is signed
|
||||
because it operates at a higher privilege level in the system than
|
||||
any other macrocode and its correct operation is critical to the
|
||||
establishment of the DRTM. The process for determining the correct
|
||||
SINIT ACM for a system is documented in the SINIT-guide.txt file
|
||||
that is on the tboot SourceForge site under the SINIT ACM downloads.
|
@ -121,6 +121,7 @@ Code Seq# Include File Comments
|
||||
'c' 00-7F linux/comstats.h conflict!
|
||||
'c' 00-7F linux/coda.h conflict!
|
||||
'c' 80-9F arch/s390/include/asm/chsc.h
|
||||
'c' A0-AF arch/x86/include/asm/msr.h
|
||||
'd' 00-FF linux/char/drm/drm/h conflict!
|
||||
'd' F0-FF linux/digi1.h
|
||||
'e' all linux/digi1.h conflict!
|
||||
@ -139,6 +140,7 @@ Code Seq# Include File Comments
|
||||
'm' all linux/synclink.h conflict!
|
||||
'm' 00-1F net/irda/irmod.h conflict!
|
||||
'n' 00-7F linux/ncp_fs.h
|
||||
'n' 80-8F linux/nilfs2_fs.h NILFS2
|
||||
'n' E0-FF video/matrox.h matroxfb
|
||||
'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
|
||||
'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
|
||||
@ -191,7 +193,7 @@ Code Seq# Include File Comments
|
||||
0xAD 00 Netfilter device in development:
|
||||
<mailto:rusty@rustcorp.com.au>
|
||||
0xAE all linux/kvm.h Kernel-based Virtual Machine
|
||||
<mailto:kvm-devel@lists.sourceforge.net>
|
||||
<mailto:kvm@vger.kernel.org>
|
||||
0xB0 all RATIO devices in development:
|
||||
<mailto:vgo@ratio.de>
|
||||
0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
|
||||
|
@ -66,7 +66,9 @@ Example kernel-doc function comment:
|
||||
* The longer description can have multiple paragraphs.
|
||||
*/
|
||||
|
||||
The first line, with the short description, must be on a single line.
|
||||
The short description following the subject can span multiple lines
|
||||
and ends with an @argument description, an empty line or the end of
|
||||
the comment block.
|
||||
|
||||
The @argument descriptions must begin on the very next line following
|
||||
this opening short function description line, with no intervening
|
||||
|
@ -57,6 +57,7 @@ parameter is applicable:
|
||||
ISAPNP ISA PnP code is enabled.
|
||||
ISDN Appropriate ISDN support is enabled.
|
||||
JOY Appropriate joystick support is enabled.
|
||||
KVM Kernel Virtual Machine support is enabled.
|
||||
LIBATA Libata driver is enabled
|
||||
LP Printer support is enabled.
|
||||
LOOP Loopback device support is enabled.
|
||||
@ -1098,6 +1099,44 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
kstack=N [X86] Print N words from the kernel stack
|
||||
in oops dumps.
|
||||
|
||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||
Default is 0 (don't ignore, but inject #GP)
|
||||
|
||||
kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
||||
Default is 0 (off)
|
||||
|
||||
kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
|
||||
for all guests.
|
||||
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
|
||||
|
||||
kvm-intel.bypass_guest_pf=
|
||||
[KVM,Intel] Disables bypassing of guest page faults
|
||||
on Intel chips. Default is 1 (enabled)
|
||||
|
||||
kvm-intel.ept= [KVM,Intel] Disable extended page tables
|
||||
(virtualized MMU) support on capable Intel chips.
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-intel.emulate_invalid_guest_state=
|
||||
[KVM,Intel] Enable emulation of invalid guest states
|
||||
Default is 0 (disabled)
|
||||
|
||||
kvm-intel.flexpriority=
|
||||
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-intel.unrestricted_guest=
|
||||
[KVM,Intel] Disable unrestricted guest feature
|
||||
(virtualized real and unpaged mode) on capable
|
||||
Intel chips. Default is 1 (enabled)
|
||||
|
||||
kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
|
||||
feature (tagged TLBs) on capable Intel chips.
|
||||
Default is 1 (enabled)
|
||||
|
||||
l2cr= [PPC]
|
||||
|
||||
l3cr= [PPC]
|
||||
@ -1115,6 +1154,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
libata.dma=4 Compact Flash DMA only
|
||||
Combinations also work, so libata.dma=3 enables DMA
|
||||
for disks and CDROMs, but not CFs.
|
||||
|
||||
libata.ignore_hpa= [LIBATA] Ignore HPA limit
|
||||
libata.ignore_hpa=0 keep BIOS limits (default)
|
||||
libata.ignore_hpa=1 ignore limits, using full disk
|
||||
|
||||
libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
|
||||
when set.
|
||||
@ -1243,6 +1286,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
(machvec) in a generic kernel.
|
||||
Example: machvec=hpzx1_swiotlb
|
||||
|
||||
machtype= [Loongson] Share the same kernel image file between different
|
||||
yeeloong laptops.
|
||||
Example: machtype=lemote-yeeloong-2f-7inch
|
||||
|
||||
max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
|
||||
than or equal to this physical address is ignored.
|
||||
|
||||
@ -1499,6 +1546,14 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
[NFS] set the TCP port on which the NFSv4 callback
|
||||
channel should listen.
|
||||
|
||||
nfs.cache_getent=
|
||||
[NFS] sets the pathname to the program which is used
|
||||
to update the NFS client cache entries.
|
||||
|
||||
nfs.cache_getent_timeout=
|
||||
[NFS] sets the timeout after which an attempt to
|
||||
update a cache entry is deemed to have failed.
|
||||
|
||||
nfs.idmap_cache_timeout=
|
||||
[NFS] set the maximum lifetime for idmapper cache
|
||||
entries.
|
||||
@ -1510,7 +1565,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
of returning the full 64-bit number.
|
||||
The default is to return 64-bit inode numbers.
|
||||
|
||||
nmi_debug= [KNL,AVR32] Specify one or more actions to take
|
||||
nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take
|
||||
when a NMI is triggered.
|
||||
Format: [state][,regs][,debounce][,die]
|
||||
|
||||
@ -1531,6 +1586,11 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
symbolic names: lapic and ioapic
|
||||
Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
|
||||
|
||||
netpoll.carrier_timeout=
|
||||
[NET] Specifies amount of time (in seconds) that
|
||||
netpoll should wait for a carrier. By default netpoll
|
||||
waits 4 seconds.
|
||||
|
||||
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
|
||||
emulation library even if a 387 maths coprocessor
|
||||
is present.
|
||||
@ -1915,11 +1975,12 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Format: { 0 | 1 }
|
||||
See arch/parisc/kernel/pdc_chassis.c
|
||||
|
||||
percpu_alloc= [X86] Select which percpu first chunk allocator to use.
|
||||
Allowed values are one of "lpage", "embed" and "4k".
|
||||
See comments in arch/x86/kernel/setup_percpu.c for
|
||||
details on each allocator. This parameter is primarily
|
||||
for debugging and performance comparison.
|
||||
percpu_alloc= Select which percpu first chunk allocator to use.
|
||||
Currently supported values are "embed" and "page".
|
||||
Archs may support subset or none of the selections.
|
||||
See comments in mm/percpu.c for details on each
|
||||
allocator. This parameter is primarily for debugging
|
||||
and performance comparison.
|
||||
|
||||
pf. [PARIDE]
|
||||
See Documentation/blockdev/paride.txt.
|
||||
@ -2391,6 +2452,18 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
stifb= [HW]
|
||||
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
|
||||
|
||||
sunrpc.min_resvport=
|
||||
sunrpc.max_resvport=
|
||||
[NFS,SUNRPC]
|
||||
SunRPC servers often require that client requests
|
||||
originate from a privileged port (i.e. a port in the
|
||||
range 0 < portnr < 1024).
|
||||
An administrator who wishes to reserve some of these
|
||||
ports for other uses may adjust the range that the
|
||||
kernel's sunrpc client considers to be privileged
|
||||
using these two parameters to set the minimum and
|
||||
maximum port values.
|
||||
|
||||
sunrpc.pool_mode=
|
||||
[NFS]
|
||||
Control how the NFS server code allocates CPUs to
|
||||
@ -2407,6 +2480,15 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
pernode one pool for each NUMA node (equivalent
|
||||
to global on non-NUMA machines)
|
||||
|
||||
sunrpc.tcp_slot_table_entries=
|
||||
sunrpc.udp_slot_table_entries=
|
||||
[NFS,SUNRPC]
|
||||
Sets the upper limit on the number of simultaneous
|
||||
RPC calls that can be sent from the client to a
|
||||
server. Increasing these values may allow you to
|
||||
improve throughput, but will also increase the
|
||||
amount of memory reserved for use by the client.
|
||||
|
||||
swiotlb= [IA-64] Number of I/O TLB slabs
|
||||
|
||||
switches= [HW,M68k]
|
||||
@ -2476,6 +2558,11 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
trace_buf_size=nn[KMG]
|
||||
[FTRACE] will set tracing buffer size.
|
||||
|
||||
trace_event=[event-list]
|
||||
[FTRACE] Set and start specified trace events in order
|
||||
to facilitate early boot debugging.
|
||||
See also Documentation/trace/events.txt
|
||||
|
||||
trix= [HW,OSS] MediaTrix AudioTrix Pro
|
||||
Format:
|
||||
<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
|
||||
|
@ -26,7 +26,7 @@ This document has the following sections:
|
||||
- Notes on accessing payload contents
|
||||
- Defining a key type
|
||||
- Request-key callback service
|
||||
- Key access filesystem
|
||||
- Garbage collection
|
||||
|
||||
|
||||
============
|
||||
@ -113,6 +113,9 @@ Each key has a number of attributes:
|
||||
|
||||
(*) Dead. The key's type was unregistered, and so the key is now useless.
|
||||
|
||||
Keys in the last three states are subject to garbage collection. See the
|
||||
section on "Garbage collection".
|
||||
|
||||
|
||||
====================
|
||||
KEY SERVICE OVERVIEW
|
||||
@ -754,6 +757,26 @@ The keyctl syscall functions are:
|
||||
successful.
|
||||
|
||||
|
||||
(*) Install the calling process's session keyring on its parent.
|
||||
|
||||
long keyctl(KEYCTL_SESSION_TO_PARENT);
|
||||
|
||||
This function attempts to install the calling process's session keyring
|
||||
on to the calling process's parent, replacing the parent's current session
|
||||
keyring.
|
||||
|
||||
The calling process must have the same ownership as its parent, the
|
||||
keyring must have the same ownership as the calling process, the calling
|
||||
process must have LINK permission on the keyring and the active LSM module
|
||||
mustn't deny permission, otherwise error EPERM will be returned.
|
||||
|
||||
Error ENOMEM will be returned if there was insufficient memory to complete
|
||||
the operation, otherwise 0 will be returned to indicate success.
|
||||
|
||||
The keyring will be replaced next time the parent process leaves the
|
||||
kernel and resumes executing userspace.
|
||||
|
||||
|
||||
===============
|
||||
KERNEL SERVICES
|
||||
===============
|
||||
@ -1231,3 +1254,17 @@ by executing:
|
||||
|
||||
In this case, the program isn't required to actually attach the key to a ring;
|
||||
the rings are provided for reference.
|
||||
|
||||
|
||||
==================
|
||||
GARBAGE COLLECTION
|
||||
==================
|
||||
|
||||
Dead keys (for which the type has been removed) will be automatically unlinked
|
||||
from those keyrings that point to them and deleted as soon as possible by a
|
||||
background garbage collector.
|
||||
|
||||
Similarly, revoked and expired keys will be garbage collected, but only after a
|
||||
certain amount of time has passed. This time is set as a number of seconds in:
|
||||
|
||||
/proc/sys/kernel/keys/gc_delay
|
||||
|
@ -27,6 +27,13 @@ To trigger an intermediate memory scan:
|
||||
|
||||
# echo scan > /sys/kernel/debug/kmemleak
|
||||
|
||||
To clear the list of all current possible memory leaks:
|
||||
|
||||
# echo clear > /sys/kernel/debug/kmemleak
|
||||
|
||||
New leaks will then come up upon reading /sys/kernel/debug/kmemleak
|
||||
again.
|
||||
|
||||
Note that the orphan objects are listed in the order they were allocated
|
||||
and one object at the beginning of the list may cause other subsequent
|
||||
objects to be reported as orphan.
|
||||
@ -42,6 +49,9 @@ Memory scanning parameters can be modified at run-time by writing to the
|
||||
scan=<secs> - set the automatic memory scanning period in seconds
|
||||
(default 600, 0 to stop the automatic scanning)
|
||||
scan - trigger a memory scan
|
||||
clear - clear list of current memory leak suspects, done by
|
||||
marking all current reported unreferenced objects grey
|
||||
dump=<addr> - dump information about the object found at <addr>
|
||||
|
||||
Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
|
||||
the kernel command line.
|
||||
@ -86,6 +96,27 @@ avoid this, kmemleak can also store the number of values pointing to an
|
||||
address inside the block address range that need to be found so that the
|
||||
block is not considered a leak. One example is __vmalloc().
|
||||
|
||||
Testing specific sections with kmemleak
|
||||
---------------------------------------
|
||||
|
||||
Upon initial bootup your /sys/kernel/debug/kmemleak output page may be
|
||||
quite extensive. This can also be the case if you have very buggy code
|
||||
when doing development. To work around these situations you can use the
|
||||
'clear' command to clear all reported unreferenced objects from the
|
||||
/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
|
||||
you can find new unreferenced objects; this should help with testing
|
||||
specific sections of code.
|
||||
|
||||
To test a critical section on demand with a clean kmemleak do:
|
||||
|
||||
# echo clear > /sys/kernel/debug/kmemleak
|
||||
... test your kernel or modules ...
|
||||
# echo scan > /sys/kernel/debug/kmemleak
|
||||
|
||||
Then, as usual, get your report with:
|
||||
|
||||
# cat /sys/kernel/debug/kmemleak
|
||||
|
||||
Kmemleak API
|
||||
------------
|
||||
|
||||
|
@ -84,7 +84,6 @@ int my_data_handler(void)
|
||||
task = kthread_run(more_data_handling, data, "more_data_handling");
|
||||
if (task == ERR_PTR(-ENOMEM)) {
|
||||
rv = -ENOMEM;
|
||||
kref_put(&data->refcount, data_release);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
759
Documentation/kvm/api.txt
Normal file
759
Documentation/kvm/api.txt
Normal file
@ -0,0 +1,759 @@
|
||||
The Definitive KVM (Kernel-based Virtual Machine) API Documentation
|
||||
===================================================================
|
||||
|
||||
1. General description
|
||||
|
||||
The kvm API is a set of ioctls that are issued to control various aspects
|
||||
of a virtual machine. The ioctls belong to three classes
|
||||
|
||||
- System ioctls: These query and set global attributes which affect the
|
||||
whole kvm subsystem. In addition a system ioctl is used to create
|
||||
virtual machines
|
||||
|
||||
- VM ioctls: These query and set attributes that affect an entire virtual
|
||||
machine, for example memory layout. In addition a VM ioctl is used to
|
||||
create virtual cpus (vcpus).
|
||||
|
||||
Only run VM ioctls from the same process (address space) that was used
|
||||
to create the VM.
|
||||
|
||||
- vcpu ioctls: These query and set attributes that control the operation
|
||||
of a single virtual cpu.
|
||||
|
||||
Only run vcpu ioctls from the same thread that was used to create the
|
||||
vcpu.
|
||||
|
||||
2. File descriptors
|
||||
|
||||
The kvm API is centered around file descriptors. An initial
|
||||
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
|
||||
can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
|
||||
handle will create a VM file descriptor which can be used to issue VM
|
||||
ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
|
||||
and return a file descriptor pointing to it. Finally, ioctls on a vcpu
|
||||
fd can be used to control the vcpu, including the important task of
|
||||
actually running guest code.
|
||||
|
||||
In general file descriptors can be migrated among processes by means
|
||||
of fork() and the SCM_RIGHTS facility of unix domain socket. These
|
||||
kinds of tricks are explicitly not supported by kvm. While they will
|
||||
not cause harm to the host, their actual behavior is not guaranteed by
|
||||
the API. The only supported use is one virtual machine per process,
|
||||
and one vcpu per thread.
|
||||
|
||||
3. Extensions
|
||||
|
||||
As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
|
||||
incompatible changes are allowed. However, there is an extension
|
||||
facility that allows backward-compatible extensions to the API to be
|
||||
queried and used.
|
||||
|
||||
The extension mechanism is not based on the Linux version number.
|
||||
Instead, kvm defines extension identifiers and a facility to query
|
||||
whether a particular extension identifier is available. If it is, a
|
||||
set of ioctls is available for application use.
|
||||
|
||||
4. API description
|
||||
|
||||
This section describes ioctls that can be used to control kvm guests.
|
||||
For each ioctl, the following information is provided along with a
|
||||
description:
|
||||
|
||||
Capability: which KVM extension provides this ioctl. Can be 'basic',
|
||||
which means that it will be provided by any kernel that supports
|
||||
API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
|
||||
means availability needs to be checked with KVM_CHECK_EXTENSION
|
||||
(see section 4.4).
|
||||
|
||||
Architectures: which instruction set architectures provide this ioctl.
|
||||
x86 includes both i386 and x86_64.
|
||||
|
||||
Type: system, vm, or vcpu.
|
||||
|
||||
Parameters: what parameters are accepted by the ioctl.
|
||||
|
||||
Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
|
||||
are not detailed, but errors with specific meanings are.
|
||||
|
||||
4.1 KVM_GET_API_VERSION
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: system ioctl
|
||||
Parameters: none
|
||||
Returns: the constant KVM_API_VERSION (=12)
|
||||
|
||||
This identifies the API version as the stable kvm API. It is not
|
||||
expected that this number will change. However, Linux 2.6.20 and
|
||||
2.6.21 report earlier versions; these are not documented and not
|
||||
supported. Applications should refuse to run if KVM_GET_API_VERSION
|
||||
returns a value other than 12. If this check passes, all ioctls
|
||||
described as 'basic' will be available.
|
||||
|
||||
4.2 KVM_CREATE_VM
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: system ioctl
|
||||
Parameters: none
|
||||
Returns: a VM fd that can be used to control the new virtual machine.
|
||||
|
||||
The new VM has no virtual cpus and no memory. An mmap() of a VM fd
|
||||
will access the virtual machine's physical address space; offset zero
|
||||
corresponds to guest physical address zero. Use of mmap() on a VM fd
|
||||
is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
|
||||
available.
|
||||
|
||||
4.3 KVM_GET_MSR_INDEX_LIST
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: system
|
||||
Parameters: struct kvm_msr_list (in/out)
|
||||
Returns: 0 on success; -1 on error
|
||||
Errors:
|
||||
E2BIG: the msr index list is too big to fit in the array specified by
|
||||
the user.
|
||||
|
||||
struct kvm_msr_list {
|
||||
__u32 nmsrs; /* number of msrs in entries */
|
||||
__u32 indices[0];
|
||||
};
|
||||
|
||||
This ioctl returns the guest msrs that are supported. The list varies
|
||||
by kvm version and host processor, but does not change otherwise. The
|
||||
user fills in the size of the indices array in nmsrs, and in return
|
||||
kvm adjusts nmsrs to reflect the actual number of msrs and fills in
|
||||
the indices array with their numbers.
|
||||
|
||||
4.4 KVM_CHECK_EXTENSION
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: system ioctl
|
||||
Parameters: extension identifier (KVM_CAP_*)
|
||||
Returns: 0 if unsupported; 1 (or some other positive integer) if supported
|
||||
|
||||
The API allows the application to query about extensions to the core
|
||||
kvm API. Userspace passes an extension identifier (an integer) and
|
||||
receives an integer that describes the extension availability.
|
||||
Generally 0 means no and 1 means yes, but some extensions may report
|
||||
additional information in the integer return value.
|
||||
|
||||
4.5 KVM_GET_VCPU_MMAP_SIZE
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: system ioctl
|
||||
Parameters: none
|
||||
Returns: size of vcpu mmap area, in bytes
|
||||
|
||||
The KVM_RUN ioctl (cf.) communicates with userspace via a shared
|
||||
memory region. This ioctl returns the size of that region. See the
|
||||
KVM_RUN documentation for details.
|
||||
|
||||
4.6 KVM_SET_MEMORY_REGION
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_memory_region (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
struct kvm_memory_region {
|
||||
__u32 slot;
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size; /* bytes */
|
||||
};
|
||||
|
||||
/* for kvm_memory_region::flags */
|
||||
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
|
||||
|
||||
This ioctl allows the user to create or modify a guest physical memory
|
||||
slot. When changing an existing slot, it may be moved in the guest
|
||||
physical memory space, or its flags may be modified. It may not be
|
||||
resized. Slots may not overlap.
|
||||
|
||||
The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
|
||||
instructs kvm to keep track of writes to memory within the slot. See
|
||||
the KVM_GET_DIRTY_LOG ioctl.
|
||||
|
||||
It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead
|
||||
of this API, if available. This newer API allows placing guest memory
|
||||
at specified locations in the host address space, yielding better
|
||||
control and easy access.
|
||||
|
||||
4.6 KVM_CREATE_VCPU
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: vm ioctl
|
||||
Parameters: vcpu id (apic id on x86)
|
||||
Returns: vcpu fd on success, -1 on error
|
||||
|
||||
This API adds a vcpu to a virtual machine. The vcpu id is a small integer
|
||||
in the range [0, max_vcpus).
|
||||
|
||||
4.7 KVM_GET_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_dirty_log (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
/* for KVM_GET_DIRTY_LOG */
|
||||
struct kvm_dirty_log {
|
||||
__u32 slot;
|
||||
__u32 padding;
|
||||
union {
|
||||
void __user *dirty_bitmap; /* one bit per page */
|
||||
__u64 padding;
|
||||
};
|
||||
};
|
||||
|
||||
Given a memory slot, return a bitmap containing any pages dirtied
|
||||
since the last call to this ioctl. Bit 0 is the first page in the
|
||||
memory slot. Ensure the entire structure is cleared to avoid padding
|
||||
issues.
|
||||
|
||||
4.8 KVM_SET_MEMORY_ALIAS
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_memory_alias (in)
|
||||
Returns: 0 (success), -1 (error)
|
||||
|
||||
struct kvm_memory_alias {
|
||||
__u32 slot; /* this has a different namespace than memory slots */
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size;
|
||||
__u64 target_phys_addr;
|
||||
};
|
||||
|
||||
Defines a guest physical address space region as an alias to another
|
||||
region. Useful for aliased addresses, for example the VGA low memory
|
||||
window. Should not be used with userspace memory.
|
||||
|
||||
4.9 KVM_RUN
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: none
|
||||
Returns: 0 on success, -1 on error
|
||||
Errors:
|
||||
EINTR: an unmasked signal is pending
|
||||
|
||||
This ioctl is used to run a guest virtual cpu. While there are no
|
||||
explicit parameters, there is an implicit parameter block that can be
|
||||
obtained by mmap()ing the vcpu fd at offset 0, with the size given by
|
||||
KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct
|
||||
kvm_run' (see below).
|
||||
|
||||
4.10 KVM_GET_REGS
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_regs (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Reads the general purpose registers from the vcpu.
|
||||
|
||||
/* x86 */
|
||||
struct kvm_regs {
|
||||
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
|
||||
__u64 rax, rbx, rcx, rdx;
|
||||
__u64 rsi, rdi, rsp, rbp;
|
||||
__u64 r8, r9, r10, r11;
|
||||
__u64 r12, r13, r14, r15;
|
||||
__u64 rip, rflags;
|
||||
};
|
||||
|
||||
4.11 KVM_SET_REGS
|
||||
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_regs (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Writes the general purpose registers into the vcpu.
|
||||
|
||||
See KVM_GET_REGS for the data structure.
|
||||
|
||||
4.12 KVM_GET_SREGS
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_sregs (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Reads special registers from the vcpu.
|
||||
|
||||
/* x86 */
|
||||
struct kvm_sregs {
|
||||
struct kvm_segment cs, ds, es, fs, gs, ss;
|
||||
struct kvm_segment tr, ldt;
|
||||
struct kvm_dtable gdt, idt;
|
||||
__u64 cr0, cr2, cr3, cr4, cr8;
|
||||
__u64 efer;
|
||||
__u64 apic_base;
|
||||
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
|
||||
};
|
||||
|
||||
interrupt_bitmap is a bitmap of pending external interrupts. At most
|
||||
one bit may be set. This interrupt has been acknowledged by the APIC
|
||||
but not yet injected into the cpu core.
|
||||
|
||||
4.13 KVM_SET_SREGS
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_sregs (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Writes special registers into the vcpu. See KVM_GET_SREGS for the
|
||||
data structures.
|
||||
|
||||
4.14 KVM_TRANSLATE
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_translation (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Translates a virtual address according to the vcpu's current address
|
||||
translation mode.
|
||||
|
||||
struct kvm_translation {
|
||||
/* in */
|
||||
__u64 linear_address;
|
||||
|
||||
/* out */
|
||||
__u64 physical_address;
|
||||
__u8 valid;
|
||||
__u8 writeable;
|
||||
__u8 usermode;
|
||||
__u8 pad[5];
|
||||
};
|
||||
|
||||
4.15 KVM_INTERRUPT
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_interrupt (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Queues a hardware interrupt vector to be injected. This is only
|
||||
useful if in-kernel local APIC is not used.
|
||||
|
||||
/* for KVM_INTERRUPT */
|
||||
struct kvm_interrupt {
|
||||
/* in */
|
||||
__u32 irq;
|
||||
};
|
||||
|
||||
Note 'irq' is an interrupt vector, not an interrupt pin or line.
|
||||
|
||||
4.16 KVM_DEBUG_GUEST
|
||||
|
||||
Capability: basic
|
||||
Architectures: none
|
||||
Type: vcpu ioctl
|
||||
Parameters: none
|
||||
Returns: -1 on error
|
||||
|
||||
Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
|
||||
|
||||
4.17 KVM_GET_MSRS
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_msrs (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Reads model-specific registers from the vcpu. Supported msr indices can
|
||||
be obtained using KVM_GET_MSR_INDEX_LIST.
|
||||
|
||||
struct kvm_msrs {
|
||||
__u32 nmsrs; /* number of msrs in entries */
|
||||
__u32 pad;
|
||||
|
||||
struct kvm_msr_entry entries[0];
|
||||
};
|
||||
|
||||
struct kvm_msr_entry {
|
||||
__u32 index;
|
||||
__u32 reserved;
|
||||
__u64 data;
|
||||
};
|
||||
|
||||
Application code should set the 'nmsrs' member (which indicates the
|
||||
size of the entries array) and the 'index' member of each array entry.
|
||||
kvm will fill in the 'data' member.
|
||||
|
||||
4.18 KVM_SET_MSRS
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_msrs (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the
|
||||
data structures.
|
||||
|
||||
Application code should set the 'nmsrs' member (which indicates the
|
||||
size of the entries array), and the 'index' and 'data' members of each
|
||||
array entry.
|
||||
|
||||
4.19 KVM_SET_CPUID
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_cpuid (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Defines the vcpu responses to the cpuid instruction. Applications
|
||||
should use the KVM_SET_CPUID2 ioctl if available.
|
||||
|
||||
|
||||
struct kvm_cpuid_entry {
|
||||
__u32 function;
|
||||
__u32 eax;
|
||||
__u32 ebx;
|
||||
__u32 ecx;
|
||||
__u32 edx;
|
||||
__u32 padding;
|
||||
};
|
||||
|
||||
/* for KVM_SET_CPUID */
|
||||
struct kvm_cpuid {
|
||||
__u32 nent;
|
||||
__u32 padding;
|
||||
struct kvm_cpuid_entry entries[0];
|
||||
};
|
||||
|
||||
4.20 KVM_SET_SIGNAL_MASK
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_signal_mask (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Defines which signals are blocked during execution of KVM_RUN. This
|
||||
signal mask temporarily overrides the thread's signal mask. Any
|
||||
unblocked signal received (except SIGKILL and SIGSTOP, which retain
|
||||
their traditional behaviour) will cause KVM_RUN to return with -EINTR.
|
||||
|
||||
Note the signal will only be delivered if not blocked by the original
|
||||
signal mask.
|
||||
|
||||
/* for KVM_SET_SIGNAL_MASK */
|
||||
struct kvm_signal_mask {
|
||||
__u32 len;
|
||||
__u8 sigset[0];
|
||||
};
|
||||
|
||||
4.21 KVM_GET_FPU
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_fpu (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Reads the floating point state from the vcpu.
|
||||
|
||||
/* for KVM_GET_FPU and KVM_SET_FPU */
|
||||
struct kvm_fpu {
|
||||
__u8 fpr[8][16];
|
||||
__u16 fcw;
|
||||
__u16 fsw;
|
||||
__u8 ftwx; /* in fxsave format */
|
||||
__u8 pad1;
|
||||
__u16 last_opcode;
|
||||
__u64 last_ip;
|
||||
__u64 last_dp;
|
||||
__u8 xmm[16][16];
|
||||
__u32 mxcsr;
|
||||
__u32 pad2;
|
||||
};
|
||||
|
||||
4.22 KVM_SET_FPU
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_fpu (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Writes the floating point state to the vcpu.
|
||||
|
||||
/* for KVM_GET_FPU and KVM_SET_FPU */
|
||||
struct kvm_fpu {
|
||||
__u8 fpr[8][16];
|
||||
__u16 fcw;
|
||||
__u16 fsw;
|
||||
__u8 ftwx; /* in fxsave format */
|
||||
__u8 pad1;
|
||||
__u16 last_opcode;
|
||||
__u64 last_ip;
|
||||
__u64 last_dp;
|
||||
__u8 xmm[16][16];
|
||||
__u32 mxcsr;
|
||||
__u32 pad2;
|
||||
};
|
||||
|
||||
4.23 KVM_CREATE_IRQCHIP
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86, ia64
|
||||
Type: vm ioctl
|
||||
Parameters: none
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Creates an interrupt controller model in the kernel. On x86, creates a virtual
|
||||
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
|
||||
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
|
||||
only go to the IOAPIC. On ia64, an IOSAPIC is created.
|
||||
|
||||
4.24 KVM_IRQ_LINE
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86, ia64
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_irq_level
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Sets the level of a GSI input to the interrupt controller model in the kernel.
|
||||
Requires that an interrupt controller model has been previously created with
|
||||
KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
|
||||
to be set to 1 and then back to 0.
|
||||
|
||||
struct kvm_irq_level {
|
||||
union {
|
||||
__u32 irq; /* GSI */
|
||||
__s32 status; /* not used for KVM_IRQ_LINE */
|
||||
};
|
||||
__u32 level; /* 0 or 1 */
|
||||
};
|
||||
|
||||
4.25 KVM_GET_IRQCHIP
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86, ia64
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_irqchip (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Reads the state of a kernel interrupt controller created with
|
||||
KVM_CREATE_IRQCHIP into a buffer provided by the caller.
|
||||
|
||||
struct kvm_irqchip {
|
||||
__u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
|
||||
__u32 pad;
|
||||
union {
|
||||
char dummy[512]; /* reserving space */
|
||||
struct kvm_pic_state pic;
|
||||
struct kvm_ioapic_state ioapic;
|
||||
} chip;
|
||||
};
|
||||
|
||||
4.26 KVM_SET_IRQCHIP
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86, ia64
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_irqchip (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Sets the state of a kernel interrupt controller created with
|
||||
KVM_CREATE_IRQCHIP from a buffer provided by the caller.
|
||||
|
||||
struct kvm_irqchip {
|
||||
__u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
|
||||
__u32 pad;
|
||||
union {
|
||||
char dummy[512]; /* reserving space */
|
||||
struct kvm_pic_state pic;
|
||||
struct kvm_ioapic_state ioapic;
|
||||
} chip;
|
||||
};
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
mmap()ing a vcpu fd. From that point, application code can control
|
||||
execution by changing fields in kvm_run prior to calling the KVM_RUN
|
||||
ioctl, and obtain information about the reason KVM_RUN returned by
|
||||
looking up structure members.
|
||||
|
||||
struct kvm_run {
|
||||
/* in */
|
||||
__u8 request_interrupt_window;
|
||||
|
||||
Request that KVM_RUN return when it becomes possible to inject external
|
||||
interrupts into the guest. Useful in conjunction with KVM_INTERRUPT.
|
||||
|
||||
__u8 padding1[7];
|
||||
|
||||
/* out */
|
||||
__u32 exit_reason;
|
||||
|
||||
When KVM_RUN has returned successfully (return value 0), this informs
|
||||
application code why KVM_RUN has returned. Allowable values for this
|
||||
field are detailed below.
|
||||
|
||||
__u8 ready_for_interrupt_injection;
|
||||
|
||||
If request_interrupt_window has been specified, this field indicates
|
||||
an interrupt can be injected now with KVM_INTERRUPT.
|
||||
|
||||
__u8 if_flag;
|
||||
|
||||
The value of the current interrupt flag. Only valid if in-kernel
|
||||
local APIC is not used.
|
||||
|
||||
__u8 padding2[2];
|
||||
|
||||
/* in (pre_kvm_run), out (post_kvm_run) */
|
||||
__u64 cr8;
|
||||
|
||||
The value of the cr8 register. Only valid if in-kernel local APIC is
|
||||
not used. Both input and output.
|
||||
|
||||
__u64 apic_base;
|
||||
|
||||
The value of the APIC BASE msr. Only valid if in-kernel local
|
||||
APIC is not used. Both input and output.
|
||||
|
||||
union {
|
||||
/* KVM_EXIT_UNKNOWN */
|
||||
struct {
|
||||
__u64 hardware_exit_reason;
|
||||
} hw;
|
||||
|
||||
If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
|
||||
reasons. Further architecture-specific information is available in
|
||||
hardware_exit_reason.
|
||||
|
||||
/* KVM_EXIT_FAIL_ENTRY */
|
||||
struct {
|
||||
__u64 hardware_entry_failure_reason;
|
||||
} fail_entry;
|
||||
|
||||
If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
|
||||
to unknown reasons. Further architecture-specific information is
|
||||
available in hardware_entry_failure_reason.
|
||||
|
||||
/* KVM_EXIT_EXCEPTION */
|
||||
struct {
|
||||
__u32 exception;
|
||||
__u32 error_code;
|
||||
} ex;
|
||||
|
||||
Unused.
|
||||
|
||||
/* KVM_EXIT_IO */
|
||||
struct {
|
||||
#define KVM_EXIT_IO_IN 0
|
||||
#define KVM_EXIT_IO_OUT 1
|
||||
__u8 direction;
|
||||
__u8 size; /* bytes */
|
||||
__u16 port;
|
||||
__u32 count;
|
||||
__u64 data_offset; /* relative to kvm_run start */
|
||||
} io;
|
||||
|
||||
If exit_reason is KVM_EXIT_IO, then the vcpu has
|
||||
executed a port I/O instruction which could not be satisfied by kvm.
|
||||
data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
|
||||
where kvm expects application code to place the data for the next
|
||||
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
|
||||
|
||||
struct {
|
||||
struct kvm_debug_exit_arch arch;
|
||||
} debug;
|
||||
|
||||
Unused.
|
||||
|
||||
/* KVM_EXIT_MMIO */
|
||||
struct {
|
||||
__u64 phys_addr;
|
||||
__u8 data[8];
|
||||
__u32 len;
|
||||
__u8 is_write;
|
||||
} mmio;
|
||||
|
||||
If exit_reason is KVM_EXIT_MMIO, then the vcpu has
|
||||
executed a memory-mapped I/O instruction which could not be satisfied
|
||||
by kvm. The 'data' member contains the written data if 'is_write' is
|
||||
true, and should be filled by application code otherwise.
|
||||
|
||||
/* KVM_EXIT_HYPERCALL */
|
||||
struct {
|
||||
__u64 nr;
|
||||
__u64 args[6];
|
||||
__u64 ret;
|
||||
__u32 longmode;
|
||||
__u32 pad;
|
||||
} hypercall;
|
||||
|
||||
Unused.
|
||||
|
||||
/* KVM_EXIT_TPR_ACCESS */
|
||||
struct {
|
||||
__u64 rip;
|
||||
__u32 is_write;
|
||||
__u32 pad;
|
||||
} tpr_access;
|
||||
|
||||
To be documented (KVM_TPR_ACCESS_REPORTING).
|
||||
|
||||
/* KVM_EXIT_S390_SIEIC */
|
||||
struct {
|
||||
__u8 icptcode;
|
||||
__u64 mask; /* psw upper half */
|
||||
__u64 addr; /* psw lower half */
|
||||
__u16 ipa;
|
||||
__u32 ipb;
|
||||
} s390_sieic;
|
||||
|
||||
s390 specific.
|
||||
|
||||
/* KVM_EXIT_S390_RESET */
|
||||
#define KVM_S390_RESET_POR 1
|
||||
#define KVM_S390_RESET_CLEAR 2
|
||||
#define KVM_S390_RESET_SUBSYSTEM 4
|
||||
#define KVM_S390_RESET_CPU_INIT 8
|
||||
#define KVM_S390_RESET_IPL 16
|
||||
__u64 s390_reset_flags;
|
||||
|
||||
s390 specific.
|
||||
|
||||
/* KVM_EXIT_DCR */
|
||||
struct {
|
||||
__u32 dcrn;
|
||||
__u32 data;
|
||||
__u8 is_write;
|
||||
} dcr;
|
||||
|
||||
powerpc specific.
|
||||
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
};
|
@ -36,8 +36,6 @@ detailed description):
|
||||
- Bluetooth enable and disable
|
||||
- video output switching, expansion control
|
||||
- ThinkLight on and off
|
||||
- limited docking and undocking
|
||||
- UltraBay eject
|
||||
- CMOS/UCMS control
|
||||
- LED control
|
||||
- ACPI sounds
|
||||
@ -729,131 +727,6 @@ cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
|
||||
It is impossible to know if the status returned through sysfs is valid.
|
||||
|
||||
|
||||
Docking / undocking -- /proc/acpi/ibm/dock
|
||||
------------------------------------------
|
||||
|
||||
Docking and undocking (e.g. with the X4 UltraBase) requires some
|
||||
actions to be taken by the operating system to safely make or break
|
||||
the electrical connections with the dock.
|
||||
|
||||
The docking feature of this driver generates the following ACPI events:
|
||||
|
||||
ibm/dock GDCK 00000003 00000001 -- eject request
|
||||
ibm/dock GDCK 00000003 00000002 -- undocked
|
||||
ibm/dock GDCK 00000000 00000003 -- docked
|
||||
|
||||
NOTE: These events will only be generated if the laptop was docked
|
||||
when originally booted. This is due to the current lack of support for
|
||||
hot plugging of devices in the Linux ACPI framework. If the laptop was
|
||||
booted while not in the dock, the following message is shown in the
|
||||
logs:
|
||||
|
||||
Mar 17 01:42:34 aero kernel: thinkpad_acpi: dock device not present
|
||||
|
||||
In this case, no dock-related events are generated but the dock and
|
||||
undock commands described below still work. They can be executed
|
||||
manually or triggered by Fn key combinations (see the example acpid
|
||||
configuration files included in the driver tarball package available
|
||||
on the web site).
|
||||
|
||||
When the eject request button on the dock is pressed, the first event
|
||||
above is generated. The handler for this event should issue the
|
||||
following command:
|
||||
|
||||
echo undock > /proc/acpi/ibm/dock
|
||||
|
||||
After the LED on the dock goes off, it is safe to eject the laptop.
|
||||
Note: if you pressed this key by mistake, go ahead and eject the
|
||||
laptop, then dock it back in. Otherwise, the dock may not function as
|
||||
expected.
|
||||
|
||||
When the laptop is docked, the third event above is generated. The
|
||||
handler for this event should issue the following command to fully
|
||||
enable the dock:
|
||||
|
||||
echo dock > /proc/acpi/ibm/dock
|
||||
|
||||
The contents of the /proc/acpi/ibm/dock file shows the current status
|
||||
of the dock, as provided by the ACPI framework.
|
||||
|
||||
The docking support in this driver does not take care of enabling or
|
||||
disabling any other devices you may have attached to the dock. For
|
||||
example, a CD drive plugged into the UltraBase needs to be disabled or
|
||||
enabled separately. See the provided example acpid configuration files
|
||||
for how this can be accomplished.
|
||||
|
||||
There is no support yet for PCI devices that may be attached to a
|
||||
docking station, e.g. in the ThinkPad Dock II. The driver currently
|
||||
does not recognize, enable or disable such devices. This means that
|
||||
the only docking stations currently supported are the X-series
|
||||
UltraBase docks and "dumb" port replicators like the Mini Dock (the
|
||||
latter don't need any ACPI support, actually).
|
||||
|
||||
|
||||
UltraBay eject -- /proc/acpi/ibm/bay
|
||||
------------------------------------
|
||||
|
||||
Inserting or ejecting an UltraBay device requires some actions to be
|
||||
taken by the operating system to safely make or break the electrical
|
||||
connections with the device.
|
||||
|
||||
This feature generates the following ACPI events:
|
||||
|
||||
ibm/bay MSTR 00000003 00000000 -- eject request
|
||||
ibm/bay MSTR 00000001 00000000 -- eject lever inserted
|
||||
|
||||
NOTE: These events will only be generated if the UltraBay was present
|
||||
when the laptop was originally booted (on the X series, the UltraBay
|
||||
is in the dock, so it may not be present if the laptop was undocked).
|
||||
This is due to the current lack of support for hot plugging of devices
|
||||
in the Linux ACPI framework. If the laptop was booted without the
|
||||
UltraBay, the following message is shown in the logs:
|
||||
|
||||
Mar 17 01:42:34 aero kernel: thinkpad_acpi: bay device not present
|
||||
|
||||
In this case, no bay-related events are generated but the eject
|
||||
command described below still works. It can be executed manually or
|
||||
triggered by a hot key combination.
|
||||
|
||||
Sliding the eject lever generates the first event shown above. The
|
||||
handler for this event should take whatever actions are necessary to
|
||||
shut down the device in the UltraBay (e.g. call idectl), then issue
|
||||
the following command:
|
||||
|
||||
echo eject > /proc/acpi/ibm/bay
|
||||
|
||||
After the LED on the UltraBay goes off, it is safe to pull out the
|
||||
device.
|
||||
|
||||
When the eject lever is inserted, the second event above is
|
||||
generated. The handler for this event should take whatever actions are
|
||||
necessary to enable the UltraBay device (e.g. call idectl).
|
||||
|
||||
The contents of the /proc/acpi/ibm/bay file shows the current status
|
||||
of the UltraBay, as provided by the ACPI framework.
|
||||
|
||||
EXPERIMENTAL warm eject support on the 600e/x, A22p and A3x (To use
|
||||
this feature, you need to supply the experimental=1 parameter when
|
||||
loading the module):
|
||||
|
||||
These models do not have a button near the UltraBay device to request
|
||||
a hot eject but rather require the laptop to be put to sleep
|
||||
(suspend-to-ram) before the bay device is ejected or inserted).
|
||||
The sequence of steps to eject the device is as follows:
|
||||
|
||||
echo eject > /proc/acpi/ibm/bay
|
||||
put the ThinkPad to sleep
|
||||
remove the drive
|
||||
resume from sleep
|
||||
cat /proc/acpi/ibm/bay should show that the drive was removed
|
||||
|
||||
On the A3x, both the UltraBay 2000 and UltraBay Plus devices are
|
||||
supported. Use "eject2" instead of "eject" for the second bay.
|
||||
|
||||
Note: the UltraBay eject support on the 600e/x, A22p and A3x is
|
||||
EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
|
||||
|
||||
|
||||
CMOS/UCMS control
|
||||
-----------------
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -30,9 +30,9 @@ State
|
||||
The validator tracks lock-class usage history into 4n + 1 separate state bits:
|
||||
|
||||
- 'ever held in STATE context'
|
||||
- 'ever head as readlock in STATE context'
|
||||
- 'ever head with STATE enabled'
|
||||
- 'ever head as readlock with STATE enabled'
|
||||
- 'ever held as readlock in STATE context'
|
||||
- 'ever held with STATE enabled'
|
||||
- 'ever held as readlock with STATE enabled'
|
||||
|
||||
Where STATE can be either one of (kernel/lockdep_states.h)
|
||||
- hardirq
|
||||
|
@ -60,6 +60,8 @@ framerelay.txt
|
||||
- info on using Frame Relay/Data Link Connection Identifier (DLCI).
|
||||
generic_netlink.txt
|
||||
- info on Generic Netlink
|
||||
ieee802154.txt
|
||||
- Linux IEEE 802.15.4 implementation, API and drivers
|
||||
ip-sysctl.txt
|
||||
- /proc/sys/net/ipv4/* variables
|
||||
ip_dynaddr.txt
|
||||
|
@ -22,7 +22,7 @@ int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
|
||||
.....
|
||||
|
||||
The address family, socket addresses etc. are defined in the
|
||||
include/net/ieee802154/af_ieee802154.h header or in the special header
|
||||
include/net/af_ieee802154.h header or in the special header
|
||||
in our userspace package (see either linux-zigbee sourceforge download page
|
||||
or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee).
|
||||
|
||||
@ -33,7 +33,7 @@ MLME - MAC Level Management
|
||||
============================
|
||||
|
||||
Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands.
|
||||
See the include/net/ieee802154/nl802154.h header. Our userspace tools package
|
||||
See the include/net/nl802154.h header. Our userspace tools package
|
||||
(see above) provides CLI configuration utility for radio interfaces and simple
|
||||
coordinator for IEEE 802.15.4 networks as an example user of the MLME protocol.
|
||||
|
||||
@ -54,10 +54,14 @@ Those types of devices require different approach to be hooked into Linux kernel
|
||||
HardMAC
|
||||
=======
|
||||
|
||||
See the header include/net/ieee802154/netdevice.h. You have to implement Linux
|
||||
See the header include/net/ieee802154_netdev.h. You have to implement Linux
|
||||
net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family
|
||||
code via plain sk_buffs. The control block of sk_buffs will contain additional
|
||||
info as described in the struct ieee802154_mac_cb.
|
||||
code via plain sk_buffs. On skb reception skb->cb must contain additional
|
||||
info as described in the struct ieee802154_mac_cb. During packet transmission
|
||||
the skb->cb is used to provide additional data to device's header_ops->create
|
||||
function. Be aware that this data can be overridden later (when socket code
|
||||
submits skb to qdisc), so if you need something from that cb later, you should
|
||||
store info in the skb->data on your own.
|
||||
|
||||
To hook the MLME interface you have to populate the ml_priv field of your
|
||||
net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are
|
||||
@ -69,8 +73,8 @@ We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c
|
||||
SoftMAC
|
||||
=======
|
||||
|
||||
We are going to provide intermediate layer impelementing IEEE 802.15.4 MAC
|
||||
We are going to provide intermediate layer implementing IEEE 802.15.4 MAC
|
||||
in software. This is currently WIP.
|
||||
|
||||
See header include/net/ieee802154/mac802154.h and several drivers in
|
||||
drivers/ieee802154/
|
||||
See header include/net/mac802154.h and several drivers in drivers/ieee802154/.
|
||||
|
||||
|
@ -311,9 +311,12 @@ tcp_no_metrics_save - BOOLEAN
|
||||
connections.
|
||||
|
||||
tcp_orphan_retries - INTEGER
|
||||
How may times to retry before killing TCP connection, closed
|
||||
by our side. Default value 7 corresponds to ~50sec-16min
|
||||
depending on RTO. If you machine is loaded WEB server,
|
||||
This value influences the timeout of a locally closed TCP connection,
|
||||
when RTO retransmissions remain unacknowledged.
|
||||
See tcp_retries2 for more details.
|
||||
|
||||
The default value is 7.
|
||||
If your machine is a loaded WEB server,
|
||||
you should think about lowering this value, such sockets
|
||||
may consume significant resources. Cf. tcp_max_orphans.
|
||||
|
||||
@ -327,16 +330,28 @@ tcp_retrans_collapse - BOOLEAN
|
||||
certain TCP stacks.
|
||||
|
||||
tcp_retries1 - INTEGER
|
||||
How many times to retry before deciding that something is wrong
|
||||
and it is necessary to report this suspicion to network layer.
|
||||
Minimal RFC value is 3, it is default, which corresponds
|
||||
to ~3sec-8min depending on RTO.
|
||||
This value influences the time, after which TCP decides, that
|
||||
something is wrong due to unacknowledged RTO retransmissions,
|
||||
and reports this suspicion to the network layer.
|
||||
See tcp_retries2 for more details.
|
||||
|
||||
RFC 1122 recommends at least 3 retransmissions, which is the
|
||||
default.
|
||||
|
||||
tcp_retries2 - INTEGER
|
||||
How may times to retry before killing alive TCP connection.
|
||||
RFC1122 says that the limit should be longer than 100 sec.
|
||||
It is too small number. Default value 15 corresponds to ~13-30min
|
||||
depending on RTO.
|
||||
This value influences the timeout of an alive TCP connection,
|
||||
when RTO retransmissions remain unacknowledged.
|
||||
Given a value of N, a hypothetical TCP connection following
|
||||
exponential backoff with an initial RTO of TCP_RTO_MIN would
|
||||
retransmit N times before killing the connection at the (N+1)th RTO.
|
||||
|
||||
The default value of 15 yields a hypothetical timeout of 924.6
|
||||
seconds and is a lower bound for the effective timeout.
|
||||
TCP will effectively time out at the first RTO which exceeds the
|
||||
hypothetical timeout.
|
||||
|
||||
RFC 1122 recommends at least 100 seconds for the timeout,
|
||||
which corresponds to a value of at least 8.
|
||||
|
||||
tcp_rfc1337 - BOOLEAN
|
||||
If set, the TCP stack behaves conforming to RFC1337. If unset,
|
||||
@ -1282,6 +1297,16 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
|
||||
sctp_wmem - vector of 3 INTEGERs: min, default, max
|
||||
See tcp_wmem for a description.
|
||||
|
||||
addr_scope_policy - INTEGER
|
||||
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
|
||||
|
||||
0 - Disable IPv4 address scoping
|
||||
1 - Enable IPv4 address scoping
|
||||
2 - Follow draft but allow IPv4 private addresses
|
||||
3 - Follow draft but allow IPv4 link local addresses
|
||||
|
||||
Default: 1
|
||||
|
||||
|
||||
/proc/sys/net/core/*
|
||||
dev_weight - INTEGER
|
||||
|
378
Documentation/power/runtime_pm.txt
Normal file
378
Documentation/power/runtime_pm.txt
Normal file
@ -0,0 +1,378 @@
|
||||
Run-time Power Management Framework for I/O Devices
|
||||
|
||||
(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
|
||||
|
||||
1. Introduction
|
||||
|
||||
Support for run-time power management (run-time PM) of I/O devices is provided
|
||||
at the power management core (PM core) level by means of:
|
||||
|
||||
* The power management workqueue pm_wq in which bus types and device drivers can
|
||||
put their PM-related work items. It is strongly recommended that pm_wq be
|
||||
used for queuing all work items related to run-time PM, because this allows
|
||||
them to be synchronized with system-wide power transitions (suspend to RAM,
|
||||
hibernation and resume from system sleep states). pm_wq is declared in
|
||||
include/linux/pm_runtime.h and defined in kernel/power/main.c.
|
||||
|
||||
* A number of run-time PM fields in the 'power' member of 'struct device' (which
|
||||
is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
|
||||
be used for synchronizing run-time PM operations with one another.
|
||||
|
||||
* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in
|
||||
include/linux/pm.h).
|
||||
|
||||
* A set of helper functions defined in drivers/base/power/runtime.c that can be
|
||||
used for carrying out run-time PM operations in such a way that the
|
||||
synchronization between them is taken care of by the PM core. Bus types and
|
||||
device drivers are encouraged to use these functions.
|
||||
|
||||
The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM
|
||||
fields of 'struct dev_pm_info' and the core helper functions provided for
|
||||
run-time PM are described below.
|
||||
|
||||
2. Device Run-time PM Callbacks
|
||||
|
||||
There are three device run-time PM callbacks defined in 'struct dev_pm_ops':
|
||||
|
||||
struct dev_pm_ops {
|
||||
...
|
||||
int (*runtime_suspend)(struct device *dev);
|
||||
int (*runtime_resume)(struct device *dev);
|
||||
void (*runtime_idle)(struct device *dev);
|
||||
...
|
||||
};
|
||||
|
||||
The ->runtime_suspend() callback is executed by the PM core for the bus type of
|
||||
the device being suspended. The bus type's callback is then _entirely_
|
||||
_responsible_ for handling the device as appropriate, which may, but need not
|
||||
include executing the device driver's own ->runtime_suspend() callback (from the
|
||||
PM core's point of view it is not necessary to implement a ->runtime_suspend()
|
||||
callback in a device driver as long as the bus type's ->runtime_suspend() knows
|
||||
what to do to handle the device).
|
||||
|
||||
* Once the bus type's ->runtime_suspend() callback has completed successfully
|
||||
for a given device, the PM core regards the device as suspended, which need
|
||||
not mean that the device has been put into a low power state. It is
|
||||
supposed to mean, however, that the device will not process data and will
|
||||
not communicate with the CPU(s) and RAM until its bus type's
|
||||
->runtime_resume() callback is executed for it. The run-time PM status of
|
||||
a device after successful execution of its bus type's ->runtime_suspend()
|
||||
callback is 'suspended'.
|
||||
|
||||
* If the bus type's ->runtime_suspend() callback returns -EBUSY or -EAGAIN,
|
||||
the device's run-time PM status is supposed to be 'active', which means that
|
||||
the device _must_ be fully operational afterwards.
|
||||
|
||||
* If the bus type's ->runtime_suspend() callback returns an error code
|
||||
different from -EBUSY or -EAGAIN, the PM core regards this as a fatal
|
||||
error and will refuse to run the helper functions described in Section 4
|
||||
for the device, until the status of it is directly set either to 'active'
|
||||
or to 'suspended' (the PM core provides special helper functions for this
|
||||
purpose).
|
||||
|
||||
In particular, if the driver requires remote wakeup capability for proper
|
||||
functioning and device_may_wakeup() returns 'false' for the device, then
|
||||
->runtime_suspend() should return -EBUSY. On the other hand, if
|
||||
device_may_wakeup() returns 'true' for the device and the device is put
|
||||
into a low power state during the execution of its bus type's
|
||||
->runtime_suspend(), it is expected that remote wake-up (i.e. a hardware mechanism
|
||||
allowing the device to request a change of its power state, such as PCI PME)
|
||||
will be enabled for the device. Generally, remote wake-up should be enabled
|
||||
for all input devices put into a low power state at run time.
|
||||
|
||||
The ->runtime_resume() callback is executed by the PM core for the bus type of
|
||||
the device being woken up. The bus type's callback is then _entirely_
|
||||
_responsible_ for handling the device as appropriate, which may, but need not
|
||||
include executing the device driver's own ->runtime_resume() callback (from the
|
||||
PM core's point of view it is not necessary to implement a ->runtime_resume()
|
||||
callback in a device driver as long as the bus type's ->runtime_resume() knows
|
||||
what to do to handle the device).
|
||||
|
||||
* Once the bus type's ->runtime_resume() callback has completed successfully,
|
||||
the PM core regards the device as fully operational, which means that the
|
||||
device _must_ be able to complete I/O operations as needed. The run-time
|
||||
PM status of the device is then 'active'.
|
||||
|
||||
* If the bus type's ->runtime_resume() callback returns an error code, the PM
|
||||
core regards this as a fatal error and will refuse to run the helper
|
||||
functions described in Section 4 for the device, until its status is
|
||||
directly set either to 'active' or to 'suspended' (the PM core provides
|
||||
special helper functions for this purpose).
|
||||
|
||||
The ->runtime_idle() callback is executed by the PM core for the bus type of
|
||||
the given device whenever the device appears to be idle, which is indicated to the
|
||||
PM core by two counters, the device's usage counter and the counter of 'active'
|
||||
children of the device.
|
||||
|
||||
* If any of these counters is decreased using a helper function provided by
|
||||
the PM core and it turns out to be equal to zero, the other counter is
|
||||
checked. If that counter also is equal to zero, the PM core executes the
|
||||
device bus type's ->runtime_idle() callback (with the device as an
|
||||
argument).
|
||||
|
||||
The action performed by a bus type's ->runtime_idle() callback is totally
|
||||
dependent on the bus type in question, but the expected and recommended action
|
||||
is to check if the device can be suspended (i.e. if all of the conditions
|
||||
necessary for suspending the device are satisfied) and to queue up a suspend
|
||||
request for the device in that case.
|
||||
|
||||
The helper functions provided by the PM core, described in Section 4, guarantee
|
||||
that the following constraints are met with respect to the bus type's run-time
|
||||
PM callbacks:
|
||||
|
||||
(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
|
||||
->runtime_suspend() in parallel with ->runtime_resume() or with another
|
||||
instance of ->runtime_suspend() for the same device) with the exception that
|
||||
->runtime_suspend() or ->runtime_resume() can be executed in parallel with
|
||||
->runtime_idle() (although ->runtime_idle() will not be started while any
|
||||
of the other callbacks is being executed for the same device).
|
||||
|
||||
(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
|
||||
devices (i.e. the PM core will only execute ->runtime_idle() or
|
||||
->runtime_suspend() for the devices the run-time PM status of which is
|
||||
'active').
|
||||
|
||||
(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
|
||||
the usage counter of which is equal to zero _and_ either the counter of
|
||||
'active' children of which is equal to zero, or the 'power.ignore_children'
|
||||
flag of which is set.
|
||||
|
||||
(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
|
||||
PM core will only execute ->runtime_resume() for the devices the run-time
|
||||
PM status of which is 'suspended').
|
||||
|
||||
Additionally, the helper functions provided by the PM core obey the following
|
||||
rules:
|
||||
|
||||
* If ->runtime_suspend() is about to be executed or there's a pending request
|
||||
to execute it, ->runtime_idle() will not be executed for the same device.
|
||||
|
||||
* A request to execute or to schedule the execution of ->runtime_suspend()
|
||||
will cancel any pending requests to execute ->runtime_idle() for the same
|
||||
device.
|
||||
|
||||
* If ->runtime_resume() is about to be executed or there's a pending request
|
||||
to execute it, the other callbacks will not be executed for the same device.
|
||||
|
||||
* A request to execute ->runtime_resume() will cancel any pending or
|
||||
scheduled requests to execute the other callbacks for the same device.
|
||||
|
||||
3. Run-time PM Device Fields
|
||||
|
||||
The following device run-time PM fields are present in 'struct dev_pm_info', as
|
||||
defined in include/linux/pm.h:
|
||||
|
||||
struct timer_list suspend_timer;
|
||||
- timer used for scheduling (delayed) suspend request
|
||||
|
||||
unsigned long timer_expires;
|
||||
- timer expiration time, in jiffies (if this is different from zero, the
|
||||
timer is running and will expire at that time, otherwise the timer is not
|
||||
running)
|
||||
|
||||
struct work_struct work;
|
||||
- work structure used for queuing up requests (i.e. work items in pm_wq)
|
||||
|
||||
wait_queue_head_t wait_queue;
|
||||
- wait queue used if any of the helper functions needs to wait for another
|
||||
one to complete
|
||||
|
||||
spinlock_t lock;
|
||||
- lock used for synchronisation
|
||||
|
||||
atomic_t usage_count;
|
||||
- the usage counter of the device
|
||||
|
||||
atomic_t child_count;
|
||||
- the count of 'active' children of the device
|
||||
|
||||
unsigned int ignore_children;
|
||||
- if set, the value of child_count is ignored (but still updated)
|
||||
|
||||
unsigned int disable_depth;
|
||||
- used for disabling the helper functions (they work normally if this is
|
||||
equal to zero); the initial value of it is 1 (i.e. run-time PM is
|
||||
initially disabled for all devices)
|
||||
|
||||
unsigned int runtime_error;
|
||||
- if set, there was a fatal error (one of the callbacks returned error code
|
||||
as described in Section 2), so the helper functions will not work until
|
||||
this flag is cleared; this is the error code returned by the failing
|
||||
callback
|
||||
|
||||
unsigned int idle_notification;
|
||||
- if set, ->runtime_idle() is being executed
|
||||
|
||||
unsigned int request_pending;
|
||||
- if set, there's a pending request (i.e. a work item queued up into pm_wq)
|
||||
|
||||
enum rpm_request request;
|
||||
- type of request that's pending (valid if request_pending is set)
|
||||
|
||||
unsigned int deferred_resume;
|
||||
- set if ->runtime_resume() is about to be run while ->runtime_suspend() is
|
||||
being executed for that device and it is not practical to wait for the
|
||||
suspend to complete; means "start a resume as soon as you've suspended"
|
||||
|
||||
enum rpm_status runtime_status;
|
||||
- the run-time PM status of the device; this field's initial value is
|
||||
RPM_SUSPENDED, which means that each device is initially regarded by the
|
||||
PM core as 'suspended', regardless of its real hardware status
|
||||
|
||||
All of the above fields are members of the 'power' member of 'struct device'.
|
||||
|
||||
4. Run-time PM Device Helper Functions
|
||||
|
||||
The following run-time PM helper functions are defined in
|
||||
drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
||||
|
||||
void pm_runtime_init(struct device *dev);
|
||||
- initialize the device run-time PM fields in 'struct dev_pm_info'
|
||||
|
||||
void pm_runtime_remove(struct device *dev);
|
||||
- make sure that the run-time PM of the device will be disabled after
|
||||
removing the device from device hierarchy
|
||||
|
||||
int pm_runtime_idle(struct device *dev);
|
||||
- execute ->runtime_idle() for the device's bus type; returns 0 on success
|
||||
or error code on failure, where -EINPROGRESS means that ->runtime_idle()
|
||||
is already being executed
|
||||
|
||||
int pm_runtime_suspend(struct device *dev);
|
||||
- execute ->runtime_suspend() for the device's bus type; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'suspended', or
|
||||
error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
|
||||
to suspend the device again in future
|
||||
|
||||
int pm_runtime_resume(struct device *dev);
|
||||
- execute ->runtime_resume() for the device's bus type; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active' or
|
||||
error code on failure, where -EAGAIN means it may be safe to attempt to
|
||||
resume the device again in future, but 'power.runtime_error' should be
|
||||
checked additionally
|
||||
|
||||
int pm_request_idle(struct device *dev);
|
||||
- submit a request to execute ->runtime_idle() for the device's bus type
|
||||
(the request is represented by a work item in pm_wq); returns 0 on success
|
||||
or error code if the request has not been queued up
|
||||
|
||||
int pm_schedule_suspend(struct device *dev, unsigned int delay);
|
||||
- schedule the execution of ->runtime_suspend() for the device's bus type
|
||||
in future, where 'delay' is the time to wait before queuing up a suspend
|
||||
work item in pm_wq, in milliseconds (if 'delay' is zero, the work item is
|
||||
queued up immediately); returns 0 on success, 1 if the device's PM
|
||||
run-time status was already 'suspended', or error code if the request
|
||||
hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
|
||||
->runtime_suspend() is already scheduled and not yet expired, the new
|
||||
value of 'delay' will be used as the time to wait
|
||||
|
||||
int pm_request_resume(struct device *dev);
|
||||
- submit a request to execute ->runtime_resume() for the device's bus type
|
||||
(the request is represented by a work item in pm_wq); returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active', or
|
||||
error code if the request hasn't been queued up
|
||||
|
||||
void pm_runtime_get_noresume(struct device *dev);
|
||||
- increment the device's usage counter
|
||||
|
||||
int pm_runtime_get(struct device *dev);
|
||||
- increment the device's usage counter, run pm_request_resume(dev) and
|
||||
return its result
|
||||
|
||||
int pm_runtime_get_sync(struct device *dev);
|
||||
- increment the device's usage counter, run pm_runtime_resume(dev) and
|
||||
return its result
|
||||
|
||||
void pm_runtime_put_noidle(struct device *dev);
|
||||
- decrement the device's usage counter
|
||||
|
||||
int pm_runtime_put(struct device *dev);
|
||||
- decrement the device's usage counter, run pm_request_idle(dev) and return
|
||||
its result
|
||||
|
||||
int pm_runtime_put_sync(struct device *dev);
|
||||
- decrement the device's usage counter, run pm_runtime_idle(dev) and return
|
||||
its result
|
||||
|
||||
void pm_runtime_enable(struct device *dev);
|
||||
- enable the run-time PM helper functions to run the device bus type's
|
||||
run-time PM callbacks described in Section 2
|
||||
|
||||
int pm_runtime_disable(struct device *dev);
|
||||
- prevent the run-time PM helper functions from running the device bus
|
||||
type's run-time PM callbacks, make sure that all of the pending run-time
|
||||
PM operations on the device are either completed or canceled; returns
|
||||
1 if there was a resume request pending and it was necessary to execute
|
||||
->runtime_resume() for the device's bus type to satisfy that request,
|
||||
otherwise 0 is returned
|
||||
|
||||
void pm_suspend_ignore_children(struct device *dev, bool enable);
|
||||
- set/unset the power.ignore_children flag of the device
|
||||
|
||||
int pm_runtime_set_active(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
PM status to 'active' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
zero); it will fail and return error code if the device has a parent
|
||||
which is not active and the 'power.ignore_children' flag of which is unset
|
||||
|
||||
void pm_runtime_set_suspended(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
PM status to 'suspended' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
zero)
|
||||
|
||||
It is safe to execute the following helper functions from interrupt context:
|
||||
|
||||
pm_request_idle()
|
||||
pm_schedule_suspend()
|
||||
pm_request_resume()
|
||||
pm_runtime_get_noresume()
|
||||
pm_runtime_get()
|
||||
pm_runtime_put_noidle()
|
||||
pm_runtime_put()
|
||||
pm_suspend_ignore_children()
|
||||
pm_runtime_set_active()
|
||||
pm_runtime_set_suspended()
|
||||
pm_runtime_enable()
|
||||
|
||||
5. Run-time PM Initialization, Device Probing and Removal
|
||||
|
||||
Initially, the run-time PM is disabled for all devices, which means that the
|
||||
majority of the run-time PM helper functions described in Section 4 will return
|
||||
-EAGAIN until pm_runtime_enable() is called for the device.
|
||||
|
||||
In addition to that, the initial run-time PM status of all devices is
|
||||
'suspended', but it need not reflect the actual physical state of the device.
|
||||
Thus, if the device is initially active (i.e. it is able to process I/O), its
|
||||
run-time PM status must be changed to 'active', with the help of
|
||||
pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
|
||||
|
||||
However, if the device has a parent and the parent's run-time PM is enabled,
|
||||
calling pm_runtime_set_active() for the device will affect the parent, unless
|
||||
the parent's 'power.ignore_children' flag is set. Namely, in that case the
|
||||
parent won't be able to suspend at run time, using the PM core's helper
|
||||
functions, as long as the child's status is 'active', even if the child's
|
||||
run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
|
||||
the child yet or pm_runtime_disable() has been called for it). For this reason,
|
||||
once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
|
||||
should be called for it too as soon as reasonably possible or its run-time PM
|
||||
status should be changed back to 'suspended' with the help of
|
||||
pm_runtime_set_suspended().
|
||||
|
||||
If the default initial run-time PM status of the device (i.e. 'suspended')
|
||||
reflects the actual state of the device, its bus type's or its driver's
|
||||
->probe() callback will likely need to wake it up using one of the PM core's
|
||||
helper functions described in Section 4. In that case, pm_runtime_resume()
|
||||
should be used. Of course, for this purpose the device's run-time PM has to be
|
||||
enabled earlier by calling pm_runtime_enable().
|
||||
|
||||
If the device bus type's or driver's ->probe() or ->remove() callback runs
|
||||
pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
|
||||
they will fail returning -EAGAIN, because the device's usage counter is
|
||||
incremented by the core before executing ->probe() and ->remove(). Still, it
|
||||
may be desirable to suspend the device as soon as ->probe() or ->remove() has
|
||||
finished, so the PM core uses pm_runtime_put_sync() to invoke the device bus
|
||||
type's ->runtime_idle() callback at that time.
|
@ -495,6 +495,13 @@ and for each vararg a long value. So e.g. for a debug entry with a format
|
||||
string plus two varargs one would need to allocate a (3 * sizeof(long))
|
||||
byte data area in the debug_register() function.
|
||||
|
||||
IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
|
||||
use "%s" in the sprintf event functions, if the memory for the passed string is
|
||||
available as long as the debug feature exists. The reason behind this is that
|
||||
due to performance considerations only a pointer to the string is stored in
|
||||
the debug feature. If you log a string that is freed afterwards, you will get
|
||||
an OOPS when inspecting the debug feature, because then the debug feature will
|
||||
access the already freed memory.
|
||||
|
||||
NOTE: If using the sprintf view do NOT use other event/exception functions
|
||||
than the sprintf-event and -exception functions.
|
||||
|
@ -60,6 +60,12 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
slots - Reserve the slot index for the given driver.
|
||||
This option takes multiple strings.
|
||||
See "Module Autoloading Support" section for details.
|
||||
debug - Specifies the debug message level
|
||||
(0 = disable debug prints, 1 = normal debug messages,
|
||||
2 = verbose debug messages)
|
||||
This option appears only when CONFIG_SND_DEBUG=y.
|
||||
This option can be dynamically changed via sysfs
|
||||
/sys/module/snd/parameters/debug file.
|
||||
|
||||
Module snd-pcm-oss
|
||||
------------------
|
||||
@ -513,6 +519,26 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
or input, but you may use this module for any application which
|
||||
requires a sound card (like RealPlayer).
|
||||
|
||||
pcm_devs - Number of PCM devices assigned to each card
|
||||
(default = 1, up to 4)
|
||||
pcm_substreams - Number of PCM substreams assigned to each PCM
|
||||
(default = 8, up to 16)
|
||||
hrtimer - Use hrtimer (=1, default) or system timer (=0)
|
||||
fake_buffer - Fake buffer allocations (default = 1)
|
||||
|
||||
When multiple PCM devices are created, snd-dummy gives different
|
||||
behavior to each PCM device:
|
||||
0 = interleaved with mmap support
|
||||
1 = non-interleaved with mmap support
|
||||
2 = interleaved without mmap
|
||||
3 = non-interleaved without mmap
|
||||
|
||||
By default, the snd-dummy driver doesn't allocate the real buffers
|
||||
but either ignores read/write or mmaps a single dummy page to all
|
||||
buffer pages, in order to save the resources. If your apps need
|
||||
the read/written buffer data to be consistent, pass fake_buffer=0
|
||||
option.
|
||||
|
||||
The power-management is supported.
|
||||
|
||||
Module snd-echo3g
|
||||
@ -768,6 +794,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
bdl_pos_adj - Specifies the DMA IRQ timing delay in samples.
|
||||
Passing -1 will make the driver choose the appropriate
|
||||
value based on the controller chip.
|
||||
patch - Specifies the early "patch" files to modify the HD-audio
|
||||
setup before initializing the codecs. This option is
|
||||
available only when CONFIG_SND_HDA_PATCH_LOADER=y is set.
|
||||
See HD-Audio.txt for details.
|
||||
|
||||
[Single (global) options]
|
||||
single_cmd - Use single immediate commands to communicate with
|
||||
|
@ -114,8 +114,8 @@ ALC662/663/272
|
||||
samsung-nc10 Samsung NC10 mini notebook
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC882/885
|
||||
==========
|
||||
ALC882/883/885/888/889
|
||||
======================
|
||||
3stack-dig 3-jack with SPDIF I/O
|
||||
6stack-dig 6-jack digital with SPDIF I/O
|
||||
arima Arima W820Di1
|
||||
@ -127,12 +127,8 @@ ALC882/885
|
||||
mbp3 Macbook Pro rev3
|
||||
imac24 iMac 24'' with jack detection
|
||||
w2jc ASUS W2JC
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC883/888
|
||||
==========
|
||||
3stack-dig 3-jack with SPDIF I/O
|
||||
6stack-dig 6-jack digital with SPDIF I/O
|
||||
3stack-2ch-dig 3-jack with SPDIF I/O (ALC883)
|
||||
alc883-6stack-dig 6-jack digital with SPDIF I/O (ALC883)
|
||||
3stack-6ch 3-jack 6-channel
|
||||
3stack-6ch-dig 3-jack 6-channel with SPDIF I/O
|
||||
6stack-dig-demo 6-jack digital for Intel demo board
|
||||
@ -140,6 +136,7 @@ ALC883/888
|
||||
acer-aspire Acer Aspire 9810
|
||||
acer-aspire-4930g Acer Aspire 4930G
|
||||
acer-aspire-6530g Acer Aspire 6530G
|
||||
acer-aspire-7730g Acer Aspire 7730G
|
||||
acer-aspire-8930g Acer Aspire 8930G
|
||||
medion Medion Laptops
|
||||
medion-md2 Medion MD2
|
||||
@ -155,10 +152,13 @@ ALC883/888
|
||||
3stack-hp HP machines with 3stack (Lucknow, Samba boards)
|
||||
6stack-dell Dell machines with 6stack (Inspiron 530)
|
||||
mitac Mitac 8252D
|
||||
clevo-m540r Clevo M540R (6ch + digital)
|
||||
clevo-m720 Clevo M720 laptop series
|
||||
fujitsu-pi2515 Fujitsu AMILO Pi2515
|
||||
fujitsu-xa3530 Fujitsu AMILO XA3530
|
||||
3stack-6ch-intel Intel DG33* boards
|
||||
intel-alc889a Intel IbexPeak with ALC889A
|
||||
intel-x58 Intel DX58 with ALC889
|
||||
asus-p5q ASUS P5Q-EM boards
|
||||
mb31 MacBook 3,1
|
||||
sony-vaio-tt Sony VAIO TT
|
||||
@ -229,7 +229,7 @@ AD1984
|
||||
======
|
||||
basic default configuration
|
||||
thinkpad Lenovo Thinkpad T61/X61
|
||||
dell Dell T3400
|
||||
dell_desktop Dell T3400
|
||||
|
||||
AD1986A
|
||||
=======
|
||||
@ -258,6 +258,7 @@ Conexant 5045
|
||||
laptop-micsense Laptop with Mic sense (old model fujitsu)
|
||||
laptop-hpmicsense Laptop with HP and Mic senses
|
||||
benq Benq R55E
|
||||
laptop-hp530 HP 530 laptop
|
||||
test for testing/debugging purpose, almost all controls
|
||||
can be adjusted. Appearing only when compiled with
|
||||
$CONFIG_SND_DEBUG=y
|
||||
@ -278,9 +279,16 @@ Conexant 5051
|
||||
hp-dv6736 HP dv6736
|
||||
lenovo-x200 Lenovo X200 laptop
|
||||
|
||||
Conexant 5066
|
||||
=============
|
||||
laptop Basic Laptop config (default)
|
||||
dell-laptop Dell laptops
|
||||
olpc-xo-1_5 OLPC XO 1.5
|
||||
|
||||
STAC9200
|
||||
========
|
||||
ref Reference board
|
||||
oqo OQO Model 2
|
||||
dell-d21 Dell (unknown)
|
||||
dell-d22 Dell (unknown)
|
||||
dell-d23 Dell (unknown)
|
||||
@ -368,10 +376,12 @@ STAC92HD73*
|
||||
===========
|
||||
ref Reference board
|
||||
no-jd BIOS setup but without jack-detection
|
||||
intel Intel DG45* mobos
|
||||
dell-m6-amic Dell desktops/laptops with analog mics
|
||||
dell-m6-dmic Dell desktops/laptops with digital mics
|
||||
dell-m6 Dell desktops/laptops with both types of mics
|
||||
dell-eq Dell desktops/laptops
|
||||
alienware Alienware M17x
|
||||
auto BIOS setup (default)
|
||||
|
||||
STAC92HD83*
|
||||
@ -385,3 +395,8 @@ STAC9872
|
||||
========
|
||||
vaio VAIO laptop without SPDIF
|
||||
auto BIOS setup (default)
|
||||
|
||||
Cirrus Logic CS4206/4207
|
||||
========================
|
||||
mbp55 MacBook Pro 5,5
|
||||
auto BIOS setup (default)
|
||||
|
@ -138,6 +138,10 @@ override the BIOS setup or to provide more comprehensive features.
|
||||
The driver checks PCI SSID and looks through the static configuration
|
||||
table until any matching entry is found. If you have a new machine,
|
||||
you may see a message like below:
|
||||
------------------------------------------------------------------------
|
||||
hda_codec: ALC880: BIOS auto-probing.
|
||||
------------------------------------------------------------------------
|
||||
Meanwhile, in the earlier versions, you would see a message like:
|
||||
------------------------------------------------------------------------
|
||||
hda_codec: Unknown model for ALC880, trying auto-probe from BIOS...
|
||||
------------------------------------------------------------------------
|
||||
@ -403,6 +407,66 @@ re-configure based on that state, run like below:
|
||||
------------------------------------------------------------------------
|
||||
|
||||
|
||||
Early Patching
|
||||
~~~~~~~~~~~~~~
|
||||
When CONFIG_SND_HDA_PATCH_LOADER=y is set, you can pass a "patch" as a
|
||||
firmware file for modifying the HD-audio setup before initializing the
|
||||
codec. This can work basically like the reconfiguration via sysfs in
|
||||
the above, but it does it before the first codec configuration.
|
||||
|
||||
A patch file is a plain text file which looks like below:
|
||||
|
||||
------------------------------------------------------------------------
|
||||
[codec]
|
||||
0x12345678 0xabcd1234 2
|
||||
|
||||
[model]
|
||||
auto
|
||||
|
||||
[pincfg]
|
||||
0x12 0x411111f0
|
||||
|
||||
[verb]
|
||||
0x20 0x500 0x03
|
||||
0x20 0x400 0xff
|
||||
|
||||
[hint]
|
||||
hp_detect = yes
|
||||
------------------------------------------------------------------------
|
||||
|
||||
The file needs to have a line `[codec]`. The next line should contain
|
||||
three numbers indicating the codec vendor-id (0x12345678 in the
|
||||
example), the codec subsystem-id (0xabcd1234) and the address (2) of
|
||||
the codec. The remaining patch entries are applied to this specified codec
|
||||
until another codec entry is given.
|
||||
|
||||
The `[model]` line allows changing the model name of each codec.
|
||||
In the example above, it will be changed to model=auto.
|
||||
Note that this overrides the module option.
|
||||
|
||||
After the `[pincfg]` line, the contents are parsed as the initial
|
||||
default pin-configurations just like `user_pin_configs` sysfs above.
|
||||
The values can be shown in user_pin_configs sysfs file, too.
|
||||
|
||||
Similarly, the lines after `[verb]` are parsed as `init_verbs`
|
||||
sysfs entries, and the lines after `[hint]` are parsed as `hints`
|
||||
sysfs entries, respectively.
|
||||
|
||||
The hd-audio driver reads the file via request_firmware(). Thus,
|
||||
a patch file has to be located on the appropriate firmware path,
|
||||
typically, /lib/firmware. For example, when you pass the option
|
||||
`patch=hda-init.fw`, the file /lib/firmware/hda-init.fw must be
|
||||
present.
|
||||
|
||||
The patch module option is specific to each card instance, and you
|
||||
need to give one file name for each instance, separated by commas.
|
||||
For example, if you have two cards, one for an on-board analog and one
|
||||
for an HDMI video board, you may pass patch option like below:
|
||||
------------------------------------------------------------------------
|
||||
options snd-hda-intel patch=on-board-patch,hdmi-patch
|
||||
------------------------------------------------------------------------
|
||||
|
||||
|
||||
Power-Saving
|
||||
~~~~~~~~~~~~
|
||||
The power-saving is a kind of auto-suspend of the device. When the
|
||||
|
@ -101,6 +101,8 @@ card*/pcm*/xrun_debug
|
||||
bit 0 = Enable XRUN/jiffies debug messages
|
||||
bit 1 = Show stack trace at XRUN / jiffies check
|
||||
bit 2 = Enable additional jiffies check
|
||||
bit 3 = Log hwptr update at each period interrupt
|
||||
bit 4 = Log hwptr update at each snd_pcm_update_hw_ptr()
|
||||
|
||||
When the bit 0 is set, the driver will show the messages to
|
||||
kernel log when an xrun is detected. The debug message is
|
||||
@ -117,6 +119,9 @@ card*/pcm*/xrun_debug
|
||||
buggy) hardware that doesn't give smooth pointer updates.
|
||||
This feature is enabled via the bit 2.
|
||||
|
||||
Bits 3 and 4 are for logging the hwptr records. Note that
|
||||
these will give a flood of kernel messages.
|
||||
|
||||
card*/pcm*/sub*/info
|
||||
The general information of this PCM sub-stream.
|
||||
|
||||
|
@ -19,6 +19,7 @@ Currently, these files might (depending on your configuration)
|
||||
show up in /proc/sys/kernel:
|
||||
- acpi_video_flags
|
||||
- acct
|
||||
- callhome [ S390 only ]
|
||||
- auto_msgmni
|
||||
- core_pattern
|
||||
- core_uses_pid
|
||||
@ -91,6 +92,21 @@ valid for 30 seconds.
|
||||
|
||||
==============================================================
|
||||
|
||||
callhome:
|
||||
|
||||
Controls the kernel's callhome behavior in case of a kernel panic.
|
||||
|
||||
The s390 hardware allows an operating system to send a notification
|
||||
to a service organization (callhome) in case of an operating system panic.
|
||||
|
||||
When the value in this file is 0 (which is the default behavior)
|
||||
nothing happens in case of a kernel panic. If this value is set to "1"
|
||||
the complete kernel oops message is sent to the IBM customer service
|
||||
organization in case the mainframe the Linux operating system is running
|
||||
on has a service contract with IBM.
|
||||
|
||||
==============================================================
|
||||
|
||||
core_pattern:
|
||||
|
||||
core_pattern is used to specify a core dumpfile pattern name.
|
||||
|
@ -66,7 +66,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
|
||||
'b' - Will immediately reboot the system without syncing or unmounting
|
||||
your disks.
|
||||
|
||||
'c' - Will perform a kexec reboot in order to take a crashdump.
|
||||
'c' - Will perform a system crash by a NULL pointer dereference.
|
||||
A crashdump will be taken if configured.
|
||||
|
||||
'd' - Shows all locks that are held.
|
||||
|
||||
@ -141,8 +142,8 @@ useful when you want to exit a program that will not let you switch consoles.
|
||||
re'B'oot is good when you're unable to shut down. But you should also 'S'ync
|
||||
and 'U'mount first.
|
||||
|
||||
'C'rashdump can be used to manually trigger a crashdump when the system is hung.
|
||||
The kernel needs to have been built with CONFIG_KEXEC enabled.
|
||||
'C'rash can be used to manually trigger a crashdump when the system is hung.
|
||||
Note that this just triggers a crash if there is no dump mechanism available.
|
||||
|
||||
'S'ync is great when your system is locked up, it allows you to sync your
|
||||
disks and will certainly lessen the chance of data loss and fscking. Note
|
||||
|
@ -1,7 +1,7 @@
|
||||
Event Tracing
|
||||
|
||||
Documentation written by Theodore Ts'o
|
||||
Updated by Li Zefan
|
||||
Updated by Li Zefan and Tom Zanussi
|
||||
|
||||
1. Introduction
|
||||
===============
|
||||
@ -22,12 +22,12 @@ tracing information should be printed.
|
||||
---------------------------------
|
||||
|
||||
The events which are available for tracing can be found in the file
|
||||
/debug/tracing/available_events.
|
||||
/sys/kernel/debug/tracing/available_events.
|
||||
|
||||
To enable a particular event, such as 'sched_wakeup', simply echo it
|
||||
to /debug/tracing/set_event. For example:
|
||||
to /sys/kernel/debug/tracing/set_event. For example:
|
||||
|
||||
# echo sched_wakeup >> /debug/tracing/set_event
|
||||
# echo sched_wakeup >> /sys/kernel/debug/tracing/set_event
|
||||
|
||||
[ Note: '>>' is necessary, otherwise it will firstly disable
|
||||
all the events. ]
|
||||
@ -35,15 +35,15 @@ to /debug/tracing/set_event. For example:
|
||||
To disable an event, echo the event name to the set_event file prefixed
|
||||
with an exclamation point:
|
||||
|
||||
# echo '!sched_wakeup' >> /debug/tracing/set_event
|
||||
# echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event
|
||||
|
||||
To disable all events, echo an empty line to the set_event file:
|
||||
|
||||
# echo > /debug/tracing/set_event
|
||||
# echo > /sys/kernel/debug/tracing/set_event
|
||||
|
||||
To enable all events, echo '*:*' or '*:' to the set_event file:
|
||||
|
||||
# echo *:* > /debug/tracing/set_event
|
||||
# echo *:* > /sys/kernel/debug/tracing/set_event
|
||||
|
||||
The events are organized into subsystems, such as ext4, irq, sched,
|
||||
etc., and a full event name looks like this: <subsystem>:<event>. The
|
||||
@ -52,29 +52,29 @@ file. All of the events in a subsystem can be specified via the syntax
|
||||
"<subsystem>:*"; for example, to enable all irq events, you can use the
|
||||
command:
|
||||
|
||||
# echo 'irq:*' > /debug/tracing/set_event
|
||||
# echo 'irq:*' > /sys/kernel/debug/tracing/set_event
|
||||
|
||||
2.2 Via the 'enable' toggle
|
||||
---------------------------
|
||||
|
||||
The events available are also listed in /debug/tracing/events/ hierarchy
|
||||
The events available are also listed in /sys/kernel/debug/tracing/events/ hierarchy
|
||||
of directories.
|
||||
|
||||
To enable event 'sched_wakeup':
|
||||
|
||||
# echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
|
||||
# echo 1 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable
|
||||
|
||||
To disable it:
|
||||
|
||||
# echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
|
||||
# echo 0 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable
|
||||
|
||||
To enable all events in sched subsystem:
|
||||
|
||||
# echo 1 > /debug/tracing/events/sched/enable
|
||||
# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
|
||||
|
||||
To enable all events:
|
||||
|
||||
# echo 1 > /debug/tracing/events/enable
|
||||
# echo 1 > /sys/kernel/debug/tracing/events/enable
|
||||
|
||||
When reading one of these enable files, there are four results:
|
||||
|
||||
@ -83,8 +83,199 @@ When reading one of these enable files, there are four results:
|
||||
X - there is a mixture of events enabled and disabled
|
||||
? - this file does not affect any event
|
||||
|
||||
2.3 Boot option
|
||||
---------------
|
||||
|
||||
In order to facilitate early boot debugging, use boot option:
|
||||
|
||||
trace_event=[event-list]
|
||||
|
||||
The format of this boot option is the same as described in section 2.1.
|
||||
|
||||
3. Defining an event-enabled tracepoint
|
||||
=======================================
|
||||
|
||||
See the example provided in samples/trace_events
|
||||
|
||||
4. Event formats
|
||||
================
|
||||
|
||||
Each trace event has a 'format' file associated with it that contains
|
||||
a description of each field in a logged event. This information can
|
||||
be used to parse the binary trace stream, and is also the place to
|
||||
find the field names that can be used in event filters (see section 5).
|
||||
|
||||
It also displays the format string that will be used to print the
|
||||
event in text mode, along with the event name and ID used for
|
||||
profiling.
|
||||
|
||||
Every event has a set of 'common' fields associated with it; these are
|
||||
the fields prefixed with 'common_'. The other fields vary between
|
||||
events and correspond to the fields defined in the TRACE_EVENT
|
||||
definition for that event.
|
||||
|
||||
Each field in the format has the form:
|
||||
|
||||
field:field-type field-name; offset:N; size:N;
|
||||
|
||||
where offset is the offset of the field in the trace record and size
|
||||
is the size of the data item, in bytes.
|
||||
|
||||
For example, here's the information displayed for the 'sched_wakeup'
|
||||
event:
|
||||
|
||||
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format
|
||||
|
||||
name: sched_wakeup
|
||||
ID: 60
|
||||
format:
|
||||
field:unsigned short common_type; offset:0; size:2;
|
||||
field:unsigned char common_flags; offset:2; size:1;
|
||||
field:unsigned char common_preempt_count; offset:3; size:1;
|
||||
field:int common_pid; offset:4; size:4;
|
||||
field:int common_tgid; offset:8; size:4;
|
||||
|
||||
field:char comm[TASK_COMM_LEN]; offset:12; size:16;
|
||||
field:pid_t pid; offset:28; size:4;
|
||||
field:int prio; offset:32; size:4;
|
||||
field:int success; offset:36; size:4;
|
||||
field:int cpu; offset:40; size:4;
|
||||
|
||||
print fmt: "task %s:%d [%d] success=%d [%03d]", REC->comm, REC->pid,
|
||||
REC->prio, REC->success, REC->cpu
|
||||
|
||||
This event contains 10 fields, the first 5 common and the remaining 5
|
||||
event-specific. All the fields for this event are numeric, except for
|
||||
'comm' which is a string, a distinction important for event filtering.
|
||||
|
||||
5. Event filtering
|
||||
==================
|
||||
|
||||
Trace events can be filtered in the kernel by associating boolean
|
||||
'filter expressions' with them. As soon as an event is logged into
|
||||
the trace buffer, its fields are checked against the filter expression
|
||||
associated with that event type. An event with field values that
|
||||
'match' the filter will appear in the trace output, and an event whose
|
||||
values don't match will be discarded. An event with no filter
|
||||
associated with it matches everything, and is the default when no
|
||||
filter has been set for an event.
|
||||
|
||||
5.1 Expression syntax
|
||||
---------------------
|
||||
|
||||
A filter expression consists of one or more 'predicates' that can be
|
||||
combined using the logical operators '&&' and '||'. A predicate is
|
||||
simply a clause that compares the value of a field contained within a
|
||||
logged event with a constant value and returns either 0 or 1 depending
|
||||
on whether the field value matched (1) or didn't match (0):
|
||||
|
||||
field-name relational-operator value
|
||||
|
||||
Parentheses can be used to provide arbitrary logical groupings and
|
||||
double-quotes can be used to prevent the shell from interpreting
|
||||
operators as shell metacharacters.
|
||||
|
||||
The field-names available for use in filters can be found in the
|
||||
'format' files for trace events (see section 4).
|
||||
|
||||
The relational-operators depend on the type of the field being tested:
|
||||
|
||||
The operators available for numeric fields are:
|
||||
|
||||
==, !=, <, <=, >, >=
|
||||
|
||||
And for string fields they are:
|
||||
|
||||
==, !=
|
||||
|
||||
Currently, only exact string matches are supported.
|
||||
|
||||
Currently, the maximum number of predicates in a filter is 16.
|
||||
|
||||
5.2 Setting filters
|
||||
-------------------
|
||||
|
||||
A filter for an individual event is set by writing a filter expression
|
||||
to the 'filter' file for the given event.
|
||||
|
||||
For example:
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched/sched_wakeup
|
||||
# echo "common_preempt_count > 4" > filter
|
||||
|
||||
A slightly more involved example:
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched/sched_signal_send
|
||||
# echo "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter
|
||||
|
||||
If there is an error in the expression, you'll get an 'Invalid
|
||||
argument' error when setting it, and the erroneous string along with
|
||||
an error message can be seen by looking at the filter e.g.:
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched/sched_signal_send
|
||||
# echo "((sig >= 10 && sig < 15) || dsig == 17) && comm != bash" > filter
|
||||
-bash: echo: write error: Invalid argument
|
||||
# cat filter
|
||||
((sig >= 10 && sig < 15) || dsig == 17) && comm != bash
|
||||
^
|
||||
parse_error: Field not found
|
||||
|
||||
Currently the caret ('^') for an error always appears at the beginning of
|
||||
the filter string; the error message should still be useful though
|
||||
even without more accurate position info.
|
||||
|
||||
5.3 Clearing filters
|
||||
--------------------
|
||||
|
||||
To clear the filter for an event, write a '0' to the event's filter
|
||||
file.
|
||||
|
||||
To clear the filters for all events in a subsystem, write a '0' to the
|
||||
subsystem's filter file.
|
||||
|
||||
5.4 Subsystem filters
|
||||
---------------------
|
||||
|
||||
For convenience, filters for every event in a subsystem can be set or
|
||||
cleared as a group by writing a filter expression into the filter file
|
||||
at the root of the subsystem. Note however, that if a filter for any
|
||||
event within the subsystem lacks a field specified in the subsystem
|
||||
filter, or if the filter can't be applied for any other reason, the
|
||||
filter for that event will retain its previous setting. This can
|
||||
result in an unintended mixture of filters which could lead to
|
||||
confusing (to the user who might think different filters are in
|
||||
effect) trace output. Only filters that reference just the common
|
||||
fields can be guaranteed to propagate successfully to all events.
|
||||
|
||||
Here are a few subsystem filter examples that also illustrate the
|
||||
above points:
|
||||
|
||||
Clear the filters on all events in the sched subsystem:
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched
|
||||
# echo 0 > filter
|
||||
# cat sched_switch/filter
|
||||
none
|
||||
# cat sched_wakeup/filter
|
||||
none
|
||||
|
||||
Set a filter using only common fields for all events in the sched
|
||||
subsystem (all events end up with the same filter):
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched
|
||||
# echo common_pid == 0 > filter
|
||||
# cat sched_switch/filter
|
||||
common_pid == 0
|
||||
# cat sched_wakeup/filter
|
||||
common_pid == 0
|
||||
|
||||
Attempt to set a filter using a non-common field for all events in the
|
||||
sched subsystem (all events but those that have a prev_pid field retain
|
||||
their old filters):
|
||||
|
||||
# cd /sys/kernel/debug/tracing/events/sched
|
||||
# echo prev_pid == 0 > filter
|
||||
# cat sched_switch/filter
|
||||
prev_pid == 0
|
||||
# cat sched_wakeup/filter
|
||||
common_pid == 0
|
||||
|
233
Documentation/trace/ftrace-design.txt
Normal file
233
Documentation/trace/ftrace-design.txt
Normal file
@ -0,0 +1,233 @@
|
||||
function tracer guts
|
||||
====================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
Here we will cover the architecture pieces that the common function tracing
|
||||
code relies on for proper functioning. Things are broken down into increasing
|
||||
complexity so that you can start simple and at least get basic functionality.
|
||||
|
||||
Note that this focuses on architecture implementation details only. If you
|
||||
want more explanation of a feature in terms of common code, review the common
|
||||
ftrace.txt file.
|
||||
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
Ftrace relies on these features being implemented:
|
||||
STACKTRACE_SUPPORT - implement save_stack_trace()
|
||||
TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h
|
||||
|
||||
|
||||
HAVE_FUNCTION_TRACER
|
||||
--------------------
|
||||
|
||||
You will need to implement the mcount and the ftrace_stub functions.
|
||||
|
||||
The exact mcount symbol name will depend on your toolchain. Some call it
|
||||
"mcount", "_mcount", or even "__mcount". You can probably figure it out by
|
||||
running something like:
|
||||
$ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount
|
||||
call mcount
|
||||
We'll make the assumption below that the symbol is "mcount" just to keep things
|
||||
nice and simple in the examples.
|
||||
|
||||
Keep in mind that the ABI that is in effect inside of the mcount function is
|
||||
*highly* architecture/toolchain specific. We cannot help you in this regard,
|
||||
sorry. Dig up some old documentation and/or find someone more familiar than
|
||||
you to bang ideas off of. Typically, register usage (argument/scratch/etc...)
|
||||
is a major issue at this point, especially in relation to the location of the
|
||||
mcount call (before/after function prologue). You might also want to look at
|
||||
how glibc has implemented the mcount function for your architecture. It might
|
||||
be (semi-)relevant.
|
||||
|
||||
The mcount function should check the function pointer ftrace_trace_function
|
||||
to see if it is set to ftrace_stub. If it is, there is nothing for you to do,
|
||||
so return immediately. If it isn't, then call that function in the same way
|
||||
the mcount function normally calls __mcount_internal -- the first argument is
|
||||
the "frompc" while the second argument is the "selfpc" (adjusted to remove the
|
||||
size of the mcount call that is embedded in the function).
|
||||
|
||||
For example, if the function foo() calls bar(), when the bar() function calls
|
||||
mcount(), the arguments mcount() will pass to the tracer are:
|
||||
"frompc" - the address bar() will use to return to foo()
|
||||
"selfpc" - the address bar() (with _mcount() size adjustment)
|
||||
|
||||
Also keep in mind that this mcount function will be called *a lot*, so
|
||||
optimizing for the default case of no tracer will help the smooth running of
|
||||
your system when tracing is disabled. So the start of the mcount function is
|
||||
typically the bare min with checking things before returning. That also means
|
||||
the code flow should usually be kept linear (i.e. no branching in the nop case).
|
||||
This is of course an optimization and not a hard requirement.
|
||||
|
||||
Here is some pseudo code that should help (these functions should actually be
|
||||
implemented in assembly):
|
||||
|
||||
void ftrace_stub(void)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
void mcount(void)
|
||||
{
|
||||
/* save any bare state needed in order to do initial checking */
|
||||
|
||||
extern void (*ftrace_trace_function)(unsigned long, unsigned long);
|
||||
if (ftrace_trace_function != ftrace_stub)
|
||||
goto do_trace;
|
||||
|
||||
/* restore any bare state */
|
||||
|
||||
return;
|
||||
|
||||
do_trace:
|
||||
|
||||
/* save all state needed by the ABI (see paragraph above) */
|
||||
|
||||
unsigned long frompc = ...;
|
||||
unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
|
||||
ftrace_trace_function(frompc, selfpc);
|
||||
|
||||
/* restore all state needed by the ABI */
|
||||
}
|
||||
|
||||
Don't forget to export mcount for modules !
|
||||
extern void mcount(void);
|
||||
EXPORT_SYMBOL(mcount);
|
||||
|
||||
|
||||
HAVE_FUNCTION_TRACE_MCOUNT_TEST
|
||||
-------------------------------
|
||||
|
||||
This is an optional optimization for the normal case when tracing is turned off
|
||||
in the system. If you do not enable this Kconfig option, the common ftrace
|
||||
code will take care of doing the checking for you.
|
||||
|
||||
To support this feature, you only need to check the function_trace_stop
|
||||
variable in the mcount function. If it is non-zero, there is no tracing to be
|
||||
done at all, so you can return.
|
||||
|
||||
This additional pseudo code would simply be:
|
||||
void mcount(void)
|
||||
{
|
||||
/* save any bare state needed in order to do initial checking */
|
||||
|
||||
+ if (function_trace_stop)
|
||||
+ return;
|
||||
|
||||
extern void (*ftrace_trace_function)(unsigned long, unsigned long);
|
||||
if (ftrace_trace_function != ftrace_stub)
|
||||
...
|
||||
|
||||
|
||||
HAVE_FUNCTION_GRAPH_TRACER
|
||||
--------------------------
|
||||
|
||||
Deep breath ... time to do some real work. Here you will need to update the
|
||||
mcount function to check ftrace graph function pointers, as well as implement
|
||||
some functions to save (hijack) and restore the return address.
|
||||
|
||||
The mcount function should check the function pointers ftrace_graph_return
|
||||
(compare to ftrace_stub) and ftrace_graph_entry (compare to
|
||||
ftrace_graph_entry_stub). If either of those is not set to the relevant stub
|
||||
function, call the arch-specific function ftrace_graph_caller which in turn
|
||||
calls the arch-specific function prepare_ftrace_return. Neither of these
|
||||
function names are strictly required, but you should use them anyways to stay
|
||||
consistent across the architecture ports -- easier to compare & contrast
|
||||
things.
|
||||
|
||||
The arguments to prepare_ftrace_return are slightly different than what are
|
||||
passed to ftrace_trace_function. The second argument "selfpc" is the same,
|
||||
but the first argument should be a pointer to the "frompc". Typically this is
|
||||
located on the stack. This allows the function to hijack the return address
|
||||
temporarily to have it point to the arch-specific function return_to_handler.
|
||||
That function will simply call the common ftrace_return_to_handler function and
|
||||
that will return the original return address with which you can return to the
|
||||
original call site.
|
||||
|
||||
Here is the updated mcount pseudo code:
|
||||
void mcount(void)
|
||||
{
|
||||
...
|
||||
if (ftrace_trace_function != ftrace_stub)
|
||||
goto do_trace;
|
||||
|
||||
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
+ extern void (*ftrace_graph_return)(...);
|
||||
+ extern void (*ftrace_graph_entry)(...);
|
||||
+ if (ftrace_graph_return != ftrace_stub ||
|
||||
+ ftrace_graph_entry != ftrace_graph_entry_stub)
|
||||
+ ftrace_graph_caller();
|
||||
+#endif
|
||||
|
||||
/* restore any bare state */
|
||||
...
|
||||
|
||||
Here is the pseudo code for the new ftrace_graph_caller assembly function:
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
void ftrace_graph_caller(void)
|
||||
{
|
||||
/* save all state needed by the ABI */
|
||||
|
||||
unsigned long *frompc = &...;
|
||||
unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
|
||||
prepare_ftrace_return(frompc, selfpc);
|
||||
|
||||
/* restore all state needed by the ABI */
|
||||
}
|
||||
#endif
|
||||
|
||||
For information on how to implement prepare_ftrace_return(), simply look at
|
||||
the x86 version. The only architecture-specific piece in it is the setup of
|
||||
the fault recovery table (the asm(...) code). The rest should be the same
|
||||
across architectures.
|
||||
|
||||
Here is the pseudo code for the new return_to_handler assembly function. Note
|
||||
that the ABI that applies here is different from what applies to the mcount
|
||||
code. Since you are returning from a function (after the epilogue), you might
|
||||
be able to skimp on things saved/restored (usually just registers used to pass
|
||||
return values).
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
void return_to_handler(void)
|
||||
{
|
||||
/* save all state needed by the ABI (see paragraph above) */
|
||||
|
||||
void (*original_return_point)(void) = ftrace_return_to_handler();
|
||||
|
||||
/* restore all state needed by the ABI */
|
||||
|
||||
/* this is usually either a return or a jump */
|
||||
original_return_point();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
HAVE_FTRACE_NMI_ENTER
|
||||
---------------------
|
||||
|
||||
If you can't trace NMI functions, then skip this option.
|
||||
|
||||
<details to be filled>
|
||||
|
||||
|
||||
HAVE_FTRACE_SYSCALLS
|
||||
---------------------
|
||||
|
||||
<details to be filled>
|
||||
|
||||
|
||||
HAVE_FTRACE_MCOUNT_RECORD
|
||||
-------------------------
|
||||
|
||||
See scripts/recordmcount.pl for more info.
|
||||
|
||||
<details to be filled>
|
||||
|
||||
|
||||
HAVE_DYNAMIC_FTRACE
|
||||
---------------------
|
||||
|
||||
<details to be filled>
|
@ -26,6 +26,12 @@ disabled, and more (ftrace allows for tracer plugins, which
|
||||
means that the list of tracers can always grow).
|
||||
|
||||
|
||||
Implementation Details
|
||||
----------------------
|
||||
|
||||
See ftrace-design.txt for details for arch porters and such.
|
||||
|
||||
|
||||
The File System
|
||||
---------------
|
||||
|
||||
@ -85,26 +91,19 @@ of ftrace. Here is a list of some of the key files:
|
||||
This file holds the output of the trace in a human
|
||||
readable format (described below).
|
||||
|
||||
latency_trace:
|
||||
|
||||
This file shows the same trace but the information
|
||||
is organized more to display possible latencies
|
||||
in the system (described below).
|
||||
|
||||
trace_pipe:
|
||||
|
||||
The output is the same as the "trace" file but this
|
||||
file is meant to be streamed with live tracing.
|
||||
Reads from this file will block until new data
|
||||
is retrieved. Unlike the "trace" and "latency_trace"
|
||||
files, this file is a consumer. This means reading
|
||||
from this file causes sequential reads to display
|
||||
more current data. Once data is read from this
|
||||
file, it is consumed, and will not be read
|
||||
again with a sequential read. The "trace" and
|
||||
"latency_trace" files are static, and if the
|
||||
tracer is not adding more data, they will display
|
||||
the same information every time they are read.
|
||||
Reads from this file will block until new data is
|
||||
retrieved. Unlike the "trace" file, this file is a
|
||||
consumer. This means reading from this file causes
|
||||
sequential reads to display more current data. Once
|
||||
data is read from this file, it is consumed, and
|
||||
will not be read again with a sequential read. The
|
||||
"trace" file is static, and if the tracer is not
|
||||
adding more data, it will display the same
|
||||
information every time it is read.
|
||||
|
||||
trace_options:
|
||||
|
||||
@ -117,10 +116,10 @@ of ftrace. Here is a list of some of the key files:
|
||||
Some of the tracers record the max latency.
|
||||
For example, the time interrupts are disabled.
|
||||
This time is saved in this file. The max trace
|
||||
will also be stored, and displayed by either
|
||||
"trace" or "latency_trace". A new max trace will
|
||||
only be recorded if the latency is greater than
|
||||
the value in this file. (in microseconds)
|
||||
will also be stored, and displayed by "trace".
|
||||
A new max trace will only be recorded if the
|
||||
latency is greater than the value in this
|
||||
file. (in microseconds)
|
||||
|
||||
buffer_size_kb:
|
||||
|
||||
@ -210,7 +209,7 @@ Here is the list of current tracers that may be configured.
|
||||
the trace with the longest max latency.
|
||||
See tracing_max_latency. When a new max is recorded,
|
||||
it replaces the old trace. It is best to view this
|
||||
trace via the latency_trace file.
|
||||
trace with the latency-format option enabled.
|
||||
|
||||
"preemptoff"
|
||||
|
||||
@ -307,8 +306,8 @@ the lowest priority thread (pid 0).
|
||||
Latency trace format
|
||||
--------------------
|
||||
|
||||
For traces that display latency times, the latency_trace file
|
||||
gives somewhat more information to see why a latency happened.
|
||||
When the latency-format option is enabled, the trace file gives
|
||||
somewhat more information to see why a latency happened.
|
||||
Here is a typical trace.
|
||||
|
||||
# tracer: irqsoff
|
||||
@ -380,9 +379,10 @@ explains which is which.
|
||||
|
||||
The above is mostly meaningful for kernel developers.
|
||||
|
||||
time: This differs from the trace file output. The trace file output
|
||||
includes an absolute timestamp. The timestamp used by the
|
||||
latency_trace file is relative to the start of the trace.
|
||||
time: When the latency-format option is enabled, the trace file
|
||||
output includes a timestamp relative to the start of the
|
||||
trace. This differs from the output when latency-format
|
||||
is disabled, which includes an absolute timestamp.
|
||||
|
||||
delay: This is just to help catch your eye a bit better. And
|
||||
needs to be fixed to be only relative to the same CPU.
|
||||
@ -440,7 +440,8 @@ Here are the available options:
|
||||
sym-addr:
|
||||
bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
|
||||
|
||||
verbose - This deals with the latency_trace file.
|
||||
verbose - This deals with the trace file when the
|
||||
latency-format option is enabled.
|
||||
|
||||
bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
|
||||
(+0.000ms): simple_strtoul (strict_strtoul)
|
||||
@ -472,7 +473,7 @@ Here are the available options:
|
||||
the app is no longer running
|
||||
|
||||
The lookup is performed when you read
|
||||
trace,trace_pipe,latency_trace. Example:
|
||||
trace,trace_pipe. Example:
|
||||
|
||||
a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
|
||||
x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
|
||||
@ -481,6 +482,11 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
|
||||
every scheduling event. Will add overhead if
|
||||
there's a lot of tasks running at once.
|
||||
|
||||
latency-format - This option changes the trace. When
|
||||
it is enabled, the trace displays
|
||||
additional information about the
|
||||
latencies, as described in "Latency
|
||||
trace format".
|
||||
|
||||
sched_switch
|
||||
------------
|
||||
@ -596,12 +602,13 @@ To reset the maximum, echo 0 into tracing_max_latency. Here is
|
||||
an example:
|
||||
|
||||
# echo irqsoff > current_tracer
|
||||
# echo latency-format > trace_options
|
||||
# echo 0 > tracing_max_latency
|
||||
# echo 1 > tracing_enabled
|
||||
# ls -ltr
|
||||
[...]
|
||||
# echo 0 > tracing_enabled
|
||||
# cat latency_trace
|
||||
# cat trace
|
||||
# tracer: irqsoff
|
||||
#
|
||||
irqsoff latency trace v1.1.5 on 2.6.26
|
||||
@ -703,12 +710,13 @@ which preemption was disabled. The control of preemptoff tracer
|
||||
is much like the irqsoff tracer.
|
||||
|
||||
# echo preemptoff > current_tracer
|
||||
# echo latency-format > trace_options
|
||||
# echo 0 > tracing_max_latency
|
||||
# echo 1 > tracing_enabled
|
||||
# ls -ltr
|
||||
[...]
|
||||
# echo 0 > tracing_enabled
|
||||
# cat latency_trace
|
||||
# cat trace
|
||||
# tracer: preemptoff
|
||||
#
|
||||
preemptoff latency trace v1.1.5 on 2.6.26-rc8
|
||||
@ -850,12 +858,13 @@ Again, using this trace is much like the irqsoff and preemptoff
|
||||
tracers.
|
||||
|
||||
# echo preemptirqsoff > current_tracer
|
||||
# echo latency-format > trace_options
|
||||
# echo 0 > tracing_max_latency
|
||||
# echo 1 > tracing_enabled
|
||||
# ls -ltr
|
||||
[...]
|
||||
# echo 0 > tracing_enabled
|
||||
# cat latency_trace
|
||||
# cat trace
|
||||
# tracer: preemptirqsoff
|
||||
#
|
||||
preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
|
||||
@ -1012,11 +1021,12 @@ Instead of performing an 'ls', we will run 'sleep 1' under
|
||||
'chrt' which changes the priority of the task.
|
||||
|
||||
# echo wakeup > current_tracer
|
||||
# echo latency-format > trace_options
|
||||
# echo 0 > tracing_max_latency
|
||||
# echo 1 > tracing_enabled
|
||||
# chrt -f 5 sleep 1
|
||||
# echo 0 > tracing_enabled
|
||||
# cat latency_trace
|
||||
# cat trace
|
||||
# tracer: wakeup
|
||||
#
|
||||
wakeup latency trace v1.1.5 on 2.6.26-rc8
|
||||
|
42
Documentation/trace/function-graph-fold.vim
Normal file
42
Documentation/trace/function-graph-fold.vim
Normal file
@ -0,0 +1,42 @@
|
||||
" Enable folding for ftrace function_graph traces.
|
||||
"
|
||||
" To use, :source this file while viewing a function_graph trace, or use vim's
|
||||
" -S option to load from the command-line together with a trace. You can then
|
||||
" use the usual vim fold commands, such as "za", to open and close nested
|
||||
" functions. While closed, a fold will show the total time taken for a call,
|
||||
" as would normally appear on the line with the closing brace. Folded
|
||||
" functions will not include finish_task_switch(), so folding should remain
|
||||
" relatively sane even through a context switch.
|
||||
"
|
||||
" Note that this will almost certainly only work well with a
|
||||
" single-CPU trace (e.g. trace-cmd report --cpu 1).
|
||||
|
||||
function! FunctionGraphFoldExpr(lnum)
|
||||
let line = getline(a:lnum)
|
||||
if line[-1:] == '{'
|
||||
if line =~ 'finish_task_switch() {$'
|
||||
return '>1'
|
||||
endif
|
||||
return 'a1'
|
||||
elseif line[-1:] == '}'
|
||||
return 's1'
|
||||
else
|
||||
return '='
|
||||
endif
|
||||
endfunction
|
||||
|
||||
function! FunctionGraphFoldText()
|
||||
let s = split(getline(v:foldstart), '|', 1)
|
||||
if getline(v:foldend+1) =~ 'finish_task_switch() {$'
|
||||
let s[2] = ' task switch '
|
||||
else
|
||||
let e = split(getline(v:foldend), '|', 1)
|
||||
let s[2] = e[2]
|
||||
endif
|
||||
return join(s, '|')
|
||||
endfunction
|
||||
|
||||
setlocal foldexpr=FunctionGraphFoldExpr(v:lnum)
|
||||
setlocal foldtext=FunctionGraphFoldText()
|
||||
setlocal foldcolumn=12
|
||||
setlocal foldmethod=expr
|
@ -1,17 +0,0 @@
|
||||
The power tracer collects detailed information about C-state and P-state
|
||||
transitions, instead of just looking at the high-level "average"
|
||||
information.
|
||||
|
||||
There is a helper script found in scripts/tracing/power.pl in the kernel
|
||||
sources which can be used to parse this information and create a
|
||||
Scalable Vector Graphics (SVG) picture from the trace data.
|
||||
|
||||
To use this tracer:
|
||||
|
||||
echo 0 > /sys/kernel/debug/tracing/tracing_enabled
|
||||
echo power > /sys/kernel/debug/tracing/current_tracer
|
||||
echo 1 > /sys/kernel/debug/tracing/tracing_enabled
|
||||
sleep 1
|
||||
echo 0 > /sys/kernel/debug/tracing/tracing_enabled
|
||||
cat /sys/kernel/debug/tracing/trace | \
|
||||
perl scripts/tracing/power.pl > out.svg
|
955
Documentation/trace/ring-buffer-design.txt
Normal file
955
Documentation/trace/ring-buffer-design.txt
Normal file
@ -0,0 +1,955 @@
|
||||
Lockless Ring Buffer Design
|
||||
===========================
|
||||
|
||||
Copyright 2009 Red Hat Inc.
|
||||
Author: Steven Rostedt <srostedt@redhat.com>
|
||||
License: The GNU Free Documentation License, Version 1.2
|
||||
(dual licensed under the GPL v2)
|
||||
Reviewers: Mathieu Desnoyers, Huang Ying, Hidetoshi Seto,
|
||||
and Frederic Weisbecker.
|
||||
|
||||
|
||||
Written for: 2.6.31
|
||||
|
||||
Terminology used in this Document
|
||||
---------------------------------
|
||||
|
||||
tail - where new writes happen in the ring buffer.
|
||||
|
||||
head - where new reads happen in the ring buffer.
|
||||
|
||||
producer - the task that writes into the ring buffer (same as writer)
|
||||
|
||||
writer - same as producer
|
||||
|
||||
consumer - the task that reads from the buffer (same as reader)
|
||||
|
||||
reader - same as consumer.
|
||||
|
||||
reader_page - A page outside the ring buffer used solely (for the most part)
|
||||
by the reader.
|
||||
|
||||
head_page - a pointer to the page that the reader will use next
|
||||
|
||||
tail_page - a pointer to the page that will be written to next
|
||||
|
||||
commit_page - a pointer to the page with the last finished non nested write.
|
||||
|
||||
cmpxchg - hardware assisted atomic transaction that performs the following:
|
||||
|
||||
A = B iff previous A == C
|
||||
|
||||
R = cmpxchg(A, C, B) is saying that we replace A with B if and only if
|
||||
current A is equal to C, and we put the old (current) A into R
|
||||
|
||||
R gets the previous A regardless if A is updated with B or not.
|
||||
|
||||
To see if the update was successful a compare of R == C may be used.
|
||||
|
||||
The Generic Ring Buffer
|
||||
-----------------------
|
||||
|
||||
The ring buffer can be used in either an overwrite mode or in
|
||||
producer/consumer mode.
|
||||
|
||||
Producer/consumer mode is where if the producer were to fill up the
|
||||
buffer before the consumer could free up anything, the producer
|
||||
will stop writing to the buffer. This will lose most recent events.
|
||||
|
||||
Overwrite mode is where if the producer were to fill up the buffer
|
||||
before the consumer could free up anything, the producer will
|
||||
overwrite the older data. This will lose the oldest events.
|
||||
|
||||
No two writers can write at the same time (on the same per cpu buffer),
|
||||
but a writer may interrupt another writer, but it must finish writing
|
||||
before the previous writer may continue. This is very important to the
|
||||
algorithm. The writers act like a "stack". The way interrupts work
|
||||
enforces this behavior.
|
||||
|
||||
|
||||
writer1 start
|
||||
<preempted> writer2 start
|
||||
<preempted> writer3 start
|
||||
writer3 finishes
|
||||
writer2 finishes
|
||||
writer1 finishes
|
||||
|
||||
This is very much like a writer being preempted by an interrupt and
|
||||
the interrupt doing a write as well.
|
||||
|
||||
Readers can happen at any time. But no two readers may run at the
|
||||
same time, nor can a reader preempt/interrupt another reader. A reader
|
||||
can not preempt/interrupt a writer, but it may read/consume from the
|
||||
buffer at the same time as a writer is writing, but the reader must be
|
||||
on another processor to do so. A reader may read on its own processor
|
||||
and can be preempted by a writer.
|
||||
|
||||
A writer can preempt a reader, but a reader can not preempt a writer.
|
||||
But a reader can read the buffer at the same time (on another processor)
|
||||
as a writer.
|
||||
|
||||
The ring buffer is made up of a list of pages held together by a linked list.
|
||||
|
||||
At initialization a reader page is allocated for the reader that is not
|
||||
part of the ring buffer.
|
||||
|
||||
The head_page, tail_page and commit_page are all initialized to point
|
||||
to the same page.
|
||||
|
||||
The reader page is initialized to have its next pointer pointing to
|
||||
the head page, and its previous pointer pointing to a page before
|
||||
the head page.
|
||||
|
||||
The reader has its own page to use. At start up time, this page is
|
||||
allocated but is not attached to the list. When the reader wants
|
||||
to read from the buffer, if its page is empty (like it is on start up)
|
||||
it will swap its page with the head_page. The old reader page will
|
||||
become part of the ring buffer and the head_page will be removed.
|
||||
The page after the inserted page (old reader_page) will become the
|
||||
new head page.
|
||||
|
||||
Once the new page is given to the reader, the reader could do what
|
||||
it wants with it, as long as a writer has left that page.
|
||||
|
||||
A sample of how the reader page is swapped: Note this does not
|
||||
show the head page in the buffer, it is for demonstrating a swap
|
||||
only.
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |
|
||||
+------+
|
||||
+---+ +---+ +---+
|
||||
| |-->| |-->| |
|
||||
| |<--| |<--| |
|
||||
+---+ +---+ +---+
|
||||
^ | ^ |
|
||||
| +-------------+ |
|
||||
+-----------------+
|
||||
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |-------------------+
|
||||
+------+ v
|
||||
| +---+ +---+ +---+
|
||||
| | |-->| |-->| |
|
||||
| | |<--| |<--| |<-+
|
||||
| +---+ +---+ +---+ |
|
||||
| ^ | ^ | |
|
||||
| | +-------------+ | |
|
||||
| +-----------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |-------------------+
|
||||
+------+ <---------------+ v
|
||||
| ^ +---+ +---+ +---+
|
||||
| | | |-->| |-->| |
|
||||
| | | | | |<--| |<-+
|
||||
| | +---+ +---+ +---+ |
|
||||
| | | ^ | |
|
||||
| | +-------------+ | |
|
||||
| +-----------------------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
+------+
|
||||
|buffer| RING BUFFER
|
||||
|page |-------------------+
|
||||
+------+ <---------------+ v
|
||||
| ^ +---+ +---+ +---+
|
||||
| | | | | |-->| |
|
||||
| | New | | | |<--| |<-+
|
||||
| | Reader +---+ +---+ +---+ |
|
||||
| | page ----^ | |
|
||||
| | | |
|
||||
| +-----------------------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
|
||||
|
||||
It is possible that the page swapped is the commit page and the tail page,
|
||||
if what is in the ring buffer is less than what is held in a buffer page.
|
||||
|
||||
|
||||
reader page commit page tail page
|
||||
| | |
|
||||
v | |
|
||||
+---+ | |
|
||||
| |<----------+ |
|
||||
| |<------------------------+
|
||||
| |------+
|
||||
+---+ |
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
This case is still valid for this algorithm.
|
||||
When the writer leaves the page, it simply goes into the ring buffer
|
||||
since the reader page still points to the next location in the ring
|
||||
buffer.
|
||||
|
||||
|
||||
The main pointers:
|
||||
|
||||
reader page - The page used solely by the reader and is not part
|
||||
of the ring buffer (may be swapped in)
|
||||
|
||||
head page - the next page in the ring buffer that will be swapped
|
||||
with the reader page.
|
||||
|
||||
tail page - the page where the next write will take place.
|
||||
|
||||
commit page - the page that last finished a write.
|
||||
|
||||
The commit page only is updated by the outer most writer in the
|
||||
writer stack. A writer that preempts another writer will not move the
|
||||
commit page.
|
||||
|
||||
When data is written into the ring buffer, a position is reserved
|
||||
in the ring buffer and passed back to the writer. When the writer
|
||||
is finished writing data into that position, it commits the write.
|
||||
|
||||
Another write (or a read) may take place at anytime during this
|
||||
transaction. If another write happens it must finish before continuing
|
||||
with the previous write.
|
||||
|
||||
|
||||
Write reserve:
|
||||
|
||||
Buffer page
|
||||
+---------+
|
||||
|written |
|
||||
+---------+ <--- given back to writer (current commit)
|
||||
|reserved |
|
||||
+---------+ <--- tail pointer
|
||||
| empty |
|
||||
+---------+
|
||||
|
||||
Write commit:
|
||||
|
||||
Buffer page
|
||||
+---------+
|
||||
|written |
|
||||
+---------+
|
||||
|written |
|
||||
+---------+ <--- next position for write (current commit)
|
||||
| empty |
|
||||
+---------+
|
||||
|
||||
|
||||
If a write happens after the first reserve:
|
||||
|
||||
Buffer page
|
||||
+---------+
|
||||
|written |
|
||||
+---------+ <-- current commit
|
||||
|reserved |
|
||||
+---------+ <--- given back to second writer
|
||||
|reserved |
|
||||
+---------+ <--- tail pointer
|
||||
|
||||
After second writer commits:
|
||||
|
||||
|
||||
Buffer page
|
||||
+---------+
|
||||
|written |
|
||||
+---------+ <--(last full commit)
|
||||
|reserved |
|
||||
+---------+
|
||||
|pending |
|
||||
|commit |
|
||||
+---------+ <--- tail pointer
|
||||
|
||||
When the first writer commits:
|
||||
|
||||
Buffer page
|
||||
+---------+
|
||||
|written |
|
||||
+---------+
|
||||
|written |
|
||||
+---------+
|
||||
|written |
|
||||
+---------+ <--(last full commit and tail pointer)
|
||||
|
||||
|
||||
The commit pointer points to the last write location that was
|
||||
committed without preempting another write. When a write that
|
||||
preempted another write is committed, it only becomes a pending commit
|
||||
and will not be a full commit till all writes have been committed.
|
||||
|
||||
The commit page points to the page that has the last full commit.
|
||||
The tail page points to the page with the last write (before
|
||||
committing).
|
||||
|
||||
The tail page is always equal to or after the commit page. It may
|
||||
be several pages ahead. If the tail page catches up to the commit
|
||||
page then no more writes may take place (regardless of the mode
|
||||
of the ring buffer: overwrite and producer/consumer).
|
||||
|
||||
The order of pages is:
|
||||
|
||||
head page
|
||||
commit page
|
||||
tail page
|
||||
|
||||
Possible scenario:
|
||||
tail page
|
||||
head page commit page |
|
||||
| | |
|
||||
v v v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
There is a special case where the head page is after the commit page
|
||||
and possibly the tail page. That is when the commit (and tail) page has been
|
||||
swapped with the reader page. This is because the head page is always
|
||||
part of the ring buffer, but the reader page is not. Whenever there
|
||||
has been less than a full page that has been committed inside the ring buffer,
|
||||
and a reader swaps out a page, it will be swapping out the commit page.
|
||||
|
||||
|
||||
reader page commit page tail page
|
||||
| | |
|
||||
v | |
|
||||
+---+ | |
|
||||
| |<----------+ |
|
||||
| |<------------------------+
|
||||
| |------+
|
||||
+---+ |
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
^
|
||||
|
|
||||
head page
|
||||
|
||||
|
||||
In this case, the head page will not move when the tail and commit
|
||||
move back into the ring buffer.
|
||||
|
||||
The reader can not swap a page into the ring buffer if the commit page
|
||||
is still on that page. If the read meets the last commit (real commit
|
||||
not pending or reserved), then there is nothing more to read.
|
||||
The buffer is considered empty until another full commit finishes.
|
||||
|
||||
When the tail meets the head page, if the buffer is in overwrite mode,
|
||||
the head page will be pushed ahead one. If the buffer is in producer/consumer
|
||||
mode, the write will fail.
|
||||
|
||||
Overwrite mode:
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
^
|
||||
|
|
||||
head page
|
||||
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
^
|
||||
|
|
||||
head page
|
||||
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
^
|
||||
|
|
||||
head page
|
||||
|
||||
Note, the reader page will still point to the previous head page.
|
||||
But when a swap takes place, it will use the most recent head page.
|
||||
|
||||
|
||||
Making the Ring Buffer Lockless:
|
||||
--------------------------------
|
||||
|
||||
The main idea behind the lockless algorithm is to combine the moving
|
||||
of the head_page pointer with the swapping of pages with the reader.
|
||||
State flags are placed inside the pointer to the page. To do this,
|
||||
each page must be aligned in memory by 4 bytes. This will allow the 2
|
||||
least significant bits of the address to be used as flags. Since
|
||||
they will always be zero for the address. To get the address,
|
||||
simply mask out the flags.
|
||||
|
||||
MASK = ~3
|
||||
|
||||
address & MASK
|
||||
|
||||
Two flags will be kept by these two bits:
|
||||
|
||||
HEADER - the page being pointed to is a head page
|
||||
|
||||
UPDATE - the page being pointed to is being updated by a writer
|
||||
and was or is about to be a head page.
|
||||
|
||||
|
||||
reader page
|
||||
|
|
||||
v
|
||||
+---+
|
||||
| |------+
|
||||
+---+ |
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
|
||||
The above pointer "-H->" would have the HEADER flag set. That is
|
||||
the next page is the next page to be swapped out by the reader.
|
||||
This pointer means the next page is the head page.
|
||||
|
||||
When the tail page meets the head pointer, it will use cmpxchg to
|
||||
change the pointer to the UPDATE state:
|
||||
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
"-U->" represents a pointer in the UPDATE state.
|
||||
|
||||
Any access to the reader will need to take some sort of lock to serialize
|
||||
the readers. But the writers will never take a lock to write to the
|
||||
ring buffer. This means we only need to worry about a single reader,
|
||||
and writes only preempt in "stack" formation.
|
||||
|
||||
When the reader tries to swap the page with the ring buffer, it
|
||||
will also use cmpxchg. If the flag bit in the pointer to the
|
||||
head page does not have the HEADER flag set, the compare will fail
|
||||
and the reader will need to look for the new head page and try again.
|
||||
Note, the flag UPDATE and HEADER are never set at the same time.
|
||||
|
||||
The reader swaps the reader page as follows:
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |
|
||||
+------+
|
||||
+---+ +---+ +---+
|
||||
| |--->| |--->| |
|
||||
| |<---| |<---| |
|
||||
+---+ +---+ +---+
|
||||
^ | ^ |
|
||||
| +---------------+ |
|
||||
+-----H-------------+
|
||||
|
||||
The reader sets the reader page next pointer as HEADER to the page after
|
||||
the head page.
|
||||
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |-------H-----------+
|
||||
+------+ v
|
||||
| +---+ +---+ +---+
|
||||
| | |--->| |--->| |
|
||||
| | |<---| |<---| |<-+
|
||||
| +---+ +---+ +---+ |
|
||||
| ^ | ^ | |
|
||||
| | +---------------+ | |
|
||||
| +-----H-------------+ |
|
||||
+--------------------------------------+
|
||||
|
||||
It does a cmpxchg with the pointer to the previous head page to make it
|
||||
point to the reader page. Note that the new pointer does not have the HEADER
|
||||
flag set. This action atomically moves the head page forward.
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |-------H-----------+
|
||||
+------+ v
|
||||
| ^ +---+ +---+ +---+
|
||||
| | | |-->| |-->| |
|
||||
| | | |<--| |<--| |<-+
|
||||
| | +---+ +---+ +---+ |
|
||||
| | | ^ | |
|
||||
| | +-------------+ | |
|
||||
| +-----------------------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
After the new head page is set, the previous pointer of the head page is
|
||||
updated to the reader page.
|
||||
|
||||
+------+
|
||||
|reader| RING BUFFER
|
||||
|page |-------H-----------+
|
||||
+------+ <---------------+ v
|
||||
| ^ +---+ +---+ +---+
|
||||
| | | |-->| |-->| |
|
||||
| | | | | |<--| |<-+
|
||||
| | +---+ +---+ +---+ |
|
||||
| | | ^ | |
|
||||
| | +-------------+ | |
|
||||
| +-----------------------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
+------+
|
||||
|buffer| RING BUFFER
|
||||
|page |-------H-----------+ <--- New head page
|
||||
+------+ <---------------+ v
|
||||
| ^ +---+ +---+ +---+
|
||||
| | | | | |-->| |
|
||||
| | New | | | |<--| |<-+
|
||||
| | Reader +---+ +---+ +---+ |
|
||||
| | page ----^ | |
|
||||
| | | |
|
||||
| +-----------------------------+ |
|
||||
+------------------------------------+
|
||||
|
||||
Another important point. The page that the reader page points back to
|
||||
by its previous pointer (the one that now points to the new head page)
|
||||
never points back to the reader page. That is because the reader page is
|
||||
not part of the ring buffer. Traversing the ring buffer via the next pointers
|
||||
will always stay in the ring buffer. Traversing the ring buffer via the
|
||||
prev pointers may not.
|
||||
|
||||
Note, the way to determine a reader page is simply by examining the previous
|
||||
pointer of the page. If the next pointer of the previous page does not
|
||||
point back to the original page, then the original page is a reader page:
|
||||
|
||||
|
||||
+--------+
|
||||
| reader | next +----+
|
||||
| page |-------->| |<====== (buffer page)
|
||||
+--------+ +----+
|
||||
| | ^
|
||||
| v | next
|
||||
prev | +----+
|
||||
+------------->| |
|
||||
+----+
|
||||
|
||||
The way the head page moves forward:
|
||||
|
||||
When the tail page meets the head page and the buffer is in overwrite mode
|
||||
and more writes take place, the head page must be moved forward before the
|
||||
writer may move the tail page. The way this is done is that the writer
|
||||
performs a cmpxchg to convert the pointer to the head page from the HEADER
|
||||
flag to have the UPDATE flag set. Once this is done, the reader will
|
||||
not be able to swap the head page from the buffer, nor will it be able to
|
||||
move the head page, until the writer is finished with the move.
|
||||
|
||||
This eliminates any races that the reader can have on the writer. The reader
|
||||
must spin, and this is why the reader can not preempt the writer.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The following page will be made into the new head page.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
After the new head page has been set, we can set the old head page
|
||||
pointer back to NORMAL.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
After the head page has been moved, the tail page may now move forward.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
|
||||
The above are the trivial updates. Now for the more complex scenarios.
|
||||
|
||||
|
||||
As stated before, if enough writes preempt the first write, the
|
||||
tail page may make it all the way around the buffer and meet the commit
|
||||
page. At this time, we must start dropping writes (usually with some kind
|
||||
of warning to the user). But what happens if the commit was still on the
|
||||
reader page? The commit page is not part of the ring buffer. The tail page
|
||||
must account for this.
|
||||
|
||||
|
||||
reader page commit page
|
||||
| |
|
||||
v |
|
||||
+---+ |
|
||||
| |<----------+
|
||||
| |
|
||||
| |------+
|
||||
+---+ |
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
^
|
||||
|
|
||||
tail page
|
||||
|
||||
If the tail page were to simply push the head page forward, the commit when
|
||||
leaving the reader page would not be pointing to the correct page.
|
||||
|
||||
The solution to this is to test if the commit page is on the reader page
|
||||
before pushing the head page. If it is, then it can be assumed that the
|
||||
tail page wrapped the buffer, and we must drop new writes.
|
||||
|
||||
This is not a race condition, because the commit page can only be moved
|
||||
by the outer most writer (the writer that was preempted).
|
||||
This means that the commit will not move while a writer is moving the
|
||||
tail page. The reader can not swap the reader page if it is also being
|
||||
used as the commit page. The reader can simply check that the commit
|
||||
is off the reader page. Once the commit page leaves the reader page
|
||||
it will never go back on it unless a reader does another swap with the
|
||||
buffer page that is also the commit page.
|
||||
|
||||
|
||||
Nested writes
|
||||
-------------
|
||||
|
||||
In the pushing forward of the tail page we must first push forward
|
||||
the head page if the head page is the next page. If the head page
|
||||
is not the next page, the tail page is simply updated with a cmpxchg.
|
||||
|
||||
Only writers move the tail page. This must be done atomically to protect
|
||||
against nested writers.
|
||||
|
||||
temp_page = tail_page
|
||||
next_page = temp_page->next
|
||||
cmpxchg(tail_page, temp_page, next_page)
|
||||
|
||||
The above will update the tail page if it is still pointing to the expected
|
||||
page. If this fails, a nested write pushed it forward, and the current write
|
||||
does not need to push it.
|
||||
|
||||
|
||||
temp page
|
||||
|
|
||||
v
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
Nested write comes in and moves the tail page forward:
|
||||
|
||||
tail page (moved by nested writer)
|
||||
temp page |
|
||||
| |
|
||||
v v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The above would fail the cmpxchg, but since the tail page has already
|
||||
been moved forward, the writer will just try again to reserve storage
|
||||
on the new tail page.
|
||||
|
||||
But the moving of the head page is a bit more complex.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The write converts the head page pointer to UPDATE.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
But if a nested writer preempts here, it will see that the next
|
||||
page is a head page, but it is also nested. It will detect that
|
||||
it is nested and will save that information. The detection is the
|
||||
fact that it sees the UPDATE flag instead of a HEADER or NORMAL
|
||||
pointer.
|
||||
|
||||
The nested writer will set the new head page pointer.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
But it will not reset the update back to normal. Only the writer
|
||||
that converted a pointer from HEAD to UPDATE will convert it back
|
||||
to NORMAL.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
After the nested writer finishes, the outer most writer will convert
|
||||
the UPDATE pointer to NORMAL.
|
||||
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
|
||||
It can be even more complex if several nested writes came in and moved
|
||||
the tail page ahead several pages:
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-H->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The write converts the head page pointer to UPDATE.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
Next writer comes in, and sees the update and sets up the new
|
||||
head page.
|
||||
|
||||
(second writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The nested writer moves the tail page forward. But does not set the old
|
||||
update page to NORMAL because it is not the outer most writer.
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
Another writer preempts and sees the page after the tail page is a head page.
|
||||
It changes it from HEAD to UPDATE.
|
||||
|
||||
(third writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-U->| |--->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The writer will move the head page forward:
|
||||
|
||||
|
||||
(third writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-U->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
But now that the third writer did change the HEAD flag to UPDATE it
|
||||
will convert it to normal:
|
||||
|
||||
|
||||
(third writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
|
||||
Then it will move the tail page, and return back to the second writer.
|
||||
|
||||
|
||||
(second writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
|
||||
The second writer will fail to move the tail page because it was already
|
||||
moved, so it will try again and add its data to the new tail page.
|
||||
It will return to the first writer.
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
The first writer can not atomically test if the tail page moved
|
||||
while it updates the HEAD page. It will then update the head page to
|
||||
what it thinks is the new head page.
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
tail page
|
||||
|
|
||||
v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
Since the cmpxchg returns the old value of the pointer the first writer
|
||||
will see it succeeded in updating the pointer from NORMAL to HEAD.
|
||||
But as we can see, this is not good enough. It must also check to see
|
||||
if the tail page is either where it used to be or on the next page:
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
A B tail page
|
||||
| | |
|
||||
v v v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |-H->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
If tail page != A and tail page does not equal B, then it must reset the
|
||||
pointer back to NORMAL. Because it only needs to worry about
|
||||
nested writers, it only needs to check this after setting the HEAD page.
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
A B tail page
|
||||
| | |
|
||||
v v v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |-U->| |--->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
||||
Now the writer can update the head page. This is also why the head page must
|
||||
remain in UPDATE and only reset by the outer most writer. This prevents
|
||||
the reader from seeing the incorrect head page.
|
||||
|
||||
|
||||
(first writer)
|
||||
|
||||
A B tail page
|
||||
| | |
|
||||
v v v
|
||||
+---+ +---+ +---+ +---+
|
||||
<---| |--->| |--->| |--->| |-H->
|
||||
--->| |<---| |<---| |<---| |<---
|
||||
+---+ +---+ +---+ +---+
|
||||
|
194
Documentation/vgaarbiter.txt
Normal file
194
Documentation/vgaarbiter.txt
Normal file
@ -0,0 +1,194 @@
|
||||
|
||||
VGA Arbiter
|
||||
===========
|
||||
|
||||
Graphic devices are accessed through ranges in I/O or memory space. While most
|
||||
modern devices allow relocation of such ranges, some "Legacy" VGA devices
|
||||
implemented on PCI will typically have the same "hard-decoded" addresses as
|
||||
they did on ISA. For more details see "PCI Bus Binding to IEEE Std 1275-1994
|
||||
Standard for Boot (Initialization Configuration) Firmware Revision 2.1"
|
||||
Section 7, Legacy Devices.
|
||||
|
||||
The Resource Access Control (RAC) module inside the X server [0] existed for
|
||||
the legacy VGA arbitration task (besides other bus management tasks) when more
|
||||
than one legacy device co-exists on the same machine. But the problem happens
|
||||
when these devices are trying to be accessed by different userspace clients
|
||||
(e.g. two servers in parallel). Their address assignments conflict. Moreover,
|
||||
ideally, being a userspace application, it is not the role of the X
|
||||
server to control bus resources. Therefore an arbitration scheme outside of
|
||||
the X server is needed to control the sharing of these resources. This
|
||||
document introduces the operation of the VGA arbiter implemented for Linux
|
||||
kernel.
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
I. Details and Theory of Operation
|
||||
I.1 vgaarb
|
||||
I.2 libpciaccess
|
||||
I.3 xf86VGAArbiter (X server implementation)
|
||||
II. Credits
|
||||
III. References
|
||||
|
||||
|
||||
I. Details and Theory of Operation
|
||||
==================================
|
||||
|
||||
I.1 vgaarb
|
||||
----------
|
||||
|
||||
The vgaarb is a module of the Linux Kernel. When it is initially loaded, it
|
||||
scans all PCI devices and adds the VGA ones inside the arbitration. The
|
||||
arbiter then enables/disables the decoding on different devices of the VGA
|
||||
legacy instructions. Devices which do not want/need to use the arbiter may
|
||||
explicitly tell it by calling vga_set_legacy_decoding().
|
||||
|
||||
The kernel exports a char device interface (/dev/vga_arbiter) to the clients,
|
||||
which has the following semantics:
|
||||
|
||||
open : open user instance of the arbiter. By default, it's attached to
|
||||
the default VGA device of the system.
|
||||
|
||||
close : close user instance. Release locks made by the user
|
||||
|
||||
read : return a string indicating the status of the target like:
|
||||
|
||||
"<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)"
|
||||
|
||||
An IO state string is of the form {io,mem,io+mem,none}, mc and
|
||||
ic are respectively mem and io lock counts (for debugging/
|
||||
diagnostic only). "decodes" indicate what the card currently
|
||||
decodes, "owns" indicates what is currently enabled on it, and
|
||||
"locks" indicates what is locked by this card. If the card is
|
||||
unplugged, we get "invalid" then for card_ID and an -ENODEV
|
||||
error is returned for any command until a new card is targeted.
|
||||
|
||||
|
||||
write : write a command to the arbiter. List of commands:
|
||||
|
||||
target <card_ID> : switch target to card <card_ID> (see below)
|
||||
lock <io_state> : acquires locks on target ("none" is an invalid io_state)
|
||||
trylock <io_state> : non-blocking acquire locks on target (returns EBUSY if
|
||||
unsuccessful)
|
||||
unlock <io_state> : release locks on target
|
||||
unlock all : release all locks on target held by this user (not
|
||||
implemented yet)
|
||||
decodes <io_state> : set the legacy decoding attributes for the card
|
||||
|
||||
poll : event if something changes on any card (not just the
|
||||
target)
|
||||
|
||||
card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default"
|
||||
to go back to the system default card (TODO: not implemented yet). Currently,
|
||||
only PCI is supported as a prefix, but the userland API may support other bus
|
||||
types in the future, even if the current kernel implementation doesn't.
|
||||
|
||||
Note about locks:
|
||||
|
||||
The driver keeps track of which user has which locks on which card. It
|
||||
supports stacking, like the kernel one. This complexifies the implementation
|
||||
a bit, but makes the arbiter more tolerant to user space problems and able
|
||||
to properly cleanup in all cases when a process dies.
|
||||
Currently, a max of 16 cards can have locks simultaneously issued from
|
||||
user space for a given user (file descriptor instance) of the arbiter.
|
||||
|
||||
In the case of devices hot-{un,}plugged, there is a hook - pci_notify() - to
|
||||
notify them being added/removed in the system and automatically added/removed
|
||||
in the arbiter.
|
||||
|
||||
There's also an in-kernel API of the arbiter in the case of DRM, vgacon and
|
||||
others which may use the arbiter.
|
||||
|
||||
|
||||
I.2 libpciaccess
|
||||
----------------
|
||||
|
||||
To use the vga arbiter char device, an API was implemented inside the
|
||||
libpciaccess library. One field was added to struct pci_device (each device
|
||||
on the system):
|
||||
|
||||
/* the type of resource decoded by the device */
|
||||
int vgaarb_rsrc;
|
||||
|
||||
Besides it, in pci_system were added:
|
||||
|
||||
int vgaarb_fd;
|
||||
int vga_count;
|
||||
struct pci_device *vga_target;
|
||||
struct pci_device *vga_default_dev;
|
||||
|
||||
|
||||
The vga_count is used to keep track of how many cards are being
|
||||
arbitrated, so for instance if there's only one then it can totally escape the
|
||||
scheme.
|
||||
|
||||
|
||||
These functions below acquire VGA resources for the given card and mark those
|
||||
resources as locked. If the resources requested are "normal" (and not legacy)
|
||||
resources, the arbiter will first check whether the card is doing legacy
|
||||
decoding for that type of resource. If yes, the lock is "converted" into a
|
||||
legacy resource lock. The arbiter will first look for all VGA cards that
|
||||
might conflict and disable their IOs and/or Memory access, including VGA
|
||||
forwarding on P2P bridges if necessary, so that the requested resources can
|
||||
be used. Then, the card is marked as locking these resources and the IO and/or
|
||||
Memory access is enabled on the card (including VGA forwarding on parent
|
||||
P2P bridges if any). In the case of vga_arb_lock(), the function will block
|
||||
if some conflicting card is already locking one of the required resources (or
|
||||
any resource on a different bus segment, since P2P bridges don't differentiate
|
||||
VGA memory and IO afaik). If the card already owns the resources, the function
|
||||
succeeds. vga_arb_trylock() will return (-EBUSY) instead of blocking. Nested
|
||||
calls are supported (a per-resource counter is maintained).
|
||||
|
||||
|
||||
Set the target device of this client.
|
||||
int pci_device_vgaarb_set_target (struct pci_device *dev);
|
||||
|
||||
|
||||
For instance, in x86 if two devices on the same bus want to lock different
|
||||
resources, both will succeed (lock). If devices are in different buses and
|
||||
trying to lock different resources, only the first who tried succeeds.
|
||||
int pci_device_vgaarb_lock (void);
|
||||
int pci_device_vgaarb_trylock (void);
|
||||
|
||||
Unlock resources of device.
|
||||
int pci_device_vgaarb_unlock (void);
|
||||
|
||||
Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA
|
||||
Memory, both, or none. All cards default to both, the card driver (fbdev for
|
||||
example) should tell the arbiter if it has disabled legacy decoding, so the
|
||||
card can be left out of the arbitration process (and can be safe to take
|
||||
interrupts at any time).
|
||||
int pci_device_vgaarb_decodes (int new_vgaarb_rsrc);
|
||||
|
||||
Connects to the arbiter device, allocates the struct
|
||||
int pci_device_vgaarb_init (void);
|
||||
|
||||
Close the connection
|
||||
void pci_device_vgaarb_fini (void);
|
||||
|
||||
|
||||
I.3 xf86VGAArbiter (X server implementation)
|
||||
--------------------------------------------
|
||||
|
||||
(TODO)
|
||||
|
||||
X server basically wraps all the functions that touch VGA registers somehow.
|
||||
|
||||
|
||||
II. Credits
|
||||
===========
|
||||
|
||||
Benjamin Herrenschmidt (IBM?) started this work when he discussed such design
|
||||
with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and
|
||||
Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work
|
||||
enhancing the kernel code to adapt as a kernel module and also did the
|
||||
implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave
|
||||
Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree.
|
||||
|
||||
|
||||
III. References
|
||||
===============
|
||||
|
||||
[0] http://cgit.freedesktop.org/xorg/xserver/commit/?id=4b42448a2388d40f257774fbffdccaea87bd0347
|
||||
[1] http://lists.freedesktop.org/archives/xorg/2005-March/006663.html
|
||||
[2] http://lists.freedesktop.org/archives/xorg/2005-March/006745.html
|
||||
[3] http://lists.freedesktop.org/archives/xorg/2007-October/029507.html
|
@ -21,3 +21,5 @@
|
||||
20 -> Hauppauge WinTV-HVR1255 [0070:2251]
|
||||
21 -> Hauppauge WinTV-HVR1210 [0070:2291,0070:2295]
|
||||
22 -> Mygica X8506 DMB-TH [14f1:8651]
|
||||
23 -> Magic-Pro ProHDTV Extreme 2 [14f1:8657]
|
||||
24 -> Hauppauge WinTV-HVR1850 [0070:8541]
|
||||
|
@ -80,3 +80,4 @@
|
||||
79 -> Terratec Cinergy HT PCI MKII [153b:1177]
|
||||
80 -> Hauppauge WinTV-IR Only [0070:9290]
|
||||
81 -> Leadtek WinFast DTV1800 Hybrid [107d:6654]
|
||||
82 -> WinFast DTV2000 H rev. J [107d:6f2b]
|
||||
|
@ -1,5 +1,5 @@
|
||||
0 -> Unknown EM2800 video grabber (em2800) [eb1a:2800]
|
||||
1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883]
|
||||
1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2710,eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883]
|
||||
2 -> Terratec Cinergy 250 USB (em2820/em2840) [0ccd:0036]
|
||||
3 -> Pinnacle PCTV USB 2 (em2820/em2840) [2304:0208]
|
||||
4 -> Hauppauge WinTV USB 2 (em2820/em2840) [2040:4200,2040:4201]
|
||||
@ -7,7 +7,7 @@
|
||||
6 -> Terratec Cinergy 200 USB (em2800)
|
||||
7 -> Leadtek Winfast USB II (em2800) [0413:6023]
|
||||
8 -> Kworld USB2800 (em2800)
|
||||
9 -> Pinnacle Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker (em2820/em2840) [1b80:e302,2304:0207,2304:021a]
|
||||
9 -> Pinnacle Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker (em2820/em2840) [1b80:e302,1b80:e304,2304:0207,2304:021a]
|
||||
10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500]
|
||||
11 -> Terratec Hybrid XS (em2880) [0ccd:0042]
|
||||
12 -> Kworld PVR TV 2800 RF (em2820/em2840)
|
||||
@ -20,7 +20,7 @@
|
||||
19 -> EM2860/SAA711X Reference Design (em2860)
|
||||
20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002]
|
||||
21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801]
|
||||
22 -> Unknown EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751]
|
||||
22 -> EM2710/EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751]
|
||||
23 -> Huaqi DLCW-130 (em2750)
|
||||
24 -> D-Link DUB-T210 TV Tuner (em2820/em2840) [2001:f112]
|
||||
25 -> Gadmei UTV310 (em2820/em2840)
|
||||
@ -33,7 +33,7 @@
|
||||
34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f]
|
||||
35 -> Typhoon DVD Maker (em2860)
|
||||
36 -> NetGMBH Cam (em2860)
|
||||
37 -> Gadmei UTV330 (em2860)
|
||||
37 -> Gadmei UTV330 (em2860) [eb1a:50a6]
|
||||
38 -> Yakumo MovieMixer (em2861)
|
||||
39 -> KWorld PVRTV 300U (em2861) [eb1a:e300]
|
||||
40 -> Plextor ConvertX PX-TV100U (em2861) [093b:a005]
|
||||
@ -67,3 +67,4 @@
|
||||
69 -> KWorld ATSC 315U HDTV TV Box (em2882) [eb1a:a313]
|
||||
70 -> Evga inDtube (em2882)
|
||||
71 -> Silvercrest Webcam 1.3mpix (em2820/em2840)
|
||||
72 -> Gadmei UTV330+ (em2861)
|
||||
|
@ -153,8 +153,8 @@
|
||||
152 -> Asus Tiger Rev:1.00 [1043:4857]
|
||||
153 -> Kworld Plus TV Analog Lite PCI [17de:7128]
|
||||
154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d]
|
||||
155 -> Hauppauge WinTV-HVR1120 ATSC/QAM-Hybrid [0070:6706,0070:6708]
|
||||
156 -> Hauppauge WinTV-HVR1110r3 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a]
|
||||
155 -> Hauppauge WinTV-HVR1150 ATSC/QAM-Hybrid [0070:6706,0070:6708]
|
||||
156 -> Hauppauge WinTV-HVR1120 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a]
|
||||
157 -> Avermedia AVerTV Studio 507UA [1461:a11b]
|
||||
158 -> AVerMedia Cardbus TV/Radio (E501R) [1461:b7e9]
|
||||
159 -> Beholder BeholdTV 505 RDS [0000:505B]
|
||||
@ -167,3 +167,7 @@
|
||||
166 -> Beholder BeholdTV 607 RDS [5ace:6073]
|
||||
167 -> Beholder BeholdTV 609 RDS [5ace:6092]
|
||||
168 -> Beholder BeholdTV 609 RDS [5ace:6093]
|
||||
169 -> Compro VideoMate S350/S300 [185b:c900]
|
||||
170 -> AverMedia AverTV Studio 505 [1461:a115]
|
||||
171 -> Beholder BeholdTV X7 [5ace:7595]
|
||||
172 -> RoverMedia TV Link Pro FM [19d1:0138]
|
||||
|
@ -78,3 +78,4 @@ tuner=77 - TCL tuner MF02GIP-5N-E
|
||||
tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner
|
||||
tuner=79 - Philips PAL/SECAM multi (FM1216 MK5)
|
||||
tuner=80 - Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough
|
||||
tuner=81 - Partsnic (Daewoo) PTI-5NF05
|
||||
|
@ -18,8 +18,8 @@ Table of Contents
|
||||
|
||||
1.0 Introduction
|
||||
|
||||
The file ../drivers/char/c-qcam.c is a device driver for the
|
||||
Logitech (nee Connectix) parallel port interface color CCD camera.
|
||||
The file ../../drivers/media/video/c-qcam.c is a device driver for
|
||||
the Logitech (nee Connectix) parallel port interface color CCD camera.
|
||||
This is a fairly inexpensive device for capturing images. Logitech
|
||||
does not currently provide information for developers, but many people
|
||||
have engineered several solutions for non-Microsoft use of the Color
|
||||
|
@ -44,7 +44,9 @@ zc3xx 0458:7007 Genius VideoCam V2
|
||||
zc3xx 0458:700c Genius VideoCam V3
|
||||
zc3xx 0458:700f Genius VideoCam Web V2
|
||||
sonixj 0458:7025 Genius Eye 311Q
|
||||
sn9c20x 0458:7029 Genius Look 320s
|
||||
sonixj 0458:702e Genius Slim 310 NB
|
||||
sn9c20x 045e:00f4 LifeCam VX-6000 (SN9C20x + OV9650)
|
||||
sonixj 045e:00f5 MicroSoft VX3000
|
||||
sonixj 045e:00f7 MicroSoft VX1000
|
||||
ov519 045e:028c Micro$oft xbox cam
|
||||
@ -138,6 +140,7 @@ spca500 04fc:7333 PalmPixDC85
|
||||
sunplus 04fc:ffff Pure DigitalDakota
|
||||
spca501 0506:00df 3Com HomeConnect Lite
|
||||
sunplus 052b:1513 Megapix V4
|
||||
sunplus 052b:1803 MegaImage VI
|
||||
tv8532 0545:808b Veo Stingray
|
||||
tv8532 0545:8333 Veo Stingray
|
||||
sunplus 0546:3155 Polaroid PDC3070
|
||||
@ -180,6 +183,7 @@ ov534 06f8:3002 Hercules Blog Webcam
|
||||
ov534 06f8:3003 Hercules Dualpix HD Weblog
|
||||
sonixj 06f8:3004 Hercules Classic Silver
|
||||
sonixj 06f8:3008 Hercules Deluxe Optical Glass
|
||||
pac7311 06f8:3009 Hercules Classic Link
|
||||
spca508 0733:0110 ViewQuest VQ110
|
||||
spca508 0130:0130 Clone Digital Webcam 11043
|
||||
spca501 0733:0401 Intel Create and Share
|
||||
@ -233,8 +237,10 @@ pac7311 093a:2621 PAC731x
|
||||
pac7311 093a:2622 Genius Eye 312
|
||||
pac7311 093a:2624 PAC7302
|
||||
pac7311 093a:2626 Labtec 2200
|
||||
pac7311 093a:2629 Genious iSlim 300
|
||||
pac7311 093a:262a Webcam 300k
|
||||
pac7311 093a:262c Philips SPC 230 NC
|
||||
jeilinj 0979:0280 Sakar 57379
|
||||
zc3xx 0ac8:0302 Z-star Vimicro zc0302
|
||||
vc032x 0ac8:0321 Vimicro generic vc0321
|
||||
vc032x 0ac8:0323 Vimicro Vc0323
|
||||
@ -245,6 +251,7 @@ zc3xx 0ac8:305b Z-star Vimicro zc0305b
|
||||
zc3xx 0ac8:307b Ldlc VC302+Ov7620
|
||||
vc032x 0ac8:c001 Sony embedded vimicro
|
||||
vc032x 0ac8:c002 Sony embedded vimicro
|
||||
vc032x 0ac8:c301 Samsung Q1 Ultra Premium
|
||||
spca508 0af9:0010 Hama USB Sightcam 100
|
||||
spca508 0af9:0011 Hama USB Sightcam 100
|
||||
sonixb 0c45:6001 Genius VideoCAM NB
|
||||
@ -282,6 +289,29 @@ sonixj 0c45:613a Microdia Sonix PC Camera
|
||||
sonixj 0c45:613b Surfer SN-206
|
||||
sonixj 0c45:613c Sonix Pccam168
|
||||
sonixj 0c45:6143 Sonix Pccam168
|
||||
sonixj 0c45:6148 Digitus DA-70811/ZSMC USB PC Camera ZS211/Microdia
|
||||
sn9c20x 0c45:6240 PC Camera (SN9C201 + MT9M001)
|
||||
sn9c20x 0c45:6242 PC Camera (SN9C201 + MT9M111)
|
||||
sn9c20x 0c45:6248 PC Camera (SN9C201 + OV9655)
|
||||
sn9c20x 0c45:624e PC Camera (SN9C201 + SOI968)
|
||||
sn9c20x 0c45:624f PC Camera (SN9C201 + OV9650)
|
||||
sn9c20x 0c45:6251 PC Camera (SN9C201 + OV9650)
|
||||
sn9c20x 0c45:6253 PC Camera (SN9C201 + OV9650)
|
||||
sn9c20x 0c45:6260 PC Camera (SN9C201 + OV7670)
|
||||
sn9c20x 0c45:6270 PC Camera (SN9C201 + MT9V011/MT9V111/MT9V112)
|
||||
sn9c20x 0c45:627b PC Camera (SN9C201 + OV7660)
|
||||
sn9c20x 0c45:627c PC Camera (SN9C201 + HV7131R)
|
||||
sn9c20x 0c45:627f PC Camera (SN9C201 + OV9650)
|
||||
sn9c20x 0c45:6280 PC Camera (SN9C202 + MT9M001)
|
||||
sn9c20x 0c45:6282 PC Camera (SN9C202 + MT9M111)
|
||||
sn9c20x 0c45:6288 PC Camera (SN9C202 + OV9655)
|
||||
sn9c20x 0c45:628e PC Camera (SN9C202 + SOI968)
|
||||
sn9c20x 0c45:628f PC Camera (SN9C202 + OV9650)
|
||||
sn9c20x 0c45:62a0 PC Camera (SN9C202 + OV7670)
|
||||
sn9c20x 0c45:62b0 PC Camera (SN9C202 + MT9V011/MT9V111/MT9V112)
|
||||
sn9c20x 0c45:62b3 PC Camera (SN9C202 + OV9655)
|
||||
sn9c20x 0c45:62bb PC Camera (SN9C202 + OV7660)
|
||||
sn9c20x 0c45:62bc PC Camera (SN9C202 + HV7131R)
|
||||
sunplus 0d64:0303 Sunplus FashionCam DXG
|
||||
etoms 102c:6151 Qcam Sangha CIF
|
||||
etoms 102c:6251 Qcam xxxxxx VGA
|
||||
@ -290,6 +320,7 @@ spca561 10fd:7e50 FlyCam Usb 100
|
||||
zc3xx 10fd:8050 Typhoon Webshot II USB 300k
|
||||
ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201)
|
||||
pac207 145f:013a Trust WB-1300N
|
||||
sn9c20x 145f:013d Trust WB-3600R
|
||||
vc032x 15b8:6001 HP 2.0 Megapixel
|
||||
vc032x 15b8:6002 HP 2.0 Megapixel rz406aa
|
||||
spca501 1776:501c Arowana 300K CMOS Camera
|
||||
@ -300,4 +331,11 @@ spca500 2899:012c Toptro Industrial
|
||||
spca508 8086:0110 Intel Easy PC Camera
|
||||
spca500 8086:0630 Intel Pocket PC Camera
|
||||
spca506 99fa:8988 Grandtec V.cap
|
||||
sn9c20x a168:0610 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
|
||||
sn9c20x a168:0611 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
|
||||
sn9c20x a168:0613 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
|
||||
sn9c20x a168:0618 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
|
||||
sn9c20x a168:0614 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
|
||||
sn9c20x a168:0615 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
|
||||
sn9c20x a168:0617 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
|
||||
spca561 abcd:cdee Petcam
|
||||
|
176
Documentation/video4linux/si4713.txt
Normal file
176
Documentation/video4linux/si4713.txt
Normal file
@ -0,0 +1,176 @@
|
||||
Driver for I2C radios for the Silicon Labs Si4713 FM Radio Transmitters
|
||||
|
||||
Copyright (c) 2009 Nokia Corporation
|
||||
Contact: Eduardo Valentin <eduardo.valentin@nokia.com>
|
||||
|
||||
|
||||
Information about the Device
|
||||
============================
|
||||
This chip is a Silicon Labs product. It is an I2C device, currently on 0x63 address.
|
||||
Basically, it has transmission and signal noise level measurement features.
|
||||
|
||||
The Si4713 integrates transmit functions for FM broadcast stereo transmission.
|
||||
The chip also allows integrated receive power scanning to identify low signal
|
||||
power FM channels.
|
||||
|
||||
The chip is programmed using commands and responses. There are also several
|
||||
properties which can change the behavior of this chip.
|
||||
|
||||
Users must comply with local regulations on radio frequency (RF) transmission.
|
||||
|
||||
Device driver description
|
||||
=========================
|
||||
There are two modules to handle this device. One is an I2C device driver
|
||||
and the other is a platform driver.
|
||||
|
||||
The I2C device driver exports a v4l2-subdev interface to the kernel.
|
||||
All properties can also be accessed by v4l2 extended controls interface, by
|
||||
using the v4l2-subdev calls (g_ext_ctrls, s_ext_ctrls).
|
||||
|
||||
The platform device driver exports a v4l2 radio device interface to user land.
|
||||
So, it uses the I2C device driver as a sub device in order to send the user
|
||||
commands to the actual device. Basically it is a wrapper to the I2C device driver.
|
||||
|
||||
Applications can use v4l2 radio API to specify frequency of operation, mute state,
|
||||
etc. But most of its properties will be present in the extended controls.
|
||||
|
||||
When the v4l2 mute property is set to 1 (true), the driver will turn the chip off.
|
||||
|
||||
Properties description
|
||||
======================
|
||||
|
||||
The properties can be accessed using v4l2 extended controls.
|
||||
Here is an output from v4l2-ctl util:
|
||||
/ # v4l2-ctl -d /dev/radio0 --all -L
|
||||
Driver Info:
|
||||
Driver name : radio-si4713
|
||||
Card type : Silicon Labs Si4713 Modulator
|
||||
Bus info :
|
||||
Driver version: 0
|
||||
Capabilities : 0x00080800
|
||||
RDS Output
|
||||
Modulator
|
||||
Audio output: 0 (FM Modulator Audio Out)
|
||||
Frequency: 1408000 (88.000000 MHz)
|
||||
Video Standard = 0x00000000
|
||||
Modulator:
|
||||
Name : FM Modulator
|
||||
Capabilities : 62.5 Hz stereo rds
|
||||
Frequency range : 76.0 MHz - 108.0 MHz
|
||||
Subchannel modulation: stereo+rds
|
||||
|
||||
User Controls
|
||||
|
||||
mute (bool) : default=1 value=0
|
||||
|
||||
FM Radio Modulator Controls
|
||||
|
||||
rds_signal_deviation (int) : min=0 max=90000 step=10 default=200 value=200 flags=slider
|
||||
rds_program_id (int) : min=0 max=65535 step=1 default=0 value=0
|
||||
rds_program_type (int) : min=0 max=31 step=1 default=0 value=0
|
||||
rds_ps_name (str) : min=0 max=96 step=8 value='si4713 '
|
||||
rds_radio_text (str) : min=0 max=384 step=32 value=''
|
||||
audio_limiter_feature_enabled (bool) : default=1 value=1
|
||||
audio_limiter_release_time (int) : min=250 max=102390 step=50 default=5010 value=5010 flags=slider
|
||||
audio_limiter_deviation (int) : min=0 max=90000 step=10 default=66250 value=66250 flags=slider
|
||||
audio_compression_feature_enabl (bool) : default=1 value=1
|
||||
audio_compression_gain (int) : min=0 max=20 step=1 default=15 value=15 flags=slider
|
||||
audio_compression_threshold (int) : min=-40 max=0 step=1 default=-40 value=-40 flags=slider
|
||||
audio_compression_attack_time (int) : min=0 max=5000 step=500 default=0 value=0 flags=slider
|
||||
audio_compression_release_time (int) : min=100000 max=1000000 step=100000 default=1000000 value=1000000 flags=slider
|
||||
pilot_tone_feature_enabled (bool) : default=1 value=1
|
||||
pilot_tone_deviation (int) : min=0 max=90000 step=10 default=6750 value=6750 flags=slider
|
||||
pilot_tone_frequency (int) : min=0 max=19000 step=1 default=19000 value=19000 flags=slider
|
||||
pre_emphasis_settings (menu) : min=0 max=2 default=1 value=1
|
||||
tune_power_level (int) : min=0 max=120 step=1 default=88 value=88 flags=slider
|
||||
tune_antenna_capacitor (int) : min=0 max=191 step=1 default=0 value=110 flags=slider
|
||||
/ #
|
||||
|
||||
Here is a summary of them:
|
||||
|
||||
* Pilot is an audible tone sent by the device.
|
||||
|
||||
pilot_frequency - Configures the frequency of the stereo pilot tone.
|
||||
pilot_deviation - Configures pilot tone frequency deviation level.
|
||||
pilot_enabled - Enables or disables the pilot tone feature.
|
||||
|
||||
* The si4713 device is capable of applying audio compression to the transmitted signal.
|
||||
|
||||
acomp_enabled - Enables or disables the audio dynamic range control feature.
|
||||
acomp_gain - Sets the gain for audio dynamic range control.
|
||||
acomp_threshold - Sets the threshold level for audio dynamic range control.
|
||||
acomp_attack_time - Sets the attack time for audio dynamic range control.
|
||||
acomp_release_time - Sets the release time for audio dynamic range control.
|
||||
|
||||
* Limiter sets up the audio deviation limiter feature. Once an over deviation occurs,
|
||||
it is possible to adjust the front-end gain of the audio input and always
|
||||
prevent over deviation.
|
||||
|
||||
limiter_enabled - Enables or disables the limiter feature.
|
||||
limiter_deviation - Configures audio frequency deviation level.
|
||||
limiter_release_time - Sets the limiter release time.
|
||||
|
||||
* Tuning power
|
||||
|
||||
power_level - Sets the output power level for signal transmission.
|
||||
antenna_capacitor - This selects the value of antenna tuning capacitor manually
|
||||
or automatically if set to zero.
|
||||
|
||||
* RDS related
|
||||
|
||||
rds_ps_name - Sets the RDS ps name field for transmission.
|
||||
rds_radio_text - Sets the RDS radio text for transmission.
|
||||
rds_pi - Sets the RDS PI field for transmission.
|
||||
rds_pty - Sets the RDS PTY field for transmission.
|
||||
|
||||
* Region related
|
||||
|
||||
preemphasis - sets the preemphasis to be applied for transmission.
|
||||
|
||||
RNL
|
||||
===
|
||||
|
||||
This device also has an interface to measure received noise level. To do that, you should
|
||||
ioctl the device node. Here is some example code:
|
||||
|
||||
int main (int argc, char *argv[])
|
||||
{
|
||||
struct si4713_rnl rnl;
|
||||
int fd = open("/dev/radio0", O_RDWR);
|
||||
int rval;
|
||||
|
||||
if (argc < 2)
|
||||
return -EINVAL;
|
||||
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
sscanf(argv[1], "%d", &rnl.frequency);
|
||||
|
||||
rval = ioctl(fd, SI4713_IOC_MEASURE_RNL, &rnl);
|
||||
if (rval < 0)
|
||||
return rval;
|
||||
|
||||
printf("received noise level: %d\n", rnl.rnl);
|
||||
|
||||
close(fd);
|
||||
}
|
||||
|
||||
The struct si4713_rnl and SI4713_IOC_MEASURE_RNL are defined under
|
||||
include/media/si4713.h.
|
||||
|
||||
Stereo/Mono and RDS subchannels
|
||||
===============================
|
||||
|
||||
The device can also be configured using the available sub channels for
|
||||
transmission. To do that use S/G_MODULATOR ioctl and configure txsubchans properly.
|
||||
Refer to v4l2-spec for proper use of this ioctl.
|
||||
|
||||
Testing
|
||||
=======
|
||||
Testing is usually done with v4l2-ctl utility for managing FM tuner cards.
|
||||
The tool can be found in v4l-dvb repository under v4l2-apps/util directory.
|
||||
|
||||
Example for setting rds ps name:
|
||||
# v4l2-ctl -d /dev/radio0 --set-ctrl=rds_ps_name="Dummy"
|
||||
|
@ -41,6 +41,8 @@ Possible debug options are
|
||||
P Poisoning (object and padding)
|
||||
U User tracking (free and alloc)
|
||||
T Trace (please only use on single slabs)
|
||||
O Switch debugging off for caches that would have
|
||||
caused higher minimum slab orders
|
||||
- Switch all debugging off (useful if the kernel is
|
||||
configured with CONFIG_SLUB_DEBUG_ON)
|
||||
|
||||
@ -59,6 +61,14 @@ to the dentry cache with
|
||||
|
||||
slub_debug=F,dentry
|
||||
|
||||
Debugging options may require the minimum possible slab order to increase as
|
||||
a result of storing the metadata (for example, caches with PAGE_SIZE object
|
||||
sizes). This has a higher likelihood of resulting in slab allocation errors
|
||||
in low memory situations or if there's high fragmentation of memory. To
|
||||
switch off debugging for such caches by default, use
|
||||
|
||||
slub_debug=O
|
||||
|
||||
In case you forgot to enable debugging on the kernel command line: It is
|
||||
possible to enable debugging manually when the kernel is up. Look at the
|
||||
contents of:
|
||||
|
@ -599,6 +599,7 @@ Protocol: 2.07+
|
||||
0x00000000 The default x86/PC environment
|
||||
0x00000001 lguest
|
||||
0x00000002 Xen
|
||||
0x00000003 Moorestown MID
|
||||
|
||||
Field name: hardware_subarch_data
|
||||
Type: write (subarch-dependent)
|
||||
|
@ -12,6 +12,7 @@ Offset Proto Name Meaning
|
||||
000/040 ALL screen_info Text mode or frame buffer information
|
||||
(struct screen_info)
|
||||
040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info)
|
||||
058/008 ALL tboot_addr Physical address of tboot shared page
|
||||
060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
|
||||
(struct ist_info)
|
||||
080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
|
||||
|
2942
MAINTAINERS
2942
MAINTAINERS
File diff suppressed because it is too large
Load Diff
4
Makefile
4
Makefile
@ -1,7 +1,7 @@
|
||||
VERSION = 2
|
||||
PATCHLEVEL = 6
|
||||
SUBLEVEL = 31
|
||||
EXTRAVERSION = -rc4
|
||||
EXTRAVERSION =
|
||||
NAME = Man-Eating Seals of Antiquity
|
||||
|
||||
# *DOCUMENTATION*
|
||||
@ -325,7 +325,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
|
||||
MODFLAGS = -DMODULE
|
||||
CFLAGS_MODULE = $(MODFLAGS)
|
||||
AFLAGS_MODULE = $(MODFLAGS)
|
||||
LDFLAGS_MODULE =
|
||||
LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds
|
||||
CFLAGS_KERNEL =
|
||||
AFLAGS_KERNEL =
|
||||
CFLAGS_GCOV = -fprofile-arcs -ftest-coverage
|
||||
|
@ -15,7 +15,10 @@ worry too much about getting the wrong person. If you are unsure send it
|
||||
to the person responsible for the code relevant to what you were doing.
|
||||
If it occurs repeatably try and describe how to recreate it. That is
|
||||
worth even more than the oops itself. The list of maintainers and
|
||||
mailing lists is in the MAINTAINERS file in this directory.
|
||||
mailing lists is in the MAINTAINERS file in this directory. If you
|
||||
know the file name that causes the problem you can use the following
|
||||
command in this directory to find some of the maintainers of that file:
|
||||
perl scripts/get_maintainer.pl -f <filename>
|
||||
|
||||
If it is a security bug, please copy the Security Contact listed
|
||||
in the MAINTAINERS file. They can help coordinate bugfix and disclosure.
|
||||
|
13
arch/Kconfig
13
arch/Kconfig
@ -9,6 +9,7 @@ config OPROFILE
|
||||
depends on TRACING_SUPPORT
|
||||
select TRACING
|
||||
select RING_BUFFER
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
help
|
||||
OProfile is a profiling system capable of profiling the
|
||||
whole system, include the kernel, kernel modules, libraries,
|
||||
@ -30,6 +31,18 @@ config OPROFILE_IBS
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config OPROFILE_EVENT_MULTIPLEX
|
||||
bool "OProfile multiplexing support (EXPERIMENTAL)"
|
||||
default n
|
||||
depends on OPROFILE && X86
|
||||
help
|
||||
The number of hardware counters is limited. The multiplexing
|
||||
feature enables OProfile to gather more events than counters
|
||||
are provided by the hardware. This is realized by switching
|
||||
between events at a user specified time interval.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config HAVE_OPROFILE
|
||||
bool
|
||||
|
||||
|
@ -9,10 +9,6 @@
|
||||
#define unmap_page_from_agp(page)
|
||||
#define flush_agp_cache() mb()
|
||||
|
||||
/* Convert a physical address to an address suitable for the GART. */
|
||||
#define phys_to_gart(x) (x)
|
||||
#define gart_to_phys(x) (x)
|
||||
|
||||
/* GATT allocation. Returns/accepts GATT kernel virtual address. */
|
||||
#define alloc_gatt_pages(order) \
|
||||
((char *)__get_free_pages(GFP_KERNEL, (order)))
|
||||
|
@ -52,7 +52,6 @@ struct pci_controller {
|
||||
bus numbers. */
|
||||
|
||||
#define pcibios_assign_all_busses() 1
|
||||
#define pcibios_scan_all_fns(a, b) 0
|
||||
|
||||
#define PCIBIOS_MIN_IO alpha_mv.min_io_address
|
||||
#define PCIBIOS_MIN_MEM alpha_mv.min_mem_address
|
||||
|
@ -1,102 +1,18 @@
|
||||
#ifndef __ALPHA_PERCPU_H
|
||||
#define __ALPHA_PERCPU_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
|
||||
/*
|
||||
* Determine the real variable name from the name visible in the
|
||||
* kernel sources.
|
||||
*/
|
||||
#define per_cpu_var(var) per_cpu__##var
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/*
|
||||
* per_cpu_offset() is the offset that has to be added to a
|
||||
* percpu variable to get to the instance for a certain processor.
|
||||
*/
|
||||
extern unsigned long __per_cpu_offset[NR_CPUS];
|
||||
|
||||
#define per_cpu_offset(x) (__per_cpu_offset[x])
|
||||
|
||||
#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
#define my_cpu_offset per_cpu_offset(smp_processor_id())
|
||||
#else
|
||||
#define my_cpu_offset __my_cpu_offset
|
||||
#endif
|
||||
|
||||
#ifndef MODULE
|
||||
#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
|
||||
#define PER_CPU_DEF_ATTRIBUTES
|
||||
#else
|
||||
/*
|
||||
* To calculate addresses of locally defined variables, GCC uses 32-bit
|
||||
* displacement from the GP. Which doesn't work for per cpu variables in
|
||||
* modules, as an offset to the kernel per cpu area is way above 4G.
|
||||
* To calculate addresses of locally defined variables, GCC uses
|
||||
* 32-bit displacement from the GP. Which doesn't work for per cpu
|
||||
* variables in modules, as an offset to the kernel per cpu area is
|
||||
* way above 4G.
|
||||
*
|
||||
* This forces allocation of a GOT entry for per cpu variable using
|
||||
* ldq instruction with a 'literal' relocation.
|
||||
* Always use weak definitions for percpu variables in modules.
|
||||
*/
|
||||
#define SHIFT_PERCPU_PTR(var, offset) ({ \
|
||||
extern int simple_identifier_##var(void); \
|
||||
unsigned long __ptr, tmp_gp; \
|
||||
asm ( "br %1, 1f \n\
|
||||
1: ldgp %1, 0(%1) \n\
|
||||
ldq %0, per_cpu__" #var"(%1)\t!literal" \
|
||||
: "=&r"(__ptr), "=&r"(tmp_gp)); \
|
||||
(typeof(&per_cpu_var(var)))(__ptr + (offset)); })
|
||||
|
||||
#define PER_CPU_DEF_ATTRIBUTES __used
|
||||
|
||||
#endif /* MODULE */
|
||||
|
||||
/*
|
||||
* A percpu variable may point to a discarded region. The following are
|
||||
* established ways to produce a usable pointer from the percpu variable
|
||||
* offset.
|
||||
*/
|
||||
#define per_cpu(var, cpu) \
|
||||
(*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
|
||||
#define __get_cpu_var(var) \
|
||||
(*SHIFT_PERCPU_PTR(var, my_cpu_offset))
|
||||
#define __raw_get_cpu_var(var) \
|
||||
(*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
|
||||
|
||||
#else /* ! SMP */
|
||||
|
||||
#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
|
||||
#define __get_cpu_var(var) per_cpu_var(var)
|
||||
#define __raw_get_cpu_var(var) per_cpu_var(var)
|
||||
|
||||
#define PER_CPU_DEF_ATTRIBUTES
|
||||
|
||||
#endif /* SMP */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define PER_CPU_BASE_SECTION ".data.percpu"
|
||||
#else
|
||||
#define PER_CPU_BASE_SECTION ".data"
|
||||
#if defined(MODULE) && defined(CONFIG_SMP)
|
||||
#define ARCH_NEEDS_WEAK_PER_CPU
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#ifdef MODULE
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ""
|
||||
#else
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
|
||||
#endif
|
||||
#define PER_CPU_FIRST_SECTION ".first"
|
||||
|
||||
#else
|
||||
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ""
|
||||
#define PER_CPU_FIRST_SECTION ""
|
||||
|
||||
#endif
|
||||
|
||||
#define PER_CPU_ATTRIBUTES
|
||||
#include <asm-generic/percpu.h>
|
||||
|
||||
#endif /* __ALPHA_PERCPU_H */
|
||||
|
@ -32,6 +32,8 @@
|
||||
#define SO_RCVTIMEO 0x1012
|
||||
#define SO_SNDTIMEO 0x1013
|
||||
#define SO_ACCEPTCONN 0x1014
|
||||
#define SO_PROTOCOL 0x1028
|
||||
#define SO_DOMAIN 0x1029
|
||||
|
||||
/* linux-specific, might as well be the same as on i386 */
|
||||
#define SO_NO_CHECK 11
|
||||
|
@ -75,6 +75,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
|
||||
#define TIF_UAC_SIGBUS 7
|
||||
#define TIF_MEMDIE 8
|
||||
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
|
||||
#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */
|
||||
#define TIF_FREEZE 16 /* is freezing for suspend */
|
||||
|
||||
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
||||
@ -82,10 +83,12 @@ register struct thread_info *__current_thread_info __asm__("$8");
|
||||
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
||||
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
||||
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
||||
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
|
||||
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
||||
|
||||
/* Work to do on interrupt/exception return. */
|
||||
#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
|
||||
#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
|
||||
_TIF_NOTIFY_RESUME)
|
||||
|
||||
/* Work to do on any return to userspace. */
|
||||
#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte)
|
||||
#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd)
|
||||
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
|
||||
#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd)
|
||||
|
||||
#endif
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define _ALPHA_TLBFLUSH_H
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/compiler.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/tracehook.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/sigcontext.h>
|
||||
@ -683,4 +684,11 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw,
|
||||
{
|
||||
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
|
||||
do_signal(regs, sw, r0, r19);
|
||||
|
||||
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
if (current->replacement_session_keyring)
|
||||
key_replace_session_keyring();
|
||||
}
|
||||
}
|
||||
|
@ -134,13 +134,6 @@ SECTIONS
|
||||
__bss_stop = .;
|
||||
_end = .;
|
||||
|
||||
/* Sections to be discarded */
|
||||
/DISCARD/ : {
|
||||
EXIT_TEXT
|
||||
EXIT_DATA
|
||||
*(.exitcall.exit)
|
||||
}
|
||||
|
||||
.mdebug 0 : {
|
||||
*(.mdebug)
|
||||
}
|
||||
@ -150,4 +143,6 @@ SECTIONS
|
||||
|
||||
STABS_DEBUG
|
||||
DWARF_DEBUG
|
||||
|
||||
DISCARDS
|
||||
}
|
||||
|
264
arch/arm/Kconfig
264
arch/arm/Kconfig
@ -46,10 +46,6 @@ config GENERIC_CLOCKEVENTS_BROADCAST
|
||||
depends on GENERIC_CLOCKEVENTS
|
||||
default y if SMP && !LOCAL_TIMERS
|
||||
|
||||
config MMU
|
||||
bool
|
||||
default y
|
||||
|
||||
config NO_IOPORT
|
||||
bool
|
||||
|
||||
@ -126,6 +122,13 @@ config ARCH_HAS_ILOG2_U32
|
||||
config ARCH_HAS_ILOG2_U64
|
||||
bool
|
||||
|
||||
config ARCH_HAS_CPUFREQ
|
||||
bool
|
||||
help
|
||||
Internal node to signify that the ARCH has CPUFREQ support
|
||||
and that the relevant menu configurations are displayed for
|
||||
it.
|
||||
|
||||
config GENERIC_HWEIGHT
|
||||
bool
|
||||
default y
|
||||
@ -188,6 +191,13 @@ source "kernel/Kconfig.freezer"
|
||||
|
||||
menu "System Type"
|
||||
|
||||
config MMU
|
||||
bool "MMU-based Paged Memory Management Support"
|
||||
default y
|
||||
help
|
||||
Select if you want MMU-based virtualised addressing space
|
||||
support by paged memory management. If unsure, say 'Y'.
|
||||
|
||||
choice
|
||||
prompt "ARM system type"
|
||||
default ARCH_VERSATILE
|
||||
@ -203,6 +213,7 @@ config ARCH_AAEC2000
|
||||
config ARCH_INTEGRATOR
|
||||
bool "ARM Ltd. Integrator family"
|
||||
select ARM_AMBA
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select HAVE_CLK
|
||||
select COMMON_CLKDEV
|
||||
select ICST525
|
||||
@ -217,6 +228,7 @@ config ARCH_REALVIEW
|
||||
select ICST307
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
help
|
||||
This enables support for ARM Ltd RealView boards.
|
||||
|
||||
@ -229,6 +241,7 @@ config ARCH_VERSATILE
|
||||
select ICST307
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
help
|
||||
This enables support for ARM Ltd Versatile board.
|
||||
|
||||
@ -327,6 +340,20 @@ config ARCH_H720X
|
||||
help
|
||||
This enables support for systems based on the Hynix HMS720x
|
||||
|
||||
config ARCH_NOMADIK
|
||||
bool "STMicroelectronics Nomadik"
|
||||
select ARM_AMBA
|
||||
select ARM_VIC
|
||||
select CPU_ARM926T
|
||||
select HAVE_CLK
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select GENERIC_GPIO
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
help
|
||||
Support for the Nomadik platform by ST-Ericsson
|
||||
|
||||
config ARCH_IOP13XX
|
||||
bool "IOP13xx-based"
|
||||
depends on MMU
|
||||
@ -493,10 +520,18 @@ config ARCH_W90X900
|
||||
select CPU_ARM926T
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
help
|
||||
Support for Nuvoton (Winbond logic dept.) ARM9 processor,You
|
||||
can login www.mcuos.com or www.nuvoton.com to know more.
|
||||
Support for Nuvoton (Winbond logic dept.) ARM9 processors.
|
||||
The w90x900 has now been renamed nuc900. For more about
|
||||
the ARM series product line, see the following
|
||||
link.
|
||||
|
||||
<http://www.nuvoton.com/hq/enu/ProductAndSales/ProductLines/
|
||||
ConsumerElectronicsIC/ARMMicrocontroller/ARMMicrocontroller>
|
||||
|
||||
config ARCH_PNX4008
|
||||
bool "Philips Nexperia PNX4008 Mobile"
|
||||
@ -509,6 +544,7 @@ config ARCH_PXA
|
||||
bool "PXA2xx/PXA3xx-based"
|
||||
depends on MMU
|
||||
select ARCH_MTD_XIP
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select COMMON_CLKDEV
|
||||
@ -551,6 +587,7 @@ config ARCH_SA1100
|
||||
select ISA
|
||||
select ARCH_SPARSEMEM_ENABLE
|
||||
select ARCH_MTD_XIP
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select GENERIC_GPIO
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
@ -563,6 +600,7 @@ config ARCH_SA1100
|
||||
config ARCH_S3C2410
|
||||
bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443"
|
||||
select GENERIC_GPIO
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select HAVE_CLK
|
||||
help
|
||||
Samsung S3C2410X CPU based systems, such as the Simtec Electronics
|
||||
@ -573,9 +611,18 @@ config ARCH_S3C64XX
|
||||
bool "Samsung S3C64XX"
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select ARCH_HAS_CPUFREQ
|
||||
help
|
||||
Samsung S3C64XX series based systems
|
||||
|
||||
config ARCH_S5PC1XX
|
||||
bool "Samsung S5PC1XX"
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select CPU_V7
|
||||
help
|
||||
Samsung S5PC1XX series based systems
|
||||
|
||||
config ARCH_SHARK
|
||||
bool "Shark"
|
||||
select CPU_SA110
|
||||
@ -632,11 +679,24 @@ config ARCH_OMAP
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
help
|
||||
Support for TI's OMAP platform (OMAP1 and OMAP2).
|
||||
|
||||
config ARCH_BCMRING
|
||||
bool "Broadcom BCMRING"
|
||||
depends on MMU
|
||||
select CPU_V6
|
||||
select ARM_AMBA
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_TIME
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
help
|
||||
Support for Broadcom's BCMRing platform.
|
||||
|
||||
endchoice
|
||||
|
||||
source "arch/arm/mach-clps711x/Kconfig"
|
||||
@ -685,6 +745,7 @@ source "arch/arm/mach-kirkwood/Kconfig"
|
||||
source "arch/arm/plat-s3c24xx/Kconfig"
|
||||
source "arch/arm/plat-s3c64xx/Kconfig"
|
||||
source "arch/arm/plat-s3c/Kconfig"
|
||||
source "arch/arm/plat-s5pc1xx/Kconfig"
|
||||
|
||||
if ARCH_S3C2410
|
||||
source "arch/arm/mach-s3c2400/Kconfig"
|
||||
@ -702,6 +763,10 @@ endif
|
||||
|
||||
source "arch/arm/plat-stmp3xxx/Kconfig"
|
||||
|
||||
if ARCH_S5PC1XX
|
||||
source "arch/arm/mach-s5pc100/Kconfig"
|
||||
endif
|
||||
|
||||
source "arch/arm/mach-lh7a40x/Kconfig"
|
||||
|
||||
source "arch/arm/mach-h720x/Kconfig"
|
||||
@ -716,6 +781,8 @@ source "arch/arm/mach-at91/Kconfig"
|
||||
|
||||
source "arch/arm/plat-mxc/Kconfig"
|
||||
|
||||
source "arch/arm/mach-nomadik/Kconfig"
|
||||
|
||||
source "arch/arm/mach-netx/Kconfig"
|
||||
|
||||
source "arch/arm/mach-ns9xxx/Kconfig"
|
||||
@ -730,6 +797,8 @@ source "arch/arm/mach-u300/Kconfig"
|
||||
|
||||
source "arch/arm/mach-w90x900/Kconfig"
|
||||
|
||||
source "arch/arm/mach-bcmring/Kconfig"
|
||||
|
||||
# Definitions to make life easier
|
||||
config ARCH_ACORN
|
||||
bool
|
||||
@ -962,18 +1031,7 @@ config LOCAL_TIMERS
|
||||
accounting to be spread across the timer interval, preventing a
|
||||
"thundering herd" at every timer tick.
|
||||
|
||||
config PREEMPT
|
||||
bool "Preemptible Kernel (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL
|
||||
help
|
||||
This option reduces the latency of the kernel when reacting to
|
||||
real-time or interactive events by allowing a low priority process to
|
||||
be preempted even if it is in kernel mode executing a system call.
|
||||
This allows applications to run more reliably even when the system is
|
||||
under load.
|
||||
|
||||
Say Y here if you are building a kernel for a desktop, embedded
|
||||
or real-time system. Say N if you are unsure.
|
||||
source kernel/Kconfig.preempt
|
||||
|
||||
config HZ
|
||||
int
|
||||
@ -983,6 +1041,21 @@ config HZ
|
||||
default AT91_TIMER_HZ if ARCH_AT91
|
||||
default 100
|
||||
|
||||
config THUMB2_KERNEL
|
||||
bool "Compile the kernel in Thumb-2 mode"
|
||||
depends on CPU_V7 && EXPERIMENTAL
|
||||
select AEABI
|
||||
select ARM_ASM_UNIFIED
|
||||
help
|
||||
By enabling this option, the kernel will be compiled in
|
||||
Thumb-2 mode. A compiler/assembler that understands the unified
|
||||
ARM-Thumb syntax is needed.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config ARM_ASM_UNIFIED
|
||||
bool
|
||||
|
||||
config AEABI
|
||||
bool "Use the ARM EABI to compile the kernel"
|
||||
help
|
||||
@ -1054,6 +1127,11 @@ config HIGHMEM
|
||||
|
||||
If unsure, say n.
|
||||
|
||||
config HIGHPTE
|
||||
bool "Allocate 2nd-level pagetables from highmem"
|
||||
depends on HIGHMEM
|
||||
depends on !OUTER_CACHE
|
||||
|
||||
source "mm/Kconfig"
|
||||
|
||||
config LEDS
|
||||
@ -1241,7 +1319,7 @@ endmenu
|
||||
|
||||
menu "CPU Power Management"
|
||||
|
||||
if (ARCH_SA1100 || ARCH_INTEGRATOR || ARCH_OMAP || ARCH_PXA || ARCH_S3C64XX)
|
||||
if ARCH_HAS_CPUFREQ
|
||||
|
||||
source "drivers/cpufreq/Kconfig"
|
||||
|
||||
@ -1276,6 +1354,52 @@ config CPU_FREQ_S3C64XX
|
||||
bool "CPUfreq support for Samsung S3C64XX CPUs"
|
||||
depends on CPU_FREQ && CPU_S3C6410
|
||||
|
||||
config CPU_FREQ_S3C
|
||||
bool
|
||||
help
|
||||
Internal configuration node for common cpufreq on Samsung SoC
|
||||
|
||||
config CPU_FREQ_S3C24XX
|
||||
bool "CPUfreq driver for Samsung S3C24XX series CPUs"
|
||||
depends on ARCH_S3C2410 && CPU_FREQ && EXPERIMENTAL
|
||||
select CPU_FREQ_S3C
|
||||
help
|
||||
This enables the CPUfreq driver for the Samsung S3C24XX family
|
||||
of CPUs.
|
||||
|
||||
For details, take a look at <file:Documentation/cpu-freq>.
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config CPU_FREQ_S3C24XX_PLL
|
||||
bool "Support CPUfreq changing of PLL frequency"
|
||||
depends on CPU_FREQ_S3C24XX && EXPERIMENTAL
|
||||
help
|
||||
Compile in support for changing the PLL frequency from the
|
||||
S3C24XX series CPUfreq driver. The PLL takes time to settle
|
||||
after a frequency change, so by default it is not enabled.
|
||||
|
||||
This also means that the PLL tables for the selected CPU(s) will
|
||||
be built which may increase the size of the kernel image.
|
||||
|
||||
config CPU_FREQ_S3C24XX_DEBUG
|
||||
bool "Debug CPUfreq Samsung driver core"
|
||||
depends on CPU_FREQ_S3C24XX
|
||||
help
|
||||
Enable s3c_freq_dbg for the Samsung S3C CPUfreq core
|
||||
|
||||
config CPU_FREQ_S3C24XX_IODEBUG
|
||||
bool "Debug CPUfreq Samsung driver IO timing"
|
||||
depends on CPU_FREQ_S3C24XX
|
||||
help
|
||||
Enable s3c_freq_iodbg for the Samsung S3C CPUfreq core
|
||||
|
||||
config CPU_FREQ_S3C24XX_DEBUGFS
|
||||
bool "Export debugfs for CPUFreq"
|
||||
depends on CPU_FREQ_S3C24XX && DEBUG_FS
|
||||
help
|
||||
Export status information via debugfs.
|
||||
|
||||
endif
|
||||
|
||||
source "drivers/cpuidle/Kconfig"
|
||||
@ -1377,107 +1501,7 @@ endmenu
|
||||
|
||||
source "net/Kconfig"
|
||||
|
||||
menu "Device Drivers"
|
||||
|
||||
source "drivers/base/Kconfig"
|
||||
|
||||
source "drivers/connector/Kconfig"
|
||||
|
||||
if ALIGNMENT_TRAP || !CPU_CP15_MMU
|
||||
source "drivers/mtd/Kconfig"
|
||||
endif
|
||||
|
||||
source "drivers/parport/Kconfig"
|
||||
|
||||
source "drivers/pnp/Kconfig"
|
||||
|
||||
source "drivers/block/Kconfig"
|
||||
|
||||
# misc before ide - BLK_DEV_SGIIOC4 depends on SGI_IOC4
|
||||
|
||||
source "drivers/misc/Kconfig"
|
||||
|
||||
source "drivers/ide/Kconfig"
|
||||
|
||||
source "drivers/scsi/Kconfig"
|
||||
|
||||
source "drivers/ata/Kconfig"
|
||||
|
||||
source "drivers/md/Kconfig"
|
||||
|
||||
source "drivers/message/fusion/Kconfig"
|
||||
|
||||
source "drivers/ieee1394/Kconfig"
|
||||
|
||||
source "drivers/message/i2o/Kconfig"
|
||||
|
||||
source "drivers/net/Kconfig"
|
||||
|
||||
source "drivers/isdn/Kconfig"
|
||||
|
||||
# input before char - char/joystick depends on it. As does USB.
|
||||
|
||||
source "drivers/input/Kconfig"
|
||||
|
||||
source "drivers/char/Kconfig"
|
||||
|
||||
source "drivers/i2c/Kconfig"
|
||||
|
||||
source "drivers/spi/Kconfig"
|
||||
|
||||
source "drivers/gpio/Kconfig"
|
||||
|
||||
source "drivers/w1/Kconfig"
|
||||
|
||||
source "drivers/power/Kconfig"
|
||||
|
||||
source "drivers/hwmon/Kconfig"
|
||||
|
||||
source "drivers/thermal/Kconfig"
|
||||
|
||||
source "drivers/watchdog/Kconfig"
|
||||
|
||||
source "drivers/ssb/Kconfig"
|
||||
|
||||
#source "drivers/l3/Kconfig"
|
||||
|
||||
source "drivers/mfd/Kconfig"
|
||||
|
||||
source "drivers/media/Kconfig"
|
||||
|
||||
source "drivers/video/Kconfig"
|
||||
|
||||
source "sound/Kconfig"
|
||||
|
||||
source "drivers/hid/Kconfig"
|
||||
|
||||
source "drivers/usb/Kconfig"
|
||||
|
||||
source "drivers/uwb/Kconfig"
|
||||
|
||||
source "drivers/mmc/Kconfig"
|
||||
|
||||
source "drivers/memstick/Kconfig"
|
||||
|
||||
source "drivers/accessibility/Kconfig"
|
||||
|
||||
source "drivers/leds/Kconfig"
|
||||
|
||||
source "drivers/rtc/Kconfig"
|
||||
|
||||
source "drivers/dma/Kconfig"
|
||||
|
||||
source "drivers/dca/Kconfig"
|
||||
|
||||
source "drivers/auxdisplay/Kconfig"
|
||||
|
||||
source "drivers/regulator/Kconfig"
|
||||
|
||||
source "drivers/uio/Kconfig"
|
||||
|
||||
source "drivers/staging/Kconfig"
|
||||
|
||||
endmenu
|
||||
source "drivers/Kconfig"
|
||||
|
||||
source "fs/Kconfig"
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user