Merge branch 'master' into upstream
commit aebb1153ac
@@ -1,13 +1,12 @@
What: devfs
Date: July 2005
Date: July 2005 (scheduled), finally removed in kernel v2.6.18
Contact: Greg Kroah-Hartman <gregkh@suse.de>
Description:
	devfs has been unmaintained for a number of years, has unfixable
	races, contains a naming policy within the kernel that is
	against the LSB, and can be replaced by using udev.
	The files fs/devfs/*, include/linux/devfs_fs*.h will be removed,
	The files fs/devfs/*, include/linux/devfs_fs*.h were removed,
	along with the assorted devfs function calls throughout the
	kernel tree.

Users:
88	Documentation/ABI/testing/sysfs-power	Normal file
@@ -0,0 +1,88 @@
What: /sys/power/
Date: August 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
	The /sys/power directory will contain files that will
	provide a unified interface to the power management
	subsystem.

What: /sys/power/state
Date: August 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
	The /sys/power/state file controls the system power state.
	Reading from this file returns what states are supported,
	which is hard-coded to 'standby' (Power-On Suspend), 'mem'
	(Suspend-to-RAM), and 'disk' (Suspend-to-Disk).

	Writing one of these strings to this file causes the system to
	transition into that state. Please see the file
	Documentation/power/states.txt for a description of each of
	these states.
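
	As an editorial illustration (not part of the ABI file itself),
	a minimal userspace sketch that requests suspend-to-RAM,
	assuming the running kernel lists 'mem' among the supported
	states:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/power/state", O_WRONLY);

		if (fd < 0) {
			perror("open /sys/power/state");
			return 1;
		}
		/* Writing an unsupported state fails with EINVAL. */
		if (write(fd, "mem", 3) != 3)
			perror("write");
		close(fd);
		return 0;
	}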

What: /sys/power/disk
Date: August 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
	The /sys/power/disk file controls the operating mode of the
	suspend-to-disk mechanism. Reading from this file returns
	the name of the method by which the system will be put to
	sleep on the next suspend. There are four methods supported:
	'firmware' - means that the memory image will be saved to disk
	by some firmware, in which case we also assume that the
	firmware will handle the system suspend.
	'platform' - the memory image will be saved by the kernel and
	the system will be put to sleep by the platform driver (e.g.
	ACPI or other PM registers).
	'shutdown' - the memory image will be saved by the kernel and
	the system will be powered off.
	'reboot' - the memory image will be saved by the kernel and
	the system will be rebooted.

	The suspend-to-disk method may be chosen by writing to this
	file one of the accepted strings:

	'firmware'
	'platform'
	'shutdown'
	'reboot'

	It will only change to 'firmware' or 'platform' if the system
	supports that.

What: /sys/power/image_size
Date: August 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
	The /sys/power/image_size file controls the size of the image
	created by the suspend-to-disk mechanism. A string representing
	a non-negative integer can be written to it; the value is used
	as an upper limit on the image size, in bytes. The kernel's
	suspend-to-disk code will do its best to ensure the image size
	will not exceed this number. However, if it turns out to be
	impossible, the kernel will try to suspend anyway using the
	smallest image possible. In particular, if "0" is written to
	this file, the suspend image will be as small as possible.

	Reading from this file will display the current image size
	limit, which is set to 500 MB by default.
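
	As a hedged illustration (again, not part of the ABI file), a
	userspace program could read the current limit and then request
	the smallest possible image by writing "0":

	#include <stdio.h>

	int main(void)
	{
		unsigned long long limit;
		FILE *f = fopen("/sys/power/image_size", "r+");

		if (!f) {
			perror("fopen /sys/power/image_size");
			return 1;
		}
		if (fscanf(f, "%llu", &limit) == 1)
			printf("current limit: %llu bytes\n", limit);
		rewind(f);
		fprintf(f, "0\n");	/* ask for the smallest image */
		fclose(f);
		return 0;
	}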

What: /sys/power/pm_trace
Date: August 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
	The /sys/power/pm_trace file controls the code which saves the
	last PM event point in the RTC across reboots, so that you can
	debug a machine that just hangs during suspend (or more
	commonly, during resume). Namely, the RTC is only used to save
	the last PM event point if this file contains '1'. Initially
	it contains '0' which may be changed to '1' by writing a
	string representing a nonzero integer into it.

	To use this debugging feature you should attempt to suspend
	the machine, then reboot it and run

	dmesg -s 1000000 | grep 'hash matches'

	CAUTION: Using it will cause your machine's real-time (CMOS)
	clock to be set to a random invalid time after a resume.
@@ -43,59 +43,52 @@

<para>A Universal Serial Bus (USB) is used to connect a host,
such as a PC or workstation, to a number of peripheral
devices. USB uses a tree structure, with the host at the
devices. USB uses a tree structure, with the host as the
root (the system's master), hubs as interior nodes, and
peripheral devices as leaves (and slaves).
peripherals as leaves (and slaves).
Modern PCs support several such trees of USB devices, usually
one USB 2.0 tree (480 Mbit/sec each) with
a few USB 1.1 trees (12 Mbit/sec each) that are used when you
connect a USB 1.1 device directly to the machine's "root hub".
</para>

<para>That master/slave asymmetry was designed in part for
ease of use. It is not physically possible to assemble
(legal) USB cables incorrectly: all upstream "to-the-host"
connectors are the rectangular type, matching the sockets on
root hubs, and the downstream type are the squarish type
(or they are built in to the peripheral).
Software doesn't need to deal with distributed autoconfiguration
since the pre-designated master node manages all that.
At the electrical level, bus protocol overhead is reduced by
eliminating arbitration and moving scheduling into host software.
<para>That master/slave asymmetry was designed-in for a number of
reasons, one being ease of use. It is not physically possible to
assemble (legal) USB cables incorrectly: all upstream "to the host"
connectors are the rectangular type (matching the sockets on
root hubs), and all downstream connectors are the squarish type
(or they are built into the peripheral).
Also, the host software doesn't need to deal with distributed
auto-configuration since the pre-designated master node manages all that.
And finally, at the electrical level, bus protocol overhead is reduced by
eliminating arbitration and moving scheduling into the host software.
</para>

<para>USB 1.0 was announced in January 1996, and was revised
<para>USB 1.0 was announced in January 1996 and was revised
as USB 1.1 (with improvements in hub specification and
support for interrupt-out transfers) in September 1998.
USB 2.0 was released in April 2000, including high speed
transfers and transaction translating hubs (used for USB 1.1
USB 2.0 was released in April 2000, adding high-speed
transfers and transaction-translating hubs (used for USB 1.1
and 1.0 backward compatibility).
</para>

<para>USB support was added to Linux early in the 2.2 kernel series
shortly before the 2.3 development forked off. Updates
from 2.3 were regularly folded back into 2.2 releases, bringing
new features such as <filename>/sbin/hotplug</filename> support,
more drivers, and more robustness.
The 2.5 kernel series continued such improvements, and also
worked on USB 2.0 support,
higher performance,
better consistency between host controller drivers,
API simplification (to make bugs less likely),
and providing internal "kerneldoc" documentation.
<para>Kernel developers added USB support to Linux early in the 2.2 kernel
series, shortly before 2.3 development forked. Updates from 2.3 were
regularly folded back into 2.2 releases, which improved reliability and
brought <filename>/sbin/hotplug</filename> support as well as more drivers.
Such improvements were continued in the 2.5 kernel series, where they added
USB 2.0 support, improved performance, and made the host controller drivers
(HCDs) more consistent. They also simplified the API (to make bugs less
likely) and added internal "kerneldoc" documentation.
</para>

<para>Linux can run inside USB devices as well as on
the hosts that control the devices.
Because the Linux 2.x USB support evolved to support mass market
platforms such as Apple Macintosh or PC-compatible systems,
it didn't address design concerns for those types of USB systems.
So it can't be used inside mass-market PDAs, or other peripherals.
USB device drivers running inside those Linux peripherals
But USB device drivers running inside those peripherals
don't do the same things as the ones running inside hosts,
and so they've been given a different name:
they're called <emphasis>gadget drivers</emphasis>.
This document does not present gadget drivers.
so they've been given a different name:
<emphasis>gadget drivers</emphasis>.
This document does not cover gadget drivers.
</para>

</chapter>
@@ -103,17 +96,14 @@
<chapter id="host">
<title>USB Host-Side API Model</title>

<para>Within the kernel,
host-side drivers for USB devices talk to the "usbcore" APIs.
There are two types of public "usbcore" APIs, targeted at two different
layers of USB driver. Those are
<emphasis>general purpose</emphasis> drivers, exposed through
driver frameworks such as block, character, or network devices;
and drivers that are <emphasis>part of the core</emphasis>,
which are involved in managing a USB bus.
Such core drivers include the <emphasis>hub</emphasis> driver,
which manages trees of USB devices, and several different kinds
of <emphasis>host controller driver (HCD)</emphasis>,
<para>Host-side drivers for USB devices talk to the "usbcore" APIs.
There are two. One is intended for
<emphasis>general-purpose</emphasis> drivers (exposed through
driver frameworks), and the other is for drivers that are
<emphasis>part of the core</emphasis>.
Such core drivers include the <emphasis>hub</emphasis> driver
(which manages trees of USB devices) and several different kinds
of <emphasis>host controller drivers</emphasis>,
which control individual busses.
</para>

@@ -122,21 +112,21 @@

<itemizedlist>

<listitem><para>USB supports four kinds of data transfer
(control, bulk, interrupt, and isochronous). Two transfer
types use bandwidth as it's available (control and bulk),
while the other two types of transfer (interrupt and isochronous)
<listitem><para>USB supports four kinds of data transfers
(control, bulk, interrupt, and isochronous). Two of them (control
and bulk) use bandwidth as it's available,
while the other two (interrupt and isochronous)
are scheduled to provide guaranteed bandwidth.
</para></listitem>

<listitem><para>The device description model includes one or more
"configurations" per device, only one of which is active at a time.
Devices that are capable of high speed operation must also support
full speed configurations, along with a way to ask about the
"other speed" configurations that might be used.
Devices that are capable of high-speed operation must also support
full-speed configurations, along with a way to ask about the
"other speed" configurations which might be used.
</para></listitem>

<listitem><para>Configurations have one or more "interface", each
<listitem><para>Configurations have one or more "interfaces", each
of which may have "alternate settings". Interfaces may be
standardized by USB "Class" specifications, or may be specific to
a vendor or device.</para>
@@ -162,7 +152,7 @@
</para></listitem>

<listitem><para>The Linux USB API supports synchronous calls for
control and bulk messaging.
control and bulk messages.
It also supports asynchronous calls for all kinds of data transfer,
using request structures called "URBs" (USB Request Blocks).
</para></listitem>
@@ -463,14 +453,25 @@
file in your Linux kernel sources.
</para>

<para>Otherwise the main use for this file from programs
is to poll() it to get notifications of usb devices
as they're plugged or unplugged.
To see what changed, you'd need to read the file and
compare "before" and "after" contents, scan the filesystem,
or see its hotplug event.
</para>
<para>This file, in combination with the poll() system call, can
also be used to detect when devices are added or removed:
<programlisting>int fd;
struct pollfd pfd;

fd = open("/proc/bus/usb/devices", O_RDONLY);
pfd = (struct pollfd) { fd, POLLIN, 0 };
for (;;) {
	/* The first time through, this call will return immediately. */
	poll(&pfd, 1, -1);

	/* To see what's changed, compare the file's previous and current
	   contents or scan the filesystem. (Scanning is more precise.) */
}</programlisting>
Note that this behavior is intended to be used for informational
and debug purposes. It would be more appropriate to use programs
such as udev or HAL to initialize a device or start a user-mode
helper program, for instance.
</para>
</sect1>

<sect1>
@@ -358,7 +358,8 @@ Here is a list of some of the different kernel trees available:
quilt trees:
- USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
  kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/

- x86-64, partly i386, Andi Kleen <ak@suse.de>
  ftp.firstfloor.org:/pub/ak/x86_64/quilt/

Bug Reporting
-------------
@@ -2543,6 +2543,9 @@ Your cooperation is appreciated.
 64 = /dev/usb/rio500	Diamond Rio 500
 65 = /dev/usb/usblcd	USBLCD Interface (info@usblcd.de)
 66 = /dev/usb/cpad0	Synaptics cPad (mouse/LCD)
 67 = /dev/usb/adutux0	1st Ontrak ADU device
 ...
 76 = /dev/usb/adutux10	10th Ontrak ADU device
 96 = /dev/usb/hiddev0	1st USB HID device
 ...
111 = /dev/usb/hiddev15	16th USB HID device
@@ -6,6 +6,21 @@ be removed from this file.

---------------------------

What: /sys/devices/.../power/state
      dev->power.power_state
      dpm_runtime_{suspend,resume}()
When: July 2007
Why: Broken design for runtime control over driver power states, confusing
     driver-internal runtime power management with: mechanisms to support
     system-wide sleep state transitions; event codes that distinguish
     different phases of swsusp "sleep" transitions; and userspace policy
     inputs. This framework was never widely used, and most attempts to
     use it were broken. Drivers should instead be exposing domain-specific
     interfaces either to kernel or to userspace.
Who: Pavel Machek <pavel@suse.cz>

---------------------------

What: RAW driver (CONFIG_RAW_DRIVER)
When: December 2005
Why: declared obsolete since kernel 2.6.3
@@ -55,6 +70,18 @@ Who: Mauro Carvalho Chehab <mchehab@brturbo.com.br>

---------------------------

What: sys_sysctl
When: January 2007
Why: The same information is available through /proc/sys and that is the
     interface user space prefers to use. And there do not appear to be
     any existing users in user space of sys_sysctl. The additional
     maintenance overhead of keeping a set of binary names gets
     in the way of doing a good job of maintaining this interface.

Who: Eric Biederman <ebiederm@xmission.com>

---------------------------

What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
When: November 2005
Files: drivers/pcmcia/: pcmcia_ioctl.c
@@ -202,14 +229,6 @@ Who: Nick Piggin <npiggin@suse.de>

---------------------------

What: Support for the MIPS EV96100 evaluation board
When: September 2006
Why: Has not built since at least November 15, 2003; apparently
     no user base is left.
Who: Ralf Baechle <ralf@linux-mips.org>

---------------------------

What: Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board
When: September 2006
Why: Has not built for quite some time, and was never popular,
@@ -294,3 +313,24 @@ Why: The frame diverter is included in most distribution kernels, but is
     It is not clear if anyone is still using it.
Who: Stephen Hemminger <shemminger@osdl.org>

---------------------------

What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
When: October 2008
Why: The stacking of class devices makes these values misleading and
     inconsistent.
     Class devices should not carry any of these properties, and bus
     devices have SUBSYSTEM and DRIVER as a replacement.
Who: Kay Sievers <kay.sievers@suse.de>

---------------------------

What: i2c-isa
When: December 2006
Why: i2c-isa is nonsense and doesn't fit in the device driver
     model. Drivers relying on it are better implemented as platform
     drivers.
Who: Jean Delvare <khali@linux-fr.org>

---------------------------
@@ -1124,11 +1124,15 @@ debugging information is displayed on console.
The NMI switch that most IA32 servers have fires an unknown NMI, for example.
If a system hangs up, try pressing the NMI switch.

[NOTE]
This function and oprofile share an NMI callback. Therefore this function
cannot be enabled when oprofile is activated.
The NMI watchdog will also be disabled when the value in this file is set to
non-zero.
nmi_watchdog
------------

Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
the NMI watchdog is enabled and will continuously test all online CPUs to
determine whether or not they are still functioning properly.

Because the NMI watchdog shares registers with oprofile, by disabling the NMI
watchdog, oprofile may have more registers to utilize.
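
As an illustrative sketch (not part of the original document), the
watchdog can be switched off from userspace, for instance before
starting oprofile so that oprofile can use those registers:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "w");

		if (!f) {
			perror("fopen /proc/sys/kernel/nmi_watchdog");
			return 1;
		}
		fputs("0\n", f);	/* any non-zero value enables it again */
		fclose(f);
		return 0;
	}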

2.4 /proc/sys/vm - The virtual memory subsystem

@@ -7,9 +7,12 @@ Supported adapters:
* VIA Technologies, Inc. VT82C686A/B
  Datasheet: Sometimes available at the VIA website

* VIA Technologies, Inc. VT8231, VT8233, VT8233A, VT8235, VT8237R
* VIA Technologies, Inc. VT8231, VT8233, VT8233A
  Datasheet: available on request from VIA

* VIA Technologies, Inc. VT8235, VT8237R, VT8237A, VT8251
  Datasheet: available on request and under NDA from VIA

Authors:
  Kyösti Mälkki <kmalkki@cc.hut.fi>,
  Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -39,6 +42,8 @@ Your lspci -n listing must show one of these :
 device 1106:8235 (VT8231 function 4)
 device 1106:3177 (VT8235)
 device 1106:3227 (VT8237R)
 device 1106:3337 (VT8237A)
 device 1106:3287 (VT8251)

If none of these show up, you should look in the BIOS for settings like
enable ACPI / SMBus or even USB.
@@ -6,9 +6,12 @@ This module is a very simple fake I2C/SMBus driver. It implements four
types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, and
(r/w) word data.

You need to provide a chip address as a module parameter when loading
this driver, which will then only react to SMBus commands to this address.

No hardware is needed nor associated with this module. It will accept write
quick commands to all addresses; it will respond to the other commands (also
to all addresses) by reading from or writing to an array in memory. It will
quick commands to one address; it will respond to the other commands (also
to one address) by reading from or writing to an array in memory. It will
also spam the kernel logs for every command it handles.

A pointer register with auto-increment is implemented for all byte
@@ -21,6 +24,11 @@ The typical use-case is like this:
3. load the target sensors chip driver module
4. observe its behavior in the kernel log

PARAMETERS:

int chip_addr:
	The SMBus address to emulate a chip at.

CAVEATS:

There are independent arrays for byte/data and word/data commands. Depending
@@ -33,6 +41,9 @@ If the hardware for your driver has banked registers (e.g. Winbond sensors
chips) this module will not work well - although it could be extended to
support that pretty easily.

Only one chip address is supported - although this module could be
extended to support more.

If you spam it hard enough, printk can be lossy. This module really wants
something like relayfs.
@@ -421,6 +421,11 @@ more details, with real examples.
	The second argument is optional, and if supplied will be used
	if the first argument is not supported.

as-instr
	as-instr checks if the assembler reports a specific instruction
	and then outputs either option1 or option2.
	C escapes are supported in the test instruction.

cc-option
	cc-option is used to check if $(CC) supports a given option, and
	if not, to use an optional second option.
@@ -573,8 +573,6 @@ running once the system is up.
	gscd=		[HW,CD]
			Format: <io>

	gt96100eth=	[NET] MIPS GT96100 Advanced Communication Controller

	gus=		[HW,OSS]
			Format: <io>,<irq>,<dma>,<dma16>

@@ -1240,7 +1238,11 @@ running once the system is up.
			bootloader. This is currently used on
			IXP2000 systems where the bus has to be
			configured a certain way for adjunct CPUs.

	noearly		[X86] Don't do any early type 1 scanning.
			This might help on some broken boards which
			machine check when some devices' config space
			is read. But various workarounds are disabled
			and some IOMMU drivers will not work.
	pcmv=		[HW,PCMCIA] BadgePAD 4

	pd.		[PARIDE]
@@ -1368,6 +1370,9 @@ running once the system is up.
			Reserves a hole at the top of the kernel virtual
			address space.

	reset_devices	[KNL] Force drivers to reset the underlying device
			during initialization.

	resume=		[SWSUSP]
			Specify the partition device for software suspend

@@ -116,6 +116,9 @@ FURTHER NOTES ON NO-MMU MMAP
 (*) A list of all the mappings on the system is visible through /proc/maps in
     no-MMU mode.

 (*) A list of all the mappings in use by a process is visible through
     /proc/<pid>/maps in no-MMU mode.

 (*) Supplying MAP_FIXED or requesting a particular mapping address will
     result in an error.

@@ -125,6 +128,49 @@ FURTHER NOTES ON NO-MMU MMAP
     error will result if they don't. This is most likely to be encountered
     with character device files, pipes, fifos and sockets.


==========================
INTERPROCESS SHARED MEMORY
==========================

Both SYSV IPC SHM shared memory and POSIX shared memory are supported in NOMMU
mode. The former through the usual mechanism, the latter through files created
on ramfs or tmpfs mounts.


=======
FUTEXES
=======

Futexes are supported in NOMMU mode if the arch supports them. An error will
be given if an address passed to the futex system call lies outside the
mappings made by a process or if the mapping in which the address lies does not
support futexes (such as an I/O chardev mapping).


=============
NO-MMU MREMAP
=============

The mremap() function is partially supported. It may change the size of a
mapping, and may move it[*] if MREMAP_MAYMOVE is specified and if the new size
of the mapping exceeds the size of the slab object currently occupied by the
memory to which the mapping refers, or if a smaller slab object could be used.

MREMAP_FIXED is not supported, though it is ignored if there's no change of
address and the object does not need to be moved.

Shared mappings may not be moved. Shareable mappings may not be moved either,
even if they are not currently shared.

The mremap() function must be given an exact match for base address and size of
a previously mapped object. It may not be used to create holes in existing
mappings, move parts of existing mappings or resize parts of mappings. It must
act on a complete mapping.

[*] Not currently supported.
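
A minimal userspace sketch (an editorial illustration, not from the
original text) of resizing under these rules; the call names the
complete mapping, with its exact base address and size:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t old_size = 8192, new_size = 16384;
		void *map, *bigger;

		map = mmap(NULL, old_size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (map == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* Must cover the whole mapping: exact base and size. */
		bigger = mremap(map, old_size, new_size, MREMAP_MAYMOVE);
		if (bigger == MAP_FAILED)
			perror("mremap");	/* e.g. slab object too small */
		return 0;
	}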


============================================
PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
============================================

253	Documentation/pcieaer-howto.txt	Normal file
@@ -0,0 +1,253 @@
The PCI Express Advanced Error Reporting Driver Guide HOWTO
	T. Long Nguyen <tom.l.nguyen@intel.com>
	Yanmin Zhang <yanmin.zhang@intel.com>
	07/29/2006


1. Overview

1.1 About this guide

This guide describes the basics of the PCI Express Advanced Error
Reporting (AER) driver and provides information on how to use it, as
well as how to enable the drivers of endpoint devices to conform with
the PCI Express AER driver.

1.2 Copyright © Intel Corporation 2006.

1.3 What is the PCI Express AER Driver?

PCI Express error signaling can occur on the PCI Express link itself
or on behalf of transactions initiated on the link. PCI Express
defines two error reporting paradigms: the baseline capability and
the Advanced Error Reporting capability. The baseline capability is
required of all PCI Express components and provides a minimum defined
set of error reporting requirements. The Advanced Error Reporting
capability is implemented with a PCI Express advanced error reporting
extended capability structure and provides more robust error reporting.

The PCI Express AER driver provides the infrastructure to support the
PCI Express Advanced Error Reporting capability. The PCI Express AER
driver provides three basic functions:

- Gathers comprehensive error information when errors occur.
- Reports errors to the users.
- Performs error recovery actions.

The AER driver only attaches to root ports that support the PCI Express
AER capability.


2. User Guide

2.1 Include the PCI Express AER Root Driver into the Linux Kernel

The PCI Express AER Root driver is a Root Port service driver attached
to the PCI Express Port Bus driver. If a user wants to use it, the driver
has to be compiled. The option CONFIG_PCIEAER supports this capability. It
depends on CONFIG_PCIEPORTBUS, so please set CONFIG_PCIEPORTBUS=y and
CONFIG_PCIEAER=y.

2.2 Load PCI Express AER Root Driver
A system may have AER support in the BIOS. Enabling the AER Root driver
while the BIOS also handles AER may result in unpredictable
behavior. To avoid this conflict, a successful load of the AER Root driver
requires ACPI _OSC support in the BIOS to allow the AER Root driver to
request native control of AER. See the PCI FW 3.0 Specification for
details regarding _OSC usage. Currently, many firmware implementations
don't provide _OSC support while they use PCI Express. To support such
firmware, forceload, a parameter of type bool, can make AER initialization
proceed even though the firmware has no _OSC support. To enable this
workaround, please add aerdriver.forceload=y to the kernel boot parameter
line when booting the kernel. Note that forceload=n by default.

2.3 AER error output
When a PCI Express AER error is captured, an error message is output to
the console. If it's a correctable error, it is output as a warning.
Otherwise, it is printed as an error. So users can choose a different
log level to filter out correctable error messages.

Below is an example:
+------ PCI-Express Device Error -----+
Error Severity : Uncorrected (Fatal)
PCIE Bus Error type : Transaction Layer
Unsupported Request : First
Requester ID : 0500
VendorID=8086h, DeviceID=0329h, Bus=05h, Device=00h, Function=00h
TLP Header:
04000001 00200a03 05010000 00050100

In the example, 'Requester ID' means the ID of the device that sends
the error message to the root port. Please refer to the PCI Express
specs for the other fields.


3. Developer Guide

Enabling AER-aware support requires a software driver to configure
the AER capability structure within its device and to provide callbacks.

To support AER well, developers first need to understand how AER works.

PCI Express errors are classified into two types: correctable errors
and uncorrectable errors. This classification is based on the impact
of those errors, which may result in degraded performance or function
failure.

Correctable errors pose no impact on the functionality of the
interface. The PCI Express protocol can recover without any software
intervention or any loss of data. These errors are detected and
corrected by hardware. Unlike correctable errors, uncorrectable
errors impact functionality of the interface. Uncorrectable errors
can cause a particular transaction or a particular PCI Express link
to be unreliable. Depending on those error conditions, uncorrectable
errors are further classified into non-fatal errors and fatal errors.
Non-fatal errors cause the particular transaction to be unreliable,
but the PCI Express link itself is fully functional. Fatal errors, on
the other hand, cause the link to be unreliable.

When AER is enabled, a PCI Express device will automatically send an
error message to the PCIE root port above it when the device captures
an error. The Root Port, upon receiving an error reporting message,
internally processes and logs the error message in its PCI Express
capability structure. Error information being logged includes storing
the error reporting agent's requestor ID into the Error Source
Identification Registers and setting the error bits of the Root Error
Status Register accordingly. If AER error reporting is enabled in the
Root Error Command Register, the Root Port generates an interrupt when
an error is detected.

Note that the errors as described above are related to the PCI Express
hierarchy and links. These errors do not include any device-specific
errors because device-specific errors will still get sent directly to
the device driver.

3.1 Configure the AER capability structure

AER-aware drivers of PCI Express components need to change the device
control registers to enable AER. They can also change AER registers,
including the mask and severity registers. The helper function
pci_enable_pcie_error_reporting can be used to enable AER. See
section 3.3.

3.2 Provide callbacks

3.2.1 callback reset_link to reset the PCI Express link

This callback is used to reset the PCI Express physical link when a
fatal error happens. The Root Port AER service driver provides a
default reset_link function, but different upstream ports might
have different specifications for resetting the PCI Express link, so
upstream ports should provide their own reset_link functions.

In struct pcie_port_service_driver, a new pointer, reset_link, is
added.

pci_ers_result_t (*reset_link) (struct pci_dev *dev);

Section 3.2.2.2 provides more detailed info on when to call
reset_link.
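
As a hedged sketch (not part of the HOWTO; every field name other than
reset_link is illustrative -- check include/linux/pcieport_if.h in your
tree for the real struct pcie_port_service_driver layout), an upstream
port service driver could supply its own reset_link like this:

	#include <linux/pci.h>
	#include <linux/pcieport_if.h>

	static pci_ers_result_t my_reset_link(struct pci_dev *dev)
	{
		/*
		 * Device-specific link reset, e.g. toggle secondary bus
		 * reset in the bridge, then wait for link retraining.
		 */
		return PCI_ERS_RESULT_RECOVERED;
	}

	static struct pcie_port_service_driver my_port_service = {
		.name       = "my_port_service",  /* illustrative */
		.reset_link = my_reset_link,      /* prototype shown above */
		/* .probe, .remove and the service id are omitted here */
	};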

3.2.2 PCI error-recovery callbacks

The PCI Express AER Root driver uses error callbacks to coordinate
with downstream device drivers associated with the hierarchy in question
when performing error recovery actions.

The data structure pci_driver has a pointer, err_handler, that points to
a pci_error_handlers structure, which consists of a couple of callback
function pointers. The AER driver follows the rules defined in
pci-error-recovery.txt except for the PCI Express specific parts (e.g.
reset_link). Please refer to pci-error-recovery.txt for detailed
definitions of the callbacks.

The sections below specify when to call the error callback functions.

3.2.2.1 Correctable errors

Correctable errors pose no impact on the functionality of
the interface. The PCI Express protocol can recover without any
software intervention or any loss of data. These errors do not
require any recovery actions. The AER driver clears the device's
correctable error status register accordingly and logs these errors.

3.2.2.2 Non-correctable (non-fatal and fatal) errors

If an error message indicates a non-fatal error, performing a link reset
upstream is not required. The AER driver calls error_detected(dev,
pci_channel_io_normal) on all drivers associated with the hierarchy in
question. For example,
EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort.
If Upstream port A captures an AER error, the hierarchy consists of
Downstream port B and the EndPoint.

A driver may return PCI_ERS_RESULT_CAN_RECOVER,
PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
whether it can recover; if it can, the AER driver calls mmio_enabled next.

If an error message indicates a fatal error, the kernel will broadcast
error_detected(dev, pci_channel_io_frozen) to all drivers within
the hierarchy in question. Then, performing a link reset upstream is
necessary. As different kinds of devices might use different approaches
to reset the link, the AER port service driver is required to provide the
function to reset the link. First, the kernel checks whether the upstream
component has an AER driver. If it does, the kernel uses the reset_link
callback of that AER driver. If the upstream component has no AER driver
and the port is a downstream port, the AER driver of the root port that
reports the AER error is used. Upstream ports, however, should provide
their own AER service drivers with a reset_link
function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
to mmio_enabled.
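
As a hedged sketch (not from the HOWTO itself; see pci-error-recovery.txt
for the authoritative callback definitions), an endpoint driver's
error-recovery callbacks might look like this:

	#include <linux/pci.h>

	static pci_ers_result_t my_error_detected(struct pci_dev *pdev,
						  enum pci_channel_state state)
	{
		/* Fatal errors arrive as pci_channel_io_frozen. */
		if (state == pci_channel_io_frozen)
			return PCI_ERS_RESULT_NEED_RESET;
		return PCI_ERS_RESULT_CAN_RECOVER; /* mmio_enabled runs next */
	}

	static pci_ers_result_t my_mmio_enabled(struct pci_dev *pdev)
	{
		/* MMIO works again: inspect device state, restart I/O. */
		return PCI_ERS_RESULT_RECOVERED;
	}

	static struct pci_error_handlers my_err_handler = {
		.error_detected = my_error_detected,
		.mmio_enabled   = my_mmio_enabled,
		/* .slot_reset and .resume omitted in this sketch */
	};

The structure is hooked up through the driver's struct pci_driver via
.err_handler = &my_err_handler.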

3.3 Helper functions

3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
pci_find_aer_capability locates the PCI Express AER capability
in the device configuration space. If the device doesn't support
PCI Express AER, the function returns 0.

3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
pci_enable_pcie_error_reporting enables the device to send error
messages to the root port when an error is detected. Note that devices
don't enable error reporting by default, so device drivers need to
call this function to enable it.

3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
pci_disable_pcie_error_reporting stops the device from sending error
messages to the root port when an error is detected.

3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
pci_cleanup_aer_uncorrect_error_status cleans up the uncorrectable
error status register.
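
As a hedged sketch (not from the HOWTO), an endpoint driver could use
these helpers as follows; error checking is abbreviated and the probe
routine is illustrative:

	#include <linux/pci.h>
	#include <linux/aer.h>

	static int my_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
	{
		if (!pci_find_aer_capability(pdev))  /* 0 = no AER support */
			dev_info(&pdev->dev, "AER not supported\n");

		/* Devices don't send AER error messages by default. */
		pci_enable_pcie_error_reporting(pdev);
		return 0;
	}

	static pci_ers_result_t my_slot_reset(struct pci_dev *pdev)
	{
		/* After a link reset, clear stale uncorrectable status. */
		pci_cleanup_aer_uncorrect_error_status(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}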

3.4 Frequently Asked Questions

Q: What happens if a PCI Express device driver does not provide an
error recovery handler (pci_driver->err_handler is equal to NULL)?

A: The devices attached to the driver won't be recovered. If the
error is fatal, the kernel will print out warning messages. Please refer
to section 3 for more information.

Q: What happens if an upstream port service driver does not provide
the reset_link callback?

A: Fatal error recovery will fail if the errors are reported by
upstream ports that are attached to the service driver.

Q: How does this infrastructure deal with a driver that is not PCI
Express aware?

A: This infrastructure calls the error callback functions of the
driver when an error happens. But if the driver is not aware of
PCI Express, the device might not report its own errors to the root
port.

Q: What modifications does such a driver need to be compatible
with the PCI Express AER Root driver?

A: It could call the helper functions to enable AER in its devices and
clean up the uncorrectable status register. Please refer to section 3.3.
@@ -1,208 +1,553 @@
Most of the code in Linux is device drivers, so most of the Linux power
management code is also driver-specific. Most drivers will do very little;
others, especially for platforms with small batteries (like cell phones),
will do a lot.

Device Power Management
This writeup gives an overview of how drivers interact with system-wide
power management goals, emphasizing the models and interfaces that are
shared by everything that hooks up to the driver model core. Read it as
background for the domain-specific work you'd do with any specific driver.


Device power management encompasses two areas - the ability to save
state and transition a device to a low-power state when the system is
entering a low-power state; and the ability to transition a device to
a low-power state while the system is running (and independently of
any other power management activity).
Two Models for Device Power Management
======================================
Drivers will use one or both of these models to put devices into low-power
states:

System Sleep model:
Drivers can enter low-power states as part of entering system-wide
low-power states like "suspend-to-ram", or (mostly for systems with
disks) "hibernate" (suspend-to-disk).

This is something that device, bus, and class drivers collaborate on
by implementing various role-specific suspend and resume methods to
cleanly power down hardware and software subsystems, then reactivate
them without loss of data.

Some drivers can manage hardware wakeup events, which make the system
leave that low-power state. This feature may be disabled using the
relevant /sys/devices/.../power/wakeup file; enabling it may cost some
power usage, but lets the whole system enter low-power states more often.

Runtime Power Management model:
Drivers may also enter low-power states while the system is running,
independently of other power management activity. Upstream drivers
will normally not know (or care) if the device is in some low-power
state when issuing requests; the driver will auto-resume anything
that's needed when it gets a request.

This doesn't have, or need, much infrastructure; it's just something you
should do when writing your drivers. For example, clk_disable() unused
clocks as part of minimizing power drain for currently-unused hardware.
Of course, sometimes clusters of drivers will collaborate with each
other, which could involve task-specific power management.

There's not a lot to be said about those low-power states except that they
are very system-specific, and often device-specific. Also, if enough
drivers put themselves into low-power states (at "runtime"), the effect may be
the same as entering some system-wide low-power state (system sleep) ... and
synergies exist, so that several drivers using runtime PM might put the
system into a state where even deeper power saving options are available.

Most suspended devices will have quiesced all I/O: no more DMA or IRQs, no
more data read or written, and requests from upstream drivers are no longer
accepted. A given bus or platform may have different requirements though.

Examples of hardware wakeup events include an alarm from a real-time clock,
network wake-on-LAN packets, keyboard or mouse activity, and media insertion
or removal (for PCMCIA, MMC/SD, USB, and so on).


Methods
Interfaces for Entering System Sleep States
===========================================
Most of the programming interfaces a device driver needs to know about
relate to that first model: entering a system-wide low-power state,
rather than just minimizing power consumption by one device.

The methods to suspend and resume devices reside in struct bus_type:

Bus Driver Methods
------------------
The core methods to suspend and resume devices reside in struct bus_type.
These are mostly of interest to people writing infrastructure for busses
like PCI or USB, or because they define the primitives that device drivers
may need to apply in domain-specific ways to their devices:

struct bus_type {
	...
	int (*suspend)(struct device * dev, pm_message_t state);
	int (*resume)(struct device * dev);
	...
	int (*suspend)(struct device *dev, pm_message_t state);
	int (*suspend_late)(struct device *dev, pm_message_t state);

	int (*resume_early)(struct device *dev);
	int (*resume)(struct device *dev);
};

Each bus driver is responsible for implementing these methods, translating
the call into a bus-specific request and forwarding the call to the
bus-specific drivers. For example, PCI drivers implement suspend() and
resume() methods in struct pci_driver. The PCI core is simply
responsible for translating the pointers to PCI-specific ones and
calling the low-level driver.
Bus drivers implement those methods as appropriate for the hardware and
the drivers using it; PCI works differently from USB, and so on. Not many
people write bus drivers; most driver code is a "device driver" that
builds on top of bus-specific framework code.

This is done to a) ease transition to the new power management methods
and leverage the existing PM code in various bus drivers; b) allow
buses to implement generic and default PM routines for devices, and c)
make the flow of execution obvious to the reader.
For more information on these driver calls, see the description later;
they are called in phases for every device, respecting the parent-child
sequencing in the driver model tree. Note that as this is being written,
only suspend() and resume() are widely available; not many bus drivers
leverage all of those phases, or pass them down to lower driver levels.


System Power Management
/sys/devices/.../power/wakeup files
-----------------------------------
All devices in the driver model have two flags to control handling of
wakeup events, which are hardware signals that can force the device and/or
system out of a low-power state. These are initialized by bus or device
driver code using device_init_wakeup(dev,can_wakeup).

When the system enters a low-power state, the device tree is walked in
a depth-first fashion to transition each device into a low-power
state. The ordering of the device tree is guaranteed by the order in
which devices get registered - children are never registered before
their ancestors, and devices are placed at the back of the list when
registered. By walking the list in reverse order, we are guaranteed to
suspend devices in the proper order.
The "can_wakeup" flag just records whether the device (and its driver) can
physically support wakeup events. When that flag is clear, the sysfs
"wakeup" file is empty, and device_may_wakeup() returns false.

Devices are suspended once with interrupts enabled. Drivers are
expected to stop I/O transactions, save device state, and place the
device into a low-power state. Drivers may sleep, allocate memory,
etc. at will.
For devices that can issue wakeup events, a separate flag controls whether
that device should try to use its wakeup mechanism. The initial value of
device_may_wakeup() will be true, so that the device's "wakeup" file holds
the value "enabled". Userspace can change that to "disabled" so that
device_may_wakeup() returns false; or change it back to "enabled" (so that
it returns true again).
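
As an illustrative sketch (not from the original text), a driver declares
at probe time that its hardware can physically issue wakeup events, and
honors the userspace-controlled policy flag at suspend time:

	#include <linux/device.h>

	static void my_wakeup_probe_setup(struct device *dev)
	{
		/* can_wakeup = 1 makes the sysfs "wakeup" file non-empty */
		device_init_wakeup(dev, 1);
	}

	static void my_wakeup_suspend_setup(struct device *dev)
	{
		if (device_may_wakeup(dev)) {
			/* arm the wakeup mechanism, e.g. enable_irq_wake() */
		}
	}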

Some devices are broken and will inevitably have problems powering
down or disabling themselves with interrupts enabled. For these
special cases, they may return -EAGAIN. This will put the device on a
list to be taken care of later. When interrupts are disabled, before
we enter the low-power state, their drivers are called again to put
their device to sleep.

On resume, the devices that returned -EAGAIN will be called to power
themselves back on with interrupts disabled. Once interrupts have been
re-enabled, the rest of the drivers will be called to resume their
devices. On resume, a driver is responsible for powering back on each
device, restoring state, and re-enabling I/O transactions for that
device.
EXAMPLE: PCI Device Driver Methods
-----------------------------------
PCI framework software calls these methods when the PCI device driver bound
to a device has provided them:

struct pci_driver {
	...
	int (*suspend)(struct pci_dev *pdev, pm_message_t state);
	int (*suspend_late)(struct pci_dev *pdev, pm_message_t state);

	int (*resume_early)(struct pci_dev *pdev);
	int (*resume)(struct pci_dev *pdev);
};

Drivers will implement those methods, and call PCI-specific procedures
like pci_set_power_state(), pci_enable_wake(), pci_save_state(), and
pci_restore_state() to manage PCI-specific mechanisms. (PCI config space
could be saved during driver probe, if it weren't for the fact that some
systems rely on userspace tweaking using setpci.) Devices are suspended
before their bridges enter low-power states, and likewise bridges resume
before their devices.
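
A hedged sketch (not part of the original document) of such methods; a
real driver would also quiesce its own I/O and check return values:

	#include <linux/pci.h>

	static int my_suspend(struct pci_dev *pdev, pm_message_t state)
	{
		/* ... stop this device's DMA and IRQs first ... */
		pci_save_state(pdev);
		pci_enable_wake(pdev, pci_choose_state(pdev, state),
				device_may_wakeup(&pdev->dev));
		pci_set_power_state(pdev, pci_choose_state(pdev, state));
		return 0;
	}

	static int my_resume(struct pci_dev *pdev)
	{
		pci_set_power_state(pdev, PCI_D0);
		pci_restore_state(pdev);
		pci_enable_wake(pdev, PCI_D0, 0);
		/* ... then reinitialize the hardware and restart I/O ... */
		return 0;
	}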
|
||||
|
||||
|
||||
Upper Layers of Driver Stacks
|
||||
-----------------------------
|
||||
Device drivers generally have at least two interfaces, and the methods
|
||||
sketched above are the ones which apply to the lower level (nearer PCI, USB,
|
||||
or other bus hardware). The network and block layers are examples of upper
|
||||
level interfaces, as is a character device talking to userspace.
|
||||
|
||||
Power management requests normally need to flow through those upper levels,
|
||||
which often use domain-oriented requests like "blank that screen". In
|
||||
some cases those upper levels will have power management intelligence that
|
||||
relates to end-user activity, or other devices that work in cooperation.
|
||||
|
||||
When those interfaces are structured using class interfaces, there is a
|
||||
standard way to have the upper layer stop issuing requests to a given
|
||||
class device (and restart later):
|
||||
|
||||
struct class {
|
||||
...
|
||||
int (*suspend)(struct device *dev, pm_message_t state);
|
||||
int (*resume)(struct device *dev);
|
||||
};
|
||||
|
||||
Those calls are issued in specific phases of the process by which the
|
||||
system enters a low power "suspend" state, or resumes from it.
|
||||
|
||||
|
||||
Calling Drivers to Enter System Sleep States
|
||||
============================================
|
||||
When the system enters a low power state, each device's driver is asked
|
||||
to suspend the device by putting it into state compatible with the target
|
||||
system state. That's usually some version of "off", but the details are
|
||||
system-specific. Also, wakeup-enabled devices will usually stay partly
|
||||
functional in order to wake the system.
|
||||
|
||||
When the system leaves that low power state, the device's driver is asked
|
||||
to resume it. The suspend and resume operations always go together, and
|
||||
both are multi-phase operations.
|
||||
|
||||
For simple drivers, suspend might quiesce the device using the class code
|
||||
and then turn its hardware as "off" as possible with late_suspend. The
|
||||
matching resume calls would then completely reinitialize the hardware
|
||||
before reactivating its class I/O queues.
|
||||
|
||||
More power-aware drivers drivers will use more than one device low power
|
||||
state, either at runtime or during system sleep states, and might trigger
|
||||
system wakeup events.
|
||||
|
||||
|
||||
Call Sequence Guarantees
|
||||
------------------------
|
||||
To ensure that bridges and similar links needed to talk to a device are
|
||||
available when the device is suspended or resumed, the device tree is
|
||||
walked in a bottom-up order to suspend devices. A top-down order is
|
||||
used to resume those devices.
|
||||
|
||||
The ordering of the device tree is defined by the order in which devices
|
||||
get registered: a child can never be registered, probed or resumed before
|
||||
its parent; and can't be removed or suspended after that parent.
|
||||
|
||||
The policy is that the device tree should match hardware bus topology.
|
||||
(Or at least the control bus, for devices which use multiple busses.)
|
||||
|
||||
|
||||
Suspending Devices
|
||||
------------------
|
||||
Suspending a given device is done in several phases. Suspending the
|
||||
system always includes every phase, executing calls for every device
|
||||
before the next phase begins. Not all busses or classes support all
|
||||
these callbacks; and not all drivers use all the callbacks.
|
||||
|
||||
The phases are seen by driver notifications issued in this order:
|
||||
|
||||
1 class.suspend(dev, message) is called after tasks are frozen, for
|
||||
devices associated with a class that has such a method. This
|
||||
method may sleep.
|
||||
|
||||
Since I/O activity usually comes from such higher layers, this is
|
||||
a good place to quiesce all drivers of a given type (and keep such
|
||||
code out of those drivers).
|
||||
|
||||
2 bus.suspend(dev, message) is called next. This method may sleep,
|
||||
and is often morphed into a device driver call with bus-specific
|
||||
parameters and/or rules.
|
||||
|
||||
This call should handle parts of device suspend logic that require
|
||||
sleeping. It probably does work to quiesce the device which hasn't
|
||||
been abstracted into class.suspend() or bus.suspend_late().
|
||||
|
||||
3 bus.suspend_late(dev, message) is called with IRQs disabled, and
|
||||
with only one CPU active. Until the bus.resume_early() phase
|
||||
completes (see later), IRQs are not enabled again. This method
|
||||
won't be exposed by all busses; for message based busses like USB,
|
||||
I2C, or SPI, device interactions normally require IRQs. This bus
|
||||
call may be morphed into a driver call with bus-specific parameters.
|
||||
|
||||
This call might save low level hardware state that might otherwise
|
||||
be lost in the upcoming low power state, and actually put the
|
||||
device into a low power state ... so that in some cases the device
|
||||
may stay partly usable until this late. This "late" call may also
|
||||
help when coping with hardware that behaves badly.
|
||||
|
||||
The pm_message_t parameter is currently used to refine those semantics
|
||||
(described later).
|
||||
|
||||
At the end of those phases, drivers should normally have stopped all I/O
|
||||
transactions (DMA, IRQs), saved enough state that they can re-initialize
|
||||
or restore previous state (as needed by the hardware), and placed the
|
||||
device into a low-power state. On many platforms they will also use
|
||||
clk_disable() to gate off one or more clock sources; sometimes they will
|
||||
also switch off power supplies, or reduce voltages. Drivers which have
|
||||
runtime PM support may already have performed some or all of the steps
|
||||
needed to prepare for the upcoming system sleep state.
|
||||
|
||||
When any driver sees that its device_can_wakeup(dev), it should make sure
|
||||
to use the relevant hardware signals to trigger a system wakeup event.
|
||||
For example, enable_irq_wake() might identify GPIO signals hooked up to
|
||||
a switch or other external hardware, and pci_enable_wake() does something
|
||||
similar for PCI's PME# signal.
|
||||
|
||||
If a driver (or bus, or class) fails it suspend method, the system won't
|
||||
enter the desired low power state; it will resume all the devices it's
|
||||
suspended so far.
|
||||
|
||||
Note that drivers may need to perform different actions based on the target
|
||||
system lowpower/sleep state. At this writing, there are only platform
|
||||
specific APIs through which drivers could determine those target states.


Device Low Power (suspend) States
---------------------------------
Device low-power states aren't very standard. One device might only handle
"on" and "off", while another might support a dozen different versions of
"on" (how many engines are active?), plus a state that gets back to "on"
faster than from a full "off".

Some busses define rules about what different suspend states mean. PCI
gives one example: after the suspend sequence completes, a non-legacy
PCI device may not perform DMA or issue IRQs, and any wakeup events it
issues would be issued through the PME# bus signal. Plus, there are
several PCI-standard device states, some of which are optional.

In contrast, integrated system-on-chip processors often use IRQs as the
wakeup event sources (so drivers would call enable_irq_wake) and might
be able to treat DMA completion as a wakeup event (sometimes DMA can stay
active too, it'd only be the CPU and some peripherals that sleep).

Some details here may be platform-specific. Systems may have devices that
can be fully active in certain sleep states, such as an LCD display that's
refreshed using DMA while most of the system is sleeping lightly ... and
its frame buffer might even be updated by a DSP or other non-Linux CPU while
the Linux control processor stays idle.

Moreover, the specific actions taken may depend on the target system state.
One target system state might allow a given device to be very operational;
another might require a hard shut down with re-initialization on resume.
And two different target systems might use the same device in different
ways; the aforementioned LCD might be active in one product's "standby",
but a different product using the same SOC might work differently.


Meaning of pm_message_t.event
-----------------------------
Parameters to suspend calls include the device affected and a message of
type pm_message_t, which has one field: the event. If the driver does not
recognize the event code, suspend calls may abort the request and return
a negative errno. However, most drivers will be fine if they implement
PM_EVENT_SUSPEND semantics for all messages.

The event codes are used to refine the goal of suspending the device, and
mostly matter when creating or resuming system memory image snapshots, as
used with suspend-to-disk:

    PM_EVENT_SUSPEND -- quiesce the driver and put hardware into a low-power
	state. When used with system sleep states like "suspend-to-RAM" or
	"standby", the upcoming resume() call will often be able to rely on
	state kept in hardware, or issue system wakeup events. When used
	instead with suspend-to-disk, few devices support this capability;
	most are completely powered off.

    PM_EVENT_FREEZE -- quiesce the driver, but don't necessarily change into
	any low power mode. A system snapshot is about to be taken, often
	followed by a call to the driver's resume() method. Neither wakeup
	events nor DMA are allowed.

    PM_EVENT_PRETHAW -- quiesce the driver, knowing that the upcoming resume()
	will restore a suspend-to-disk snapshot from a different kernel image.
	Drivers that are smart enough to look at their hardware state during
	resume() processing need that state to be correct ... a PRETHAW could
	be used to invalidate that state (by resetting the device), like a
	shutdown() invocation would before a kexec() or system halt. Other
	drivers might handle this the same way as PM_EVENT_FREEZE. Neither
	wakeup events nor DMA are allowed.

To enter "standby" (ACPI S1) or "Suspend to RAM" (STR, ACPI S3) states, or
the similarly named APM states, only PM_EVENT_SUSPEND is used; for "Suspend
to Disk" (STD, hibernate, ACPI S4), all of those event codes are used.

There's also PM_EVENT_ON, a value which never appears as a suspend event
but is sometimes used to record the "not suspended" device state.
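
Put together, a suspend() method honoring these event codes might look like
this minimal sketch (the xxx_* helpers and the xxx_priv structure are
hypothetical; only the pm_message_t handling pattern is the point):

	static int xxx_suspend(struct platform_device *pdev, pm_message_t mesg)
	{
		struct xxx_priv *priv = platform_get_drvdata(pdev);

		/* common quiesce: stop accepting new I/O, wait out DMA,
		 * and mask the device's IRQs */
		xxx_quiesce(priv);

		switch (mesg.event) {
		case PM_EVENT_PRETHAW:
			/* about to restore a snapshot from another kernel:
			 * reset so no stale hardware state survives */
			xxx_reset(priv);
			/* FALLTHROUGH */
		case PM_EVENT_FREEZE:
			/* leave the hardware powered; no wakeups, no DMA */
			break;
		case PM_EVENT_SUSPEND:
		default:
			/* save state the low power mode would lose, then
			 * cut the power drain */
			xxx_save_state(priv);
			xxx_enter_lowpower(priv);
			break;
		}
		return 0;
	}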


Resuming Devices
----------------
Resuming is done in multiple phases, much like suspending, with all
devices processing each phase's calls before the next phase begins.

The phases are seen by driver notifications issued in this order:

    1	bus.resume_early(dev) is called with IRQs disabled, and with
	only one CPU active. As with bus.suspend_late(), this method
	won't be supported on busses that require IRQs in order to
	interact with devices.

	This reverses the effects of bus.suspend_late().

    2	bus.resume(dev) is called next. This may be morphed into a device
	driver call with bus-specific parameters; implementations may sleep.

	This reverses the effects of bus.suspend().

    3	class.resume(dev) is called for devices associated with a class
	that has such a method. Implementations may sleep.

	This reverses the effects of class.suspend(), and would usually
	reactivate the device's I/O queue.

At the end of those phases, drivers should normally be as functional as
they were before suspending: I/O can be performed using DMA and IRQs, and
the relevant clocks are gated on. The device need not be "fully on"; it
might be in a runtime lowpower/suspend state that acts as if it were.

However, the details here may again be platform-specific. For example,
some systems support multiple "run" states, and the mode in effect at
the end of resume() might not be the one which preceded suspension.
That means availability of certain clocks or power supplies changed,
which could easily affect how a driver works.


Drivers need to be able to handle hardware which has been reset since the
suspend methods were called, for example by complete reinitialization.
This may be the hardest part, and the one most protected by NDA'd documents
and chip errata. It's simplest if the hardware state hasn't changed since
the suspend() was called, but that can't always be guaranteed.
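
A resume() method therefore often has to probe whether its saved state is
still valid. A minimal sketch, again with hypothetical xxx_* helpers:

	static int xxx_resume(struct platform_device *pdev)
	{
		struct xxx_priv *priv = platform_get_drvdata(pdev);

		if (xxx_state_retained(priv)) {
			/* power never dropped: restore only the few
			 * registers the low power mode lost */
			xxx_restore_state(priv);
		} else {
			/* power was cut (or a PRETHAW reset us): bring
			 * the device up from scratch */
			xxx_reinit_from_scratch(priv);
		}
		xxx_restart_io(priv);
		return 0;
	}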

Drivers must also be prepared to notice that the device has been removed
while the system was powered off, whenever that's physically possible.
PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
where common Linux platforms will see such removal. Details of how drivers
will notice and handle such removals are currently bus-specific, and often
involve a separate thread.


Note that the bus-specific runtime PM wakeup mechanism can exist, and might
be defined to share some of the same driver code as for system wakeup. For
example, a bus-specific device driver's resume() method might be used there,
so it wouldn't only be called from bus.resume() during system-wide wakeup.
See bus-specific information about how runtime wakeup events are handled.


System Devices
--------------
System devices follow a slightly different API, which can be found in

	include/linux/sysdev.h
	drivers/base/sys.c

System devices will only be suspended with interrupts disabled, and after
all other devices have been suspended. On resume, they will be resumed
before any other devices, and also with interrupts disabled.

That is, IRQs are disabled, the suspend_late() phase begins, then the
sysdev_driver.suspend() phase, and the system enters a sleep state. Then
the sysdev_driver.resume() phase begins, followed by the resume_early()
phase, after which IRQs are enabled.
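
As a sketch of that API (the "foo" system device is hypothetical; the
callback signatures follow include/linux/sysdev.h of this era):

	#include <linux/sysdev.h>

	static int foo_suspend(struct sys_device *dev, pm_message_t state)
	{
		/* called with IRQs disabled, after all regular devices
		 * have suspended */
		return 0;
	}

	static int foo_resume(struct sys_device *dev)
	{
		/* called with IRQs disabled, before any regular device
		 * resumes */
		return 0;
	}

	static struct sysdev_driver foo_sysdev_driver = {
		.suspend	= foo_suspend,
		.resume		= foo_resume,
	};

The driver would then be attached to its sysdev_class with
sysdev_driver_register().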

Code to actually enter and exit the system-wide low power state sometimes
involves hardware details that are only known to the boot firmware, and
may leave a CPU running software (from SRAM or flash memory) that monitors
the system and manages its wakeup sequence.


Runtime Power Management
========================
Many devices are able to dynamically power down while the system is still
running. This feature is useful for devices that are not being used, and
can offer significant power savings on a running system. These devices
often support a range of runtime power states, which might use names such
as "off", "sleep", "idle", "active", and so on. Those states will in some
cases (like PCI) be partially constrained by a bus the device uses, and will
usually include hardware states that are also used in system sleep states.
However, note that if a driver puts a device into a runtime low power state
and the system then goes into a system-wide sleep state, it normally ought
to resume into that runtime low power state rather than "full on". Such
distinctions would be part of the driver-internal state machine for that
hardware; the whole point of runtime power management is to be sure that
drivers are decoupled in that way from the state machine governing phases
of the system-wide power/sleep state transitions.

In each device's directory, there is a 'power' directory, which
contains at least a 'state' file. Reading from this file displays what
power state the device is currently in. Writing to this file initiates
a transition to the specified power state, which must be a decimal in
the range 1-3, inclusive; or 0 for 'On'.

The PM core will call the ->suspend() method in the bus_type object
that the device belongs to if the specified state is not 0, or
->resume() if it is.

Nothing will happen if the specified state is the same state the
device is currently in.

If the device is already in a low-power state, and the specified state
is another, but different, low-power state, the ->resume() method will
first be called to power the device back on, then ->suspend() will be
called again with the new state.

The driver is responsible for saving the working state of the device
and putting it into the low-power state specified. If this was
successful, it returns 0, and the device's power_state field is
updated.

The driver must take care to know whether or not it is able to
properly resume the device, including all steps of reinitialization
necessary. (This is the hardest part, and the one most protected by
NDA'd documents).

The driver must also take care not to suspend a device that is
currently in use. It is the driver's responsibility to provide its own
exclusion mechanisms.

The runtime power transition happens with interrupts enabled. If a
device cannot support being powered down with interrupts, it may
return -EAGAIN (as it would during a system power management
transition), but it will _not_ be called again, and the transaction
will fail.

There is currently no way to know what states a device or driver
supports a priori. This will change in the future.

Power Saving Techniques
-----------------------
Normally runtime power management is handled by the drivers without specific
userspace or kernel intervention, by device-aware use of techniques like:

    Using information provided by other system layers
	- stay deeply "off" except between open() and close()
	- if transceiver/PHY indicates "nobody connected", stay "off"
	- application protocols may include power commands or hints

    Using fewer CPU cycles
	- using DMA instead of PIO
	- removing timers, or making them lower frequency
	- shortening "hot" code paths
	- eliminating cache misses
	- (sometimes) offloading work to device firmware

    Reducing other resource costs
	- gating off unused clocks in software (or hardware)
	- switching off unused power supplies
	- eliminating (or delaying/merging) IRQs
	- tuning DMA to use word and/or burst modes

    Using device-specific low power states
	- using lower voltages
	- avoiding needless DMA transfers

Read your hardware documentation carefully to see the opportunities that
may be available. If you can, measure the actual power usage and check
it against the budget established for your project.
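
The clock-gating item above deserves a concrete illustration. A minimal
sketch, assuming the platform clock framework (<linux/clk.h>) and a
hypothetical clock named "xxx_clk":

	#include <linux/clk.h>
	#include <linux/err.h>
	#include <linux/platform_device.h>

	static int xxx_do_io(struct platform_device *pdev)
	{
		struct clk *clk = clk_get(&pdev->dev, "xxx_clk");

		if (IS_ERR(clk))
			return PTR_ERR(clk);

		clk_enable(clk);
		/* ... perform the I/O that needs the clock ... */
		clk_disable(clk);
		clk_put(clk);
		return 0;
	}

When every driver gates its clocks this aggressively, the platform's idle
code can tell that shared clock sources are unused and may switch to the
deeper idle modes discussed below.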

pm_message_t meaning

pm_message_t has two fields: event ("major"), and flags. If the driver
does not know the event code, it aborts the request, returning an error.
Some drivers may need to deal with special cases based on the actual type
of suspend operation being done at the system level. This is why
there are flags.

Event codes are:

    ON -- no need to do anything except special cases like broken
	HW.

    # NOTIFICATION -- pretty much same as ON?

    FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from
	scratch. That probably means stop accepting upstream requests, the
	actual policy of what to do with them being specific to a given
	driver. It's acceptable for a network driver to just drop packets
	while a block driver is expected to block the queue so no request is
	lost. (Use IDE as an example on how to do that.) FREEZE requires no
	power state change, and it's expected for drivers to be able to
	quickly transition back to operating state.

    SUSPEND -- like FREEZE, but also put hardware into low-power state. If
	there's need to distinguish several levels of sleep, additional flag
	is probably best way to do that.

Transitions are only from a resumed state to a suspended state, never
between 2 suspended states. (ON -> FREEZE or ON -> SUSPEND can happen,
FREEZE -> SUSPEND or SUSPEND -> FREEZE can not.)

All events are:

[NOTE NOTE NOTE: If you are a driver author, you should not care; you
should only look at event, and ignore flags.]

#Prepare for suspend -- userland is still running but we are going to
#enter suspend state. This gives drivers chance to load firmware from
#disk and store it in memory, or do other activities that require
#operating userland, ability to kmalloc GFP_KERNEL, etc... All of these
#are forbidden once the suspend dance is started. event = ON, flags =
#PREPARE_TO_SUSPEND

Apm standby -- prepare for APM event. Quiesce devices to make life
	easier for APM BIOS. event = FREEZE, flags = APM_STANDBY

Apm suspend -- same as APM_STANDBY, but we should probably avoid
	spinning down disks. event = FREEZE, flags = APM_SUSPEND

System halt, reboot -- quiesce devices to make life easier for BIOS. event
	= FREEZE, flags = SYSTEM_HALT or SYSTEM_REBOOT

System shutdown -- at least disks need to be spun down, or data may be
	lost. Quiesce devices, just to make life easier for BIOS. event =
	FREEZE, flags = SYSTEM_SHUTDOWN

Kexec -- turn off DMAs and put hardware into some state where new
	kernel can take over. event = FREEZE, flags = KEXEC

Powerdown at end of swsusp -- very similar to SYSTEM_SHUTDOWN, except wake
	may need to be enabled on some devices. This actually has at least 3
	subtypes, system can reboot, enter S4 and enter S5 at the end of
	swsusp. event = FREEZE, flags = SWSUSP and one of SYSTEM_REBOOT,
	SYSTEM_SHUTDOWN, SYSTEM_S4

Suspend to ram -- put devices into low power state. event = SUSPEND,
	flags = SUSPEND_TO_RAM

Freeze for swsusp snapshot -- stop DMA and interrupts. No need to put
	devices into low power mode, but you must be able to reinitialize
	device from scratch in resume method. This has two flavors, it's done
	once on suspending kernel, once on resuming kernel. event = FREEZE,
	flags = DURING_SUSPEND or DURING_RESUME

Device detach requested from /sys -- deinitialize device; probably same as
	SYSTEM_SHUTDOWN, I do not understand this one too much. probably event
	= FREEZE, flags = DEV_DETACH.

#These are not really events sent:
#
#System fully on -- device is working normally; this is probably never
#passed to suspend() method... event = ON, flags = 0
#
#Ready after resume -- userland is now running, again. Time to free any
#memory you ate during prepare to suspend... event = ON, flags =
#READY_AFTER_RESUME
#


Examples: USB hosts, system timer, system CPU
----------------------------------------------
USB host controllers make interesting, if complex, examples. In many cases
these have no work to do: no USB devices are connected, or all of them are
in the USB "suspend" state. Linux host controller drivers can then disable
periodic DMA transfers that would otherwise be a constant power drain on the
memory subsystem, and enter a suspend state. In power-aware controllers,
entering that suspend state may disable the clock used with USB signaling,
saving a certain amount of power.

The controller will be woken from that state (with an IRQ) by changes to the
signal state on the data lines of a given port, for example by an existing
peripheral requesting "remote wakeup" or by plugging a new peripheral. The
same wakeup mechanism usually works from "standby" sleep states, and on some
systems also from "suspend to RAM" (or even "suspend to disk") states.
(Except that ACPI may be involved instead of normal IRQs, on some hardware.)

System devices like timers and CPUs may have special roles in the platform
power management scheme. For example, system timers using a "dynamic tick"
approach don't just save CPU cycles (by eliminating needless timer IRQs),
but they may also open the door to using lower power CPU "idle" states that
cost more than a jiffie to enter and exit. On x86 systems these are states
like "C3"; note that periodic DMA transfers from a USB host controller will
also prevent entry to a C3 state, much like a periodic timer IRQ.

That kind of runtime mechanism interaction is common. "System On Chip" (SOC)
processors often have low power idle modes that can't be entered unless
certain medium-speed clocks (often 12 or 48 MHz) are gated off. When the
drivers gate those clocks effectively, then the system idle task may be able
to use the lower power idle modes and thereby increase battery life.

If the CPU can have a "cpufreq" driver, there also may be opportunities
to shift to lower voltage settings and reduce the power cost of executing
a given number of instructions. (Without voltage adjustment, it's rare
for cpufreq to save much power; the cost-per-instruction must go down.)


/sys/devices/.../power/state files
==================================
For now you can also test some of this functionality using sysfs.

	DEPRECATED: USE "power/state" ONLY FOR DRIVER TESTING, AND
	AVOID USING dev->power.power_state IN DRIVERS.

	THESE WILL BE REMOVED. IF THE "power/state" FILE GETS REPLACED,
	IT WILL BECOME SOMETHING COUPLED TO THE BUS OR DRIVER.

In each device's directory, there is a 'power' directory, which contains
at least a 'state' file. The value of this field is effectively boolean,
PM_EVENT_ON or PM_EVENT_SUSPEND.

	*	Reading from this file displays a value corresponding to
		the power.power_state.event field. All nonzero values are
		displayed as "2", corresponding to a low power state; zero
		is displayed as "0", corresponding to normal operation.

	*	Writing to this file initiates a transition using the
		specified event code number; only '0', '2', and '3' are
		accepted (without a newline); '2' and '3' are both
		mapped to PM_EVENT_SUSPEND.

On writes, the PM core relies on that recorded event code and the device/bus
capabilities to determine whether it uses a partial suspend() or resume()
sequence to change things so that the recorded event corresponds to the
numeric parameter.

    -	If the bus requires the irqs-disabled suspend_late()/resume_early()
	phases, writes fail because those operations are not supported here.

    -	If the recorded value is the expected value, nothing is done.

    -	If the recorded value is nonzero, the device is partially resumed,
	using the bus.resume() and/or class.resume() methods.

    -	If the target value is nonzero, the device is partially suspended,
	using the class.suspend() and/or bus.suspend() methods and the
	PM_EVENT_SUSPEND message.

Drivers have no way to tell whether their suspend() and resume() calls
have come through the sysfs power/state file or as part of entering a
system sleep state, except that when accessed through sysfs the normal
parent/child sequencing rules are ignored. Drivers (such as bus, bridge,
or hub drivers) which expose child devices may need to enforce those rules
on their own.

@@ -41,11 +41,6 @@ Board-specific code:

	.. more boards here ...

It should also be noted that each board is required to have certain
headers. At the time of this writing, io.h is the only thing that needs
to be provided for each board, and can generally just reference generic
functions (with the exception of isa_port2addr).

Next, for companion chips:
.
`-- arch

@@ -104,12 +99,13 @@ and then populate that with sub-directories for each member of the family.
Both the Solution Engine and the hp6xx boards are an example of this.

After you have set up your new arch/sh/boards/ directory, remember that you
also must add a directory in include/asm-sh for headers localized to this
board. In order to interoperate seamlessly with the build system, it's best
to have this directory the same as the arch/sh/boards/ directory name,
though if your board is again part of a family, the build system has ways
of dealing with this, and you can feel free to name the directory after
the family member itself.
should also add a directory in include/asm-sh for headers localized to this
board (if there are going to be more than one). In order to interoperate
seamlessly with the build system, it's best to have this directory the same
as the arch/sh/boards/ directory name, though if your board is again part of
a family, the build system has ways of dealing with this (via incdir-y
overloading), and you can feel free to name the directory after the family
member itself.

There are a few things that each board is required to have, both in the
arch/sh/boards and the include/asm-sh/ hierarchy. In order to better
@@ -122,6 +118,7 @@ might look something like:
 * arch/sh/boards/vapor/setup.c - Setup code for imaginary board
 */
#include <linux/init.h>
#include <asm/rtc.h> /* for board_time_init() */

const char *get_system_type(void)
{
@@ -152,79 +149,57 @@ int __init platform_setup(void)
}

Our new imaginary board will also have to tie into the machvec in order for it
to be of any use. Currently the machvec is slowly on its way out, but is still
required for the time being. As such, let us take a look at what needs to be
done for the machvec assignment.
to be of any use.

machvec functions fall into a number of categories:

    - I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
    - I/O remapping functions (ioremap etc)
    - some initialisation functions
    - a 'heartbeat' function
    - some miscellaneous flags
    - I/O mapping functions (ioport_map, ioport_unmap, etc).
    - a 'heartbeat' function.
    - PCI and IRQ initialization routines.
    - Consistent allocators (for boards that need special allocators,
      particularly for allocating out of some board-specific SRAM for DMA
      handles).

The tree can be built in two ways:
    - as a fully generic build. All drivers are linked in, and all functions
      go through the machvec
    - as a machine specific build. In this case only the required drivers
      will be linked in, and some macros may be redefined to not go through
      the machvec where performance is important (in particular IO functions).
There are machvec functions added and removed over time, so always be sure to
consult include/asm-sh/machvec.h for the current state of the machvec.

There are three ways in which IO can be performed:
    - none at all. This is really only useful for the 'unknown' machine type,
      which is designed to run on a machine about which we know nothing, and
      so all IO instructions do nothing.
    - fully custom. In this case all IO functions go to a machine specific
      set of functions which can do what they like
    - a generic set of functions. These will cope with most situations,
      and rely on a single function, mv_port2addr, which is called through the
      machine vector, and converts an IO address into a memory address, which
      can be read from/written to directly.
The kernel will automatically wrap in generic routines for undefined function
pointers in the machvec at boot time, as machvec functions are referenced
unconditionally throughout most of the tree. Some boards have incredibly
sparse machvecs (such as the dreamcast and sh03), whereas others must define
virtually everything (rts7751r2d).

Thus adding a new machine involves the following steps (I will assume I am
adding a machine called vapor):
Adding a new machine is relatively trivial (using vapor as an example):

 - add a new file include/asm-sh/vapor/io.h which contains prototypes for
If the board-specific definitions are quite minimalistic, as is the case for
the vast majority of boards, simply having a single board-specific header is
sufficient.

 - add a new file include/asm-sh/vapor.h which contains prototypes for
   any machine specific IO functions prefixed with the machine name, for
   example vapor_inb. These will be needed when filling out the machine
   vector.

   This is the minimum that is required, however there are ample
   opportunities to optimise this. In particular, by making the prototypes
   inline function definitions, it is possible to inline the function when
   building machine specific versions. Note that the machine vector
   functions will still be needed, so that a module built for a generic
   setup can be loaded.
   Note that these prototypes are generated automatically by setting
   __IO_PREFIX to something sensible. A typical example would be:

 - add a new file arch/sh/boards/vapor/mach.c. This contains the definition
   of the machine vector. When building the machine specific version, this
   will be the real machine vector (via an alias), while in the generic
   version it is used to initialise the machine vector, and then freed, by
   making it initdata. This should be defined as:

	#define __IO_PREFIX vapor
	#include <asm/io_generic.h>

	struct sh_machine_vector mv_vapor __initmv = {
		.mv_name = "vapor",
	}
	ALIAS_MV(vapor)

   somewhere in the board-specific header. Any boards being ported that still
   have a legacy io.h should remove it entirely and switch to the new model.

 - finally add a file arch/sh/boards/vapor/io.c, which contains
   definitions of the machine specific io functions.
 - Add machine vector definitions to the board's setup.c. At a bare minimum,
   this must be defined as something like:

A note about initialisation functions. Three initialisation functions are
provided in the machine vector:
    - mv_arch_init - called very early on from setup_arch
    - mv_init_irq - called from init_IRQ, after the generic SH interrupt
      initialisation
    - mv_init_pci - currently not used

	struct sh_machine_vector mv_vapor __initmv = {
		.mv_name = "vapor",
	};
	ALIAS_MV(vapor)

Any other remaining functions which need to be called at start up can be
added to the list using the __initcalls macro (or module_init if the code
can be built as a module). Many generic drivers probe to see if the device
they are targeting is present, however this may not always be appropriate,
so a flag can be added to the machine vector which will be set on those
machines which have the hardware in question, reducing the probe to a
single conditional.
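
Schematically, such a start-up hook for the hypothetical vapor board could
look like this (a sketch; the body is made up, only the registration
mechanism is real):

	static int __init vapor_devices_init(void)
	{
		/* register board-specific platform devices, configure
		 * board GPIOs, and so on */
		return 0;
	}
	__initcall(vapor_devices_init);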

 - finally add a file arch/sh/boards/vapor/io.c, which contains definitions of
   the machine specific io functions (if there are enough to warrant it).

3. Hooking into the Build System
================================
@@ -303,4 +278,3 @@ which will in turn copy the defconfig for this board, run it through
oldconfig (prompting you for any new options since the time of creation),
and start you on your way to having a functional kernel for your new
board.

Documentation/sh/register-banks.txt (new file, 33 lines)
@@ -0,0 +1,33 @@
Notes on register bank usage in the kernel
==========================================

Introduction
------------

The SH-3 and SH-4 CPU families traditionally include a single partial register
bank (selected by SR.RB, only r0 ... r7 are banked), whereas other families
may have more full-featured banking or simply no such capabilities at all.

SR.RB banking
-------------

In the case of this type of banking, banked registers are mapped directly to
r0 ... r7 if SR.RB is set to the bank we are interested in, otherwise ldc/stc
can still be used to reference the banked registers (as r0_bank ... r7_bank)
when in the context of another bank. The developer must keep the SR.RB value
in mind when writing code that utilizes these banked registers, for obvious
reasons. Userspace is also not able to poke at the bank1 values, so these can
be used rather effectively as scratch registers by the kernel.

Presently the kernel uses several of these registers.

	- r0_bank, r1_bank (referenced as k0 and k1, used for scratch
	  registers when doing exception handling)
	- r2_bank (used to track the EXPEVT/INTEVT code)
	  - Used by do_IRQ() and friends for doing irq mapping based off
	    of the interrupt exception vector jump table offset
	- r6_bank (global interrupt mask)
	  - The SR.IMASK interrupt handler makes use of this to set the
	    interrupt priority level (used by local_irq_enable())
	- r7_bank (current)
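
For instance, the current task pointer can be fetched straight out of
r7_bank with stc; this sketch is close to what include/asm-sh/current.h
does:

	static inline struct task_struct *get_current(void)
	{
		struct task_struct *tsk;

		/* r7_bank holds the current task pointer; stc copies it
		 * out of bank1 regardless of the SR.RB setting */
		__asm__("stc	r7_bank, %0" : "=r" (tsk));
		return tsk;
	}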

@@ -98,13 +98,13 @@ one or more packets could finish before an error stops further endpoint I/O.
			error, a failure to respond (often caused by
			device disconnect), or some other fault.

-ETIMEDOUT (**)		No response packet received within the prescribed
-ETIME (**)		No response packet received within the prescribed
			bus turn-around time. This error may instead be
			reported as -EPROTO or -EILSEQ.

			Note that the synchronous USB message functions
			also use this code to indicate timeout expired
			before the transfer completed.
-ETIMEDOUT		Synchronous USB message functions use this code
			to indicate timeout expired before the transfer
			completed, and no other error was reported by HC.

-EPIPE (**)		Endpoint stalled. For non-control endpoints,
			reset this status with usb_clear_halt().
@@ -163,6 +163,3 @@ usb_get_*/usb_set_*():
usb_control_msg():
usb_bulk_msg():
-ETIMEDOUT		Timeout expired before the transfer completed.
			In the future this code may change to -ETIME,
			whose definition is a closer match to this sort
			of error.

@@ -433,6 +433,11 @@ Options supported:
  See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
  information on this driver.

AIRcable USB Dongle Bluetooth driver
  If the cdc_acm driver is loaded in the system, you will find that
  cdc_acm claims the device before AIRcable can. This is simply corrected
  by unloading both modules and then loading the aircable module before
  the cdc_acm module.

Generic Serial driver


@@ -245,6 +245,13 @@ Debugging
		newfallback: use new unwinder but fall back to old if it gets
			stuck (default)

  call_trace=[old|both|newfallback|new]
		old: use old inexact backtracer
		new: use new exact dwarf2 unwinder
		both: print entries from both
		newfallback: use new unwinder but fall back to old if it gets
			stuck (default)

Misc

  noreplacement  Don't replace instructions with more appropriate ones
Documentation/x86_64/kernel-stacks (new file, 99 lines)
@@ -0,0 +1,99 @@
Most of the text from Keith Owens, hacked by AK

x86_64 page size (PAGE_SIZE) is 4K.

Like all other architectures, x86_64 has a kernel stack for every
active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big.
These stacks contain useful data as long as a thread is alive or a
zombie. While the thread is in user space the kernel stack is empty
except for the thread_info structure at the bottom.
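
Because the stack is THREAD_SIZE-aligned, the kernel can recover the
thread_info pointer by masking the stack pointer; this sketch mirrors the
stack_thread_info() helper in include/asm-x86_64/thread_info.h of this era:

	static inline struct thread_info *stack_thread_info(void)
	{
		struct thread_info *ti;

		/* round %rsp down to the THREAD_SIZE boundary where
		 * the thread_info structure lives */
		__asm__("andq %%rsp, %0" : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
		return ti;
	}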

In addition to the per thread stacks, there are specialized stacks
associated with each cpu. These stacks are only used while the kernel
is in control on that cpu, when a cpu returns to user space the
specialized stacks contain no useful data. The main cpu stacks are:

* Interrupt stack. IRQSTACKSIZE

  Used for external hardware interrupts. If this is the first external
  hardware interrupt (i.e. not a nested hardware interrupt) then the
  kernel switches from the current task to the interrupt stack. Like
  the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS),
  this gives more room for kernel interrupt processing without having
  to increase the size of every per thread stack.

  The interrupt stack is also used when processing a softirq.

Switching to the kernel interrupt stack is done by software based on a
per CPU interrupt nest counter. This is needed because x86-64 "IST"
hardware stacks cannot nest without races.

x86_64 also has a feature which is not available on i386, the ability
to automatically switch to a new stack for designated events such as
double fault or NMI, which makes it easier to handle these unusual
events on x86_64. This feature is called the Interrupt Stack Table
(IST). There can be up to 7 IST entries per cpu. The IST code is an
index into the Task State Segment (TSS), the IST entries in the TSS
point to dedicated stacks, each stack can be a different size.

An IST is selected by a non-zero value in the IST field of an
interrupt-gate descriptor. When an interrupt occurs and the hardware
loads such a descriptor, the hardware automatically sets the new stack
pointer based on the IST value, then invokes the interrupt handler. If
software wants to allow nested IST interrupts then the handler must
adjust the IST values on entry to and exit from the interrupt handler.
(This is occasionally done, e.g. for debug exceptions.)
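
Concretely, the kernel routes an exception through an IST stack by putting
the IST index into its interrupt gate; schematically (this mirrors the
set_intr_gate_ist() calls in arch/x86_64/kernel/traps.c, though exact call
sites vary by kernel version):

	/* vector 8 (#DF) and vector 12 (#SS) get dedicated stacks */
	set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
	set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);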

Events with different IST codes (i.e. with different stacks) can be
nested. For example, a debug interrupt can safely be interrupted by an
NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
pointers on entry to and exit from all IST events, in theory allowing
IST events with the same code to be nested. However in most cases, the
stack size allocated to an IST assumes no nesting for the same code.
If that assumption is ever broken then the stacks will become corrupt.

The currently assigned IST stacks are :-

* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).

  Used for interrupt 12 - Stack Fault Exception (#SS).

  This allows the kernel to recover from invalid stack segments. Rarely
  happens.

* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).

  Used for interrupt 8 - Double Fault Exception (#DF).

  Invoked when handling an exception causes another exception. Happens
  when the kernel is very confused (e.g. kernel stack pointer corrupt).
  Using a separate stack allows the kernel to recover from it well enough
  in many cases to still output an oops.

* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).

  Used for non-maskable interrupts (NMI).

  NMI can be delivered at any time, including when the kernel is in the
  middle of switching stacks. Using IST for NMI events avoids making
  assumptions about the previous state of the kernel stack.

* DEBUG_STACK. DEBUG_STKSZ

  Used for hardware debug interrupts (interrupt 1) and for software
  debug interrupts (INT3).

  When debugging a kernel, debug interrupts (both hardware and
  software) can occur at any time. Using IST for these interrupts
  avoids making assumptions about the previous state of the kernel
  stack.

* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).

  Used for interrupt 18 - Machine Check Exception (#MC).

  MCE can be delivered at any time, including when the kernel is in the
  middle of switching stacks. Using IST for MCE events avoids making
  assumptions about the previous state of the kernel stack.

For more details see the Intel IA32 or AMD AMD64 architecture manuals.
Makefile (6 lines changed)
@@ -1385,9 +1385,13 @@ endif #ifeq ($(config-targets),1)
endif #ifeq ($(mixed-targets),1)

PHONY += checkstack kernelrelease kernelversion

# Use $(SUBARCH) here instead of $(ARCH) so that this works for UML.
# In the UML case, $(SUBARCH) is the name of the underlying
# architecture, while for all other arches, it is the same as $(ARCH).
checkstack:
	$(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
	$(PERL) $(src)/scripts/checkstack.pl $(ARCH)
	$(PERL) $(src)/scripts/checkstack.pl $(SUBARCH)

kernelrelease:
	$(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \
@@ -284,21 +284,9 @@ static struct pxaficp_platform_data corgi_ficp_platform_data = {
/*
 * USB Device Controller
 */
static void corgi_udc_command(int cmd)
{
	switch(cmd)	{
	case PXA2XX_UDC_CMD_CONNECT:
		GPSR(CORGI_GPIO_USB_PULLUP) = GPIO_bit(CORGI_GPIO_USB_PULLUP);
		break;
	case PXA2XX_UDC_CMD_DISCONNECT:
		GPCR(CORGI_GPIO_USB_PULLUP) = GPIO_bit(CORGI_GPIO_USB_PULLUP);
		break;
	}
}

static struct pxa2xx_udc_mach_info udc_info __initdata = {
	/* no connect GPIO; corgi can't tell connection status */
	.udc_command		= corgi_udc_command,
	.gpio_pullup		= CORGI_GPIO_USB_PULLUP,
};


@@ -350,7 +338,6 @@ static void __init corgi_init(void)
	corgi_ssp_set_machinfo(&corgi_ssp_machinfo);

	pxa_gpio_mode(CORGI_GPIO_IR_ON | GPIO_OUT);
	pxa_gpio_mode(CORGI_GPIO_USB_PULLUP | GPIO_OUT);
	pxa_gpio_mode(CORGI_GPIO_HSYNC | GPIO_IN);

	pxa_set_udc_info(&udc_info);
@@ -26,7 +26,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/usb_otg.h>
#include <linux/usb/otg.h>

#include <asm/io.h>
#include <asm/irq.h>

@@ -15,7 +15,8 @@

void show_dtlb_entry(unsigned int index)
{
	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
	unsigned long flags;

	local_irq_save(flags);
	mmucr_save = sysreg_read(MMUCR);
@@ -305,7 +306,8 @@ static void tlb_stop(struct seq_file *tlb, void *v)

static int tlb_show(struct seq_file *tlb, void *v)
{
	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
	unsigned long flags;
	unsigned long *index = v;

	if (*index == 0)
@@ -166,7 +166,6 @@ config X86_VISWS

config X86_GENERICARCH
	bool "Generic architecture (Summit, bigsmp, ES7000, default)"
	depends on SMP
	help
	  This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
	  It is intended for a generic binary kernel.
@@ -263,7 +262,7 @@ source "kernel/Kconfig.preempt"

config X86_UP_APIC
	bool "Local APIC support on uniprocessors"
	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
	depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
	help
	  A local APIC (Advanced Programmable Interrupt Controller) is an
	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +287,12 @@ config X86_UP_IOAPIC

config X86_LOCAL_APIC
	bool
	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH
	default y

config X86_IO_APIC
	bool
	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH
	default y

config X86_VISWS_APIC
@@ -402,6 +401,7 @@ config X86_REBOOTFIXUPS

config MICROCODE
	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
	select FW_LOADER
	---help---
	  If you say Y here and also to "/dev file system support" in the
	  'File systems' section, you will be able to update the microcode on
@@ -417,6 +417,11 @@ config MICROCODE
	  To compile this driver as a module, choose M here: the
	  module will be called microcode.

config MICROCODE_OLD_INTERFACE
	bool
	depends on MICROCODE
	default y

config X86_MSR
	tristate "/dev/cpu/*/msr - Model-specific register support"
	help
@@ -599,12 +604,10 @@ config ARCH_SELECT_MEMORY_MODEL
	def_bool y
	depends on ARCH_SPARSEMEM_ENABLE

source "mm/Kconfig"
config ARCH_POPULATES_NODE_MAP
	def_bool y

config HAVE_ARCH_EARLY_PFN_TO_NID
	bool
	default y
	depends on NUMA
source "mm/Kconfig"

config HIGHPTE
	bool "Allocate 3rd-level pagetables from highmem"
@@ -741,8 +744,7 @@ config SECCOMP
source kernel/Kconfig.hz

config KEXEC
	bool "kexec system call (EXPERIMENTAL)"
	depends on EXPERIMENTAL
	bool "kexec system call"
	help
	  kexec is a system call that implements the ability to shutdown your
	  current kernel, and to start another kernel. It is like a reboot
@@ -763,6 +765,13 @@ config CRASH_DUMP
	depends on HIGHMEM
	help
	  Generate crash dump after being started by kexec.
	  This should be normally only set in special crash dump kernels
	  which are loaded in the main kernel with kexec-tools into
	  a specially reserved region and then later executed after
	  a crash by kdump/kexec. The crash dump kernel must be compiled
	  to a memory address not used by the main kernel or BIOS using
	  PHYSICAL_START.
	  For more details see Documentation/kdump/kdump.txt

config PHYSICAL_START
	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
@@ -46,6 +46,14 @@ cflags-y += -ffreestanding
# a lot more stack due to the lack of sharing of stacklots:
CFLAGS				+= $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)

# do binutils support CFI?
cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)

# is .cfi_signal_frame supported too?
cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)

CFLAGS += $(cflags-y)

# Default subarch .c files
@@ -15,42 +15,95 @@
#include <asm/setup.h>

#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)

# It is assumed that %ds == INITSEG here

	movb	$0, (EDD_MBR_SIG_NR_BUF)
	movb	$0, (EDDNR)

# Check the command line for two options:
# Check the command line for options:
#	edd=of	disables EDD completely	(edd=off)
#	edd=sk	skips the MBR test	(edd=skipmbr)
#	edd=on	re-enables EDD		(edd=on)

	pushl	%esi
	cmpl	$0, %cs:cmd_line_ptr
	jz	done_cl
	movw	$edd_mbr_sig_start, %di		# Default to edd=on

	movl	%cs:(cmd_line_ptr), %esi
#	ds:esi has the pointer to the command line now
	movl	$(COMMAND_LINE_SIZE-7), %ecx
# loop through kernel command line one byte at a time
cl_loop:
	cmpl	$EDD_CL_EQUALS, (%si)
	andl	%esi, %esi
	jz	old_cl		# Old boot protocol?

# Convert to a real-mode pointer in fs:si
	movl	%esi, %eax
	shrl	$4, %eax
	movw	%ax, %fs
	andw	$0xf, %si
	jmp	have_cl_pointer

# Old-style boot protocol?
old_cl:
	push	%ds		# aka INITSEG
	pop	%fs

	cmpw	$0xa33f, (0x20)
	jne	done_cl		# No command line at all?
	movw	(0x22), %si	# Pointer relative to INITSEG

# fs:si has the pointer to the command line now
have_cl_pointer:

# Loop through kernel command line one byte at a time. Just in
# case the loader is buggy and failed to null-terminate the command line
# terminate if we get close enough to the end of the segment that we
# cannot fit "edd=XX"...
cl_atspace:
	cmpw	$-5, %si	# Watch for segment wraparound
	jae	done_cl
	movl	%fs:(%si), %eax
	andb	%al, %al	# End of line?
	jz	done_cl
	cmpl	$EDD_CL_EQUALS, %eax
	jz	found_edd_equals
	incl	%esi
	loop	cl_loop
	jmp	done_cl
	cmpb	$0x20, %al	# <= space consider whitespace
	ja	cl_skipword
	incw	%si
	jmp	cl_atspace

cl_skipword:
	cmpw	$-5, %si	# Watch for segment wraparound
	jae	done_cl
	movb	%fs:(%si), %al	# End of string?
	andb	%al, %al
	jz	done_cl
	cmpb	$0x20, %al
	jbe	cl_atspace
	incw	%si
	jmp	cl_skipword

found_edd_equals:
# only looking at first two characters after equals
	addl	$4, %esi
	cmpw	$EDD_CL_OFF, (%si)	# edd=of
	jz	do_edd_off
	cmpw	$EDD_CL_SKIP, (%si)	# edd=sk
	jz	do_edd_skipmbr
	jmp	done_cl
# late overrides early on the command line, so keep going after finding something
	movw	%fs:4(%si), %ax
	cmpw	$EDD_CL_OFF, %ax	# edd=of
	je	do_edd_off
	cmpw	$EDD_CL_SKIP, %ax	# edd=sk
	je	do_edd_skipmbr
	cmpw	$EDD_CL_ON, %ax		# edd=on
	je	do_edd_on
	jmp	cl_skipword
do_edd_skipmbr:
	popl	%esi
	jmp	edd_start
	movw	$edd_start, %di
	jmp	cl_skipword
do_edd_off:
	popl	%esi
	jmp	edd_done
	movw	$edd_done, %di
	jmp	cl_skipword
do_edd_on:
	movw	$edd_mbr_sig_start, %di
	jmp	cl_skipword

done_cl:
	popl	%esi

	jmpw	*%di

# Read the first sector of each BIOS disk device and store the 4-byte signature
edd_mbr_sig_start:
@@ -494,12 +494,12 @@ no_voyager:
	movw	%cs, %ax		# aka SETUPSEG
	subw	$DELTA_INITSEG, %ax	# aka INITSEG
	movw	%ax, %ds
	movw	$0, (0x1ff)		# default is no pointing device
	movb	$0, (0x1ff)		# default is no pointing device
	int	$0x11			# int 0x11: equipment list
	testb	$0x04, %al		# check if mouse installed
	jz	no_psmouse

	movw	$0xAA, (0x1ff)		# device present
	movb	$0xAA, (0x1ff)		# device present
no_psmouse:

#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)

arch/i386/defconfig: diff suppressed because it is too large (1063 lines changed)
@@ -4,7 +4,7 @@

extra-y := head.o init_task.o vmlinux.lds

obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
obj-y	:= process.o signal.o entry.o traps.o irq.o \
		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
		pci-dma.o i386_ksyms.o i387.o bootflag.o \
		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
@@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
	$(call if_changed,syscall)

k8-y			+= ../../x86_64/kernel/k8.o
stacktrace-y		+= ../../x86_64/kernel/stacktrace.o


@@ -1,5 +1,7 @@
obj-$(CONFIG_ACPI) += boot.o
ifneq ($(CONFIG_PCI),)
obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
endif
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o

ifneq ($(CONFIG_ACPI_PROCESSOR),)
@ -26,9 +26,12 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/efi.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/ioport.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/io_apic.h>
|
||||
@ -36,11 +39,17 @@
|
||||
#include <asm/io.h>
|
||||
#include <asm/mpspec.h>
|
||||
|
||||
static int __initdata acpi_force = 0;
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
int acpi_disabled = 0;
|
||||
#else
|
||||
int acpi_disabled = 1;
|
||||
#endif
|
||||
EXPORT_SYMBOL(acpi_disabled);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
extern void __init clustered_apic_check(void);
|
||||
|
||||
extern int gsi_irq_sharing(int gsi);
|
||||
#include <asm/proto.h>
|
||||
|
||||
static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
|
||||
@ -506,16 +515,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
|
||||
#ifdef CONFIG_ACPI_HOTPLUG_CPU
|
||||
int acpi_map_lsapic(acpi_handle handle, int *pcpu)
|
||||
{
|
||||
/* TBD */
|
||||
return -EINVAL;
|
||||
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
union acpi_object *obj;
|
||||
struct acpi_table_lapic *lapic;
|
||||
cpumask_t tmp_map, new_map;
|
||||
u8 physid;
|
||||
int cpu;
|
||||
|
||||
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
|
||||
return -EINVAL;
|
||||
|
||||
if (!buffer.length || !buffer.pointer)
|
||||
return -EINVAL;
|
||||
|
||||
obj = buffer.pointer;
|
||||
if (obj->type != ACPI_TYPE_BUFFER ||
|
||||
obj->buffer.length < sizeof(*lapic)) {
|
||||
kfree(buffer.pointer);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
|
||||
|
||||
if ((lapic->header.type != ACPI_MADT_LAPIC) ||
|
||||
(!lapic->flags.enabled)) {
|
||||
kfree(buffer.pointer);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
physid = lapic->id;
|
||||
|
||||
kfree(buffer.pointer);
|
||||
buffer.length = ACPI_ALLOCATE_BUFFER;
|
||||
buffer.pointer = NULL;
|
||||
|
||||
tmp_map = cpu_present_map;
|
||||
mp_register_lapic(physid, lapic->flags.enabled);
|
||||
|
||||
/*
|
||||
* If mp_register_lapic successfully generates a new logical cpu
|
||||
* number, then the following will get us exactly what was mapped
|
||||
*/
|
||||
cpus_andnot(new_map, cpu_present_map, tmp_map);
|
||||
if (cpus_empty(new_map)) {
|
||||
printk ("Unable to map lapic to logical cpu number\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cpu = first_cpu(new_map);
|
||||
|
||||
*pcpu = cpu;
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(acpi_map_lsapic);
|
||||
|
||||
int acpi_unmap_lsapic(int cpu)
|
||||
{
|
||||
/* TBD */
|
||||
return -EINVAL;
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
|
||||
x86_acpiid_to_apicid[i] = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
x86_cpu_to_apicid[cpu] = -1;
|
||||
cpu_clear(cpu, cpu_present_map);
|
||||
num_processors--;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(acpi_unmap_lsapic);
|
||||
@@ -579,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)

static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
	struct acpi_table_hpet *hpet_tbl;
	struct resource *hpet_res;
	resource_size_t res_start;

	if (!phys || !size)
		return -EINVAL;

@@ -594,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
		       "memory.\n");
		return -1;
	}

#define HPET_RESOURCE_NAME_SIZE 9
	hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
	if (hpet_res) {
		memset(hpet_res, 0, sizeof(*hpet_res));
		hpet_res->name = (void *)&hpet_res[1];
		hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
			 "HPET %u", hpet_tbl->number);
		hpet_res->end = (1 * 1024) - 1;
	}

#ifdef CONFIG_X86_64
	vxtime.hpet_address = hpet_tbl->addr.addrl |
	    ((long)hpet_tbl->addr.addrh << 32);

	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
	       hpet_tbl->id, vxtime.hpet_address);

	res_start = vxtime.hpet_address;
#else /* X86 */
	{
		extern unsigned long hpet_address;

@@ -607,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
		hpet_address = hpet_tbl->addr.addrl;
		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
		       hpet_tbl->id, hpet_address);

		res_start = hpet_address;
	}
#endif /* X86 */

	if (hpet_res) {
		hpet_res->start = res_start;
		hpet_res->end += res_start;
		insert_resource(&iomem_resource, hpet_res);
	}

	return 0;
}
#else
@@ -860,8 +953,6 @@ static void __init acpi_process_madt(void)
	return;
}

extern int acpi_force;

#ifdef __i386__

static int __init disable_acpi_irq(struct dmi_system_id *d)
@@ -1163,3 +1254,75 @@ int __init acpi_boot_init(void)

	return 0;
}

static int __init parse_acpi(char *arg)
{
	if (!arg)
		return -EINVAL;

	/* "acpi=off" disables both ACPI table parsing and interpreter */
	if (strcmp(arg, "off") == 0) {
		disable_acpi();
	}
	/* acpi=force to over-ride black-list */
	else if (strcmp(arg, "force") == 0) {
		acpi_force = 1;
		acpi_ht = 1;
		acpi_disabled = 0;
	}
	/* acpi=strict disables out-of-spec workarounds */
	else if (strcmp(arg, "strict") == 0) {
		acpi_strict = 1;
	}
	/* Limit ACPI just to boot-time to enable HT */
	else if (strcmp(arg, "ht") == 0) {
		if (!acpi_force)
			disable_acpi();
		acpi_ht = 1;
	}
	/* "acpi=noirq" disables ACPI interrupt routing */
	else if (strcmp(arg, "noirq") == 0) {
		acpi_noirq_set();
	} else {
		/* Core will printk when we return error. */
		return -EINVAL;
	}
	return 0;
}
early_param("acpi", parse_acpi);

/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
	if (arg && strcmp(arg, "noacpi") == 0)
		acpi_disable_pci();
	return 0;
}
early_param("pci", parse_pci);

#ifdef CONFIG_X86_IO_APIC
static int __init parse_acpi_skip_timer_override(char *arg)
{
	acpi_skip_timer_override = 1;
	return 0;
}
early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
#endif /* CONFIG_X86_IO_APIC */

static int __init setup_acpi_sci(char *s)
{
	if (!s)
		return -EINVAL;
	if (!strcmp(s, "edge"))
		acpi_sci_flags.trigger = 1;
	else if (!strcmp(s, "level"))
		acpi_sci_flags.trigger = 3;
	else if (!strcmp(s, "high"))
		acpi_sci_flags.polarity = 1;
	else if (!strcmp(s, "low"))
		acpi_sci_flags.polarity = 3;
	else
		return -EINVAL;
	return 0;
}
early_param("acpi_sci", setup_acpi_sci);
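The block of boot-option handlers added to acpi/boot.c above all share one shape: an __init parser receives the option's value string and returns 0 on success, or -EINVAL so the core prints an "unknown parameter" complaint. A minimal sketch of that shape with a hypothetical "foo" option (the handler name and flag are illustrative, not part of this commit):

#include <linux/init.h>
#include <linux/string.h>

static int foo_enabled __initdata;	/* hypothetical flag */

static int __init parse_foo(char *arg)
{
	if (!arg)
		return -EINVAL;		/* "foo" given with no value */
	if (strcmp(arg, "on") == 0)
		foo_enabled = 1;
	else if (strcmp(arg, "off") == 0)
		foo_enabled = 0;
	else
		return -EINVAL;		/* core code prints the warning */
	return 0;
}
early_param("foo", parse_foo);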
@@ -48,7 +48,11 @@ void __init check_acpi_pci(void)
	int num, slot, func;

	/* Assume the machine supports type 1. If not it will
	   always read ffffffff and should not have any side effect. */
	   always read ffffffff and should not have any side effect.
	   Actually a few buggy systems can machine check. Allow the user
	   to disable it by command line option at least -AK */
	if (!early_pci_allowed())
		return;

	/* Poor man's PCI discovery */
	for (num = 0; num < 32; num++) {
@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi;
/*
 * Knob to control our willingness to enable the local APIC.
 */
int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */

static inline void lapic_disable(void)
{
	enable_local_apic = -1;
	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
}

static inline void lapic_enable(void)
{
	enable_local_apic = 1;
}

/*
 * Debug level
@@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void)
			printk("No ESR for 82489DX.\n");
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		setup_apic_nmi_watchdog();
		setup_apic_nmi_watchdog(NULL);
	apic_pm_activate();
}

@@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void)

	return 0;
}

static int __init parse_lapic(char *arg)
{
	lapic_enable();
	return 0;
}
early_param("lapic", parse_lapic);

static int __init parse_nolapic(char *arg)
{
	lapic_disable();
	return 0;
}
early_param("nolapic", parse_nolapic);
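lapic_disable() above pairs the tri-state knob with clearing the APIC feature bit in the boot CPU's capability mask, so later detection code sees the feature as absent. A standalone sketch of that bitmap idiom, assuming only <linux/bitops.h> (the bit index and array are stand-ins, not this file's):

#include <linux/bitops.h>

#define MY_FEATURE_BIT	9			/* illustrative bit index */
static unsigned long my_caps[4];		/* stand-in for x86_capability[] */

static void force_my_feature_off(void)
{
	clear_bit(MY_FEATURE_BIT, my_caps);	/* detection now sees it absent */
}

static int my_feature_present(void)
{
	return test_bit(MY_FEATURE_BIT, my_caps);
}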
@@ -22,7 +22,7 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");

static void __init init_amd(struct cpuinfo_x86 *c)
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int mbytes = num_physpages >> (20-PAGE_SHIFT);
@@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c)
		num_cache_leaves = 3;
}

static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
	/* AMD errata T13 (order #21922) */
	if ((c->x86 == 6)) {
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
	return size;
}

static struct cpu_dev amd_cpu_dev __initdata = {
static struct cpu_dev amd_cpu_dev __cpuinitdata = {
	.c_vendor = "AMD",
	.c_ident = { "AuthenticAMD" },
	.c_models = {
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = {
		},
	},
	.c_init = init_amd,
	.c_identify = generic_identify,
	.c_size_cache = amd_size_cache,
};
@@ -9,7 +9,7 @@

#ifdef CONFIG_X86_OOSTORE

static u32 __init power2(u32 x)
static u32 __cpuinit power2(u32 x)
{
	u32 s=1;
	while(s<=x)
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x)
 * Set up an actual MCR
 */

static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
	u32 lo, hi;

@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
 * Shortcut: We know you can't put 4Gig of RAM on a winchip
 */

static u32 __init ramtop(void)		/* 16388 */
static u32 __cpuinit ramtop(void)	/* 16388 */
{
	int i;
	u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */
 * Compute a set of MCR's to give maximum coverage
 */

static int __init centaur_mcr_compute(int nr, int key)
static int __cpuinit centaur_mcr_compute(int nr, int key)
{
	u32 mem = ramtop();
	u32 root = power2(mem);
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key)
	return ct;
}

static void __init centaur_create_optimal_mcr(void)
static void __cpuinit centaur_create_optimal_mcr(void)
{
	int i;
	/*
@@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

static void __init winchip2_create_optimal_mcr(void)
static void __cpuinit winchip2_create_optimal_mcr(void)
{
	u32 lo, hi;
	int i;
@@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void)
 * Handle the MCR key on the Winchip 2.
 */

static void __init winchip2_unprotect_mcr(void)
static void __cpuinit winchip2_unprotect_mcr(void)
{
	u32 lo, hi;
	u32 key;
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void)
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}

static void __init winchip2_protect_mcr(void)
static void __cpuinit winchip2_protect_mcr(void)
{
	u32 lo, hi;

@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void)
#define RNG_ENABLED	(1 << 3)
#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */

static void __init init_c3(struct cpuinfo_x86 *c)
static void __cpuinit init_c3(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c)
	display_cacheinfo(c);
}

static void __init init_centaur(struct cpuinfo_x86 *c)
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
	enum {
		ECX8=1<<1,
@@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c)
		}
	}

static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
	/* VIA C3 CPUs (670-68F) need further shifting. */
	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size
	return size;
}

static struct cpu_dev centaur_cpu_dev __initdata = {
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
	.c_vendor = "Centaur",
	.c_ident = { "CentaurHauls" },
	.c_init = init_centaur,
@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};

extern int disable_pse;

static void default_init(struct cpuinfo_x86 * c)
static void __cpuinit default_init(struct cpuinfo_x86 * c)
{
	/* Not much we can do here... */
	/* Check if at least it has cpuid */
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c)
	}
}

static struct cpu_dev default_cpu = {
static struct cpu_dev __cpuinitdata default_cpu = {
	.c_init	= default_init,
	.c_vendor = "Unknown",
};
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void)
	}
}

void __cpuinit generic_identify(struct cpuinfo_x86 * c)
static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
	u32 tfms, xlvl;
	int ebx;
@@ -675,7 +675,7 @@ old_gdt:
#endif

	/* Clear %fs and %gs. */
	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
	asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));

	/* Clear all 6 debug registers: */
	set_debugreg(0, 0);
@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
extern int get_model_name(struct cpuinfo_x86 *c);
extern void display_cacheinfo(struct cpuinfo_x86 *c);

extern void generic_identify(struct cpuinfo_x86 * c);

extern void early_intel_workaround(struct cpuinfo_x86 *c);

@@ -12,7 +12,7 @@
/*
 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
 */
static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
	unsigned char ccr2, ccr3;
	unsigned long flags;
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
 * Actually since bugs.h doesn't even reference this perhaps someone should
 * fix the documentation ???
 */
static unsigned char Cx86_dir0_msb __initdata = 0;
static unsigned char Cx86_dir0_msb __cpuinitdata = 0;

static char Cx86_model[][9] __initdata = {
static char Cx86_model[][9] __cpuinitdata = {
	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
	"M II ", "Unknown"
};
static char Cx486_name[][5] __initdata = {
static char Cx486_name[][5] __cpuinitdata = {
	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
	"SRx2", "DRx2"
};
static char Cx486S_name[][4] __initdata = {
static char Cx486S_name[][4] __cpuinitdata = {
	"S", "S2", "Se", "S2e"
};
static char Cx486D_name[][4] __initdata = {
static char Cx486D_name[][4] __cpuinitdata = {
	"DX", "DX2", "?", "?", "?", "DX4"
};
static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
static char cyrix_model_mult1[] __initdata = "12??43";
static char cyrix_model_mult2[] __initdata = "12233445";
static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
static char cyrix_model_mult1[] __cpuinitdata = "12??43";
static char cyrix_model_mult2[] __cpuinitdata = "12233445";

/*
 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445";

extern void calibrate_delay(void) __init;

static void __init check_cx686_slop(struct cpuinfo_x86 *c)
static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
	unsigned long flags;

@@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c)
}


static void __init set_cx86_reorder(void)
static void __cpuinit set_cx86_reorder(void)
{
	u8 ccr3;

@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void)
	setCx86(CX86_CCR3, ccr3);
}

static void __init set_cx86_memwb(void)
static void __cpuinit set_cx86_memwb(void)
{
	u32 cr0;

@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void)
	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
}

static void __init set_cx86_inc(void)
static void __cpuinit set_cx86_inc(void)
{
	unsigned char ccr3;

@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void)
 * Configure later MediaGX and/or Geode processor.
 */

static void __init geode_configure(void)
static void __cpuinit geode_configure(void)
{
	unsigned long flags;
	u8 ccr3, ccr4;
@@ -184,14 +184,14 @@ static void __init geode_configure(void)


#ifdef CONFIG_PCI
static struct pci_device_id __initdata cyrix_55x0[] = {
static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
	{ },
};
#endif

static void __init init_cyrix(struct cpuinfo_x86 *c)
static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
	unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
	char *buf = c->x86_model_id;
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
/*
 * Handle National Semiconductor branded processors
 */
static void __init init_nsc(struct cpuinfo_x86 *c)
static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
{
	/* There may be GX1 processors in the wild that are branded
	 * NSC and not Cyrix.
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
	return (unsigned char) (test >> 8) == 0x02;
}

static void cyrix_identify(struct cpuinfo_x86 * c)
static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
{
	/* Detect Cyrix with disabled CPUID */
	if ( c->x86 == 4 && test_cyrix_52div() ) {
@@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c)
			local_irq_restore(flags);
		}
	}
	generic_identify(c);
}

static struct cpu_dev cyrix_cpu_dev __initdata = {
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
	.c_vendor = "Cyrix",
	.c_ident = { "CyrixInstead" },
	.c_init = init_cyrix,
@@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void)

late_initcall(cyrix_exit_cpu);

static struct cpu_dev nsc_cpu_dev __initdata = {
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
	.c_vendor = "NSC",
	.c_ident = { "Geode by NSC" },
	.c_init = init_nsc,
	.c_identify = generic_identify,
};

int __init nsc_init_cpu(void)
@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}


static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
{
	/* Intel PIII Tualatin. This comes in two flavours.
	 * One has 256kb of cache, the other 512. We have no way
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
		},
	},
	.c_init = init_intel,
	.c_identify = generic_identify,
	.c_size_cache = intel_size_cache,
};

@@ -1,2 +1,2 @@
obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o
obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
@@ -13,6 +13,8 @@
#include <asm/msr.h>
#include <asm/apic.h>

#include <asm/therm_throt.h>

#include "mce.h"

/* as supported by the P4/Xeon family */
@@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
/* P4/Xeon Thermal transition interrupt handler */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
	u32 l, h;
	unsigned int cpu = smp_processor_id();
	static unsigned long next[NR_CPUS];
	__u64 msr_val;

	ack_APIC_irq();

	if (time_after(next[cpu], jiffies))
		return;

	next[cpu] = jiffies + HZ*5;
	rdmsr(MSR_IA32_THERM_STATUS, l, h);
	if (l & 0x1) {
		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
		       cpu);
		add_taint(TAINT_MACHINE_CHECK);
	} else {
		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
	}
	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	therm_throt_process(msr_val & 0x1);
}

/* Thermal interrupt handler for this CPU setup */
@@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)

	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);


	l = apic_read (APIC_LVTTHMR);
	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
	return;
}
#endif /* CONFIG_X86_MCE_P4THERMAL */

180
arch/i386/kernel/cpu/mcheck/therm_throt.c
Normal file
@@ -0,0 +1,180 @@
/*
 * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
 *
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */

#include <linux/percpu.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <asm/cpu.h>
#include <linux/notifier.h>
#include <asm/therm_throt.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
atomic_t therm_throt_en = ATOMIC_INIT(0);

#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)

#define define_therm_throt_sysdev_show_func(name)			\
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
					      char *buf)		\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_throttle_##name, cpu));	\
	else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_sysdev_show_func(count);
define_therm_throt_sysdev_one_ro(count);

static struct attribute *thermal_throttle_attrs[] = {
	&attr_count.attr,
	NULL
};

static struct attribute_group thermal_throttle_attr_group = {
	.attrs = thermal_throttle_attrs,
	.name = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

/***
 * therm_throt_process - Process thermal throttling event from interrupt
 * @curr: Whether the condition is current or not (boolean), since the
 *        thermal interrupt normally gets called both when the thermal
 *        event begins and once the event has ended.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
int therm_throt_process(int curr)
{
	unsigned int cpu = smp_processor_id();
	__u64 tmp_jiffs = get_jiffies_64();

	if (curr)
		__get_cpu_var(thermal_throttle_count)++;

	if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
		return 0;

	__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;

	/* if we just entered the thermal event */
	if (curr) {
		printk(KERN_CRIT "CPU%d: Temperature above threshold, "
		       "cpu clock throttled (total events = %lu)\n", cpu,
		       __get_cpu_var(thermal_throttle_count));

		add_taint(TAINT_MACHINE_CHECK);
	} else {
		printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
	}

	return 1;
}

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device */
static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
{
	sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
{
	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
	return 0;
}

/* Mutex protecting device creation against CPU hotplug */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
						   unsigned long action,
						   void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	mutex_lock(&therm_cpu_lock);
	switch (action) {
	case CPU_ONLINE:
		thermal_throttle_add_dev(sys_dev);
		break;
	case CPU_DEAD:
		thermal_throttle_remove_dev(sys_dev);
		break;
	}
	mutex_unlock(&therm_cpu_lock);
	return NOTIFY_OK;
}

static struct notifier_block thermal_throttle_cpu_notifier =
{
	.notifier_call = thermal_throttle_cpu_callback,
};
#endif /* CONFIG_HOTPLUG_CPU */

static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;

	if (!atomic_read(&therm_throt_en))
		return 0;

	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
	mutex_lock(&therm_cpu_lock);
#endif
	/* connect live CPUs to sysfs */
	for_each_online_cpu(cpu)
		thermal_throttle_add_dev(get_cpu_sysdev(cpu));
#ifdef CONFIG_HOTPLUG_CPU
	mutex_unlock(&therm_cpu_lock);
#endif

	return 0;
}

device_initcall(thermal_throttle_init_device);
#endif /* CONFIG_SYSFS */
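The heart of the new file is the rate limiter in therm_throt_process(): at most one syslog line per CPU per CHECK_INTERVAL, tracked with a per-CPU 64-bit jiffies stamp so the window survives 32-bit jiffies wraparound. A self-contained sketch of that idiom (names illustrative; like the original, it must run with preemption off, e.g. from the interrupt handler):

#include <linux/jiffies.h>
#include <linux/percpu.h>

#define MY_INTERVAL	(300 * HZ)		/* same 5-minute window as above */

static DEFINE_PER_CPU(__u64, my_next_check);

static int should_log_now(void)
{
	__u64 now = get_jiffies_64();

	if (time_before64(now, __get_cpu_var(my_next_check)))
		return 0;			/* still inside the quiet window */
	__get_cpu_var(my_next_check) = now + MY_INTERVAL;
	return 1;				/* log once, open a new window */
}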
@@ -10,7 +10,7 @@
 * to have CPUID. (Thanks to Herbert Oppmann)
 */

static int __init deep_magic_nexgen_probe(void)
static int __cpuinit deep_magic_nexgen_probe(void)
{
	int ret;

@@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void)
	return ret;
}

static void __init init_nexgen(struct cpuinfo_x86 * c)
static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
{
	c->x86_cache_size = 256;	/* A few had 1 MB... */
}

static void __init nexgen_identify(struct cpuinfo_x86 * c)
static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
{
	/* Detect NexGen with old hypercode */
	if ( deep_magic_nexgen_probe() ) {
		strcpy(c->x86_vendor_id, "NexGenDriven");
	}
	generic_identify(c);
}

static struct cpu_dev nexgen_cpu_dev __initdata = {
static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
	.c_vendor = "Nexgen",
	.c_ident = { "NexGenDriven" },
	.c_models = {
@@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)

		/* Intel-defined (#2) */
		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
		"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
		NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* VIA/Cyrix/Centaur-defined */
@@ -5,7 +5,7 @@

#include "cpu.h"

static void __init init_rise(struct cpuinfo_x86 *c)
static void __cpuinit init_rise(struct cpuinfo_x86 *c)
{
	printk("CPU: Rise iDragon");
	if (c->x86_model > 2)
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c)
	set_bit(X86_FEATURE_CX8, c->x86_capability);
}

static struct cpu_dev rise_cpu_dev __initdata = {
static struct cpu_dev rise_cpu_dev __cpuinitdata = {
	.c_vendor = "Rise",
	.c_ident = { "RiseRiseRise" },
	.c_models = {
@@ -5,7 +5,7 @@
#include <asm/msr.h>
#include "cpu.h"

static void __init init_transmeta(struct cpuinfo_x86 *c)
static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
{
	unsigned int cap_mask, uk, max, dummy;
	unsigned int cms_rev1, cms_rev2;
@@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
#endif
}

static void __init transmeta_identify(struct cpuinfo_x86 * c)
static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
{
	u32 xlvl;
	generic_identify(c);

	/* Transmeta-defined flags: level 0x80860001 */
	xlvl = cpuid_eax(0x80860000);
@@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c)
	}
}

static struct cpu_dev transmeta_cpu_dev __initdata = {
static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
	.c_vendor = "Transmeta",
	.c_ident = { "GenuineTMx86", "TransmetaCPU" },
	.c_init = init_transmeta,
@@ -5,12 +5,8 @@

/* UMC chips appear to be only either 386 or 486, so no special init takes place.
 */
static void __init init_umc(struct cpuinfo_x86 * c)
{

}

static struct cpu_dev umc_cpu_dev __initdata = {
static struct cpu_dev umc_cpu_dev __cpuinitdata = {
	.c_vendor = "UMC",
	.c_ident = { "UMC UMC UMC" },
	.c_models = {
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = {
		}
		},
	},
	.c_init = init_umc,
};

int __init umc_init_cpu(void)
@@ -22,6 +22,8 @@
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/kdebug.h>

#include <mach_ipi.h>


@@ -93,16 +95,25 @@ static void crash_save_self(struct pt_regs *regs)
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static atomic_t waiting_for_crash_ipi;

static int crash_nmi_callback(struct pt_regs *regs, int cpu)
static int crash_nmi_callback(struct notifier_block *self,
			      unsigned long val, void *data)
{
	struct pt_regs *regs;
	struct pt_regs fixed_regs;
	int cpu;

	if (val != DIE_NMI_IPI)
		return NOTIFY_OK;

	regs = ((struct die_args *)data)->regs;
	cpu = raw_smp_processor_id();

	/* Don't do anything if this handler is invoked on crashing cpu.
	 * Otherwise, system will completely hang. Crashing cpu can get
	 * an NMI if system was initially booted with nmi_watchdog parameter.
	 */
	if (cpu == crashing_cpu)
		return 1;
		return NOTIFY_STOP;
	local_irq_disable();

	if (!user_mode_vm(regs)) {
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void)
	send_IPI_allbutself(NMI_VECTOR);
}

static struct notifier_block crash_nmi_nb = {
	.notifier_call = crash_nmi_callback,
};

static void nmi_shootdown_cpus(void)
{
	unsigned long msecs;

	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
	/* Would it be better to replace the trap vector here? */
	set_nmi_callback(crash_nmi_callback);
	if (register_die_notifier(&crash_nmi_nb))
		return;		/* return what? */
	/* Ensure the new callback function is set before sending
	 * out the NMI
	 */
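crash.c is converted here from the old set_nmi_callback() hook to a notifier_block on the die chain, filtering on DIE_NMI_IPI. A sketch of that registration pattern against the 2.6.18-era i386 interface (handler body elided, names hypothetical):

#include <linux/notifier.h>
#include <asm/kdebug.h>

static int my_nmi_callback(struct notifier_block *self,
			   unsigned long val, void *data)
{
	if (val != DIE_NMI_IPI)
		return NOTIFY_OK;	/* not ours, keep walking the chain */

	/* ... use ((struct die_args *)data)->regs here ... */
	return NOTIFY_STOP;		/* consumed; stop other handlers */
}

static struct notifier_block my_nmi_nb = {
	.notifier_call = my_nmi_callback,
};

static void my_setup(void)
{
	if (register_die_notifier(&my_nmi_nb))
		return;			/* registration failed */
}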
@@ -76,8 +76,15 @@ DF_MASK	= 0x00000400
NT_MASK	= 0x00004000
VM_MASK	= 0x00020000

/* These are replaces for paravirtualization */
#define DISABLE_INTERRUPTS		cli
#define ENABLE_INTERRUPTS		sti
#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
#define INTERRUPT_RETURN		iret
#define GET_CR0_INTO_EAX		movl %cr0, %eax

#ifdef CONFIG_PREEMPT
#define preempt_stop		cli; TRACE_IRQS_OFF
#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
#else
#define preempt_stop
#define resume_kernel		restore_nocheck
@@ -176,18 +183,21 @@ VM_MASK	= 0x00020000

#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, OLDESP-EBX;\
	/*CFI_OFFSET cs, CS-OLDESP;*/\
	CFI_OFFSET eip, EIP-OLDESP;\
@@ -233,10 +243,11 @@ ret_from_intr:
check_userspace:
	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb CS(%esp), %al
	testl $(VM_MASK | 3), %eax
	jz resume_kernel
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace
ENTRY(resume_userspace)
	cli				# make sure we don't miss an interrupt
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
@@ -247,7 +258,7 @@ ENTRY(resume_userspace)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	cli
	DISABLE_INTERRUPTS
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
@@ -267,6 +278,7 @@ need_resched:
# sysenter call handler stub
ENTRY(sysenter_entry)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_esp0(%esp),%esp
@@ -275,7 +287,7 @@ sysenter_past_esp:
	 * No need to follow this irqs on/off section: the syscall
	 * disabled irqs and here we enable it straight after entry:
	 */
	sti
	ENABLE_INTERRUPTS
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
@@ -320,7 +332,7 @@ sysenter_past_esp:
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)
	cli
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
@@ -330,8 +342,7 @@ sysenter_past_esp:
	movl OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
	sti
	sysexit
	ENABLE_INTERRUPTS_SYSEXIT
	CFI_ENDPROC


@@ -356,7 +367,7 @@ syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)		# store the return value
syscall_exit:
	cli				# make sure we don't miss an interrupt
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
@@ -371,8 +382,8 @@ restore_all:
	# See comments in process.c:copy_thread() for details.
	movb OLDSS(%esp), %ah
	movb CS(%esp), %al
	andl $(VM_MASK | (4 << 8) | 3), %eax
	cmpl $((4 << 8) | 3), %eax
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
@@ -381,11 +392,11 @@ restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp
	CFI_ADJUST_CFA_OFFSET -4
1:	iret
1:	INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
	TRACE_IRQS_ON
	sti
	ENABLE_INTERRUPTS
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
@@ -409,7 +420,7 @@ ldt_ss:
	 * dosemu and wine happy. */
	subl $8, %esp			# reserve space for switch16 pointer
	CFI_ADJUST_CFA_OFFSET 8
	cli
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	movl %esp, %eax
	/* Set up the 16bit stack frame with switch32 pointer on top,
@@ -419,7 +430,7 @@ ldt_ss:
	TRACE_IRQS_IRET
	RESTORE_REGS
	lss 20+4(%esp), %esp		# switch to 16bit stack
1:	iret
1:	INTERRUPT_RETURN
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
@@ -434,7 +445,7 @@ work_pending:
	jz work_notifysig
work_resched:
	call schedule
	cli				# make sure we don't miss an interrupt
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
@@ -490,7 +501,7 @@ syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	jz work_pending
	TRACE_IRQS_ON
	sti				# could let do_syscall_trace() call
	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
@@ -591,11 +602,9 @@ ENTRY(name) \
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
@@ -645,6 +654,7 @@ error_code:
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	RING0_INT_FRAME
@@ -669,7 +679,7 @@ ENTRY(device_not_available)
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	movl %cr0, %eax
	GET_CR0_INTO_EAX
	testl $0x4, %eax		# EM (math emulation bit)
	jne device_not_available_emulate
	preempt_stop
@@ -702,9 +712,15 @@ device_not_available_emulate:
	jne ok; \
label: \
	movl TSS_sysenter_esp0+offset(%esp),%esp; \
	CFI_DEF_CFA esp, 0; \
	CFI_UNDEFINED eip; \
	pushfl; \
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl $__KERNEL_CS; \
	pushl $sysenter_past_esp
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl $sysenter_past_esp; \
	CFI_ADJUST_CFA_OFFSET 4; \
	CFI_REL_OFFSET eip, 0

KPROBE_ENTRY(debug)
	RING0_INT_FRAME
@@ -720,7 +736,8 @@ debug_stack_correct:
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
	.previous .text
KPROBE_END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
@@ -729,7 +746,7 @@ debug_stack_correct:
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
KPROBE_ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
@@ -754,6 +771,7 @@ ENTRY(nmi)
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
@@ -764,9 +782,12 @@ nmi_stack_correct:
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
@@ -777,8 +798,10 @@ nmi_debug_stack_check:
	jmp nmi_stack_correct

nmi_16bit_stack:
	RING0_INT_FRAME
	/* create the pointer to lss back */
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
@@ -799,12 +822,13 @@ nmi_16bit_stack:
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to 16bit stack
1:	iret
1:	INTERRUPT_RETURN
	CFI_ENDPROC
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	RING0_INT_FRAME
@@ -816,7 +840,7 @@ KPROBE_ENTRY(int3)
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
	.previous .text
KPROBE_END(int3)

ENTRY(overflow)
	RING0_INT_FRAME
@@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection)
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
	.previous .text
KPROBE_END(general_protection)

ENTRY(alignment_check)
	RING0_EC_FRAME
@@ -890,13 +914,14 @@ ENTRY(alignment_check)
	jmp error_code
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
	.previous .text

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
@@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running)
ENDPROC(arch_unwind_init_running)
#endif

ENTRY(kernel_thread_helper)
	pushl $0			# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	push %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	push %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

.section .rodata,"a"
#include "syscall_table.S"

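The new DISABLE_INTERRUPTS/ENABLE_INTERRUPTS/INTERRUPT_RETURN defines replace literal cli/sti/iret sites so that a later paravirt port can rebind them all in one place. A C-level sketch of the same indirection (all names hypothetical; native case only):

static inline void native_irq_disable(void)
{
	__asm__ __volatile__("cli" : : : "memory");
}

static inline void native_irq_enable(void)
{
	__asm__ __volatile__("sti" : : : "memory");
}

/* A hypervisor build would bind these macros to hypercalls instead. */
#define MY_DISABLE_INTERRUPTS()	native_irq_disable()
#define MY_ENABLE_INTERRUPTS()	native_irq_enable()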
@@ -371,8 +371,65 @@ rp_sidt:
	addl $8,%edi
	dec %ecx
	jne rp_sidt

.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx		/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret

early_divide_err:
	xor %edx,%edx
	pushl $0			/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0			/* fake errcode */
	jmp early_fault

early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

early_fault:
	cld
#ifdef CONFIG_PRINTK
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	movl %cr2,%eax
	pushl %eax
	pushl %edx			/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
hlt_loop:
	hlt
	jmp hlt_loop

/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
@@ -386,6 +443,9 @@ ignore_int:
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
@@ -431,9 +491,16 @@ ENTRY(stack_start)

ready:	.byte 0

early_recursion_flag:
	.long 0

int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

fault_msg:
	.ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
	.asciz "Stack: %p %p %p %p %p %p %p %p\n"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions. They are not
@@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq)

#define shutdown_8259A_irq	disable_8259A_irq

static int i8259A_auto_eoi;

static void mask_and_ack_8259A(unsigned int);

unsigned int startup_8259A_irq(unsigned int irq)
@@ -253,7 +255,7 @@ static void save_ELCR(char *trigger)

static int i8259A_resume(struct sys_device *dev)
{
	init_8259A(0);
	init_8259A(i8259A_auto_eoi);
	restore_ELCR(irq_trigger);
	return 0;
}
@@ -301,6 +303,8 @@ void init_8259A(int auto_eoi)
{
	unsigned long flags;

	i8259A_auto_eoi = auto_eoi;

	spin_lock_irqsave(&i8259A_lock, flags);

	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
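The i8259 change fixes resume-from-suspend reprogramming the PIC into non-auto-EOI mode regardless of how it was booted: init_8259A() now caches its auto_eoi argument and i8259A_resume() replays it. The same idiom in a minimal hypothetical form:

static int my_saved_mode;		/* captured at init time */

static void my_hw_init(int mode)
{
	my_saved_mode = mode;		/* remember for resume */
	/* ... program the hardware for 'mode' ... */
}

static int my_resume(void)
{
	my_hw_init(my_saved_mode);	/* was: my_hw_init(0) */
	return 0;
}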
@@ -40,6 +40,7 @@
#include <asm/nmi.h>

#include <mach_apic.h>
#include <mach_apicdef.h>

#include "io_ports.h"

@@ -65,7 +66,7 @@ int sis_apic_bug = -1;
 */
int nr_ioapic_registers[MAX_IO_APICS];

int disable_timer_pin_1 __initdata;
static int disable_timer_pin_1 __initdata;

/*
 * Rough estimation of how many shared IRQs there are, can
@@ -93,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
#define vector_to_irq(vector)	(vector)
#endif


union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;
	spin_lock_irqsave(&ioapic_lock, flags);
	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	return eu.entry;
}

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	unsigned long flags;
	union entry_union eu;
	eu.entry = e;
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
@@ -200,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq)
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	/* Check delivery_mode to be sure we're not clearing an SMI pin */
	spin_lock_irqsave(&ioapic_lock, flags);
	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	entry = ioapic_read_entry(apic, pin);
	if (entry.delivery_mode == dest_SMI)
		return;

@@ -215,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
	 */
	memset(&entry, 0, sizeof(entry));
	entry.mask = 1;
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
	spin_unlock_irqrestore(&ioapic_lock, flags);
	ioapic_write_entry(apic, pin, entry);
}

static void clear_IO_APIC (void)
@@ -1283,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void)
			if (!apic && (irq < 16))
				disable_8259A_irq(irq);
		}
		ioapic_write_entry(apic, pin, entry);
		spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
		set_native_irq_info(irq, TARGET_CPUS);
		spin_unlock_irqrestore(&ioapic_lock, flags);
	}
@@ -1301,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void)
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	memset(&entry,0,sizeof(entry));

@@ -1331,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
	/*
	 * Add it to the IO-APIC irq-routing table:
	 */
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
	io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
	spin_unlock_irqrestore(&ioapic_lock, flags);
	ioapic_write_entry(apic, pin, entry);

	enable_8259A_irq(0);
}
@@ -1444,10 +1461,7 @@ void __init print_IO_APIC(void)
	for (i = 0; i <= reg_01.bits.entries; i++) {
		struct IO_APIC_route_entry entry;

		spin_lock_irqsave(&ioapic_lock, flags);
		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
		spin_unlock_irqrestore(&ioapic_lock, flags);
		entry = ioapic_read_entry(apic, i);

		printk(KERN_DEBUG " %02x %03X %02X  ",
			i,
@@ -1666,10 +1680,7 @@ static void __init enable_IO_APIC(void)
		/* See if any of the pins is in ExtINT mode */
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry entry;
			spin_lock_irqsave(&ioapic_lock, flags);
			*(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
			*(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
			spin_unlock_irqrestore(&ioapic_lock, flags);
			entry = ioapic_read_entry(apic, pin);


			/* If the interrupt line is enabled and in ExtInt mode
@@ -1726,7 +1737,6 @@ void disable_IO_APIC(void)
	 */
	if (ioapic_i8259.pin != -1) {
		struct IO_APIC_route_entry entry;
		unsigned long flags;

		memset(&entry, 0, sizeof(entry));
		entry.mask            = 0; /* Enabled */
@@ -1743,12 +1753,7 @@ void disable_IO_APIC(void)
		/*
		 * Add it to the IO-APIC irq-routing table:
		 */
		spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
			*(((int *)&entry)+1));
		io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
			*(((int *)&entry)+0));
		spin_unlock_irqrestore(&ioapic_lock, flags);
		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
	}
	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
@@ -2213,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void)
	int apic, pin, i;
	struct IO_APIC_route_entry entry0, entry1;
	unsigned char save_control, save_freq_select;
	unsigned long flags;

	pin = find_isa_irq_pin(8, mp_INT);
	apic = find_isa_irq_apic(8, mp_INT);
	if (pin == -1)
		return;

	spin_lock_irqsave(&ioapic_lock, flags);
	*(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
	*(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	entry0 = ioapic_read_entry(apic, pin);
	clear_IO_APIC_pin(apic, pin);

	memset(&entry1, 0, sizeof(entry1));
@@ -2236,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void)
	entry1.trigger = 0;
	entry1.vector = 0;

	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);
	ioapic_write_entry(apic, pin, entry1);

	save_control = CMOS_READ(RTC_CONTROL);
	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2258,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void)
	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
	clear_IO_APIC_pin(apic, pin);

	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);
	ioapic_write_entry(apic, pin, entry0);
}

int timer_uses_ioapic_pin_0;
@@ -2461,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
	struct IO_APIC_route_entry *entry;
	struct sysfs_ioapic_data *data;
	unsigned long flags;
	int i;

	data = container_of(dev, struct sysfs_ioapic_data, dev);
	entry = data->entry;
	spin_lock_irqsave(&ioapic_lock, flags);
	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
	}
	spin_unlock_irqrestore(&ioapic_lock, flags);
	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
		entry[i] = ioapic_read_entry(dev->id, i);

	return 0;
}
@@ -2493,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev)
		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
		io_apic_write(dev->id, 0, reg_00.raw);
	}
	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
	}
	spin_unlock_irqrestore(&ioapic_lock, flags);
	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
		ioapic_write_entry(dev->id, i, entry[i]);

	return 0;
}
@@ -2694,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
	if (!ioapic && (irq < 16))
		disable_8259A_irq(irq);

	ioapic_write_entry(ioapic, pin, entry);
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
	spin_unlock_irqrestore(&ioapic_lock, flags);

@@ -2704,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
}

#endif /* CONFIG_ACPI */

static int __init parse_disable_timer_pin_1(char *arg)
{
	disable_timer_pin_1 = 1;
	return 0;
}
early_param("disable_timer_pin_1", parse_disable_timer_pin_1);

static int __init parse_enable_timer_pin_1(char *arg)
{
	disable_timer_pin_1 = -1;
	return 0;
}
early_param("enable_timer_pin_1", parse_enable_timer_pin_1);

static int __init parse_noapic(char *arg)
{
	/* disable IO-APIC */
	disable_ioapic_setup();
	return 0;
}
early_param("noapic", parse_noapic);
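Most of the io_apic.c churn above replaces open-coded *(((int *)&entry) + n) register pokes with ioapic_read_entry()/ioapic_write_entry(), built on a union that overlays two raw words on the structured route entry. A simplified, self-contained sketch (types and the register accessor are stand-ins, not the kernel's own):

typedef unsigned int u32;

struct route_entry { u32 lo, hi; };		/* simplified structured view */

union entry_words {
	struct { u32 w1, w2; };			/* raw words, one per register */
	struct route_entry entry;
};

static u32 read_reg(int apic, int reg);		/* stand-in for io_apic_read() */

static struct route_entry read_entry(int apic, int pin)
{
	union entry_words eu;

	eu.w1 = read_reg(apic, 0x10 + 2 * pin);	/* low word */
	eu.w2 = read_reg(apic, 0x11 + 2 * pin);	/* high word */
	return eu.entry;			/* callers get the typed view */
}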
@ -9,6 +9,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@ -20,70 +21,13 @@
|
||||
#include <asm/system.h>
|
||||
|
||||
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
|
||||
|
||||
#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
||||
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
||||
#define L2_ATTR (_PAGE_PRESENT)
|
||||
|
||||
#define LEVEL0_SIZE (1UL << 12UL)
|
||||
|
||||
#ifndef CONFIG_X86_PAE
|
||||
#define LEVEL1_SIZE (1UL << 22UL)
|
||||
static u32 pgtable_level1[1024] PAGE_ALIGNED;
|
||||
|
||||
static void identity_map_page(unsigned long address)
|
||||
{
|
||||
unsigned long level1_index, level2_index;
|
||||
u32 *pgtable_level2;
|
||||
|
||||
/* Find the current page table */
|
||||
pgtable_level2 = __va(read_cr3());
|
||||
|
||||
/* Find the indexes of the physical address to identity map */
|
||||
level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
|
||||
level2_index = address / LEVEL1_SIZE;
|
||||
|
||||
/* Identity map the page table entry */
|
||||
pgtable_level1[level1_index] = address | L0_ATTR;
|
||||
pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
|
||||
|
||||
/* Flush the tlb so the new mapping takes effect.
|
||||
* Global tlb entries are not flushed but that is not an issue.
|
||||
*/
|
||||
load_cr3(pgtable_level2);
|
||||
}
|
||||
|
||||
#else
|
||||
#define LEVEL1_SIZE (1UL << 21UL)
|
||||
#define LEVEL2_SIZE (1UL << 30UL)
|
||||
static u64 pgtable_level1[512] PAGE_ALIGNED;
|
||||
static u64 pgtable_level2[512] PAGE_ALIGNED;
|
||||
|
||||
static void identity_map_page(unsigned long address)
|
||||
{
|
||||
unsigned long level1_index, level2_index, level3_index;
|
||||
u64 *pgtable_level3;
|
||||
|
||||
/* Find the current page table */
|
||||
pgtable_level3 = __va(read_cr3());
|
||||
|
||||
/* Find the indexes of the physical address to identity map */
|
||||
level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
|
||||
level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
|
||||
level3_index = address / LEVEL2_SIZE;
|
||||
|
||||
/* Identity map the page table entry */
|
||||
pgtable_level1[level1_index] = address | L0_ATTR;
|
||||
pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
|
||||
set_64bit(&pgtable_level3[level3_index],
|
||||
__pa(pgtable_level2) | L2_ATTR);
|
||||
|
||||
/* Flush the tlb so the new mapping takes effect.
|
||||
* Global tlb entries are not flushed but that is not an issue.
|
||||
*/
|
||||
load_cr3(pgtable_level3);
|
||||
}
|
||||
static u32 kexec_pgd[1024] PAGE_ALIGNED;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
static u32 kexec_pmd0[1024] PAGE_ALIGNED;
|
||||
static u32 kexec_pmd1[1024] PAGE_ALIGNED;
|
||||
#endif
|
||||
static u32 kexec_pte0[1024] PAGE_ALIGNED;
|
||||
static u32 kexec_pte1[1024] PAGE_ALIGNED;
|
||||
|
||||
static void set_idt(void *newidt, __u16 limit)
|
||||
{
|
||||
@ -127,16 +71,6 @@ static void load_segments(void)
|
||||
#undef __STR
|
||||
}
|
||||
|
||||
typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
|
||||
unsigned long indirection_page,
|
||||
unsigned long reboot_code_buffer,
|
||||
unsigned long start_address,
|
||||
unsigned int has_pae) ATTRIB_NORET;
|
||||
|
||||
extern const unsigned char relocate_new_kernel[];
|
||||
extern void relocate_new_kernel_end(void);
|
||||
extern const unsigned int relocate_new_kernel_size;
|
||||
|
||||
/*
|
||||
* A architecture hook called to validate the
|
||||
* proposed image and prepare the control pages
|
||||
@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image)
|
||||
*/
|
||||
NORET_TYPE void machine_kexec(struct kimage *image)
|
||||
{
|
||||
unsigned long page_list;
|
||||
unsigned long reboot_code_buffer;
|
||||
|
||||
relocate_new_kernel_t rnk;
|
||||
unsigned long page_list[PAGES_NR];
|
||||
void *control_page;
|
||||
|
||||
/* Interrupts aren't acceptable while we reboot */
|
||||
local_irq_disable();
|
||||
|
||||
/* Compute some offsets */
|
||||
reboot_code_buffer = page_to_pfn(image->control_code_page)
|
||||
<< PAGE_SHIFT;
|
||||
page_list = image->head;
|
||||
control_page = page_address(image->control_code_page);
|
||||
memcpy(control_page, relocate_kernel, PAGE_SIZE);
|
||||
|
||||
/* Set up an identity mapping for the reboot_code_buffer */
|
||||
identity_map_page(reboot_code_buffer);
|
||||
|
||||
/* copy it out */
|
||||
memcpy((void *)reboot_code_buffer, relocate_new_kernel,
|
||||
relocate_new_kernel_size);
|
||||
page_list[PA_CONTROL_PAGE] = __pa(control_page);
|
||||
page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
|
||||
page_list[PA_PGD] = __pa(kexec_pgd);
|
||||
page_list[VA_PGD] = (unsigned long)kexec_pgd;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
page_list[PA_PMD_0] = __pa(kexec_pmd0);
|
||||
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
|
||||
page_list[PA_PMD_1] = __pa(kexec_pmd1);
|
||||
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
|
||||
#endif
|
||||
page_list[PA_PTE_0] = __pa(kexec_pte0);
|
||||
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
|
||||
page_list[PA_PTE_1] = __pa(kexec_pte1);
|
||||
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
|
||||
|
||||
/* The segment registers are funny things, they have both a
|
||||
* visible and an invisible part. Whenever the visible part is
|
||||
@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image)
|
||||
set_idt(phys_to_virt(0),0);
|
||||
|
||||
/* now call it */
|
||||
rnk = (relocate_new_kernel_t) reboot_code_buffer;
|
||||
(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
|
||||
relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
|
||||
image->start, cpu_has_pae);
|
||||
}
|
||||
|
||||
/* crashkernel=size@addr specifies the location to reserve for
|
||||
* a crash kernel. By reserving this memory we guarantee
|
||||
* that linux never sets it up as a DMA target.
|
||||
* Useful for holding code to do something appropriate
|
||||
* after a kernel panic.
|
||||
*/
|
||||
static int __init parse_crashkernel(char *arg)
|
||||
{
|
||||
unsigned long size, base;
|
||||
size = memparse(arg, &arg);
|
||||
if (*arg == '@') {
|
||||
base = memparse(arg+1, &arg);
|
||||
/* FIXME: Do I want a sanity check
|
||||
* to validate the memory range?
|
||||
*/
|
||||
crashk_res.start = base;
|
||||
crashk_res.end = base + size - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
early_param("crashkernel", parse_crashkernel);
|
||||
|
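For illustration, a minimal userspace sketch of the size@addr split that parse_crashkernel() performs; memparse() is a kernel helper, so a simplified stand-in handling only the K/M suffixes is assumed here.

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for the kernel's memparse(): number + optional suffix */
static unsigned long long memparse_sketch(const char *s, char **retptr)
{
	unsigned long long val = strtoull(s, retptr, 0);

	switch (**retptr) {
	case 'K': case 'k': val <<= 10; (*retptr)++; break;
	case 'M': case 'm': val <<= 20; (*retptr)++; break;
	}
	return val;
}

int main(void)
{
	char *arg = "64M@16M";	/* example crashkernel= argument */
	unsigned long long size, base;

	size = memparse_sketch(arg, &arg);
	if (*arg == '@') {	/* base address follows, as in parse_crashkernel() */
		base = memparse_sketch(arg + 1, &arg);
		printf("reserve %llu bytes at 0x%llx\n", size, base);
	}
	return 0;
}
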
@@ -42,6 +42,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mca.h>
#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
@@ -414,7 +415,8 @@ subsys_initcall(mca_init);

/*--------------------------------------------------------------------*/

static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
static __kprobes void
mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
{
int slot = mca_dev->slot;

@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)

/*--------------------------------------------------------------------*/

static int mca_handle_nmi_callback(struct device *dev, void *data)
static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
{
struct mca_device *mca_dev = to_mca_device(dev);
unsigned char pos5;
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data)
return 0;
}

void mca_handle_nmi(void)
void __kprobes mca_handle_nmi(void)
{
/* First try - scan the various adapters and see if a specific
* adapter was responsible for the error.

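The hunks above only add __kprobes markings. As a sketch (the macro's exact definition lives in a kernel header outside this diff and is assumed here), the annotation moves the function into a section the kprobes core refuses to probe, which matters because these MCA handlers can run in NMI context:

/* assumed approximation of the kernel macro: place the function in
 * .kprobes.text so the kprobes core rejects probes on it */
#define __kprobes __attribute__((__section__(".kprobes.text")))

static void __kprobes nmi_side_handler(void)
{
	/* work that may run inside an NMI; a breakpoint here could recurse */
}

int main(void)
{
	nmi_side_handler();
	return 0;
}
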
@@ -2,6 +2,7 @@
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2004 Tigran Aivazian
* 2006 Shaohua Li <shaohua.li@intel.com>
*
* This driver allows updating microcode on Intel processors
* belonging to IA-32 family - PentiumPro, Pentium II,
@@ -82,6 +83,9 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>

#include <asm/msr.h>
#include <asm/uaccess.h>
@@ -91,9 +95,6 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
MODULE_LICENSE("GPL");

static int verbose;
module_param(verbose, int, 0644);

#define MICROCODE_VERSION "1.14a"

#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
@@ -120,55 +121,40 @@ static DEFINE_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);

static void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* its size */

typedef enum mc_error_code {
MC_SUCCESS = 0,
MC_IGNORED = 1,
MC_NOTFOUND = 2,
MC_MARKED = 3,
MC_ALLOCATED = 4,
} mc_error_code_t;

static struct ucode_cpu_info {
int valid;
unsigned int sig;
unsigned int pf, orig_pf;
unsigned int pf;
unsigned int rev;
unsigned int cksum;
mc_error_code_t err;
microcode_t *mc;
} ucode_cpu_info[NR_CPUS];

static int microcode_open (struct inode *unused1, struct file *unused2)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}

static void collect_cpu_info (void *unused)
static void collect_cpu_info(int cpu_num)
{
int cpu_num = smp_processor_id();
struct cpuinfo_x86 *c = cpu_data + cpu_num;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
unsigned int val[2];

uci->sig = uci->pf = uci->rev = uci->cksum = 0;
uci->err = MC_NOTFOUND;
/* We should bind the task to the CPU */
BUG_ON(raw_smp_processor_id() != cpu_num);
uci->pf = uci->rev = 0;
uci->mc = NULL;
uci->valid = 1;

if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
printk(KERN_ERR "microcode: CPU%d not a capable Intel "
"processor\n", cpu_num);
uci->valid = 0;
return;
} else {
uci->sig = cpuid_eax(0x00000001);
}

if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
uci->pf = 1 << ((val[1] >> 18) & 7);
}
uci->orig_pf = uci->pf;
uci->sig = cpuid_eax(0x00000001);

if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
uci->pf = 1 << ((val[1] >> 18) & 7);
}

wrmsr(MSR_IA32_UCODE_REV, 0, 0);
@@ -180,218 +166,159 @@ static void collect_cpu_info (void *unused)
uci->sig, uci->pf, uci->rev);
}

static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
static inline int microcode_update_match(int cpu_num,
microcode_header_t *mc_header, int sig, int pf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;

pr_debug("Microcode Found.\n");
pr_debug(" Header Revision 0x%x\n", mc_header->hdrver);
pr_debug(" Loader Revision 0x%x\n", mc_header->ldrver);
pr_debug(" Revision 0x%x \n", mc_header->rev);
pr_debug(" Date %x/%x/%x\n",
((mc_header->date >> 24 ) & 0xff),
((mc_header->date >> 16 ) & 0xff),
(mc_header->date & 0xFFFF));
pr_debug(" Signature 0x%x\n", sig);
pr_debug(" Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
((sig >> 12) & 0x3),
((sig >> 8) & 0xf),
((sig >> 4) & 0xf),
((sig & 0xf)));
pr_debug(" Processor Flags 0x%x\n", pf);
pr_debug(" Checksum 0x%x\n", cksum);
if (!sigmatch(sig, uci->sig, pf, uci->pf)
|| mc_header->rev <= uci->rev)
return 0;
return 1;
}

if (mc_header->rev < uci->rev) {
if (uci->err == MC_NOTFOUND) {
uci->err = MC_IGNORED;
uci->cksum = mc_header->rev;
} else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev)
uci->cksum = mc_header->rev;
} else if (mc_header->rev == uci->rev) {
if (uci->err < MC_MARKED) {
/* notify the caller of success on this cpu */
uci->err = MC_SUCCESS;
static int microcode_sanity_check(void *mc)
{
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
struct extended_signature *ext_sig;
unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;

total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
"Bad data size in microcode data file\n");
return -EINVAL;
}

if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
"Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! "
"Small exttable size in microcode data file\n");
return -EINVAL;
}
} else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) {
pr_debug("microcode: CPU%d found a matching microcode update with "
" revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
uci->cksum = cksum;
uci->pf = pf; /* keep the original mc pf for cksum calculation */
uci->err = MC_MARKED; /* found the match */
for_each_online_cpu(cpu_num) {
if (ucode_cpu_info + cpu_num != uci
&& ucode_cpu_info[cpu_num].mc == uci->mc) {
uci->mc = NULL;
break;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
printk(KERN_ERR "microcode: error! "
"Bad exttable size in microcode data file\n");
return -EFAULT;
}
if (uci->mc != NULL) {
vfree(uci->mc);
uci->mc = NULL;
ext_sigcount = ext_header->count;
}

/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int *ext_tablep = (int *)ext_header;

i = ext_table_size / DWSIZE;
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, "
"bad extended signature table checksum\n");
return -EINVAL;
}
}
return;

/* calculate the checksum */
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (struct extended_signature *)((void *)ext_header
+ EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i);
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
}
return 0;
}
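A standalone restatement of the invariant microcode_sanity_check() relies on: an Intel microcode image is laid out so that the 32-bit two's-complement sum of all of its dwords is zero, which is why summing and comparing against 0 suffices.

#include <stdint.h>
#include <stddef.h>

/* returns 1 when the dword sum over the whole image wraps to zero */
static int mc_checksum_ok(const void *mc, size_t bytes)
{
	const uint32_t *p = mc;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < bytes / sizeof(uint32_t); i++)
		sum += p[i];	/* wraps mod 2^32, matching the driver's int sum */
	return sum == 0;
}
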

static int find_matching_ucodes (void)
/*
* return 0 - no update found
* return 1 - found update
* return < 0 - error
*/
static int get_maching_microcode(void *mc, int cpu)
{
int cursor = 0;
int error = 0;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header;
unsigned long total_size = get_totalsize(mc_header);
int ext_sigcount, i;
struct extended_signature *ext_sig;
void *new_mc;

while (cursor + MC_HEADER_SIZE < user_buffer_size) {
microcode_header_t mc_header;
void *newmc = NULL;
int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
if (microcode_update_match(cpu, mc_header,
mc_header->sig, mc_header->pf))
goto find;

if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
error = -EFAULT;
goto out;
}
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;

total_size = get_totalsize(&mc_header);
if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
error = -EINVAL;
goto out;
}
ext_header = (struct extended_sigtable *)(mc +
get_datasize(mc_header) + MC_HEADER_SIZE);
ext_sigcount = ext_header->count;
ext_sig = (struct extended_signature *)((void *)ext_header
+ EXT_HEADER_SIZE);
for (i = 0; i < ext_sigcount; i++) {
if (microcode_update_match(cpu, mc_header,
ext_sig->sig, ext_sig->pf))
goto find;
ext_sig++;
}
return 0;
find:
pr_debug("microcode: CPU %d found a matching microcode update with"
" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
new_mc = vmalloc(total_size);
if (!new_mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}

data_size = get_datasize(&mc_header);
if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
error = -EINVAL;
goto out;
}
/* free previous update file */
vfree(uci->mc);

if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
error = -EINVAL;
goto out;
}

for_each_online_cpu(cpu_num) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;

if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf))
mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
}

ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
struct extended_sigtable ext_header;
struct extended_signature ext_sig;
int ext_sigcount;

if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
error = -EINVAL;
goto out;
}
if (copy_from_user(&ext_header, user_buffer + cursor
+ MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
error = -EFAULT;
goto out;
}
if (ext_table_size != exttable_size(&ext_header)) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
error = -EFAULT;
goto out;
}

ext_sigcount = ext_header.count;

for (i = 0; i < ext_sigcount; i++) {
if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE
+ EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
error = -EFAULT;
goto out;
}
for_each_online_cpu(cpu_num) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;

if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) {
mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
}
}
}
}
/* now check if any cpu has matched */
allocated_flag = 0;
sum = 0;
for_each_online_cpu(cpu_num) {
if (ucode_cpu_info[cpu_num].err == MC_MARKED) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
if (!allocated_flag) {
allocated_flag = 1;
newmc = vmalloc(total_size);
if (!newmc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
error = -ENOMEM;
goto out;
}
if (copy_from_user(newmc + MC_HEADER_SIZE,
user_buffer + cursor + MC_HEADER_SIZE,
total_size - MC_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
vfree(newmc);
error = -EFAULT;
goto out;
}
memcpy(newmc, &mc_header, MC_HEADER_SIZE);
/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
i = ext_table_size / DWSIZE;
while (i--) ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
vfree(newmc);
error = -EINVAL;
goto out;
}
}

/* calculate the checksum */
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--) sum += ((int *)newmc)[i];
sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
}
ucode_cpu_info[cpu_num].mc = newmc;
ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
if (sum + uci->sig + uci->pf + uci->cksum != 0) {
printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
error = -EINVAL;
goto out;
}
}
}
cursor += total_size; /* goto the next update patch */
} /* end of while */
out:
return error;
memcpy(new_mc, mc, total_size);
uci->mc = new_mc;
return 1;
}
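The matching rule behind microcode_update_match() comes from the sigmatch() macro, which is defined in a header outside this diff; an approximate restatement (signature equality plus intersecting platform-flag masks) would look like:

/* approximation of sigmatch(): the exact definition is not shown here */
static int sigmatch_sketch(unsigned int sig1, unsigned int sig2,
			   unsigned int pf1, unsigned int pf2)
{
	if (sig1 != sig2)
		return 0;
	return (pf1 & pf2) != 0;	/* platform-flag bitmasks must overlap */
}
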

static void do_update_one (void * unused)
static void apply_microcode(int cpu)
{
unsigned long flags;
unsigned int val[2];
int cpu_num = smp_processor_id();
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;

if (uci->mc == NULL) {
if (verbose) {
if (uci->err == MC_SUCCESS)
printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n",
cpu_num, uci->rev);
else
printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
}
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);

if (uci->mc == NULL)
return;
}

/* serialize access to the physical write to MSR 0x79 */
spin_lock_irqsave(&microcode_update_lock, flags);
@@ -408,68 +335,107 @@ static void do_update_one (void * unused)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);

/* notify the caller of success on this cpu */
uci->err = MC_SUCCESS;
spin_unlock_irqrestore(&microcode_update_lock, flags);
printk(KERN_INFO "microcode: CPU%d updated from revision "
if (val[1] != uci->mc->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d updated from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
return;
}
pr_debug("microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %08x \n",
cpu_num, uci->rev, val[1], uci->mc->hdr.date);
return;
uci->rev = val[1];
}

#ifdef CONFIG_MICROCODE_OLD_INTERFACE
static void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* its size */

static long get_next_ucode(void **mc, long offset)
{
microcode_header_t mc_header;
unsigned long total_size;

/* No more data */
if (offset >= user_buffer_size)
return 0;
if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
return -EFAULT;
}
total_size = get_totalsize(&mc_header);
if (offset + total_size > user_buffer_size) {
printk(KERN_ERR "microcode: error! Bad total size in microcode "
"data file\n");
return -EINVAL;
}
*mc = vmalloc(total_size);
if (!*mc)
return -ENOMEM;
if (copy_from_user(*mc, user_buffer + offset, total_size)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
vfree(*mc);
return -EFAULT;
}
return offset + total_size;
}

static int do_microcode_update (void)
{
int i, error;
long cursor = 0;
int error = 0;
void *new_mc;
int cpu;
cpumask_t old;

if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
error = -EIO;
goto out;
}
old = current->cpus_allowed;

if ((error = find_matching_ucodes())) {
printk(KERN_ERR "microcode: Error in the microcode data\n");
goto out_free;
}
while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
error = microcode_sanity_check(new_mc);
if (error)
goto out;
/*
* It's possible the data file has multiple matching ucode,
* let's keep searching till the latest version
*/
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
error = -EIO;
}

out_free:
for_each_online_cpu(i) {
if (ucode_cpu_info[i].mc) {
int j;
void *tmp = ucode_cpu_info[i].mc;
vfree(tmp);
for_each_online_cpu(j) {
if (ucode_cpu_info[j].mc == tmp)
ucode_cpu_info[j].mc = NULL;
}
if (!uci->valid)
continue;
set_cpus_allowed(current, cpumask_of_cpu(cpu));
error = get_maching_microcode(new_mc, cpu);
if (error < 0)
goto out;
if (error == 1)
apply_microcode(cpu);
}
if (ucode_cpu_info[i].err == MC_IGNORED && verbose)
printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision"
" 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev);
vfree(new_mc);
}
out:
if (cursor > 0)
vfree(new_mc);
if (cursor < 0)
error = cursor;
set_cpus_allowed(current, old);
return error;
}
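Usage-wise, get_next_ucode() turns the user buffer into a cursor-driven stream of concatenated updates, each carrying its own total size. A hedged sketch of the same walk over an in-memory buffer (the header struct below is an assumed abbreviation of microcode_header_t):

#include <stdint.h>
#include <stddef.h>

struct mc_hdr_sketch {	/* assumed subset of microcode_header_t */
	uint32_t hdrver, rev, date, sig, cksum, ldrver, pf;
	uint32_t datasize, totalsize;	/* 0 means the built-in defaults */
};

/* count the back-to-back updates in buf; -1 on a truncated image */
static int count_updates(const uint8_t *buf, size_t len)
{
	size_t off = 0;
	int n = 0;

	while (off + sizeof(struct mc_hdr_sketch) <= len) {
		const struct mc_hdr_sketch *h = (const void *)(buf + off);
		size_t total = h->totalsize ? h->totalsize : 2048;

		if (off + total > len)
			return -1;
		n++;
		off += total;	/* hop to the next concatenated update */
	}
	return n;
}
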

static int microcode_open (struct inode *unused1, struct file *unused2)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}

static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
{
ssize_t ret;

if (len < DEFAULT_UCODE_TOTALSIZE) {
printk(KERN_ERR "microcode: not enough data\n");
return -EINVAL;
}

if ((len >> PAGE_SHIFT) > num_physpages) {
printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
return -EINVAL;
}

lock_cpu_hotplug();
mutex_lock(&microcode_mutex);

user_buffer = (void __user *) buf;
@@ -480,6 +446,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
ret = (ssize_t)len;

mutex_unlock(&microcode_mutex);
unlock_cpu_hotplug();

return ret;
}
@@ -496,7 +463,7 @@ static struct miscdevice microcode_dev = {
.fops = &microcode_fops,
};

static int __init microcode_init (void)
static int __init microcode_dev_init (void)
{
int error;

@@ -508,6 +475,280 @@ static int __init microcode_init (void)
return error;
}

return 0;
}

static void __exit microcode_dev_exit (void)
{
misc_deregister(&microcode_dev);
}

MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while(0)
#endif

static long get_next_ucode_from_buffer(void **mc, void *buf,
unsigned long size, long offset)
{
microcode_header_t *mc_header;
unsigned long total_size;

/* No more data */
if (offset >= size)
return 0;
mc_header = (microcode_header_t *)(buf + offset);
total_size = get_totalsize(mc_header);

if (offset + total_size > size) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return -EINVAL;
}

*mc = vmalloc(total_size);
if (!*mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}
memcpy(*mc, buf + offset, total_size);
return offset + total_size;
}

/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

static int cpu_request_microcode(int cpu)
{
char name[30];
struct cpuinfo_x86 *c = cpu_data + cpu;
const struct firmware *firmware;
void *buf;
unsigned long size;
long offset = 0;
int error;
void *mc;

/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
sprintf(name,"intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
error = request_firmware(&firmware, name, &microcode_pdev->dev);
if (error) {
pr_debug("ucode data file %s load failed\n", name);
return error;
}
buf = (void *)firmware->data;
size = firmware->size;
while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
> 0) {
error = microcode_sanity_check(mc);
if (error)
break;
error = get_maching_microcode(mc, cpu);
if (error < 0)
break;
/*
* It's possible the data file has multiple matching ucode,
* let's keep searching till the latest version
*/
if (error == 1) {
apply_microcode(cpu);
error = 0;
}
vfree(mc);
}
if (offset > 0)
vfree(mc);
if (offset < 0)
error = offset;
release_firmware(firmware);

return error;
}
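The request_firmware() name above is pure CPUID arithmetic; for example, family 6, model 14, stepping 8 asks for intel-ucode/06-0e-08, as this small sketch shows:

#include <stdio.h>

int main(void)
{
	unsigned int family = 6, model = 14, stepping = 8;	/* example CPU */
	char name[30];

	snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
		 family, model, stepping);
	printf("%s\n", name);	/* -> intel-ucode/06-0e-08 */
	return 0;
}
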

static void microcode_init_cpu(int cpu)
{
cpumask_t old;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

old = current->cpus_allowed;

set_cpus_allowed(current, cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
if (uci->valid)
cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed(current, old);
}

static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

mutex_lock(&microcode_mutex);
uci->valid = 0;
vfree(uci->mc);
uci->mc = NULL;
mutex_unlock(&microcode_mutex);
}

static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
int cpu = dev->id;

if (end == buf)
return -EINVAL;
if (val == 1) {
cpumask_t old;

old = current->cpus_allowed;

lock_cpu_hotplug();
set_cpus_allowed(current, cpumask_of_cpu(cpu));

mutex_lock(&microcode_mutex);
if (uci->valid)
err = cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
unlock_cpu_hotplug();
set_cpus_allowed(current, old);
}
if (err)
return err;
return sz;
}
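microcode_init_cpu() and reload_store() both use the same save/pin/restore affinity dance so the MSR side effects land on the intended CPU. A userspace rendition of the pattern, with sched_setaffinity() standing in for the kernel's set_cpus_allowed():

#define _GNU_SOURCE
#include <sched.h>

/* run fn(cpu) while pinned to `cpu`, then restore the old mask */
static int run_pinned(int cpu, void (*fn)(int))
{
	cpu_set_t old, pin;

	if (sched_getaffinity(0, sizeof(old), &old))
		return -1;
	CPU_ZERO(&pin);
	CPU_SET(cpu, &pin);
	if (sched_setaffinity(0, sizeof(pin), &pin))
		return -1;
	fn(cpu);		/* guaranteed to execute on `cpu` here */
	return sched_setaffinity(0, sizeof(old), &old);
}
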

static ssize_t version_show(struct sys_device *dev, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

return sprintf(buf, "0x%x\n", uci->rev);
}

static ssize_t pf_show(struct sys_device *dev, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

return sprintf(buf, "0x%x\n", uci->pf);
}

static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
static SYSDEV_ATTR(version, 0400, version_show, NULL);
static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);

static struct attribute *mc_default_attrs[] = {
&attr_reload.attr,
&attr_version.attr,
&attr_processor_flags.attr,
NULL
};

static struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};

static int mc_sysdev_add(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

if (!cpu_online(cpu))
return 0;
pr_debug("Microcode:CPU %d added\n", cpu);
memset(uci, 0, sizeof(*uci));
sysfs_create_group(&sys_dev->kobj, &mc_attr_group);

microcode_init_cpu(cpu);
return 0;
}

static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;

if (!cpu_online(cpu))
return 0;
pr_debug("Microcode:CPU %d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}

static int mc_sysdev_resume(struct sys_device *dev)
{
int cpu = dev->id;

if (!cpu_online(cpu))
return 0;
pr_debug("Microcode:CPU %d resumed\n", cpu);
/* only CPU 0 will apply ucode here */
apply_microcode(0);
return 0;
}

static struct sysdev_driver mc_sysdev_driver = {
.add = mc_sysdev_add,
.remove = mc_sysdev_remove,
.resume = mc_sysdev_resume,
};

#ifdef CONFIG_HOTPLUG_CPU
static __cpuinit int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;

sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
case CPU_DOWN_FAILED:
mc_sysdev_add(sys_dev);
break;
case CPU_DOWN_PREPARE:
mc_sysdev_remove(sys_dev);
break;
}
return NOTIFY_OK;
}

static struct notifier_block mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
#endif

static int __init microcode_init (void)
{
int error;

error = microcode_dev_init();
if (error)
return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}

lock_cpu_hotplug();
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
unlock_cpu_hotplug();
if (error) {
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
return error;
}

register_hotcpu_notifier(&mc_cpu_notifier);

printk(KERN_INFO
"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
return 0;
@@ -515,9 +756,16 @@ static int __init microcode_init (void)

static void __exit microcode_exit (void)
{
misc_deregister(&microcode_dev);
microcode_dev_exit();

unregister_hotcpu_notifier(&mc_cpu_notifier);

lock_cpu_hotplug();
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
unlock_cpu_hotplug();

platform_device_unregister(microcode_pdev);
}

module_init(microcode_init)
module_exit(microcode_exit)
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);

@@ -30,6 +30,7 @@
#include <asm/io_apic.h>

#include <mach_apic.h>
#include <mach_apicdef.h>
#include <mach_mpparse.h>
#include <bios_ebda.h>

@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
static unsigned int __devinitdata num_processors;
unsigned int __cpuinitdata num_processors;

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
@@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m)

mpc_oem_bus_info(m, str, translation_table[mpc_record]);

#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
" is too large, max. supported is %d\n",
m->mpc_busid, str, MAX_MP_BUSSES - 1);
return;
}
#endif

if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
@@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
/*
* Well it seems all SMP boards in existence
* use ExtINT/LVT1 == LINT0 and
* NMI/LVT2 == LINT1 - the following check
* will show us if this assumption is false.
* Until then we do not have to add baggage.
*/
if ((m->mpc_irqtype == mp_ExtINT) &&
(m->mpc_destapiclint != 0))
BUG();
if ((m->mpc_irqtype == mp_NMI) &&
(m->mpc_destapiclint != 1))
BUG();
}

#ifdef CONFIG_X86_NUMAQ
@@ -822,8 +812,7 @@ int es7000_plat;

#ifdef CONFIG_ACPI

void __init mp_register_lapic_address (
u64 address)
void __init mp_register_lapic_address(u64 address)
{
mp_lapic_addr = (unsigned long) address;

@@ -835,13 +824,10 @@ void __init mp_register_lapic_address (
Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
}


void __devinit mp_register_lapic (
u8 id,
u8 enabled)
void __devinit mp_register_lapic (u8 id, u8 enabled)
{
struct mpc_config_processor processor;
int boot_cpu = 0;
int boot_cpu = 0;

if (MAX_APICS - id <= 0) {
printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
@@ -878,11 +864,9 @@ static struct mp_ioapic_routing {
u32 pin_programmed[4];
} mp_ioapic_routing[MAX_IO_APICS];


static int mp_find_ioapic (
int gsi)
static int mp_find_ioapic (int gsi)
{
int i = 0;
int i = 0;

/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
@@ -895,15 +879,11 @@ static int mp_find_ioapic (

return -1;
}


void __init mp_register_ioapic (
u8 id,
u32 address,
u32 gsi_base)
void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
{
int idx = 0;
int tmpid;
int idx = 0;
int tmpid;

if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -949,16 +929,10 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
mp_ioapic_routing[idx].gsi_base,
mp_ioapic_routing[idx].gsi_end);

return;
}


void __init mp_override_legacy_irq (
u8 bus_irq,
u8 polarity,
u8 trigger,
u32 gsi)
void __init
mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
struct mpc_config_intsrc intsrc;
int ioapic = -1;
@@ -996,15 +970,13 @@ void __init mp_override_legacy_irq (
mp_irqs[mp_irq_entries] = intsrc;
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!\n");

return;
}

void __init mp_config_acpi_legacy_irqs (void)
{
struct mpc_config_intsrc intsrc;
int i = 0;
int ioapic = -1;
int i = 0;
int ioapic = -1;

/*
* Fabricate the legacy ISA bus (bus #31).
@@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void)

#define MAX_GSI_NUM 4096

int mp_register_gsi (u32 gsi, int triggering, int polarity)
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
static int pci_irq = 16;
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
static int pci_irq = 16;
/*
* Mapping between Global System Interrupts, which
* represent all possible interrupts, and IRQs

File diff suppressed because it is too large
@@ -37,6 +37,7 @@
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/personality.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs)
* the "args".
*/
extern void kernel_thread_helper(void);
__asm__(".section .text\n"
".align 4\n"
"kernel_thread_helper:\n\t"
"movl %edx,%eax\n\t"
"pushl %edx\n\t"
"call *%ebx\n\t"
"pushl %eax\n\t"
"call do_exit\n"
".previous");

/*
* Create a kernel thread
@@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
regs.xes = __USER_DS;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
regs.xcs = __KERNEL_CS;
regs.xcs = __KERNEL_CS | get_kernel_rpl();
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

/* Ok, create the new process.. */
@@ -905,7 +897,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)

unsigned long arch_align_stack(unsigned long sp)
{
if (randomize_va_space)
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}

@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
return addr;
}

static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
int i, copied;
unsigned char opcode[16];
unsigned char opcode[15];
unsigned long addr = convert_eip_to_linear(child, regs);

copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
for (i = 0; i < copied; i++) {
switch (opcode[i]) {
/* popf */
case 0x9d:
/* popf and iret */
case 0x9d: case 0xcf:
return 1;
/* opcode and address size prefixes */
case 0x66: case 0x67:
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child)
* don't mark it as being "us" that set it, so that we
* won't clear it by hand later.
*/
if (is_at_popf(child, regs))
if (is_setting_trap_flag(child, regs))
return;

child->ptrace |= PT_DTRACE;

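A standalone restatement of the scan above: skip legal prefix bytes, then report whether the next opcode is popf (0x9d) or iret (0xcf), either of which can rewrite EFLAGS.TF behind the single-step logic. The kernel version's prefix list is only partially visible in this excerpt, so the segment-override cases here are an assumption:

#include <stddef.h>

static int sets_trap_flag(const unsigned char *opcode, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++) {
		switch (opcode[i]) {
		case 0x9d: case 0xcf:	/* popf, iret */
			return 1;
		case 0x66: case 0x67:	/* operand/address size prefixes */
		case 0x26: case 0x2e:	/* es, cs segment overrides */
		case 0x36: case 0x3e:	/* ss, ds */
		case 0x64: case 0x65:	/* fs, gs */
			continue;	/* keep scanning past prefixes */
		default:
			return 0;	/* some other instruction */
		}
	}
	return 0;
}
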
@@ -7,16 +7,138 @@
*/

#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/kexec.h>

/*
* Must be relocatable PIC code callable as a C function
*/

#define PTR(x) (x << 2)
#define PAGE_ALIGNED (1 << PAGE_SHIFT)
#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */

.text
.align PAGE_ALIGNED
.globl relocate_kernel
relocate_kernel:
movl 8(%esp), %ebp /* list of pages */

#ifdef CONFIG_X86_PAE
/* map the control page at its virtual address */

movl PTR(VA_PGD)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0xc0000000, %eax
shrl $27, %eax
addl %edi, %eax

movl PTR(PA_PMD_0)(%ebp), %edx
orl $PAE_PGD_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PMD_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x3fe00000, %eax
shrl $18, %eax
addl %edi, %eax

movl PTR(PA_PTE_0)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PTE_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x001ff000, %eax
shrl $9, %eax
addl %edi, %eax

movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

/* identity map the control page at its physical address */

movl PTR(VA_PGD)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0xc0000000, %eax
shrl $27, %eax
addl %edi, %eax

movl PTR(PA_PMD_1)(%ebp), %edx
orl $PAE_PGD_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PMD_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x3fe00000, %eax
shrl $18, %eax
addl %edi, %eax

movl PTR(PA_PTE_1)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PTE_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x001ff000, %eax
shrl $9, %eax
addl %edi, %eax

movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
#else
/* map the control page at its virtual address */

movl PTR(VA_PGD)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0xffc00000, %eax
shrl $20, %eax
addl %edi, %eax

movl PTR(PA_PTE_0)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PTE_0)(%ebp), %edi
movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
andl $0x003ff000, %eax
shrl $10, %eax
addl %edi, %eax

movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

/* identity map the control page at its physical address */

movl PTR(VA_PGD)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0xffc00000, %eax
shrl $20, %eax
addl %edi, %eax

movl PTR(PA_PTE_1)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)

movl PTR(VA_PTE_1)(%ebp), %edi
movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
andl $0x003ff000, %eax
shrl $10, %eax
addl %edi, %eax

movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
orl $PAGE_ATTR, %edx
movl %edx, (%eax)
#endif

/*
* Must be relocatable PIC code callable as a C function, that once
* it starts can not use the previous process's stack.
*/
.globl relocate_new_kernel
relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
movl 4(%esp), %ebx /* page_list */
movl 8(%esp), %ebp /* reboot_code_buffer */
movl 8(%esp), %ebp /* list of pages */
movl 12(%esp), %edx /* start address */
movl 16(%esp), %ecx /* cpu_has_pae */

@@ -24,11 +146,26 @@ relocate_new_kernel:
pushl $0
popfl

/* set a new stack at the bottom of our page... */
lea 4096(%ebp), %esp
/* get physical address of control page now */
/* this is impossible after page table switch */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi

/* store the parameters back on the stack */
pushl %edx /* store the start address */
/* switch to new set of page tables */
movl PTR(PA_PGD)(%ebp), %eax
movl %eax, %cr3

/* setup a new stack at the end of the physical control page */
lea 4096(%edi), %esp

/* jump to identity mapped page */
movl %edi, %eax
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
ret

identity_mapped:
/* store the start address on the stack */
pushl %edx

/* Set cr0 to a known state:
* 31 0 == Paging disabled
@@ -113,8 +250,3 @@ relocate_new_kernel:
xorl %edi, %edi
xorl %ebp, %ebp
ret
relocate_new_kernel_end:

.globl relocate_new_kernel_size
relocate_new_kernel_size:
.long relocate_new_kernel_end - relocate_new_kernel

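The mask/shift pairs in the PAE branch above are just page-table index extraction pre-scaled to byte offsets (512 entries of 8 bytes per table, so each shift lands 3 bits short of the field boundary); restated in C for one example address:

#include <stdio.h>

int main(void)
{
	unsigned long va = 0xc0100000UL;	/* example kernel virtual address */

	/* bits 31-30 -> PGD slot, bits 29-21 -> PMD, bits 20-12 -> PTE;
	 * each ">> (shift - 3)" folds in the *8 entry-size scaling */
	unsigned long pgd_off = (va & 0xc0000000UL) >> 27;
	unsigned long pmd_off = (va & 0x3fe00000UL) >> 18;
	unsigned long pte_off = (va & 0x001ff000UL) >> 9;

	printf("pgd+%lu pmd+%lu pte+%lu\n", pgd_off, pmd_off, pte_off);
	return 0;
}
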
@@ -1,134 +0,0 @@
/*
* i386 semaphore implementation.
*
* (C) Copyright 1999 Linus Torvalds
*
* Portions Copyright 1999 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
*/
#include <asm/semaphore.h>

/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
* need to convert that sequence back into the C sequence when
* there is contention on the semaphore.
*
* %eax contains the semaphore pointer on entry. Save the C-clobbered
* registers (%eax, %edx and %ecx) except %eax which is either a return
* value or just clobbered..
*/
asm(
".section .sched.text\n"
".align 4\n"
".globl __down_failed\n"
"__down_failed:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
"call __down\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
"movl %ebp,%esp\n\t"
"popl %ebp\n\t"
#endif
"ret"
);

asm(
".section .sched.text\n"
".align 4\n"
".globl __down_failed_interruptible\n"
"__down_failed_interruptible:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
"call __down_interruptible\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
"movl %ebp,%esp\n\t"
"popl %ebp\n\t"
#endif
"ret"
);

asm(
".section .sched.text\n"
".align 4\n"
".globl __down_failed_trylock\n"
"__down_failed_trylock:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
"call __down_trylock\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
"movl %ebp,%esp\n\t"
"popl %ebp\n\t"
#endif
"ret"
);

asm(
".section .sched.text\n"
".align 4\n"
".globl __up_wakeup\n"
"__up_wakeup:\n\t"
"pushl %edx\n\t"
"pushl %ecx\n\t"
"call __up\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
"ret"
);

/*
* rw spinlock fallbacks
*/
#if defined(CONFIG_SMP)
asm(
".section .sched.text\n"
".align 4\n"
".globl __write_lock_failed\n"
"__write_lock_failed:\n\t"
LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
"1: rep; nop\n\t"
"cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
"jne 1b\n\t"
LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
"jnz __write_lock_failed\n\t"
"ret"
);

asm(
".section .sched.text\n"
".align 4\n"
".globl __read_lock_failed\n"
"__read_lock_failed:\n\t"
LOCK_PREFIX "incl (%eax)\n"
"1: rep; nop\n\t"
"cmpl $1,(%eax)\n\t"
"js 1b\n\t"
LOCK_PREFIX "decl (%eax)\n\t"
"js __read_lock_failed\n\t"
"ret"
);
#endif
@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data);
|
||||
|
||||
unsigned long mmu_cr4_features;
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
int acpi_disabled = 0;
|
||||
#else
|
||||
int acpi_disabled = 1;
|
||||
#endif
|
||||
EXPORT_SYMBOL(acpi_disabled);
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
int __initdata acpi_force = 0;
|
||||
extern acpi_interrupt_flags acpi_sci_flags;
|
||||
#endif
|
||||
|
||||
/* for MCA, but anyone else can use it if they want */
|
||||
unsigned int machine_id;
|
||||
#ifdef CONFIG_MCA
|
||||
@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info);
|
||||
struct e820map e820;
|
||||
|
||||
extern void early_cpu_init(void);
|
||||
extern void generic_apic_probe(char *);
|
||||
extern int root_mountflags;
|
||||
|
||||
unsigned long saved_videomode;
|
||||
@ -701,238 +688,132 @@ static inline void copy_edd(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __init parse_cmdline_early (char ** cmdline_p)
|
||||
static int __initdata user_defined_memmap = 0;
|
||||
|
||||
/*
|
||||
* "mem=nopentium" disables the 4MB page tables.
|
||||
* "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
|
||||
* to <mem>, overriding the bios size.
|
||||
* "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
|
||||
* <start> to <start>+<mem>, overriding the bios size.
|
||||
*
|
||||
* HPA tells me bootloaders need to parse mem=, so no new
|
||||
* option should be mem= [also see Documentation/i386/boot.txt]
|
||||
*/
|
||||
static int __init parse_mem(char *arg)
|
||||
{
|
||||
char c = ' ', *to = command_line, *from = saved_command_line;
|
||||
int len = 0;
|
||||
int userdef = 0;
|
||||
if (!arg)
|
||||
return -EINVAL;
|
||||
|
||||
/* Save unparsed command line copy for /proc/cmdline */
|
||||
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
|
||||
|
||||
for (;;) {
|
||||
if (c != ' ')
|
||||
goto next_char;
|
||||
/*
|
||||
* "mem=nopentium" disables the 4MB page tables.
|
||||
* "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
|
||||
* to <mem>, overriding the bios size.
|
||||
* "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
|
||||
* <start> to <start>+<mem>, overriding the bios size.
|
||||
*
|
||||
* HPA tells me bootloaders need to parse mem=, so no new
|
||||
* option should be mem= [also see Documentation/i386/boot.txt]
|
||||
if (strcmp(arg, "nopentium") == 0) {
|
||||
clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
|
||||
disable_pse = 1;
|
||||
} else {
|
||||
/* If the user specifies memory size, we
|
||||
* limit the BIOS-provided memory map to
|
||||
* that size. exactmap can be used to specify
|
||||
* the exact map. mem=number can be used to
|
||||
* trim the existing memory map.
|
||||
*/
|
||||
if (!memcmp(from, "mem=", 4)) {
|
||||
if (to != command_line)
|
||||
to--;
|
||||
if (!memcmp(from+4, "nopentium", 9)) {
|
||||
from += 9+4;
|
||||
clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
|
||||
disable_pse = 1;
|
||||
} else {
|
||||
/* If the user specifies memory size, we
|
||||
* limit the BIOS-provided memory map to
|
||||
* that size. exactmap can be used to specify
|
||||
* the exact map. mem=number can be used to
|
||||
* trim the existing memory map.
|
||||
*/
|
||||
unsigned long long mem_size;
|
||||
unsigned long long mem_size;
|
||||
|
||||
mem_size = memparse(from+4, &from);
|
||||
limit_regions(mem_size);
|
||||
userdef=1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (!memcmp(from, "memmap=", 7)) {
|
||||
if (to != command_line)
|
||||
to--;
|
||||
if (!memcmp(from+7, "exactmap", 8)) {
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
/* If we are doing a crash dump, we
|
||||
* still need to know the real mem
|
||||
* size before original memory map is
|
||||
* reset.
|
||||
*/
|
||||
find_max_pfn();
|
||||
saved_max_pfn = max_pfn;
|
||||
#endif
|
||||
from += 8+7;
|
||||
e820.nr_map = 0;
|
||||
userdef = 1;
|
||||
} else {
|
||||
/* If the user specifies memory size, we
|
||||
* limit the BIOS-provided memory map to
|
||||
* that size. exactmap can be used to specify
|
||||
* the exact map. mem=number can be used to
|
||||
* trim the existing memory map.
|
||||
*/
|
||||
unsigned long long start_at, mem_size;
|
||||
|
||||
mem_size = memparse(from+7, &from);
|
||||
if (*from == '@') {
|
||||
start_at = memparse(from+1, &from);
|
||||
add_memory_region(start_at, mem_size, E820_RAM);
|
||||
} else if (*from == '#') {
|
||||
start_at = memparse(from+1, &from);
|
||||
add_memory_region(start_at, mem_size, E820_ACPI);
|
||||
} else if (*from == '$') {
|
||||
start_at = memparse(from+1, &from);
|
||||
add_memory_region(start_at, mem_size, E820_RESERVED);
|
||||
} else {
|
||||
limit_regions(mem_size);
|
||||
userdef=1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else if (!memcmp(from, "noexec=", 7))
|
||||
noexec_setup(from + 7);
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_SMP
|
||||
/*
|
||||
* If the BIOS enumerates physical processors before logical,
|
||||
* maxcpus=N at enumeration-time can be used to disable HT.
|
||||
*/
|
||||
else if (!memcmp(from, "maxcpus=", 8)) {
|
||||
extern unsigned int maxcpus;
|
||||
|
||||
maxcpus = simple_strtoul(from + 8, NULL, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ACPI
/* "acpi=off" disables both ACPI table parsing and interpreter */
else if (!memcmp(from, "acpi=off", 8)) {
disable_acpi();
}

/* acpi=force to over-ride black-list */
else if (!memcmp(from, "acpi=force", 10)) {
acpi_force = 1;
acpi_ht = 1;
acpi_disabled = 0;
}

/* acpi=strict disables out-of-spec workarounds */
else if (!memcmp(from, "acpi=strict", 11)) {
acpi_strict = 1;
}

/* Limit ACPI just to boot-time to enable HT */
else if (!memcmp(from, "acpi=ht", 7)) {
if (!acpi_force)
disable_acpi();
acpi_ht = 1;
}

/* "pci=noacpi" disables ACPI IRQ routing and PCI scan */
else if (!memcmp(from, "pci=noacpi", 10)) {
acpi_disable_pci();
}
/* "acpi=noirq" disables ACPI interrupt routing */
else if (!memcmp(from, "acpi=noirq", 10)) {
acpi_noirq_set();
}

else if (!memcmp(from, "acpi_sci=edge", 13))
acpi_sci_flags.trigger = 1;

else if (!memcmp(from, "acpi_sci=level", 14))
acpi_sci_flags.trigger = 3;

else if (!memcmp(from, "acpi_sci=high", 13))
acpi_sci_flags.polarity = 1;

else if (!memcmp(from, "acpi_sci=low", 12))
acpi_sci_flags.polarity = 3;

#ifdef CONFIG_X86_IO_APIC
else if (!memcmp(from, "acpi_skip_timer_override", 24))
acpi_skip_timer_override = 1;

if (!memcmp(from, "disable_timer_pin_1", 19))
disable_timer_pin_1 = 1;
if (!memcmp(from, "enable_timer_pin_1", 18))
disable_timer_pin_1 = -1;

/* disable IO-APIC */
else if (!memcmp(from, "noapic", 6))
disable_ioapic_setup();
#endif /* CONFIG_X86_IO_APIC */
#endif /* CONFIG_ACPI */

#ifdef CONFIG_X86_LOCAL_APIC
/* enable local APIC */
else if (!memcmp(from, "lapic", 5))
lapic_enable();

/* disable local APIC */
else if (!memcmp(from, "nolapic", 6))
lapic_disable();
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_KEXEC
/* crashkernel=size@addr specifies the location to reserve for
 * a crash kernel. By reserving this memory we guarantee
 * that linux never sets it up as a DMA target.
 * Useful for holding code to do something appropriate
 * after a kernel panic.
 */
else if (!memcmp(from, "crashkernel=", 12)) {
unsigned long size, base;
size = memparse(from+12, &from);
if (*from == '@') {
base = memparse(from+1, &from);
/* FIXME: Do I want a sanity check
 * to validate the memory range?
 */
crashk_res.start = base;
crashk_res.end = base + size - 1;
}
}
#endif
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
 * stored by the crashed kernel.
 */
else if (!memcmp(from, "elfcorehdr=", 11))
elfcorehdr_addr = memparse(from+11, &from);
#endif

/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 */
else if (!memcmp(from, "highmem=", 8))
highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the
 * vmalloc area - the default is 128m.
 */
else if (!memcmp(from, "vmalloc=", 8))
__VMALLOC_RESERVE = memparse(from+8, &from);

next_char:
c = *(from++);
if (!c)
break;
if (COMMAND_LINE_SIZE <= ++len)
break;
*(to++) = c;
}
*to = '\0';
*cmdline_p = command_line;
if (userdef) {
printk(KERN_INFO "user-defined physical RAM map:\n");
print_memory_map("user");
mem_size = memparse(arg, &arg);
limit_regions(mem_size);
user_defined_memmap = 1;
}
return 0;
}
early_param("mem", parse_mem);

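A note on the conversion visible in this hunk: the old ad-hoc scan in parse_cmdline_early() becomes a parse_mem() handler registered with early_param(), which runs while the command line is first scanned, before ordinary __setup() handlers. Both paths lean on memparse()-style size parsing. The following is a minimal userspace model of that suffix handling, written for illustration; the function name mirrors the kernel helper but the code is a sketch, not the kernel implementation.

/* Userspace model of kernel-style memparse(): parses "<number>[KkMmGg]"
 * and returns the size in bytes. Illustrative sketch only. */
#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse_sketch(const char *s, char **retptr)
{
	/* base 0 so plain decimal and 0x... hex both work */
	unsigned long long v = strtoull(s, retptr, 0);

	switch (**retptr) {
	case 'G': case 'g':
		v <<= 10;
		/* fall through */
	case 'M': case 'm':
		v <<= 10;
		/* fall through */
	case 'K': case 'k':
		v <<= 10;
		(*retptr)++;
	}
	return v;
}

int main(void)
{
	char *rest;
	/* "mem=512M" hands "512M" to the handler */
	printf("%llu\n", memparse_sketch("512M", &rest)); /* 536870912 */
	return 0;
}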
static int __init parse_memmap(char *arg)
{
if (!arg)
return -EINVAL;

if (strcmp(arg, "exactmap") == 0) {
#ifdef CONFIG_CRASH_DUMP
/* If we are doing a crash dump, we
 * still need to know the real mem
 * size before original memory map is
 * reset.
 */
find_max_pfn();
saved_max_pfn = max_pfn;
#endif
e820.nr_map = 0;
user_defined_memmap = 1;
} else {
/* If the user specifies memory size, we
 * limit the BIOS-provided memory map to
 * that size. exactmap can be used to specify
 * the exact map. mem=number can be used to
 * trim the existing memory map.
 */
unsigned long long start_at, mem_size;

mem_size = memparse(arg, &arg);
if (*arg == '@') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_RAM);
} else if (*arg == '#') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_ACPI);
} else if (*arg == '$') {
start_at = memparse(arg+1, &arg);
add_memory_region(start_at, mem_size, E820_RESERVED);
} else {
limit_regions(mem_size);
user_defined_memmap = 1;
}
}
return 0;
}
early_param("memmap", parse_memmap);

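The punctuation after the size in parse_memmap() selects the e820 type: '@' adds usable RAM at the given address, '#' marks an ACPI data region, '$' marks the range reserved, and a bare number just clips the BIOS map. As a usage note, a hypothetical boot command line exercising all of it might look like:

memmap=exactmap memmap=640K@0 memmap=64M@1M   (trust only these two RAM ranges)
memmap=16M$0x30000000                         (punch a reserved hole at 768MB)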
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
 * stored by the crashed kernel.
 */
static int __init parse_elfcorehdr(char *arg)
{
if (!arg)
return -EINVAL;

elfcorehdr_addr = memparse(arg, &arg);
return 0;
}
early_param("elfcorehdr", parse_elfcorehdr);
#endif /* CONFIG_PROC_VMCORE */

/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 */
static int __init parse_highmem(char *arg)
{
if (!arg)
return -EINVAL;

highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
return 0;
}
early_param("highmem", parse_highmem);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the
 * vmalloc area - the default is 128m.
 */
static int __init parse_vmalloc(char *arg)
{
if (!arg)
return -EINVAL;

__VMALLOC_RESERVE = memparse(arg, &arg);
return 0;
}
early_param("vmalloc", parse_vmalloc);

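A note on the >> PAGE_SHIFT in parse_highmem(): memparse() returns bytes, while highmem_pages counts page frames, so the shift divides by the page size. A quick worked example, assuming the usual i386 PAGE_SHIFT of 12 (4 KiB pages):

#include <stdio.h>

#define PAGE_SHIFT 12 /* 4 KiB pages, the i386 default */

int main(void)
{
	unsigned long long bytes = 512ULL << 20;   /* "highmem=512M" */
	unsigned long pages = (unsigned long)(bytes >> PAGE_SHIFT);

	printf("%lu pages\n", pages);              /* 131072 */
	return 0;
}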
/*
 * reservetop=size reserves a hole at the top of the kernel address space which
@ -1189,6 +1070,14 @@ static unsigned long __init setup_memory(void)
}
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
@ -1200,22 +1089,20 @@ static unsigned long __init setup_memory(void)

void __init zone_sizes_init(void)
{
unsigned long zones_size[MAX_NR_ZONES] = { 0, };
unsigned int max_dma, low;

max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
low = max_low_pfn;

if (low < max_dma)
zones_size[ZONE_DMA] = low;
else {
zones_size[ZONE_DMA] = max_dma;
zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
zones_size[ZONE_HIGHMEM] = highend_pfn - low;
unsigned long max_zone_pfns[MAX_NR_ZONES] = {
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
max_low_pfn,
highend_pfn};
add_active_range(0, 0, highend_pfn);
#else
unsigned long max_zone_pfns[MAX_NR_ZONES] = {
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
max_low_pfn};
add_active_range(0, 0, max_low_pfn);
#endif
}
free_area_init(zones_size);

free_area_init_nodes(max_zone_pfns);
}
#else
extern unsigned long __init setup_memory(void);
@ -1518,17 +1405,15 @@ void __init setup_arch(char **cmdline_p)

data_resource.start = virt_to_phys(_etext);
data_resource.end = virt_to_phys(_edata)-1;

parse_cmdline_early(cmdline_p);
parse_early_param();

#ifdef CONFIG_EARLY_PRINTK
{
char *s = strstr(*cmdline_p, "earlyprintk=");
if (s) {
setup_early_printk(strchr(s, '=') + 1);
printk("early console enabled\n");
}
if (user_defined_memmap) {
printk(KERN_INFO "user-defined physical RAM map:\n");
print_memory_map("user");
}
#endif

strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;

max_low_pfn = setup_memory();

@ -1557,7 +1442,7 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();

#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe(*cmdline_p);
generic_apic_probe();
#endif
if (efi_enabled)
efi_map_memmap();
@ -1569,9 +1454,11 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
#endif

#ifdef CONFIG_PCI
#ifdef CONFIG_X86_IO_APIC
check_acpi_pci(); /* Checks more than just ACPI actually */
#endif
#endif

#ifdef CONFIG_ACPI
acpi_boot_init();

@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id)
 */
if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

if (num_possible_cpus() == 1)
goto valid_k7;

/* Athlon 660/661 is valid. */
if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
goto valid_k7;
@ -1376,7 +1379,8 @@ int __cpu_disable(void)
 */
if (cpu == 0)
return -EBUSY;

if (nmi_watchdog == NMI_LOCAL_APIC)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
local_irq_enable();
@ -1490,3 +1494,16 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}

/*
 * If the BIOS enumerates physical processors before logical,
 * maxcpus=N at enumeration-time can be used to disable HT.
 */
static int __init parse_maxcpus(char *arg)
{
extern unsigned int maxcpus;

maxcpus = simple_strtoul(arg, NULL, 0);
return 0;
}
early_param("maxcpus", parse_maxcpus);

@ -54,8 +54,6 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];

static int num_memory_chunks; /* total number of memory chunks */
static int zholes_size_init;
static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];

extern void * boot_ioremap(unsigned long, unsigned long);

@ -135,47 +133,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
"enabled and removable" : "enabled" ) );
}

/* Take a chunk of pages from page frame cstart to cend and count the number
 * of pages in each zone, returned via zones[].
 */
static __init void chunk_to_zones(unsigned long cstart, unsigned long cend,
unsigned long *zones)
{
unsigned long max_dma;
extern unsigned long max_low_pfn;

int z;
unsigned long rend;

/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
 * similarly scoped information and should be handled in a consistent
 * manner.
 */
max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;

/* Split the hole into the zones in which it falls. Repeatedly
 * take the segment in which the remaining hole starts, round it
 * to the end of that zone.
 */
memset(zones, 0, MAX_NR_ZONES * sizeof(long));
while (cstart < cend) {
if (cstart < max_dma) {
z = ZONE_DMA;
rend = (cend < max_dma)? cend : max_dma;

} else if (cstart < max_low_pfn) {
z = ZONE_NORMAL;
rend = (cend < max_low_pfn)? cend : max_low_pfn;

} else {
z = ZONE_HIGHMEM;
rend = cend;
}
zones[z] += rend - cstart;
cstart = rend;
}
}

/*
 * The SRAT table always lists ascending addresses, so can always
 * assume that the first "start" address that you see is the real
@ -220,7 +177,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)

memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
memset(zholes_size, 0, sizeof(zholes_size));

num_memory_chunks = 0;
while (p < end) {
@ -284,6 +240,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
node_read_chunk(chunk->nid, chunk);
add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
}

for_each_online_node(nid) {
@ -392,57 +349,7 @@ int __init get_memcfg_from_srat(void)
return acpi20_parse_srat((struct acpi_table_srat *)header);
}
out_err:
remove_all_active_ranges();
printk("failed to get NUMA memory information from SRAT table\n");
return 0;
}

/* For each node run the memory list to determine whether there are
 * any memory holes. For each hole determine which ZONE they fall
 * into.
 *
 * NOTE#1: this requires knowledge of the zone boundaries and so
 * _cannot_ be performed before those are calculated in setup_memory.
 *
 * NOTE#2: we rely on the fact that the memory chunks are ordered by
 * start pfn number during setup.
 */
static void __init get_zholes_init(void)
{
int nid;
int c;
int first;
unsigned long end = 0;

for_each_online_node(nid) {
first = 1;
for (c = 0; c < num_memory_chunks; c++){
if (node_memory_chunk[c].nid == nid) {
if (first) {
end = node_memory_chunk[c].end_pfn;
first = 0;

} else {
/* Record any gap between this chunk
 * and the previous chunk on this node
 * against the zones it spans.
 */
chunk_to_zones(end,
node_memory_chunk[c].start_pfn,
&zholes_size[nid * MAX_NR_ZONES]);
}
}
}
}
}

unsigned long * __init get_zholes_size(int nid)
{
if (!zholes_size_init) {
zholes_size_init++;
get_zholes_init();
}
if (nid >= MAX_NUMNODES || !node_online(nid))
printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
__FUNCTION__, nid, num_online_nodes());
return &zholes_size[nid * MAX_NR_ZONES];
}

@ -1,98 +0,0 @@
/*
 * arch/i386/kernel/stacktrace.c
 *
 * Stack trace management functions
 *
 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 */
#include <linux/sched.h>
#include <linux/stacktrace.h>

static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
return p > (void *)tinfo &&
p < (void *)tinfo + THREAD_SIZE - 3;
}

/*
 * Save stack-backtrace addresses into a stack_trace buffer:
 */
static inline unsigned long
save_context_stack(struct stack_trace *trace, unsigned int skip,
struct thread_info *tinfo, unsigned long *stack,
unsigned long ebp)
{
unsigned long addr;

#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
if (!skip)
trace->entries[trace->nr_entries++] = addr;
else
skip--;
if (trace->nr_entries >= trace->max_entries)
break;
/*
 * break out of recursive entries (such as
 * end_of_stack_stop_unwind_function):
 */
if (ebp == *(unsigned long *)ebp)
break;

ebp = *(unsigned long *)ebp;
}
#else
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr)) {
if (!skip)
trace->entries[trace->nr_entries++] = addr;
else
skip--;
if (trace->nr_entries >= trace->max_entries)
break;
}
}
#endif

return ebp;
}

/*
 * Save stack-backtrace addresses into a stack_trace buffer.
 * If all_contexts is set, all contexts (hardirq, softirq and process)
 * are saved. If not set then only the current context is saved.
 */
void save_stack_trace(struct stack_trace *trace,
struct task_struct *task, int all_contexts,
unsigned int skip)
{
unsigned long ebp;
unsigned long *stack = &ebp;

WARN_ON(trace->nr_entries || !trace->max_entries);

if (!task || task == current) {
/* Grab ebp right from our regs: */
asm ("movl %%ebp, %0" : "=r" (ebp));
} else {
/* ebp is the last reg pushed by switch_to(): */
ebp = *(unsigned long *) task->thread.esp;
}

while (1) {
struct thread_info *context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));

ebp = save_context_stack(trace, skip, context, stack, ebp);
stack = (unsigned long *)context->previous_esp;
if (!all_contexts || !stack ||
trace->nr_entries >= trace->max_entries)
break;
trace->entries[trace->nr_entries++] = ULONG_MAX;
if (trace->nr_entries >= trace->max_entries)
break;
}
}

@ -317,3 +317,4 @@ ENTRY(sys_call_table)
.long sys_tee /* 315 */
.long sys_vmsplice
.long sys_move_pages
.long sys_getcpu

@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime)

int timer_ack;

#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);

if (!user_mode_vm(regs) && in_lock_functions(pc))
#ifdef CONFIG_SMP
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->ebp + 4);

#else
unsigned long *sp;
if ((regs->xcs & 3) == 0)
sp = (unsigned long *)&regs->esp;
else
sp = (unsigned long *)regs->esp;
/* Return address is either directly at stack pointer
 or above a saved eflags. Eflags has bits 22-31 zero,
 kernel addresses don't. */
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
#endif
}
#endif
return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

/*
 * This is the same as the above, except we _also_ save the current
@ -28,6 +28,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/nodemask.h>
#include <linux/mmzone.h>
#include <asm/cpu.h>

static struct i386_cpu cpu_devices[NR_CPUS];
@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
#endif /*CONFIG_HOTPLUG_CPU*/


static int __init topology_init(void)
{
int i;

#ifdef CONFIG_NUMA
#include <linux/mmzone.h>

static int __init topology_init(void)
{
int i;

for_each_online_node(i)
register_one_node(i);

for_each_present_cpu(i)
arch_register_cpu(i);
return 0;
}

#else /* !CONFIG_NUMA */

static int __init topology_init(void)
{
int i;

for_each_present_cpu(i)
arch_register_cpu(i);
return 0;
}

#endif /* CONFIG_NUMA */

for_each_present_cpu(i)
arch_register_cpu(i);
return 0;
}

subsys_initcall(topology_init);

@ -28,6 +28,7 @@
#include <linux/kprobes.h>
#include <linux/kexec.h>
#include <linux/unwind.h>
#include <linux/uaccess.h>

#ifdef CONFIG_EISA
#include <linux/ioport.h>
@ -40,7 +41,6 @@

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/debugreg.h>
@ -51,6 +51,7 @@
#include <asm/smp.h>
#include <asm/arch_hooks.h>
#include <asm/kdebug.h>
#include <asm/stacktrace.h>

#include <linux/module.h>

@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
p < (void *)tinfo + THREAD_SIZE - 3;
}

/*
 * Print one address/symbol entries per line.
 */
static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
{
printk(" [<%08lx>] ", addr);

print_symbol("%s\n", addr);
}

static inline unsigned long print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long ebp,
char *log_lvl)
struct stacktrace_ops *ops, void *data)
{
unsigned long addr;

#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
print_addr_and_symbol(addr, log_lvl);
ops->address(data, addr);
/*
 * break out of recursive entries (such as
 * end_of_stack_stop_unwind_function):
@ -150,30 +141,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr))
print_addr_and_symbol(addr, log_lvl);
ops->address(data, addr);
}
#endif
return ebp;
}

struct ops_and_data {
struct stacktrace_ops *ops;
void *data;
};

static asmlinkage int
show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
dump_trace_unwind(struct unwind_frame_info *info, void *data)
{
struct ops_and_data *oad = (struct ops_and_data *)data;
int n = 0;

while (unwind(info) == 0 && UNW_PC(info)) {
n++;
print_addr_and_symbol(UNW_PC(info), log_lvl);
oad->ops->address(oad->data, UNW_PC(info));
if (arch_unw_user_mode(info))
break;
}
return n;
}

static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack,
struct stacktrace_ops *ops, void *data)
{
unsigned long ebp;
unsigned long ebp = 0;

if (!task)
task = current;
@ -181,54 +179,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (call_trace >= 0) {
int unw_ret = 0;
struct unwind_frame_info info;
struct ops_and_data oad = { .ops = ops, .data = data };

if (regs) {
if (unwind_init_frame_info(&info, task, regs) == 0)
unw_ret = show_trace_unwind(&info, log_lvl);
unw_ret = dump_trace_unwind(&info, &oad);
} else if (task == current)
unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
else {
if (unwind_init_blocked(&info, task) == 0)
unw_ret = show_trace_unwind(&info, log_lvl);
unw_ret = dump_trace_unwind(&info, &oad);
}
if (unw_ret > 0) {
if (call_trace == 1 && !arch_unw_user_mode(&info)) {
print_symbol("DWARF2 unwinder stuck at %s\n",
ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
UNW_PC(&info));
if (UNW_SP(&info) >= PAGE_OFFSET) {
printk("Leftover inexact backtrace:\n");
ops->warning(data, "Leftover inexact backtrace:\n");
stack = (void *)UNW_SP(&info);
if (!stack)
return;
ebp = UNW_FP(&info);
} else
printk("Full inexact backtrace again:\n");
ops->warning(data, "Full inexact backtrace again:\n");
} else if (call_trace >= 1)
return;
else
printk("Full inexact backtrace again:\n");
ops->warning(data, "Full inexact backtrace again:\n");
} else
printk("Inexact backtrace:\n");
ops->warning(data, "Inexact backtrace:\n");
}
if (!stack) {
unsigned long dummy;
stack = &dummy;
if (task && task != current)
stack = (unsigned long *)task->thread.esp;
}

if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
} else {
/* ebp is the last reg pushed by switch_to */
ebp = *(unsigned long *) task->thread.esp;
#ifdef CONFIG_FRAME_POINTER
if (!ebp) {
if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
} else {
/* ebp is the last reg pushed by switch_to */
ebp = *(unsigned long *) task->thread.esp;
}
}
#endif

while (1) {
struct thread_info *context;
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
ebp = print_context_stack(context, stack, ebp, log_lvl);
ebp = print_context_stack(context, stack, ebp, ops, data);
/* Should be after the line below, but somewhere
 in early boot context comes out corrupted and we
 can't reference it -AK */
if (ops->stack(data, "IRQ") < 0)
break;
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
printk("%s =======================\n", log_lvl);
}
}
EXPORT_SYMBOL(dump_trace);

void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
printk(data);
print_symbol(msg, symbol);
printk("\n");
}

static void print_trace_warning(void *data, char *msg)
{
printk("%s%s\n", (char *)data, msg);
}

static int print_trace_stack(void *data, char *name)
{
return 0;
}

/*
 * Print one address/symbol entries per line.
 */
static void print_trace_address(void *data, unsigned long addr)
{
printk("%s [<%08lx>] ", (char *)data, addr);
print_symbol("%s\n", addr);
}

static struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
.address = print_trace_address,
};

static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long * stack, char *log_lvl)
{
dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
printk("%s =======================\n", log_lvl);
}

void show_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long * stack)
{
show_trace_log_lvl(task, regs, stack, "");
}
@ -291,8 +351,9 @@ void show_registers(struct pt_regs *regs)
ss = regs->xss & 0xffff;
}
print_modules();
printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
"EFLAGS: %08lx (%s %.*s) \n",
printk(KERN_EMERG "CPU: %d\n"
KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
smp_processor_id(), 0xffff & regs->xcs, regs->eip,
print_tainted(), regs->eflags, system_utsname.release,
(int)strcspn(system_utsname.version, " "),
@ -348,7 +409,7 @@ static void handle_BUG(struct pt_regs *regs)

if (eip < PAGE_OFFSET)
return;
if (__get_user(ud2, (unsigned short __user *)eip))
if (probe_kernel_address((unsigned short __user *)eip, ud2))
return;
if (ud2 != 0x0b0f)
return;
@ -361,7 +422,8 @@ static void handle_BUG(struct pt_regs *regs)
char *file;
char c;

if (__get_user(line, (unsigned short __user *)(eip + 2)))
if (probe_kernel_address((unsigned short __user *)(eip + 2),
line))
break;
if (__get_user(file, (char * __user *)(eip + 4)) ||
(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
@ -634,18 +696,24 @@ gp_in_kernel:
}
}

static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
static __kprobes void
mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
"to continue\n");
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
"CPU %d.\n", reason, smp_processor_id());
printk(KERN_EMERG "You probably have a hardware problem with your RAM "
"chips\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");

printk(KERN_EMERG "Dazed and confused, but trying to continue\n");

/* Clear and disable the memory parity error line. */
clear_mem_error(reason);
}

static void io_check_error(unsigned char reason, struct pt_regs * regs)
static __kprobes void
io_check_error(unsigned char reason, struct pt_regs * regs)
{
unsigned long i;

@ -661,7 +729,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
outb(reason, 0x61);
}

static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
#ifdef CONFIG_MCA
/* Might actually be able to figure out what the guilty party
@ -671,15 +740,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
return;
}
#endif
printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
reason, smp_processor_id());
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
"CPU %d.\n", reason, smp_processor_id());
printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");

printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}

static DEFINE_SPINLOCK(nmi_print_lock);

void die_nmi (struct pt_regs *regs, const char *msg)
void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
{
if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
@ -711,7 +783,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
do_exit(SIGSEGV);
}

static void default_do_nmi(struct pt_regs * regs)
static __kprobes void default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = 0;

@ -728,12 +800,12 @@ static void default_do_nmi(struct pt_regs * regs)
 * Ok, so this is none of the documented NMI sources,
 * so it must be the NMI watchdog.
 */
if (nmi_watchdog) {
nmi_watchdog_tick(regs);
if (nmi_watchdog_tick(regs, reason))
return;
}
if (!do_nmi_callback(regs, smp_processor_id()))
#endif
unknown_nmi_error(reason, regs);
unknown_nmi_error(reason, regs);

return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@ -749,14 +821,7 @@ static void default_do_nmi(struct pt_regs * regs)
reassert_nmi();
}

static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
{
return 0;
}

static nmi_callback_t nmi_callback = dummy_nmi_callback;

fastcall void do_nmi(struct pt_regs * regs, long error_code)
fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;

@ -766,25 +831,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)

++nmi_count(cpu);

if (!rcu_dereference(nmi_callback)(regs, cpu))
default_do_nmi(regs);
default_do_nmi(regs);

nmi_exit();
}

void set_nmi_callback(nmi_callback_t callback)
{
vmalloc_sync_all();
rcu_assign_pointer(nmi_callback, callback);
}
EXPORT_SYMBOL_GPL(set_nmi_callback);

void unset_nmi_callback(void)
{
nmi_callback = dummy_nmi_callback;
}
EXPORT_SYMBOL_GPL(unset_nmi_callback);

#ifdef CONFIG_KPROBES
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
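The refactor above turns the printing backtracer into a generic dump_trace() that hands every recovered address to a caller-supplied struct stacktrace_ops, with print_trace_ops as the console client. As a hedged sketch, here is what another client could look like; the ops layout is reconstructed from the calls visible in this hunk (ops->warning, ops->warning_symbol, ops->stack, ops->address), while the collector itself is hypothetical:

/* Hypothetical dump_trace() client that collects addresses into a
 * buffer instead of printing them, mirroring print_trace_ops above. */
struct stacktrace_ops {
	void (*warning)(void *data, char *msg);
	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
	int (*stack)(void *data, char *name);
	void (*address)(void *data, unsigned long address);
};

struct addr_buf {
	unsigned long entries[64];
	unsigned int nr;
};

static void buf_warning(void *data, char *msg) { }
static void buf_warning_symbol(void *data, char *msg, unsigned long symbol) { }

static int buf_stack(void *data, char *name)
{
	return 0;	/* >= 0 means: keep walking into the next stack */
}

static void buf_address(void *data, unsigned long addr)
{
	struct addr_buf *buf = data;

	if (buf->nr < 64)
		buf->entries[buf->nr++] = addr;
}

static struct stacktrace_ops buf_ops = {
	.warning	= buf_warning,
	.warning_symbol	= buf_warning_symbol,
	.stack		= buf_stack,
	.address	= buf_address,
};

/* usage: struct addr_buf b = { .nr = 0 };
 *        dump_trace(current, NULL, NULL, &buf_ops, &b); */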
@ -1124,20 +1175,6 @@ void __init trap_init_f00f_bug(void)
}
#endif

#define _set_gate(gate_addr,type,dpl,addr,seg) \
do { \
int __d0, __d1; \
__asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
"movw %4,%%dx\n\t" \
"movl %%eax,%0\n\t" \
"movl %%edx,%1" \
:"=m" (*((long *) (gate_addr))), \
"=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
"3" ((char *) (addr)),"2" ((seg) << 16)); \
} while (0)


/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
@ -1146,7 +1183,7 @@ do { \
 */
void set_intr_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
_set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
}

/*
@ -1154,22 +1191,22 @@ void set_intr_gate(unsigned int n, void *addr)
 */
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
_set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
}

static void __init set_trap_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
_set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
_set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
}

static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
{
_set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
_set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
}


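For reference, the magic constant in the deleted _set_gate() macro packs the descriptor flags: bit 15 is the present bit, bits 13-14 the DPL, and bits 8-11 the gate type (14 = 32-bit interrupt gate, 15 = trap gate, 5 = task gate), which is what lets the new DESCTYPE_* names replace the bare numbers. A small check of that arithmetic; the expected values here are derived from the old type/dpl pairs in the macro, not copied from any header:

#include <assert.h>
#include <stdio.h>

/* 0x8000 + (dpl << 13) + (type << 8), as in the deleted macro */
static unsigned short gate_flags(unsigned int type, unsigned int dpl)
{
	return 0x8000 + (dpl << 13) + (type << 8);
}

int main(void)
{
	assert(gate_flags(14, 0) == 0x8e00); /* interrupt gate, kernel only */
	assert(gate_flags(15, 0) == 0x8f00); /* trap gate, kernel only */
	assert(gate_flags(15, 3) == 0xef00); /* system gate, user-callable */
	printf("gate flag encodings check out\n");
	return 0;
}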
@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void)

EXPORT_SYMBOL(recalibrate_cpu_khz);

void tsc_init(void)
void __init tsc_init(void)
{
if (!cpu_has_tsc || tsc_disable)
return;

@ -4,6 +4,6 @@


lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o
bitops.o semaphore.o

lib-$(CONFIG_X86_USE_3DNOW) += mmx.o

217
arch/i386/lib/semaphore.S
Normal file
@ -0,0 +1,217 @@
/*
 * i386 semaphore implementation.
 *
 * (C) Copyright 1999 Linus Torvalds
 *
 * Portions Copyright 1999 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
 */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.i>
#include <asm/frame.i>
#include <asm/dwarf2.h>

/*
 * The semaphore operations have a special calling sequence that
 * allow us to do a simpler in-line version of them. These routines
 * need to convert that sequence back into the C sequence when
 * there is contention on the semaphore.
 *
 * %eax contains the semaphore pointer on entry. Save the C-clobbered
 * registers (%eax, %edx and %ecx) except %eax which is either a return
 * value or just clobbered..
 */
.section .sched.text
ENTRY(__down_failed)
CFI_STARTPROC
FRAME
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call __down
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
ENDFRAME
ret
CFI_ENDPROC
END(__down_failed)

ENTRY(__down_failed_interruptible)
CFI_STARTPROC
FRAME
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call __down_interruptible
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
ENDFRAME
ret
CFI_ENDPROC
END(__down_failed_interruptible)

ENTRY(__down_failed_trylock)
CFI_STARTPROC
FRAME
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call __down_trylock
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
ENDFRAME
ret
CFI_ENDPROC
END(__down_failed_trylock)

ENTRY(__up_wakeup)
CFI_STARTPROC
FRAME
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call __up
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
ENDFRAME
ret
CFI_ENDPROC
END(__up_wakeup)

/*
 * rw spinlock fallbacks
 */
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
CFI_STARTPROC simple
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
1: rep; nop
cmpl $ RW_LOCK_BIAS,(%eax)
jne 1b
LOCK_PREFIX
subl $ RW_LOCK_BIAS,(%eax)
jnz 2b
ENDFRAME
ret
CFI_ENDPROC
END(__write_lock_failed)

ENTRY(__read_lock_failed)
CFI_STARTPROC
FRAME
2: LOCK_PREFIX
incl (%eax)
1: rep; nop
cmpl $1,(%eax)
js 1b
LOCK_PREFIX
decl (%eax)
js 2b
ENDFRAME
ret
CFI_ENDPROC
END(__read_lock_failed)

#endif

/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
push %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
call rwsem_down_read_failed
pop %edx
CFI_ADJUST_CFA_OFFSET -4
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
END(call_rwsem_down_read_failed)

ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
calll rwsem_down_write_failed
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
END(call_rwsem_down_write_failed)

ENTRY(call_rwsem_wake)
CFI_STARTPROC
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call rwsem_wake
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
1: ret
CFI_ENDPROC
END(call_rwsem_wake)

/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
push %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
call rwsem_downgrade_wake
pop %edx
CFI_ADJUST_CFA_OFFSET -4
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
END(call_rwsem_downgrade_wake)

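The point of these stubs is the non-standard calling convention described in the header comment: the inline fastpath leaves the semaphore pointer in %eax and expects %ecx/%edx preserved, so each stub saves exactly those registers around its C slowpath. A hedged C-level model of the down() fastpath/slowpath split; the names follow the kernel but the atomic is simplified to a plain counter, purely for illustration:

#include <stdio.h>

/* Toy model: the real semaphore decrements atomically and sleeps in
 * __down_failed -> __down() when the count goes negative. */
struct semaphore_sketch {
	int count;
};

static void down_slowpath(struct semaphore_sketch *sem)
{
	/* kernel: out-of-line stub saves %ecx/%edx, calls __down(),
	 * which sleeps until a waker raises the count */
	printf("contended, would sleep\n");
}

static void down_sketch(struct semaphore_sketch *sem)
{
	if (--sem->count < 0)		/* inline fastpath: one locked decl */
		down_slowpath(sem);	/* rare path, kept out of line */
}

int main(void)
{
	struct semaphore_sketch sem = { .count = 1 };

	down_sketch(&sem);	/* uncontended: fastpath only */
	down_sketch(&sem);	/* contended: slowpath */
	return 0;
}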
@ -5,6 +5,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>

@ -4,6 +4,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>

@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = {
NULL,
};

static int cmdline_apic;
static int cmdline_apic __initdata;
static int __init parse_apic(char *arg)
{
int i;

if (!arg)
return -EINVAL;

for (i = 0; apic_probe[i]; i++) {
if (!strcmp(apic_probe[i]->name, arg)) {
genapic = apic_probe[i];
cmdline_apic = 1;
return 0;
}
}
return -ENOENT;
}
early_param("apic", parse_apic);

void __init generic_bigsmp_probe(void)
{
@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void)
}
}

void __init generic_apic_probe(char *command_line)
void __init generic_apic_probe(void)
{
char *s;
int i;
int changed = 0;

s = strstr(command_line, "apic=");
if (s && (s == command_line || isspace(s[-1]))) {
char *p = strchr(s, ' '), old;
if (!p)
p = strchr(s, '\0');
old = *p;
*p = 0;
for (i = 0; !changed && apic_probe[i]; i++) {
if (!strcmp(apic_probe[i]->name, s+5)) {
changed = 1;
if (!cmdline_apic) {
int i;
for (i = 0; apic_probe[i]; i++) {
if (apic_probe[i]->probe()) {
genapic = apic_probe[i];
break;
}
}
if (!changed)
printk(KERN_ERR "Unknown genapic `%s' specified.\n", s);
*p = old;
cmdline_apic = changed;
}
for (i = 0; !changed && apic_probe[i]; i++) {
if (apic_probe[i]->probe()) {
changed = 1;
genapic = apic_probe[i];
}
/* Not visible without early console */
if (!apic_probe[i])
panic("Didn't find an APIC driver");
}
/* Not visible without early console */
if (!changed)
panic("Didn't find an APIC driver");

printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
}

@ -119,7 +117,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}

#ifdef CONFIG_SMP
int hard_smp_processor_id(void)
{
return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
}
#endif

@ -4,6 +4,7 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>

@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int nid)
BUG();
}

/* Find the owning node for a pfn. */
int early_pfn_to_nid(unsigned long pfn)
{
int nid;

for_each_node(nid) {
if (node_end_pfn[nid] == 0)
break;
if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
return nid;
}

return 0;
}

/*
 * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
 * method. For node zero take this from the bottom of memory, for
@ -227,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void)
unsigned long pfn;

for_each_online_node(nid) {
unsigned old_end_pfn = node_end_pfn[nid];

/*
 * The acpi/srat node info can show hot-add memory zones
 * where memory could be added but not currently present.
@ -276,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void)

node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
}
printk("Reserving total of %ld pages for numa KVA remap\n",
reserve_pages);
@ -322,6 +310,11 @@ unsigned long __init setup_memory(void)
highstart_pfn = system_max_low_pfn;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
num_physpages = system_max_low_pfn;
high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(system_max_low_pfn));
@ -364,45 +357,22 @@ void __init numa_kva_reserve(void)
void __init zone_sizes_init(void)
{
int nid;
unsigned long max_zone_pfns[MAX_NR_ZONES] = {
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
max_low_pfn,
highend_pfn
};


for_each_online_node(nid) {
unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned long *zholes_size;
unsigned int max_dma;

unsigned long low = max_low_pfn;
unsigned long start = node_start_pfn[nid];
unsigned long high = node_end_pfn[nid];

max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;

if (node_has_online_mem(nid)){
if (start > low) {
#ifdef CONFIG_HIGHMEM
BUG_ON(start > high);
zones_size[ZONE_HIGHMEM] = high - start;
#endif
} else {
if (low < max_dma)
zones_size[ZONE_DMA] = low;
else {
BUG_ON(max_dma > low);
BUG_ON(low > high);
zones_size[ZONE_DMA] = max_dma;
zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
zones_size[ZONE_HIGHMEM] = high - low;
#endif
}
}
/* If SRAT has not registered memory, register it now */
if (find_max_pfn_with_active_regions() == 0) {
for_each_online_node(nid) {
if (node_has_online_mem(nid))
add_active_range(nid, node_start_pfn[nid],
node_end_pfn[nid]);
}

zholes_size = get_zholes_size(nid);

free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
zholes_size);
}

free_area_init_nodes(max_zone_pfns);
return;
}

@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
const struct exception_table_entry *fixup;

#ifdef CONFIG_PNPBIOS
if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
{
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
extern u32 pnp_bios_is_utter_crap;

@ -27,21 +27,24 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/segment.h>

extern void die(const char *,struct pt_regs *,long);

#ifdef CONFIG_KPROBES
ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);

int register_page_fault_notifier(struct notifier_block *nb)
{
vmalloc_sync_all();
return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
}
EXPORT_SYMBOL_GPL(register_page_fault_notifier);

int unregister_page_fault_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);

static inline int notify_page_fault(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
@ -55,14 +58,6 @@ static inline int notify_page_fault(enum die_val val, const char *str,
};
return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
}
#else
static inline int notify_page_fault(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
{
return NOTIFY_DONE;
}
#endif


/*
 * Unlock any spinlocks which will prevent us from getting the
@ -119,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
}

/* The standard kernel/user address space limit. */
*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

/* By far the most common cases. */
if (likely(seg == __USER_CS || seg == __KERNEL_CS))
if (likely(SEGMENT_IS_FLAT_CODE(seg)))
return eip;

/* Check the segment exists, is within the current LDT/GDT size,
@ -436,11 +431,7 @@ good_area:
write = 0;
switch (error_code & 3) {
default: /* 3: write, present */
#ifdef TEST_VERIFY_AREA
if (regs->cs == KERNEL_CS)
printk("WP fault at %08lx\n", regs->eip);
#endif
/* fall through */
/* fall through */
case 2: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;

@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();

if (vaddr < FIXADDR_START) { // FIXME
if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
dec_preempt_count();
preempt_check_resched();
return;

@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
 * on Enable
 * off Disable
 */
void __init noexec_setup(const char *str)
static int __init noexec_setup(char *str)
{
if (!strncmp(str, "on",2) && cpu_has_nx) {
__supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
} else if (!strncmp(str,"off",3)) {
if (!str || !strcmp(str, "on")) {
if (cpu_has_nx) {
__supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
}
} else if (!strcmp(str,"off")) {
disable_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
} else
return -EINVAL;

return 0;
}
early_param("noexec", noexec_setup);

int nx_enabled = 0;
#ifdef CONFIG_X86_PAE
@ -552,18 +558,6 @@ static void __init test_wp_bit(void)
}
}

static void __init set_max_mapnr_init(void)
{
#ifdef CONFIG_HIGHMEM
num_physpages = highend_pfn;
#else
num_physpages = max_low_pfn;
#endif
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
}

static struct kcore_list kcore_mem, kcore_vmalloc;

void __init mem_init(void)
@ -590,14 +584,6 @@ void __init mem_init(void)
}
#endif

set_max_mapnr_init();

#ifdef CONFIG_HIGHMEM
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif

/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();

|
@@ -17,14 +17,15 @@
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/kdebug.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec const * model;
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];

static int nmi_start(void);
static void nmi_stop(void);

@@ -82,13 +83,24 @@ static void exit_driverfs(void)
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */

static int nmi_callback(struct pt_regs * regs, int cpu)
static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
{
	return model->check_ctrs(regs, &cpu_msrs[cpu]);
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;
	int cpu = smp_processor_id();

	switch (val) {
	case DIE_NMI:
		if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
			ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
}

static void nmi_cpu_save_registers(struct op_msrs * msrs)
{
	unsigned int const nr_ctrs = model->num_counters;
@@ -98,15 +110,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
	unsigned int i;

	for (i = 0; i < nr_ctrs; ++i) {
		rdmsr(counters[i].addr,
		      counters[i].saved.low,
		      counters[i].saved.high);
		if (counters[i].addr) {
			rdmsr(counters[i].addr,
			      counters[i].saved.low,
			      counters[i].saved.high);
		}
	}

	for (i = 0; i < nr_ctrls; ++i) {
		rdmsr(controls[i].addr,
		      controls[i].saved.low,
		      controls[i].saved.high);
		if (controls[i].addr) {
			rdmsr(controls[i].addr,
			      controls[i].saved.low,
			      controls[i].saved.high);
		}
	}
}

@@ -170,27 +186,29 @@ static void nmi_cpu_setup(void * dummy)
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 0
};

static int nmi_setup(void)
{
	int err = 0;

	if (!allocate_msrs())
		return -ENOMEM;

	/* We walk a thin line between law and rape here.
	 * We need to be careful to install our NMI handler
	 * without actually triggering any NMIs as this will
	 * break the core code horrifically.
	 */
	if (reserve_lapic_nmi() < 0) {
	if ((err = register_die_notifier(&profile_exceptions_nb))) {
		free_msrs();
		return -EBUSY;
		return err;
	}

	/* We need to serialize save and setup for HT because the subset
	 * of msrs are distinct for save and setup operations
	 */
	on_each_cpu(nmi_save_registers, NULL, 0, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
	set_nmi_callback(nmi_callback);
	nmi_enabled = 1;
	return 0;
}
@@ -205,15 +223,19 @@ static void nmi_restore_registers(struct op_msrs * msrs)
	unsigned int i;

	for (i = 0; i < nr_ctrls; ++i) {
		wrmsr(controls[i].addr,
		      controls[i].saved.low,
		      controls[i].saved.high);
		if (controls[i].addr) {
			wrmsr(controls[i].addr,
			      controls[i].saved.low,
			      controls[i].saved.high);
		}
	}

	for (i = 0; i < nr_ctrs; ++i) {
		wrmsr(counters[i].addr,
		      counters[i].saved.low,
		      counters[i].saved.high);
		if (counters[i].addr) {
			wrmsr(counters[i].addr,
			      counters[i].saved.low,
			      counters[i].saved.high);
		}
	}
}

@@ -234,6 +256,7 @@ static void nmi_cpu_shutdown(void * dummy)
	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
	apic_write(APIC_LVTERR, v);
	nmi_restore_registers(msrs);
	model->shutdown(msrs);
}

@@ -241,8 +264,7 @@ static void nmi_shutdown(void)
{
	nmi_enabled = 0;
	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
	unset_nmi_callback();
	release_lapic_nmi();
	unregister_die_notifier(&profile_exceptions_nb);
	free_msrs();
}

@@ -284,6 +306,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
	struct dentry * dir;
	char buf[4];

	/* quick little hack to _not_ expose a counter if it is not
	 * available for use. This should protect userspace app.
	 * NOTE: assumes 1:1 mapping here (that counters are organized
	 * sequentially in their struct assignment).
	 */
	if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
		continue;

	snprintf(buf, sizeof(buf), "%d", i);
	dir = oprofilefs_mkdir(sb, root, buf);
	oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
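For readers tracking the API change in the hunks above: the old set_nmi_callback() hook is replaced by the kdebug die-notifier chain. Below is a minimal sketch of a consumer of that chain, assuming only the 2.6.18-era notifier API already visible in this diff; my_nmi_event() is a made-up placeholder for whatever per-event work a consumer performs.

static int my_notify(struct notifier_block *self,
		     unsigned long val, void *data)
{
	struct die_args *args = data;

	/* Only DIE_NMI events are interesting; pass everything else on. */
	if (val != DIE_NMI)
		return NOTIFY_DONE;
	/* NOTIFY_STOP tells the chain this NMI was consumed here. */
	return my_nmi_event(args->regs) ? NOTIFY_STOP : NOTIFY_DONE;
}

static struct notifier_block my_nb = {
	.notifier_call	= my_notify,
	.priority	= 0,
};

/* hooked up with: err = register_die_notifier(&my_nb); */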
@@ -17,34 +17,49 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/ptrace.h>
#include <asm/kdebug.h>

static int nmi_timer_callback(struct pt_regs * regs, int cpu)
static int profile_timer_exceptions_notify(struct notifier_block *self,
					   unsigned long val, void *data)
{
	oprofile_add_sample(regs, 0);
	return 1;
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;

	switch (val) {
	case DIE_NMI:
		oprofile_add_sample(args->regs, 0);
		ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
}

static struct notifier_block profile_timer_exceptions_nb = {
	.notifier_call = profile_timer_exceptions_notify,
	.next = NULL,
	.priority = 0
};

static int timer_start(void)
{
	disable_timer_nmi_watchdog();
	set_nmi_callback(nmi_timer_callback);
	if (register_die_notifier(&profile_timer_exceptions_nb))
		return 1;
	return 0;
}

static void timer_stop(void)
{
	enable_timer_nmi_watchdog();
	unset_nmi_callback();
	unregister_die_notifier(&profile_timer_exceptions_nb);
	synchronize_sched();  /* Allow already-started NMIs to complete. */
}

int __init op_nmi_timer_init(struct oprofile_operations * ops)
{
	extern int nmi_active;

	if (nmi_active <= 0)
	if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
		return -ENODEV;

	ops->start = timer_start;
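The rewritten op_nmi_timer_init() gate deserves a note: NMI-"timer" profiling generates no interrupts of its own, it samples on whatever periodic NMI the watchdog already produces. A paraphrase of the new test, with the intent spelled out (the comments are mine, not from the commit):

if ((nmi_watchdog != NMI_IO_APIC) ||	/* watchdog NMIs not routed via IO-APIC */
    (atomic_read(&nmi_active) <= 0))	/* or no CPU has the watchdog running */
	return -ENODEV;			/* then there is no NMI source to ride */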
@@ -21,10 +21,12 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS];

static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
	msrs->counters[0].addr = MSR_K7_PERFCTR0;
	msrs->counters[1].addr = MSR_K7_PERFCTR1;
	msrs->counters[2].addr = MSR_K7_PERFCTR2;
	msrs->counters[3].addr = MSR_K7_PERFCTR3;
	int i;

	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
	for (i = 0; i < NUM_COUNTERS; i++) {
		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		else
			msrs->counters[i].addr = 0;
	}

	for (i = 0; i < NUM_CONTROLS; i++) {
		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
		else
			msrs->controls[i].addr = 0;
	}
}

@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)

	/* clear all counters */
	for (i = 0 ; i < NUM_CONTROLS; ++i) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_CLEAR(low);
		CTRL_WRITE(low, high, msrs, i);
	}

	/* avoid a false detection of ctr overflows in NMI handler */
	for (i = 0; i < NUM_COUNTERS; ++i) {
		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
			continue;
		CTR_WRITE(1, msrs, i);
	}

	/* enable active counters */
	for (i = 0; i < NUM_COUNTERS; ++i) {
		if (counter_config[i].enabled) {
		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
			reset_value[i] = counter_config[i].count;

			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
	int i;

	for (i = 0 ; i < NUM_COUNTERS; ++i) {
		if (!reset_value[i])
			continue;
		CTR_READ(low, high, msrs, i);
		if (CTR_OVERFLOWED(low)) {
			oprofile_add_sample(regs, i);
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs)
	/* Subtle: stop on all counters to avoid race with
	 * setting our pm callback */
	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
		if (!reset_value[i])
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_SET_INACTIVE(low);
		CTRL_WRITE(low, high, msrs, i);
	}
}

static void athlon_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
		if (CTR_IS_RESERVED(msrs,i))
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
	}
	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
		if (CTRL_IS_RESERVED(msrs,i))
			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}

struct op_x86_model_spec const op_athlon_spec = {
	.num_counters = NUM_COUNTERS,
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = {
	.setup_ctrs = &athlon_setup_ctrs,
	.check_ctrs = &athlon_check_ctrs,
	.start = &athlon_start,
	.stop = &athlon_stop
	.stop = &athlon_stop,
	.shutdown = &athlon_shutdown
};
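The pattern running through the athlon hunks is a reserve-or-mark-unusable lifecycle. A condensed sketch of the convention, using only the reserve/release calls shown above (this is an illustration, not a literal copy of the file):

/* Reserve each MSR up front; addr == 0 marks a slot that could not be
 * reserved, and every later stage must skip such slots. */
for (i = 0; i < NUM_COUNTERS; i++)
	msrs->counters[i].addr =
		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i) ?
		MSR_K7_PERFCTR0 + i : 0;

/* ... setup/start/stop only touch slots with a non-zero addr ... */

/* Shutdown releases exactly what was reserved. */
for (i = 0; i < NUM_COUNTERS; i++)
	if (msrs->counters[i].addr)	/* i.e. CTR_IS_RESERVED() */
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);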
@@ -32,7 +32,7 @@
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}

@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {

#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT

/* All cccr we don't use. */
static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
	MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
};

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, stag;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter registers we pay attention to */
	/* initialize some registers */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters[i].addr =
			p4_counters[VIRT_CTR(stag, i)].counter_address;
		msrs->counters[i].addr = 0;
	}

	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */

	/* 18 CCCR registers */
	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	for (i = 0; i < num_controls; ++i) {
		msrs->controls[i].addr = 0;
	}

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models, we save a second time BSU_ESCR0/1
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs*/
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}
}

@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int addr;
	unsigned int stag;

	stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)

	/* clear the cccrs we will use */
	for (i = 0 ; i < num_counters ; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear cccrs outside our concern */
	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
		rdmsr(p4_unused_cccr[i], low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_unused_cccr[i], low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
	if (boot_cpu_data.x86_model < 0x3) {
		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	if (num_counters == NUM_COUNTERS_NON_HT) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	} else if (stag == 0) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
	} else {
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	}

	/* setup all counters */
	for (i = 0 ; i < num_counters ; ++i) {
		if (counter_config[i].enabled) {
		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs)
	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0 ; i < num_counters ; ++i) {
		if (CTR_IS_RESERVED(msrs,i))
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/* some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters ; i < num_controls ; ++i) {
		if (CTRL_IS_RESERVED(msrs,i))
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}

#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
#endif

@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = {
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
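A note on the VIRT_CTR()/get_stagger() indexing the P4 hunks lean on: on a hyper-threaded P4 the counter and control resources are split between the two siblings, and stag selects the "even" or "odd" half for the current thread. The macro itself lives earlier in op_model_p4.c and is not part of this diff, so treat the definition below as my reconstruction, not the commit's text:

/* A logical counter index i maps to the physical slot for this
 * sibling: slot i on the even thread, slot i + num_counters on
 * the odd one. */
#define VIRT_CTR(stag, i)	((i) + ((num_counters) * (stag)))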
@@ -22,10 +22,12 @@
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2

#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS];

static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
	msrs->counters[0].addr = MSR_P6_PERFCTR0;
	msrs->counters[1].addr = MSR_P6_PERFCTR1;
	int i;

	for (i = 0; i < NUM_COUNTERS; i++) {
		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
		else
			msrs->counters[i].addr = 0;
	}

	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
	for (i = 0; i < NUM_CONTROLS; i++) {
		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
		else
			msrs->controls[i].addr = 0;
	}
}

@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)

	/* clear all counters */
	for (i = 0 ; i < NUM_CONTROLS; ++i) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_CLEAR(low);
		CTRL_WRITE(low, high, msrs, i);
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)

	/* avoid a false detection of ctr overflows in NMI handler */
	for (i = 0; i < NUM_COUNTERS; ++i) {
		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
			continue;
		CTR_WRITE(1, msrs, i);
	}

	/* enable active counters */
	for (i = 0; i < NUM_COUNTERS; ++i) {
		if (counter_config[i].enabled) {
		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
			reset_value[i] = counter_config[i].count;

			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
			CTRL_SET_UM(low, counter_config[i].unit_mask);
			CTRL_SET_EVENT(low, counter_config[i].event);
			CTRL_WRITE(low, high, msrs, i);
		} else {
			reset_value[i] = 0;
		}
	}
}
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
	int i;

	for (i = 0 ; i < NUM_COUNTERS; ++i) {
		if (!reset_value[i])
			continue;
		CTR_READ(low, high, msrs, i);
		if (CTR_OVERFLOWED(low)) {
			oprofile_add_sample(regs, i);
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
static void ppro_start(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	CTRL_READ(low, high, msrs, 0);
	CTRL_SET_ACTIVE(low);
	CTRL_WRITE(low, high, msrs, 0);

	if (reset_value[0]) {
		CTRL_READ(low, high, msrs, 0);
		CTRL_SET_ACTIVE(low);
		CTRL_WRITE(low, high, msrs, 0);
	}
}

static void ppro_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	CTRL_READ(low, high, msrs, 0);
	CTRL_SET_INACTIVE(low);
	CTRL_WRITE(low, high, msrs, 0);

	if (reset_value[0]) {
		CTRL_READ(low, high, msrs, 0);
		CTRL_SET_INACTIVE(low);
		CTRL_WRITE(low, high, msrs, 0);
	}
}

static void ppro_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
		if (CTR_IS_RESERVED(msrs,i))
			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
	}
	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
		if (CTRL_IS_RESERVED(msrs,i))
			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
	}
}

@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = {
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};
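Why do ppro_start()/ppro_stop() only ever program control register 0? On P6-family CPUs the enable flag (bit 22) in EVNTSEL0 gates both counters at once, so toggling that one MSR starts or stops everything. The new reset_value[0] guard then simply avoids touching the MSR when counter 0 was never reserved and configured:

/* start, guarded: skip the MSR access entirely if counter 0 is unused */
if (reset_value[0]) {
	CTRL_READ(low, high, msrs, 0);
	CTRL_SET_ACTIVE(low);		/* bit 22: global enable */
	CTRL_WRITE(low, high, msrs, 0);
}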
@@ -40,6 +40,7 @@ struct op_x86_model_spec {
		struct op_msrs const * const msrs);
	void (*start)(struct op_msrs const * const msrs);
	void (*stop)(struct op_msrs const * const msrs);
	void (*shutdown)(struct op_msrs const * const msrs);
};

extern struct op_x86_model_spec const op_ppro_spec;
@@ -11,4 +11,4 @@ pci-y += legacy.o irq.o
pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o

obj-y += $(pci-y) common.o
obj-y += $(pci-y) common.o early.o
@@ -242,6 +242,10 @@ char * __devinit pcibios_setup(char *str)
		acpi_noirq_set();
		return NULL;
	}
	else if (!strcmp(str, "noearly")) {
		pci_probe |= PCI_PROBE_NOEARLY;
		return NULL;
	}
#ifndef CONFIG_X86_VISWS
	else if (!strcmp(str, "usepirqmask")) {
		pci_probe |= PCI_USE_PIRQ_MASK;
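For users, the hunk above adds a matching kernel command-line switch: early (pre-subsystem) config-space accesses can be disabled on a suspect chipset by booting with, for example:

	pci=noearly

The option sets PCI_PROBE_NOEARLY, which early_pci_allowed() in the new early.c (further down in this diff) checks before permitting any early access.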
@@ -254,7 +254,16 @@ static int __init pci_check_type2(void)
	return works;
}

void __init pci_direct_init(void)
void __init pci_direct_init(int type)
{
	printk(KERN_INFO "PCI: Using configuration type %d\n", type);
	if (type == 1)
		raw_pci_ops = &pci_direct_conf1;
	else
		raw_pci_ops = &pci_direct_conf2;
}

int __init pci_direct_probe(void)
{
	struct resource *region, *region2;

@@ -264,19 +273,16 @@ void __init pci_direct_init(void)
	if (!region)
		goto type2;

	if (pci_check_type1()) {
		printk(KERN_INFO "PCI: Using configuration type 1\n");
		raw_pci_ops = &pci_direct_conf1;
		return;
	}
	if (pci_check_type1())
		return 1;
	release_resource(region);

 type2:
	if ((pci_probe & PCI_PROBE_CONF2) == 0)
		return;
		return 0;
	region = request_region(0xCF8, 4, "PCI conf2");
	if (!region)
		return;
		return 0;
	region2 = request_region(0xC000, 0x1000, "PCI conf2");
	if (!region2)
		goto fail2;
@@ -284,10 +290,11 @@ void __init pci_direct_init(void)
	if (pci_check_type2()) {
		printk(KERN_INFO "PCI: Using configuration type 2\n");
		raw_pci_ops = &pci_direct_conf2;
		return;
		return 2;
	}

	release_resource(region2);
 fail2:
	release_resource(region);
	return 0;
}
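The shape of this change: probing and committing are now two steps. pci_direct_probe() only detects a working mechanism and reports it; pci_direct_init(type) later installs raw_pci_ops. A small illustrative caller (hypothetical, but matching the signatures in this diff):

static __init int example_access_init(void)
{
	int type = pci_direct_probe();	/* 0 = none, 1 = conf1, 2 = conf2 */

	/* other probes (e.g. MMCONFIG) may consult 'type' in between */

	if (type)
		pci_direct_init(type);	/* commit: conf1 or conf2 ops */
	return 0;
}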
arch/i386/pci/early.c (new file, 52 lines)
@@ -0,0 +1,52 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/pci-direct.h>
#include <asm/io.h>
#include "pci.h"

/* Direct PCI access. This is used for PCI accesses in early boot before
   the PCI subsystem works. */

#define PDprintk(x...)

u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
{
	u32 v;
	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
	v = inl(0xcfc);
	if (v != 0xffffffff)
		PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
	return v;
}

u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
{
	u8 v;
	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
	v = inb(0xcfc + (offset&3));
	PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
	return v;
}

u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
{
	u16 v;
	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
	v = inw(0xcfc + (offset&2));
	PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
	return v;
}

void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
		      u32 val)
{
	PDprintk("%x writing to %x: %x\n", slot, offset, val);
	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
	outl(val, 0xcfc);
}

int early_pci_allowed(void)
{
	return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
		PCI_PROBE_CONF1;
}
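A worked example of the type-1 address encoding used by all four helpers above. For bus 0, slot 3, function 1, offset 0x10 (the first BAR):

/* 0x80000000 | (0<<16) | (3<<11) | (1<<8) | 0x10 == 0x80001910
 * is written to the address port 0xCF8; the dword then appears at
 * the data port 0xCFC. */
u32 bar0 = read_pci_config(0, 3, 1, 0x10);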
@@ -6,8 +6,13 @@
   in the right sequence from here. */
static __init int pci_access_init(void)
{
	int type = 0;

#ifdef CONFIG_PCI_DIRECT
	type = pci_direct_probe();
#endif
#ifdef CONFIG_PCI_MMCONFIG
	pci_mmcfg_init();
	pci_mmcfg_init(type);
#endif
	if (raw_pci_ops)
		return 0;
@@ -21,7 +26,7 @@ static __init int pci_access_init(void)
	 * fails.
	 */
#ifdef CONFIG_PCI_DIRECT
	pci_direct_init();
	pci_direct_init(type);
#endif
	return 0;
}
@@ -151,6 +151,38 @@ static struct pci_raw_ops pci_mmcfg = {
	.write = pci_mmcfg_write,
};

static __init void pci_mmcfg_insert_resources(void)
{
#define PCI_MMCFG_RESOURCE_NAME_LEN 19
	int i;
	struct resource *res;
	char *names;
	unsigned num_buses;

	res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
			pci_mmcfg_config_num, GFP_KERNEL);

	if (!res) {
		printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
		return;
	}

	names = (void *)&res[pci_mmcfg_config_num];
	for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
		num_buses = pci_mmcfg_config[i].end_bus_number -
			pci_mmcfg_config[i].start_bus_number + 1;
		res->name = names;
		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
			 pci_mmcfg_config[i].pci_segment_group_number);
		res->start = pci_mmcfg_config[i].base_address;
		res->end = res->start + (num_buses << 20) - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		insert_resource(&iomem_resource, res);
		names += PCI_MMCFG_RESOURCE_NAME_LEN;
	}
}

/* K8 systems have some devices (typically in the builtin northbridge)
   that are only accessible using type1
   Normally this can be expressed in the MCFG by not listing them
@@ -187,7 +219,9 @@ static __init void unreachable_devices(void)
	}
}

void __init pci_mmcfg_init(void)
void __init pci_mmcfg_init(int type)
{
	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
		return;
@@ -198,7 +232,9 @@ void __init pci_mmcfg_init(void)
	    (pci_mmcfg_config[0].base_address == 0))
		return;

	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
	/* Only do this check when type 1 works. If it doesn't work
	   assume we run on a Mac and always use MCFG */
	if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address,
			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
			E820_RESERVED)) {
		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
@@ -212,4 +248,5 @@ void __init pci_mmcfg_init(void)
	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;

	unreachable_devices();
	pci_mmcfg_insert_resources();
}
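The sizing arithmetic in pci_mmcfg_insert_resources() is worth spelling out: each bus decodes exactly 1 MiB of MMCONFIG space, since 32 devices x 8 functions x 4 KiB of config registers per function gives 32 * 8 * 4096 = 1048576 = 1 << 20 bytes. Hence:

res->end = res->start + (num_buses << 20) - 1;	/* num_buses * 1 MiB */

So a segment covering buses 0..255 reserves a 256 MiB aperture.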
@@ -17,6 +17,7 @@
#define PCI_PROBE_CONF2		0x0004
#define PCI_PROBE_MMCONF	0x0008
#define PCI_PROBE_MASK		0x000f
#define PCI_PROBE_NOEARLY	0x0010

#define PCI_NO_SORT		0x0100
#define PCI_BIOS_SORT		0x0200
@@ -81,7 +82,9 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus,
extern int pci_conf1_read(unsigned int seg, unsigned int bus,
			  unsigned int devfn, int reg, int len, u32 *value);

extern void pci_direct_init(void);
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
extern void pci_pcbios_init(void);
extern void pci_mmcfg_init(void);
extern void pci_mmcfg_init(int type);
extern void pcibios_sort(void);
@@ -356,6 +356,9 @@ config NODES_SHIFT
	  MAX_NUMNODES will be 2^(This value).
	  If in doubt, use the default.

config ARCH_POPULATES_NODE_MAP
	def_bool y

# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
# VIRTUAL_MEM_MAP has been retained for historical reasons.
config VIRTUAL_MEM_MAP
@@ -420,6 +423,14 @@ config IA64_PALINFO
config SGI_SN
	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)

config IA64_ESI
	bool "ESI (Extensible SAL Interface) support"
	help
	  If you say Y here, support is built into the kernel to
	  make ESI calls.  ESI calls are used to support vendor-specific
	  firmware extensions, such as the ability to inject memory-errors
	  for test-purposes.  If you're unsure, say N.

source "drivers/sn/Kconfig"

source "drivers/firmware/Kconfig"
@@ -1942,7 +1942,7 @@ struct sysctl32 {
	unsigned int	__unused[4];
};

#ifdef CONFIG_SYSCTL
#ifdef CONFIG_SYSCTL_SYSCALL
asmlinkage long
sys32_sysctl (struct sysctl32 __user *args)
{
@@ -32,6 +32,11 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT)		+= audit.o
mca_recovery-y			+= mca_drv.o mca_drv_asm.o

obj-$(CONFIG_IA64_ESI)		+= esi.o
ifneq ($(CONFIG_IA64_ESI),)
obj-y				+= esi_stub.o	# must be in kernel proper
endif

# The gate DSO image is built using a special linker script.
targets += gate.so gate-syms.o
@@ -1605,8 +1605,8 @@ sys_call_table:
	data8 sys_ni_syscall			// 1295 reserved for ppoll
	data8 sys_unshare
	data8 sys_splice
	data8 sys_ni_syscall			// reserved for set_robust_list
	data8 sys_ni_syscall			// reserved for get_robust_list
	data8 sys_set_robust_list
	data8 sys_get_robust_list
	data8 sys_sync_file_range		// 1300
	data8 sys_tee
	data8 sys_vmsplice
arch/ia64/kernel/esi.c (new file, 205 lines)
@@ -0,0 +1,205 @@
/*
 * Extensible SAL Interface (ESI) support routines.
 *
 * Copyright (C) 2006 Hewlett-Packard Co
 *	Alex Williamson <alex.williamson@hp.com>
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>

#include <asm/esi.h>
#include <asm/sal.h>

MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
MODULE_LICENSE("GPL");

#define MODULE_NAME	"esi"

#define ESI_TABLE_GUID					\
    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)

enum esi_systab_entry_type {
	ESI_DESC_ENTRY_POINT = 0
};

/*
 * Entry type:	Size:
 *	0	48
 */
#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]

typedef struct ia64_esi_desc_entry_point {
	u8 type;
	u8 reserved1[15];
	u64 esi_proc;
	u64 gp;
	efi_guid_t guid;
} ia64_esi_desc_entry_point_t;

struct pdesc {
	void *addr;
	void *gp;
};

static struct ia64_sal_systab *esi_systab;

static int __init esi_init (void)
{
	efi_config_table_t *config_tables;
	struct ia64_sal_systab *systab;
	unsigned long esi = 0;
	char *p;
	int i;

	config_tables = __va(efi.systab->tables);

	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
			esi = config_tables[i].table;
			break;
		}
	}

	if (!esi)
		return -ENODEV;

	systab = __va(esi);

	if (strncmp(systab->signature, "ESIT", 4) != 0) {
		printk(KERN_ERR "bad signature in ESI system table!");
		return -ENODEV;
	}

	p = (char *) (systab + 1);
	for (i = 0; i < systab->entry_count; i++) {
		/*
		 * The first byte of each entry type contains the type
		 * descriptor.
		 */
		switch (*p) {
		case ESI_DESC_ENTRY_POINT:
			break;
		default:
			printk(KERN_WARNING "Unknown table type %d found in "
			       "ESI table, ignoring rest of table\n", *p);
			return -ENODEV;
		}

		p += ESI_DESC_SIZE(*p);
	}

	esi_systab = systab;
	return 0;
}


int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
		   enum esi_proc_type proc_type, u64 func,
		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
		   u64 arg7)
{
	struct ia64_fpreg fr[6];
	unsigned long flags = 0;
	int i;
	char *p;

	if (!esi_systab)
		return -1;

	p = (char *) (esi_systab + 1);
	for (i = 0; i < esi_systab->entry_count; i++) {
		if (*p == ESI_DESC_ENTRY_POINT) {
			ia64_esi_desc_entry_point_t *esi = (void *)p;
			if (!efi_guidcmp(guid, esi->guid)) {
				ia64_sal_handler esi_proc;
				struct pdesc pdesc;

				pdesc.addr = __va(esi->esi_proc);
				pdesc.gp = __va(esi->gp);

				esi_proc = (ia64_sal_handler) &pdesc;

				ia64_save_scratch_fpregs(fr);
				if (proc_type == ESI_PROC_SERIALIZED)
					spin_lock_irqsave(&sal_lock, flags);
				else if (proc_type == ESI_PROC_MP_SAFE)
					local_irq_save(flags);
				else
					preempt_disable();
				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
						     arg4, arg5, arg6, arg7);
				if (proc_type == ESI_PROC_SERIALIZED)
					spin_unlock_irqrestore(&sal_lock,
							       flags);
				else if (proc_type == ESI_PROC_MP_SAFE)
					local_irq_restore(flags);
				else
					preempt_enable();
				ia64_load_scratch_fpregs(fr);
				return 0;
			}
		}
		p += ESI_DESC_SIZE(*p);
	}
	return -1;
}
EXPORT_SYMBOL_GPL(ia64_esi_call);

int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
			u64 arg5, u64 arg6, u64 arg7)
{
	struct ia64_fpreg fr[6];
	unsigned long flags;
	u64 esi_params[8];
	char *p;
	int i;

	if (!esi_systab)
		return -1;

	p = (char *) (esi_systab + 1);
	for (i = 0; i < esi_systab->entry_count; i++) {
		if (*p == ESI_DESC_ENTRY_POINT) {
			ia64_esi_desc_entry_point_t *esi = (void *)p;
			if (!efi_guidcmp(guid, esi->guid)) {
				ia64_sal_handler esi_proc;
				struct pdesc pdesc;

				pdesc.addr = (void *)esi->esi_proc;
				pdesc.gp = (void *)esi->gp;

				esi_proc = (ia64_sal_handler) &pdesc;

				esi_params[0] = func;
				esi_params[1] = arg1;
				esi_params[2] = arg2;
				esi_params[3] = arg3;
				esi_params[4] = arg4;
				esi_params[5] = arg5;
				esi_params[6] = arg6;
				esi_params[7] = arg7;
				ia64_save_scratch_fpregs(fr);
				spin_lock_irqsave(&sal_lock, flags);
				*isrvp = esi_call_phys(esi_proc, esi_params);
				spin_unlock_irqrestore(&sal_lock, flags);
				ia64_load_scratch_fpregs(fr);
				return 0;
			}
		}
		p += ESI_DESC_SIZE(*p);
	}
	return -1;
}
EXPORT_SYMBOL_GPL(ia64_esi_call_phys);

static void __exit esi_exit (void)
{
}

module_init(esi_init);
module_exit(esi_exit);	/* makes module removable... */
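Finally, a sketch of how a vendor driver would consume the new ESI interface. Only the ia64_esi_call() signature is taken from the file above; the GUID, function number, and helper name are invented for illustration:

/* Hypothetical vendor GUID -- a real caller uses the GUID its
 * firmware publishes in the ESI system table. */
#define MY_VENDOR_ESI_GUID \
	EFI_GUID(0x12345678, 0x1234, 0x1234, 0x00, \
		 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)

static int my_esi_ping(void)
{
	struct ia64_sal_retval isrv;

	if (ia64_esi_call(MY_VENDOR_ESI_GUID, &isrv, ESI_PROC_MP_SAFE,
			  0 /* func */, 0, 0, 0, 0, 0, 0, 0))
		return -ENODEV;		/* no matching entry point */
	return isrv.status;
}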