mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 15:58:47 +00:00
Merge branch 'for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/nico/orion into fixes
This commit is contained in:
commit
1f2ee6496b
@ -72,7 +72,7 @@
|
||||
kgdb is a source level debugger for linux kernel. It is used along
|
||||
with gdb to debug a linux kernel. The expectation is that gdb can
|
||||
be used to "break in" to the kernel to inspect memory, variables
|
||||
and look through a cal stack information similar to what an
|
||||
and look through call stack information similar to what an
|
||||
application developer would use gdb for. It is possible to place
|
||||
breakpoints in kernel code and perform some limited execution
|
||||
stepping.
|
||||
@ -93,8 +93,10 @@
|
||||
<chapter id="CompilingAKernel">
|
||||
<title>Compiling a kernel</title>
|
||||
<para>
|
||||
To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
|
||||
and then select "KGDB: kernel debugging with remote gdb".
|
||||
To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
|
||||
"Prompt for development and/or incomplete code/drivers"
|
||||
(CONFIG_EXPERIMENTAL) in "General setup", then under the
|
||||
"Kernel debugging" select "KGDB: kernel debugging with remote gdb".
|
||||
</para>
|
||||
<para>
|
||||
Next you should choose one or more I/O drivers to interconnect debugging
|
||||
|
@ -92,7 +92,6 @@ prototypes:
|
||||
void (*destroy_inode)(struct inode *);
|
||||
void (*dirty_inode) (struct inode *);
|
||||
int (*write_inode) (struct inode *, int);
|
||||
void (*put_inode) (struct inode *);
|
||||
void (*drop_inode) (struct inode *);
|
||||
void (*delete_inode) (struct inode *);
|
||||
void (*put_super) (struct super_block *);
|
||||
@ -115,7 +114,6 @@ alloc_inode: no no no
|
||||
destroy_inode: no
|
||||
dirty_inode: no (must not sleep)
|
||||
write_inode: no
|
||||
put_inode: no
|
||||
drop_inode: no !!!inode_lock!!!
|
||||
delete_inode: no
|
||||
put_super: yes yes no
|
||||
|
@ -205,7 +205,6 @@ struct super_operations {
|
||||
|
||||
void (*dirty_inode) (struct inode *);
|
||||
int (*write_inode) (struct inode *, int);
|
||||
void (*put_inode) (struct inode *);
|
||||
void (*drop_inode) (struct inode *);
|
||||
void (*delete_inode) (struct inode *);
|
||||
void (*put_super) (struct super_block *);
|
||||
@ -246,9 +245,6 @@ or bottom half).
|
||||
inode to disc. The second parameter indicates whether the write
|
||||
should be synchronous or not; not all filesystems check this flag.
|
||||
|
||||
put_inode: called when the VFS inode is removed from the inode
|
||||
cache.
|
||||
|
||||
drop_inode: called when the last access to the inode is dropped,
|
||||
with the inode_lock spinlock held.
|
||||
|
||||
|
@ -377,27 +377,3 @@ config FOO
|
||||
|
||||
limits FOO to module (=m) or disabled (=n).
|
||||
|
||||
|
||||
Build limited by a third config symbol which may be =y or =m
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
A common idiom that we see (and sometimes have problems with) is this:
|
||||
|
||||
When option C in B (module or subsystem) uses interfaces from A (module
|
||||
or subsystem), and both A and B are tristate (could be =y or =m if they
|
||||
were independent of each other, but they aren't), then we need to limit
|
||||
C such that it cannot be built statically if A is built as a loadable
|
||||
module. (C already depends on B, so there is no dependency issue to
|
||||
take care of here.)
|
||||
|
||||
If A is linked statically into the kernel image, C can be built
|
||||
statically or as loadable module(s). However, if A is built as loadable
|
||||
module(s), then C must be restricted to loadable module(s) also. This
|
||||
can be expressed in kconfig language as:
|
||||
|
||||
config C
|
||||
depends on A = y || A = B
|
||||
|
||||
or for real examples, use this command in a kernel tree:
|
||||
|
||||
$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
|
||||
|
||||
|
@ -1094,9 +1094,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
mac5380= [HW,SCSI] Format:
|
||||
<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
|
||||
|
||||
mac53c9x= [HW,SCSI] Format:
|
||||
<num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
|
||||
|
||||
machvec= [IA64] Force the use of a particular machine-vector
|
||||
(machvec) in a generic kernel.
|
||||
Example: machvec=hpzx1_swiotlb
|
||||
@ -1525,6 +1522,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
This is normally done in pci_enable_device(),
|
||||
so this option is a temporary workaround
|
||||
for broken drivers that don't call it.
|
||||
skip_isa_align [X86] do not align io start addr, so can
|
||||
handle more pci cards
|
||||
firmware [ARM] Do not re-enumerate the bus but instead
|
||||
just use the configuration from the
|
||||
bootloader. This is currently used on
|
||||
|
@ -8,17 +8,6 @@ Command line parameters
|
||||
|
||||
Enable logging of debug information in case of ccw device timeouts.
|
||||
|
||||
|
||||
* cio_msg = yes | no
|
||||
|
||||
Determines whether information on found devices and sensed device
|
||||
characteristics should be shown during startup or when new devices are
|
||||
found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
|
||||
0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
|
||||
|
||||
Default is off.
|
||||
|
||||
|
||||
* cio_ignore = {all} |
|
||||
{<device> | <range of devices>} |
|
||||
{!<device> | !<range of devices>}
|
||||
|
@ -1,165 +0,0 @@
|
||||
Goals, Design and Implementation of the
|
||||
new ultra-scalable O(1) scheduler
|
||||
|
||||
|
||||
This is an edited version of an email Ingo Molnar sent to
|
||||
lkml on 4 Jan 2002. It describes the goals, design, and
|
||||
implementation of Ingo's new ultra-scalable O(1) scheduler.
|
||||
Last Updated: 18 April 2002.
|
||||
|
||||
|
||||
Goal
|
||||
====
|
||||
|
||||
The main goal of the new scheduler is to keep all the good things we know
|
||||
and love about the current Linux scheduler:
|
||||
|
||||
- good interactive performance even during high load: if the user
|
||||
types or clicks then the system must react instantly and must execute
|
||||
the user tasks smoothly, even during considerable background load.
|
||||
|
||||
- good scheduling/wakeup performance with 1-2 runnable processes.
|
||||
|
||||
- fairness: no process should stay without any timeslice for any
|
||||
unreasonable amount of time. No process should get an unjustly high
|
||||
amount of CPU time.
|
||||
|
||||
- priorities: less important tasks can be started with lower priority,
|
||||
more important tasks with higher priority.
|
||||
|
||||
- SMP efficiency: no CPU should stay idle if there is work to do.
|
||||
|
||||
- SMP affinity: processes which run on one CPU should stay affine to
|
||||
that CPU. Processes should not bounce between CPUs too frequently.
|
||||
|
||||
- plus additional scheduler features: RT scheduling, CPU binding.
|
||||
|
||||
and the goal is also to add a few new things:
|
||||
|
||||
- fully O(1) scheduling. Are you tired of the recalculation loop
|
||||
blowing the L1 cache away every now and then? Do you think the goodness
|
||||
loop is taking a bit too long to finish if there are lots of runnable
|
||||
processes? This new scheduler takes no prisoners: wakeup(), schedule(),
|
||||
the timer interrupt are all O(1) algorithms. There is no recalculation
|
||||
loop. There is no goodness loop either.
|
||||
|
||||
- 'perfect' SMP scalability. With the new scheduler there is no 'big'
|
||||
runqueue_lock anymore - it's all per-CPU runqueues and locks - two
|
||||
tasks on two separate CPUs can wake up, schedule and context-switch
|
||||
completely in parallel, without any interlocking. All
|
||||
scheduling-relevant data is structured for maximum scalability.
|
||||
|
||||
- better SMP affinity. The old scheduler has a particular weakness that
|
||||
causes the random bouncing of tasks between CPUs if/when higher
|
||||
priority/interactive tasks, this was observed and reported by many
|
||||
people. The reason is that the timeslice recalculation loop first needs
|
||||
every currently running task to consume its timeslice. But when this
|
||||
happens on eg. an 8-way system, then this property starves an
|
||||
increasing number of CPUs from executing any process. Once the last
|
||||
task that has a timeslice left has finished using up that timeslice,
|
||||
the recalculation loop is triggered and other CPUs can start executing
|
||||
tasks again - after having idled around for a number of timer ticks.
|
||||
The more CPUs, the worse this effect.
|
||||
|
||||
Furthermore, this same effect causes the bouncing effect as well:
|
||||
whenever there is such a 'timeslice squeeze' of the global runqueue,
|
||||
idle processors start executing tasks which are not affine to that CPU.
|
||||
(because the affine tasks have finished off their timeslices already.)
|
||||
|
||||
The new scheduler solves this problem by distributing timeslices on a
|
||||
per-CPU basis, without having any global synchronization or
|
||||
recalculation.
|
||||
|
||||
- batch scheduling. A significant proportion of computing-intensive tasks
|
||||
benefit from batch-scheduling, where timeslices are long and processes
|
||||
are roundrobin scheduled. The new scheduler does such batch-scheduling
|
||||
of the lowest priority tasks - so nice +19 jobs will get
|
||||
'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
|
||||
in essence SCHED_IDLE, from an interactiveness point of view.
|
||||
|
||||
- handle extreme loads more smoothly, without breakdown and scheduling
|
||||
storms.
|
||||
|
||||
- O(1) RT scheduling. For those RT folks who are paranoid about the
|
||||
O(nr_running) property of the goodness loop and the recalculation loop.
|
||||
|
||||
- run fork()ed children before the parent. Andrea has pointed out the
|
||||
advantages of this a few months ago, but patches for this feature
|
||||
do not work with the old scheduler as well as they should,
|
||||
because idle processes often steal the new child before the fork()ing
|
||||
CPU gets to execute it.
|
||||
|
||||
|
||||
Design
|
||||
======
|
||||
|
||||
The core of the new scheduler contains the following mechanisms:
|
||||
|
||||
- *two* priority-ordered 'priority arrays' per CPU. There is an 'active'
|
||||
array and an 'expired' array. The active array contains all tasks that
|
||||
are affine to this CPU and have timeslices left. The expired array
|
||||
contains all tasks which have used up their timeslices - but this array
|
||||
is kept sorted as well. The active and expired array is not accessed
|
||||
directly, it's accessed through two pointers in the per-CPU runqueue
|
||||
structure. If all active tasks are used up then we 'switch' the two
|
||||
pointers and from now on the ready-to-go (former-) expired array is the
|
||||
active array - and the empty active array serves as the new collector
|
||||
for expired tasks.
|
||||
|
||||
- there is a 64-bit bitmap cache for array indices. Finding the highest
|
||||
priority task is thus a matter of two x86 BSFL bit-search instructions.
|
||||
|
||||
the split-array solution enables us to have an arbitrary number of active
|
||||
and expired tasks, and the recalculation of timeslices can be done
|
||||
immediately when the timeslice expires. Because the arrays are always
|
||||
accessed through the pointers in the runqueue, switching the two arrays can
|
||||
be done very quickly.
|
||||
|
||||
this is a hybrid priority-list approach coupled with roundrobin
|
||||
scheduling and the array-switch method of distributing timeslices.
|
||||
|
||||
- there is a per-task 'load estimator'.
|
||||
|
||||
one of the toughest things to get right is good interactive feel during
|
||||
heavy system load. While playing with various scheduler variants i found
|
||||
that the best interactive feel is achieved not by 'boosting' interactive
|
||||
tasks, but by 'punishing' tasks that want to use more CPU time than there
|
||||
is available. This method is also much easier to do in an O(1) fashion.
|
||||
|
||||
to establish the actual 'load' the task contributes to the system, a
|
||||
complex-looking but pretty accurate method is used: there is a 4-entry
|
||||
'history' ringbuffer of the task's activities during the last 4 seconds.
|
||||
This ringbuffer is operated without much overhead. The entries tell the
|
||||
scheduler a pretty accurate load-history of the task: has it used up more
|
||||
CPU time or less during the past N seconds. [the size '4' and the interval
|
||||
of 4x 1 seconds was found by lots of experimentation - this part is
|
||||
flexible and can be changed in both directions.]
|
||||
|
||||
the penalty a task gets for generating more load than the CPU can handle
|
||||
is a priority decrease - there is a maximum amount to this penalty
|
||||
relative to their static priority, so even fully CPU-bound tasks will
|
||||
observe each other's priorities, and will share the CPU accordingly.
|
||||
|
||||
the SMP load-balancer can be extended/switched with additional parallel
|
||||
computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
|
||||
can be supported easily by changing the load-balancer. Right now it's
|
||||
tuned for my SMP systems.
|
||||
|
||||
i skipped the prev->mm == next->mm advantage - no workload i know of shows
|
||||
any sensitivity to this. It can be added back by sacrificing O(1)
|
||||
schedule() [the current and one-lower priority list can be searched for a
|
||||
that->mm == current->mm condition], but costs a fair number of cycles
|
||||
during a number of important workloads, so i wanted to avoid this as much
|
||||
as possible.
|
||||
|
||||
- the SMP idle-task startup code was still racy and the new scheduler
|
||||
triggered this. So i streamlined the idle-setup code a bit. We do not call
|
||||
into schedule() before all processors have started up fully and all idle
|
||||
threads are in place.
|
||||
|
||||
- the patch also cleans up a number of aspects of sched.c - moves code
|
||||
into other areas of the kernel where it's appropriate, and simplifies
|
||||
certain code paths and data constructs. As a result, the new scheduler's
|
||||
code is smaller than the old one.
|
||||
|
||||
Ingo
|
@ -2112,12 +2112,10 @@ L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
|
||||
INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/ixgb/ixgbe)
|
||||
P: Auke Kok
|
||||
M: auke-jan.h.kok@intel.com
|
||||
P: Jesse Brandeburg
|
||||
M: jesse.brandeburg@intel.com
|
||||
P: Jeff Kirsher
|
||||
M: jeffrey.t.kirsher@intel.com
|
||||
P: Jesse Brandeburg
|
||||
M: jesse.brandeburg@intel.com
|
||||
P: Bruce Allan
|
||||
M: bruce.w.allan@intel.com
|
||||
P: John Ronciak
|
||||
|
@ -34,23 +34,6 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long old_len,
|
||||
unsigned long new_len, unsigned long flags,
|
||||
unsigned long new_addr);
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
inline long do_mmap2(
|
||||
unsigned long addr, unsigned long len,
|
||||
|
@ -34,11 +34,7 @@
|
||||
* Non-CPU Masters address decoding --
|
||||
* Unlike the CPU, we setup the access from Orion's master interfaces to DDR
|
||||
* banks only (the typical use case).
|
||||
* Setup access for each master to DDR is issued by common.c.
|
||||
*
|
||||
* Note: although orion_setbits() and orion_clrbits() are not atomic
|
||||
* no locking is necessary here since code in this file is only called
|
||||
* at boot time when there is no concurrency issues.
|
||||
* Setup access for each master to DDR is issued by platform device setup.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -48,10 +44,6 @@
|
||||
#define TARGET_DEV_BUS 1
|
||||
#define TARGET_PCI 3
|
||||
#define TARGET_PCIE 4
|
||||
#define ATTR_DDR_CS(n) (((n) ==0) ? 0xe : \
|
||||
((n) == 1) ? 0xd : \
|
||||
((n) == 2) ? 0xb : \
|
||||
((n) == 3) ? 0x7 : 0xf)
|
||||
#define ATTR_PCIE_MEM 0x59
|
||||
#define ATTR_PCIE_IO 0x51
|
||||
#define ATTR_PCIE_WA 0x79
|
||||
@ -61,17 +53,12 @@
|
||||
#define ATTR_DEV_CS1 0x1d
|
||||
#define ATTR_DEV_CS2 0x1b
|
||||
#define ATTR_DEV_BOOT 0xf
|
||||
#define WIN_EN 1
|
||||
|
||||
/*
|
||||
* Helpers to get DDR bank info
|
||||
*/
|
||||
#define DDR_BASE_CS(n) ORION5X_DDR_REG(0x1500 + ((n) * 8))
|
||||
#define DDR_SIZE_CS(n) ORION5X_DDR_REG(0x1504 + ((n) * 8))
|
||||
#define DDR_MAX_CS 4
|
||||
#define DDR_REG_TO_SIZE(reg) (((reg) | 0xffffff) + 1)
|
||||
#define DDR_REG_TO_BASE(reg) ((reg) & 0xff000000)
|
||||
#define DDR_BANK_EN 1
|
||||
#define DDR_BASE_CS(n) ORION5X_DDR_REG(0x1500 + ((n) << 3))
|
||||
#define DDR_SIZE_CS(n) ORION5X_DDR_REG(0x1504 + ((n) << 3))
|
||||
|
||||
/*
|
||||
* CPU Address Decode Windows registers
|
||||
@ -81,17 +68,6 @@
|
||||
#define CPU_WIN_REMAP_LO(n) ORION5X_BRIDGE_REG(0x008 | ((n) << 4))
|
||||
#define CPU_WIN_REMAP_HI(n) ORION5X_BRIDGE_REG(0x00c | ((n) << 4))
|
||||
|
||||
/*
|
||||
* Gigabit Ethernet Address Decode Windows registers
|
||||
*/
|
||||
#define ETH_WIN_BASE(win) ORION5X_ETH_REG(0x200 + ((win) * 8))
|
||||
#define ETH_WIN_SIZE(win) ORION5X_ETH_REG(0x204 + ((win) * 8))
|
||||
#define ETH_WIN_REMAP(win) ORION5X_ETH_REG(0x280 + ((win) * 4))
|
||||
#define ETH_WIN_EN ORION5X_ETH_REG(0x290)
|
||||
#define ETH_WIN_PROT ORION5X_ETH_REG(0x294)
|
||||
#define ETH_MAX_WIN 6
|
||||
#define ETH_MAX_REMAP_WIN 4
|
||||
|
||||
|
||||
struct mbus_dram_target_info orion5x_mbus_dram_info;
|
||||
|
||||
@ -202,39 +178,3 @@ void __init orion5x_setup_pcie_wa_win(u32 base, u32 size)
|
||||
{
|
||||
setup_cpu_win(7, base, size, TARGET_PCIE, ATTR_PCIE_WA, -1);
|
||||
}
|
||||
|
||||
void __init orion5x_setup_eth_wins(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* First, disable and clear windows
|
||||
*/
|
||||
for (i = 0; i < ETH_MAX_WIN; i++) {
|
||||
orion5x_write(ETH_WIN_BASE(i), 0);
|
||||
orion5x_write(ETH_WIN_SIZE(i), 0);
|
||||
orion5x_setbits(ETH_WIN_EN, 1 << i);
|
||||
orion5x_clrbits(ETH_WIN_PROT, 0x3 << (i * 2));
|
||||
if (i < ETH_MAX_REMAP_WIN)
|
||||
orion5x_write(ETH_WIN_REMAP(i), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup windows for DDR banks.
|
||||
*/
|
||||
for (i = 0; i < DDR_MAX_CS; i++) {
|
||||
u32 base, size;
|
||||
size = orion5x_read(DDR_SIZE_CS(i));
|
||||
base = orion5x_read(DDR_BASE_CS(i));
|
||||
if (size & DDR_BANK_EN) {
|
||||
base = DDR_REG_TO_BASE(base);
|
||||
size = DDR_REG_TO_SIZE(size);
|
||||
orion5x_write(ETH_WIN_SIZE(i), (size-1) & 0xffff0000);
|
||||
orion5x_write(ETH_WIN_BASE(i), (base & 0xffff0000) |
|
||||
(ATTR_DDR_CS(i) << 8) |
|
||||
TARGET_DDR);
|
||||
orion5x_clrbits(ETH_WIN_EN, 1 << i);
|
||||
orion5x_setbits(ETH_WIN_PROT, 0x3 << (i * 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -190,6 +190,11 @@ static struct platform_device orion5x_ehci1 = {
|
||||
* (The Orion and Discovery (MV643xx) families use the same Ethernet driver)
|
||||
****************************************************************************/
|
||||
|
||||
struct mv643xx_eth_shared_platform_data orion5x_eth_shared_data = {
|
||||
.dram = &orion5x_mbus_dram_info,
|
||||
.t_clk = ORION5X_TCLK,
|
||||
};
|
||||
|
||||
static struct resource orion5x_eth_shared_resources[] = {
|
||||
{
|
||||
.start = ORION5X_ETH_PHYS_BASE + 0x2000,
|
||||
@ -201,6 +206,9 @@ static struct resource orion5x_eth_shared_resources[] = {
|
||||
static struct platform_device orion5x_eth_shared = {
|
||||
.name = MV643XX_ETH_SHARED_NAME,
|
||||
.id = 0,
|
||||
.dev = {
|
||||
.platform_data = &orion5x_eth_shared_data,
|
||||
},
|
||||
.num_resources = 1,
|
||||
.resource = orion5x_eth_shared_resources,
|
||||
};
|
||||
@ -223,7 +231,9 @@ static struct platform_device orion5x_eth = {
|
||||
|
||||
void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
|
||||
{
|
||||
eth_data->shared = &orion5x_eth_shared;
|
||||
orion5x_eth.dev.platform_data = eth_data;
|
||||
|
||||
platform_device_register(&orion5x_eth_shared);
|
||||
platform_device_register(&orion5x_eth);
|
||||
}
|
||||
@ -360,7 +370,6 @@ void __init orion5x_init(void)
|
||||
* Setup Orion address map
|
||||
*/
|
||||
orion5x_setup_cpu_mbus_bridge();
|
||||
orion5x_setup_eth_wins();
|
||||
|
||||
/*
|
||||
* Register devices.
|
||||
|
@ -22,7 +22,6 @@ void orion5x_setup_dev0_win(u32 base, u32 size);
|
||||
void orion5x_setup_dev1_win(u32 base, u32 size);
|
||||
void orion5x_setup_dev2_win(u32 base, u32 size);
|
||||
void orion5x_setup_pcie_wa_win(u32 base, u32 size);
|
||||
void orion5x_setup_eth_wins(void);
|
||||
|
||||
/*
|
||||
* Shared code used internally by other Orion core functions.
|
||||
|
@ -14,19 +14,6 @@
|
||||
#include <asm/mman.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
asmlinkage int sys_pipe(unsigned long __user *filedes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(filedes, fd, sizeof(fd)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
|
||||
unsigned long prot, unsigned long flags,
|
||||
unsigned long fd, off_t offset)
|
||||
|
@ -45,23 +45,6 @@
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/dma.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2 * sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
static inline long
|
||||
do_mmap2(unsigned long addr, unsigned long len,
|
||||
|
@ -40,8 +40,11 @@ asmlinkage int sys_pipe(unsigned long __user * fildes)
|
||||
error = do_pipe(fd);
|
||||
unlock_kernel();
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int))) {
|
||||
sys_close(fd[0]);
|
||||
sys_close(fd[1]);
|
||||
error = -EFAULT;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
@ -28,23 +28,6 @@
|
||||
#include <asm/setup.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage long sys_pipe(unsigned long __user * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
|
||||
unsigned long prot, unsigned long flags,
|
||||
unsigned long fd, unsigned long pgoff)
|
||||
|
@ -27,23 +27,6 @@
|
||||
#include <asm/traps.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
static inline long do_mmap2(
|
||||
unsigned long addr, unsigned long len,
|
||||
|
@ -90,8 +90,11 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2,
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user((void __user *)r0, fd, 2*sizeof(int)))
|
||||
if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) {
|
||||
sys_close(fd[0]);
|
||||
sys_close(fd[1]);
|
||||
error = -EFAULT;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
@ -30,23 +30,6 @@
|
||||
#include <asm/page.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long __user * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
static inline long do_mmap2(
|
||||
unsigned long addr, unsigned long len,
|
||||
|
@ -468,15 +468,26 @@ static inline void access_error040(struct frame *fp)
|
||||
* (if do_page_fault didn't fix the mapping,
|
||||
* the writeback won't do good)
|
||||
*/
|
||||
disable_wb:
|
||||
#ifdef DEBUG
|
||||
printk(".. disabling wb2\n");
|
||||
#endif
|
||||
if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
|
||||
fp->un.fmt7.wb2s &= ~WBV_040;
|
||||
if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
|
||||
fp->un.fmt7.wb3s &= ~WBV_040;
|
||||
}
|
||||
} else if (send_fault_sig(&fp->ptregs) > 0) {
|
||||
printk("68040 access error, ssw=%x\n", ssw);
|
||||
trap_c(fp);
|
||||
} else {
|
||||
/* In case of a bus error we either kill the process or expect
|
||||
* the kernel to catch the fault, which then is also responsible
|
||||
* for cleaning up the mess.
|
||||
*/
|
||||
current->thread.signo = SIGBUS;
|
||||
current->thread.faddr = fp->un.fmt7.faddr;
|
||||
if (send_fault_sig(&fp->ptregs) >= 0)
|
||||
printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
|
||||
fp->un.fmt7.faddr);
|
||||
goto disable_wb;
|
||||
}
|
||||
|
||||
do_040writebacks(fp);
|
||||
|
@ -48,9 +48,6 @@
|
||||
struct mac_booter_data mac_bi_data;
|
||||
int mac_bisize = sizeof mac_bi_data;
|
||||
|
||||
struct mac_hw_present mac_hw_present;
|
||||
EXPORT_SYMBOL(mac_hw_present);
|
||||
|
||||
/* New m68k bootinfo stuff and videobase */
|
||||
|
||||
extern int m68k_num_memory;
|
||||
@ -817,27 +814,6 @@ void __init mac_identify(void)
|
||||
m68k_ramdisk.addr, m68k_ramdisk.size);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TODO: set the various fields in macintosh_config->hw_present here!
|
||||
*/
|
||||
switch (macintosh_config->scsi_type) {
|
||||
case MAC_SCSI_OLD:
|
||||
MACHW_SET(MAC_SCSI_80);
|
||||
break;
|
||||
case MAC_SCSI_QUADRA:
|
||||
case MAC_SCSI_QUADRA2:
|
||||
case MAC_SCSI_QUADRA3:
|
||||
MACHW_SET(MAC_SCSI_96);
|
||||
if ((macintosh_config->ident == MAC_MODEL_Q900) ||
|
||||
(macintosh_config->ident == MAC_MODEL_Q950))
|
||||
MACHW_SET(MAC_SCSI_96_2);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n");
|
||||
MACHW_SET(MAC_SCSI_80);
|
||||
break;
|
||||
}
|
||||
|
||||
iop_init();
|
||||
via_init();
|
||||
oss_init();
|
||||
|
@ -28,23 +28,6 @@
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
static inline long do_mmap2(
|
||||
unsigned long addr, unsigned long len,
|
||||
|
@ -28,23 +28,6 @@
|
||||
|
||||
#define MIN_MAP_ADDR PAGE_SIZE /* minimum fixed mmap address */
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way Unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage long sys_pipe(unsigned long __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2 * sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* memory mapping syscall
|
||||
*/
|
||||
|
@ -33,19 +33,6 @@
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/personality.h>
|
||||
|
||||
int sys_pipe(int __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
static unsigned long get_unshared_area(unsigned long addr, unsigned long len)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
@ -136,23 +136,6 @@ int sys_ipc(uint call, int first, unsigned long second, long third,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
int sys_pipe(int __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
static inline unsigned long do_mmap2(unsigned long addr, size_t len,
|
||||
unsigned long prot, unsigned long flags,
|
||||
unsigned long fd, unsigned long off, int shift)
|
||||
|
@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "inst_emu", VCPU_STAT(emulated_inst_exits) },
|
||||
{ "dec", VCPU_STAT(dec_exits) },
|
||||
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
|
||||
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
}
|
||||
break;
|
||||
|
||||
case BOOKE_INTERRUPT_FP_UNAVAIL:
|
||||
kvmppc_queue_exception(vcpu, exit_nr);
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
|
||||
case BOOKE_INTERRUPT_DATA_STORAGE:
|
||||
vcpu->arch.dear = vcpu->arch.fault_dear;
|
||||
vcpu->arch.esr = vcpu->arch.fault_esr;
|
||||
|
@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
|
||||
{
|
||||
/* XXX implement me */
|
||||
return 0;
|
||||
return !!(v->arch.pending_exceptions);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
{
|
||||
return 1;
|
||||
return !(v->arch.msr & MSR_WE);
|
||||
}
|
||||
|
||||
|
||||
@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
|
||||
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
||||
|
||||
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
|
||||
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
int r;
|
||||
sigset_t sigsaved;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
||||
|
||||
@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
|
||||
{
|
||||
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
|
||||
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -23,3 +23,4 @@ obj-$(CONFIG_SMP) += locks.o
|
||||
endif
|
||||
|
||||
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
|
||||
obj-$(CONFIG_HAS_IOMEM) += devres.o
|
||||
|
42
arch/powerpc/lib/devres.c
Normal file
42
arch/powerpc/lib/devres.c
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/device.h> /* devres_*(), devm_ioremap_release() */
|
||||
#include <linux/io.h> /* ioremap_flags() */
|
||||
#include <linux/module.h> /* EXPORT_SYMBOL() */
|
||||
|
||||
/**
|
||||
* devm_ioremap_prot - Managed ioremap_flags()
|
||||
* @dev: Generic device to remap IO address for
|
||||
* @offset: BUS offset to map
|
||||
* @size: Size of map
|
||||
* @flags: Page flags
|
||||
*
|
||||
* Managed ioremap_prot(). Map is automatically unmapped on driver
|
||||
* detach.
|
||||
*/
|
||||
void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
|
||||
size_t size, unsigned long flags)
|
||||
{
|
||||
void __iomem **ptr, *addr;
|
||||
|
||||
ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
|
||||
if (!ptr)
|
||||
return NULL;
|
||||
|
||||
addr = ioremap_flags(offset, size, flags);
|
||||
if (addr) {
|
||||
*ptr = addr;
|
||||
devres_add(dev, ptr);
|
||||
} else
|
||||
devres_free(ptr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
EXPORT_SYMBOL(devm_ioremap_prot);
|
@ -58,7 +58,9 @@ static struct resource mv643xx_eth0_resources[] = {
|
||||
|
||||
|
||||
static struct mv643xx_eth_platform_data eth0_pd = {
|
||||
.shared = &mv643xx_eth_shared_device,
|
||||
.port_number = 0,
|
||||
|
||||
.tx_sram_addr = PEGASOS2_SRAM_BASE_ETH0,
|
||||
.tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
|
||||
.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
|
||||
@ -88,7 +90,9 @@ static struct resource mv643xx_eth1_resources[] = {
|
||||
};
|
||||
|
||||
static struct mv643xx_eth_platform_data eth1_pd = {
|
||||
.shared = &mv643xx_eth_shared_device,
|
||||
.port_number = 1,
|
||||
|
||||
.tx_sram_addr = PEGASOS2_SRAM_BASE_ETH1,
|
||||
.tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
|
||||
.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
|
||||
|
@ -55,11 +55,6 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
|
||||
dp = PDE(inode);
|
||||
data = (unsigned int *)dp->data;
|
||||
|
||||
if (!data) {
|
||||
printk(KERN_ERR "scanlog: read failed no data\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (count > RTAS_DATA_BUF_SIZE)
|
||||
count = RTAS_DATA_BUF_SIZE;
|
||||
|
||||
@ -146,11 +141,6 @@ static int scanlog_open(struct inode * inode, struct file * file)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
unsigned int *data = (unsigned int *)dp->data;
|
||||
|
||||
if (!data) {
|
||||
printk(KERN_ERR "scanlog: open failed no data\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (data[0] != 0) {
|
||||
/* This imperfect test stops a second copy of the
|
||||
* data (or a reset while data is being copied)
|
||||
@ -168,10 +158,6 @@ static int scanlog_release(struct inode * inode, struct file * file)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
unsigned int *data = (unsigned int *)dp->data;
|
||||
|
||||
if (!data) {
|
||||
printk(KERN_ERR "scanlog: release failed no data\n");
|
||||
return -EIO;
|
||||
}
|
||||
data[0] = 0;
|
||||
|
||||
return 0;
|
||||
@ -200,12 +186,11 @@ static int __init scanlog_init(void)
|
||||
if (!data)
|
||||
goto err;
|
||||
|
||||
ent = proc_create("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
|
||||
&scanlog_fops);
|
||||
ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
|
||||
&scanlog_fops, data);
|
||||
if (!ent)
|
||||
goto err;
|
||||
|
||||
ent->data = data;
|
||||
proc_ppc64_scan_log_dump = ent;
|
||||
|
||||
return 0;
|
||||
|
@ -239,6 +239,8 @@ static int __init mv64x60_eth_device_setup(struct device_node *np, int id,
|
||||
|
||||
memset(&pdata, 0, sizeof(pdata));
|
||||
|
||||
pdata.shared = shared_pdev;
|
||||
|
||||
prop = of_get_property(np, "reg", NULL);
|
||||
if (!prop)
|
||||
return -ENODEV;
|
||||
|
@ -341,6 +341,7 @@ static struct resource mv64x60_eth0_resources[] = {
|
||||
};
|
||||
|
||||
static struct mv643xx_eth_platform_data eth0_pd = {
|
||||
.shared = &mv64x60_eth_shared_device;
|
||||
.port_number = 0,
|
||||
};
|
||||
|
||||
@ -366,6 +367,7 @@ static struct resource mv64x60_eth1_resources[] = {
|
||||
};
|
||||
|
||||
static struct mv643xx_eth_platform_data eth1_pd = {
|
||||
.shared = &mv64x60_eth_shared_device;
|
||||
.port_number = 1,
|
||||
};
|
||||
|
||||
@ -391,6 +393,7 @@ static struct resource mv64x60_eth2_resources[] = {
|
||||
};
|
||||
|
||||
static struct mv643xx_eth_platform_data eth2_pd = {
|
||||
.shared = &mv64x60_eth_shared_device;
|
||||
.port_number = 2,
|
||||
};
|
||||
|
||||
|
@ -430,6 +430,13 @@ config CMM_IUCV
|
||||
Select this option to enable the special message interface to
|
||||
the cooperative memory management.
|
||||
|
||||
config PAGE_STATES
|
||||
bool "Unused page notification"
|
||||
help
|
||||
This enables the notification of unused pages to the
|
||||
hypervisor. The ESSA instruction is used to do the state
|
||||
changes between a page that has content and the unused state.
|
||||
|
||||
config VIRT_TIMER
|
||||
bool "Virtual CPU timer support"
|
||||
help
|
||||
|
@ -121,7 +121,7 @@ sys32_ptrace_wrapper:
|
||||
lgfr %r3,%r3 # long
|
||||
llgtr %r4,%r4 # long
|
||||
llgfr %r5,%r5 # long
|
||||
jg sys_ptrace # branch to system call
|
||||
jg compat_sys_ptrace # branch to system call
|
||||
|
||||
.globl sys32_alarm_wrapper
|
||||
sys32_alarm_wrapper:
|
||||
|
@ -279,8 +279,6 @@ sysc_do_restart:
|
||||
st %r2,SP_R2(%r15) # store return value (change R2 on stack)
|
||||
|
||||
sysc_return:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
bno BASED(sysc_restore)
|
||||
tm __TI_flags+3(%r9),_TIF_WORK_SVC
|
||||
bnz BASED(sysc_work) # there is work to do (signals etc.)
|
||||
sysc_restore:
|
||||
@ -312,6 +310,8 @@ sysc_work_loop:
|
||||
# One of the work bits is on. Find out which one.
|
||||
#
|
||||
sysc_work:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
bno BASED(sysc_restore)
|
||||
tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
|
||||
bo BASED(sysc_mcck_pending)
|
||||
tm __TI_flags+3(%r9),_TIF_NEED_RESCHED
|
||||
@ -602,12 +602,6 @@ io_no_vtime:
|
||||
la %r2,SP_PTREGS(%r15) # address of register-save area
|
||||
basr %r14,%r1 # branch to standard irq handler
|
||||
io_return:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
#ifdef CONFIG_PREEMPT
|
||||
bno BASED(io_preempt) # no -> check for preemptive scheduling
|
||||
#else
|
||||
bno BASED(io_restore) # no-> skip resched & signal
|
||||
#endif
|
||||
tm __TI_flags+3(%r9),_TIF_WORK_INT
|
||||
bnz BASED(io_work) # there is work to do (signals etc.)
|
||||
io_restore:
|
||||
@ -629,10 +623,18 @@ io_restore_trace_psw:
|
||||
.long 0, io_restore_trace + 0x80000000
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
io_preempt:
|
||||
#
|
||||
# switch to kernel stack, then check the TIF bits
|
||||
#
|
||||
io_work:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
#ifndef CONFIG_PREEMPT
|
||||
bno BASED(io_restore) # no-> skip resched & signal
|
||||
#else
|
||||
bnz BASED(io_work_user) # no -> check for preemptive scheduling
|
||||
# check for preemptive scheduling
|
||||
icm %r0,15,__TI_precount(%r9)
|
||||
bnz BASED(io_restore)
|
||||
bnz BASED(io_restore) # preemption disabled
|
||||
l %r1,SP_R15(%r15)
|
||||
s %r1,BASED(.Lc_spsize)
|
||||
mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
|
||||
@ -646,10 +648,7 @@ io_resume_loop:
|
||||
br %r1 # call schedule
|
||||
#endif
|
||||
|
||||
#
|
||||
# switch to kernel stack, then check the TIF bits
|
||||
#
|
||||
io_work:
|
||||
io_work_user:
|
||||
l %r1,__LC_KERNEL_STACK
|
||||
s %r1,BASED(.Lc_spsize)
|
||||
mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
|
||||
|
@ -271,8 +271,6 @@ sysc_noemu:
|
||||
stg %r2,SP_R2(%r15) # store return value (change R2 on stack)
|
||||
|
||||
sysc_return:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
jno sysc_restore
|
||||
tm __TI_flags+7(%r9),_TIF_WORK_SVC
|
||||
jnz sysc_work # there is work to do (signals etc.)
|
||||
sysc_restore:
|
||||
@ -304,6 +302,8 @@ sysc_work_loop:
|
||||
# One of the work bits is on. Find out which one.
|
||||
#
|
||||
sysc_work:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
jno sysc_restore
|
||||
tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
|
||||
jo sysc_mcck_pending
|
||||
tm __TI_flags+7(%r9),_TIF_NEED_RESCHED
|
||||
@ -585,12 +585,6 @@ io_no_vtime:
|
||||
la %r2,SP_PTREGS(%r15) # address of register-save area
|
||||
brasl %r14,do_IRQ # call standard irq handler
|
||||
io_return:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
#ifdef CONFIG_PREEMPT
|
||||
jno io_preempt # no -> check for preemptive scheduling
|
||||
#else
|
||||
jno io_restore # no-> skip resched & signal
|
||||
#endif
|
||||
tm __TI_flags+7(%r9),_TIF_WORK_INT
|
||||
jnz io_work # there is work to do (signals etc.)
|
||||
io_restore:
|
||||
@ -612,10 +606,41 @@ io_restore_trace_psw:
|
||||
.quad 0, io_restore_trace
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
io_preempt:
|
||||
#
|
||||
# There is work to do, we need to check if we return to userspace, then
|
||||
# check, if we are in SIE, if yes leave it
|
||||
#
|
||||
io_work:
|
||||
tm SP_PSW+1(%r15),0x01 # returning to user ?
|
||||
#ifndef CONFIG_PREEMPT
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
|
||||
jnz io_work_user # yes -> no need to check for SIE
|
||||
la %r1, BASED(sie_opcode) # we return to kernel here
|
||||
lg %r2, SP_PSW+8(%r15)
|
||||
clc 0(2,%r1), 0(%r2) # is current instruction = SIE?
|
||||
jne io_restore # no-> return to kernel
|
||||
lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE
|
||||
aghi %r1, 4
|
||||
stg %r1, SP_PSW+8(%r15)
|
||||
j io_restore # return to kernel
|
||||
#else
|
||||
jno io_restore # no-> skip resched & signal
|
||||
#endif
|
||||
#else
|
||||
jnz io_work_user # yes -> do resched & signal
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
|
||||
la %r1, BASED(sie_opcode)
|
||||
lg %r2, SP_PSW+8(%r15)
|
||||
clc 0(2,%r1), 0(%r2) # is current instruction = SIE?
|
||||
jne 0f # no -> leave PSW alone
|
||||
lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE
|
||||
aghi %r1, 4
|
||||
stg %r1, SP_PSW+8(%r15)
|
||||
0:
|
||||
#endif
|
||||
# check for preemptive scheduling
|
||||
icm %r0,15,__TI_precount(%r9)
|
||||
jnz io_restore
|
||||
jnz io_restore # preemption is disabled
|
||||
# switch to kernel stack
|
||||
lg %r1,SP_R15(%r15)
|
||||
aghi %r1,-SP_SIZE
|
||||
@ -629,10 +654,7 @@ io_resume_loop:
|
||||
jg preempt_schedule_irq
|
||||
#endif
|
||||
|
||||
#
|
||||
# switch to kernel stack, then check TIF bits
|
||||
#
|
||||
io_work:
|
||||
io_work_user:
|
||||
lg %r1,__LC_KERNEL_STACK
|
||||
aghi %r1,-SP_SIZE
|
||||
mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
|
||||
@ -653,6 +675,11 @@ io_work_loop:
|
||||
j io_restore
|
||||
io_work_done:
|
||||
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
|
||||
sie_opcode:
|
||||
.long 0xb2140000
|
||||
#endif
|
||||
|
||||
#
|
||||
# _TIF_MCCK_PENDING is set, call handler
|
||||
#
|
||||
|
@ -292,8 +292,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
do_ptrace_normal(struct task_struct *child, long request, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
ptrace_area parea;
|
||||
int copied, ret;
|
||||
@ -529,35 +528,19 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
do_ptrace_emu31(struct task_struct *child, long request, long addr, long data)
|
||||
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
|
||||
compat_ulong_t caddr, compat_ulong_t cdata)
|
||||
{
|
||||
unsigned int tmp; /* 4 bytes !! */
|
||||
unsigned long addr = caddr;
|
||||
unsigned long data = cdata;
|
||||
ptrace_area_emu31 parea;
|
||||
int copied, ret;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKTEXT:
|
||||
case PTRACE_PEEKDATA:
|
||||
/* read word at location addr. */
|
||||
copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
|
||||
if (copied != sizeof(tmp))
|
||||
return -EIO;
|
||||
return put_user(tmp, (unsigned int __force __user *) data);
|
||||
|
||||
case PTRACE_PEEKUSR:
|
||||
/* read the word at location addr in the USER area. */
|
||||
return peek_user_emu31(child, addr, data);
|
||||
|
||||
case PTRACE_POKETEXT:
|
||||
case PTRACE_POKEDATA:
|
||||
/* write the word at location addr. */
|
||||
tmp = data;
|
||||
copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1);
|
||||
if (copied != sizeof(tmp))
|
||||
return -EIO;
|
||||
return 0;
|
||||
|
||||
case PTRACE_POKEUSR:
|
||||
/* write the word at location addr in the USER area */
|
||||
return poke_user_emu31(child, addr, data);
|
||||
@ -587,82 +570,11 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data)
|
||||
copied += sizeof(unsigned int);
|
||||
}
|
||||
return 0;
|
||||
case PTRACE_GETEVENTMSG:
|
||||
return put_user((__u32) child->ptrace_message,
|
||||
(unsigned int __force __user *) data);
|
||||
case PTRACE_GETSIGINFO:
|
||||
if (child->last_siginfo == NULL)
|
||||
return -EINVAL;
|
||||
return copy_siginfo_to_user32((compat_siginfo_t
|
||||
__force __user *) data,
|
||||
child->last_siginfo);
|
||||
case PTRACE_SETSIGINFO:
|
||||
if (child->last_siginfo == NULL)
|
||||
return -EINVAL;
|
||||
return copy_siginfo_from_user32(child->last_siginfo,
|
||||
(compat_siginfo_t
|
||||
__force __user *) data);
|
||||
}
|
||||
return ptrace_request(child, request, addr, data);
|
||||
return compat_ptrace_request(child, request, addr, data);
|
||||
}
|
||||
#endif
|
||||
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
switch (request) {
|
||||
case PTRACE_SYSCALL:
|
||||
/* continue and stop at next (return from) syscall */
|
||||
case PTRACE_CONT:
|
||||
/* restart after signal. */
|
||||
if (!valid_signal(data))
|
||||
return -EIO;
|
||||
if (request == PTRACE_SYSCALL)
|
||||
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
else
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
child->exit_code = data;
|
||||
/* make sure the single step bit is not set. */
|
||||
user_disable_single_step(child);
|
||||
wake_up_process(child);
|
||||
return 0;
|
||||
|
||||
case PTRACE_KILL:
|
||||
/*
|
||||
* make the child exit. Best I can do is send it a sigkill.
|
||||
* perhaps it should be put in the status that it wants to
|
||||
* exit.
|
||||
*/
|
||||
if (child->exit_state == EXIT_ZOMBIE) /* already dead */
|
||||
return 0;
|
||||
child->exit_code = SIGKILL;
|
||||
/* make sure the single step bit is not set. */
|
||||
user_disable_single_step(child);
|
||||
wake_up_process(child);
|
||||
return 0;
|
||||
|
||||
case PTRACE_SINGLESTEP:
|
||||
/* set the trap flag. */
|
||||
if (!valid_signal(data))
|
||||
return -EIO;
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
child->exit_code = data;
|
||||
user_enable_single_step(child);
|
||||
/* give it a chance to run. */
|
||||
wake_up_process(child);
|
||||
return 0;
|
||||
|
||||
/* Do requests that differ for 31/64 bit */
|
||||
default:
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (test_thread_flag(TIF_31BIT))
|
||||
return do_ptrace_emu31(child, request, addr, data);
|
||||
#endif
|
||||
return do_ptrace_normal(child, request, addr, data);
|
||||
}
|
||||
/* Not reached. */
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
asmlinkage void
|
||||
syscall_trace(struct pt_regs *regs, int entryexit)
|
||||
{
|
||||
|
@ -32,23 +32,6 @@
|
||||
#include <asm/uaccess.h>
|
||||
#include "entry.h"
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way Unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage long sys_pipe(unsigned long __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* common code for old and new mmaps */
|
||||
static inline long do_mmap2(
|
||||
unsigned long addr, unsigned long len,
|
||||
|
@ -22,7 +22,6 @@ config KVM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select S390_SWITCH_AMODE
|
||||
select PREEMPT
|
||||
---help---
|
||||
Support hosting paravirtualized guest machines using the SIE
|
||||
virtualization capability on the mainframe. This should work
|
||||
|
@ -105,6 +105,9 @@ static intercept_handler_t instruction_handlers[256] = {
|
||||
static int handle_noop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
switch (vcpu->arch.sie_block->icptcode) {
|
||||
case 0x0:
|
||||
vcpu->stat.exit_null++;
|
||||
break;
|
||||
case 0x10:
|
||||
vcpu->stat.exit_external_request++;
|
||||
break;
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "userspace_handled", VCPU_STAT(exit_userspace) },
|
||||
{ "exit_null", VCPU_STAT(exit_null) },
|
||||
{ "exit_validity", VCPU_STAT(exit_validity) },
|
||||
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
|
||||
{ "exit_external_request", VCPU_STAT(exit_external_request) },
|
||||
@ -221,10 +222,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
|
||||
restore_fp_regs(&vcpu->arch.guest_fpregs);
|
||||
restore_access_regs(vcpu->arch.guest_acrs);
|
||||
|
||||
if (signal_pending(current))
|
||||
atomic_set_mask(CPUSTAT_STOP_INT,
|
||||
&vcpu->arch.sie_block->cpuflags);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
@ -5,3 +5,4 @@
|
||||
obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
|
||||
obj-$(CONFIG_CMM) += cmm.o
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_PAGE_STATES) += page-states.o
|
||||
|
@ -126,6 +126,9 @@ void __init mem_init(void)
|
||||
/* clear the zero-page */
|
||||
memset(empty_zero_page, 0, PAGE_SIZE);
|
||||
|
||||
/* Setup guest page hinting */
|
||||
cmma_init();
|
||||
|
||||
/* this will put all low memory onto the freelists */
|
||||
totalram_pages += free_all_bootmem();
|
||||
|
||||
|
79
arch/s390/mm/page-states.c
Normal file
79
arch/s390/mm/page-states.c
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* arch/s390/mm/page-states.c
|
||||
*
|
||||
* Copyright IBM Corp. 2008
|
||||
*
|
||||
* Guest page hinting for unused pages.
|
||||
*
|
||||
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#define ESSA_SET_STABLE 1
|
||||
#define ESSA_SET_UNUSED 2
|
||||
|
||||
static int cmma_flag;
|
||||
|
||||
/*
 * Parse the "cmma=" kernel parameter.
 *
 * "yes"/"on" set cmma_flag to 1; any other value sets it to 0.
 * Returns 1 when the (whitespace-stripped) value is one of
 * "yes", "on", "no" or "off", and 0 for anything else.
 */
static int __init cmma(char *str)
{
	char *arg = strstrip(str);
	int enable = !strcmp(arg, "yes") || !strcmp(arg, "on");

	cmma_flag = enable;
	if (enable)
		return 1;

	return !strcmp(arg, "no") || !strcmp(arg, "off");
}
|
||||
|
||||
__setup("cmma=", cmma);
|
||||
|
||||
/*
 * cmma_init() - boot-time probe for the page-state instruction.
 *
 * Does nothing when cmma_flag is clear.  Otherwise it executes the
 * instruction encoded as ".insn rrf,0xb9ab0000" once and clears cmma_flag
 * if rc is still non-zero afterwards, which makes arch_free_page() and
 * arch_alloc_page() return early.
 *
 * rc is preset to -EOPNOTSUPP and is only zeroed by the "la %0,0" at
 * label 0.  NOTE(review): EX_TABLE(0b,1b) presumably redirects a fault on
 * the probed instruction to label 1, skipping that "la" so rc keeps its
 * error value -- confirm against the EX_TABLE definition.
 */
void __init cmma_init(void)
|
||||
{
|
||||
/* tmp and rc are bound to fixed registers 0 and 1 for the asm below. */
register unsigned long tmp asm("0") = 0;
|
||||
register int rc asm("1") = -EOPNOTSUPP;
|
||||
|
||||
if (!cmma_flag)
|
||||
return;
|
||||
/* Probe: run the instruction once; rc becomes 0 only if label 0 is reached. */
asm volatile(
|
||||
" .insn rrf,0xb9ab0000,%1,%1,0,0\n"
|
||||
"0: la %0,0\n"
|
||||
"1:\n"
|
||||
EX_TABLE(0b,1b)
|
||||
: "+&d" (rc), "+&d" (tmp));
|
||||
/* Probe did not clear rc: turn the feature off. */
if (rc)
|
||||
cmma_flag = 0;
|
||||
}
|
||||
|
||||
void arch_free_page(struct page *page, int order)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
if (!cmma_flag)
|
||||
return;
|
||||
for (i = 0; i < (1 << order); i++)
|
||||
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
|
||||
: "=&d" (rc)
|
||||
: "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
|
||||
"i" (ESSA_SET_UNUSED));
|
||||
}
|
||||
|
||||
void arch_alloc_page(struct page *page, int order)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
if (!cmma_flag)
|
||||
return;
|
||||
for (i = 0; i < (1 << order); i++)
|
||||
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
|
||||
: "=&d" (rc)
|
||||
: "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
|
||||
"i" (ESSA_SET_STABLE));
|
||||
}
|
@ -30,23 +30,6 @@
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way Unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do a system call from kernel instead of calling sys_execve so we
|
||||
* end up with proper pt_regs.
|
||||
|
@ -419,14 +419,26 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags,
|
||||
unsigned long stack_size)
|
||||
{
|
||||
unsigned long parent_tid_ptr, child_tid_ptr;
|
||||
unsigned long orig_i1 = regs->u_regs[UREG_I1];
|
||||
long ret;
|
||||
|
||||
parent_tid_ptr = regs->u_regs[UREG_I2];
|
||||
child_tid_ptr = regs->u_regs[UREG_I4];
|
||||
|
||||
return do_fork(clone_flags, stack_start,
|
||||
regs, stack_size,
|
||||
(int __user *) parent_tid_ptr,
|
||||
(int __user *) child_tid_ptr);
|
||||
ret = do_fork(clone_flags, stack_start,
|
||||
regs, stack_size,
|
||||
(int __user *) parent_tid_ptr,
|
||||
(int __user *) child_tid_ptr);
|
||||
|
||||
/* If we get an error and potentially restart the system
|
||||
* call, we're screwed because copy_thread() clobbered
|
||||
* the parent's %o1. So detect that case and restore it
|
||||
* here.
|
||||
*/
|
||||
if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
|
||||
regs->u_regs[UREG_I1] = orig_i1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Copy a Sparc thread. The fork() return value conventions
|
||||
|
@ -245,15 +245,29 @@ static inline int invalid_frame_pointer(void __user *fp, int fplen)
|
||||
|
||||
static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, unsigned long framesize)
|
||||
{
|
||||
unsigned long sp;
|
||||
unsigned long sp = regs->u_regs[UREG_FP];
|
||||
|
||||
sp = regs->u_regs[UREG_FP];
|
||||
/*
|
||||
* If we are on the alternate signal stack and would overflow it, don't.
|
||||
* Return an always-bogus address instead so we will die with SIGSEGV.
|
||||
*/
|
||||
if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
|
||||
return (void __user *) -1L;
|
||||
|
||||
/* This is the X/Open sanctioned signal stack switching. */
|
||||
if (sa->sa_flags & SA_ONSTACK) {
|
||||
if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7))
|
||||
if (sas_ss_flags(sp) == 0)
|
||||
sp = current->sas_ss_sp + current->sas_ss_size;
|
||||
}
|
||||
|
||||
/* Always align the stack frame. This handles two cases. First,
|
||||
* sigaltstack need not be mindful of platform specific stack
|
||||
* alignment. Second, if we took this signal because the stack
|
||||
* is not aligned properly, we'd like to take the signal cleanly
|
||||
* and report that.
|
||||
*/
|
||||
sp &= ~7UL;
|
||||
|
||||
return (void __user *)(sp - framesize);
|
||||
}
|
||||
|
||||
|
@ -223,8 +223,7 @@ int sparc_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
|
||||
{
|
||||
if (ARCH_SUN4C_SUN4 &&
|
||||
(len > 0x20000000 ||
|
||||
((flags & MAP_FIXED) &&
|
||||
addr < 0xe0000000 && addr + len > 0x20000000)))
|
||||
(addr < 0xe0000000 && addr + len > 0x20000000)))
|
||||
return -EINVAL;
|
||||
|
||||
/* See asm-sparc/uaccess.h */
|
||||
|
@ -503,6 +503,8 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
|
||||
unsigned long stack_size)
|
||||
{
|
||||
int __user *parent_tid_ptr, *child_tid_ptr;
|
||||
unsigned long orig_i1 = regs->u_regs[UREG_I1];
|
||||
long ret;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (test_thread_flag(TIF_32BIT)) {
|
||||
@ -515,9 +517,19 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
|
||||
child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
|
||||
}
|
||||
|
||||
return do_fork(clone_flags, stack_start,
|
||||
regs, stack_size,
|
||||
parent_tid_ptr, child_tid_ptr);
|
||||
ret = do_fork(clone_flags, stack_start,
|
||||
regs, stack_size,
|
||||
parent_tid_ptr, child_tid_ptr);
|
||||
|
||||
/* If we get an error and potentially restart the system
|
||||
* call, we're screwed because copy_thread() clobbered
|
||||
* the parent's %o1. So detect that case and restore it
|
||||
* here.
|
||||
*/
|
||||
if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
|
||||
regs->u_regs[UREG_I1] = orig_i1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Copy a Sparc thread. The fork() return value conventions
|
||||
|
@ -376,16 +376,29 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
|
||||
|
||||
static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
|
||||
{
|
||||
unsigned long sp;
|
||||
unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
|
||||
|
||||
sp = regs->u_regs[UREG_FP] + STACK_BIAS;
|
||||
/*
|
||||
* If we are on the alternate signal stack and would overflow it, don't.
|
||||
* Return an always-bogus address instead so we will die with SIGSEGV.
|
||||
*/
|
||||
if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
|
||||
return (void __user *) -1L;
|
||||
|
||||
/* This is the X/Open sanctioned signal stack switching. */
|
||||
if (ka->sa.sa_flags & SA_ONSTACK) {
|
||||
if (!on_sig_stack(sp) &&
|
||||
!((current->sas_ss_sp + current->sas_ss_size) & 7))
|
||||
if (sas_ss_flags(sp) == 0)
|
||||
sp = current->sas_ss_sp + current->sas_ss_size;
|
||||
}
|
||||
|
||||
/* Always align the stack frame. This handles two cases. First,
|
||||
* sigaltstack need not be mindful of platform specific stack
|
||||
* alignment. Second, if we took this signal because the stack
|
||||
* is not aligned properly, we'd like to take the signal cleanly
|
||||
* and report that.
|
||||
*/
|
||||
sp &= ~7UL;
|
||||
|
||||
return (void __user *)(sp - framesize);
|
||||
}
|
||||
|
||||
|
@ -406,11 +406,27 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
|
||||
regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
|
||||
sp = regs->u_regs[UREG_FP];
|
||||
|
||||
/*
|
||||
* If we are on the alternate signal stack and would overflow it, don't.
|
||||
* Return an always-bogus address instead so we will die with SIGSEGV.
|
||||
*/
|
||||
if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
|
||||
return (void __user *) -1L;
|
||||
|
||||
/* This is the X/Open sanctioned signal stack switching. */
|
||||
if (sa->sa_flags & SA_ONSTACK) {
|
||||
if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7))
|
||||
if (sas_ss_flags(sp) == 0)
|
||||
sp = current->sas_ss_sp + current->sas_ss_size;
|
||||
}
|
||||
|
||||
/* Always align the stack frame. This handles two cases. First,
|
||||
* sigaltstack need not be mindful of platform specific stack
|
||||
* alignment. Second, if we took this signal because the stack
|
||||
* is not aligned properly, we'd like to take the signal cleanly
|
||||
* and report that.
|
||||
*/
|
||||
sp &= ~7UL;
|
||||
|
||||
return (void __user *)(sp - framesize);
|
||||
}
|
||||
|
||||
|
@ -865,21 +865,14 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
|
||||
void *info = call_data->info;
|
||||
|
||||
clear_softint(1 << irq);
|
||||
|
||||
irq_enter();
|
||||
|
||||
if (!call_data->wait) {
|
||||
/* let initiator proceed after getting data */
|
||||
atomic_inc(&call_data->finished);
|
||||
}
|
||||
|
||||
func(info);
|
||||
|
||||
irq_exit();
|
||||
|
||||
if (call_data->wait) {
|
||||
/* let initiator proceed only after completion */
|
||||
func(info);
|
||||
atomic_inc(&call_data->finished);
|
||||
} else {
|
||||
/* let initiator proceed after getting data */
|
||||
atomic_inc(&call_data->finished);
|
||||
func(info);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1041,9 +1034,7 @@ void smp_receive_signal(int cpu)
|
||||
|
||||
void smp_receive_signal_client(int irq, struct pt_regs *regs)
|
||||
{
|
||||
irq_enter();
|
||||
clear_softint(1 << irq);
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
|
||||
@ -1051,8 +1042,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
|
||||
struct mm_struct *mm;
|
||||
unsigned long flags;
|
||||
|
||||
irq_enter();
|
||||
|
||||
clear_softint(1 << irq);
|
||||
|
||||
/* See if we need to allocate a new TLB context because
|
||||
@ -1072,8 +1061,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
|
||||
load_secondary_context(mm);
|
||||
__flush_tlb_mm(CTX_HWBITS(mm->context),
|
||||
SECONDARY_CONTEXT);
|
||||
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
void smp_new_mmu_context_version(void)
|
||||
@ -1239,8 +1226,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
|
||||
{
|
||||
clear_softint(1 << irq);
|
||||
|
||||
irq_enter();
|
||||
|
||||
preempt_disable();
|
||||
|
||||
__asm__ __volatile__("flushw");
|
||||
@ -1253,8 +1238,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
|
||||
prom_world(0);
|
||||
|
||||
preempt_enable();
|
||||
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
/* /proc/profile writes can call this, don't __init it please. */
|
||||
|
@ -549,13 +549,13 @@ int sparc64_mmap_check(unsigned long addr, unsigned long len,
|
||||
if (len >= STACK_TOP32)
|
||||
return -EINVAL;
|
||||
|
||||
if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
|
||||
if (addr > STACK_TOP32 - len)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (len >= VA_EXCLUDE_START)
|
||||
return -EINVAL;
|
||||
|
||||
if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
|
||||
if (invalid_64bit_range(addr, len))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -236,13 +236,6 @@ asmlinkage long sys32_getegid16(void)
|
||||
|
||||
/* 32-bit timeval and related flotsam. */
|
||||
|
||||
static long get_tv32(struct timeval *o, struct compat_timeval __user *i)
|
||||
{
|
||||
return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
|
||||
(__get_user(o->tv_sec, &i->tv_sec) |
|
||||
__get_user(o->tv_usec, &i->tv_usec)));
|
||||
}
|
||||
|
||||
static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
|
||||
{
|
||||
return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
|
||||
@ -757,30 +750,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
|
||||
return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_utimes(char __user *filename,
|
||||
struct compat_timeval __user *tvs)
|
||||
{
|
||||
struct timespec tv[2];
|
||||
|
||||
if (tvs) {
|
||||
struct timeval ktvs[2];
|
||||
if (get_tv32(&ktvs[0], tvs) ||
|
||||
get_tv32(&ktvs[1], 1+tvs))
|
||||
return -EFAULT;
|
||||
|
||||
if (ktvs[0].tv_usec < 0 || ktvs[0].tv_usec >= 1000000 ||
|
||||
ktvs[1].tv_usec < 0 || ktvs[1].tv_usec >= 1000000)
|
||||
return -EINVAL;
|
||||
|
||||
tv[0].tv_sec = ktvs[0].tv_sec;
|
||||
tv[0].tv_nsec = 1000 * ktvs[0].tv_usec;
|
||||
tv[1].tv_sec = ktvs[1].tv_sec;
|
||||
tv[1].tv_nsec = 1000 * ktvs[1].tv_usec;
|
||||
}
|
||||
|
||||
return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0);
|
||||
}
|
||||
|
||||
/* These are here just in case some old sparc32 binary calls it. */
|
||||
asmlinkage long sys32_pause(void)
|
||||
{
|
||||
|
@ -45,7 +45,7 @@ sys_call_table32:
|
||||
/*120*/ .word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod
|
||||
.word sys_nis_syscall, sys32_setreuid16, sys32_setregid16, sys_rename, sys_truncate
|
||||
/*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
|
||||
.word sys_nis_syscall, sys32_mkdir, sys_rmdir, sys32_utimes, compat_sys_stat64
|
||||
.word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
|
||||
/*140*/ .word sys32_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
|
||||
.word compat_sys_setrlimit, sys_pivot_root, sys32_prctl, sys_pciconfig_read, sys_pciconfig_write
|
||||
/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
|
||||
|
@ -771,6 +771,9 @@ static void __init find_ramdisk(unsigned long phys_base)
|
||||
initrd_end = ramdisk_image + sparc_ramdisk_size;
|
||||
|
||||
lmb_reserve(initrd_start, initrd_end);
|
||||
|
||||
initrd_start += PAGE_OFFSET;
|
||||
initrd_end += PAGE_OFFSET;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -2362,16 +2365,3 @@ void __flush_tlb_all(void)
|
||||
__asm__ __volatile__("wrpr %0, 0, %%pstate"
|
||||
: : "r" (pstate));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
|
||||
void online_page(struct page *page)
|
||||
{
|
||||
ClearPageReserved(page);
|
||||
init_page_count(page);
|
||||
__free_page(page);
|
||||
totalram_pages++;
|
||||
num_physpages++;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_MEMORY_HOTPLUG */
|
||||
|
@ -191,9 +191,9 @@ void line_flush_chars(struct tty_struct *tty)
|
||||
line_flush_buffer(tty);
|
||||
}
|
||||
|
||||
void line_put_char(struct tty_struct *tty, unsigned char ch)
|
||||
int line_put_char(struct tty_struct *tty, unsigned char ch)
|
||||
{
|
||||
line_write(tty, &ch, sizeof(ch));
|
||||
return line_write(tty, &ch, sizeof(ch));
|
||||
}
|
||||
|
||||
int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
|
||||
|
@ -71,7 +71,7 @@ extern int line_setup(struct line *lines, unsigned int sizeof_lines,
|
||||
char *init, char **error_out);
|
||||
extern int line_write(struct tty_struct *tty, const unsigned char *buf,
|
||||
int len);
|
||||
extern void line_put_char(struct tty_struct *tty, unsigned char ch);
|
||||
extern int line_put_char(struct tty_struct *tty, unsigned char ch);
|
||||
extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
|
||||
extern int line_chars_in_buffer(struct tty_struct *tty);
|
||||
extern void line_flush_buffer(struct tty_struct *tty);
|
||||
|
@ -73,23 +73,6 @@ long old_mmap(unsigned long addr, unsigned long len,
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
long sys_pipe(unsigned long __user * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
long error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, sizeof(fd)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
long sys_uname(struct old_utsname __user * name)
|
||||
{
|
||||
|
@ -132,23 +132,6 @@ sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way unix traditionally does this, though.
|
||||
*/
|
||||
int sys_pipe (int *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe (fd);
|
||||
if (!error) {
|
||||
if (copy_to_user (fildes, fd, 2*sizeof (int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
do_mmap2 (unsigned long addr, size_t len,
|
||||
unsigned long prot, unsigned long flags,
|
||||
|
@ -18,6 +18,7 @@ config X86_64
|
||||
### Arch settings
|
||||
config X86
|
||||
def_bool y
|
||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||
select HAVE_IDE
|
||||
select HAVE_OPROFILE
|
||||
select HAVE_KPROBES
|
||||
@ -1661,6 +1662,7 @@ config GEODE_MFGPT_TIMER
|
||||
|
||||
config OLPC
|
||||
bool "One Laptop Per Child support"
|
||||
depends on MGEODE_LX
|
||||
default n
|
||||
help
|
||||
Add support for detecting the unique features of the OLPC
|
||||
|
@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp)
|
||||
die("Cannot read ELF header: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
|
||||
if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) {
|
||||
die("No ELF magic\n");
|
||||
}
|
||||
if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
|
||||
|
@ -83,9 +83,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o
|
||||
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
|
||||
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
|
||||
|
||||
ifdef CONFIG_INPUT_PCSPKR
|
||||
obj-y += pcspeaker.o
|
||||
endif
|
||||
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
|
||||
|
||||
obj-$(CONFIG_SCx200) += scx200.o
|
||||
scx200-y += scx200_32.o
|
||||
|
@ -10,5 +10,5 @@ endif
|
||||
$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
|
||||
|
||||
$(obj)/realmode/wakeup.bin: FORCE
|
||||
$(Q)$(MAKE) $(build)=$(obj)/realmode $@
|
||||
$(Q)$(MAKE) $(build)=$(obj)/realmode
|
||||
|
||||
|
@ -6,7 +6,8 @@
|
||||
# for more details.
|
||||
#
|
||||
|
||||
targets := wakeup.bin wakeup.elf
|
||||
always := wakeup.bin
|
||||
targets := wakeup.elf wakeup.lds
|
||||
|
||||
wakeup-y += wakeup.o wakemain.o video-mode.o copy.o
|
||||
|
||||
@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf := -T
|
||||
|
||||
CPPFLAGS_wakeup.lds += -P -C
|
||||
|
||||
$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
|
||||
$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
OBJCOPYFLAGS_wakeup.bin := -O binary
|
||||
|
@ -133,6 +133,7 @@ static int kvm_register_clock(void)
|
||||
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
static void kvm_setup_secondary_clock(void)
|
||||
{
|
||||
/*
|
||||
@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
|
||||
/* ok, done with our trickery, call native */
|
||||
setup_secondary_APIC_clock();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* After the clock is registered, the host will keep writing to the
|
||||
@ -177,7 +179,9 @@ void __init kvmclock_init(void)
|
||||
pv_time_ops.get_wallclock = kvm_get_wallclock;
|
||||
pv_time_ops.set_wallclock = kvm_set_wallclock;
|
||||
pv_time_ops.sched_clock = kvm_clock_read;
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
|
||||
#endif
|
||||
machine_ops.shutdown = kvm_shutdown;
|
||||
#ifdef CONFIG_KEXEC
|
||||
machine_ops.crash_shutdown = kvm_crash_shutdown;
|
||||
|
@ -794,6 +794,11 @@ void __init find_smp_config(void)
|
||||
ACPI-based MP Configuration
|
||||
-------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
|
||||
*/
|
||||
int es7000_plat;
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
|
||||
MP_intsrc_info(&intsrc);
|
||||
}
|
||||
|
||||
int es7000_plat;
|
||||
|
||||
void __init mp_config_acpi_legacy_irqs(void)
|
||||
{
|
||||
struct mpc_config_intsrc intsrc;
|
||||
|
@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
|
||||
DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
|
||||
},
|
||||
},
|
||||
{ /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
|
||||
|
@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void)
|
||||
|
||||
/* Copy section for each CPU (we discard the original) */
|
||||
size = PERCPU_ENOUGH_ROOM;
|
||||
printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
|
||||
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
|
||||
size);
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
|
@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
|
||||
/*
|
||||
* Activate a secondary processor.
|
||||
*/
|
||||
void __cpuinit start_secondary(void *unused)
|
||||
static void __cpuinit start_secondary(void *unused)
|
||||
{
|
||||
/*
|
||||
* Don't put *anything* before cpu_init(), SMP booting is too
|
||||
@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu)
|
||||
cpu_clear(cpu, cpu_sibling_setup_map);
|
||||
}
|
||||
|
||||
int additional_cpus __initdata = -1;
|
||||
static int additional_cpus __initdata = -1;
|
||||
|
||||
static __init int setup_additional_cpus(char *s)
|
||||
{
|
||||
|
@ -22,23 +22,6 @@
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way Unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage int sys_pipe(unsigned long __user * fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
|
||||
unsigned long prot, unsigned long flags,
|
||||
unsigned long fd, unsigned long pgoff)
|
||||
|
@ -17,23 +17,6 @@
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/ia32.h>
|
||||
|
||||
/*
|
||||
* sys_pipe() is the normal C calling standard for creating
|
||||
* a pipe. It's not the way Unix traditionally does this, though.
|
||||
*/
|
||||
asmlinkage long sys_pipe(int __user *fildes)
|
||||
{
|
||||
int fd[2];
|
||||
int error;
|
||||
|
||||
error = do_pipe(fd);
|
||||
if (!error) {
|
||||
if (copy_to_user(fildes, fd, 2*sizeof(int)))
|
||||
error = -EFAULT;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
|
||||
unsigned long fd, unsigned long off)
|
||||
{
|
||||
|
@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
|
||||
* mode 1 is one shot, mode 2 is periodic, otherwise delete the timer */
|
||||
switch (ps->channels[0].mode) {
|
||||
case 1:
|
||||
/* FIXME: enhance mode 4 precision */
|
||||
case 4:
|
||||
create_pit_timer(&ps->pit_timer, val, 0);
|
||||
break;
|
||||
case 2:
|
||||
|
@ -79,36 +79,6 @@ static int dbg = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define PT64_PT_BITS 9
|
||||
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
|
||||
#define PT32_PT_BITS 10
|
||||
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
|
||||
|
||||
#define PT_WRITABLE_SHIFT 1
|
||||
|
||||
#define PT_PRESENT_MASK (1ULL << 0)
|
||||
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
|
||||
#define PT_USER_MASK (1ULL << 2)
|
||||
#define PT_PWT_MASK (1ULL << 3)
|
||||
#define PT_PCD_MASK (1ULL << 4)
|
||||
#define PT_ACCESSED_MASK (1ULL << 5)
|
||||
#define PT_DIRTY_MASK (1ULL << 6)
|
||||
#define PT_PAGE_SIZE_MASK (1ULL << 7)
|
||||
#define PT_PAT_MASK (1ULL << 7)
|
||||
#define PT_GLOBAL_MASK (1ULL << 8)
|
||||
#define PT64_NX_SHIFT 63
|
||||
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
|
||||
|
||||
#define PT_PAT_SHIFT 7
|
||||
#define PT_DIR_PAT_SHIFT 12
|
||||
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
|
||||
|
||||
#define PT32_DIR_PSE36_SIZE 4
|
||||
#define PT32_DIR_PSE36_SHIFT 13
|
||||
#define PT32_DIR_PSE36_MASK \
|
||||
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
|
||||
|
||||
|
||||
#define PT_FIRST_AVAIL_BITS_SHIFT 9
|
||||
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
|
||||
|
||||
@ -154,10 +124,6 @@ static int dbg = 1;
|
||||
#define PFERR_USER_MASK (1U << 2)
|
||||
#define PFERR_FETCH_MASK (1U << 4)
|
||||
|
||||
#define PT64_ROOT_LEVEL 4
|
||||
#define PT32_ROOT_LEVEL 2
|
||||
#define PT32E_ROOT_LEVEL 3
|
||||
|
||||
#define PT_DIRECTORY_LEVEL 2
|
||||
#define PT_PAGE_TABLE_LEVEL 1
|
||||
|
||||
@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
|
||||
|
||||
static u64 __read_mostly shadow_trap_nonpresent_pte;
|
||||
static u64 __read_mostly shadow_notrap_nonpresent_pte;
|
||||
static u64 __read_mostly shadow_base_present_pte;
|
||||
static u64 __read_mostly shadow_nx_mask;
|
||||
static u64 __read_mostly shadow_x_mask; /* mutually exclusive with nx_mask */
|
||||
static u64 __read_mostly shadow_user_mask;
|
||||
static u64 __read_mostly shadow_accessed_mask;
|
||||
static u64 __read_mostly shadow_dirty_mask;
|
||||
|
||||
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
|
||||
{
|
||||
@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
|
||||
|
||||
void kvm_mmu_set_base_ptes(u64 base_pte)
|
||||
{
|
||||
shadow_base_present_pte = base_pte;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
|
||||
|
||||
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
|
||||
u64 dirty_mask, u64 nx_mask, u64 x_mask)
|
||||
{
|
||||
shadow_user_mask = user_mask;
|
||||
shadow_accessed_mask = accessed_mask;
|
||||
shadow_dirty_mask = dirty_mask;
|
||||
shadow_nx_mask = nx_mask;
|
||||
shadow_x_mask = x_mask;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
|
||||
|
||||
static int is_write_protection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.cr0 & X86_CR0_WP;
|
||||
@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
|
||||
|
||||
static int is_dirty_pte(unsigned long pte)
|
||||
{
|
||||
return pte & PT_DIRTY_MASK;
|
||||
return pte & shadow_dirty_mask;
|
||||
}
|
||||
|
||||
static int is_rmap_pte(u64 pte)
|
||||
@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
|
||||
|
||||
write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
|
||||
*write_count += 1;
|
||||
WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
|
||||
}
|
||||
|
||||
static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
|
||||
@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
|
||||
return;
|
||||
sp = page_header(__pa(spte));
|
||||
pfn = spte_to_pfn(*spte);
|
||||
if (*spte & PT_ACCESSED_MASK)
|
||||
if (*spte & shadow_accessed_mask)
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
if (is_writeble_pte(*spte))
|
||||
kvm_release_pfn_dirty(pfn);
|
||||
@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
|
||||
* whether the guest actually used the pte (in order to detect
|
||||
* demand paging).
|
||||
*/
|
||||
spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
|
||||
spte = shadow_base_present_pte | shadow_dirty_mask;
|
||||
if (!speculative)
|
||||
pte_access |= PT_ACCESSED_MASK;
|
||||
if (!dirty)
|
||||
pte_access &= ~ACC_WRITE_MASK;
|
||||
if (!(pte_access & ACC_EXEC_MASK))
|
||||
spte |= PT64_NX_MASK;
|
||||
|
||||
spte |= PT_PRESENT_MASK;
|
||||
if (pte_access & ACC_EXEC_MASK)
|
||||
spte |= shadow_x_mask;
|
||||
else
|
||||
spte |= shadow_nx_mask;
|
||||
if (pte_access & ACC_USER_MASK)
|
||||
spte |= PT_USER_MASK;
|
||||
spte |= shadow_user_mask;
|
||||
if (largepage)
|
||||
spte |= PT_PAGE_SIZE_MASK;
|
||||
|
||||
@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
|
||||
| PT_WRITABLE_MASK | PT_USER_MASK;
|
||||
table[index] = __pa(new_table->spt)
|
||||
| PT_PRESENT_MASK | PT_WRITABLE_MASK
|
||||
| shadow_user_mask | shadow_x_mask;
|
||||
}
|
||||
table_addr = table[index] & PT64_BASE_ADDR_MASK;
|
||||
}
|
||||
@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
|
||||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
#ifdef CONFIG_X86_64
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
for (i = 0; i < 4; ++i) {
|
||||
hpa_t root = vcpu->arch.mmu.pae_root[i];
|
||||
|
||||
@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.mmu.root_hpa = root;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
metaphysical = !is_paging(vcpu);
|
||||
if (tdp_enabled)
|
||||
metaphysical = 1;
|
||||
@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
kvm_mmu_free_some_pages(vcpu);
|
||||
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
|
||||
largepage, gfn, pfn, TDP_ROOT_LEVEL);
|
||||
largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
return r;
|
||||
@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->page_fault = tdp_page_fault;
|
||||
context->free = nonpaging_free;
|
||||
context->prefetch_page = nonpaging_prefetch_page;
|
||||
context->shadow_root_level = TDP_ROOT_LEVEL;
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
|
||||
if (!is_paging(vcpu)) {
|
||||
@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 *spte = vcpu->arch.last_pte_updated;
|
||||
|
||||
return !!(spte && (*spte & PT_ACCESSED_MASK));
|
||||
return !!(spte && (*spte & shadow_accessed_mask));
|
||||
}
|
||||
|
||||
static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
|
@ -3,11 +3,38 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
|
||||
#else
|
||||
#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
|
||||
#endif
|
||||
#define PT64_PT_BITS 9
|
||||
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
|
||||
#define PT32_PT_BITS 10
|
||||
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
|
||||
|
||||
#define PT_WRITABLE_SHIFT 1
|
||||
|
||||
#define PT_PRESENT_MASK (1ULL << 0)
|
||||
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
|
||||
#define PT_USER_MASK (1ULL << 2)
|
||||
#define PT_PWT_MASK (1ULL << 3)
|
||||
#define PT_PCD_MASK (1ULL << 4)
|
||||
#define PT_ACCESSED_MASK (1ULL << 5)
|
||||
#define PT_DIRTY_MASK (1ULL << 6)
|
||||
#define PT_PAGE_SIZE_MASK (1ULL << 7)
|
||||
#define PT_PAT_MASK (1ULL << 7)
|
||||
#define PT_GLOBAL_MASK (1ULL << 8)
|
||||
#define PT64_NX_SHIFT 63
|
||||
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
|
||||
|
||||
#define PT_PAT_SHIFT 7
|
||||
#define PT_DIR_PAT_SHIFT 12
|
||||
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
|
||||
|
||||
#define PT32_DIR_PSE36_SIZE 4
|
||||
#define PT32_DIR_PSE36_SHIFT 13
|
||||
#define PT32_DIR_PSE36_MASK \
|
||||
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
|
||||
|
||||
#define PT64_ROOT_LEVEL 4
|
||||
#define PT32_ROOT_LEVEL 2
|
||||
#define PT32E_ROOT_LEVEL 3
|
||||
|
||||
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
static int get_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_LEVEL;
|
||||
#else
|
||||
return PT32E_ROOT_LEVEL;
|
||||
#endif
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops svm_x86_ops = {
|
||||
.cpu_has_kvm_support = has_svm,
|
||||
.disabled_by_bios = is_disabled,
|
||||
@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.inject_pending_vectors = do_interrupt_requests,
|
||||
|
||||
.set_tss_addr = svm_set_tss_addr,
|
||||
.get_tdp_level = get_npt_level,
|
||||
};
|
||||
|
||||
static int __init svm_init(void)
|
||||
|
@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
|
||||
static int flexpriority_enabled = 1;
|
||||
module_param(flexpriority_enabled, bool, 0);
|
||||
|
||||
static int enable_ept = 1;
|
||||
module_param(enable_ept, bool, 0);
|
||||
|
||||
struct vmcs {
|
||||
u32 revision_id;
|
||||
u32 abort;
|
||||
@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
|
||||
return container_of(vcpu, struct vcpu_vmx, vcpu);
|
||||
}
|
||||
|
||||
static int init_rmode_tss(struct kvm *kvm);
|
||||
static int init_rmode(struct kvm *kvm);
|
||||
|
||||
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
|
||||
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
||||
@ -107,6 +110,11 @@ static struct vmcs_config {
|
||||
u32 vmentry_ctrl;
|
||||
} vmcs_config;
|
||||
|
||||
struct vmx_capability {
|
||||
u32 ept;
|
||||
u32 vpid;
|
||||
} vmx_capability;
|
||||
|
||||
#define VMX_SEGMENT_FIELD(seg) \
|
||||
[VCPU_SREG_##seg] = { \
|
||||
.selector = GUEST_##seg##_SELECTOR, \
|
||||
@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
|
||||
}
|
||||
|
||||
static inline int cpu_has_vmx_invept_individual_addr(void)
|
||||
{
|
||||
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
|
||||
}
|
||||
|
||||
static inline int cpu_has_vmx_invept_context(void)
|
||||
{
|
||||
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
|
||||
}
|
||||
|
||||
static inline int cpu_has_vmx_invept_global(void)
|
||||
{
|
||||
return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
|
||||
}
|
||||
|
||||
static inline int cpu_has_vmx_ept(void)
|
||||
{
|
||||
return (vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_ENABLE_EPT);
|
||||
}
|
||||
|
||||
static inline int vm_need_ept(void)
|
||||
{
|
||||
return (cpu_has_vmx_ept() && enable_ept);
|
||||
}
|
||||
|
||||
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
|
||||
{
|
||||
return ((cpu_has_vmx_virtualize_apic_accesses()) &&
|
||||
@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
|
||||
: : "a"(&operand), "c"(ext) : "cc", "memory");
|
||||
}
|
||||
|
||||
static inline void __invept(int ext, u64 eptp, gpa_t gpa)
|
||||
{
|
||||
struct {
|
||||
u64 eptp, gpa;
|
||||
} operand = {eptp, gpa};
|
||||
|
||||
asm volatile (ASM_VMX_INVEPT
|
||||
/* CF==1 or ZF==1 --> rc = -1 */
|
||||
"; ja 1f ; ud2 ; 1:\n"
|
||||
: : "a" (&operand), "c" (ext) : "cc", "memory");
|
||||
}
|
||||
|
||||
static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
|
||||
{
|
||||
int i;
|
||||
@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
|
||||
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
|
||||
}
|
||||
|
||||
static inline void ept_sync_global(void)
|
||||
{
|
||||
if (cpu_has_vmx_invept_global())
|
||||
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
|
||||
}
|
||||
|
||||
static inline void ept_sync_context(u64 eptp)
|
||||
{
|
||||
if (vm_need_ept()) {
|
||||
if (cpu_has_vmx_invept_context())
|
||||
__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
|
||||
else
|
||||
ept_sync_global();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
|
||||
{
|
||||
if (vm_need_ept()) {
|
||||
if (cpu_has_vmx_invept_individual_addr())
|
||||
__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
|
||||
eptp, gpa);
|
||||
else
|
||||
ept_sync_context(eptp);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long vmcs_readl(unsigned long field)
|
||||
{
|
||||
unsigned long value;
|
||||
@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
|
||||
eb |= 1u << 1;
|
||||
if (vcpu->arch.rmode.active)
|
||||
eb = ~0;
|
||||
if (vm_need_ept())
|
||||
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
|
||||
vmcs_write32(EXCEPTION_BITMAP, eb);
|
||||
}
|
||||
|
||||
@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
|
||||
static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
{
|
||||
u32 vmx_msr_low, vmx_msr_high;
|
||||
u32 min, opt;
|
||||
u32 min, opt, min2, opt2;
|
||||
u32 _pin_based_exec_control = 0;
|
||||
u32 _cpu_based_exec_control = 0;
|
||||
u32 _cpu_based_2nd_exec_control = 0;
|
||||
@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
CPU_BASED_CR8_LOAD_EXITING |
|
||||
CPU_BASED_CR8_STORE_EXITING |
|
||||
#endif
|
||||
CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING |
|
||||
CPU_BASED_USE_IO_BITMAPS |
|
||||
CPU_BASED_MOV_DR_EXITING |
|
||||
CPU_BASED_USE_TSC_OFFSETING;
|
||||
@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
~CPU_BASED_CR8_STORE_EXITING;
|
||||
#endif
|
||||
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
|
||||
min = 0;
|
||||
opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
min2 = 0;
|
||||
opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
SECONDARY_EXEC_WBINVD_EXITING |
|
||||
SECONDARY_EXEC_ENABLE_VPID;
|
||||
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
SECONDARY_EXEC_ENABLE_VPID |
|
||||
SECONDARY_EXEC_ENABLE_EPT;
|
||||
if (adjust_vmx_controls(min2, opt2,
|
||||
MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
&_cpu_based_2nd_exec_control) < 0)
|
||||
return -EIO;
|
||||
}
|
||||
@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
|
||||
_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
|
||||
#endif
|
||||
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
|
||||
/* CR3 accesses don't need to cause VM exits when EPT is enabled */
|
||||
min &= ~(CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING);
|
||||
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
|
||||
&_cpu_based_exec_control) < 0)
|
||||
return -EIO;
|
||||
rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
|
||||
vmx_capability.ept, vmx_capability.vpid);
|
||||
}
|
||||
|
||||
min = 0;
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
|
||||
fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
|
||||
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
init_rmode_tss(vcpu->kvm);
|
||||
init_rmode(vcpu->kvm);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
|
||||
}
|
||||
|
||||
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
|
||||
if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
|
||||
printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
|
||||
return;
|
||||
}
|
||||
vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
|
||||
vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
|
||||
vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
|
||||
vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
||||
|
||||
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
|
||||
unsigned long cr0,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!(cr0 & X86_CR0_PG)) {
|
||||
/* From paging/starting to nonpaging */
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
|
||||
vmcs_config.cpu_based_exec_ctrl |
|
||||
(CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING));
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, vcpu->arch.cr4);
|
||||
*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
|
||||
*hw_cr0 &= ~X86_CR0_WP;
|
||||
} else if (!is_paging(vcpu)) {
|
||||
/* From nonpaging to paging */
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
|
||||
vmcs_config.cpu_based_exec_ctrl &
|
||||
~(CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING));
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, vcpu->arch.cr4);
|
||||
if (!(vcpu->arch.cr0 & X86_CR0_WP))
|
||||
*hw_cr0 &= ~X86_CR0_WP;
|
||||
}
|
||||
}
|
||||
|
||||
static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!is_paging(vcpu)) {
|
||||
*hw_cr4 &= ~X86_CR4_PAE;
|
||||
*hw_cr4 |= X86_CR4_PSE;
|
||||
} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
|
||||
*hw_cr4 &= ~X86_CR4_PAE;
|
||||
}
|
||||
|
||||
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
{
|
||||
unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
|
||||
KVM_VM_CR0_ALWAYS_ON;
|
||||
|
||||
vmx_fpu_deactivate(vcpu);
|
||||
|
||||
if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
|
||||
@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (vm_need_ept())
|
||||
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
|
||||
|
||||
vmcs_writel(CR0_READ_SHADOW, cr0);
|
||||
vmcs_writel(GUEST_CR0,
|
||||
(cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
|
||||
vmcs_writel(GUEST_CR0, hw_cr0);
|
||||
vcpu->arch.cr0 = cr0;
|
||||
|
||||
if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
|
||||
vmx_fpu_activate(vcpu);
|
||||
}
|
||||
|
||||
static u64 construct_eptp(unsigned long root_hpa)
|
||||
{
|
||||
u64 eptp;
|
||||
|
||||
/* TODO: write the value read from the MSR */
|
||||
eptp = VMX_EPT_DEFAULT_MT |
|
||||
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
|
||||
eptp |= (root_hpa & PAGE_MASK);
|
||||
|
||||
return eptp;
|
||||
}
|
||||
|
||||
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
{
|
||||
unsigned long guest_cr3;
|
||||
u64 eptp;
|
||||
|
||||
guest_cr3 = cr3;
|
||||
if (vm_need_ept()) {
|
||||
eptp = construct_eptp(cr3);
|
||||
vmcs_write64(EPT_POINTER, eptp);
|
||||
ept_sync_context(eptp);
|
||||
ept_load_pdptrs(vcpu);
|
||||
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
|
||||
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
|
||||
}
|
||||
|
||||
vmx_flush_tlb(vcpu);
|
||||
vmcs_writel(GUEST_CR3, cr3);
|
||||
vmcs_writel(GUEST_CR3, guest_cr3);
|
||||
if (vcpu->arch.cr0 & X86_CR0_PE)
|
||||
vmx_fpu_deactivate(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
{
|
||||
vmcs_writel(CR4_READ_SHADOW, cr4);
|
||||
vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
|
||||
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
|
||||
unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
|
||||
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
|
||||
|
||||
vcpu->arch.cr4 = cr4;
|
||||
if (vm_need_ept())
|
||||
ept_update_paging_mode_cr4(&hw_cr4, vcpu);
|
||||
|
||||
vmcs_writel(CR4_READ_SHADOW, cr4);
|
||||
vmcs_writel(GUEST_CR4, hw_cr4);
|
||||
}
|
||||
|
||||
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
@ -1530,6 +1707,41 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int init_rmode_identity_map(struct kvm *kvm)
|
||||
{
|
||||
int i, r, ret;
|
||||
pfn_t identity_map_pfn;
|
||||
u32 tmp;
|
||||
|
||||
if (!vm_need_ept())
|
||||
return 1;
|
||||
if (unlikely(!kvm->arch.ept_identity_pagetable)) {
|
||||
printk(KERN_ERR "EPT: identity-mapping pagetable "
|
||||
"haven't been allocated!\n");
|
||||
return 0;
|
||||
}
|
||||
if (likely(kvm->arch.ept_identity_pagetable_done))
|
||||
return 1;
|
||||
ret = 0;
|
||||
identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
|
||||
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
|
||||
if (r < 0)
|
||||
goto out;
|
||||
/* Set up identity-mapping pagetable for EPT in real mode */
|
||||
for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
|
||||
tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
|
||||
_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
|
||||
r = kvm_write_guest_page(kvm, identity_map_pfn,
|
||||
&tmp, i * sizeof(tmp), sizeof(tmp));
|
||||
if (r < 0)
|
||||
goto out;
|
||||
}
|
||||
kvm->arch.ept_identity_pagetable_done = true;
|
||||
ret = 1;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void seg_setup(int seg)
|
||||
{
|
||||
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
|
||||
@ -1564,6 +1776,31 @@ out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int alloc_identity_pagetable(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_userspace_memory_region kvm_userspace_mem;
|
||||
int r = 0;
|
||||
|
||||
down_write(&kvm->slots_lock);
|
||||
if (kvm->arch.ept_identity_pagetable)
|
||||
goto out;
|
||||
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
|
||||
kvm_userspace_mem.flags = 0;
|
||||
kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
|
||||
kvm_userspace_mem.memory_size = PAGE_SIZE;
|
||||
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
|
||||
VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
out:
|
||||
up_write(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void allocate_vpid(struct vcpu_vmx *vmx)
|
||||
{
|
||||
int vpid;
|
||||
@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
CPU_BASED_CR8_LOAD_EXITING;
|
||||
#endif
|
||||
}
|
||||
if (!vm_need_ept())
|
||||
exec_control |= CPU_BASED_CR3_STORE_EXITING |
|
||||
CPU_BASED_CR3_LOAD_EXITING;
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
if (vmx->vpid == 0)
|
||||
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
|
||||
if (!vm_need_ept())
|
||||
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
||||
}
|
||||
|
||||
@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Run both real-mode setup steps: init_rmode_tss() first, then
 * init_rmode_identity_map(). The second step is not attempted when the
 * first one fails. Returns 1 when both succeed, 0 otherwise.
 */
static int init_rmode(struct kvm *kvm)
{
	int ok = init_rmode_tss(kvm) && init_rmode_identity_map(kvm);

	return ok;
}
|
||||
|
||||
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
int ret;
|
||||
|
||||
down_read(&vcpu->kvm->slots_lock);
|
||||
if (!init_rmode_tss(vmx->vcpu.kvm)) {
|
||||
if (!init_rmode(vmx->vcpu.kvm)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
|
||||
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
if (is_page_fault(intr_info)) {
|
||||
/* EPT won't cause page fault directly */
|
||||
if (vm_need_ept())
|
||||
BUG();
|
||||
cr2 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
|
||||
(u32)((u64)cr2 >> 32), handler);
|
||||
@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
return kvm_task_switch(vcpu, tss_selector, reason);
|
||||
}
|
||||
|
||||
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
{
|
||||
u64 exit_qualification;
|
||||
enum emulation_result er;
|
||||
gpa_t gpa;
|
||||
unsigned long hva;
|
||||
int gla_validity;
|
||||
int r;
|
||||
|
||||
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
|
||||
|
||||
if (exit_qualification & (1 << 6)) {
|
||||
printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
gla_validity = (exit_qualification >> 7) & 0x3;
|
||||
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
|
||||
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
|
||||
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
|
||||
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
|
||||
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
|
||||
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
|
||||
(long unsigned int)exit_qualification);
|
||||
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
kvm_run->hw.hardware_exit_reason = 0;
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
|
||||
hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
|
||||
if (!kvm_is_error_hva(hva)) {
|
||||
r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
|
||||
if (r < 0) {
|
||||
printk(KERN_ERR "EPT: Not enough memory!\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
/* must be MMIO */
|
||||
er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
|
||||
|
||||
if (er == EMULATE_FAIL) {
|
||||
printk(KERN_ERR
|
||||
"EPT: Fail to handle EPT violation vmexit!er is %d\n",
|
||||
er);
|
||||
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
|
||||
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
|
||||
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
|
||||
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
|
||||
(long unsigned int)exit_qualification);
|
||||
return -ENOTSUPP;
|
||||
} else if (er == EMULATE_DO_MMIO)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
|
||||
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
|
||||
[EXIT_REASON_WBINVD] = handle_wbinvd,
|
||||
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
|
||||
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
|
||||
(u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
|
||||
|
||||
/* Accessing CR3 doesn't cause a VM exit in paging mode, so we need
|
||||
* to sync with the guest's real CR3. */
|
||||
if (vm_need_ept() && is_paging(vcpu)) {
|
||||
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
|
||||
ept_load_pdptrs(vcpu);
|
||||
}
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
kvm_run->fail_entry.hardware_entry_failure_reason
|
||||
@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||||
exit_reason != EXIT_REASON_EXCEPTION_NMI)
|
||||
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
|
||||
exit_reason != EXIT_REASON_EPT_VIOLATION))
|
||||
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
|
||||
"exit reason is 0x%x\n", __func__, exit_reason);
|
||||
if (exit_reason < kvm_vmx_max_exit_handlers
|
||||
@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
allocate_vpid(vmx);
|
||||
if (id == 0 && vm_need_ept()) {
|
||||
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
|
||||
VMX_EPT_WRITABLE_MASK |
|
||||
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
|
||||
kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
|
||||
VMX_EPT_FAKE_DIRTY_MASK, 0ull,
|
||||
VMX_EPT_EXECUTABLE_MASK);
|
||||
kvm_enable_tdp();
|
||||
}
|
||||
|
||||
err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
|
||||
if (err)
|
||||
@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
if (alloc_apic_access_page(kvm) != 0)
|
||||
goto free_vmcs;
|
||||
|
||||
if (vm_need_ept())
|
||||
if (alloc_identity_pagetable(kvm) != 0)
|
||||
goto free_vmcs;
|
||||
|
||||
return &vmx->vcpu;
|
||||
|
||||
free_vmcs:
|
||||
@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
|
||||
}
|
||||
}
|
||||
|
||||
static int get_ept_level(void)
|
||||
{
|
||||
return VMX_EPT_DEFAULT_GAW + 1;
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.cpu_has_kvm_support = cpu_has_kvm_support,
|
||||
.disabled_by_bios = vmx_disabled_by_bios,
|
||||
@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.inject_pending_vectors = do_interrupt_requests,
|
||||
|
||||
.set_tss_addr = vmx_set_tss_addr,
|
||||
.get_tdp_level = get_ept_level,
|
||||
};
|
||||
|
||||
static int __init vmx_init(void)
|
||||
@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
|
||||
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
|
||||
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
|
||||
|
||||
if (cpu_has_vmx_ept())
|
||||
bypass_guest_pf = 0;
|
||||
|
||||
if (bypass_guest_pf)
|
||||
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
|
||||
|
||||
ept_sync_global();
|
||||
|
||||
return 0;
|
||||
|
||||
out2:
|
||||
|
@ -35,6 +35,8 @@
|
||||
#define CPU_BASED_MWAIT_EXITING 0x00000400
|
||||
#define CPU_BASED_RDPMC_EXITING 0x00000800
|
||||
#define CPU_BASED_RDTSC_EXITING 0x00001000
|
||||
#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
|
||||
#define CPU_BASED_CR3_STORE_EXITING 0x00010000
|
||||
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
|
||||
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
|
||||
#define CPU_BASED_TPR_SHADOW 0x00200000
|
||||
@ -49,6 +51,7 @@
|
||||
* Definitions of Secondary Processor-Based VM-Execution Controls.
|
||||
*/
|
||||
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
|
||||
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
|
||||
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
|
||||
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
|
||||
|
||||
@ -100,10 +103,22 @@ enum vmcs_field {
|
||||
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
|
||||
APIC_ACCESS_ADDR = 0x00002014,
|
||||
APIC_ACCESS_ADDR_HIGH = 0x00002015,
|
||||
EPT_POINTER = 0x0000201a,
|
||||
EPT_POINTER_HIGH = 0x0000201b,
|
||||
GUEST_PHYSICAL_ADDRESS = 0x00002400,
|
||||
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
|
||||
VMCS_LINK_POINTER = 0x00002800,
|
||||
VMCS_LINK_POINTER_HIGH = 0x00002801,
|
||||
GUEST_IA32_DEBUGCTL = 0x00002802,
|
||||
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
|
||||
GUEST_PDPTR0 = 0x0000280a,
|
||||
GUEST_PDPTR0_HIGH = 0x0000280b,
|
||||
GUEST_PDPTR1 = 0x0000280c,
|
||||
GUEST_PDPTR1_HIGH = 0x0000280d,
|
||||
GUEST_PDPTR2 = 0x0000280e,
|
||||
GUEST_PDPTR2_HIGH = 0x0000280f,
|
||||
GUEST_PDPTR3 = 0x00002810,
|
||||
GUEST_PDPTR3_HIGH = 0x00002811,
|
||||
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
|
||||
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
|
||||
EXCEPTION_BITMAP = 0x00004004,
|
||||
@ -226,6 +241,8 @@ enum vmcs_field {
|
||||
#define EXIT_REASON_MWAIT_INSTRUCTION 36
|
||||
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
|
||||
#define EXIT_REASON_APIC_ACCESS 44
|
||||
#define EXIT_REASON_EPT_VIOLATION 48
|
||||
#define EXIT_REASON_EPT_MISCONFIG 49
|
||||
#define EXIT_REASON_WBINVD 54
|
||||
|
||||
/*
|
||||
@ -316,15 +333,36 @@ enum vmcs_field {
|
||||
#define MSR_IA32_VMX_CR4_FIXED1 0x489
|
||||
#define MSR_IA32_VMX_VMCS_ENUM 0x48a
|
||||
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
|
||||
#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
|
||||
|
||||
#define MSR_IA32_FEATURE_CONTROL 0x3a
|
||||
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
|
||||
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
|
||||
|
||||
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
|
||||
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
|
||||
|
||||
#define VMX_NR_VPIDS (1 << 16)
|
||||
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
|
||||
#define VMX_VPID_EXTENT_ALL_CONTEXT 2
|
||||
|
||||
#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
|
||||
#define VMX_EPT_EXTENT_CONTEXT 1
|
||||
#define VMX_EPT_EXTENT_GLOBAL 2
|
||||
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
|
||||
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
|
||||
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
|
||||
#define VMX_EPT_DEFAULT_GAW 3
|
||||
#define VMX_EPT_MAX_GAW 0x4
|
||||
#define VMX_EPT_MT_EPTE_SHIFT 3
|
||||
#define VMX_EPT_GAW_EPTP_SHIFT 3
|
||||
#define VMX_EPT_DEFAULT_MT 0x6ull
|
||||
#define VMX_EPT_READABLE_MASK 0x1ull
|
||||
#define VMX_EPT_WRITABLE_MASK 0x2ull
|
||||
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
|
||||
#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
|
||||
#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
|
||||
|
||||
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
|
||||
|
||||
#endif
|
||||
|
@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)
|
||||
|
||||
kvm_x86_ops = ops;
|
||||
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
|
||||
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
|
||||
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
|
||||
PT_DIRTY_MASK, PT64_NX_MASK, 0);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
|
||||
kvm_x86_ops->decache_regs(vcpu);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
return 0;
|
||||
@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
|
||||
}
|
||||
|
||||
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
|
||||
cseg_desc.type &= ~(1 << 8); //clear the B flag
|
||||
cseg_desc.type &= ~(1 << 1); //clear the B flag
|
||||
save_guest_segment_descriptor(vcpu, tr_seg.selector,
|
||||
&cseg_desc);
|
||||
}
|
||||
@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
|
||||
}
|
||||
|
||||
if (reason != TASK_SWITCH_IRET) {
|
||||
nseg_desc.type |= (1 << 8);
|
||||
nseg_desc.type |= (1 << 1);
|
||||
save_guest_segment_descriptor(vcpu, tss_selector,
|
||||
&nseg_desc);
|
||||
}
|
||||
@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned after_mxcsr_mask;
|
||||
|
||||
/*
|
||||
* Touch the fpu the first time in non atomic context as if
|
||||
* this is the first fpu instruction the exception handler
|
||||
* will fire before the instruction returns and it'll have to
|
||||
* allocate ram with GFP_KERNEL.
|
||||
*/
|
||||
if (!used_math())
|
||||
fx_save(&vcpu->arch.host_fx_image);
|
||||
|
||||
/* Initialize guest FPU by resetting ours and saving into guest's */
|
||||
preempt_disable();
|
||||
fx_save(&vcpu->arch.host_fx_image);
|
||||
fpu_init();
|
||||
fx_finit();
|
||||
fx_save(&vcpu->arch.guest_fx_image);
|
||||
fx_restore(&vcpu->arch.host_fx_image);
|
||||
preempt_enable();
|
||||
@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
kvm_free_physmem(kvm);
|
||||
if (kvm->arch.apic_access_page)
|
||||
put_page(kvm->arch.apic_access_page);
|
||||
if (kvm->arch.ept_identity_pagetable)
|
||||
put_page(kvm->arch.ept_identity_pagetable);
|
||||
kfree(kvm);
|
||||
}
|
||||
|
||||
|
@ -1761,6 +1761,7 @@ twobyte_insn:
|
||||
case 6: /* lmsw */
|
||||
realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
|
||||
&ctxt->eflags);
|
||||
c->dst.type = OP_NONE;
|
||||
break;
|
||||
case 7: /* invlpg*/
|
||||
emulate_invlpg(ctxt->vcpu, memop);
|
||||
|
@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr)
|
||||
|
||||
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
|
||||
/*
|
||||
* XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
|
||||
*
|
||||
* These stub functions are needed to compile 32-bit NUMA when SRAT is
|
||||
* not set. There are functions in srat_64.c for parsing this table
|
||||
* and it may be possible to make them common functions.
|
||||
*/
|
||||
void acpi_numa_slit_init (struct acpi_table_slit *slit)
|
||||
{
|
||||
printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
|
||||
}
|
||||
|
||||
void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
|
||||
{
|
||||
}
|
||||
|
||||
void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
|
||||
{
|
||||
}
|
||||
|
||||
void acpi_numa_arch_fixup(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
|
||||
|
@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve)
|
||||
__FIXADDR_TOP = -reserve - PAGE_SIZE;
|
||||
__VMALLOC_RESERVE += reserve;
|
||||
}
|
||||
|
||||
int pmd_bad(pmd_t pmd)
|
||||
{
|
||||
WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
|
||||
|
||||
return pmd_bad_v1(pmd);
|
||||
}
|
||||
|
@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT) += direct.o
|
||||
obj-$(CONFIG_PCI_OLPC) += olpc.o
|
||||
|
||||
pci-y := fixup.o
|
||||
|
||||
# Do not change the ordering here. There is a nasty init function
|
||||
# ordering dependency which breaks when you move acpi.o below
|
||||
# legacy/irq.o
|
||||
pci-$(CONFIG_ACPI) += acpi.o
|
||||
pci-y += legacy.o irq.o
|
||||
|
||||
pci-$(CONFIG_X86_VISWS) += visws.o fixup.o
|
||||
pci-$(CONFIG_X86_NUMAQ) += numa.o irq.o
|
||||
# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
|
||||
# therefore correct. This needs a proper fix by disentangling the code.
|
||||
pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
|
||||
pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
|
||||
|
||||
# Necessary for NUMAQ as well
|
||||
pci-$(CONFIG_NUMA) += mp_bus_to_node.o
|
||||
|
||||
obj-y += $(pci-y) common.o early.o
|
||||
|
@ -6,45 +6,6 @@
|
||||
#include <asm/numa.h>
|
||||
#include "pci.h"
|
||||
|
||||
static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
|
||||
{
|
||||
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
|
||||
printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = {
|
||||
/*
|
||||
* Systems where PCI IO resource ISA alignment can be skipped
|
||||
* when the ISA enable bit in the bridge control is not set
|
||||
*/
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3800",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3850",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3950",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
struct pci_root_info {
|
||||
char *name;
|
||||
unsigned int res_num;
|
||||
@ -196,8 +157,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
|
||||
int pxm;
|
||||
#endif
|
||||
|
||||
dmi_check_system(acpi_pciprobe_dmi_table);
|
||||
|
||||
if (domain && !pci_domains_supported) {
|
||||
printk(KERN_WARNING "PCI: Multiple domains not supported "
|
||||
"(dom %d, bus %d)\n", domain, busnum);
|
||||
|
@ -77,17 +77,48 @@ int pcibios_scanned;
|
||||
*/
|
||||
DEFINE_SPINLOCK(pci_config_lock);
|
||||
|
||||
static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
|
||||
static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
|
||||
{
|
||||
struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
|
||||
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
|
||||
printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rom_r->parent)
|
||||
return;
|
||||
if (rom_r->start)
|
||||
/* we deal with BIOS assigned ROM later */
|
||||
return;
|
||||
if (!(pci_probe & PCI_ASSIGN_ROMS))
|
||||
rom_r->start = rom_r->end = rom_r->flags = 0;
|
||||
static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = {
|
||||
/*
|
||||
* Systems where PCI IO resource ISA alignment can be skipped
|
||||
* when the ISA enable bit in the bridge control is not set
|
||||
*/
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3800",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3850",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.callback = can_skip_ioresource_align,
|
||||
.ident = "IBM System x3950",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
void __init dmi_check_skip_isa_align(void)
|
||||
{
|
||||
dmi_check_system(can_skip_pciprobe_dmi_table);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -97,11 +128,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
|
||||
|
||||
void __devinit pcibios_fixup_bus(struct pci_bus *b)
|
||||
{
|
||||
struct pci_dev *dev;
|
||||
|
||||
pci_read_bridge_bases(b);
|
||||
list_for_each_entry(dev, &b->devices, bus_list)
|
||||
pcibios_fixup_device_resources(dev);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -318,13 +345,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
void __init dmi_check_pciprobe(void)
|
||||
{
|
||||
dmi_check_system(pciprobe_dmi_table);
|
||||
}
|
||||
|
||||
struct pci_bus * __devinit pcibios_scan_root(int busnum)
|
||||
{
|
||||
struct pci_bus *bus = NULL;
|
||||
struct pci_sysdata *sd;
|
||||
|
||||
dmi_check_system(pciprobe_dmi_table);
|
||||
|
||||
while ((bus = pci_find_next_bus(bus)) != NULL) {
|
||||
if (bus->number == busnum) {
|
||||
/* Already scanned */
|
||||
@ -462,6 +492,9 @@ char * __devinit pcibios_setup(char *str)
|
||||
} else if (!strcmp(str, "routeirq")) {
|
||||
pci_routeirq = 1;
|
||||
return NULL;
|
||||
} else if (!strcmp(str, "skip_isa_align")) {
|
||||
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
|
||||
return NULL;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
@ -489,7 +522,7 @@ void pcibios_disable_device (struct pci_dev *dev)
|
||||
pcibios_disable_irq(dev);
|
||||
}
|
||||
|
||||
struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
|
||||
struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
|
||||
{
|
||||
struct pci_bus *bus = NULL;
|
||||
struct pci_sysdata *sd;
|
||||
@ -512,7 +545,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
|
||||
return bus;
|
||||
}
|
||||
|
||||
struct pci_bus *pci_scan_bus_with_sysdata(int busno)
|
||||
struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
|
||||
{
|
||||
return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
|
||||
}
|
||||
|
@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
|
||||
*/
|
||||
static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
|
||||
{
|
||||
dev->cfg_size = pci_cfg_space_size_ext(dev, 0);
|
||||
dev->cfg_size = pci_cfg_space_size_ext(dev);
|
||||
}
|
||||
|
||||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
|
||||
|
@ -33,6 +33,10 @@ static __init int pci_access_init(void)
|
||||
printk(KERN_ERR
|
||||
"PCI: Fatal: No config space access function found\n");
|
||||
|
||||
dmi_check_pciprobe();
|
||||
|
||||
dmi_check_skip_isa_align();
|
||||
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(pci_access_init);
|
||||
|
@ -38,6 +38,9 @@ enum pci_bf_sort_state {
|
||||
pci_dmi_bf,
|
||||
};
|
||||
|
||||
extern void __init dmi_check_pciprobe(void);
|
||||
extern void __init dmi_check_skip_isa_align(void);
|
||||
|
||||
/* pci-i386.c */
|
||||
|
||||
extern unsigned int pcibios_max_latency;
|
||||
|
@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
|
||||
Elf32_Shdr *shdr;
|
||||
int i;
|
||||
|
||||
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
|
||||
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
|
||||
!elf_check_arch_ia32(ehdr) ||
|
||||
ehdr->e_type != ET_DYN);
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com>
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info)
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL(fb_is_primary_device);
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -54,15 +54,16 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
|
||||
|
||||
static void drive_stat_acct(struct request *rq, int new_io)
|
||||
{
|
||||
struct hd_struct *part;
|
||||
int rw = rq_data_dir(rq);
|
||||
|
||||
if (!blk_fs_request(rq) || !rq->rq_disk)
|
||||
return;
|
||||
|
||||
if (!new_io) {
|
||||
__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
|
||||
} else {
|
||||
struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
|
||||
part = get_part(rq->rq_disk, rq->sector);
|
||||
if (!new_io)
|
||||
__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
|
||||
else {
|
||||
disk_round_stats(rq->rq_disk);
|
||||
rq->rq_disk->in_flight++;
|
||||
if (part) {
|
||||
@ -253,9 +254,11 @@ EXPORT_SYMBOL(__generic_unplug_device);
|
||||
**/
|
||||
void generic_unplug_device(struct request_queue *q)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
__generic_unplug_device(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
if (blk_queue_plugged(q)) {
|
||||
spin_lock_irq(q->queue_lock);
|
||||
__generic_unplug_device(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(generic_unplug_device);
|
||||
|
||||
@ -1536,10 +1539,11 @@ static int __end_that_request_first(struct request *req, int error,
|
||||
}
|
||||
|
||||
if (blk_fs_request(req) && req->rq_disk) {
|
||||
struct hd_struct *part = get_part(req->rq_disk, req->sector);
|
||||
const int rw = rq_data_dir(req);
|
||||
|
||||
all_stat_add(req->rq_disk, sectors[rw],
|
||||
nr_bytes >> 9, req->sector);
|
||||
all_stat_add(req->rq_disk, part, sectors[rw],
|
||||
nr_bytes >> 9, req->sector);
|
||||
}
|
||||
|
||||
total_bytes = bio_nbytes = 0;
|
||||
@ -1725,8 +1729,8 @@ static void end_that_request_last(struct request *req, int error)
|
||||
const int rw = rq_data_dir(req);
|
||||
struct hd_struct *part = get_part(disk, req->sector);
|
||||
|
||||
__all_stat_inc(disk, ios[rw], req->sector);
|
||||
__all_stat_add(disk, ticks[rw], duration, req->sector);
|
||||
__all_stat_inc(disk, part, ios[rw], req->sector);
|
||||
__all_stat_add(disk, part, ticks[rw], duration, req->sector);
|
||||
disk_round_stats(disk);
|
||||
disk->in_flight--;
|
||||
if (part) {
|
||||
|
@ -41,8 +41,8 @@ int put_io_context(struct io_context *ioc)
|
||||
rcu_read_lock();
|
||||
if (ioc->aic && ioc->aic->dtor)
|
||||
ioc->aic->dtor(ioc->aic);
|
||||
rcu_read_unlock();
|
||||
cfq_dtor(ioc);
|
||||
rcu_read_unlock();
|
||||
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
return 1;
|
||||
|
@ -149,9 +149,9 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
|
||||
static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
|
||||
struct bio *nxt)
|
||||
{
|
||||
if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(bio, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, bio);
|
||||
if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(nxt, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, nxt);
|
||||
if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
|
||||
BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
|
||||
@ -312,9 +312,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
|
||||
q->last_merge = NULL;
|
||||
return 0;
|
||||
}
|
||||
if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(req->biotail, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, req->biotail);
|
||||
if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(bio, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, bio);
|
||||
len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
|
||||
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
|
||||
@ -352,9 +352,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
|
||||
return 0;
|
||||
}
|
||||
len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
|
||||
if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(bio, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, bio);
|
||||
if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
|
||||
if (!bio_flagged(req->bio, BIO_SEG_VALID))
|
||||
blk_recount_segments(q, req->bio);
|
||||
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
|
||||
!BIOVEC_VIRT_OVERSIZE(len)) {
|
||||
|
@ -146,11 +146,13 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
|
||||
unsigned long nm;
|
||||
ssize_t ret = queue_var_store(&nm, page, count);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (nm)
|
||||
set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
|
||||
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
|
||||
else
|
||||
clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
|
||||
queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q)
|
||||
__blk_free_tags(bqt);
|
||||
|
||||
q->queue_tags = NULL;
|
||||
queue_flag_clear(QUEUE_FLAG_QUEUED, q);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags);
|
||||
**/
|
||||
void blk_queue_free_tags(struct request_queue *q)
|
||||
{
|
||||
queue_flag_clear(QUEUE_FLAG_QUEUED, q);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_free_tags);
|
||||
|
||||
@ -171,6 +171,9 @@ EXPORT_SYMBOL(blk_init_tags);
|
||||
* @q: the request queue for the device
|
||||
* @depth: the maximum queue depth supported
|
||||
* @tags: the tag to use
|
||||
*
|
||||
* Queue lock must be held here if the function is called to resize an
|
||||
* existing map.
|
||||
**/
|
||||
int blk_queue_init_tags(struct request_queue *q, int depth,
|
||||
struct blk_queue_tag *tags)
|
||||
@ -197,7 +200,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
|
||||
* assign it, all done
|
||||
*/
|
||||
q->queue_tags = tags;
|
||||
queue_flag_set(QUEUE_FLAG_QUEUED, q);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
|
||||
INIT_LIST_HEAD(&q->tag_busy_list);
|
||||
return 0;
|
||||
fail:
|
||||
|
@ -1142,6 +1142,17 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
|
||||
kmem_cache_free(cfq_pool, cfqq);
|
||||
}
|
||||
|
||||
static void
|
||||
__call_for_each_cic(struct io_context *ioc,
|
||||
void (*func)(struct io_context *, struct cfq_io_context *))
|
||||
{
|
||||
struct cfq_io_context *cic;
|
||||
struct hlist_node *n;
|
||||
|
||||
hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
|
||||
func(ioc, cic);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call func for each cic attached to this ioc.
|
||||
*/
|
||||
@ -1149,12 +1160,8 @@ static void
|
||||
call_for_each_cic(struct io_context *ioc,
|
||||
void (*func)(struct io_context *, struct cfq_io_context *))
|
||||
{
|
||||
struct cfq_io_context *cic;
|
||||
struct hlist_node *n;
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
|
||||
func(ioc, cic);
|
||||
__call_for_each_cic(ioc, func);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -1198,7 +1205,7 @@ static void cfq_free_io_context(struct io_context *ioc)
|
||||
* should be ok to iterate over the known list, we will see all cic's
|
||||
* since no new ones are added.
|
||||
*/
|
||||
call_for_each_cic(ioc, cic_free_func);
|
||||
__call_for_each_cic(ioc, cic_free_func);
|
||||
}
|
||||
|
||||
static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
|
||||
@ -1296,10 +1303,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
|
||||
printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
|
||||
case IOPRIO_CLASS_NONE:
|
||||
/*
|
||||
* no prio set, place us in the middle of the BE classes
|
||||
* no prio set, inherit CPU scheduling settings
|
||||
*/
|
||||
cfqq->ioprio = task_nice_ioprio(tsk);
|
||||
cfqq->ioprio_class = IOPRIO_CLASS_BE;
|
||||
cfqq->ioprio_class = task_nice_ioclass(tsk);
|
||||
break;
|
||||
case IOPRIO_CLASS_RT:
|
||||
cfqq->ioprio = task_ioprio(ioc);
|
||||
|
@ -1,7 +1,17 @@
|
||||
menuconfig ACCESSIBILITY
|
||||
bool "Accessibility support"
|
||||
---help---
|
||||
Enable a submenu where accessibility items may be enabled.
|
||||
Accessibility handles all special kinds of hardware devices or
|
||||
software adapters which help people with disabilities (e.g.
|
||||
blindness) to use computers.
|
||||
|
||||
That includes braille devices, speech synthesis, keyboard
|
||||
remapping, etc.
|
||||
|
||||
Say Y here to get to see options for accessibility.
|
||||
This option alone does not add any kernel code.
|
||||
|
||||
If you say N, all options in this submenu will be skipped and disabled.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
|
@ -205,8 +205,8 @@ config SATA_VITESSE
|
||||
If unsure, say N.
|
||||
|
||||
config SATA_INIC162X
|
||||
tristate "Initio 162x SATA support (HIGHLY EXPERIMENTAL)"
|
||||
depends on PCI && EXPERIMENTAL
|
||||
tristate "Initio 162x SATA support"
|
||||
depends on PCI
|
||||
help
|
||||
This option enables support for Initio 162x Serial ATA.
|
||||
|
||||
@ -697,6 +697,15 @@ config PATA_SCC
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PATA_SCH
|
||||
tristate "Intel SCH PATA support"
|
||||
depends on PCI
|
||||
help
|
||||
This option enables support for Intel SCH PATA on the Intel
|
||||
SCH (US15W, US15L, UL11L) series host controllers.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PATA_BF54X
|
||||
tristate "Blackfin 54x ATAPI support"
|
||||
depends on BF542 || BF548 || BF549
|
||||
|
@ -67,6 +67,7 @@ obj-$(CONFIG_PATA_SIS) += pata_sis.o
|
||||
obj-$(CONFIG_PATA_TRIFLEX) += pata_triflex.o
|
||||
obj-$(CONFIG_PATA_IXP4XX_CF) += pata_ixp4xx_cf.o
|
||||
obj-$(CONFIG_PATA_SCC) += pata_scc.o
|
||||
obj-$(CONFIG_PATA_SCH) += pata_sch.o
|
||||
obj-$(CONFIG_PATA_BF54X) += pata_bf54x.o
|
||||
obj-$(CONFIG_PATA_PLATFORM) += pata_platform.o
|
||||
obj-$(CONFIG_PATA_OF_PLATFORM) += pata_of_platform.o
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user