mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-07 13:53:24 +00:00
Merge branch 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Avi Kivity: "Changes include timekeeping improvements, support for assigning host PCI devices that share interrupt lines, s390 user-controlled guests, a large ppc update, and random fixes." This is with the sign-off's fixed, hopefully next merge window we won't have rebased commits. * 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits) KVM: Convert intx_mask_lock to spin lock KVM: x86: fix kvm_write_tsc() TSC matching thinko x86: kvmclock: abstract save/restore sched_clock_state KVM: nVMX: Fix erroneous exception bitmap check KVM: Ignore the writes to MSR_K7_HWCR(3) KVM: MMU: make use of ->root_level in reset_rsvds_bits_mask KVM: PMU: add proper support for fixed counter 2 KVM: PMU: Fix raw event check KVM: PMU: warn when pin control is set in eventsel msr KVM: VMX: Fix delayed load of shared MSRs KVM: use correct tlbs dirty type in cmpxchg KVM: Allow host IRQ sharing for assigned PCI 2.3 devices KVM: Ensure all vcpus are consistent with in-kernel irqchip settings KVM: x86 emulator: Allow PM/VM86 switch during task switch KVM: SVM: Fix CPL updates KVM: x86 emulator: VM86 segments must have DPL 3 KVM: x86 emulator: Fix task switch privilege checks arch/powerpc/kvm/book3s_hv.c: included linux/sched.h twice KVM: x86 emulator: correctly mask pmc index bits in RDPMC instruction emulation KVM: mmu_notifier: Flush TLBs before releasing mmu_lock ...
This commit is contained in:
commit
2e7580b0e7
@ -95,7 +95,7 @@ described as 'basic' will be available.
|
||||
Capability: basic
|
||||
Architectures: all
|
||||
Type: system ioctl
|
||||
Parameters: none
|
||||
Parameters: machine type identifier (KVM_VM_*)
|
||||
Returns: a VM fd that can be used to control the new virtual machine.
|
||||
|
||||
The new VM has no virtual cpus and no memory. An mmap() of a VM fd
|
||||
@ -103,6 +103,11 @@ will access the virtual machine's physical address space; offset zero
|
||||
corresponds to guest physical address zero. Use of mmap() on a VM fd
|
||||
is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
|
||||
available.
|
||||
You most certainly want to use 0 as machine type.
|
||||
|
||||
In order to create user controlled virtual machines on S390, check
|
||||
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
|
||||
privileged user (CAP_SYS_ADMIN).
|
||||
|
||||
4.3 KVM_GET_MSR_INDEX_LIST
|
||||
|
||||
@ -213,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants
|
||||
single-threaded guest vcpus, it should make all vcpu ids be a multiple
|
||||
of the number of vcpus per vcore.
|
||||
|
||||
For virtual cpus that have been created with S390 user controlled virtual
|
||||
machines, the resulting vcpu fd can be memory mapped at page offset
|
||||
KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
|
||||
cpu's hardware control block.
|
||||
|
||||
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: basic
|
||||
@ -1159,6 +1169,14 @@ following flags are specified:
|
||||
|
||||
/* Depends on KVM_CAP_IOMMU */
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
/* The following two depend on KVM_CAP_PCI_2_3 */
|
||||
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
|
||||
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
|
||||
|
||||
If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts
|
||||
via the PCI-2.3-compliant device-level mask, thus enabling IRQ sharing with other
|
||||
assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the
|
||||
guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details.
|
||||
|
||||
The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
|
||||
isolation of the device. Usages not specifying this flag are deprecated.
|
||||
@ -1399,6 +1417,71 @@ The following flags are defined:
|
||||
If datamatch flag is set, the event will be signaled only if the written value
|
||||
to the registered address is equal to datamatch in struct kvm_ioeventfd.
|
||||
|
||||
4.59 KVM_DIRTY_TLB
|
||||
|
||||
Capability: KVM_CAP_SW_TLB
|
||||
Architectures: ppc
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_dirty_tlb (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
struct kvm_dirty_tlb {
|
||||
__u64 bitmap;
|
||||
__u32 num_dirty;
|
||||
};
|
||||
|
||||
This must be called whenever userspace has changed an entry in the shared
|
||||
TLB, prior to calling KVM_RUN on the associated vcpu.
|
||||
|
||||
The "bitmap" field is the userspace address of an array. This array
|
||||
consists of a number of bits, equal to the total number of TLB entries as
|
||||
determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
|
||||
nearest multiple of 64.
|
||||
|
||||
Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
|
||||
array.
|
||||
|
||||
The array is little-endian: the bit 0 is the least significant bit of the
|
||||
first byte, bit 8 is the least significant bit of the second byte, etc.
|
||||
This avoids any complications with differing word sizes.
|
||||
|
||||
The "num_dirty" field is a performance hint for KVM to determine whether it
|
||||
should skip processing the bitmap and just invalidate everything. It must
|
||||
be set to the number of set bits in the bitmap.
|
||||
|
||||
4.60 KVM_ASSIGN_SET_INTX_MASK
|
||||
|
||||
Capability: KVM_CAP_PCI_2_3
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_assigned_pci_dev (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Allows userspace to mask PCI INTx interrupts from the assigned device. The
|
||||
kernel will not deliver INTx interrupts to the guest between setting and
|
||||
clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of
|
||||
and emulation of PCI 2.3 INTx disable command register behavior.
|
||||
|
||||
This may be used for both PCI 2.3 devices supporting INTx disable natively and
|
||||
older devices lacking this support. Userspace is responsible for emulating the
|
||||
read value of the INTx disable bit in the guest visible PCI command register.
|
||||
When modifying the INTx disable state, userspace should precede updating the
|
||||
physical device command register by calling this ioctl to inform the kernel of
|
||||
the new intended INTx mask state.
|
||||
|
||||
Note that the kernel uses the device INTx disable bit to internally manage the
|
||||
device interrupt state for PCI 2.3 devices. Reads of this register may
|
||||
therefore not match the expected value. Writes should always use the guest
|
||||
intended INTx disable value rather than attempting to read-copy-update the
|
||||
current physical device state. Races between user and kernel updates to the
|
||||
INTx disable bit are handled lazily in the kernel. It's possible the device
|
||||
may generate unintended interrupts, but they will not be injected into the
|
||||
guest.
|
||||
|
||||
See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
|
||||
by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
|
||||
evaluated.
|
||||
|
||||
4.62 KVM_CREATE_SPAPR_TCE
|
||||
|
||||
Capability: KVM_CAP_SPAPR_TCE
|
||||
@ -1491,6 +1574,101 @@ following algorithm:
|
||||
Some guests configure the LINT1 NMI input to cause a panic, aiding in
|
||||
debugging.
|
||||
|
||||
4.65 KVM_S390_UCAS_MAP
|
||||
|
||||
Capability: KVM_CAP_S390_UCONTROL
|
||||
Architectures: s390
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_s390_ucas_mapping (in)
|
||||
Returns: 0 in case of success
|
||||
|
||||
The parameter is defined like this:
|
||||
struct kvm_s390_ucas_mapping {
|
||||
__u64 user_addr;
|
||||
__u64 vcpu_addr;
|
||||
__u64 length;
|
||||
};
|
||||
|
||||
This ioctl maps the memory at "user_addr" with the length "length" to
|
||||
the vcpu's address space starting at "vcpu_addr". All parameters need to
|
||||
be aligned by 1 megabyte.
|
||||
|
||||
4.66 KVM_S390_UCAS_UNMAP
|
||||
|
||||
Capability: KVM_CAP_S390_UCONTROL
|
||||
Architectures: s390
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_s390_ucas_mapping (in)
|
||||
Returns: 0 in case of success
|
||||
|
||||
The parameter is defined like this:
|
||||
struct kvm_s390_ucas_mapping {
|
||||
__u64 user_addr;
|
||||
__u64 vcpu_addr;
|
||||
__u64 length;
|
||||
};
|
||||
|
||||
This ioctl unmaps the memory in the vcpu's address space starting at
|
||||
"vcpu_addr" with the length "length". The field "user_addr" is ignored.
|
||||
All parameters need to be aligned by 1 megabyte.
|
||||
|
||||
4.67 KVM_S390_VCPU_FAULT
|
||||
|
||||
Capability: KVM_CAP_S390_UCONTROL
|
||||
Architectures: s390
|
||||
Type: vcpu ioctl
|
||||
Parameters: vcpu absolute address (in)
|
||||
Returns: 0 in case of success
|
||||
|
||||
This call creates a page table entry on the virtual cpu's address space
|
||||
(for user controlled virtual machines) or the virtual machine's address
|
||||
space (for regular virtual machines). This only works for minor faults,
|
||||
thus it's recommended to access subject memory page via the user page
|
||||
table upfront. This is useful to handle validity intercepts for user
|
||||
controlled virtual machines to fault in the virtual cpu's lowcore pages
|
||||
prior to calling the KVM_RUN ioctl.
|
||||
|
||||
4.68 KVM_SET_ONE_REG
|
||||
|
||||
Capability: KVM_CAP_ONE_REG
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_one_reg (in)
|
||||
Returns: 0 on success, negative value on failure
|
||||
|
||||
struct kvm_one_reg {
|
||||
__u64 id;
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
Using this ioctl, a single vcpu register can be set to a specific value
|
||||
defined by user space with the passed in struct kvm_one_reg, where id
|
||||
refers to the register identifier as described below and addr is a pointer
|
||||
to a variable with the respective size. There can be architecture agnostic
|
||||
and architecture specific registers. Each have their own range of operation
|
||||
and their own constants and width. To keep track of the implemented
|
||||
registers, find a list below:
|
||||
|
||||
Arch | Register | Width (bits)
|
||||
| |
|
||||
PPC | KVM_REG_PPC_HIOR | 64
|
||||
|
||||
4.69 KVM_GET_ONE_REG
|
||||
|
||||
Capability: KVM_CAP_ONE_REG
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_one_reg (in and out)
|
||||
Returns: 0 on success, negative value on failure
|
||||
|
||||
This ioctl allows userspace to read the value of a single register implemented
|
||||
in a vcpu. The register to read is indicated by the "id" field of the
|
||||
kvm_one_reg struct passed in. On success, the register value can be found
|
||||
at the memory location pointed to by "addr".
|
||||
|
||||
The list of registers accessible using this interface is identical to the
|
||||
list in 4.68.
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
@ -1651,6 +1829,20 @@ s390 specific.
|
||||
|
||||
s390 specific.
|
||||
|
||||
/* KVM_EXIT_S390_UCONTROL */
|
||||
struct {
|
||||
__u64 trans_exc_code;
|
||||
__u32 pgm_code;
|
||||
} s390_ucontrol;
|
||||
|
||||
s390 specific. A page fault has occurred for a user controlled virtual
|
||||
machine (KVM_VM_S390_UCONTROL) on its host page table that cannot be
|
||||
resolved by the kernel.
|
||||
The program code and the translation exception code that were placed
|
||||
in the cpu's lowcore are presented here as defined by the z Architecture
|
||||
Principles of Operation Book in the Chapter for Dynamic Address Translation
|
||||
(DAT)
|
||||
|
||||
/* KVM_EXIT_DCR */
|
||||
struct {
|
||||
__u32 dcrn;
|
||||
@ -1693,6 +1885,29 @@ developer registration required to access it).
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
|
||||
/*
|
||||
* shared registers between kvm and userspace.
|
||||
* kvm_valid_regs specifies the register classes set by the host
|
||||
* kvm_dirty_regs specifies the register classes dirtied by userspace
|
||||
* struct kvm_sync_regs is architecture specific, as well as the
|
||||
* bits for kvm_valid_regs and kvm_dirty_regs
|
||||
*/
|
||||
__u64 kvm_valid_regs;
|
||||
__u64 kvm_dirty_regs;
|
||||
union {
|
||||
struct kvm_sync_regs regs;
|
||||
char padding[1024];
|
||||
} s;
|
||||
|
||||
If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
|
||||
certain guest registers without having to call SET/GET_*REGS. Thus we can
|
||||
avoid some system call overhead if userspace has to handle the exit.
|
||||
Userspace can query the validity of the structure by checking
|
||||
kvm_valid_regs for specific bits. These bits are architecture specific
|
||||
and usually define the validity of a group of registers (e.g. one bit
|
||||
for general purpose registers)
|
||||
|
||||
};
|
||||
|
||||
6. Capabilities that can be enabled
|
||||
@ -1741,3 +1956,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
|
||||
HTAB invisible to the guest.
|
||||
|
||||
When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
|
||||
|
||||
6.3 KVM_CAP_SW_TLB
|
||||
|
||||
Architectures: ppc
|
||||
Parameters: args[0] is the address of a struct kvm_config_tlb
|
||||
Returns: 0 on success; -1 on error
|
||||
|
||||
struct kvm_config_tlb {
|
||||
__u64 params;
|
||||
__u64 array;
|
||||
__u32 mmu_type;
|
||||
__u32 array_len;
|
||||
};
|
||||
|
||||
Configures the virtual CPU's TLB array, establishing a shared memory area
|
||||
between userspace and KVM. The "params" and "array" fields are userspace
|
||||
addresses of mmu-type-specific data structures. The "array_len" field is a
|
||||
safety mechanism, and should be set to the size in bytes of the memory that
|
||||
userspace has reserved for the array. It must be at least the size dictated
|
||||
by "mmu_type" and "params".
|
||||
|
||||
While KVM_RUN is active, the shared region is under control of KVM. Its
|
||||
contents are undefined, and any modification by userspace results in
|
||||
boundedly undefined behavior.
|
||||
|
||||
On return from KVM_RUN, the shared region will reflect the current state of
|
||||
the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB
|
||||
to tell KVM which entries have been changed, prior to calling KVM_RUN again
|
||||
on this vcpu.
|
||||
|
||||
For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
|
||||
- The "params" field is of type "struct kvm_book3e_206_tlb_params".
|
||||
- The "array" field points to an array of type "struct
|
||||
kvm_book3e_206_tlb_entry".
|
||||
- The array consists of all entries in the first TLB, followed by all
|
||||
entries in the second TLB.
|
||||
- Within a TLB, entries are ordered first by increasing set number. Within a
|
||||
set, entries are ordered by way (increasing ESEL).
|
||||
- The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
|
||||
where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
|
||||
- The tsize field of mas1 shall be set to 4K on TLB0, even though the
|
||||
hardware ignores this value for TLB0.
|
||||
|
@ -81,28 +81,8 @@ additional registers to the magic page. If you add fields to the magic page,
|
||||
also define a new hypercall feature to indicate that the host can give you more
|
||||
registers. Only if the host supports the additional features, make use of them.
|
||||
|
||||
The magic page has the following layout as described in
|
||||
arch/powerpc/include/asm/kvm_para.h:
|
||||
|
||||
struct kvm_vcpu_arch_shared {
|
||||
__u64 scratch1;
|
||||
__u64 scratch2;
|
||||
__u64 scratch3;
|
||||
__u64 critical; /* Guest may not get interrupts if == r1 */
|
||||
__u64 sprg0;
|
||||
__u64 sprg1;
|
||||
__u64 sprg2;
|
||||
__u64 sprg3;
|
||||
__u64 srr0;
|
||||
__u64 srr1;
|
||||
__u64 dar;
|
||||
__u64 msr;
|
||||
__u32 dsisr;
|
||||
__u32 int_pending; /* Tells the guest if we have an interrupt */
|
||||
};
|
||||
|
||||
Additions to the page must only occur at the end. Struct fields are always 32
|
||||
or 64 bit aligned, depending on them being 32 or 64 bit wide respectively.
|
||||
The magic page layout is described by struct kvm_vcpu_arch_shared
|
||||
in arch/powerpc/include/asm/kvm_para.h.
|
||||
|
||||
Magic page features
|
||||
===================
|
||||
|
@ -261,4 +261,8 @@ struct kvm_debug_exit_arch {
|
||||
struct kvm_guest_debug_arch {
|
||||
};
|
||||
|
||||
/* definition of registers in kvm_run */
|
||||
struct kvm_sync_regs {
	/* intentionally empty: no members are declared for this architecture */
};
|
||||
|
||||
#endif
|
||||
|
@ -459,6 +459,9 @@ struct kvm_sal_data {
|
||||
unsigned long boot_gp;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
	/* intentionally empty: no per-memslot architecture data is declared here */
};
|
||||
|
||||
struct kvm_arch {
|
||||
spinlock_t dirty_log_lock;
|
||||
|
||||
|
@ -809,10 +809,13 @@ static void kvm_build_io_pmt(struct kvm *kvm)
|
||||
#define GUEST_PHYSICAL_RR4 0x2739
|
||||
#define VMM_INIT_RR 0x1660
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
BUG_ON(!kvm);
|
||||
|
||||
if (type)
|
||||
return -EINVAL;
|
||||
|
||||
kvm->arch.is_sn2 = ia64_platform_is("sn2");
|
||||
|
||||
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
|
||||
@ -1169,6 +1172,11 @@ static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
|
||||
|
||||
#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
|
||||
|
||||
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu *v;
|
||||
@ -1563,6 +1571,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
{
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/*
 * Architecture hook called when a memory slot is freed.
 * Nothing to do here: struct kvm_arch_memory_slot carries no data in this
 * file's architecture, so both arguments are ignored.
 */
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}
|
||||
|
||||
/*
 * Architecture hook called when a memory slot is created.
 * No architecture-specific setup is performed; always returns 0.
 */
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
	return 0;
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_memory_slot old,
|
||||
|
@ -265,12 +265,9 @@ struct kvm_debug_exit_arch {
|
||||
struct kvm_guest_debug_arch {
|
||||
};
|
||||
|
||||
#define KVM_REG_MASK 0x001f
|
||||
#define KVM_REG_EXT_MASK 0xffe0
|
||||
#define KVM_REG_GPR 0x0000
|
||||
#define KVM_REG_FPR 0x0020
|
||||
#define KVM_REG_QPR 0x0040
|
||||
#define KVM_REG_FQPR 0x0060
|
||||
/* definition of registers in kvm_run */
|
||||
struct kvm_sync_regs {
	/* intentionally empty: no members are declared for this architecture */
};
|
||||
|
||||
#define KVM_INTERRUPT_SET -1U
|
||||
#define KVM_INTERRUPT_UNSET -2U
|
||||
@ -292,4 +289,41 @@ struct kvm_allocate_rma {
|
||||
__u64 rma_size;
|
||||
};
|
||||
|
||||
struct kvm_book3e_206_tlb_entry {
|
||||
__u32 mas8;
|
||||
__u32 mas1;
|
||||
__u64 mas2;
|
||||
__u64 mas7_3;
|
||||
};
|
||||
|
||||
struct kvm_book3e_206_tlb_params {
|
||||
/*
|
||||
* For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
|
||||
*
|
||||
* - The number of ways of TLB0 must be a power of two between 2 and
|
||||
* 16.
|
||||
* - TLB1 must be fully associative.
|
||||
* - The size of TLB0 must be a multiple of the number of ways, and
|
||||
* the number of sets must be a power of two.
|
||||
* - The size of TLB1 may not exceed 64 entries.
|
||||
* - TLB0 supports 4 KiB pages.
|
||||
* - The page sizes supported by TLB1 are as indicated by
|
||||
* TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
|
||||
* as returned by KVM_GET_SREGS.
|
||||
* - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
|
||||
* and tlb_ways[] must be zero.
|
||||
*
|
||||
* tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
|
||||
*
|
||||
* KVM will adjust TLBnCFG based on the sizes configured here,
|
||||
* though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
|
||||
* set to zero.
|
||||
*/
|
||||
__u32 tlb_sizes[4];
|
||||
__u32 tlb_ways[4];
|
||||
__u32 reserved[8];
|
||||
};
|
||||
|
||||
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
|
||||
|
||||
#endif /* __LINUX_KVM_POWERPC_H */
|
||||
|
@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
|
||||
#endif
|
||||
int context_id[SID_CONTEXTS];
|
||||
|
||||
bool hior_explicit; /* HIOR is set by ioctl, not PVR */
|
||||
|
||||
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
|
||||
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
|
||||
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
|
||||
@ -119,6 +121,11 @@ extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
|
||||
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
|
||||
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu, unsigned long addr,
|
||||
unsigned long status);
|
||||
extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
|
||||
unsigned long slb_v, unsigned long valid);
|
||||
|
||||
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
|
||||
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
|
||||
@ -138,6 +145,21 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
|
||||
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
|
||||
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
||||
unsigned long *rmap, long pte_index, int realmode);
|
||||
extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
|
||||
unsigned long pte_index);
|
||||
void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
|
||||
unsigned long pte_index);
|
||||
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned long *nb_ret);
|
||||
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
|
||||
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
long pte_index, unsigned long pteh, unsigned long ptel);
|
||||
extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
long pte_index, unsigned long pteh, unsigned long ptel);
|
||||
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
|
||||
extern void kvmppc_entry_trampoline(void);
|
||||
extern void kvmppc_hv_entry_trampoline(void);
|
||||
@ -183,7 +205,9 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
|
||||
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
|
||||
{
|
||||
if ( num < 14 ) {
|
||||
to_svcpu(vcpu)->gpr[num] = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->gpr[num] = val;
|
||||
svcpu_put(svcpu);
|
||||
to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
|
||||
} else
|
||||
vcpu->arch.gpr[num] = val;
|
||||
@ -191,80 +215,120 @@ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
|
||||
|
||||
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
|
||||
{
|
||||
if ( num < 14 )
|
||||
return to_svcpu(vcpu)->gpr[num];
|
||||
else
|
||||
if ( num < 14 ) {
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong r = svcpu->gpr[num];
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
} else
|
||||
return vcpu->arch.gpr[num];
|
||||
}
|
||||
|
||||
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
|
||||
{
|
||||
to_svcpu(vcpu)->cr = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->cr = val;
|
||||
svcpu_put(svcpu);
|
||||
to_book3s(vcpu)->shadow_vcpu->cr = val;
|
||||
}
|
||||
|
||||
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->cr;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
u32 r;
|
||||
r = svcpu->cr;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
|
||||
{
|
||||
to_svcpu(vcpu)->xer = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->xer = val;
|
||||
to_book3s(vcpu)->shadow_vcpu->xer = val;
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->xer;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
u32 r;
|
||||
r = svcpu->xer;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
|
||||
{
|
||||
to_svcpu(vcpu)->ctr = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->ctr = val;
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->ctr;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong r;
|
||||
r = svcpu->ctr;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
|
||||
{
|
||||
to_svcpu(vcpu)->lr = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->lr = val;
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->lr;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong r;
|
||||
r = svcpu->lr;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
|
||||
{
|
||||
to_svcpu(vcpu)->pc = val;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->pc = val;
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->pc;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong r;
|
||||
r = svcpu->pc;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
ulong pc = kvmppc_get_pc(vcpu);
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
u32 r;
|
||||
|
||||
/* Load the instruction manually if it failed to do so in the
|
||||
* exit path */
|
||||
if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
|
||||
kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
|
||||
|
||||
return svcpu->last_inst;
|
||||
r = svcpu->last_inst;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svcpu(vcpu)->fault_dar;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong r;
|
||||
r = svcpu->fault_dar;
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
|
||||
|
@ -20,11 +20,15 @@
|
||||
#ifndef __ASM_KVM_BOOK3S_32_H__
|
||||
#define __ASM_KVM_BOOK3S_32_H__
|
||||
|
||||
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
|
||||
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_book3s(vcpu)->shadow_vcpu;
|
||||
}
|
||||
|
||||
/*
 * Counterpart of svcpu_get().  Nothing needs to be released here: in this
 * header svcpu_get() only returns to_book3s(vcpu)->shadow_vcpu.
 */
static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
}
|
||||
|
||||
#define PTE_SIZE 12
|
||||
#define VSID_ALL 0
|
||||
#define SR_INVALID 0x00000001 /* VSID 1 should always be unused */
|
||||
|
@ -21,14 +21,56 @@
|
||||
#define __ASM_KVM_BOOK3S_64_H__
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_PR
|
||||
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
|
||||
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
return &get_paca()->shadow_vcpu;
|
||||
}
|
||||
|
||||
/*
 * Counterpart of svcpu_get(): re-enables preemption, balancing the
 * preempt_disable() done in svcpu_get().  The svcpu argument is unused.
 */
static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	preempt_enable();
}
|
||||
#endif
|
||||
|
||||
#define SPAPR_TCE_SHIFT 12
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
/* For now use fixed-size 16MB page table */
|
||||
#define HPT_ORDER 24
|
||||
#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
|
||||
#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
|
||||
#define HPT_HASH_MASK (HPT_NPTEG - 1)
|
||||
#endif
|
||||
|
||||
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
|
||||
|
||||
/*
|
||||
* We use a lock bit in HPTE dword 0 to synchronize updates and
|
||||
* accesses to each HPTE, and another bit to indicate non-present
|
||||
* HPTEs.
|
||||
*/
|
||||
#define HPTE_V_HVLOCK 0x40UL
|
||||
#define HPTE_V_ABSENT 0x20UL
|
||||
|
||||
static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
|
||||
{
|
||||
unsigned long tmp, old;
|
||||
|
||||
asm volatile(" ldarx %0,0,%2\n"
|
||||
" and. %1,%0,%3\n"
|
||||
" bne 2f\n"
|
||||
" ori %0,%0,%4\n"
|
||||
" stdcx. %0,0,%2\n"
|
||||
" beq+ 2f\n"
|
||||
" li %1,%3\n"
|
||||
"2: isync"
|
||||
: "=&r" (tmp), "=&r" (old)
|
||||
: "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
|
||||
: "cc", "memory");
|
||||
return old == 0;
|
||||
}
|
||||
|
||||
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
@ -62,4 +104,140 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
return rb;
|
||||
}
|
||||
|
||||
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
|
||||
{
|
||||
/* only handle 4k, 64k and 16M pages for now */
|
||||
if (!(h & HPTE_V_LARGE))
|
||||
return 1ul << 12; /* 4k page */
|
||||
if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
|
||||
return 1ul << 16; /* 64k page */
|
||||
if ((l & 0xff000) == 0)
|
||||
return 1ul << 24; /* 16M page */
|
||||
return 0; /* error */
|
||||
}
|
||||
|
||||
static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
|
||||
{
|
||||
return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline int hpte_is_writable(unsigned long ptel)
|
||||
{
|
||||
unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
|
||||
|
||||
return pp != PP_RXRX && pp != PP_RXXX;
|
||||
}
|
||||
|
||||
static inline unsigned long hpte_make_readonly(unsigned long ptel)
|
||||
{
|
||||
if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
|
||||
ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
|
||||
else
|
||||
ptel |= PP_RXRX;
|
||||
return ptel;
|
||||
}
|
||||
|
||||
static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
|
||||
{
|
||||
unsigned int wimg = ptel & HPTE_R_WIMG;
|
||||
|
||||
/* Handle SAO */
|
||||
if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
|
||||
cpu_has_feature(CPU_FTR_ARCH_206))
|
||||
wimg = HPTE_R_M;
|
||||
|
||||
if (!io_type)
|
||||
return wimg == HPTE_R_M;
|
||||
|
||||
return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock and read a linux PTE. If it's present and writable, atomically
|
||||
* set dirty and referenced bits and return the PTE, otherwise return 0.
|
||||
*/
|
||||
static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
|
||||
{
|
||||
pte_t pte, tmp;
|
||||
|
||||
/* wait until _PAGE_BUSY is clear then set it atomically */
|
||||
__asm__ __volatile__ (
|
||||
"1: ldarx %0,0,%3\n"
|
||||
" andi. %1,%0,%4\n"
|
||||
" bne- 1b\n"
|
||||
" ori %1,%0,%4\n"
|
||||
" stdcx. %1,0,%3\n"
|
||||
" bne- 1b"
|
||||
: "=&r" (pte), "=&r" (tmp), "=m" (*p)
|
||||
: "r" (p), "i" (_PAGE_BUSY)
|
||||
: "cc");
|
||||
|
||||
if (pte_present(pte)) {
|
||||
pte = pte_mkyoung(pte);
|
||||
if (writing && pte_write(pte))
|
||||
pte = pte_mkdirty(pte);
|
||||
}
|
||||
|
||||
*p = pte; /* clears _PAGE_BUSY */
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
/* Return HPTE cache control bits corresponding to Linux pte bits */
|
||||
static inline unsigned long hpte_cache_bits(unsigned long pte_val)
|
||||
{
|
||||
#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
|
||||
return pte_val & (HPTE_R_W | HPTE_R_I);
|
||||
#else
|
||||
return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
|
||||
((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
|
||||
{
|
||||
if (key)
|
||||
return PP_RWRX <= pp && pp <= PP_RXRX;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
|
||||
{
|
||||
if (key)
|
||||
return pp == PP_RWRW;
|
||||
return pp <= PP_RWRW;
|
||||
}
|
||||
|
||||
static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
|
||||
{
|
||||
unsigned long skey;
|
||||
|
||||
skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
|
||||
((hpte_r & HPTE_R_KEY_LO) >> 9);
|
||||
return (amr >> (62 - 2 * skey)) & 3;
|
||||
}
|
||||
|
||||
static inline void lock_rmap(unsigned long *rmap)
|
||||
{
|
||||
do {
|
||||
while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
|
||||
cpu_relax();
|
||||
} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
|
||||
}
|
||||
|
||||
static inline void unlock_rmap(unsigned long *rmap)
|
||||
{
|
||||
__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
|
||||
}
|
||||
|
||||
static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
|
||||
unsigned long pagesize)
|
||||
{
|
||||
unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
|
||||
|
||||
if (pagesize <= PAGE_SIZE)
|
||||
return 1;
|
||||
return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
|
||||
}
|
||||
|
||||
#endif /* __ASM_KVM_BOOK3S_64_H__ */
|
||||
|
@ -22,46 +22,55 @@
|
||||
#define E500_PID_NUM 3
|
||||
#define E500_TLB_NUM 2
|
||||
|
||||
struct tlbe{
|
||||
u32 mas1;
|
||||
u32 mas2;
|
||||
u32 mas3;
|
||||
u32 mas7;
|
||||
};
|
||||
|
||||
#define E500_TLB_VALID 1
|
||||
#define E500_TLB_DIRTY 2
|
||||
|
||||
struct tlbe_priv {
|
||||
struct tlbe_ref {
|
||||
pfn_t pfn;
|
||||
unsigned int flags; /* E500_TLB_* */
|
||||
};
|
||||
|
||||
struct tlbe_priv {
|
||||
struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
|
||||
};
|
||||
|
||||
struct vcpu_id_table;
|
||||
|
||||
/*
 * Geometry of one guest TLB array.
 * NOTE(review): presumably entries == ways * sets -- confirm where
 * these fields are filled in.
 */
struct kvmppc_e500_tlb_params {
	int entries;
	int ways;
	int sets;
};
|
||||
|
||||
struct kvmppc_vcpu_e500 {
|
||||
/* Unmodified copy of the guest's TLB. */
|
||||
struct tlbe *gtlb_arch[E500_TLB_NUM];
|
||||
/* Unmodified copy of the guest's TLB -- shared with host userspace. */
|
||||
struct kvm_book3e_206_tlb_entry *gtlb_arch;
|
||||
|
||||
/* Starting entry number in gtlb_arch[] */
|
||||
int gtlb_offset[E500_TLB_NUM];
|
||||
|
||||
/* KVM internal information associated with each guest TLB entry */
|
||||
struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
|
||||
|
||||
unsigned int gtlb_size[E500_TLB_NUM];
|
||||
struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
|
||||
|
||||
unsigned int gtlb_nv[E500_TLB_NUM];
|
||||
|
||||
/*
|
||||
* information associated with each host TLB entry --
|
||||
* TLB1 only for now. If/when guest TLB1 entries can be
|
||||
* mapped with host TLB0, this will be used for that too.
|
||||
*
|
||||
* We don't want to use this for guest TLB0 because then we'd
|
||||
* have the overhead of doing the translation again even if
|
||||
* the entry is still in the guest TLB (e.g. we swapped out
|
||||
* and back, and our host TLB entries got evicted).
|
||||
*/
|
||||
struct tlbe_ref *tlb_refs[E500_TLB_NUM];
|
||||
unsigned int host_tlb1_nv;
|
||||
|
||||
u32 host_pid[E500_PID_NUM];
|
||||
u32 pid[E500_PID_NUM];
|
||||
u32 svr;
|
||||
|
||||
u32 mas0;
|
||||
u32 mas1;
|
||||
u32 mas2;
|
||||
u32 mas3;
|
||||
u32 mas4;
|
||||
u32 mas5;
|
||||
u32 mas6;
|
||||
u32 mas7;
|
||||
|
||||
/* vcpu id table */
|
||||
struct vcpu_id_table *idt;
|
||||
|
||||
@ -73,6 +82,9 @@ struct kvmppc_vcpu_e500 {
|
||||
u32 tlb1cfg;
|
||||
u64 mcar;
|
||||
|
||||
struct page **shared_tlb_pages;
|
||||
int num_shared_tlb_pages;
|
||||
|
||||
struct kvm_vcpu vcpu;
|
||||
};
|
||||
|
||||
|
@ -32,17 +32,32 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#define KVM_MAX_VCPUS NR_CPUS
|
||||
#define KVM_MAX_VCORES NR_CPUS
|
||||
#define KVM_MEMORY_SLOTS 32
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 4
|
||||
#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
|
||||
|
||||
#ifdef CONFIG_KVM_MMIO
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
struct kvm;
|
||||
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
|
||||
#endif
|
||||
|
||||
/* We don't currently support large pages. */
|
||||
#define KVM_HPAGE_GFN_SHIFT(x) 0
|
||||
#define KVM_NR_PAGE_SIZES 1
|
||||
@ -158,34 +173,72 @@ struct kvmppc_spapr_tce_table {
|
||||
struct page *pages[0];
|
||||
};
|
||||
|
||||
struct kvmppc_rma_info {
|
||||
struct kvmppc_linear_info {
|
||||
void *base_virt;
|
||||
unsigned long base_pfn;
|
||||
unsigned long npages;
|
||||
struct list_head list;
|
||||
atomic_t use_count;
|
||||
atomic_t use_count;
|
||||
int type;
|
||||
};
|
||||
|
||||
/*
 * Reverse-mapping array entry; there is one of these per HPTE.
 *
 * Each entry records the guest's view of the second word of the HPTE
 * (which includes the guest physical address of the mapping) together
 * with forward and backward links in a doubly-linked ring of HPTEs
 * that map the same host page.  To save space the links are 32-bit
 * HPTE indexes rather than pointers.
 */
struct revmap_entry {
	unsigned long guest_rpte;	/* guest's view of HPTE second word */
	unsigned int forw;		/* HPTE index of next ring member */
	unsigned int back;		/* HPTE index of previous ring member */
};
|
||||
|
||||
/*
|
||||
* We use the top bit of each memslot->rmap entry as a lock bit,
|
||||
* and bit 32 as a present flag. The bottom 32 bits are the
|
||||
* index in the guest HPT of a HPTE that points to the page.
|
||||
*/
|
||||
#define KVMPPC_RMAP_LOCK_BIT 63
|
||||
#define KVMPPC_RMAP_RC_SHIFT 32
|
||||
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
|
||||
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
|
||||
#define KVMPPC_RMAP_PRESENT 0x100000000ul
|
||||
#define KVMPPC_RMAP_INDEX 0xfffffffful
|
||||
|
||||
/* Low-order bits in kvm->arch.slot_phys[][] */
|
||||
#define KVMPPC_PAGE_ORDER_MASK 0x1f
|
||||
#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
|
||||
#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
|
||||
#define KVMPPC_GOT_PAGE 0x80
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
unsigned long hpt_virt;
|
||||
unsigned long ram_npages;
|
||||
unsigned long ram_psize;
|
||||
unsigned long ram_porder;
|
||||
struct kvmppc_pginfo *ram_pginfo;
|
||||
struct revmap_entry *revmap;
|
||||
unsigned int lpid;
|
||||
unsigned int host_lpid;
|
||||
unsigned long host_lpcr;
|
||||
unsigned long sdr1;
|
||||
unsigned long host_sdr1;
|
||||
int tlbie_lock;
|
||||
int n_rma_pages;
|
||||
unsigned long lpcr;
|
||||
unsigned long rmor;
|
||||
struct kvmppc_rma_info *rma;
|
||||
struct kvmppc_linear_info *rma;
|
||||
unsigned long vrma_slb_v;
|
||||
int rma_setup_done;
|
||||
int using_mmu_notifiers;
|
||||
struct list_head spapr_tce_tables;
|
||||
spinlock_t slot_phys_lock;
|
||||
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
|
||||
int slot_npages[KVM_MEM_SLOTS_NUM];
|
||||
unsigned short last_vcpu[NR_CPUS];
|
||||
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
|
||||
struct kvmppc_linear_info *hpt_li;
|
||||
#endif /* CONFIG_KVM_BOOK3S_64_HV */
|
||||
};
|
||||
|
||||
@ -318,10 +371,6 @@ struct kvm_vcpu_arch {
|
||||
u32 vrsave; /* also USPRG0 */
|
||||
u32 mmucr;
|
||||
ulong shadow_msr;
|
||||
ulong sprg4;
|
||||
ulong sprg5;
|
||||
ulong sprg6;
|
||||
ulong sprg7;
|
||||
ulong csrr0;
|
||||
ulong csrr1;
|
||||
ulong dsrr0;
|
||||
@ -329,16 +378,14 @@ struct kvm_vcpu_arch {
|
||||
ulong mcsrr0;
|
||||
ulong mcsrr1;
|
||||
ulong mcsr;
|
||||
ulong esr;
|
||||
u32 dec;
|
||||
u32 decar;
|
||||
u32 tbl;
|
||||
u32 tbu;
|
||||
u32 tcr;
|
||||
u32 tsr;
|
||||
ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
|
||||
u32 ivor[64];
|
||||
ulong ivpr;
|
||||
u32 pir;
|
||||
u32 pvr;
|
||||
|
||||
u32 shadow_pid;
|
||||
@ -427,9 +474,14 @@ struct kvm_vcpu_arch {
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
struct kvm_vcpu_arch_shared shregs;
|
||||
|
||||
unsigned long pgfault_addr;
|
||||
long pgfault_index;
|
||||
unsigned long pgfault_hpte[2];
|
||||
|
||||
struct list_head run_list;
|
||||
struct task_struct *run_task;
|
||||
struct kvm_run *kvm_run;
|
||||
pgd_t *pgdir;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -438,4 +490,12 @@ struct kvm_vcpu_arch {
|
||||
#define KVMPPC_VCPU_BUSY_IN_HOST 1
|
||||
#define KVMPPC_VCPU_RUNNABLE 2
|
||||
|
||||
/* Values for vcpu->arch.io_gpr */
|
||||
#define KVM_MMIO_REG_MASK 0x001f
|
||||
#define KVM_MMIO_REG_EXT_MASK 0xffe0
|
||||
#define KVM_MMIO_REG_GPR 0x0000
|
||||
#define KVM_MMIO_REG_FPR 0x0020
|
||||
#define KVM_MMIO_REG_QPR 0x0040
|
||||
#define KVM_MMIO_REG_FQPR 0x0060
|
||||
|
||||
#endif /* __POWERPC_KVM_HOST_H__ */
|
||||
|
@ -22,6 +22,16 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* Additions to this struct must only occur at the end, and should be
|
||||
* accompanied by a KVM_MAGIC_FEAT flag to advertise that they are present
|
||||
* (albeit not necessarily relevant to the current target hardware platform).
|
||||
*
|
||||
* Struct fields are always 32 or 64 bit aligned, depending on them being 32
|
||||
* or 64 bit wide respectively.
|
||||
*
|
||||
* See Documentation/virtual/kvm/ppc-pv.txt
|
||||
*/
|
||||
struct kvm_vcpu_arch_shared {
|
||||
__u64 scratch1;
|
||||
__u64 scratch2;
|
||||
@ -33,11 +43,35 @@ struct kvm_vcpu_arch_shared {
|
||||
__u64 sprg3;
|
||||
__u64 srr0;
|
||||
__u64 srr1;
|
||||
__u64 dar;
|
||||
__u64 dar; /* dear on BookE */
|
||||
__u64 msr;
|
||||
__u32 dsisr;
|
||||
__u32 int_pending; /* Tells the guest if we have an interrupt */
|
||||
__u32 sr[16];
|
||||
__u32 mas0;
|
||||
__u32 mas1;
|
||||
__u64 mas7_3;
|
||||
__u64 mas2;
|
||||
__u32 mas4;
|
||||
__u32 mas6;
|
||||
__u32 esr;
|
||||
__u32 pir;
|
||||
|
||||
/*
|
||||
* SPRG4-7 are user-readable, so we can only keep these consistent
|
||||
* between the shared area and the real registers when there's an
|
||||
* intervening exit to KVM. This also applies to SPRG3 on some
|
||||
* chips.
|
||||
*
|
||||
* This suffices for access by guest userspace, since in PR-mode
|
||||
* KVM, an exit must occur when changing the guest's MSR[PR].
|
||||
* If the guest kernel writes to SPRG3-7 via the shared area, it
|
||||
* must also use the shared area for reading while in kernel space.
|
||||
*/
|
||||
__u64 sprg4;
|
||||
__u64 sprg5;
|
||||
__u64 sprg6;
|
||||
__u64 sprg7;
|
||||
};
|
||||
|
||||
#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
|
||||
@ -47,7 +81,10 @@ struct kvm_vcpu_arch_shared {
|
||||
|
||||
#define KVM_FEATURE_MAGIC_PAGE 1
|
||||
|
||||
#define KVM_MAGIC_FEAT_SR (1 << 0)
|
||||
#define KVM_MAGIC_FEAT_SR (1 << 0)
|
||||
|
||||
/* MASn, ESR, PIR, and high SPRGs */
|
||||
#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
|
@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
|
||||
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
|
||||
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
|
||||
extern void kvmppc_decrementer_func(unsigned long data);
|
||||
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* Core-specific hooks */
|
||||
@ -94,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
|
||||
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
||||
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
|
||||
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
|
||||
@ -120,15 +121,17 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm);
|
||||
extern void kvmppc_free_hpt(struct kvm *kvm);
|
||||
extern long kvmppc_prepare_vrma(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
extern void kvmppc_map_vrma(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *memslot, unsigned long porder);
|
||||
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
|
||||
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
struct kvm_create_spapr_tce *args);
|
||||
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
|
||||
struct kvm_allocate_rma *rma);
|
||||
extern struct kvmppc_rma_info *kvm_alloc_rma(void);
|
||||
extern void kvm_release_rma(struct kvmppc_rma_info *ri);
|
||||
extern struct kvmppc_linear_info *kvm_alloc_rma(void);
|
||||
extern void kvm_release_rma(struct kvmppc_linear_info *ri);
|
||||
extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
|
||||
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
|
||||
extern int kvmppc_core_init_vm(struct kvm *kvm);
|
||||
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
|
||||
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
@ -175,6 +178,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
|
||||
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
|
||||
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
|
||||
|
||||
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
|
||||
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
|
||||
|
||||
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
@ -183,14 +189,19 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
|
||||
paca[cpu].kvm_hstate.xics_phys = addr;
|
||||
}
|
||||
|
||||
extern void kvm_rma_init(void);
|
||||
extern void kvm_linear_init(void);
|
||||
|
||||
#else
|
||||
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
|
||||
{}
|
||||
|
||||
static inline void kvm_rma_init(void)
|
||||
static inline void kvm_linear_init(void)
|
||||
{}
|
||||
#endif
|
||||
|
||||
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
|
||||
struct kvm_config_tlb *cfg);
|
||||
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
|
||||
struct kvm_dirty_tlb *cfg);
|
||||
|
||||
#endif /* __POWERPC_KVM_PPC_H__ */
|
||||
|
@ -41,9 +41,10 @@
|
||||
/* MAS registers bit definitions */
|
||||
|
||||
#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000)
|
||||
#define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000)
|
||||
#define MAS0_NV(x) ((x) & 0x00000FFF)
|
||||
#define MAS0_ESEL_MASK 0x0FFF0000
|
||||
#define MAS0_ESEL_SHIFT 16
|
||||
#define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
|
||||
#define MAS0_NV(x) ((x) & 0x00000FFF)
|
||||
#define MAS0_HES 0x00004000
|
||||
#define MAS0_WQ_ALLWAYS 0x00000000
|
||||
#define MAS0_WQ_COND 0x00001000
|
||||
@ -167,6 +168,7 @@
|
||||
#define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */
|
||||
#define TLBnCFG_MAXSIZE_SHIFT 16
|
||||
#define TLBnCFG_ASSOC 0xff000000 /* Associativity */
|
||||
#define TLBnCFG_ASSOC_SHIFT 24
|
||||
|
||||
/* TLBnPS encoding */
|
||||
#define TLBnPS_4K 0x00000004
|
||||
|
@ -108,11 +108,11 @@ extern char initial_stab[];
|
||||
#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
|
||||
|
||||
/* Values for PP (assumes Ks=0, Kp=1) */
|
||||
/* pp0 will always be 0 for linux */
|
||||
#define PP_RWXX 0 /* Supervisor read/write, User none */
|
||||
#define PP_RWRX 1 /* Supervisor read/write, User read */
|
||||
#define PP_RWRW 2 /* Supervisor read/write, User read/write */
|
||||
#define PP_RXRX 3 /* Supervisor read, User read */
|
||||
#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
@ -45,6 +45,7 @@
|
||||
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff
|
||||
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
|
||||
#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff
|
||||
#define PPC_INST_SLBFEE 0x7c0007a7
|
||||
|
||||
#define PPC_INST_STRING 0x7c00042a
|
||||
#define PPC_INST_STRING_MASK 0xfc0007fe
|
||||
@ -183,7 +184,8 @@
|
||||
__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
|
||||
#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \
|
||||
__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
|
||||
|
||||
#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
|
||||
__PPC_RT(t) | __PPC_RB(b))
|
||||
|
||||
/*
|
||||
* Define what the VSX XX1 form instructions will look like, then add
|
||||
|
@ -216,6 +216,7 @@
|
||||
#define DSISR_ISSTORE 0x02000000 /* access was a store */
|
||||
#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
|
||||
#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */
|
||||
#define DSISR_KEYFAULT 0x00200000 /* Key fault */
|
||||
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
|
||||
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
|
||||
#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
|
||||
@ -237,6 +238,7 @@
|
||||
#define LPCR_ISL (1ul << (63-2))
|
||||
#define LPCR_VC_SH (63-2)
|
||||
#define LPCR_DPFD_SH (63-11)
|
||||
#define LPCR_VRMASD (0x1ful << (63-16))
|
||||
#define LPCR_VRMA_L (1ul << (63-12))
|
||||
#define LPCR_VRMA_LP0 (1ul << (63-15))
|
||||
#define LPCR_VRMA_LP1 (1ul << (63-16))
|
||||
@ -493,6 +495,9 @@
|
||||
#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */
|
||||
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
|
||||
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
|
||||
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
|
||||
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
|
||||
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
|
||||
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
|
||||
#define SRR1_WAKESYSERR 0x00300000 /* System error */
|
||||
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
|
||||
|
@ -412,16 +412,23 @@ int main(void)
|
||||
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
|
||||
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
|
||||
#endif
|
||||
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
|
||||
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
|
||||
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
|
||||
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
|
||||
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
|
||||
DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
|
||||
DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
|
||||
DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
|
||||
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
|
||||
DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
|
||||
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
|
||||
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
|
||||
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
|
||||
|
||||
DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
|
||||
DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
|
||||
DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
|
||||
DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
|
||||
DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
|
||||
DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
|
||||
|
||||
/* book3s */
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
|
||||
@ -434,6 +441,7 @@ int main(void)
|
||||
DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
|
||||
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
|
||||
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
|
||||
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
|
||||
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
|
||||
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
|
||||
#endif
|
||||
|
@ -101,14 +101,14 @@ data_access_not_stab:
|
||||
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
|
||||
#endif
|
||||
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
|
||||
KVMTEST_PR, 0x300)
|
||||
KVMTEST, 0x300)
|
||||
|
||||
. = 0x380
|
||||
.globl data_access_slb_pSeries
|
||||
data_access_slb_pSeries:
|
||||
HMT_MEDIUM
|
||||
SET_SCRATCH0(r13)
|
||||
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
|
||||
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
|
||||
std r3,PACA_EXSLB+EX_R3(r13)
|
||||
mfspr r3,SPRN_DAR
|
||||
#ifdef __DISABLED__
|
||||
@ -330,8 +330,8 @@ do_stab_bolted_pSeries:
|
||||
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
|
||||
#endif /* CONFIG_POWER4_ONLY */
|
||||
|
||||
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300)
|
||||
KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380)
|
||||
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
|
||||
KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
|
||||
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
|
||||
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
|
||||
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
|
||||
* Copyright 2010-2011 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* Authors:
|
||||
* Alexander Graf <agraf@suse.de>
|
||||
@ -29,6 +30,7 @@
|
||||
#include <asm/sections.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/disassemble.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
|
||||
#define KVM_MAGIC_PAGE (-4096L)
|
||||
#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
|
||||
@ -41,34 +43,30 @@
|
||||
#define KVM_INST_B 0x48000000
|
||||
#define KVM_INST_B_MASK 0x03ffffff
|
||||
#define KVM_INST_B_MAX 0x01ffffff
|
||||
#define KVM_INST_LI 0x38000000
|
||||
|
||||
#define KVM_MASK_RT 0x03e00000
|
||||
#define KVM_RT_30 0x03c00000
|
||||
#define KVM_MASK_RB 0x0000f800
|
||||
#define KVM_INST_MFMSR 0x7c0000a6
|
||||
#define KVM_INST_MFSPR_SPRG0 0x7c1042a6
|
||||
#define KVM_INST_MFSPR_SPRG1 0x7c1142a6
|
||||
#define KVM_INST_MFSPR_SPRG2 0x7c1242a6
|
||||
#define KVM_INST_MFSPR_SPRG3 0x7c1342a6
|
||||
#define KVM_INST_MFSPR_SRR0 0x7c1a02a6
|
||||
#define KVM_INST_MFSPR_SRR1 0x7c1b02a6
|
||||
#define KVM_INST_MFSPR_DAR 0x7c1302a6
|
||||
#define KVM_INST_MFSPR_DSISR 0x7c1202a6
|
||||
|
||||
#define KVM_INST_MTSPR_SPRG0 0x7c1043a6
|
||||
#define KVM_INST_MTSPR_SPRG1 0x7c1143a6
|
||||
#define KVM_INST_MTSPR_SPRG2 0x7c1243a6
|
||||
#define KVM_INST_MTSPR_SPRG3 0x7c1343a6
|
||||
#define KVM_INST_MTSPR_SRR0 0x7c1a03a6
|
||||
#define KVM_INST_MTSPR_SRR1 0x7c1b03a6
|
||||
#define KVM_INST_MTSPR_DAR 0x7c1303a6
|
||||
#define KVM_INST_MTSPR_DSISR 0x7c1203a6
|
||||
#define SPR_FROM 0
|
||||
#define SPR_TO 0x100
|
||||
|
||||
#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \
|
||||
(((sprn) & 0x1f) << 16) | \
|
||||
(((sprn) & 0x3e0) << 6) | \
|
||||
(moveto))
|
||||
|
||||
#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM)
|
||||
#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO)
|
||||
|
||||
#define KVM_INST_TLBSYNC 0x7c00046c
|
||||
#define KVM_INST_MTMSRD_L0 0x7c000164
|
||||
#define KVM_INST_MTMSRD_L1 0x7c010164
|
||||
#define KVM_INST_MTMSR 0x7c000124
|
||||
|
||||
#define KVM_INST_WRTEE 0x7c000106
|
||||
#define KVM_INST_WRTEEI_0 0x7c000146
|
||||
#define KVM_INST_WRTEEI_1 0x7c008146
|
||||
|
||||
@ -270,26 +268,27 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
|
||||
extern u32 kvm_emulate_wrteei_branch_offs;
|
||||
extern u32 kvm_emulate_wrteei_ee_offs;
|
||||
extern u32 kvm_emulate_wrteei_len;
|
||||
extern u32 kvm_emulate_wrteei[];
|
||||
extern u32 kvm_emulate_wrtee_branch_offs;
|
||||
extern u32 kvm_emulate_wrtee_reg_offs;
|
||||
extern u32 kvm_emulate_wrtee_orig_ins_offs;
|
||||
extern u32 kvm_emulate_wrtee_len;
|
||||
extern u32 kvm_emulate_wrtee[];
|
||||
|
||||
static void kvm_patch_ins_wrteei(u32 *inst)
|
||||
static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
|
||||
{
|
||||
u32 *p;
|
||||
int distance_start;
|
||||
int distance_end;
|
||||
ulong next_inst;
|
||||
|
||||
p = kvm_alloc(kvm_emulate_wrteei_len * 4);
|
||||
p = kvm_alloc(kvm_emulate_wrtee_len * 4);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
/* Find out where we are and put everything there */
|
||||
distance_start = (ulong)p - (ulong)inst;
|
||||
next_inst = ((ulong)inst + 4);
|
||||
distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs];
|
||||
distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs];
|
||||
|
||||
/* Make sure we only write valid b instructions */
|
||||
if (distance_start > KVM_INST_B_MAX) {
|
||||
@ -298,10 +297,65 @@ static void kvm_patch_ins_wrteei(u32 *inst)
|
||||
}
|
||||
|
||||
/* Modify the chunk to fit the invocation */
|
||||
memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4);
|
||||
p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK;
|
||||
p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE);
|
||||
flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4);
|
||||
memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4);
|
||||
p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK;
|
||||
|
||||
if (imm_one) {
|
||||
p[kvm_emulate_wrtee_reg_offs] =
|
||||
KVM_INST_LI | __PPC_RT(30) | MSR_EE;
|
||||
} else {
|
||||
/* Make clobbered registers work too */
|
||||
switch (get_rt(rt)) {
|
||||
case 30:
|
||||
kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
|
||||
magic_var(scratch2), KVM_RT_30);
|
||||
break;
|
||||
case 31:
|
||||
kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
|
||||
magic_var(scratch1), KVM_RT_30);
|
||||
break;
|
||||
default:
|
||||
p[kvm_emulate_wrtee_reg_offs] |= rt;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
p[kvm_emulate_wrtee_orig_ins_offs] = *inst;
|
||||
flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4);
|
||||
|
||||
/* Patch the invocation */
|
||||
kvm_patch_ins_b(inst, distance_start);
|
||||
}
|
||||
|
||||
extern u32 kvm_emulate_wrteei_0_branch_offs;
|
||||
extern u32 kvm_emulate_wrteei_0_len;
|
||||
extern u32 kvm_emulate_wrteei_0[];
|
||||
|
||||
static void kvm_patch_ins_wrteei_0(u32 *inst)
|
||||
{
|
||||
u32 *p;
|
||||
int distance_start;
|
||||
int distance_end;
|
||||
ulong next_inst;
|
||||
|
||||
p = kvm_alloc(kvm_emulate_wrteei_0_len * 4);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
/* Find out where we are and put everything there */
|
||||
distance_start = (ulong)p - (ulong)inst;
|
||||
next_inst = ((ulong)inst + 4);
|
||||
distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs];
|
||||
|
||||
/* Make sure we only write valid b instructions */
|
||||
if (distance_start > KVM_INST_B_MAX) {
|
||||
kvm_patching_worked = false;
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4);
|
||||
p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK;
|
||||
flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4);
|
||||
|
||||
/* Patch the invocation */
|
||||
kvm_patch_ins_b(inst, distance_start);
|
||||
@ -380,56 +434,191 @@ static void kvm_check_ins(u32 *inst, u32 features)
|
||||
case KVM_INST_MFMSR:
|
||||
kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SPRG0:
|
||||
case KVM_INST_MFSPR(SPRN_SPRG0):
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SPRG1:
|
||||
case KVM_INST_MFSPR(SPRN_SPRG1):
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SPRG2:
|
||||
case KVM_INST_MFSPR(SPRN_SPRG2):
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SPRG3:
|
||||
case KVM_INST_MFSPR(SPRN_SPRG3):
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SRR0:
|
||||
case KVM_INST_MFSPR(SPRN_SRR0):
|
||||
kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_SRR1:
|
||||
case KVM_INST_MFSPR(SPRN_SRR1):
|
||||
kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_DAR:
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_DEAR):
|
||||
#else
|
||||
case KVM_INST_MFSPR(SPRN_DAR):
|
||||
#endif
|
||||
kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR_DSISR:
|
||||
case KVM_INST_MFSPR(SPRN_DSISR):
|
||||
kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3E_MMU
|
||||
case KVM_INST_MFSPR(SPRN_MAS0):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS1):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS2):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS3):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS4):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS6):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_MAS7):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
|
||||
break;
|
||||
#endif /* CONFIG_PPC_BOOK3E_MMU */
|
||||
|
||||
case KVM_INST_MFSPR(SPRN_SPRG4):
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_SPRG4R):
|
||||
#endif
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_SPRG5):
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_SPRG5R):
|
||||
#endif
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_SPRG6):
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_SPRG6R):
|
||||
#endif
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MFSPR(SPRN_SPRG7):
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_SPRG7R):
|
||||
#endif
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MFSPR(SPRN_ESR):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case KVM_INST_MFSPR(SPRN_PIR):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt);
|
||||
break;
|
||||
|
||||
|
||||
/* Stores */
|
||||
case KVM_INST_MTSPR_SPRG0:
|
||||
case KVM_INST_MTSPR(SPRN_SPRG0):
|
||||
kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_SPRG1:
|
||||
case KVM_INST_MTSPR(SPRN_SPRG1):
|
||||
kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_SPRG2:
|
||||
case KVM_INST_MTSPR(SPRN_SPRG2):
|
||||
kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_SPRG3:
|
||||
case KVM_INST_MTSPR(SPRN_SPRG3):
|
||||
kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_SRR0:
|
||||
case KVM_INST_MTSPR(SPRN_SRR0):
|
||||
kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_SRR1:
|
||||
case KVM_INST_MTSPR(SPRN_SRR1):
|
||||
kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_DAR:
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MTSPR(SPRN_DEAR):
|
||||
#else
|
||||
case KVM_INST_MTSPR(SPRN_DAR):
|
||||
#endif
|
||||
kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR_DSISR:
|
||||
case KVM_INST_MTSPR(SPRN_DSISR):
|
||||
kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
|
||||
break;
|
||||
#ifdef CONFIG_PPC_BOOK3E_MMU
|
||||
case KVM_INST_MTSPR(SPRN_MAS0):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS1):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS2):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_std(inst, magic_var(mas2), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS3):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS4):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS6):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_MAS7):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
|
||||
break;
|
||||
#endif /* CONFIG_PPC_BOOK3E_MMU */
|
||||
|
||||
case KVM_INST_MTSPR(SPRN_SPRG4):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_SPRG5):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_SPRG6):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt);
|
||||
break;
|
||||
case KVM_INST_MTSPR(SPRN_SPRG7):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_MTSPR(SPRN_ESR):
|
||||
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
|
||||
kvm_patch_ins_stw(inst, magic_var(esr), inst_rt);
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* Nops */
|
||||
case KVM_INST_TLBSYNC:
|
||||
@ -444,6 +633,11 @@ static void kvm_check_ins(u32 *inst, u32 features)
|
||||
case KVM_INST_MTMSRD_L0:
|
||||
kvm_patch_ins_mtmsr(inst, inst_rt);
|
||||
break;
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_WRTEE:
|
||||
kvm_patch_ins_wrtee(inst, inst_rt, 0);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
switch (inst_no_rt & ~KVM_MASK_RB) {
|
||||
@ -461,13 +655,19 @@ static void kvm_check_ins(u32 *inst, u32 features)
|
||||
switch (_inst) {
|
||||
#ifdef CONFIG_BOOKE
|
||||
case KVM_INST_WRTEEI_0:
|
||||
kvm_patch_ins_wrteei_0(inst);
|
||||
break;
|
||||
|
||||
case KVM_INST_WRTEEI_1:
|
||||
kvm_patch_ins_wrteei(inst);
|
||||
kvm_patch_ins_wrtee(inst, 0, 1);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
extern u32 kvm_template_start[];
|
||||
extern u32 kvm_template_end[];
|
||||
|
||||
static void kvm_use_magic_page(void)
|
||||
{
|
||||
u32 *p;
|
||||
@ -488,8 +688,23 @@ static void kvm_use_magic_page(void)
|
||||
start = (void*)_stext;
|
||||
end = (void*)_etext;
|
||||
|
||||
for (p = start; p < end; p++)
|
||||
/*
|
||||
* Being interrupted in the middle of patching would
|
||||
* be bad for SPRG4-7, which KVM can't keep in sync
|
||||
* with emulated accesses because reads don't trap.
|
||||
*/
|
||||
local_irq_disable();
|
||||
|
||||
for (p = start; p < end; p++) {
|
||||
/* Avoid patching the template code */
|
||||
if (p >= kvm_template_start && p < kvm_template_end) {
|
||||
p = kvm_template_end - 1;
|
||||
continue;
|
||||
}
|
||||
kvm_check_ins(p, features);
|
||||
}
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
|
||||
kvm_patching_worked ? "worked" : "failed");
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* Copyright SUSE Linux Products GmbH 2010
|
||||
* Copyright 2010-2011 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* Authors: Alexander Graf <agraf@suse.de>
|
||||
*/
|
||||
@ -65,6 +66,9 @@ kvm_hypercall_start:
|
||||
shared->critical == r1 and r2 is always != r1 */ \
|
||||
STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
|
||||
|
||||
.global kvm_template_start
|
||||
kvm_template_start:
|
||||
|
||||
.global kvm_emulate_mtmsrd
|
||||
kvm_emulate_mtmsrd:
|
||||
|
||||
@ -167,6 +171,9 @@ maybe_stay_in_guest:
|
||||
kvm_emulate_mtmsr_reg2:
|
||||
ori r30, r0, 0
|
||||
|
||||
/* Put MSR into magic page because we don't call mtmsr */
|
||||
STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
|
||||
/* Check if we have to fetch an interrupt */
|
||||
lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
|
||||
cmpwi r31, 0
|
||||
@ -174,15 +181,10 @@ kvm_emulate_mtmsr_reg2:
|
||||
|
||||
/* Check if we may trigger an interrupt */
|
||||
andi. r31, r30, MSR_EE
|
||||
beq no_mtmsr
|
||||
|
||||
b do_mtmsr
|
||||
bne do_mtmsr
|
||||
|
||||
no_mtmsr:
|
||||
|
||||
/* Put MSR into magic page because we don't call mtmsr */
|
||||
STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
|
||||
SCRATCH_RESTORE
|
||||
|
||||
/* Go back to caller */
|
||||
@ -210,24 +212,80 @@ kvm_emulate_mtmsr_orig_ins_offs:
|
||||
kvm_emulate_mtmsr_len:
|
||||
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
|
||||
|
||||
|
||||
|
||||
.global kvm_emulate_wrteei
|
||||
kvm_emulate_wrteei:
|
||||
/* also used for wrteei 1 */
|
||||
.global kvm_emulate_wrtee
|
||||
kvm_emulate_wrtee:
|
||||
|
||||
SCRATCH_SAVE
|
||||
|
||||
/* Fetch old MSR in r31 */
|
||||
LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
|
||||
/* Remove MSR_EE from old MSR */
|
||||
li r30, 0
|
||||
ori r30, r30, MSR_EE
|
||||
andc r31, r31, r30
|
||||
/* Insert new MSR[EE] */
|
||||
kvm_emulate_wrtee_reg:
|
||||
ori r30, r0, 0
|
||||
rlwimi r31, r30, 0, MSR_EE
|
||||
|
||||
/* OR new MSR_EE onto the old MSR */
|
||||
kvm_emulate_wrteei_ee:
|
||||
ori r31, r31, 0
|
||||
/*
|
||||
* If MSR[EE] is now set, check for a pending interrupt.
|
||||
* We could skip this if MSR[EE] was already on, but that
|
||||
* should be rare, so don't bother.
|
||||
*/
|
||||
andi. r30, r30, MSR_EE
|
||||
|
||||
/* Put MSR into magic page because we don't call wrtee */
|
||||
STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
|
||||
beq no_wrtee
|
||||
|
||||
/* Check if we have to fetch an interrupt */
|
||||
lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
|
||||
cmpwi r30, 0
|
||||
bne do_wrtee
|
||||
|
||||
no_wrtee:
|
||||
SCRATCH_RESTORE
|
||||
|
||||
/* Go back to caller */
|
||||
kvm_emulate_wrtee_branch:
|
||||
b .
|
||||
|
||||
do_wrtee:
|
||||
SCRATCH_RESTORE
|
||||
|
||||
/* Just fire off the wrtee if it's critical */
|
||||
kvm_emulate_wrtee_orig_ins:
|
||||
wrtee r0
|
||||
|
||||
b kvm_emulate_wrtee_branch
|
||||
|
||||
kvm_emulate_wrtee_end:
|
||||
|
||||
.global kvm_emulate_wrtee_branch_offs
|
||||
kvm_emulate_wrtee_branch_offs:
|
||||
.long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4
|
||||
|
||||
.global kvm_emulate_wrtee_reg_offs
|
||||
kvm_emulate_wrtee_reg_offs:
|
||||
.long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4
|
||||
|
||||
.global kvm_emulate_wrtee_orig_ins_offs
|
||||
kvm_emulate_wrtee_orig_ins_offs:
|
||||
.long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4
|
||||
|
||||
.global kvm_emulate_wrtee_len
|
||||
kvm_emulate_wrtee_len:
|
||||
.long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4
|
||||
|
||||
.global kvm_emulate_wrteei_0
|
||||
kvm_emulate_wrteei_0:
|
||||
SCRATCH_SAVE
|
||||
|
||||
/* Fetch old MSR in r31 */
|
||||
LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
|
||||
/* Remove MSR_EE from old MSR */
|
||||
rlwinm r31, r31, 0, ~MSR_EE
|
||||
|
||||
/* Write new MSR value back */
|
||||
STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
|
||||
@ -235,22 +293,17 @@ kvm_emulate_wrteei_ee:
|
||||
SCRATCH_RESTORE
|
||||
|
||||
/* Go back to caller */
|
||||
kvm_emulate_wrteei_branch:
|
||||
kvm_emulate_wrteei_0_branch:
|
||||
b .
|
||||
kvm_emulate_wrteei_end:
|
||||
kvm_emulate_wrteei_0_end:
|
||||
|
||||
.global kvm_emulate_wrteei_branch_offs
|
||||
kvm_emulate_wrteei_branch_offs:
|
||||
.long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4
|
||||
|
||||
.global kvm_emulate_wrteei_ee_offs
|
||||
kvm_emulate_wrteei_ee_offs:
|
||||
.long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4
|
||||
|
||||
.global kvm_emulate_wrteei_len
|
||||
kvm_emulate_wrteei_len:
|
||||
.long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4
|
||||
.global kvm_emulate_wrteei_0_branch_offs
|
||||
kvm_emulate_wrteei_0_branch_offs:
|
||||
.long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4
|
||||
|
||||
.global kvm_emulate_wrteei_0_len
|
||||
kvm_emulate_wrteei_0_len:
|
||||
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
|
||||
|
||||
.global kvm_emulate_mtsrin
|
||||
kvm_emulate_mtsrin:
|
||||
@ -300,3 +353,6 @@ kvm_emulate_mtsrin_orig_ins_offs:
|
||||
.global kvm_emulate_mtsrin_len
|
||||
kvm_emulate_mtsrin_len:
|
||||
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
|
||||
|
||||
.global kvm_template_end
|
||||
kvm_template_end:
|
||||
|
@ -598,7 +598,7 @@ void __init setup_arch(char **cmdline_p)
|
||||
/* Initialize the MMU context management stuff */
|
||||
mmu_context_init();
|
||||
|
||||
kvm_rma_init();
|
||||
kvm_linear_init();
|
||||
|
||||
ppc64_boot_msg(0x15, "Setup Done");
|
||||
}
|
||||
|
@ -69,6 +69,7 @@ config KVM_BOOK3S_64
|
||||
config KVM_BOOK3S_64_HV
|
||||
bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
|
||||
depends on KVM_BOOK3S_64
|
||||
select MMU_NOTIFIER
|
||||
---help---
|
||||
Support running unmodified book3s_64 guest kernels in
|
||||
virtual machines on POWER7 and PPC970 processors that have
|
||||
|
@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
|
||||
return true;
|
||||
}
|
||||
|
||||
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long *pending = &vcpu->arch.pending_exceptions;
|
||||
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
||||
@ -423,10 +423,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
regs->sprg1 = vcpu->arch.shared->sprg1;
|
||||
regs->sprg2 = vcpu->arch.shared->sprg2;
|
||||
regs->sprg3 = vcpu->arch.shared->sprg3;
|
||||
regs->sprg4 = vcpu->arch.sprg4;
|
||||
regs->sprg5 = vcpu->arch.sprg5;
|
||||
regs->sprg6 = vcpu->arch.sprg6;
|
||||
regs->sprg7 = vcpu->arch.sprg7;
|
||||
regs->sprg4 = vcpu->arch.shared->sprg4;
|
||||
regs->sprg5 = vcpu->arch.shared->sprg5;
|
||||
regs->sprg6 = vcpu->arch.shared->sprg6;
|
||||
regs->sprg7 = vcpu->arch.shared->sprg7;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
||||
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
|
||||
@ -450,10 +450,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
vcpu->arch.shared->sprg1 = regs->sprg1;
|
||||
vcpu->arch.shared->sprg2 = regs->sprg2;
|
||||
vcpu->arch.shared->sprg3 = regs->sprg3;
|
||||
vcpu->arch.sprg4 = regs->sprg4;
|
||||
vcpu->arch.sprg5 = regs->sprg5;
|
||||
vcpu->arch.sprg6 = regs->sprg6;
|
||||
vcpu->arch.sprg7 = regs->sprg7;
|
||||
vcpu->arch.shared->sprg4 = regs->sprg4;
|
||||
vcpu->arch.shared->sprg5 = regs->sprg5;
|
||||
vcpu->arch.shared->sprg6 = regs->sprg6;
|
||||
vcpu->arch.shared->sprg7 = regs->sprg7;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
||||
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
|
||||
@ -477,41 +477,10 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get (and clear) the dirty memory log for a memory slot.
|
||||
*/
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log)
|
||||
void kvmppc_decrementer_func(unsigned long data)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_vcpu *vcpu;
|
||||
ulong ga, ga_end;
|
||||
int is_dirty = 0;
|
||||
int r;
|
||||
unsigned long n;
|
||||
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_get_dirty_log(kvm, log, &is_dirty);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
/* If nothing is dirty, don't bother messing with page tables. */
|
||||
if (is_dirty) {
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
|
||||
ga = memslot->base_gfn << PAGE_SHIFT;
|
||||
ga_end = ga + (memslot->npages << PAGE_SHIFT);
|
||||
|
||||
kvm_for_each_vcpu(n, vcpu, kvm)
|
||||
kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
memset(memslot->dirty_bitmap, 0, n);
|
||||
}
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
@ -151,13 +151,15 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
bool primary = false;
|
||||
bool evict = false;
|
||||
struct hpte_cache *pte;
|
||||
int r = 0;
|
||||
|
||||
/* Get host physical address for gpa */
|
||||
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (is_error_pfn(hpaddr)) {
|
||||
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
|
||||
orig_pte->eaddr);
|
||||
return -EINVAL;
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
hpaddr <<= PAGE_SHIFT;
|
||||
|
||||
@ -249,7 +251,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
|
||||
kvmppc_mmu_hpte_cache_map(vcpu, pte);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
@ -297,12 +300,14 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
u64 gvsid;
|
||||
u32 sr;
|
||||
struct kvmppc_sid_map *map;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
int r = 0;
|
||||
|
||||
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
|
||||
/* Invalidate an entry */
|
||||
svcpu->sr[esid] = SR_INVALID;
|
||||
return -ENOENT;
|
||||
r = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
map = find_sid_vsid(vcpu, gvsid);
|
||||
@ -315,17 +320,21 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
|
||||
dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
|
||||
dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
|
||||
for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
|
||||
svcpu->sr[i] = SR_INVALID;
|
||||
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
@ -88,12 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
int vflags = 0;
|
||||
int attempt = 0;
|
||||
struct kvmppc_sid_map *map;
|
||||
int r = 0;
|
||||
|
||||
/* Get host physical address for gpa */
|
||||
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (is_error_pfn(hpaddr)) {
|
||||
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
|
||||
return -EINVAL;
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
hpaddr <<= PAGE_SHIFT;
|
||||
hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
|
||||
@ -110,7 +112,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
|
||||
vsid, orig_pte->eaddr);
|
||||
WARN_ON(true);
|
||||
return -EINVAL;
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vsid = map->host_vsid;
|
||||
@ -131,8 +134,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
|
||||
/* In case we tried normal mapping already, let's nuke old entries */
|
||||
if (attempt > 1)
|
||||
if (ppc_md.hpte_remove(hpteg) < 0)
|
||||
return -1;
|
||||
if (ppc_md.hpte_remove(hpteg) < 0) {
|
||||
r = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
|
||||
|
||||
@ -162,7 +167,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
kvmppc_mmu_hpte_cache_map(vcpu, pte);
|
||||
}
|
||||
|
||||
return 0;
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
@ -207,25 +213,30 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
|
||||
static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
int i;
|
||||
int max_slb_size = 64;
|
||||
int found_inval = -1;
|
||||
int r;
|
||||
|
||||
if (!to_svcpu(vcpu)->slb_max)
|
||||
to_svcpu(vcpu)->slb_max = 1;
|
||||
if (!svcpu->slb_max)
|
||||
svcpu->slb_max = 1;
|
||||
|
||||
/* Are we overwriting? */
|
||||
for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) {
|
||||
if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V))
|
||||
for (i = 1; i < svcpu->slb_max; i++) {
|
||||
if (!(svcpu->slb[i].esid & SLB_ESID_V))
|
||||
found_inval = i;
|
||||
else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid)
|
||||
return i;
|
||||
else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
|
||||
r = i;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found a spare entry that was invalidated before */
|
||||
if (found_inval > 0)
|
||||
return found_inval;
|
||||
if (found_inval > 0) {
|
||||
r = found_inval;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* No spare invalid entry, so create one */
|
||||
|
||||
@ -233,30 +244,35 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
|
||||
max_slb_size = mmu_slb_size;
|
||||
|
||||
/* Overflowing -> purge */
|
||||
if ((to_svcpu(vcpu)->slb_max) == max_slb_size)
|
||||
if ((svcpu->slb_max) == max_slb_size)
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
|
||||
r = to_svcpu(vcpu)->slb_max;
|
||||
to_svcpu(vcpu)->slb_max++;
|
||||
r = svcpu->slb_max;
|
||||
svcpu->slb_max++;
|
||||
|
||||
out:
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
u64 esid = eaddr >> SID_SHIFT;
|
||||
u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
|
||||
u64 slb_vsid = SLB_VSID_USER;
|
||||
u64 gvsid;
|
||||
int slb_index;
|
||||
struct kvmppc_sid_map *map;
|
||||
int r = 0;
|
||||
|
||||
slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
|
||||
|
||||
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
|
||||
/* Invalidate an entry */
|
||||
to_svcpu(vcpu)->slb[slb_index].esid = 0;
|
||||
return -ENOENT;
|
||||
svcpu->slb[slb_index].esid = 0;
|
||||
r = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
map = find_sid_vsid(vcpu, gvsid);
|
||||
@ -269,18 +285,22 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
slb_vsid &= ~SLB_VSID_KP;
|
||||
slb_esid |= slb_index;
|
||||
|
||||
to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
|
||||
to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
|
||||
svcpu->slb[slb_index].esid = slb_esid;
|
||||
svcpu->slb[slb_index].vsid = slb_vsid;
|
||||
|
||||
trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
svcpu_put(svcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
to_svcpu(vcpu)->slb_max = 1;
|
||||
to_svcpu(vcpu)->slb[0].esid = 0;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
svcpu->slb_max = 1;
|
||||
svcpu->slb[0].esid = 0;
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/kvm_ppc.h>
|
||||
@ -33,15 +34,6 @@
|
||||
#include <asm/ppc-opcode.h>
|
||||
#include <asm/cputable.h>
|
||||
|
||||
/* For now use fixed-size 16MB page table */
|
||||
#define HPT_ORDER 24
|
||||
#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
|
||||
#define HPT_HASH_MASK (HPT_NPTEG - 1)
|
||||
|
||||
/* Pages in the VRMA are 16MB pages */
|
||||
#define VRMA_PAGE_ORDER 24
|
||||
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
|
||||
|
||||
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
|
||||
#define MAX_LPID_970 63
|
||||
#define NR_LPIDS (LPID_RSVD + 1)
|
||||
@ -51,21 +43,41 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
|
||||
{
|
||||
unsigned long hpt;
|
||||
unsigned long lpid;
|
||||
struct revmap_entry *rev;
|
||||
struct kvmppc_linear_info *li;
|
||||
|
||||
/* Allocate guest's hashed page table */
|
||||
li = kvm_alloc_hpt();
|
||||
if (li) {
|
||||
/* using preallocated memory */
|
||||
hpt = (ulong)li->base_virt;
|
||||
kvm->arch.hpt_li = li;
|
||||
} else {
|
||||
/* using dynamic memory */
|
||||
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
|
||||
__GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
|
||||
}
|
||||
|
||||
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
|
||||
HPT_ORDER - PAGE_SHIFT);
|
||||
if (!hpt) {
|
||||
pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
kvm->arch.hpt_virt = hpt;
|
||||
|
||||
/* Allocate reverse map array */
|
||||
rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
|
||||
if (!rev) {
|
||||
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
|
||||
goto out_freehpt;
|
||||
}
|
||||
kvm->arch.revmap = rev;
|
||||
|
||||
/* Allocate the guest's logical partition ID */
|
||||
do {
|
||||
lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
|
||||
if (lpid >= NR_LPIDS) {
|
||||
pr_err("kvm_alloc_hpt: No LPIDs free\n");
|
||||
free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
goto out_freeboth;
|
||||
}
|
||||
} while (test_and_set_bit(lpid, lpid_inuse));
|
||||
|
||||
@ -74,37 +86,64 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
|
||||
|
||||
pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
|
||||
return 0;
|
||||
|
||||
out_freeboth:
|
||||
vfree(rev);
|
||||
out_freehpt:
|
||||
free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void kvmppc_free_hpt(struct kvm *kvm)
|
||||
{
|
||||
clear_bit(kvm->arch.lpid, lpid_inuse);
|
||||
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
|
||||
vfree(kvm->arch.revmap);
|
||||
if (kvm->arch.hpt_li)
|
||||
kvm_release_hpt(kvm->arch.hpt_li);
|
||||
else
|
||||
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
|
||||
}
|
||||
|
||||
void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
|
||||
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
|
||||
static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
|
||||
{
|
||||
return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
|
||||
}
|
||||
|
||||
/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
|
||||
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
|
||||
{
|
||||
return (pgsize == 0x10000) ? 0x1000 : 0;
|
||||
}
|
||||
|
||||
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
|
||||
unsigned long porder)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned long npages = kvm->arch.ram_npages;
|
||||
unsigned long pfn;
|
||||
unsigned long *hpte;
|
||||
unsigned long hash;
|
||||
struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
|
||||
unsigned long npages;
|
||||
unsigned long hp_v, hp_r;
|
||||
unsigned long addr, hash;
|
||||
unsigned long psize;
|
||||
unsigned long hp0, hp1;
|
||||
long ret;
|
||||
|
||||
if (!pginfo)
|
||||
return;
|
||||
psize = 1ul << porder;
|
||||
npages = memslot->npages >> (porder - PAGE_SHIFT);
|
||||
|
||||
/* VRMA can't be > 1TB */
|
||||
if (npages > 1ul << (40 - kvm->arch.ram_porder))
|
||||
npages = 1ul << (40 - kvm->arch.ram_porder);
|
||||
if (npages > 1ul << (40 - porder))
|
||||
npages = 1ul << (40 - porder);
|
||||
/* Can't use more than 1 HPTE per HPTEG */
|
||||
if (npages > HPT_NPTEG)
|
||||
npages = HPT_NPTEG;
|
||||
|
||||
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
|
||||
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
|
||||
hp1 = hpte1_pgsize_encoding(psize) |
|
||||
HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
|
||||
|
||||
for (i = 0; i < npages; ++i) {
|
||||
pfn = pginfo[i].pfn;
|
||||
if (!pfn)
|
||||
break;
|
||||
addr = i << porder;
|
||||
/* can't use hpt_hash since va > 64 bits */
|
||||
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
|
||||
/*
|
||||
@ -113,15 +152,15 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
|
||||
* at most one HPTE per HPTEG, we just assume entry 7
|
||||
* is available and use it.
|
||||
*/
|
||||
hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
|
||||
hpte += 7 * 2;
|
||||
/* HPTE low word - RPN, protection, etc. */
|
||||
hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
|
||||
HPTE_R_M | PP_RWXX;
|
||||
wmb();
|
||||
hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
|
||||
(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
|
||||
HPTE_V_LARGE | HPTE_V_VALID;
|
||||
hash = (hash << 3) + 7;
|
||||
hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
|
||||
hp_r = hp1 | addr;
|
||||
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
|
||||
if (ret != H_SUCCESS) {
|
||||
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
|
||||
addr, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -158,10 +197,814 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
|
||||
kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
|
||||
}
|
||||
|
||||
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, bool data)
|
||||
/*
|
||||
* This is called to get a reference to a guest page if there isn't
|
||||
* one already in the kvm->arch.slot_phys[][] arrays.
|
||||
*/
|
||||
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned long psize)
|
||||
{
|
||||
return -ENOENT;
|
||||
unsigned long start;
|
||||
long np, err;
|
||||
struct page *page, *hpage, *pages[1];
|
||||
unsigned long s, pgsize;
|
||||
unsigned long *physp;
|
||||
unsigned int is_io, got, pgorder;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long pfn, i, npages;
|
||||
|
||||
physp = kvm->arch.slot_phys[memslot->id];
|
||||
if (!physp)
|
||||
return -EINVAL;
|
||||
if (physp[gfn - memslot->base_gfn])
|
||||
return 0;
|
||||
|
||||
is_io = 0;
|
||||
got = 0;
|
||||
page = NULL;
|
||||
pgsize = psize;
|
||||
err = -EINVAL;
|
||||
start = gfn_to_hva_memslot(memslot, gfn);
|
||||
|
||||
/* Instantiate and get the page we want access to */
|
||||
np = get_user_pages_fast(start, 1, 1, pages);
|
||||
if (np != 1) {
|
||||
/* Look up the vma for the page */
|
||||
down_read(&current->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, start);
|
||||
if (!vma || vma->vm_start > start ||
|
||||
start + psize > vma->vm_end ||
|
||||
!(vma->vm_flags & VM_PFNMAP))
|
||||
goto up_err;
|
||||
is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
|
||||
pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
/* check alignment of pfn vs. requested page size */
|
||||
if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
|
||||
goto up_err;
|
||||
up_read(&current->mm->mmap_sem);
|
||||
|
||||
} else {
|
||||
page = pages[0];
|
||||
got = KVMPPC_GOT_PAGE;
|
||||
|
||||
/* See if this is a large page */
|
||||
s = PAGE_SIZE;
|
||||
if (PageHuge(page)) {
|
||||
hpage = compound_head(page);
|
||||
s <<= compound_order(hpage);
|
||||
/* Get the whole large page if slot alignment is ok */
|
||||
if (s > psize && slot_is_aligned(memslot, s) &&
|
||||
!(memslot->userspace_addr & (s - 1))) {
|
||||
start &= ~(s - 1);
|
||||
pgsize = s;
|
||||
page = hpage;
|
||||
}
|
||||
}
|
||||
if (s < psize)
|
||||
goto out;
|
||||
pfn = page_to_pfn(page);
|
||||
}
|
||||
|
||||
npages = pgsize >> PAGE_SHIFT;
|
||||
pgorder = __ilog2(npages);
|
||||
physp += (gfn - memslot->base_gfn) & ~(npages - 1);
|
||||
spin_lock(&kvm->arch.slot_phys_lock);
|
||||
for (i = 0; i < npages; ++i) {
|
||||
if (!physp[i]) {
|
||||
physp[i] = ((pfn + i) << PAGE_SHIFT) +
|
||||
got + is_io + pgorder;
|
||||
got = 0;
|
||||
}
|
||||
}
|
||||
spin_unlock(&kvm->arch.slot_phys_lock);
|
||||
err = 0;
|
||||
|
||||
out:
|
||||
if (got) {
|
||||
if (PageHuge(page))
|
||||
page = compound_head(page);
|
||||
put_page(page);
|
||||
}
|
||||
return err;
|
||||
|
||||
up_err:
|
||||
up_read(&current->mm->mmap_sem);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* We come here on a H_ENTER call from the guest when we are not
|
||||
* using mmu notifiers and we don't have the requested page pinned
|
||||
* already.
|
||||
*/
|
||||
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
long pte_index, unsigned long pteh, unsigned long ptel)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long psize, gpa, gfn;
|
||||
struct kvm_memory_slot *memslot;
|
||||
long ret;
|
||||
|
||||
if (kvm->arch.using_mmu_notifiers)
|
||||
goto do_insert;
|
||||
|
||||
psize = hpte_page_size(pteh, ptel);
|
||||
if (!psize)
|
||||
return H_PARAMETER;
|
||||
|
||||
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
|
||||
|
||||
/* Find the memslot (if any) for this address */
|
||||
gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
|
||||
gfn = gpa >> PAGE_SHIFT;
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
|
||||
if (!slot_is_aligned(memslot, psize))
|
||||
return H_PARAMETER;
|
||||
if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
|
||||
return H_PARAMETER;
|
||||
}
|
||||
|
||||
do_insert:
|
||||
/* Protect linux PTE lookup from page table destruction */
|
||||
rcu_read_lock_sched(); /* this disables preemption too */
|
||||
vcpu->arch.pgdir = current->mm->pgd;
|
||||
ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
|
||||
rcu_read_unlock_sched();
|
||||
if (ret == H_TOO_HARD) {
|
||||
/* this can't happen */
|
||||
pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
|
||||
ret = H_RESOURCE; /* or something */
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
|
||||
gva_t eaddr)
|
||||
{
|
||||
u64 mask;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vcpu->arch.slb_nr; i++) {
|
||||
if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
|
||||
continue;
|
||||
|
||||
if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
|
||||
mask = ESID_MASK_1T;
|
||||
else
|
||||
mask = ESID_MASK;
|
||||
|
||||
if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
|
||||
return &vcpu->arch.slb[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
|
||||
unsigned long ea)
|
||||
{
|
||||
unsigned long ra_mask;
|
||||
|
||||
ra_mask = hpte_page_size(v, r) - 1;
|
||||
return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
|
||||
}
|
||||
|
||||
/*
 * Translate a guest effective address by looking it up in the guest's
 * hashed page table.
 *
 * @vcpu:  vcpu on whose behalf the translation is done
 * @eaddr: guest effective address
 * @gpte:  filled in with the translation and access permissions
 * @data:  true for a data access (checks MSR_DR), false for an
 *         instruction fetch (checks MSR_IR)
 *
 * Returns 0 on success, -EINVAL if relocation is on and no SLB entry
 * covers @eaddr, or -ENOENT if no HPTE is found.
 */
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			struct kvmppc_pte *gpte, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_slb *slbe;
	unsigned long slb_v;
	unsigned long pp, key;
	unsigned long v, gr;
	unsigned long *hptep;
	int index;
	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);

	/* Get SLB entry */
	if (virtmode) {
		slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
		if (!slbe)
			return -EINVAL;
		slb_v = slbe->origv;
	} else {
		/* real mode access */
		slb_v = kvm->arch.vrma_slb_v;
	}

	/*
	 * The HPTE lock bit is a spin lock: hold it only with preemption
	 * disabled, as kvmppc_book3s_hv_page_fault() does, so that a
	 * preempted holder cannot leave a spinner on this CPU stuck.
	 */
	preempt_disable();

	/* Find the HPTE in the hash table */
	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
					 HPTE_V_VALID | HPTE_V_ABSENT);
	if (index < 0) {
		preempt_enable();
		return -ENOENT;
	}
	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
	v = hptep[0] & ~HPTE_V_HVLOCK;
	gr = kvm->arch.revmap[index].guest_rpte;

	/* Unlock the HPTE */
	asm volatile("lwsync" : : : "memory");
	hptep[0] = v;
	preempt_enable();

	gpte->eaddr = eaddr;
	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);

	/* Get PP bits and key for permission check */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	key &= slb_v;

	/* Calculate permissions */
	gpte->may_read = hpte_read_permission(pp, key);
	gpte->may_write = hpte_write_permission(pp, key);
	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));

	/* Storage key permission check for POWER7 */
	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);

		if (amrfield & 1)
			gpte->may_read = 0;
		if (amrfield & 2)
			gpte->may_write = 0;
	}

	/* Get the guest physical address */
	gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
	return 0;
}
|
||||
|
||||
/*
 * Cheap classification of a load/store instruction word.
 *
 * For major opcode 31 the 0x100 bit of the instruction is tested;
 * for every other major opcode the 0x10000000 bit is tested.  On
 * server processors that bit tells a store (1) from a load (0).
 * (Embedded processors have some external PID instructions that do
 * not follow this rule.)  The result is meaningless when the
 * instruction is neither a load nor a store.
 */
static int instruction_is_store(unsigned int instr)
{
	unsigned int major = instr & 0xfc000000;
	unsigned int store_bit;

	if (major == 0x7c000000)
		store_bit = 0x100;		/* major opcode 31 */
	else
		store_bit = 0x10000000;

	return (instr & store_bit) ? 1 : 0;
}
|
||||
|
||||
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa, int is_store)
|
||||
{
|
||||
int ret;
|
||||
u32 last_inst;
|
||||
unsigned long srr0 = kvmppc_get_pc(vcpu);
|
||||
|
||||
/* We try to load the last instruction. We don't let
|
||||
* emulate_instruction do it as it doesn't check what
|
||||
* kvmppc_ld returns.
|
||||
* If we fail, we just return to the guest and try executing it again.
|
||||
*/
|
||||
if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
|
||||
ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
|
||||
if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
|
||||
return RESUME_GUEST;
|
||||
vcpu->arch.last_inst = last_inst;
|
||||
}
|
||||
|
||||
/*
|
||||
* WARNING: We do not know for sure whether the instruction we just
|
||||
* read from memory is the same that caused the fault in the first
|
||||
* place. If the instruction we read is neither an load or a store,
|
||||
* then it can't access memory, so we don't need to worry about
|
||||
* enforcing access permissions. So, assuming it is a load or
|
||||
* store, we just check that its direction (load or store) is
|
||||
* consistent with the original fault, since that's what we
|
||||
* checked the access permissions against. If there is a mismatch
|
||||
* we just return and retry the instruction.
|
||||
*/
|
||||
|
||||
if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
|
||||
return RESUME_GUEST;
|
||||
|
||||
/*
|
||||
* Emulated accesses are emulated by looking at the hash for
|
||||
* translation once, then performing the access later. The
|
||||
* translation could be invalidated in the meantime in which
|
||||
* point performing the subsequent memory access on the old
|
||||
* physical address could possibly be a security hole for the
|
||||
* guest (but not the host).
|
||||
*
|
||||
* This is less of an issue for MMIO stores since they aren't
|
||||
* globally visible. It could be an issue for MMIO loads to
|
||||
* a certain extent but we'll ignore it for now.
|
||||
*/
|
||||
|
||||
vcpu->arch.paddr_accessed = gpa;
|
||||
return kvmppc_emulate_mmio(run, vcpu);
|
||||
}
|
||||
|
||||
int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long ea, unsigned long dsisr)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long *hptep, hpte[3], r;
|
||||
unsigned long mmu_seq, psize, pte_size;
|
||||
unsigned long gfn, hva, pfn;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long *rmap;
|
||||
struct revmap_entry *rev;
|
||||
struct page *page, *pages[1];
|
||||
long index, ret, npages;
|
||||
unsigned long is_io;
|
||||
unsigned int writing, write_ok;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long rcbits;
|
||||
|
||||
/*
|
||||
* Real-mode code has already searched the HPT and found the
|
||||
* entry we're interested in. Lock the entry and check that
|
||||
* it hasn't changed. If it has, just return and re-execute the
|
||||
* instruction.
|
||||
*/
|
||||
if (ea != vcpu->arch.pgfault_addr)
|
||||
return RESUME_GUEST;
|
||||
index = vcpu->arch.pgfault_index;
|
||||
hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
|
||||
rev = &kvm->arch.revmap[index];
|
||||
preempt_disable();
|
||||
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
|
||||
hpte[1] = hptep[1];
|
||||
hpte[2] = r = rev->guest_rpte;
|
||||
asm volatile("lwsync" : : : "memory");
|
||||
hptep[0] = hpte[0];
|
||||
preempt_enable();
|
||||
|
||||
if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
|
||||
hpte[1] != vcpu->arch.pgfault_hpte[1])
|
||||
return RESUME_GUEST;
|
||||
|
||||
/* Translate the logical address and get the page */
|
||||
psize = hpte_page_size(hpte[0], r);
|
||||
gfn = hpte_rpn(r, psize);
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
/* No memslot means it's an emulated MMIO region */
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
|
||||
unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
|
||||
return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
|
||||
dsisr & DSISR_ISSTORE);
|
||||
}
|
||||
|
||||
if (!kvm->arch.using_mmu_notifiers)
|
||||
return -EFAULT; /* should never get here */
|
||||
|
||||
/* used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
is_io = 0;
|
||||
pfn = 0;
|
||||
page = NULL;
|
||||
pte_size = PAGE_SIZE;
|
||||
writing = (dsisr & DSISR_ISSTORE) != 0;
|
||||
/* If writing != 0, then the HPTE must allow writing, if we get here */
|
||||
write_ok = writing;
|
||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||
npages = get_user_pages_fast(hva, 1, writing, pages);
|
||||
if (npages < 1) {
|
||||
/* Check if it's an I/O mapping */
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, hva);
|
||||
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
|
||||
(vma->vm_flags & VM_PFNMAP)) {
|
||||
pfn = vma->vm_pgoff +
|
||||
((hva - vma->vm_start) >> PAGE_SHIFT);
|
||||
pte_size = psize;
|
||||
is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
|
||||
write_ok = vma->vm_flags & VM_WRITE;
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (!pfn)
|
||||
return -EFAULT;
|
||||
} else {
|
||||
page = pages[0];
|
||||
if (PageHuge(page)) {
|
||||
page = compound_head(page);
|
||||
pte_size <<= compound_order(page);
|
||||
}
|
||||
/* if the guest wants write access, see if that is OK */
|
||||
if (!writing && hpte_is_writable(r)) {
|
||||
pte_t *ptep, pte;
|
||||
|
||||
/*
|
||||
* We need to protect against page table destruction
|
||||
* while looking up and updating the pte.
|
||||
*/
|
||||
rcu_read_lock_sched();
|
||||
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
|
||||
hva, NULL);
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
pte = kvmppc_read_update_linux_pte(ptep, 1);
|
||||
if (pte_write(pte))
|
||||
write_ok = 1;
|
||||
}
|
||||
rcu_read_unlock_sched();
|
||||
}
|
||||
pfn = page_to_pfn(page);
|
||||
}
|
||||
|
||||
ret = -EFAULT;
|
||||
if (psize > pte_size)
|
||||
goto out_put;
|
||||
|
||||
/* Check WIMG vs. the actual page we're accessing */
|
||||
if (!hpte_cache_flags_ok(r, is_io)) {
|
||||
if (is_io)
|
||||
return -EFAULT;
|
||||
/*
|
||||
* Allow guest to map emulated device memory as
|
||||
* uncacheable, but actually make it cacheable.
|
||||
*/
|
||||
r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
|
||||
}
|
||||
|
||||
/* Set the HPTE to point to pfn */
|
||||
r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
|
||||
if (hpte_is_writable(r) && !write_ok)
|
||||
r = hpte_make_readonly(r);
|
||||
ret = RESUME_GUEST;
|
||||
preempt_disable();
|
||||
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
|
||||
rev->guest_rpte != hpte[2])
|
||||
/* HPTE has been changed under us; let the guest retry */
|
||||
goto out_unlock;
|
||||
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
|
||||
|
||||
rmap = &memslot->rmap[gfn - memslot->base_gfn];
|
||||
lock_rmap(rmap);
|
||||
|
||||
/* Check if we might have been invalidated; let the guest retry if so */
|
||||
ret = RESUME_GUEST;
|
||||
if (mmu_notifier_retry(vcpu, mmu_seq)) {
|
||||
unlock_rmap(rmap);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
|
||||
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
|
||||
r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
|
||||
|
||||
if (hptep[0] & HPTE_V_VALID) {
|
||||
/* HPTE was previously valid, so we need to invalidate it */
|
||||
unlock_rmap(rmap);
|
||||
hptep[0] |= HPTE_V_ABSENT;
|
||||
kvmppc_invalidate_hpte(kvm, hptep, index);
|
||||
/* don't lose previous R and C bits */
|
||||
r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
|
||||
} else {
|
||||
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
|
||||
}
|
||||
|
||||
hptep[1] = r;
|
||||
eieio();
|
||||
hptep[0] = hpte[0];
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
preempt_enable();
|
||||
if (page && hpte_is_writable(r))
|
||||
SetPageDirty(page);
|
||||
|
||||
out_put:
|
||||
if (page)
|
||||
put_page(page);
|
||||
return ret;
|
||||
|
||||
out_unlock:
|
||||
hptep[0] &= ~HPTE_V_HVLOCK;
|
||||
preempt_enable();
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
||||
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long gfn))
|
||||
{
|
||||
int ret;
|
||||
int retval = 0;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
unsigned long start = memslot->userspace_addr;
|
||||
unsigned long end;
|
||||
|
||||
end = start + (memslot->npages << PAGE_SHIFT);
|
||||
if (hva >= start && hva < end) {
|
||||
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
|
||||
|
||||
ret = handler(kvm, &memslot->rmap[gfn_offset],
|
||||
memslot->base_gfn + gfn_offset);
|
||||
retval |= ret;
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long h, i, j;
|
||||
unsigned long *hptep;
|
||||
unsigned long ptel, psize, rcbits;
|
||||
|
||||
for (;;) {
|
||||
lock_rmap(rmapp);
|
||||
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
|
||||
unlock_rmap(rmapp);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid an ABBA deadlock with the HPTE lock bit,
|
||||
* we can't spin on the HPTE lock while holding the
|
||||
* rmap chain lock.
|
||||
*/
|
||||
i = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
|
||||
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
|
||||
/* unlock rmap before spinning on the HPTE lock */
|
||||
unlock_rmap(rmapp);
|
||||
while (hptep[0] & HPTE_V_HVLOCK)
|
||||
cpu_relax();
|
||||
continue;
|
||||
}
|
||||
j = rev[i].forw;
|
||||
if (j == i) {
|
||||
/* chain is now empty */
|
||||
*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
|
||||
} else {
|
||||
/* remove i from chain */
|
||||
h = rev[i].back;
|
||||
rev[h].forw = j;
|
||||
rev[j].back = h;
|
||||
rev[i].forw = rev[i].back = i;
|
||||
*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
|
||||
}
|
||||
|
||||
/* Now check and modify the HPTE */
|
||||
ptel = rev[i].guest_rpte;
|
||||
psize = hpte_page_size(hptep[0], ptel);
|
||||
if ((hptep[0] & HPTE_V_VALID) &&
|
||||
hpte_rpn(ptel, psize) == gfn) {
|
||||
hptep[0] |= HPTE_V_ABSENT;
|
||||
kvmppc_invalidate_hpte(kvm, hptep, i);
|
||||
/* Harvest R and C */
|
||||
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
|
||||
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
|
||||
rev[i].guest_rpte = ptel | rcbits;
|
||||
}
|
||||
unlock_rmap(rmapp);
|
||||
hptep[0] &= ~HPTE_V_HVLOCK;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
if (kvm->arch.using_mmu_notifiers)
|
||||
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long head, i, j;
|
||||
unsigned long *hptep;
|
||||
int ret = 0;
|
||||
|
||||
retry:
|
||||
lock_rmap(rmapp);
|
||||
if (*rmapp & KVMPPC_RMAP_REFERENCED) {
|
||||
*rmapp &= ~KVMPPC_RMAP_REFERENCED;
|
||||
ret = 1;
|
||||
}
|
||||
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
|
||||
unlock_rmap(rmapp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
|
||||
/* If this HPTE isn't referenced, ignore it */
|
||||
if (!(hptep[1] & HPTE_R_R))
|
||||
continue;
|
||||
|
||||
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
|
||||
/* unlock rmap before spinning on the HPTE lock */
|
||||
unlock_rmap(rmapp);
|
||||
while (hptep[0] & HPTE_V_HVLOCK)
|
||||
cpu_relax();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* Now check and modify the HPTE */
|
||||
if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
|
||||
kvmppc_clear_ref_hpte(kvm, hptep, i);
|
||||
rev[i].guest_rpte |= HPTE_R_R;
|
||||
ret = 1;
|
||||
}
|
||||
hptep[0] &= ~HPTE_V_HVLOCK;
|
||||
} while ((i = j) != head);
|
||||
|
||||
unlock_rmap(rmapp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
if (!kvm->arch.using_mmu_notifiers)
|
||||
return 0;
|
||||
return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
|
||||
}
|
||||
|
||||
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long head, i, j;
|
||||
unsigned long *hp;
|
||||
int ret = 1;
|
||||
|
||||
if (*rmapp & KVMPPC_RMAP_REFERENCED)
|
||||
return 1;
|
||||
|
||||
lock_rmap(rmapp);
|
||||
if (*rmapp & KVMPPC_RMAP_REFERENCED)
|
||||
goto out;
|
||||
|
||||
if (*rmapp & KVMPPC_RMAP_PRESENT) {
|
||||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
if (hp[1] & HPTE_R_R)
|
||||
goto out;
|
||||
} while ((i = j) != head);
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
unlock_rmap(rmapp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
if (!kvm->arch.using_mmu_notifiers)
|
||||
return 0;
|
||||
return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
|
||||
}
|
||||
|
||||
/*
 * The host PTE for @hva is changing: drop the guest mappings of the
 * page(s) it backs.  @pte itself is not consulted here.  Does nothing
 * unless the VM uses mmu notifiers.
 */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	if (kvm->arch.using_mmu_notifiers)
		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
}
|
||||
|
||||
static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long head, i, j;
|
||||
unsigned long *hptep;
|
||||
int ret = 0;
|
||||
|
||||
retry:
|
||||
lock_rmap(rmapp);
|
||||
if (*rmapp & KVMPPC_RMAP_CHANGED) {
|
||||
*rmapp &= ~KVMPPC_RMAP_CHANGED;
|
||||
ret = 1;
|
||||
}
|
||||
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
|
||||
unlock_rmap(rmapp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
i = head = *rmapp & KVMPPC_RMAP_INDEX;
|
||||
do {
|
||||
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
|
||||
j = rev[i].forw;
|
||||
|
||||
if (!(hptep[1] & HPTE_R_C))
|
||||
continue;
|
||||
|
||||
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
|
||||
/* unlock rmap before spinning on the HPTE lock */
|
||||
unlock_rmap(rmapp);
|
||||
while (hptep[0] & HPTE_V_HVLOCK)
|
||||
cpu_relax();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* Now check and modify the HPTE */
|
||||
if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
|
||||
/* need to make it temporarily absent to clear C */
|
||||
hptep[0] |= HPTE_V_ABSENT;
|
||||
kvmppc_invalidate_hpte(kvm, hptep, i);
|
||||
hptep[1] &= ~HPTE_R_C;
|
||||
eieio();
|
||||
hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
|
||||
rev[i].guest_rpte |= HPTE_R_C;
|
||||
ret = 1;
|
||||
}
|
||||
hptep[0] &= ~HPTE_V_HVLOCK;
|
||||
} while ((i = j) != head);
|
||||
|
||||
unlock_rmap(rmapp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned long *rmapp, *map;
|
||||
|
||||
preempt_disable();
|
||||
rmapp = memslot->rmap;
|
||||
map = memslot->dirty_bitmap;
|
||||
for (i = 0; i < memslot->npages; ++i) {
|
||||
if (kvm_test_clear_dirty(kvm, rmapp))
|
||||
__set_bit_le(i, map);
|
||||
++rmapp;
|
||||
}
|
||||
preempt_enable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
|
||||
unsigned long *nb_ret)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long gfn = gpa >> PAGE_SHIFT;
|
||||
struct page *page, *pages[1];
|
||||
int npages;
|
||||
unsigned long hva, psize, offset;
|
||||
unsigned long pa;
|
||||
unsigned long *physp;
|
||||
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
|
||||
return NULL;
|
||||
if (!kvm->arch.using_mmu_notifiers) {
|
||||
physp = kvm->arch.slot_phys[memslot->id];
|
||||
if (!physp)
|
||||
return NULL;
|
||||
physp += gfn - memslot->base_gfn;
|
||||
pa = *physp;
|
||||
if (!pa) {
|
||||
if (kvmppc_get_guest_page(kvm, gfn, memslot,
|
||||
PAGE_SIZE) < 0)
|
||||
return NULL;
|
||||
pa = *physp;
|
||||
}
|
||||
page = pfn_to_page(pa >> PAGE_SHIFT);
|
||||
} else {
|
||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||
npages = get_user_pages_fast(hva, 1, 1, pages);
|
||||
if (npages < 1)
|
||||
return NULL;
|
||||
page = pages[0];
|
||||
}
|
||||
psize = PAGE_SIZE;
|
||||
if (PageHuge(page)) {
|
||||
page = compound_head(page);
|
||||
psize <<= compound_order(page);
|
||||
}
|
||||
if (!kvm->arch.using_mmu_notifiers)
|
||||
get_page(page);
|
||||
offset = gpa & (psize - 1);
|
||||
if (nb_ret)
|
||||
*nb_ret = psize - offset;
|
||||
return page_address(page) + offset;
|
||||
}
|
||||
|
||||
/*
 * Drop the page reference for a kernel virtual address @va.  The
 * reference is released on the compound head of the page containing
 * @va.  @kvm is not used.
 */
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
{
	struct page *head = compound_head(virt_to_page(va));

	put_page(head);
}
|
||||
|
||||
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
|
||||
|
@ -230,9 +230,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
|
||||
if ((r == -ENOENT) || (r == -EPERM)) {
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu;
|
||||
|
||||
svcpu = svcpu_get(vcpu);
|
||||
*advance = 0;
|
||||
vcpu->arch.shared->dar = vaddr;
|
||||
to_svcpu(vcpu)->fault_dar = vaddr;
|
||||
svcpu->fault_dar = vaddr;
|
||||
|
||||
dsisr = DSISR_ISSTORE;
|
||||
if (r == -ENOENT)
|
||||
@ -241,7 +244,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
dsisr |= DSISR_PROTFAULT;
|
||||
|
||||
vcpu->arch.shared->dsisr = dsisr;
|
||||
to_svcpu(vcpu)->fault_dsisr = dsisr;
|
||||
svcpu->fault_dsisr = dsisr;
|
||||
svcpu_put(svcpu);
|
||||
|
||||
kvmppc_book3s_queue_irqprio(vcpu,
|
||||
BOOK3S_INTERRUPT_DATA_STORAGE);
|
||||
|
@ -48,22 +48,14 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/highmem.h>
|
||||
|
||||
/*
|
||||
* For now, limit memory to 64GB and require it to be large pages.
|
||||
* This value is chosen because it makes the ram_pginfo array be
|
||||
* 64kB in size, which is about as large as we want to be trying
|
||||
* to allocate with kmalloc.
|
||||
*/
|
||||
#define MAX_MEM_ORDER 36
|
||||
|
||||
#define LARGE_PAGE_ORDER 24 /* 16MB pages */
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
/* #define EXIT_DEBUG */
|
||||
/* #define EXIT_DEBUG_SIMPLE */
|
||||
/* #define EXIT_DEBUG_INT */
|
||||
|
||||
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
|
||||
static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
@ -146,10 +138,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
|
||||
unsigned long vcpuid, unsigned long vpa)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long pg_index, ra, len;
|
||||
unsigned long pg_offset;
|
||||
unsigned long len, nb;
|
||||
void *va;
|
||||
struct kvm_vcpu *tvcpu;
|
||||
int err = H_PARAMETER;
|
||||
|
||||
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
|
||||
if (!tvcpu)
|
||||
@ -162,45 +154,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
|
||||
if (flags < 4) {
|
||||
if (vpa & 0x7f)
|
||||
return H_PARAMETER;
|
||||
if (flags >= 2 && !tvcpu->arch.vpa)
|
||||
return H_RESOURCE;
|
||||
/* registering new area; convert logical addr to real */
|
||||
pg_index = vpa >> kvm->arch.ram_porder;
|
||||
pg_offset = vpa & (kvm->arch.ram_psize - 1);
|
||||
if (pg_index >= kvm->arch.ram_npages)
|
||||
va = kvmppc_pin_guest_page(kvm, vpa, &nb);
|
||||
if (va == NULL)
|
||||
return H_PARAMETER;
|
||||
if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
|
||||
return H_PARAMETER;
|
||||
ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
|
||||
ra |= pg_offset;
|
||||
va = __va(ra);
|
||||
if (flags <= 1)
|
||||
len = *(unsigned short *)(va + 4);
|
||||
else
|
||||
len = *(unsigned int *)(va + 4);
|
||||
if (pg_offset + len > kvm->arch.ram_psize)
|
||||
return H_PARAMETER;
|
||||
if (len > nb)
|
||||
goto out_unpin;
|
||||
switch (flags) {
|
||||
case 1: /* register VPA */
|
||||
if (len < 640)
|
||||
return H_PARAMETER;
|
||||
goto out_unpin;
|
||||
if (tvcpu->arch.vpa)
|
||||
kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
|
||||
tvcpu->arch.vpa = va;
|
||||
init_vpa(vcpu, va);
|
||||
break;
|
||||
case 2: /* register DTL */
|
||||
if (len < 48)
|
||||
return H_PARAMETER;
|
||||
if (!tvcpu->arch.vpa)
|
||||
return H_RESOURCE;
|
||||
goto out_unpin;
|
||||
len -= len % 48;
|
||||
if (tvcpu->arch.dtl)
|
||||
kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
|
||||
tvcpu->arch.dtl = va;
|
||||
tvcpu->arch.dtl_end = va + len;
|
||||
break;
|
||||
case 3: /* register SLB shadow buffer */
|
||||
if (len < 8)
|
||||
return H_PARAMETER;
|
||||
if (!tvcpu->arch.vpa)
|
||||
return H_RESOURCE;
|
||||
tvcpu->arch.slb_shadow = va;
|
||||
len = (len - 16) / 16;
|
||||
if (len < 16)
|
||||
goto out_unpin;
|
||||
if (tvcpu->arch.slb_shadow)
|
||||
kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
|
||||
tvcpu->arch.slb_shadow = va;
|
||||
break;
|
||||
}
|
||||
@ -209,17 +197,30 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
|
||||
case 5: /* unregister VPA */
|
||||
if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
|
||||
return H_RESOURCE;
|
||||
if (!tvcpu->arch.vpa)
|
||||
break;
|
||||
kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
|
||||
tvcpu->arch.vpa = NULL;
|
||||
break;
|
||||
case 6: /* unregister DTL */
|
||||
if (!tvcpu->arch.dtl)
|
||||
break;
|
||||
kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
|
||||
tvcpu->arch.dtl = NULL;
|
||||
break;
|
||||
case 7: /* unregister SLB shadow buffer */
|
||||
if (!tvcpu->arch.slb_shadow)
|
||||
break;
|
||||
kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
|
||||
tvcpu->arch.slb_shadow = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return H_SUCCESS;
|
||||
|
||||
out_unpin:
|
||||
kvmppc_unpin_guest_page(kvm, va);
|
||||
return err;
|
||||
}
|
||||
|
||||
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
||||
@ -229,6 +230,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
||||
struct kvm_vcpu *tvcpu;
|
||||
|
||||
switch (req) {
|
||||
case H_ENTER:
|
||||
ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
|
||||
kvmppc_get_gpr(vcpu, 5),
|
||||
kvmppc_get_gpr(vcpu, 6),
|
||||
kvmppc_get_gpr(vcpu, 7));
|
||||
break;
|
||||
case H_CEDE:
|
||||
break;
|
||||
case H_PROD:
|
||||
@ -318,20 +325,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* We get these next two if the guest does a bad real-mode access,
|
||||
* as we have enabled VRMA (virtualized real mode area) mode in the
|
||||
* LPCR. We just generate an appropriate DSI/ISI to the guest.
|
||||
* We get these next two if the guest accesses a page which it thinks
|
||||
* it has mapped but which is not actually present, either because
|
||||
* it is for an emulated I/O device or because the corresponding
|
||||
* host page has been paged out. Any other HDSI/HISI interrupts
|
||||
* have been handled already.
|
||||
*/
|
||||
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
|
||||
vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
|
||||
vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
|
||||
kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
|
||||
r = RESUME_GUEST;
|
||||
r = kvmppc_book3s_hv_page_fault(run, vcpu,
|
||||
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
|
||||
break;
|
||||
case BOOK3S_INTERRUPT_H_INST_STORAGE:
|
||||
kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
|
||||
0x08000000);
|
||||
r = RESUME_GUEST;
|
||||
r = kvmppc_book3s_hv_page_fault(run, vcpu,
|
||||
kvmppc_get_pc(vcpu), 0);
|
||||
break;
|
||||
/*
|
||||
* This occurs if the guest executes an illegal instruction.
|
||||
@ -391,6 +397,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_PPC_HIOR:
|
||||
r = put_user(0, (u64 __user *)reg->addr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_PPC_HIOR:
|
||||
{
|
||||
u64 hior;
|
||||
/* Only allow this to be set to zero */
|
||||
r = get_user(hior, (u64 __user *)reg->addr);
|
||||
if (!r && (hior != 0))
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_core_check_processor_compat(void)
|
||||
{
|
||||
if (cpu_has_feature(CPU_FTR_HVMODE))
|
||||
@ -410,7 +452,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
goto out;
|
||||
|
||||
err = -ENOMEM;
|
||||
vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
|
||||
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
|
||||
if (!vcpu)
|
||||
goto out;
|
||||
|
||||
@ -462,15 +504,21 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
return vcpu;
|
||||
|
||||
free_vcpu:
|
||||
kfree(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.dtl)
|
||||
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
|
||||
if (vcpu->arch.slb_shadow)
|
||||
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
|
||||
if (vcpu->arch.vpa)
|
||||
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kfree(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu);
|
||||
}
|
||||
|
||||
static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
|
||||
@ -481,7 +529,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
|
||||
if (now > vcpu->arch.dec_expires) {
|
||||
/* decrementer has already gone negative */
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
return;
|
||||
}
|
||||
dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
|
||||
@ -796,7 +844,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
|
||||
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
|
||||
arch.run_list) {
|
||||
kvmppc_core_deliver_interrupts(v);
|
||||
kvmppc_core_prepare_to_enter(v);
|
||||
if (signal_pending(v->arch.run_task)) {
|
||||
kvmppc_remove_runnable(vc, v);
|
||||
v->stat.signal_exits++;
|
||||
@ -835,20 +883,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
|
||||
/* No need to go into the guest when all we'll do is come back out */
|
||||
if (signal_pending(current)) {
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
/* On PPC970, check that we have an RMA region */
|
||||
if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
|
||||
return -EPERM;
|
||||
/* On the first time here, set up VRMA or RMA */
|
||||
if (!vcpu->kvm->arch.rma_setup_done) {
|
||||
r = kvmppc_hv_setup_rma(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
flush_fp_to_thread(current);
|
||||
flush_altivec_to_thread(current);
|
||||
flush_vsx_to_thread(current);
|
||||
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
|
||||
vcpu->arch.pgdir = current->mm->pgd;
|
||||
|
||||
do {
|
||||
r = kvmppc_run_vcpu(run, vcpu);
|
||||
@ -856,7 +910,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
|
||||
!(vcpu->arch.shregs.msr & MSR_PR)) {
|
||||
r = kvmppc_pseries_do_hcall(vcpu);
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
}
|
||||
} while (r == RESUME_GUEST);
|
||||
return r;
|
||||
@ -1000,7 +1054,7 @@ static inline int lpcr_rmls(unsigned long rma_size)
|
||||
|
||||
static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct kvmppc_rma_info *ri = vma->vm_file->private_data;
|
||||
struct kvmppc_linear_info *ri = vma->vm_file->private_data;
|
||||
struct page *page;
|
||||
|
||||
if (vmf->pgoff >= ri->npages)
|
||||
@ -1025,7 +1079,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
|
||||
static int kvm_rma_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct kvmppc_rma_info *ri = filp->private_data;
|
||||
struct kvmppc_linear_info *ri = filp->private_data;
|
||||
|
||||
kvm_release_rma(ri);
|
||||
return 0;
|
||||
@ -1038,7 +1092,7 @@ static struct file_operations kvm_rma_fops = {
|
||||
|
||||
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
|
||||
{
|
||||
struct kvmppc_rma_info *ri;
|
||||
struct kvmppc_linear_info *ri;
|
||||
long fd;
|
||||
|
||||
ri = kvm_alloc_rma();
|
||||
@ -1053,89 +1107,189 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
|
||||
return fd;
|
||||
}
|
||||
|
||||
static struct page *hva_to_page(unsigned long addr)
|
||||
/*
|
||||
* Get (and clear) the dirty memory log for a memory slot.
|
||||
*/
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
struct page *page[1];
|
||||
int npages;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r;
|
||||
unsigned long n;
|
||||
|
||||
might_sleep();
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
npages = get_user_pages_fast(addr, 1, 1, page);
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_MEMORY_SLOTS)
|
||||
goto out;
|
||||
|
||||
if (unlikely(npages != 1))
|
||||
return 0;
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
r = -ENOENT;
|
||||
if (!memslot->dirty_bitmap)
|
||||
goto out;
|
||||
|
||||
return page[0];
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
memset(memslot->dirty_bitmap, 0, n);
|
||||
|
||||
r = kvmppc_hv_get_dirty_log(kvm, memslot);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static unsigned long slb_pgsize_encoding(unsigned long psize)
|
||||
{
|
||||
unsigned long senc = 0;
|
||||
|
||||
if (psize > 0x1000) {
|
||||
senc = SLB_VSID_L;
|
||||
if (psize == 0x10000)
|
||||
senc |= SLB_VSID_LP_01;
|
||||
}
|
||||
return senc;
|
||||
}
|
||||
|
||||
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
unsigned long psize, porder;
|
||||
unsigned long i, npages, totalpages;
|
||||
unsigned long pg_ix;
|
||||
struct kvmppc_pginfo *pginfo;
|
||||
unsigned long hva;
|
||||
struct kvmppc_rma_info *ri = NULL;
|
||||
unsigned long npages;
|
||||
unsigned long *phys;
|
||||
|
||||
/* Allocate a slot_phys array */
|
||||
phys = kvm->arch.slot_phys[mem->slot];
|
||||
if (!kvm->arch.using_mmu_notifiers && !phys) {
|
||||
npages = mem->memory_size >> PAGE_SHIFT;
|
||||
phys = vzalloc(npages * sizeof(unsigned long));
|
||||
if (!phys)
|
||||
return -ENOMEM;
|
||||
kvm->arch.slot_phys[mem->slot] = phys;
|
||||
kvm->arch.slot_npages[mem->slot] = npages;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unpin_slot(struct kvm *kvm, int slot_id)
|
||||
{
|
||||
unsigned long *physp;
|
||||
unsigned long j, npages, pfn;
|
||||
struct page *page;
|
||||
|
||||
/* For now, only allow 16MB pages */
|
||||
porder = LARGE_PAGE_ORDER;
|
||||
psize = 1ul << porder;
|
||||
if ((mem->memory_size & (psize - 1)) ||
|
||||
(mem->guest_phys_addr & (psize - 1))) {
|
||||
pr_err("bad memory_size=%llx @ %llx\n",
|
||||
mem->memory_size, mem->guest_phys_addr);
|
||||
return -EINVAL;
|
||||
physp = kvm->arch.slot_phys[slot_id];
|
||||
npages = kvm->arch.slot_npages[slot_id];
|
||||
if (physp) {
|
||||
spin_lock(&kvm->arch.slot_phys_lock);
|
||||
for (j = 0; j < npages; j++) {
|
||||
if (!(physp[j] & KVMPPC_GOT_PAGE))
|
||||
continue;
|
||||
pfn = physp[j] >> PAGE_SHIFT;
|
||||
page = pfn_to_page(pfn);
|
||||
if (PageHuge(page))
|
||||
page = compound_head(page);
|
||||
SetPageDirty(page);
|
||||
put_page(page);
|
||||
}
|
||||
kvm->arch.slot_phys[slot_id] = NULL;
|
||||
spin_unlock(&kvm->arch.slot_phys_lock);
|
||||
vfree(physp);
|
||||
}
|
||||
}
|
||||
|
||||
npages = mem->memory_size >> porder;
|
||||
totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
}
|
||||
|
||||
/* More memory than we have space to track? */
|
||||
if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
|
||||
return -EINVAL;
|
||||
static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int err = 0;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvmppc_linear_info *ri = NULL;
|
||||
unsigned long hva;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long lpcr, senc;
|
||||
unsigned long psize, porder;
|
||||
unsigned long rma_size;
|
||||
unsigned long rmls;
|
||||
unsigned long *physp;
|
||||
unsigned long i, npages;
|
||||
|
||||
/* Do we already have an RMA registered? */
|
||||
if (mem->guest_phys_addr == 0 && kvm->arch.rma)
|
||||
return -EINVAL;
|
||||
mutex_lock(&kvm->lock);
|
||||
if (kvm->arch.rma_setup_done)
|
||||
goto out; /* another vcpu beat us to it */
|
||||
|
||||
if (totalpages > kvm->arch.ram_npages)
|
||||
kvm->arch.ram_npages = totalpages;
|
||||
/* Look up the memslot for guest physical address 0 */
|
||||
memslot = gfn_to_memslot(kvm, 0);
|
||||
|
||||
/* We must have some memory at 0 by now */
|
||||
err = -EINVAL;
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
|
||||
goto out;
|
||||
|
||||
/* Look up the VMA for the start of this memory slot */
|
||||
hva = memslot->userspace_addr;
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, hva);
|
||||
if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
|
||||
goto up_out;
|
||||
|
||||
psize = vma_kernel_pagesize(vma);
|
||||
porder = __ilog2(psize);
|
||||
|
||||
/* Is this one of our preallocated RMAs? */
|
||||
if (mem->guest_phys_addr == 0) {
|
||||
struct vm_area_struct *vma;
|
||||
if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
|
||||
hva == vma->vm_start)
|
||||
ri = vma->vm_file->private_data;
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, mem->userspace_addr);
|
||||
if (vma && vma->vm_file &&
|
||||
vma->vm_file->f_op == &kvm_rma_fops &&
|
||||
mem->userspace_addr == vma->vm_start)
|
||||
ri = vma->vm_file->private_data;
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
|
||||
pr_err("CPU requires an RMO\n");
|
||||
return -EINVAL;
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
if (!ri) {
|
||||
/* On POWER7, use VRMA; on PPC970, give up */
|
||||
err = -EPERM;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
|
||||
pr_err("KVM: CPU requires an RMO\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (ri) {
|
||||
unsigned long rma_size;
|
||||
unsigned long lpcr;
|
||||
long rmls;
|
||||
/* We can handle 4k, 64k or 16M pages in the VRMA */
|
||||
err = -EINVAL;
|
||||
if (!(psize == 0x1000 || psize == 0x10000 ||
|
||||
psize == 0x1000000))
|
||||
goto out;
|
||||
|
||||
rma_size = ri->npages << PAGE_SHIFT;
|
||||
if (rma_size > mem->memory_size)
|
||||
rma_size = mem->memory_size;
|
||||
/* Update VRMASD field in the LPCR */
|
||||
senc = slb_pgsize_encoding(psize);
|
||||
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
|
||||
(VRMA_VSID << SLB_VSID_SHIFT_1T);
|
||||
lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
|
||||
lpcr |= senc << (LPCR_VRMASD_SH - 4);
|
||||
kvm->arch.lpcr = lpcr;
|
||||
|
||||
/* Create HPTEs in the hash page table for the VRMA */
|
||||
kvmppc_map_vrma(vcpu, memslot, porder);
|
||||
|
||||
} else {
|
||||
/* Set up to use an RMO region */
|
||||
rma_size = ri->npages;
|
||||
if (rma_size > memslot->npages)
|
||||
rma_size = memslot->npages;
|
||||
rma_size <<= PAGE_SHIFT;
|
||||
rmls = lpcr_rmls(rma_size);
|
||||
err = -EINVAL;
|
||||
if (rmls < 0) {
|
||||
pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
|
||||
return -EINVAL;
|
||||
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
|
||||
goto out;
|
||||
}
|
||||
atomic_inc(&ri->use_count);
|
||||
kvm->arch.rma = ri;
|
||||
kvm->arch.n_rma_pages = rma_size >> porder;
|
||||
|
||||
/* Update LPCR and RMOR */
|
||||
lpcr = kvm->arch.lpcr;
|
||||
@ -1155,53 +1309,35 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
|
||||
}
|
||||
kvm->arch.lpcr = lpcr;
|
||||
pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
|
||||
pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
|
||||
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
|
||||
|
||||
/* Initialize phys addrs of pages in RMO */
|
||||
npages = ri->npages;
|
||||
porder = __ilog2(npages);
|
||||
physp = kvm->arch.slot_phys[memslot->id];
|
||||
spin_lock(&kvm->arch.slot_phys_lock);
|
||||
for (i = 0; i < npages; ++i)
|
||||
physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
|
||||
spin_unlock(&kvm->arch.slot_phys_lock);
|
||||
}
|
||||
|
||||
pg_ix = mem->guest_phys_addr >> porder;
|
||||
pginfo = kvm->arch.ram_pginfo + pg_ix;
|
||||
for (i = 0; i < npages; ++i, ++pg_ix) {
|
||||
if (ri && pg_ix < kvm->arch.n_rma_pages) {
|
||||
pginfo[i].pfn = ri->base_pfn +
|
||||
(pg_ix << (porder - PAGE_SHIFT));
|
||||
continue;
|
||||
}
|
||||
hva = mem->userspace_addr + (i << porder);
|
||||
page = hva_to_page(hva);
|
||||
if (!page) {
|
||||
pr_err("oops, no pfn for hva %lx\n", hva);
|
||||
goto err;
|
||||
}
|
||||
/* Check it's a 16MB page */
|
||||
if (!PageHead(page) ||
|
||||
compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
|
||||
pr_err("page at %lx isn't 16MB (o=%d)\n",
|
||||
hva, compound_order(page));
|
||||
goto err;
|
||||
}
|
||||
pginfo[i].pfn = page_to_pfn(page);
|
||||
}
|
||||
/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
|
||||
smp_wmb();
|
||||
kvm->arch.rma_setup_done = 1;
|
||||
err = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
|
||||
!kvm->arch.rma)
|
||||
kvmppc_map_vrma(kvm, mem);
|
||||
up_out:
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
goto out;
|
||||
}
|
||||
|
||||
int kvmppc_core_init_vm(struct kvm *kvm)
|
||||
{
|
||||
long r;
|
||||
unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
|
||||
long err = -ENOMEM;
|
||||
unsigned long lpcr;
|
||||
|
||||
/* Allocate hashed page table */
|
||||
@ -1211,19 +1347,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
|
||||
|
||||
kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
|
||||
GFP_KERNEL);
|
||||
if (!kvm->arch.ram_pginfo) {
|
||||
pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
|
||||
npages * sizeof(struct kvmppc_pginfo));
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
kvm->arch.ram_npages = 0;
|
||||
kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
|
||||
kvm->arch.ram_porder = LARGE_PAGE_ORDER;
|
||||
kvm->arch.rma = NULL;
|
||||
kvm->arch.n_rma_pages = 0;
|
||||
|
||||
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
|
||||
|
||||
@ -1241,30 +1365,25 @@ int kvmppc_core_init_vm(struct kvm *kvm)
|
||||
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
|
||||
lpcr &= LPCR_PECE | LPCR_LPES;
|
||||
lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
|
||||
LPCR_VPM0 | LPCR_VRMA_L;
|
||||
LPCR_VPM0 | LPCR_VPM1;
|
||||
kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
|
||||
(VRMA_VSID << SLB_VSID_SHIFT_1T);
|
||||
}
|
||||
kvm->arch.lpcr = lpcr;
|
||||
|
||||
kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
|
||||
spin_lock_init(&kvm->arch.slot_phys_lock);
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
kvmppc_free_hpt(kvm);
|
||||
return err;
|
||||
}
|
||||
|
||||
void kvmppc_core_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvmppc_pginfo *pginfo;
|
||||
unsigned long i;
|
||||
|
||||
if (kvm->arch.ram_pginfo) {
|
||||
pginfo = kvm->arch.ram_pginfo;
|
||||
kvm->arch.ram_pginfo = NULL;
|
||||
for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
|
||||
if (pginfo[i].pfn)
|
||||
put_page(pfn_to_page(pginfo[i].pfn));
|
||||
kfree(pginfo);
|
||||
}
|
||||
if (!kvm->arch.using_mmu_notifiers)
|
||||
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
|
||||
unpin_slot(kvm, i);
|
||||
|
||||
if (kvm->arch.rma) {
|
||||
kvm_release_rma(kvm->arch.rma);
|
||||
kvm->arch.rma = NULL;
|
||||
|
@ -18,6 +18,15 @@
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
|
||||
#define KVM_LINEAR_RMA 0
|
||||
#define KVM_LINEAR_HPT 1
|
||||
|
||||
static void __init kvm_linear_init_one(ulong size, int count, int type);
|
||||
static struct kvmppc_linear_info *kvm_alloc_linear(int type);
|
||||
static void kvm_release_linear(struct kvmppc_linear_info *ri);
|
||||
|
||||
/*************** RMA *************/
|
||||
|
||||
/*
|
||||
* This maintains a list of RMAs (real mode areas) for KVM guests to use.
|
||||
* Each RMA has to be physically contiguous and of a size that the
|
||||
@ -29,32 +38,6 @@
|
||||
static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
|
||||
static unsigned long kvm_rma_count;
|
||||
|
||||
static int __init early_parse_rma_size(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return 1;
|
||||
|
||||
kvm_rma_size = memparse(p, &p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kvm_rma_size", early_parse_rma_size);
|
||||
|
||||
static int __init early_parse_rma_count(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return 1;
|
||||
|
||||
kvm_rma_count = simple_strtoul(p, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kvm_rma_count", early_parse_rma_count);
|
||||
|
||||
static struct kvmppc_rma_info *rma_info;
|
||||
static LIST_HEAD(free_rmas);
|
||||
static DEFINE_SPINLOCK(rma_lock);
|
||||
|
||||
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
|
||||
Assumes POWER7 or PPC970. */
|
||||
static inline int lpcr_rmls(unsigned long rma_size)
|
||||
@ -81,18 +64,153 @@ static inline int lpcr_rmls(unsigned long rma_size)
|
||||
}
|
||||
}
|
||||
|
||||
static int __init early_parse_rma_size(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return 1;
|
||||
|
||||
kvm_rma_size = memparse(p, &p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kvm_rma_size", early_parse_rma_size);
|
||||
|
||||
static int __init early_parse_rma_count(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return 1;
|
||||
|
||||
kvm_rma_count = simple_strtoul(p, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kvm_rma_count", early_parse_rma_count);
|
||||
|
||||
struct kvmppc_linear_info *kvm_alloc_rma(void)
|
||||
{
|
||||
return kvm_alloc_linear(KVM_LINEAR_RMA);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_alloc_rma);
|
||||
|
||||
void kvm_release_rma(struct kvmppc_linear_info *ri)
|
||||
{
|
||||
kvm_release_linear(ri);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_release_rma);
|
||||
|
||||
/*************** HPT *************/
|
||||
|
||||
/*
|
||||
* Called at boot time while the bootmem allocator is active,
|
||||
* to allocate contiguous physical memory for the real memory
|
||||
* areas for guests.
|
||||
* This maintains a list of big linear HPT tables that contain the GVA->HPA
|
||||
* memory mappings. If we don't reserve those early on, we might not be able
|
||||
* to get a big (usually 16MB) linear memory region from the kernel anymore.
|
||||
*/
|
||||
void __init kvm_rma_init(void)
|
||||
|
||||
static unsigned long kvm_hpt_count;
|
||||
|
||||
static int __init early_parse_hpt_count(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return 1;
|
||||
|
||||
kvm_hpt_count = simple_strtoul(p, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kvm_hpt_count", early_parse_hpt_count);
|
||||
|
||||
struct kvmppc_linear_info *kvm_alloc_hpt(void)
|
||||
{
|
||||
return kvm_alloc_linear(KVM_LINEAR_HPT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
|
||||
|
||||
void kvm_release_hpt(struct kvmppc_linear_info *li)
|
||||
{
|
||||
kvm_release_linear(li);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_release_hpt);
|
||||
|
||||
/*************** generic *************/
|
||||
|
||||
static LIST_HEAD(free_linears);
|
||||
static DEFINE_SPINLOCK(linear_lock);
|
||||
|
||||
static void __init kvm_linear_init_one(ulong size, int count, int type)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned long j, npages;
|
||||
void *rma;
|
||||
void *linear;
|
||||
struct page *pg;
|
||||
const char *typestr;
|
||||
struct kvmppc_linear_info *linear_info;
|
||||
|
||||
if (!count)
|
||||
return;
|
||||
|
||||
typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
|
||||
|
||||
npages = size >> PAGE_SHIFT;
|
||||
linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
|
||||
for (i = 0; i < count; ++i) {
|
||||
linear = alloc_bootmem_align(size, size);
|
||||
pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
|
||||
size >> 20);
|
||||
linear_info[i].base_virt = linear;
|
||||
linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
|
||||
linear_info[i].npages = npages;
|
||||
linear_info[i].type = type;
|
||||
list_add_tail(&linear_info[i].list, &free_linears);
|
||||
atomic_set(&linear_info[i].use_count, 0);
|
||||
|
||||
pg = pfn_to_page(linear_info[i].base_pfn);
|
||||
for (j = 0; j < npages; ++j) {
|
||||
atomic_inc(&pg->_count);
|
||||
++pg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct kvmppc_linear_info *kvm_alloc_linear(int type)
|
||||
{
|
||||
struct kvmppc_linear_info *ri;
|
||||
|
||||
ri = NULL;
|
||||
spin_lock(&linear_lock);
|
||||
list_for_each_entry(ri, &free_linears, list) {
|
||||
if (ri->type != type)
|
||||
continue;
|
||||
|
||||
list_del(&ri->list);
|
||||
atomic_inc(&ri->use_count);
|
||||
break;
|
||||
}
|
||||
spin_unlock(&linear_lock);
|
||||
memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
|
||||
return ri;
|
||||
}
|
||||
|
||||
static void kvm_release_linear(struct kvmppc_linear_info *ri)
|
||||
{
|
||||
if (atomic_dec_and_test(&ri->use_count)) {
|
||||
spin_lock(&linear_lock);
|
||||
list_add_tail(&ri->list, &free_linears);
|
||||
spin_unlock(&linear_lock);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called at boot time while the bootmem allocator is active,
|
||||
* to allocate contiguous physical memory for the hash page
|
||||
* tables for guests.
|
||||
*/
|
||||
void __init kvm_linear_init(void)
|
||||
{
|
||||
/* HPT */
|
||||
kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
|
||||
|
||||
/* RMA */
|
||||
/* Only do this on PPC970 in HV mode */
|
||||
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
|
||||
!cpu_has_feature(CPU_FTR_ARCH_201))
|
||||
@ -107,50 +225,5 @@ void __init kvm_rma_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
npages = kvm_rma_size >> PAGE_SHIFT;
|
||||
rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
|
||||
for (i = 0; i < kvm_rma_count; ++i) {
|
||||
rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
|
||||
pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
|
||||
kvm_rma_size >> 20);
|
||||
rma_info[i].base_virt = rma;
|
||||
rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
|
||||
rma_info[i].npages = npages;
|
||||
list_add_tail(&rma_info[i].list, &free_rmas);
|
||||
atomic_set(&rma_info[i].use_count, 0);
|
||||
|
||||
pg = pfn_to_page(rma_info[i].base_pfn);
|
||||
for (j = 0; j < npages; ++j) {
|
||||
atomic_inc(&pg->_count);
|
||||
++pg;
|
||||
}
|
||||
}
|
||||
kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
|
||||
}
|
||||
|
||||
struct kvmppc_rma_info *kvm_alloc_rma(void)
|
||||
{
|
||||
struct kvmppc_rma_info *ri;
|
||||
|
||||
ri = NULL;
|
||||
spin_lock(&rma_lock);
|
||||
if (!list_empty(&free_rmas)) {
|
||||
ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
|
||||
list_del(&ri->list);
|
||||
atomic_inc(&ri->use_count);
|
||||
}
|
||||
spin_unlock(&rma_lock);
|
||||
return ri;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_alloc_rma);
|
||||
|
||||
void kvm_release_rma(struct kvmppc_rma_info *ri)
|
||||
{
|
||||
if (atomic_dec_and_test(&ri->use_count)) {
|
||||
spin_lock(&rma_lock);
|
||||
list_add_tail(&ri->list, &free_rmas);
|
||||
spin_unlock(&rma_lock);
|
||||
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_release_rma);
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/kvm_ppc.h>
|
||||
@ -20,95 +21,307 @@
|
||||
#include <asm/synch.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
|
||||
/* For now use fixed-size 16MB page table */
|
||||
#define HPT_ORDER 24
|
||||
#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
|
||||
#define HPT_HASH_MASK (HPT_NPTEG - 1)
|
||||
|
||||
#define HPTE_V_HVLOCK 0x40UL
|
||||
|
||||
static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
|
||||
/* Translate address of a vmalloc'd thing to a linear map address */
|
||||
static void *real_vmalloc_addr(void *x)
|
||||
{
|
||||
unsigned long tmp, old;
|
||||
unsigned long addr = (unsigned long) x;
|
||||
pte_t *p;
|
||||
|
||||
asm volatile(" ldarx %0,0,%2\n"
|
||||
" and. %1,%0,%3\n"
|
||||
" bne 2f\n"
|
||||
" ori %0,%0,%4\n"
|
||||
" stdcx. %0,0,%2\n"
|
||||
" beq+ 2f\n"
|
||||
" li %1,%3\n"
|
||||
"2: isync"
|
||||
: "=&r" (tmp), "=&r" (old)
|
||||
: "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
|
||||
: "cc", "memory");
|
||||
return old == 0;
|
||||
p = find_linux_pte(swapper_pg_dir, addr);
|
||||
if (!p || !pte_present(*p))
|
||||
return NULL;
|
||||
/* assume we don't have huge pages in vmalloc space... */
|
||||
addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
|
||||
return __va(addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add this HPTE into the chain for the real page.
|
||||
* Must be called with the chain locked; it unlocks the chain.
|
||||
*/
|
||||
/*
 * @kvm:       VM whose kvm->arch.revmap[] array holds the chain entries
 * @rev:       revmap entry of the HPTE being added; its forw/back links are
 *             written here
 * @rmap:      chain word for the page: KVMPPC_RMAP_PRESENT says whether a
 *             chain exists, KVMPPC_RMAP_INDEX holds the head's revmap index
 * @pte_index: revmap index of the HPTE being added
 * @realmode:  when non-zero, revmap pointers are passed through
 *             real_vmalloc_addr() before being dereferenced
 *
 * The chain is a circular doubly-linked list threaded through forw/back
 * indices; the new entry is inserted just before the head, i.e. at the tail.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
||||
unsigned long *rmap, long pte_index, int realmode)
|
||||
{
|
||||
struct revmap_entry *head, *tail;
|
||||
unsigned long i;
|
||||
|
||||
if (*rmap & KVMPPC_RMAP_PRESENT) {
|
||||
/* Existing chain: i is the index of the current head entry */
i = *rmap & KVMPPC_RMAP_INDEX;
|
||||
head = &kvm->arch.revmap[i];
|
||||
/* NOTE(review): the result of real_vmalloc_addr() is not checked for NULL here */
if (realmode)
|
||||
head = real_vmalloc_addr(head);
|
||||
/* head->back is the index of the last entry in the circular chain */
tail = &kvm->arch.revmap[head->back];
|
||||
if (realmode)
|
||||
tail = real_vmalloc_addr(tail);
|
||||
/*
 * Splice the new entry between tail and head.  head and tail are the
 * same entry when the chain has one element, so rev->back must be
 * read from head->back before head->back is overwritten below.
 */
rev->forw = i;
|
||||
rev->back = head->back;
|
||||
tail->forw = pte_index;
|
||||
head->back = pte_index;
|
||||
} else {
|
||||
/* Empty chain: the new entry links to itself and becomes the head */
rev->forw = rev->back = pte_index;
|
||||
i = pte_index;
|
||||
}
|
||||
/* Make the link updates visible before the chain word is rewritten */
smp_wmb();
|
||||
/* Storing the whole word publishes head index i and releases the chain */
*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
|
||||
|
||||
/* Remove this HPTE from the chain for a real page */
|
||||
/*
 * @kvm:       VM owning the revmap array and memslots
 * @pte_index: revmap index of the HPTE being removed
 * @rev:       revmap entry for that HPTE
 * @hpte_v:    first HPTE doubleword (used to work out the page size)
 * @hpte_r:    second HPTE doubleword (source of the R and C bits)
 *
 * Returns without touching the chain if the guest frame is not covered by a
 * valid memslot.  All revmap/rmap pointers are translated with
 * real_vmalloc_addr() unconditionally.
 */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
|
||||
struct revmap_entry *rev,
|
||||
unsigned long hpte_v, unsigned long hpte_r)
|
||||
{
|
||||
struct revmap_entry *next, *prev;
|
||||
unsigned long gfn, ptel, head;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long *rmap;
|
||||
unsigned long rcbits;
|
||||
|
||||
/* Fold the R/C bits from the HPTE into the saved guest view of the PTE */
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
|
||||
ptel = rev->guest_rpte |= rcbits;
|
||||
/* Derive the guest frame number from the guest's second doubleword */
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
|
||||
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
|
||||
return;
|
||||
|
||||
/* NOTE(review): real_vmalloc_addr() results are used without a NULL check */
rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
|
||||
lock_rmap(rmap);
|
||||
|
||||
head = *rmap & KVMPPC_RMAP_INDEX;
|
||||
/* Unlink this entry by joining its two neighbours to each other */
next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
|
||||
prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
|
||||
next->back = rev->back;
|
||||
prev->forw = rev->forw;
|
||||
if (head == pte_index) {
|
||||
/* The removed entry was the chain head: pick its successor */
head = rev->forw;
|
||||
if (head == pte_index)
|
||||
/* It pointed at itself, so it was the only entry: chain is now empty */
*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
|
||||
else
|
||||
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
|
||||
}
|
||||
/* Record the collected R/C bits in the chain word as well */
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
|
||||
unlock_rmap(rmap);
|
||||
}
|
||||
|
||||
static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
|
||||
int writing, unsigned long *pte_sizep)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long ps = *pte_sizep;
|
||||
unsigned int shift;
|
||||
|
||||
ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
|
||||
if (!ptep)
|
||||
return __pte(0);
|
||||
if (shift)
|
||||
*pte_sizep = 1ul << shift;
|
||||
else
|
||||
*pte_sizep = PAGE_SIZE;
|
||||
if (ps > *pte_sizep)
|
||||
return __pte(0);
|
||||
if (!pte_present(*ptep))
|
||||
return __pte(0);
|
||||
return kvmppc_read_update_linux_pte(ptep, writing);
|
||||
}
|
||||
|
||||
/*
 * Store @hpte_v as the first doubleword of @hpte, after a release barrier
 * so that earlier stores are ordered before it.
 * NOTE(review): presumably callers pass a value with HPTE_V_HVLOCK clear,
 * which is what makes this an unlock; confirm against the call sites.
 */
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
|
||||
{
|
||||
/* Barrier first, then the store that publishes the new first doubleword */
asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
|
||||
hpte[0] = hpte_v;
|
||||
}
|
||||
|
||||
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
long pte_index, unsigned long pteh, unsigned long ptel)
|
||||
{
|
||||
unsigned long porder;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long i, lpn, pa;
|
||||
unsigned long i, pa, gpa, gfn, psize;
|
||||
unsigned long slot_fn, hva;
|
||||
unsigned long *hpte;
|
||||
struct revmap_entry *rev;
|
||||
unsigned long g_ptel = ptel;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long *physp, pte_size;
|
||||
unsigned long is_io;
|
||||
unsigned long *rmap;
|
||||
pte_t pte;
|
||||
unsigned int writing;
|
||||
unsigned long mmu_seq;
|
||||
unsigned long rcbits;
|
||||
bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
|
||||
|
||||
/* only handle 4k, 64k and 16M pages for now */
|
||||
porder = 12;
|
||||
if (pteh & HPTE_V_LARGE) {
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
|
||||
(ptel & 0xf000) == 0x1000) {
|
||||
/* 64k page */
|
||||
porder = 16;
|
||||
} else if ((ptel & 0xff000) == 0) {
|
||||
/* 16M page */
|
||||
porder = 24;
|
||||
/* lowest AVA bit must be 0 for 16M pages */
|
||||
if (pteh & 0x80)
|
||||
return H_PARAMETER;
|
||||
} else
|
||||
psize = hpte_page_size(pteh, ptel);
|
||||
if (!psize)
|
||||
return H_PARAMETER;
|
||||
writing = hpte_is_writable(ptel);
|
||||
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
|
||||
|
||||
/* used later to detect if we might have been invalidated */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
/* Find the memslot (if any) for this address */
|
||||
gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
|
||||
gfn = gpa >> PAGE_SHIFT;
|
||||
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
pa = 0;
|
||||
is_io = ~0ul;
|
||||
rmap = NULL;
|
||||
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
|
||||
/* PPC970 can't do emulated MMIO */
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_206))
|
||||
return H_PARAMETER;
|
||||
/* Emulated MMIO - mark this with key=31 */
|
||||
pteh |= HPTE_V_ABSENT;
|
||||
ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
|
||||
goto do_insert;
|
||||
}
|
||||
lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
|
||||
if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
|
||||
|
||||
/* Check if the requested page fits entirely in the memslot. */
|
||||
if (!slot_is_aligned(memslot, psize))
|
||||
return H_PARAMETER;
|
||||
pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
|
||||
if (!pa)
|
||||
slot_fn = gfn - memslot->base_gfn;
|
||||
rmap = &memslot->rmap[slot_fn];
|
||||
|
||||
if (!kvm->arch.using_mmu_notifiers) {
|
||||
physp = kvm->arch.slot_phys[memslot->id];
|
||||
if (!physp)
|
||||
return H_PARAMETER;
|
||||
physp += slot_fn;
|
||||
if (realmode)
|
||||
physp = real_vmalloc_addr(physp);
|
||||
pa = *physp;
|
||||
if (!pa)
|
||||
return H_TOO_HARD;
|
||||
is_io = pa & (HPTE_R_I | HPTE_R_W);
|
||||
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
|
||||
pa &= PAGE_MASK;
|
||||
} else {
|
||||
/* Translate to host virtual address */
|
||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||
|
||||
/* Look up the Linux PTE for the backing page */
|
||||
pte_size = psize;
|
||||
pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
|
||||
if (pte_present(pte)) {
|
||||
if (writing && !pte_write(pte))
|
||||
/* make the actual HPTE be read-only */
|
||||
ptel = hpte_make_readonly(ptel);
|
||||
is_io = hpte_cache_bits(pte_val(pte));
|
||||
pa = pte_pfn(pte) << PAGE_SHIFT;
|
||||
}
|
||||
}
|
||||
if (pte_size < psize)
|
||||
return H_PARAMETER;
|
||||
/* Check WIMG */
|
||||
if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
|
||||
(ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
|
||||
return H_PARAMETER;
|
||||
pteh &= ~0x60UL;
|
||||
ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
|
||||
if (pa && pte_size > psize)
|
||||
pa |= gpa & (pte_size - 1);
|
||||
|
||||
ptel &= ~(HPTE_R_PP0 - psize);
|
||||
ptel |= pa;
|
||||
if (pte_index >= (HPT_NPTEG << 3))
|
||||
|
||||
if (pa)
|
||||
pteh |= HPTE_V_VALID;
|
||||
else
|
||||
pteh |= HPTE_V_ABSENT;
|
||||
|
||||
/* Check WIMG */
|
||||
if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
|
||||
if (is_io)
|
||||
return H_PARAMETER;
|
||||
/*
|
||||
* Allow guest to map emulated device memory as
|
||||
* uncacheable, but actually make it cacheable.
|
||||
*/
|
||||
ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
|
||||
ptel |= HPTE_R_M;
|
||||
}
|
||||
|
||||
/* Find and lock the HPTEG slot to use */
|
||||
do_insert:
|
||||
if (pte_index >= HPT_NPTE)
|
||||
return H_PARAMETER;
|
||||
if (likely((flags & H_EXACT) == 0)) {
|
||||
pte_index &= ~7UL;
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
for (i = 0; ; ++i) {
|
||||
if (i == 8)
|
||||
return H_PTEG_FULL;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
if ((*hpte & HPTE_V_VALID) == 0 &&
|
||||
lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
|
||||
try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
|
||||
HPTE_V_ABSENT))
|
||||
break;
|
||||
hpte += 2;
|
||||
}
|
||||
if (i == 8) {
|
||||
/*
|
||||
* Since try_lock_hpte doesn't retry (not even stdcx.
|
||||
* failures), it could be that there is a free slot
|
||||
* but we transiently failed to lock it. Try again,
|
||||
* actually locking each slot and checking it.
|
||||
*/
|
||||
hpte -= 16;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
|
||||
break;
|
||||
*hpte &= ~HPTE_V_HVLOCK;
|
||||
hpte += 2;
|
||||
}
|
||||
if (i == 8)
|
||||
return H_PTEG_FULL;
|
||||
}
|
||||
pte_index += i;
|
||||
} else {
|
||||
i = 0;
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
|
||||
return H_PTEG_FULL;
|
||||
if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
|
||||
HPTE_V_ABSENT)) {
|
||||
/* Lock the slot and check again */
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
|
||||
*hpte &= ~HPTE_V_HVLOCK;
|
||||
return H_PTEG_FULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Save away the guest's idea of the second HPTE dword */
|
||||
rev = &kvm->arch.revmap[pte_index];
|
||||
if (realmode)
|
||||
rev = real_vmalloc_addr(rev);
|
||||
if (rev)
|
||||
rev->guest_rpte = g_ptel;
|
||||
|
||||
/* Link HPTE into reverse-map chain */
|
||||
if (pteh & HPTE_V_VALID) {
|
||||
if (realmode)
|
||||
rmap = real_vmalloc_addr(rmap);
|
||||
lock_rmap(rmap);
|
||||
/* Check for pending invalidations under the rmap chain lock */
|
||||
if (kvm->arch.using_mmu_notifiers &&
|
||||
mmu_notifier_retry(vcpu, mmu_seq)) {
|
||||
/* inval in progress, write a non-present HPTE */
|
||||
pteh |= HPTE_V_ABSENT;
|
||||
pteh &= ~HPTE_V_VALID;
|
||||
unlock_rmap(rmap);
|
||||
} else {
|
||||
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
|
||||
realmode);
|
||||
/* Only set R/C in real HPTE if already set in *rmap */
|
||||
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
|
||||
ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
|
||||
}
|
||||
}
|
||||
|
||||
hpte[1] = ptel;
|
||||
|
||||
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
|
||||
eieio();
|
||||
hpte[0] = pteh;
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
|
||||
vcpu->arch.gpr[4] = pte_index + i;
|
||||
|
||||
vcpu->arch.gpr[4] = pte_index;
|
||||
return H_SUCCESS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmppc_h_enter);
|
||||
|
||||
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
|
||||
|
||||
@ -137,37 +350,46 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long *hpte;
|
||||
unsigned long v, r, rb;
|
||||
struct revmap_entry *rev;
|
||||
|
||||
if (pte_index >= (HPT_NPTEG << 3))
|
||||
if (pte_index >= HPT_NPTE)
|
||||
return H_PARAMETER;
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
while (!lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
if ((hpte[0] & HPTE_V_VALID) == 0 ||
|
||||
if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
|
||||
((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
|
||||
((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
|
||||
hpte[0] &= ~HPTE_V_HVLOCK;
|
||||
return H_NOT_FOUND;
|
||||
}
|
||||
if (atomic_read(&kvm->online_vcpus) == 1)
|
||||
flags |= H_LOCAL;
|
||||
vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
|
||||
vcpu->arch.gpr[5] = r = hpte[1];
|
||||
rb = compute_tlbie_rb(v, r, pte_index);
|
||||
hpte[0] = 0;
|
||||
if (!(flags & H_LOCAL)) {
|
||||
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile("tlbiel %0" : : "r" (rb));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
v = hpte[0] & ~HPTE_V_HVLOCK;
|
||||
if (v & HPTE_V_VALID) {
|
||||
hpte[0] &= ~HPTE_V_VALID;
|
||||
rb = compute_tlbie_rb(v, hpte[1], pte_index);
|
||||
if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
|
||||
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile("tlbiel %0" : : "r" (rb));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
}
|
||||
/* Read PTE low word after tlbie to get final R/C values */
|
||||
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
|
||||
}
|
||||
r = rev->guest_rpte;
|
||||
unlock_hpte(hpte, 0);
|
||||
|
||||
vcpu->arch.gpr[4] = v;
|
||||
vcpu->arch.gpr[5] = r;
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
@ -175,78 +397,117 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long *args = &vcpu->arch.gpr[4];
|
||||
unsigned long *hp, tlbrb[4];
|
||||
long int i, found;
|
||||
long int n_inval = 0;
|
||||
unsigned long flags, req, pte_index;
|
||||
unsigned long *hp, *hptes[4], tlbrb[4];
|
||||
long int i, j, k, n, found, indexes[4];
|
||||
unsigned long flags, req, pte_index, rcbits;
|
||||
long int local = 0;
|
||||
long int ret = H_SUCCESS;
|
||||
struct revmap_entry *rev, *revs[4];
|
||||
|
||||
if (atomic_read(&kvm->online_vcpus) == 1)
|
||||
local = 1;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
pte_index = args[i * 2];
|
||||
flags = pte_index >> 56;
|
||||
pte_index &= ((1ul << 56) - 1);
|
||||
req = flags >> 6;
|
||||
flags &= 3;
|
||||
if (req == 3)
|
||||
break;
|
||||
if (req != 1 || flags == 3 ||
|
||||
pte_index >= (HPT_NPTEG << 3)) {
|
||||
/* parameter error */
|
||||
args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
|
||||
ret = H_PARAMETER;
|
||||
break;
|
||||
}
|
||||
hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
while (!lock_hpte(hp, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
found = 0;
|
||||
if (hp[0] & HPTE_V_VALID) {
|
||||
switch (flags & 3) {
|
||||
case 0: /* absolute */
|
||||
found = 1;
|
||||
break;
|
||||
case 1: /* andcond */
|
||||
if (!(hp[0] & args[i * 2 + 1]))
|
||||
found = 1;
|
||||
break;
|
||||
case 2: /* AVPN */
|
||||
if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
|
||||
found = 1;
|
||||
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
|
||||
n = 0;
|
||||
for (; i < 4; ++i) {
|
||||
j = i * 2;
|
||||
pte_index = args[j];
|
||||
flags = pte_index >> 56;
|
||||
pte_index &= ((1ul << 56) - 1);
|
||||
req = flags >> 6;
|
||||
flags &= 3;
|
||||
if (req == 3) { /* no more requests */
|
||||
i = 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
hp[0] &= ~HPTE_V_HVLOCK;
|
||||
args[i * 2] = ((0x90 | flags) << 56) + pte_index;
|
||||
continue;
|
||||
}
|
||||
/* insert R and C bits from PTE */
|
||||
flags |= (hp[1] >> 5) & 0x0c;
|
||||
args[i * 2] = ((0x80 | flags) << 56) + pte_index;
|
||||
tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
|
||||
hp[0] = 0;
|
||||
}
|
||||
if (n_inval == 0)
|
||||
return ret;
|
||||
if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
|
||||
/* parameter error */
|
||||
args[j] = ((0xa0 | flags) << 56) + pte_index;
|
||||
ret = H_PARAMETER;
|
||||
break;
|
||||
}
|
||||
hp = (unsigned long *)
|
||||
(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
/* to avoid deadlock, don't spin except for first */
|
||||
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
|
||||
if (n)
|
||||
break;
|
||||
while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
}
|
||||
found = 0;
|
||||
if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
|
||||
switch (flags & 3) {
|
||||
case 0: /* absolute */
|
||||
found = 1;
|
||||
break;
|
||||
case 1: /* andcond */
|
||||
if (!(hp[0] & args[j + 1]))
|
||||
found = 1;
|
||||
break;
|
||||
case 2: /* AVPN */
|
||||
if ((hp[0] & ~0x7fUL) == args[j + 1])
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
hp[0] &= ~HPTE_V_HVLOCK;
|
||||
args[j] = ((0x90 | flags) << 56) + pte_index;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!local) {
|
||||
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (i = 0; i < n_inval; ++i)
|
||||
asm volatile(PPC_TLBIE(%1,%0)
|
||||
: : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
|
||||
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (i = 0; i < n_inval; ++i)
|
||||
asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
args[j] = ((0x80 | flags) << 56) + pte_index;
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
|
||||
if (!(hp[0] & HPTE_V_VALID)) {
|
||||
/* insert R and C bits from PTE */
|
||||
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
|
||||
args[j] |= rcbits << (56 - 5);
|
||||
continue;
|
||||
}
|
||||
|
||||
hp[0] &= ~HPTE_V_VALID; /* leave it locked */
|
||||
tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
|
||||
indexes[n] = j;
|
||||
hptes[n] = hp;
|
||||
revs[n] = rev;
|
||||
++n;
|
||||
}
|
||||
|
||||
if (!n)
|
||||
break;
|
||||
|
||||
/* Now that we've collected a batch, do the tlbies */
|
||||
if (!local) {
|
||||
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (k = 0; k < n; ++k)
|
||||
asm volatile(PPC_TLBIE(%1,%0) : :
|
||||
"r" (tlbrb[k]),
|
||||
"r" (kvm->arch.lpid));
|
||||
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (k = 0; k < n; ++k)
|
||||
asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
}
|
||||
|
||||
/* Read PTE low words after tlbie to get final R/C values */
|
||||
for (k = 0; k < n; ++k) {
|
||||
j = indexes[k];
|
||||
pte_index = args[j] & ((1ul << 56) - 1);
|
||||
hp = hptes[k];
|
||||
rev = revs[k];
|
||||
remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
|
||||
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
|
||||
args[j] |= rcbits << (56 - 5);
|
||||
hp[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -256,40 +517,55 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long *hpte;
|
||||
unsigned long v, r, rb;
|
||||
struct revmap_entry *rev;
|
||||
unsigned long v, r, rb, mask, bits;
|
||||
|
||||
if (pte_index >= (HPT_NPTEG << 3))
|
||||
if (pte_index >= HPT_NPTE)
|
||||
return H_PARAMETER;
|
||||
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
while (!lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
if ((hpte[0] & HPTE_V_VALID) == 0 ||
|
||||
if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
|
||||
((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
|
||||
hpte[0] &= ~HPTE_V_HVLOCK;
|
||||
return H_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (atomic_read(&kvm->online_vcpus) == 1)
|
||||
flags |= H_LOCAL;
|
||||
v = hpte[0];
|
||||
r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
|
||||
HPTE_R_KEY_HI | HPTE_R_KEY_LO);
|
||||
r |= (flags << 55) & HPTE_R_PP0;
|
||||
r |= (flags << 48) & HPTE_R_KEY_HI;
|
||||
r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
|
||||
rb = compute_tlbie_rb(v, r, pte_index);
|
||||
hpte[0] = v & ~HPTE_V_VALID;
|
||||
if (!(flags & H_LOCAL)) {
|
||||
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile("tlbiel %0" : : "r" (rb));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
bits = (flags << 55) & HPTE_R_PP0;
|
||||
bits |= (flags << 48) & HPTE_R_KEY_HI;
|
||||
bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
|
||||
|
||||
/* Update guest view of 2nd HPTE dword */
|
||||
mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
|
||||
HPTE_R_KEY_HI | HPTE_R_KEY_LO;
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
if (rev) {
|
||||
r = (rev->guest_rpte & ~mask) | bits;
|
||||
rev->guest_rpte = r;
|
||||
}
|
||||
r = (hpte[1] & ~mask) | bits;
|
||||
|
||||
/* Update HPTE */
|
||||
if (v & HPTE_V_VALID) {
|
||||
rb = compute_tlbie_rb(v, r, pte_index);
|
||||
hpte[0] = v & ~HPTE_V_VALID;
|
||||
if (!(flags & H_LOCAL)) {
|
||||
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
} else {
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile("tlbiel %0" : : "r" (rb));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
}
|
||||
}
|
||||
hpte[1] = r;
|
||||
eieio();
|
||||
@ -298,40 +574,243 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
|
||||
{
|
||||
long int i;
|
||||
unsigned long offset, rpn;
|
||||
|
||||
offset = realaddr & (kvm->arch.ram_psize - 1);
|
||||
rpn = (realaddr - offset) >> PAGE_SHIFT;
|
||||
for (i = 0; i < kvm->arch.ram_npages; ++i)
|
||||
if (rpn == kvm->arch.ram_pginfo[i].pfn)
|
||||
return (i << PAGE_SHIFT) + offset;
|
||||
return HPTE_R_RPN; /* all 1s in the RPN field */
|
||||
}
|
||||
|
||||
/*
 * H_READ handler: copies one HPTE (or four, when H_READ_4 is set in
 * flags) into the vcpu's GPRs starting at gpr[4], two registers per
 * entry, and returns H_SUCCESS.  An out-of-range pte_index returns
 * H_PARAMETER.
 *
 * NOTE(review): this listing appears to overlay two revisions of the
 * function -- hpte and r are declared twice, the bounds check appears
 * in two forms back to back, and gpr[4 + i * 2] is stored twice per
 * iteration.  Confirm which lines are current before relying on it.
 */
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long *hpte, r;
|
||||
unsigned long *hpte, v, r;
|
||||
int i, n = 1;
|
||||
struct revmap_entry *rev = NULL;
|
||||
|
||||
if (pte_index >= (HPT_NPTEG << 3))
|
||||
if (pte_index >= HPT_NPTE)
|
||||
return H_PARAMETER;
|
||||
if (flags & H_READ_4) {
|
||||
/* H_READ_4: start from the 4-entry-aligned index and read four entries */
pte_index &= ~3;
|
||||
n = 4;
|
||||
}
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
|
||||
for (i = 0; i < n; ++i, ++pte_index) {
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
/* The HPTE is read without taking HPTE_V_HVLOCK; the lock bit is masked out of v */
v = hpte[0] & ~HPTE_V_HVLOCK;
|
||||
r = hpte[1];
|
||||
if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
|
||||
r = reverse_xlate(kvm, r & HPTE_R_RPN) |
|
||||
(r & ~HPTE_R_RPN);
|
||||
vcpu->arch.gpr[4 + i * 2] = hpte[0];
|
||||
/* An entry marked absent is reported to the caller as valid */
if (v & HPTE_V_ABSENT) {
|
||||
v &= ~HPTE_V_ABSENT;
|
||||
v |= HPTE_V_VALID;
|
||||
}
|
||||
/* Second dword: saved guest_rpte, with only R and C taken from the real HPTE */
if (v & HPTE_V_VALID)
|
||||
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
|
||||
vcpu->arch.gpr[4 + i * 2] = v;
|
||||
vcpu->arch.gpr[5 + i * 2] = r;
|
||||
}
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb;
|
||||
|
||||
hptep[0] &= ~HPTE_V_VALID;
|
||||
rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
|
||||
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
|
||||
|
||||
void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb;
|
||||
unsigned char rbyte;
|
||||
|
||||
rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
|
||||
rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
|
||||
/* modify only the second-last byte, which contains the ref bit */
|
||||
*((char *)hptep + 14) = rbyte;
|
||||
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
|
||||
cpu_relax();
|
||||
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
|
||||
: : "r" (rb), "r" (kvm->arch.lpid));
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
kvm->arch.tlbie_lock = 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
|
||||
|
||||
/*
 * Base page shift for a large-page segment, indexed by the value of the
 * SLB_VSID_LP field shifted down by 4 (see kvmppc_hv_find_lock_hpte()).
 */
static int slb_base_page_shift[4] = {
	[0] = 24,	/* 16M */
	[1] = 16,	/* 64k */
	[2] = 34,	/* 16G */
	[3] = 20,	/* 1M, unsupported */
};
|
||||
|
||||
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
|
||||
unsigned long valid)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int pshift;
|
||||
unsigned long somask;
|
||||
unsigned long vsid, hash;
|
||||
unsigned long avpn;
|
||||
unsigned long *hpte;
|
||||
unsigned long mask, val;
|
||||
unsigned long v, r;
|
||||
|
||||
/* Get page shift, work out hash and AVPN etc. */
|
||||
mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
|
||||
val = 0;
|
||||
pshift = 12;
|
||||
if (slb_v & SLB_VSID_L) {
|
||||
mask |= HPTE_V_LARGE;
|
||||
val |= HPTE_V_LARGE;
|
||||
pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
|
||||
}
|
||||
if (slb_v & SLB_VSID_B_1T) {
|
||||
somask = (1UL << 40) - 1;
|
||||
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
|
||||
vsid ^= vsid << 25;
|
||||
} else {
|
||||
somask = (1UL << 28) - 1;
|
||||
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
|
||||
}
|
||||
hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
|
||||
avpn = slb_v & ~(somask >> 16); /* also includes B */
|
||||
avpn |= (eaddr & somask) >> 16;
|
||||
|
||||
if (pshift >= 24)
|
||||
avpn &= ~((1UL << (pshift - 16)) - 1);
|
||||
else
|
||||
avpn &= ~0x7fUL;
|
||||
val |= avpn;
|
||||
|
||||
for (;;) {
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
|
||||
|
||||
for (i = 0; i < 16; i += 2) {
|
||||
/* Read the PTE racily */
|
||||
v = hpte[i] & ~HPTE_V_HVLOCK;
|
||||
|
||||
/* Check valid/absent, hash, segment size and AVPN */
|
||||
if (!(v & valid) || (v & mask) != val)
|
||||
continue;
|
||||
|
||||
/* Lock the PTE and read it under the lock */
|
||||
while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
|
||||
cpu_relax();
|
||||
v = hpte[i] & ~HPTE_V_HVLOCK;
|
||||
r = hpte[i+1];
|
||||
|
||||
/*
|
||||
* Check the HPTE again, including large page size
|
||||
* Since we don't currently allow any MPSS (mixed
|
||||
* page-size segment) page sizes, it is sufficient
|
||||
* to check against the actual page size.
|
||||
*/
|
||||
if ((v & valid) && (v & mask) == val &&
|
||||
hpte_page_size(v, r) == (1ul << pshift))
|
||||
/* Return with the HPTE still locked */
|
||||
return (hash << 3) + (i >> 1);
|
||||
|
||||
/* Unlock and move on */
|
||||
hpte[i] = v;
|
||||
}
|
||||
|
||||
if (val & HPTE_V_SECONDARY)
|
||||
break;
|
||||
val |= HPTE_V_SECONDARY;
|
||||
hash = hash ^ HPT_HASH_MASK;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
|
||||
|
||||
/*
|
||||
* Called in real mode to check whether an HPTE not found fault
|
||||
* is due to accessing a paged-out page or an emulated MMIO page,
|
||||
* or if a protection fault is due to accessing a page that the
|
||||
* guest wanted read/write access to but which we made read-only.
|
||||
* Returns a possibly modified status (DSISR) value if not
|
||||
* (i.e. pass the interrupt to the guest),
|
||||
* -1 to pass the fault up to host kernel mode code, -2 to do that
|
||||
* and also load the instruction word (for MMIO emulation),
|
||||
* or 0 if we should make the guest retry the access.
|
||||
*/
|
||||
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
|
||||
unsigned long slb_v, unsigned int status, bool data)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
long int index;
|
||||
unsigned long v, r, gr;
|
||||
unsigned long *hpte;
|
||||
unsigned long valid;
|
||||
struct revmap_entry *rev;
|
||||
unsigned long pp, key;
|
||||
|
||||
/* For protection fault, expect to find a valid HPTE */
|
||||
valid = HPTE_V_VALID;
|
||||
if (status & DSISR_NOHPTE)
|
||||
valid |= HPTE_V_ABSENT;
|
||||
|
||||
index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
|
||||
if (index < 0) {
|
||||
if (status & DSISR_NOHPTE)
|
||||
return status; /* there really was no HPTE */
|
||||
return 0; /* for prot fault, HPTE disappeared */
|
||||
}
|
||||
hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
|
||||
v = hpte[0] & ~HPTE_V_HVLOCK;
|
||||
r = hpte[1];
|
||||
rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
|
||||
gr = rev->guest_rpte;
|
||||
|
||||
unlock_hpte(hpte, v);
|
||||
|
||||
/* For not found, if the HPTE is valid by now, retry the instruction */
|
||||
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
|
||||
return 0;
|
||||
|
||||
/* Check access permissions to the page */
|
||||
pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
|
||||
key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
|
||||
status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
|
||||
if (!data) {
|
||||
if (gr & (HPTE_R_N | HPTE_R_G))
|
||||
return status | SRR1_ISI_N_OR_G;
|
||||
if (!hpte_read_permission(pp, slb_v & key))
|
||||
return status | SRR1_ISI_PROT;
|
||||
} else if (status & DSISR_ISSTORE) {
|
||||
/* check write permission */
|
||||
if (!hpte_write_permission(pp, slb_v & key))
|
||||
return status | DSISR_PROTFAULT;
|
||||
} else {
|
||||
if (!hpte_read_permission(pp, slb_v & key))
|
||||
return status | DSISR_PROTFAULT;
|
||||
}
|
||||
|
||||
/* Check storage key, if applicable */
|
||||
if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
|
||||
unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
|
||||
if (status & DSISR_ISSTORE)
|
||||
perm >>= 1;
|
||||
if (perm & 1)
|
||||
return status | DSISR_KEYFAULT;
|
||||
}
|
||||
|
||||
/* Save HPTE info for virtual-mode handler */
|
||||
vcpu->arch.pgfault_addr = addr;
|
||||
vcpu->arch.pgfault_index = index;
|
||||
vcpu->arch.pgfault_hpte[0] = v;
|
||||
vcpu->arch.pgfault_hpte[1] = r;
|
||||
|
||||
/* Check the storage key to see if it is possibly emulated MMIO */
|
||||
if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
|
||||
(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
|
||||
(HPTE_R_KEY_HI | HPTE_R_KEY_LO))
|
||||
return -2; /* MMIO emulation - load instr word */
|
||||
|
||||
return -1; /* send fault up to host kernel mode */
|
||||
}
|
||||
|
@ -601,6 +601,30 @@ kvmppc_interrupt:
|
||||
|
||||
stw r12,VCPU_TRAP(r9)
|
||||
|
||||
/* Save HEIR (HV emulation assist reg) in last_inst
|
||||
if this is an HEI (HV emulation interrupt, e40) */
|
||||
li r3,KVM_INST_FETCH_FAILED
|
||||
BEGIN_FTR_SECTION
|
||||
cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
|
||||
bne 11f
|
||||
mfspr r3,SPRN_HEIR
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
11: stw r3,VCPU_LAST_INST(r9)
|
||||
|
||||
/* these are volatile across C function calls */
|
||||
mfctr r3
|
||||
mfxer r4
|
||||
std r3, VCPU_CTR(r9)
|
||||
stw r4, VCPU_XER(r9)
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
/* If this is a page table miss then see if it's theirs or ours */
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
|
||||
beq kvmppc_hdsi
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
|
||||
beq kvmppc_hisi
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
|
||||
/* See if this is a leftover HDEC interrupt */
|
||||
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
|
||||
bne 2f
|
||||
@ -608,7 +632,7 @@ kvmppc_interrupt:
|
||||
cmpwi r3,0
|
||||
bge ignore_hdec
|
||||
2:
|
||||
/* See if this is something we can handle in real mode */
|
||||
/* See if this is an hcall we can handle in real mode */
|
||||
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
|
||||
beq hcall_try_real_mode
|
||||
|
||||
@ -624,6 +648,7 @@ BEGIN_FTR_SECTION
|
||||
1:
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
|
||||
nohpte_cont:
|
||||
hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
/* Save DEC */
|
||||
mfspr r5,SPRN_DEC
|
||||
@ -632,36 +657,21 @@ hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
add r5,r5,r6
|
||||
std r5,VCPU_DEC_EXPIRES(r9)
|
||||
|
||||
/* Save HEIR (HV emulation assist reg) in last_inst
|
||||
if this is an HEI (HV emulation interrupt, e40) */
|
||||
li r3,-1
|
||||
BEGIN_FTR_SECTION
|
||||
cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
|
||||
bne 11f
|
||||
mfspr r3,SPRN_HEIR
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
11: stw r3,VCPU_LAST_INST(r9)
|
||||
|
||||
/* Save more register state */
|
||||
mfxer r5
|
||||
mfdar r6
|
||||
mfdsisr r7
|
||||
mfctr r8
|
||||
|
||||
stw r5, VCPU_XER(r9)
|
||||
std r6, VCPU_DAR(r9)
|
||||
stw r7, VCPU_DSISR(r9)
|
||||
std r8, VCPU_CTR(r9)
|
||||
/* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
|
||||
BEGIN_FTR_SECTION
|
||||
/* don't overwrite fault_dar/fault_dsisr if HDSI */
|
||||
cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
|
||||
beq 6f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
7: std r6, VCPU_FAULT_DAR(r9)
|
||||
std r6, VCPU_FAULT_DAR(r9)
|
||||
stw r7, VCPU_FAULT_DSISR(r9)
|
||||
|
||||
/* Save guest CTRL register, set runlatch to 1 */
|
||||
mfspr r6,SPRN_CTRLF
|
||||
6: mfspr r6,SPRN_CTRLF
|
||||
stw r6,VCPU_CTRL(r9)
|
||||
andi. r0,r6,1
|
||||
bne 4f
|
||||
@ -1094,9 +1104,131 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
mtspr SPRN_HSRR1, r7
|
||||
ba 0x500
|
||||
|
||||
6: mfspr r6,SPRN_HDAR
|
||||
mfspr r7,SPRN_HDSISR
|
||||
b 7b
|
||||
/*
|
||||
* Check whether an HDSI is an HPTE not found fault or something else.
|
||||
* If it is an HPTE not found fault that is due to the guest accessing
|
||||
* a page that they have mapped but which we have paged out, then
|
||||
* we continue on with the guest exit path. In all other cases,
|
||||
* reflect the HDSI to the guest as a DSI.
|
||||
*/
|
||||
/*
 * Register use visible in this routine: r9 = vcpu pointer, r10/r11 =
 * guest PC/MSR (reloaded from VCPU_PC/VCPU_MSR after the C call),
 * r4 = HDAR, r6 = HDSISR, r5 = SLB VSID word used for the lookup.
 * NOTE(review): r13 is presumably the paca, as at hcall_real_cont -- confirm.
 */
kvmppc_hdsi:
|
||||
mfspr r4, SPRN_HDAR
|
||||
mfspr r6, SPRN_HDSISR
|
||||
/* HPTE not found fault or protection fault? */
|
||||
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
|
||||
beq 1f /* if not, send it to the guest */
|
||||
andi. r0, r11, MSR_DR /* data relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r4, 28
|
||||
PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
|
||||
bne 1f /* if no SLB entry found */
|
||||
4: std r4, VCPU_FAULT_DAR(r9)
|
||||
stw r6, VCPU_FAULT_DSISR(r9)
|
||||
|
||||
/* Search the hash table. */
|
||||
/* r3..r7 are loaded in the parameter order of kvmppc_hpte_hv_fault(vcpu, addr, slb_v, status, data) */
mr r3, r9 /* vcpu pointer */
|
||||
li r7, 1 /* data fault */
|
||||
bl .kvmppc_hpte_hv_fault
|
||||
/* Reload vcpu, guest PC and guest MSR after the C call */
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
ld r10, VCPU_PC(r9)
|
||||
ld r11, VCPU_MSR(r9)
|
||||
li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
|
||||
cmpdi r3, 0 /* retry the instruction */
|
||||
beq 6f
|
||||
cmpdi r3, -1 /* handle in kernel mode */
|
||||
beq nohpte_cont
|
||||
cmpdi r3, -2 /* MMIO emulation; need instr word */
|
||||
beq 2f
|
||||
|
||||
/* Synthesize a DSI for the guest */
|
||||
/* Any other return value in r3 is the DSISR value to deliver */
ld r4, VCPU_FAULT_DAR(r9)
|
||||
mr r6, r3
|
||||
1: mtspr SPRN_DAR, r4
|
||||
mtspr SPRN_DSISR, r6
|
||||
mtspr SPRN_SRR0, r10
|
||||
mtspr SPRN_SRR1, r11
|
||||
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
|
||||
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
|
||||
rotldi r11, r11, 63
|
||||
/* Common exit: restore CTR and XER from the vcpu, then return to the guest with r4 = vcpu */
6: ld r7, VCPU_CTR(r9)
|
||||
lwz r8, VCPU_XER(r9)
|
||||
mtctr r7
|
||||
mtxer r8
|
||||
mr r4, r9
|
||||
b fast_guest_return
|
||||
|
||||
3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
|
||||
ld r5, KVM_VRMA_SLB_V(r5)
|
||||
b 4b
|
||||
|
||||
/* If this is for emulated MMIO, load the instruction word */
|
||||
2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
|
||||
|
||||
/* Set guest mode to 'jump over instruction' so if lwz faults
|
||||
* we'll just continue at the next IP. */
|
||||
li r0, KVM_GUEST_MODE_SKIP
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
|
||||
/* Do the access with MSR:DR enabled */
|
||||
/* r3 keeps the original MSR so it can be restored after the load from the guest PC in r10 */
mfmsr r3
|
||||
ori r4, r3, MSR_DR /* Enable paging for data */
|
||||
mtmsrd r4
|
||||
lwz r8, 0(r10)
|
||||
mtmsrd r3
|
||||
|
||||
/* Store the result */
|
||||
stw r8, VCPU_LAST_INST(r9)
|
||||
|
||||
/* Unset guest mode. */
|
||||
li r0, KVM_GUEST_MODE_NONE
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
b nohpte_cont
|
||||
|
||||
/*
|
||||
* Similarly for an HISI, reflect it to the guest as an ISI unless
|
||||
* it is an HPTE not found fault for a page that we have paged out.
|
||||
*/
|
||||
/*
 * Register use visible in this routine: r9 = vcpu pointer, r10 = guest
 * PC (the faulting address), r11 = guest MSR/SRR1 image carrying the
 * fault status bits, r5 = SLB VSID word used for the lookup.
 */
kvmppc_hisi:
|
||||
andis. r0, r11, SRR1_ISI_NOPT@h
|
||||
beq 1f
|
||||
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r10, 28
|
||||
PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
|
||||
bne 1f /* if no SLB entry found */
|
||||
4:
|
||||
/* Search the hash table. */
|
||||
/* r3..r7 are loaded in the parameter order of kvmppc_hpte_hv_fault(vcpu, addr, slb_v, status, data) */
mr r3, r9 /* vcpu pointer */
|
||||
mr r4, r10
|
||||
mr r6, r11
|
||||
li r7, 0 /* instruction fault */
|
||||
bl .kvmppc_hpte_hv_fault
|
||||
/* Reload vcpu, guest PC and guest MSR after the C call */
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
ld r10, VCPU_PC(r9)
|
||||
ld r11, VCPU_MSR(r9)
|
||||
li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
|
||||
cmpdi r3, 0 /* retry the instruction */
|
||||
beq 6f
|
||||
cmpdi r3, -1 /* handle in kernel mode */
|
||||
beq nohpte_cont
|
||||
|
||||
/* Synthesize an ISI for the guest */
|
||||
/* Any other return value in r3 becomes the SRR1 value delivered to the guest */
mr r11, r3
|
||||
1: mtspr SPRN_SRR0, r10
|
||||
mtspr SPRN_SRR1, r11
|
||||
li r10, BOOK3S_INTERRUPT_INST_STORAGE
|
||||
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
|
||||
rotldi r11, r11, 63
|
||||
/* Common exit: restore CTR and XER from the vcpu, then return to the guest with r4 = vcpu */
6: ld r7, VCPU_CTR(r9)
|
||||
lwz r8, VCPU_XER(r9)
|
||||
mtctr r7
|
||||
mtxer r8
|
||||
mr r4, r9
|
||||
b fast_guest_return
|
||||
|
||||
/* r6 is used as a scratch register here; it is reloaded from r11 at label 4 */
3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
|
||||
ld r5, KVM_VRMA_SLB_V(r6)
|
||||
b 4b
|
||||
|
||||
/*
|
||||
* Try to handle an hcall in real mode.
|
||||
|
@ -196,7 +196,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
kvmppc_inject_pf(vcpu, addr, false);
|
||||
goto done_load;
|
||||
} else if (r == EMULATE_DO_MMIO) {
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1);
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
|
||||
len, 1);
|
||||
goto done_load;
|
||||
}
|
||||
|
||||
@ -286,11 +287,13 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
kvmppc_inject_pf(vcpu, addr, false);
|
||||
goto done_load;
|
||||
} else if ((r == EMULATE_DO_MMIO) && w) {
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1);
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
|
||||
4, 1);
|
||||
vcpu->arch.qpr[rs] = tmp[1];
|
||||
goto done_load;
|
||||
} else if (r == EMULATE_DO_MMIO) {
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1);
|
||||
emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
|
||||
8, 1);
|
||||
goto done_load;
|
||||
}
|
||||
|
||||
|
@ -51,15 +51,19 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
|
||||
#define MSR_USER32 MSR_USER
|
||||
#define MSR_USER64 MSR_USER
|
||||
#define HW_PAGE_SIZE PAGE_SIZE
|
||||
#define __hard_irq_disable local_irq_disable
|
||||
#define __hard_irq_enable local_irq_enable
|
||||
#endif
|
||||
|
||||
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
|
||||
memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
|
||||
sizeof(get_paca()->shadow_vcpu));
|
||||
to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
|
||||
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
|
||||
svcpu_put(svcpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_32
|
||||
@ -70,10 +74,12 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
|
||||
memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
|
||||
sizeof(get_paca()->shadow_vcpu));
|
||||
to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
|
||||
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
|
||||
svcpu_put(svcpu);
|
||||
#endif
|
||||
|
||||
kvmppc_giveup_ext(vcpu, MSR_FP);
|
||||
@ -151,14 +157,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
|
||||
kvmppc_mmu_book3s_64_init(vcpu);
|
||||
to_book3s(vcpu)->hior = 0xfff00000;
|
||||
if (!to_book3s(vcpu)->hior_explicit)
|
||||
to_book3s(vcpu)->hior = 0xfff00000;
|
||||
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_64;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
kvmppc_mmu_book3s_32_init(vcpu);
|
||||
to_book3s(vcpu)->hior = 0;
|
||||
if (!to_book3s(vcpu)->hior_explicit)
|
||||
to_book3s(vcpu)->hior = 0;
|
||||
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_32;
|
||||
}
|
||||
@ -308,19 +316,22 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
if (page_found == -ENOENT) {
|
||||
/* Page not found in guest PTE entries */
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
|
||||
vcpu->arch.shared->msr |=
|
||||
(to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
(svcpu->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
svcpu_put(svcpu);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
||||
} else if (page_found == -EPERM) {
|
||||
/* Storage protection */
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dsisr =
|
||||
to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
|
||||
vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
|
||||
vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
|
||||
vcpu->arch.shared->msr |=
|
||||
(to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
svcpu->shadow_srr1 & 0x00000000f8000000ULL;
|
||||
svcpu_put(svcpu);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
||||
} else if (page_found == -EINVAL) {
|
||||
/* Page not found in guest SLB */
|
||||
@ -517,24 +528,29 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
run->ready_for_interrupt_injection = 1;
|
||||
|
||||
trace_kvm_book3s_exit(exit_nr, vcpu);
|
||||
preempt_enable();
|
||||
kvm_resched(vcpu);
|
||||
switch (exit_nr) {
|
||||
case BOOK3S_INTERRUPT_INST_STORAGE:
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong shadow_srr1 = svcpu->shadow_srr1;
|
||||
vcpu->stat.pf_instruc++;
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_32
|
||||
/* We set segments as unused segments when invalidating them. So
|
||||
* treat the respective fault as segment fault. */
|
||||
if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
|
||||
== SR_INVALID) {
|
||||
if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
|
||||
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
|
||||
r = RESUME_GUEST;
|
||||
svcpu_put(svcpu);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
svcpu_put(svcpu);
|
||||
|
||||
/* only care about PTEG not found errors, but leave NX alone */
|
||||
if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
|
||||
if (shadow_srr1 & 0x40000000) {
|
||||
r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
|
||||
vcpu->stat.sp_instruc++;
|
||||
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
|
||||
@ -547,33 +563,37 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
|
||||
r = RESUME_GUEST;
|
||||
} else {
|
||||
vcpu->arch.shared->msr |=
|
||||
to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
|
||||
vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
r = RESUME_GUEST;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BOOK3S_INTERRUPT_DATA_STORAGE:
|
||||
{
|
||||
ulong dar = kvmppc_get_fault_dar(vcpu);
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
u32 fault_dsisr = svcpu->fault_dsisr;
|
||||
vcpu->stat.pf_storage++;
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_32
|
||||
/* We set segments as unused segments when invalidating them. So
|
||||
* treat the respective fault as segment fault. */
|
||||
if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
|
||||
if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
|
||||
kvmppc_mmu_map_segment(vcpu, dar);
|
||||
r = RESUME_GUEST;
|
||||
svcpu_put(svcpu);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
svcpu_put(svcpu);
|
||||
|
||||
/* The only case we need to handle is missing shadow PTEs */
|
||||
if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
|
||||
if (fault_dsisr & DSISR_NOHPTE) {
|
||||
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
|
||||
} else {
|
||||
vcpu->arch.shared->dar = dar;
|
||||
vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
vcpu->arch.shared->dsisr = fault_dsisr;
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
r = RESUME_GUEST;
|
||||
}
|
||||
@ -609,10 +629,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
case BOOK3S_INTERRUPT_PROGRAM:
|
||||
{
|
||||
enum emulation_result er;
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu;
|
||||
ulong flags;
|
||||
|
||||
program_interrupt:
|
||||
flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
|
||||
svcpu = svcpu_get(vcpu);
|
||||
flags = svcpu->shadow_srr1 & 0x1f0000ull;
|
||||
svcpu_put(svcpu);
|
||||
|
||||
if (vcpu->arch.shared->msr & MSR_PR) {
|
||||
#ifdef EXIT_DEBUG
|
||||
@ -740,20 +763,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
default:
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong shadow_srr1 = svcpu->shadow_srr1;
|
||||
svcpu_put(svcpu);
|
||||
/* Ugh - bork here! What did we get? */
|
||||
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
|
||||
exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
|
||||
exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
|
||||
r = RESUME_HOST;
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!(r & RESUME_HOST)) {
|
||||
/* To avoid clobbering exit_reason, only check for signals if
|
||||
* we aren't already exiting to userspace for some other
|
||||
* reason. */
|
||||
|
||||
/*
|
||||
* Interrupts could be timers for the guest which we have to
|
||||
* inject again, so let's postpone them until we're in the guest
|
||||
* and if we really did time things so badly, then we just exit
|
||||
* again due to a host external interrupt.
|
||||
*/
|
||||
__hard_irq_disable();
|
||||
if (signal_pending(current)) {
|
||||
__hard_irq_enable();
|
||||
#ifdef EXIT_DEBUG
|
||||
printk(KERN_EMERG "KVM: Going back to host\n");
|
||||
#endif
|
||||
@ -761,10 +797,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
r = -EINTR;
|
||||
} else {
|
||||
preempt_disable();
|
||||
|
||||
/* In case an interrupt came in that was triggered
|
||||
* from userspace (like DEC), we need to check what
|
||||
* to inject now! */
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
@ -836,6 +874,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_PPC_HIOR:
|
||||
r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
|
||||
switch (reg->id) {
|
||||
case KVM_REG_PPC_HIOR:
|
||||
r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
|
||||
if (!r)
|
||||
to_book3s(vcpu)->hior_explicit = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_core_check_processor_compat(void)
|
||||
{
|
||||
return 0;
|
||||
@ -923,16 +993,31 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
ulong ext_msr;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* Check if we can run the vcpu at all */
|
||||
if (!vcpu->arch.sane) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
|
||||
/*
|
||||
* Interrupts could be timers for the guest which we have to inject
|
||||
* again, so let's postpone them until we're in the guest and if we
|
||||
* really did time things so badly, then we just exit again due to
|
||||
* a host external interrupt.
|
||||
*/
|
||||
__hard_irq_disable();
|
||||
|
||||
/* No need to go into the guest when all we do is going out */
|
||||
if (signal_pending(current)) {
|
||||
__hard_irq_enable();
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
return -EINTR;
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Save FPU state in stack */
|
||||
@ -974,8 +1059,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_guest_exit();
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
current->thread.regs->msr = ext_msr;
|
||||
|
||||
/* Make sure we save the guest FPU/Altivec/VSX state */
|
||||
@ -1002,9 +1085,50 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
current->thread.used_vsr = used_vsr;
|
||||
#endif
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get (and clear) the dirty memory log for a memory slot.
|
||||
*/
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_vcpu *vcpu;
|
||||
ulong ga, ga_end;
|
||||
int is_dirty = 0;
|
||||
int r;
|
||||
unsigned long n;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_get_dirty_log(kvm, log, &is_dirty);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
/* If nothing is dirty, don't bother messing with page tables. */
|
||||
if (is_dirty) {
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
|
||||
ga = memslot->base_gfn << PAGE_SHIFT;
|
||||
ga_end = ga + (memslot->npages << PAGE_SHIFT);
|
||||
|
||||
kvm_for_each_vcpu(n, vcpu, kvm)
|
||||
kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
memset(memslot->dirty_bitmap, 0, n);
|
||||
}
|
||||
|
||||
r = 0;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
|
@ -124,12 +124,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
|
||||
vcpu->arch.shared->msr = new_msr;
|
||||
|
||||
kvmppc_mmu_msr_notify(vcpu, old_msr);
|
||||
|
||||
if (vcpu->arch.shared->msr & MSR_WE) {
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
|
||||
};
|
||||
|
||||
kvmppc_vcpu_sync_spe(vcpu);
|
||||
}
|
||||
|
||||
@ -258,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
allowed = vcpu->arch.shared->msr & MSR_ME;
|
||||
msr_mask = 0;
|
||||
break;
|
||||
case BOOKE_IRQPRIO_EXTERNAL:
|
||||
case BOOKE_IRQPRIO_DECREMENTER:
|
||||
case BOOKE_IRQPRIO_FIT:
|
||||
keep_irq = true;
|
||||
/* fall through */
|
||||
case BOOKE_IRQPRIO_EXTERNAL:
|
||||
allowed = vcpu->arch.shared->msr & MSR_EE;
|
||||
allowed = allowed && !crit;
|
||||
msr_mask = MSR_CE|MSR_ME|MSR_DE;
|
||||
@ -276,7 +272,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
|
||||
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
|
||||
if (update_esr == true)
|
||||
vcpu->arch.esr = vcpu->arch.queued_esr;
|
||||
vcpu->arch.shared->esr = vcpu->arch.queued_esr;
|
||||
if (update_dear == true)
|
||||
vcpu->arch.shared->dar = vcpu->arch.queued_dear;
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
|
||||
@ -288,13 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
return allowed;
|
||||
}
|
||||
|
||||
/* Check pending exceptions and deliver one, if possible. */
|
||||
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
static void update_timer_ints(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
else
|
||||
kvmppc_core_dequeue_dec(vcpu);
|
||||
}
|
||||
|
||||
static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long *pending = &vcpu->arch.pending_exceptions;
|
||||
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
||||
unsigned int priority;
|
||||
|
||||
if (vcpu->requests) {
|
||||
if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
|
||||
smp_mb();
|
||||
update_timer_ints(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
priority = __ffs(*pending);
|
||||
while (priority <= BOOKE_IRQPRIO_MAX) {
|
||||
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
|
||||
@ -306,10 +315,24 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
/* Tell the guest about our interrupt status */
|
||||
if (*pending)
|
||||
vcpu->arch.shared->int_pending = 1;
|
||||
else if (old_pending)
|
||||
vcpu->arch.shared->int_pending = 0;
|
||||
vcpu->arch.shared->int_pending = !!*pending;
|
||||
}
|
||||
|
||||
/* Check pending exceptions and deliver one, if possible. */
|
||||
void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
kvmppc_core_check_exceptions(vcpu);
|
||||
|
||||
if (vcpu->arch.shared->msr & MSR_WE) {
|
||||
local_irq_enable();
|
||||
kvm_vcpu_block(vcpu);
|
||||
local_irq_disable();
|
||||
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
|
||||
kvmppc_core_check_exceptions(vcpu);
|
||||
};
|
||||
}
|
||||
|
||||
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
@ -322,11 +345,21 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
|
||||
if (signal_pending(current)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvm_guest_enter();
|
||||
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
|
||||
kvm_guest_exit();
|
||||
local_irq_enable();
|
||||
|
||||
out:
|
||||
local_irq_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -603,7 +636,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
kvmppc_core_prepare_to_enter(vcpu);
|
||||
|
||||
if (!(r & RESUME_HOST)) {
|
||||
/* To avoid clobbering exit_reason, only check for signals if
|
||||
@ -628,6 +661,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.pc = 0;
|
||||
vcpu->arch.shared->msr = 0;
|
||||
vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
|
||||
vcpu->arch.shared->pir = vcpu->vcpu_id;
|
||||
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
|
||||
|
||||
vcpu->arch.shadow_pid = 1;
|
||||
@ -662,10 +696,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
regs->sprg1 = vcpu->arch.shared->sprg1;
|
||||
regs->sprg2 = vcpu->arch.shared->sprg2;
|
||||
regs->sprg3 = vcpu->arch.shared->sprg3;
|
||||
regs->sprg4 = vcpu->arch.sprg4;
|
||||
regs->sprg5 = vcpu->arch.sprg5;
|
||||
regs->sprg6 = vcpu->arch.sprg6;
|
||||
regs->sprg7 = vcpu->arch.sprg7;
|
||||
regs->sprg4 = vcpu->arch.shared->sprg4;
|
||||
regs->sprg5 = vcpu->arch.shared->sprg5;
|
||||
regs->sprg6 = vcpu->arch.shared->sprg6;
|
||||
regs->sprg7 = vcpu->arch.shared->sprg7;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
||||
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
|
||||
@ -690,10 +724,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
vcpu->arch.shared->sprg1 = regs->sprg1;
|
||||
vcpu->arch.shared->sprg2 = regs->sprg2;
|
||||
vcpu->arch.shared->sprg3 = regs->sprg3;
|
||||
vcpu->arch.sprg4 = regs->sprg4;
|
||||
vcpu->arch.sprg5 = regs->sprg5;
|
||||
vcpu->arch.sprg6 = regs->sprg6;
|
||||
vcpu->arch.sprg7 = regs->sprg7;
|
||||
vcpu->arch.shared->sprg4 = regs->sprg4;
|
||||
vcpu->arch.shared->sprg5 = regs->sprg5;
|
||||
vcpu->arch.shared->sprg6 = regs->sprg6;
|
||||
vcpu->arch.shared->sprg7 = regs->sprg7;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
||||
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
|
||||
@ -711,7 +745,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
|
||||
sregs->u.e.csrr0 = vcpu->arch.csrr0;
|
||||
sregs->u.e.csrr1 = vcpu->arch.csrr1;
|
||||
sregs->u.e.mcsr = vcpu->arch.mcsr;
|
||||
sregs->u.e.esr = vcpu->arch.esr;
|
||||
sregs->u.e.esr = vcpu->arch.shared->esr;
|
||||
sregs->u.e.dear = vcpu->arch.shared->dar;
|
||||
sregs->u.e.tsr = vcpu->arch.tsr;
|
||||
sregs->u.e.tcr = vcpu->arch.tcr;
|
||||
@ -729,28 +763,19 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.csrr0 = sregs->u.e.csrr0;
|
||||
vcpu->arch.csrr1 = sregs->u.e.csrr1;
|
||||
vcpu->arch.mcsr = sregs->u.e.mcsr;
|
||||
vcpu->arch.esr = sregs->u.e.esr;
|
||||
vcpu->arch.shared->esr = sregs->u.e.esr;
|
||||
vcpu->arch.shared->dar = sregs->u.e.dear;
|
||||
vcpu->arch.vrsave = sregs->u.e.vrsave;
|
||||
vcpu->arch.tcr = sregs->u.e.tcr;
|
||||
kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
|
||||
|
||||
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
|
||||
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
|
||||
vcpu->arch.dec = sregs->u.e.dec;
|
||||
|
||||
kvmppc_emulate_dec(vcpu);
|
||||
kvmppc_emulate_dec(vcpu);
|
||||
}
|
||||
|
||||
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
|
||||
/*
|
||||
* FIXME: existing KVM timer handling is incomplete.
|
||||
* TSR cannot be read by the guest, and its value in
|
||||
* vcpu->arch is always zero. For now, just handle
|
||||
* the case where the caller is trying to inject a
|
||||
* decrementer interrupt.
|
||||
*/
|
||||
|
||||
if ((sregs->u.e.tsr & TSR_DIS) &&
|
||||
(vcpu->arch.tcr & TCR_DIE))
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
vcpu->arch.tsr = sregs->u.e.tsr;
|
||||
update_timer_ints(vcpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -761,7 +786,7 @@ static void get_sregs_arch206(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
sregs->u.e.features |= KVM_SREGS_E_ARCH206;
|
||||
|
||||
sregs->u.e.pir = 0;
|
||||
sregs->u.e.pir = vcpu->vcpu_id;
|
||||
sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
|
||||
sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
|
||||
sregs->u.e.decar = vcpu->arch.decar;
|
||||
@ -774,7 +799,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
|
||||
if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
|
||||
return 0;
|
||||
|
||||
if (sregs->u.e.pir != 0)
|
||||
if (sregs->u.e.pir != vcpu->vcpu_id)
|
||||
return -EINVAL;
|
||||
|
||||
vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
|
||||
@ -862,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
return kvmppc_core_set_sregs(vcpu, sregs);
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
@ -906,6 +941,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Store new_tcr as the vcpu's TCR value, then re-evaluate whether the
 * decrementer exception should be queued.
 */
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
	vcpu->arch.tcr = new_tcr;

	/* The new TCR_DIE state may change what should be pending. */
	update_timer_ints(vcpu);
}
|
||||
|
||||
/*
 * Set tsr_bits in the vcpu's TSR value and ask the vcpu to process the
 * change: a KVM_REQ_PENDING_TIMER request is raised and the vcpu is kicked.
 *
 * The pending exception state is not updated here; the request handler in
 * kvmppc_core_check_exceptions() calls update_timer_ints() for that.
 */
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
	set_bits(tsr_bits, &vcpu->arch.tsr);

	/*
	 * Order the TSR update before the request is raised.
	 * NOTE(review): presumably pairs with the smp_mb() issued after
	 * kvm_check_request(KVM_REQ_PENDING_TIMER, ...) -- confirm.
	 */
	smp_wmb();

	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	kvm_vcpu_kick(vcpu);
}
|
||||
|
||||
/*
 * Clear tsr_bits in the vcpu's TSR value, then re-evaluate whether the
 * decrementer exception should stay queued.
 */
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
	clear_bits(tsr_bits, &vcpu->arch.tsr);

	/* Clearing TSR_DIS dequeues the decrementer exception. */
	update_timer_ints(vcpu);
}
|
||||
|
||||
void kvmppc_decrementer_func(unsigned long data)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
||||
|
||||
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
|
||||
}
|
||||
|
||||
int __init kvmppc_booke_init(void)
|
||||
{
|
||||
unsigned long ivor[16];
|
||||
|
@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers;
|
||||
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
|
||||
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
|
||||
|
||||
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
|
||||
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
|
||||
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
|
||||
|
||||
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned int inst, int *advance);
|
||||
int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* Copyright IBM Corp. 2008
|
||||
* Copyright 2011 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
|
||||
*/
|
||||
@ -107,7 +108,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
case SPRN_DEAR:
|
||||
vcpu->arch.shared->dar = spr_val; break;
|
||||
case SPRN_ESR:
|
||||
vcpu->arch.esr = spr_val; break;
|
||||
vcpu->arch.shared->esr = spr_val; break;
|
||||
case SPRN_DBCR0:
|
||||
vcpu->arch.dbcr0 = spr_val; break;
|
||||
case SPRN_DBCR1:
|
||||
@ -115,23 +116,23 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
case SPRN_DBSR:
|
||||
vcpu->arch.dbsr &= ~spr_val; break;
|
||||
case SPRN_TSR:
|
||||
vcpu->arch.tsr &= ~spr_val; break;
|
||||
kvmppc_clr_tsr_bits(vcpu, spr_val);
|
||||
break;
|
||||
case SPRN_TCR:
|
||||
vcpu->arch.tcr = spr_val;
|
||||
kvmppc_emulate_dec(vcpu);
|
||||
kvmppc_set_tcr(vcpu, spr_val);
|
||||
break;
|
||||
|
||||
/* Note: SPRG4-7 are user-readable. These values are
|
||||
* loaded into the real SPRGs when resuming the
|
||||
* guest. */
|
||||
case SPRN_SPRG4:
|
||||
vcpu->arch.sprg4 = spr_val; break;
|
||||
vcpu->arch.shared->sprg4 = spr_val; break;
|
||||
case SPRN_SPRG5:
|
||||
vcpu->arch.sprg5 = spr_val; break;
|
||||
vcpu->arch.shared->sprg5 = spr_val; break;
|
||||
case SPRN_SPRG6:
|
||||
vcpu->arch.sprg6 = spr_val; break;
|
||||
vcpu->arch.shared->sprg6 = spr_val; break;
|
||||
case SPRN_SPRG7:
|
||||
vcpu->arch.sprg7 = spr_val; break;
|
||||
vcpu->arch.shared->sprg7 = spr_val; break;
|
||||
|
||||
case SPRN_IVPR:
|
||||
vcpu->arch.ivpr = spr_val;
|
||||
@ -202,13 +203,17 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
case SPRN_DEAR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
|
||||
case SPRN_ESR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break;
|
||||
case SPRN_DBCR0:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
|
||||
case SPRN_DBCR1:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
|
||||
case SPRN_DBSR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
|
||||
case SPRN_TSR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
|
||||
case SPRN_TCR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
|
||||
|
||||
case SPRN_IVOR0:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
|
||||
|
@ -402,19 +402,25 @@ lightweight_exit:
|
||||
/* Save vcpu pointer for the exception handlers. */
|
||||
mtspr SPRN_SPRG_WVCPU, r4
|
||||
|
||||
lwz r5, VCPU_SHARED(r4)
|
||||
|
||||
/* Can't switch the stack pointer until after IVPR is switched,
|
||||
* because host interrupt handlers would get confused. */
|
||||
lwz r1, VCPU_GPR(r1)(r4)
|
||||
|
||||
/* Host interrupt handlers may have clobbered these guest-readable
|
||||
* SPRGs, so we need to reload them here with the guest's values. */
|
||||
lwz r3, VCPU_SPRG4(r4)
|
||||
/*
|
||||
* Host interrupt handlers may have clobbered these
|
||||
* guest-readable SPRGs, or the guest kernel may have
|
||||
* written directly to the shared area, so we
|
||||
* need to reload them here with the guest's values.
|
||||
*/
|
||||
lwz r3, VCPU_SHARED_SPRG4(r5)
|
||||
mtspr SPRN_SPRG4W, r3
|
||||
lwz r3, VCPU_SPRG5(r4)
|
||||
lwz r3, VCPU_SHARED_SPRG5(r5)
|
||||
mtspr SPRN_SPRG5W, r3
|
||||
lwz r3, VCPU_SPRG6(r4)
|
||||
lwz r3, VCPU_SHARED_SPRG6(r5)
|
||||
mtspr SPRN_SPRG6W, r3
|
||||
lwz r3, VCPU_SPRG7(r4)
|
||||
lwz r3, VCPU_SHARED_SPRG7(r5)
|
||||
mtspr SPRN_SPRG7W, r3
|
||||
|
||||
#ifdef CONFIG_KVM_EXIT_TIMING
|
||||
|
@ -71,9 +71,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.pvr = mfspr(SPRN_PVR);
|
||||
vcpu_e500->svr = mfspr(SPRN_SVR);
|
||||
|
||||
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
|
||||
vcpu->vcpu_id = 0;
|
||||
|
||||
vcpu->arch.cpu_type = KVM_CPU_E500V2;
|
||||
|
||||
return 0;
|
||||
@ -118,12 +115,12 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
|
||||
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
|
||||
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
|
||||
|
||||
sregs->u.e.mas0 = vcpu_e500->mas0;
|
||||
sregs->u.e.mas1 = vcpu_e500->mas1;
|
||||
sregs->u.e.mas2 = vcpu_e500->mas2;
|
||||
sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
|
||||
sregs->u.e.mas4 = vcpu_e500->mas4;
|
||||
sregs->u.e.mas6 = vcpu_e500->mas6;
|
||||
sregs->u.e.mas0 = vcpu->arch.shared->mas0;
|
||||
sregs->u.e.mas1 = vcpu->arch.shared->mas1;
|
||||
sregs->u.e.mas2 = vcpu->arch.shared->mas2;
|
||||
sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
|
||||
sregs->u.e.mas4 = vcpu->arch.shared->mas4;
|
||||
sregs->u.e.mas6 = vcpu->arch.shared->mas6;
|
||||
|
||||
sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
|
||||
sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
|
||||
@ -151,13 +148,12 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
|
||||
}
|
||||
|
||||
if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
|
||||
vcpu_e500->mas0 = sregs->u.e.mas0;
|
||||
vcpu_e500->mas1 = sregs->u.e.mas1;
|
||||
vcpu_e500->mas2 = sregs->u.e.mas2;
|
||||
vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
|
||||
vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
|
||||
vcpu_e500->mas4 = sregs->u.e.mas4;
|
||||
vcpu_e500->mas6 = sregs->u.e.mas6;
|
||||
vcpu->arch.shared->mas0 = sregs->u.e.mas0;
|
||||
vcpu->arch.shared->mas1 = sregs->u.e.mas1;
|
||||
vcpu->arch.shared->mas2 = sregs->u.e.mas2;
|
||||
vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
|
||||
vcpu->arch.shared->mas4 = sregs->u.e.mas4;
|
||||
vcpu->arch.shared->mas6 = sregs->u.e.mas6;
|
||||
}
|
||||
|
||||
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
|
||||
@ -233,6 +229,10 @@ static int __init kvmppc_e500_init(void)
|
||||
unsigned long ivor[3];
|
||||
unsigned long max_ivor = 0;
|
||||
|
||||
r = kvmppc_core_check_processor_compat();
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = kvmppc_booke_init();
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -89,19 +89,23 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
return EMULATE_FAIL;
|
||||
vcpu_e500->pid[2] = spr_val; break;
|
||||
case SPRN_MAS0:
|
||||
vcpu_e500->mas0 = spr_val; break;
|
||||
vcpu->arch.shared->mas0 = spr_val; break;
|
||||
case SPRN_MAS1:
|
||||
vcpu_e500->mas1 = spr_val; break;
|
||||
vcpu->arch.shared->mas1 = spr_val; break;
|
||||
case SPRN_MAS2:
|
||||
vcpu_e500->mas2 = spr_val; break;
|
||||
vcpu->arch.shared->mas2 = spr_val; break;
|
||||
case SPRN_MAS3:
|
||||
vcpu_e500->mas3 = spr_val; break;
|
||||
vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
|
||||
vcpu->arch.shared->mas7_3 |= spr_val;
|
||||
break;
|
||||
case SPRN_MAS4:
|
||||
vcpu_e500->mas4 = spr_val; break;
|
||||
vcpu->arch.shared->mas4 = spr_val; break;
|
||||
case SPRN_MAS6:
|
||||
vcpu_e500->mas6 = spr_val; break;
|
||||
vcpu->arch.shared->mas6 = spr_val; break;
|
||||
case SPRN_MAS7:
|
||||
vcpu_e500->mas7 = spr_val; break;
|
||||
vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
|
||||
vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
|
||||
break;
|
||||
case SPRN_L1CSR0:
|
||||
vcpu_e500->l1csr0 = spr_val;
|
||||
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
|
||||
@ -143,6 +147,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
{
|
||||
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
|
||||
int emulated = EMULATE_DONE;
|
||||
unsigned long val;
|
||||
|
||||
switch (sprn) {
|
||||
case SPRN_PID:
|
||||
@ -152,20 +157,23 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
case SPRN_PID2:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
|
||||
case SPRN_MAS0:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break;
|
||||
case SPRN_MAS1:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break;
|
||||
case SPRN_MAS2:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break;
|
||||
case SPRN_MAS3:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
|
||||
val = (u32)vcpu->arch.shared->mas7_3;
|
||||
kvmppc_set_gpr(vcpu, rt, val);
|
||||
break;
|
||||
case SPRN_MAS4:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break;
|
||||
case SPRN_MAS6:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break;
|
||||
case SPRN_MAS7:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
|
||||
|
||||
val = vcpu->arch.shared->mas7_3 >> 32;
|
||||
kvmppc_set_gpr(vcpu, rt, val);
|
||||
break;
|
||||
case SPRN_TLB0CFG:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
|
||||
case SPRN_TLB1CFG:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,13 +20,9 @@
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/kvm_e500.h>
|
||||
|
||||
#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
|
||||
#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
|
||||
#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
|
||||
|
||||
#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
|
||||
#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
|
||||
#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
|
||||
/* This geometry is the legacy default -- can be overridden by userspace */
|
||||
#define KVM_E500_TLB0_WAY_SIZE 128
|
||||
#define KVM_E500_TLB0_WAY_NUM 2
|
||||
|
||||
#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
|
||||
#define KVM_E500_TLB1_SIZE 16
|
||||
@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
|
||||
extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
|
||||
|
||||
/* TLB helper functions */
|
||||
static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
|
||||
static inline unsigned int
|
||||
get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return (tlbe->mas1 >> 7) & 0x1f;
|
||||
}
|
||||
|
||||
static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
|
||||
static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return tlbe->mas2 & 0xfffff000;
|
||||
}
|
||||
|
||||
static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
|
||||
static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
unsigned int pgsize = get_tlb_size(tlbe);
|
||||
return 1ULL << 10 << pgsize;
|
||||
}
|
||||
|
||||
static inline gva_t get_tlb_end(const struct tlbe *tlbe)
|
||||
static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
u64 bytes = get_tlb_bytes(tlbe);
|
||||
return get_tlb_eaddr(tlbe) + bytes - 1;
|
||||
}
|
||||
|
||||
static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
|
||||
static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
u64 rpn = tlbe->mas7;
|
||||
return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
|
||||
return tlbe->mas7_3 & ~0xfffULL;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
|
||||
static inline unsigned int
|
||||
get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return (tlbe->mas1 >> 16) & 0xff;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
|
||||
static inline unsigned int
|
||||
get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return (tlbe->mas1 >> 12) & 0x1;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
|
||||
static inline unsigned int
|
||||
get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return (tlbe->mas1 >> 31) & 0x1;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
|
||||
static inline unsigned int
|
||||
get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
return (tlbe->mas1 >> 30) & 0x1;
|
||||
}
|
||||
@ -121,59 +121,37 @@ static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
|
||||
return !!(vcpu->arch.shared->msr & MSR_PR);
|
||||
}
|
||||
|
||||
static inline unsigned int get_cur_spid(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500)
|
||||
static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (vcpu_e500->mas6 >> 16) & 0xff;
|
||||
return (vcpu->arch.shared->mas6 >> 16) & 0xff;
|
||||
}
|
||||
|
||||
static inline unsigned int get_cur_sas(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500)
|
||||
static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu_e500->mas6 & 0x1;
|
||||
return vcpu->arch.shared->mas6 & 0x1;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_tlbsel(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500)
|
||||
static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Manual says that tlbsel has 2 bits wide.
|
||||
* Since we only have two TLBs, only lower bit is used.
|
||||
*/
|
||||
return (vcpu_e500->mas0 >> 28) & 0x1;
|
||||
return (vcpu->arch.shared->mas0 >> 28) & 0x1;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_nv_bit(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500)
|
||||
static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu_e500->mas0 & 0xfff;
|
||||
return vcpu->arch.shared->mas0 & 0xfff;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_esel_bit(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500)
|
||||
static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (vcpu_e500->mas0 >> 16) & 0xfff;
|
||||
}
|
||||
|
||||
static inline unsigned int get_tlb_esel(
|
||||
const struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
int tlbsel)
|
||||
{
|
||||
unsigned int esel = get_tlb_esel_bit(vcpu_e500);
|
||||
|
||||
if (tlbsel == 0) {
|
||||
esel &= KVM_E500_TLB0_WAY_NUM_MASK;
|
||||
esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
|
||||
<< KVM_E500_TLB0_WAY_NUM_BIT;
|
||||
} else {
|
||||
esel &= KVM_E500_TLB1_SIZE - 1;
|
||||
}
|
||||
|
||||
return esel;
|
||||
return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
|
||||
}
|
||||
|
||||
static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
|
||||
const struct tlbe *tlbe)
|
||||
const struct kvm_book3e_206_tlb_entry *tlbe)
|
||||
{
|
||||
gpa_t gpa;
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* Copyright IBM Corp. 2007
|
||||
* Copyright 2011 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
|
||||
*/
|
||||
@ -69,54 +70,55 @@
|
||||
#define OP_STH 44
|
||||
#define OP_STHU 45
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.tcr & TCR_DIE;
|
||||
}
|
||||
#endif
|
||||
|
||||
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long dec_nsec;
|
||||
unsigned long long dec_time;
|
||||
|
||||
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
/* mtdec lowers the interrupt line when positive. */
|
||||
kvmppc_core_dequeue_dec(vcpu);
|
||||
|
||||
/* POWER4+ triggers a dec interrupt if the value is < 0 */
|
||||
if (vcpu->arch.dec & 0x80000000) {
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (kvmppc_dec_enabled(vcpu)) {
|
||||
/* The decrementer ticks at the same rate as the timebase, so
|
||||
* that's how we convert the guest DEC value to the number of
|
||||
* host ticks. */
|
||||
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
dec_nsec = vcpu->arch.dec;
|
||||
dec_nsec *= 1000;
|
||||
dec_nsec /= tb_ticks_per_usec;
|
||||
hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
|
||||
HRTIMER_MODE_REL);
|
||||
vcpu->arch.dec_jiffies = get_tb();
|
||||
} else {
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
}
|
||||
#ifdef CONFIG_BOOKE
|
||||
/* On BOOKE, DEC = 0 is as good as decrementer not enabled */
|
||||
if (vcpu->arch.dec == 0)
|
||||
return;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The decrementer ticks at the same rate as the timebase, so
|
||||
* that's how we convert the guest DEC value to the number of
|
||||
* host ticks.
|
||||
*/
|
||||
|
||||
dec_time = vcpu->arch.dec;
|
||||
dec_time *= 1000;
|
||||
do_div(dec_time, tb_ticks_per_usec);
|
||||
dec_nsec = do_div(dec_time, NSEC_PER_SEC);
|
||||
hrtimer_start(&vcpu->arch.dec_timer,
|
||||
ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
|
||||
vcpu->arch.dec_jiffies = get_tb();
|
||||
}
|
||||
|
||||
u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
|
||||
{
|
||||
u64 jd = tb - vcpu->arch.dec_jiffies;
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
if (vcpu->arch.dec < jd)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
return vcpu->arch.dec - jd;
|
||||
}
|
||||
|
||||
@ -159,7 +161,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
case OP_TRAP_64:
|
||||
kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
|
||||
#else
|
||||
kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
|
||||
kvmppc_core_queue_program(vcpu,
|
||||
vcpu->arch.shared->esr | ESR_PTR);
|
||||
#endif
|
||||
advance = 0;
|
||||
break;
|
||||
|
@ -39,7 +39,8 @@
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
{
|
||||
return !(v->arch.shared->msr & MSR_WE) ||
|
||||
!!(v->arch.pending_exceptions);
|
||||
!!(v->arch.pending_exceptions) ||
|
||||
v->requests;
|
||||
}
|
||||
|
||||
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
||||
@ -66,7 +67,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.magic_page_pa = param1;
|
||||
vcpu->arch.magic_page_ea = param2;
|
||||
|
||||
r2 = KVM_MAGIC_FEAT_SR;
|
||||
r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
|
||||
|
||||
r = HC_EV_SUCCESS;
|
||||
break;
|
||||
@ -171,8 +172,11 @@ void kvm_arch_check_processor_compat(void *rtn)
|
||||
*(int *)rtn = kvmppc_core_check_processor_compat();
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
if (type)
|
||||
return -EINVAL;
|
||||
|
||||
return kvmppc_core_init_vm(kvm);
|
||||
}
|
||||
|
||||
@ -208,17 +212,22 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_PPC_BOOKE_SREGS:
|
||||
#else
|
||||
case KVM_CAP_PPC_SEGSTATE:
|
||||
case KVM_CAP_PPC_HIOR:
|
||||
case KVM_CAP_PPC_PAPR:
|
||||
#endif
|
||||
case KVM_CAP_PPC_UNSET_IRQ:
|
||||
case KVM_CAP_PPC_IRQ_LEVEL:
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_ONE_REG:
|
||||
r = 1;
|
||||
break;
|
||||
#ifndef CONFIG_KVM_BOOK3S_64_HV
|
||||
case KVM_CAP_PPC_PAIRED_SINGLES:
|
||||
case KVM_CAP_PPC_OSI:
|
||||
case KVM_CAP_PPC_GET_PVINFO:
|
||||
#ifdef CONFIG_KVM_E500
|
||||
case KVM_CAP_SW_TLB:
|
||||
#endif
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
@ -238,7 +247,26 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_201))
|
||||
r = 2;
|
||||
break;
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_NR_VCPUS:
|
||||
/*
|
||||
* Recommending a number of CPUs is somewhat arbitrary; we
|
||||
* return the number of present CPUs for -HV (since a host
|
||||
* will have secondary threads "offline"), and for other KVM
|
||||
* implementations just count online CPUs.
|
||||
*/
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
r = num_present_cpus();
|
||||
#else
|
||||
r = num_online_cpus();
|
||||
#endif
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPUS:
|
||||
r = KVM_MAX_VCPUS;
|
||||
break;
|
||||
default:
|
||||
r = 0;
|
||||
break;
|
||||
@ -253,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
}
|
||||
|
||||
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_memory_slot old,
|
||||
@ -279,9 +317,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
vcpu = kvmppc_core_vcpu_create(kvm, id);
|
||||
vcpu->arch.wqp = &vcpu->wq;
|
||||
if (!IS_ERR(vcpu))
|
||||
if (!IS_ERR(vcpu)) {
|
||||
vcpu->arch.wqp = &vcpu->wq;
|
||||
kvmppc_create_vcpu_debugfs(vcpu, id);
|
||||
}
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
@ -305,18 +344,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
return kvmppc_core_pending_dec(vcpu);
|
||||
}
|
||||
|
||||
static void kvmppc_decrementer_func(unsigned long data)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
||||
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
|
||||
if (waitqueue_active(vcpu->arch.wqp)) {
|
||||
wake_up_interruptible(vcpu->arch.wqp);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* low level hrtimer wake routine. Because this runs in hardirq context
|
||||
* we schedule a tasklet to do the real work.
|
||||
@ -431,20 +458,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
|
||||
|
||||
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
|
||||
|
||||
switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
|
||||
case KVM_REG_GPR:
|
||||
switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
|
||||
case KVM_MMIO_REG_GPR:
|
||||
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
|
||||
break;
|
||||
case KVM_REG_FPR:
|
||||
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
||||
case KVM_MMIO_REG_FPR:
|
||||
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
|
||||
break;
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
case KVM_REG_QPR:
|
||||
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
||||
case KVM_MMIO_REG_QPR:
|
||||
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
|
||||
break;
|
||||
case KVM_REG_FQPR:
|
||||
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
||||
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
||||
case KVM_MMIO_REG_FQPR:
|
||||
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
|
||||
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@ -553,8 +580,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
vcpu->arch.hcall_needed = 0;
|
||||
}
|
||||
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
|
||||
r = kvmppc_vcpu_run(run, vcpu);
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
@ -563,6 +588,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int me;
|
||||
int cpu = vcpu->cpu;
|
||||
|
||||
me = get_cpu();
|
||||
if (waitqueue_active(vcpu->arch.wqp)) {
|
||||
wake_up_interruptible(vcpu->arch.wqp);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
} else if (cpu != me && cpu != -1) {
|
||||
smp_send_reschedule(vcpu->cpu);
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
|
||||
{
|
||||
if (irq->irq == KVM_INTERRUPT_UNSET) {
|
||||
@ -571,13 +611,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
|
||||
}
|
||||
|
||||
kvmppc_core_queue_external(vcpu, irq);
|
||||
|
||||
if (waitqueue_active(vcpu->arch.wqp)) {
|
||||
wake_up_interruptible(vcpu->arch.wqp);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
} else if (vcpu->cpu != -1) {
|
||||
smp_send_reschedule(vcpu->cpu);
|
||||
}
|
||||
kvm_vcpu_kick(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -599,6 +633,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
||||
r = 0;
|
||||
vcpu->arch.papr_enabled = true;
|
||||
break;
|
||||
#ifdef CONFIG_KVM_E500
|
||||
case KVM_CAP_SW_TLB: {
|
||||
struct kvm_config_tlb cfg;
|
||||
void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
|
||||
break;
|
||||
|
||||
r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -648,6 +695,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
|
||||
break;
|
||||
}
|
||||
|
||||
case KVM_SET_ONE_REG:
|
||||
case KVM_GET_ONE_REG:
|
||||
{
|
||||
struct kvm_one_reg reg;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&reg, argp, sizeof(reg)))
|
||||
goto out;
|
||||
if (ioctl == KVM_SET_ONE_REG)
|
||||
r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
|
||||
else
|
||||
r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_E500
|
||||
case KVM_DIRTY_TLB: {
|
||||
struct kvm_dirty_tlb dirty;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&dirty, argp, sizeof(dirty)))
|
||||
goto out;
|
||||
r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -656,6 +729,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
{
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
|
||||
{
|
||||
u32 inst_lis = 0x3c000000;
|
||||
|
@ -118,11 +118,14 @@ TRACE_EVENT(kvm_book3s_exit,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu;
|
||||
__entry->exit_nr = exit_nr;
|
||||
__entry->pc = kvmppc_get_pc(vcpu);
|
||||
__entry->dar = kvmppc_get_fault_dar(vcpu);
|
||||
__entry->msr = vcpu->arch.shared->msr;
|
||||
__entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
|
||||
svcpu = svcpu_get(vcpu);
|
||||
__entry->srr1 = svcpu->shadow_srr1;
|
||||
svcpu_put(svcpu);
|
||||
),
|
||||
|
||||
TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
|
||||
@ -337,6 +340,63 @@ TRACE_EVENT(kvm_book3s_slbmte,
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S */
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Book3E trace points *
|
||||
*************************************************************************/
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
|
||||
TRACE_EVENT(kvm_booke206_stlb_write,
|
||||
TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
|
||||
TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u32, mas0 )
|
||||
__field( __u32, mas8 )
|
||||
__field( __u32, mas1 )
|
||||
__field( __u64, mas2 )
|
||||
__field( __u64, mas7_3 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->mas0 = mas0;
|
||||
__entry->mas8 = mas8;
|
||||
__entry->mas1 = mas1;
|
||||
__entry->mas2 = mas2;
|
||||
__entry->mas7_3 = mas7_3;
|
||||
),
|
||||
|
||||
TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
|
||||
__entry->mas0, __entry->mas8, __entry->mas1,
|
||||
__entry->mas2, __entry->mas7_3)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_booke206_gtlb_write,
|
||||
TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
|
||||
TP_ARGS(mas0, mas1, mas2, mas7_3),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u32, mas0 )
|
||||
__field( __u32, mas1 )
|
||||
__field( __u64, mas2 )
|
||||
__field( __u64, mas7_3 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->mas0 = mas0;
|
||||
__entry->mas1 = mas1;
|
||||
__entry->mas2 = mas2;
|
||||
__entry->mas7_3 = mas7_3;
|
||||
),
|
||||
|
||||
TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
|
||||
__entry->mas0, __entry->mas1,
|
||||
__entry->mas2, __entry->mas7_3)
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/of_fdt.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/bootmem.h>
|
||||
@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
||||
*shift = hugepd_shift(*hpdp);
|
||||
return hugepte_offset(hpdp, ea, pdshift);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
|
@ -41,4 +41,15 @@ struct kvm_debug_exit_arch {
|
||||
struct kvm_guest_debug_arch {
|
||||
};
|
||||
|
||||
#define KVM_SYNC_PREFIX (1UL << 0)
|
||||
#define KVM_SYNC_GPRS (1UL << 1)
|
||||
#define KVM_SYNC_ACRS (1UL << 2)
|
||||
#define KVM_SYNC_CRS (1UL << 3)
|
||||
/* definition of registers in kvm_run */
|
||||
struct kvm_sync_regs {
|
||||
__u64 prefix; /* prefix register */
|
||||
__u64 gprs[16]; /* general purpose registers */
|
||||
__u32 acrs[16]; /* access registers */
|
||||
__u64 crs[16]; /* control registers */
|
||||
};
|
||||
#endif
|
||||
|
@ -220,18 +220,17 @@ struct kvm_s390_float_interrupt {
|
||||
struct list_head list;
|
||||
atomic_t active;
|
||||
int next_rr_cpu;
|
||||
unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)];
|
||||
struct kvm_s390_local_interrupt *local_int[64];
|
||||
unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
|
||||
/ sizeof(long)];
|
||||
struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
|
||||
};
|
||||
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
struct kvm_s390_sie_block *sie_block;
|
||||
unsigned long guest_gprs[16];
|
||||
s390_fp_regs host_fpregs;
|
||||
unsigned int host_acrs[NUM_ACRS];
|
||||
s390_fp_regs guest_fpregs;
|
||||
unsigned int guest_acrs[NUM_ACRS];
|
||||
struct kvm_s390_local_interrupt local_int;
|
||||
struct hrtimer ckc_timer;
|
||||
struct tasklet_struct tasklet;
|
||||
@ -246,6 +245,9 @@ struct kvm_vm_stat {
|
||||
u32 remote_tlb_flush;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
struct kvm_arch{
|
||||
struct sca_block *sca;
|
||||
debug_info_t *dbf;
|
||||
@ -253,5 +255,5 @@ struct kvm_arch{
|
||||
struct gmap *gmap;
|
||||
};
|
||||
|
||||
extern int sie64a(struct kvm_s390_sie_block *, unsigned long *);
|
||||
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
|
||||
#endif
|
||||
|
@ -34,6 +34,15 @@ config KVM
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config KVM_S390_UCONTROL
|
||||
bool "Userspace controlled virtual machines"
|
||||
depends on KVM
|
||||
---help---
|
||||
Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
|
||||
controlled by userspace.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
|
||||
# the virtualization menu.
|
||||
source drivers/vhost/Kconfig
|
||||
|
@ -20,8 +20,8 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
|
||||
unsigned long start, end;
|
||||
unsigned long prefix = vcpu->arch.sie_block->prefix;
|
||||
|
||||
start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
|
||||
end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
|
||||
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
|
||||
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
|
||||
|
||||
if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
|
||||
|| start < 2 * PAGE_SIZE)
|
||||
@ -56,7 +56,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
|
||||
static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
|
||||
unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
|
||||
unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
|
||||
switch (subcode) {
|
||||
|
@ -36,7 +36,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
|
||||
|
||||
useraddr = disp2;
|
||||
if (base2)
|
||||
useraddr += vcpu->arch.guest_gprs[base2];
|
||||
useraddr += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (useraddr & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -75,7 +75,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
|
||||
|
||||
useraddr = disp2;
|
||||
if (base2)
|
||||
useraddr += vcpu->arch.guest_gprs[base2];
|
||||
useraddr += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (useraddr & 3)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -133,13 +133,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.exit_stop_request++;
|
||||
spin_lock_bh(&vcpu->arch.local_int.lock);
|
||||
if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
|
||||
vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
|
||||
rc = kvm_s390_vcpu_store_status(vcpu,
|
||||
KVM_S390_STORE_STATUS_NOADDR);
|
||||
if (rc >= 0)
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
|
||||
vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
|
||||
@ -155,7 +148,18 @@ static int handle_stop(struct kvm_vcpu *vcpu)
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
spin_unlock_bh(&vcpu->arch.local_int.lock);
|
||||
if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
|
||||
vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
|
||||
/* store status must be called unlocked. Since local_int.lock
|
||||
* only protects local_int.* and not guest memory we can give
|
||||
* up the lock here */
|
||||
spin_unlock_bh(&vcpu->arch.local_int.lock);
|
||||
rc = kvm_s390_vcpu_store_status(vcpu,
|
||||
KVM_S390_STORE_STATUS_NOADDR);
|
||||
if (rc >= 0)
|
||||
rc = -EOPNOTSUPP;
|
||||
} else
|
||||
spin_unlock_bh(&vcpu->arch.local_int.lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -236,8 +236,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
|
||||
VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
|
||||
inti->prefix.address);
|
||||
vcpu->stat.deliver_prefix_signal++;
|
||||
vcpu->arch.sie_block->prefix = inti->prefix.address;
|
||||
vcpu->arch.sie_block->ihcpu = 0xffff;
|
||||
kvm_s390_set_prefix(vcpu, inti->prefix.address);
|
||||
break;
|
||||
|
||||
case KVM_S390_RESTART:
|
||||
|
@ -129,6 +129,10 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_S390_PSW:
|
||||
case KVM_CAP_S390_GMAP:
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
case KVM_CAP_S390_UCONTROL:
|
||||
#endif
|
||||
case KVM_CAP_SYNC_REGS:
|
||||
r = 1;
|
||||
break;
|
||||
default:
|
||||
@ -171,11 +175,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int rc;
|
||||
char debug_name[16];
|
||||
|
||||
rc = -EINVAL;
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
if (type & ~KVM_VM_S390_UCONTROL)
|
||||
goto out_err;
|
||||
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
|
||||
goto out_err;
|
||||
#else
|
||||
if (type)
|
||||
goto out_err;
|
||||
#endif
|
||||
|
||||
rc = s390_enable_sie();
|
||||
if (rc)
|
||||
goto out_err;
|
||||
@ -198,10 +213,13 @@ int kvm_arch_init_vm(struct kvm *kvm)
|
||||
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
|
||||
VM_EVENT(kvm, 3, "%s", "vm created");
|
||||
|
||||
kvm->arch.gmap = gmap_alloc(current->mm);
|
||||
if (!kvm->arch.gmap)
|
||||
goto out_nogmap;
|
||||
|
||||
if (type & KVM_VM_S390_UCONTROL) {
|
||||
kvm->arch.gmap = NULL;
|
||||
} else {
|
||||
kvm->arch.gmap = gmap_alloc(current->mm);
|
||||
if (!kvm->arch.gmap)
|
||||
goto out_nogmap;
|
||||
}
|
||||
return 0;
|
||||
out_nogmap:
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
@ -214,11 +232,18 @@ int kvm_arch_init_vm(struct kvm *kvm)
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
|
||||
clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn);
|
||||
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
|
||||
(__u64) vcpu->arch.sie_block)
|
||||
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
|
||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||
clear_bit(63 - vcpu->vcpu_id,
|
||||
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
|
||||
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
|
||||
(__u64) vcpu->arch.sie_block)
|
||||
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
|
||||
}
|
||||
smp_mb();
|
||||
|
||||
if (kvm_is_ucontrol(vcpu->kvm))
|
||||
gmap_free(vcpu->arch.gmap);
|
||||
|
||||
free_page((unsigned long)(vcpu->arch.sie_block));
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kfree(vcpu);
|
||||
@ -249,13 +274,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
kvm_free_vcpus(kvm);
|
||||
free_page((unsigned long)(kvm->arch.sca));
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
gmap_free(kvm->arch.gmap);
|
||||
if (!kvm_is_ucontrol(kvm))
|
||||
gmap_free(kvm->arch.gmap);
|
||||
}
|
||||
|
||||
/* Section: vcpu related */
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_is_ucontrol(vcpu->kvm)) {
|
||||
vcpu->arch.gmap = gmap_alloc(current->mm);
|
||||
if (!vcpu->arch.gmap)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
|
||||
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
|
||||
KVM_SYNC_GPRS |
|
||||
KVM_SYNC_ACRS |
|
||||
KVM_SYNC_CRS;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -270,7 +307,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
save_access_regs(vcpu->arch.host_acrs);
|
||||
vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
|
||||
restore_fp_regs(&vcpu->arch.guest_fpregs);
|
||||
restore_access_regs(vcpu->arch.guest_acrs);
|
||||
restore_access_regs(vcpu->run->s.regs.acrs);
|
||||
gmap_enable(vcpu->arch.gmap);
|
||||
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
|
||||
}
|
||||
@ -280,7 +317,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
|
||||
gmap_disable(vcpu->arch.gmap);
|
||||
save_fp_regs(&vcpu->arch.guest_fpregs);
|
||||
save_access_regs(vcpu->arch.guest_acrs);
|
||||
save_access_regs(vcpu->run->s.regs.acrs);
|
||||
restore_fp_regs(&vcpu->arch.host_fpregs);
|
||||
restore_access_regs(vcpu->arch.host_acrs);
|
||||
}
|
||||
@ -290,8 +327,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
|
||||
/* this equals initial cpu reset in pop, but we don't switch to ESA */
|
||||
vcpu->arch.sie_block->gpsw.mask = 0UL;
|
||||
vcpu->arch.sie_block->gpsw.addr = 0UL;
|
||||
vcpu->arch.sie_block->prefix = 0UL;
|
||||
vcpu->arch.sie_block->ihcpu = 0xffff;
|
||||
kvm_s390_set_prefix(vcpu, 0);
|
||||
vcpu->arch.sie_block->cputm = 0UL;
|
||||
vcpu->arch.sie_block->ckc = 0UL;
|
||||
vcpu->arch.sie_block->todpr = 0;
|
||||
@ -342,12 +378,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
goto out_free_cpu;
|
||||
|
||||
vcpu->arch.sie_block->icpua = id;
|
||||
BUG_ON(!kvm->arch.sca);
|
||||
if (!kvm->arch.sca->cpu[id].sda)
|
||||
kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
|
||||
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
|
||||
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
|
||||
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
|
||||
if (!kvm_is_ucontrol(kvm)) {
|
||||
if (!kvm->arch.sca) {
|
||||
WARN_ON_ONCE(1);
|
||||
goto out_free_cpu;
|
||||
}
|
||||
if (!kvm->arch.sca->cpu[id].sda)
|
||||
kvm->arch.sca->cpu[id].sda =
|
||||
(__u64) vcpu->arch.sie_block;
|
||||
vcpu->arch.sie_block->scaoh =
|
||||
(__u32)(((__u64)kvm->arch.sca) >> 32);
|
||||
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
|
||||
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
|
||||
}
|
||||
|
||||
spin_lock_init(&vcpu->arch.local_int.lock);
|
||||
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
|
||||
@ -388,29 +431,29 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
{
|
||||
memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
|
||||
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
{
|
||||
memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
|
||||
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_sregs *sregs)
|
||||
{
|
||||
memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
|
||||
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
|
||||
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
|
||||
restore_access_regs(vcpu->arch.guest_acrs);
|
||||
restore_access_regs(vcpu->run->s.regs.acrs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_sregs *sregs)
|
||||
{
|
||||
memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
|
||||
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
|
||||
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
|
||||
return 0;
|
||||
}
|
||||
@ -418,7 +461,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
||||
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
|
||||
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
|
||||
vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
|
||||
restore_fp_regs(&vcpu->arch.guest_fpregs);
|
||||
return 0;
|
||||
}
|
||||
@ -467,9 +510,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL; /* not implemented yet */
|
||||
}
|
||||
|
||||
static void __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
|
||||
int rc;
|
||||
|
||||
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
|
||||
|
||||
if (need_resched())
|
||||
schedule();
|
||||
@ -477,7 +522,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (test_thread_flag(TIF_MCCK_PENDING))
|
||||
s390_handle_mcck();
|
||||
|
||||
kvm_s390_deliver_pending_interrupts(vcpu);
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
kvm_s390_deliver_pending_interrupts(vcpu);
|
||||
|
||||
vcpu->arch.sie_block->icptcode = 0;
|
||||
local_irq_disable();
|
||||
@ -485,9 +531,15 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
local_irq_enable();
|
||||
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
|
||||
atomic_read(&vcpu->arch.sie_block->cpuflags));
|
||||
if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
|
||||
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
|
||||
if (rc) {
|
||||
if (kvm_is_ucontrol(vcpu->kvm)) {
|
||||
rc = SIE_INTERCEPT_UCONTROL;
|
||||
} else {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
|
||||
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
rc = 0;
|
||||
}
|
||||
}
|
||||
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
|
||||
vcpu->arch.sie_block->icptcode);
|
||||
@ -495,7 +547,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
kvm_guest_exit();
|
||||
local_irq_enable();
|
||||
|
||||
memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
|
||||
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
@ -516,6 +569,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
case KVM_EXIT_UNKNOWN:
|
||||
case KVM_EXIT_INTR:
|
||||
case KVM_EXIT_S390_RESET:
|
||||
case KVM_EXIT_S390_UCONTROL:
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
@ -523,12 +577,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
|
||||
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
|
||||
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
|
||||
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
|
||||
kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
|
||||
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
|
||||
}
|
||||
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
|
||||
kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
|
||||
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
|
||||
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
|
||||
}
|
||||
|
||||
might_fault();
|
||||
|
||||
do {
|
||||
__vcpu_run(vcpu);
|
||||
rc = kvm_handle_sie_intercept(vcpu);
|
||||
rc = __vcpu_run(vcpu);
|
||||
if (rc)
|
||||
break;
|
||||
if (kvm_is_ucontrol(vcpu->kvm))
|
||||
rc = -EOPNOTSUPP;
|
||||
else
|
||||
rc = kvm_handle_sie_intercept(vcpu);
|
||||
} while (!signal_pending(current) && !rc);
|
||||
|
||||
if (rc == SIE_INTERCEPT_RERUNVCPU)
|
||||
@ -539,6 +607,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
rc = -EINTR;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
if (rc == SIE_INTERCEPT_UCONTROL) {
|
||||
kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
|
||||
kvm_run->s390_ucontrol.trans_exc_code =
|
||||
current->thread.gmap_addr;
|
||||
kvm_run->s390_ucontrol.pgm_code = 0x10;
|
||||
rc = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rc == -EOPNOTSUPP) {
|
||||
/* intercept cannot be handled in-kernel, prepare kvm-run */
|
||||
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
|
||||
@ -556,6 +634,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
|
||||
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
|
||||
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
|
||||
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
|
||||
@ -602,7 +682,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
|
||||
return -EFAULT;
|
||||
|
||||
if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
|
||||
vcpu->arch.guest_gprs, 128, prefix))
|
||||
vcpu->run->s.regs.gprs, 128, prefix))
|
||||
return -EFAULT;
|
||||
|
||||
if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
|
||||
@ -631,7 +711,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
|
||||
return -EFAULT;
|
||||
|
||||
if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
|
||||
&vcpu->arch.guest_acrs, 64, prefix))
|
||||
&vcpu->run->s.regs.acrs, 64, prefix))
|
||||
return -EFAULT;
|
||||
|
||||
if (__guestcopy(vcpu,
|
||||
@ -673,12 +753,77 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
case KVM_S390_INITIAL_RESET:
|
||||
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
||||
break;
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
case KVM_S390_UCAS_MAP: {
|
||||
struct kvm_s390_ucas_mapping ucasmap;
|
||||
|
||||
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
|
||||
ucasmap.vcpu_addr, ucasmap.length);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_UCAS_UNMAP: {
|
||||
struct kvm_s390_ucas_mapping ucasmap;
|
||||
|
||||
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
|
||||
ucasmap.length);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case KVM_S390_VCPU_FAULT: {
|
||||
r = gmap_fault(arg, vcpu->arch.gmap);
|
||||
if (!IS_ERR_VALUE(r))
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
r = -ENOTTY;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
{
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
|
||||
&& (kvm_is_ucontrol(vcpu->kvm))) {
|
||||
vmf->page = virt_to_page(vcpu->arch.sie_block);
|
||||
get_page(vmf->page);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
}
|
||||
|
||||
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Section: memory related */
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
|
@ -26,6 +26,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* negative values are error codes, positive values for internal conditions */
|
||||
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
|
||||
#define SIE_INTERCEPT_UCONTROL (1<<1)
|
||||
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
|
||||
@ -47,6 +48,23 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
|
||||
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
|
||||
}
|
||||
|
||||
static inline int kvm_is_ucontrol(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
if (kvm->arch.gmap)
|
||||
return 0;
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
|
||||
{
|
||||
vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
|
||||
vcpu->arch.sie_block->ihcpu = 0xffff;
|
||||
}
|
||||
|
||||
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
|
||||
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
|
||||
void kvm_s390_tasklet(unsigned long parm);
|
||||
|
@ -33,7 +33,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
||||
|
||||
operand2 = disp2;
|
||||
if (base2)
|
||||
operand2 += vcpu->arch.guest_gprs[base2];
|
||||
operand2 += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
/* must be word boundary */
|
||||
if (operand2 & 3) {
|
||||
@ -56,8 +56,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
||||
goto out;
|
||||
}
|
||||
|
||||
vcpu->arch.sie_block->prefix = address;
|
||||
vcpu->arch.sie_block->ihcpu = 0xffff;
|
||||
kvm_s390_set_prefix(vcpu, address);
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
|
||||
out:
|
||||
@ -74,7 +73,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
|
||||
vcpu->stat.instruction_stpx++;
|
||||
operand2 = disp2;
|
||||
if (base2)
|
||||
operand2 += vcpu->arch.guest_gprs[base2];
|
||||
operand2 += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
/* must be word boundary */
|
||||
if (operand2 & 3) {
|
||||
@ -106,7 +105,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
||||
vcpu->stat.instruction_stap++;
|
||||
useraddr = disp2;
|
||||
if (base2)
|
||||
useraddr += vcpu->arch.guest_gprs[base2];
|
||||
useraddr += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (useraddr & 1) {
|
||||
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -181,7 +180,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
|
||||
vcpu->stat.instruction_stidp++;
|
||||
operand2 = disp2;
|
||||
if (base2)
|
||||
operand2 += vcpu->arch.guest_gprs[base2];
|
||||
operand2 += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (operand2 & 7) {
|
||||
kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -232,9 +231,9 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
|
||||
|
||||
static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
|
||||
int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
|
||||
int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
|
||||
int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
|
||||
int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
|
||||
int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
|
||||
int base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
|
||||
u64 operand2;
|
||||
@ -245,14 +244,14 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
|
||||
operand2 = disp2;
|
||||
if (base2)
|
||||
operand2 += vcpu->arch.guest_gprs[base2];
|
||||
operand2 += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (operand2 & 0xfff && fc > 0)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
switch (fc) {
|
||||
case 0:
|
||||
vcpu->arch.guest_gprs[0] = 3 << 28;
|
||||
vcpu->run->s.regs.gprs[0] = 3 << 28;
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||
return 0;
|
||||
case 1: /* same handling for 1 and 2 */
|
||||
@ -281,7 +280,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
free_page(mem);
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||
vcpu->arch.guest_gprs[0] = 0;
|
||||
vcpu->run->s.regs.gprs[0] = 0;
|
||||
return 0;
|
||||
out_mem:
|
||||
free_page(mem);
|
||||
@ -333,8 +332,8 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
|
||||
int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
|
||||
int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
|
||||
int disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
|
||||
u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0;
|
||||
u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0;
|
||||
u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0;
|
||||
u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long user_address;
|
||||
|
||||
|
@ -48,7 +48,7 @@
|
||||
|
||||
|
||||
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
|
||||
unsigned long *reg)
|
||||
u64 *reg)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
|
||||
int rc;
|
||||
@ -160,12 +160,15 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
|
||||
inti->type = KVM_S390_SIGP_STOP;
|
||||
|
||||
spin_lock_bh(&li->lock);
|
||||
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED))
|
||||
goto out;
|
||||
list_add_tail(&inti->list, &li->list);
|
||||
atomic_set(&li->active, 1);
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
|
||||
li->action_bits |= action;
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
out:
|
||||
spin_unlock_bh(&li->lock);
|
||||
|
||||
return 0; /* order accepted */
|
||||
@ -220,7 +223,7 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
|
||||
}
|
||||
|
||||
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
|
||||
unsigned long *reg)
|
||||
u64 *reg)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
|
||||
struct kvm_s390_local_interrupt *li = NULL;
|
||||
@ -278,7 +281,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
|
||||
}
|
||||
|
||||
static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
|
||||
unsigned long *reg)
|
||||
u64 *reg)
|
||||
{
|
||||
int rc;
|
||||
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
|
||||
@ -309,6 +312,34 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
|
||||
{
|
||||
int rc = 0;
|
||||
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
|
||||
struct kvm_s390_local_interrupt *li;
|
||||
|
||||
if (cpu_addr >= KVM_MAX_VCPUS)
|
||||
return 3; /* not operational */
|
||||
|
||||
spin_lock(&fi->lock);
|
||||
li = fi->local_int[cpu_addr];
|
||||
if (li == NULL) {
|
||||
rc = 3; /* not operational */
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock_bh(&li->lock);
|
||||
if (li->action_bits & ACTION_STOP_ON_STOP)
|
||||
rc = 2; /* busy */
|
||||
else
|
||||
VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
|
||||
cpu_addr);
|
||||
spin_unlock_bh(&li->lock);
|
||||
out:
|
||||
spin_unlock(&fi->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
@ -316,7 +347,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
int base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
|
||||
u32 parameter;
|
||||
u16 cpu_addr = vcpu->arch.guest_gprs[r3];
|
||||
u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
|
||||
u8 order_code;
|
||||
int rc;
|
||||
|
||||
@ -327,18 +358,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
|
||||
order_code = disp2;
|
||||
if (base2)
|
||||
order_code += vcpu->arch.guest_gprs[base2];
|
||||
order_code += vcpu->run->s.regs.gprs[base2];
|
||||
|
||||
if (r1 % 2)
|
||||
parameter = vcpu->arch.guest_gprs[r1];
|
||||
parameter = vcpu->run->s.regs.gprs[r1];
|
||||
else
|
||||
parameter = vcpu->arch.guest_gprs[r1 + 1];
|
||||
parameter = vcpu->run->s.regs.gprs[r1 + 1];
|
||||
|
||||
switch (order_code) {
|
||||
case SIGP_SENSE:
|
||||
vcpu->stat.instruction_sigp_sense++;
|
||||
rc = __sigp_sense(vcpu, cpu_addr,
|
||||
&vcpu->arch.guest_gprs[r1]);
|
||||
&vcpu->run->s.regs.gprs[r1]);
|
||||
break;
|
||||
case SIGP_EXTERNAL_CALL:
|
||||
vcpu->stat.instruction_sigp_external_call++;
|
||||
@ -354,7 +385,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
break;
|
||||
case SIGP_STOP_STORE_STATUS:
|
||||
vcpu->stat.instruction_sigp_stop++;
|
||||
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP);
|
||||
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
|
||||
ACTION_STOP_ON_STOP);
|
||||
break;
|
||||
case SIGP_SET_ARCH:
|
||||
vcpu->stat.instruction_sigp_arch++;
|
||||
@ -363,15 +395,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
case SIGP_SET_PREFIX:
|
||||
vcpu->stat.instruction_sigp_prefix++;
|
||||
rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
|
||||
&vcpu->arch.guest_gprs[r1]);
|
||||
&vcpu->run->s.regs.gprs[r1]);
|
||||
break;
|
||||
case SIGP_SENSE_RUNNING:
|
||||
vcpu->stat.instruction_sigp_sense_running++;
|
||||
rc = __sigp_sense_running(vcpu, cpu_addr,
|
||||
&vcpu->arch.guest_gprs[r1]);
|
||||
&vcpu->run->s.regs.gprs[r1]);
|
||||
break;
|
||||
case SIGP_RESTART:
|
||||
vcpu->stat.instruction_sigp_restart++;
|
||||
rc = __sigp_restart(vcpu, cpu_addr);
|
||||
if (rc == 2) /* busy */
|
||||
break;
|
||||
/* user space must know about restart */
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -321,4 +321,8 @@ struct kvm_xcrs {
|
||||
__u64 padding[16];
|
||||
};
|
||||
|
||||
/* definition of registers in kvm_run */
|
||||
struct kvm_sync_regs {
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@ -176,6 +176,7 @@ struct x86_emulate_ops {
|
||||
void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
|
||||
ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
|
||||
int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
|
||||
void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
|
||||
int (*cpl)(struct x86_emulate_ctxt *ctxt);
|
||||
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
|
||||
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
|
||||
@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
|
||||
#define EMULATION_INTERCEPTED 2
|
||||
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
|
||||
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
u16 tss_selector, int reason,
|
||||
u16 tss_selector, int idt_index, int reason,
|
||||
bool has_error_code, u32 error_code);
|
||||
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
|
||||
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
#define KVM_MAX_VCPUS 254
|
||||
#define KVM_SOFT_MAX_VCPUS 64
|
||||
#define KVM_SOFT_MAX_VCPUS 160
|
||||
#define KVM_MEMORY_SLOTS 32
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 4
|
||||
@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache {
|
||||
void *objects[KVM_NR_MEM_OBJS];
|
||||
};
|
||||
|
||||
#define NR_PTE_CHAIN_ENTRIES 5
|
||||
|
||||
struct kvm_pte_chain {
|
||||
u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
|
||||
struct hlist_node link;
|
||||
};
|
||||
|
||||
/*
|
||||
* kvm_mmu_page_role, below, is defined as:
|
||||
*
|
||||
@ -427,12 +420,16 @@ struct kvm_vcpu_arch {
|
||||
|
||||
u64 last_guest_tsc;
|
||||
u64 last_kernel_ns;
|
||||
u64 last_tsc_nsec;
|
||||
u64 last_tsc_write;
|
||||
u32 virtual_tsc_khz;
|
||||
u64 last_host_tsc;
|
||||
u64 tsc_offset_adjustment;
|
||||
u64 this_tsc_nsec;
|
||||
u64 this_tsc_write;
|
||||
u8 this_tsc_generation;
|
||||
bool tsc_catchup;
|
||||
u32 tsc_catchup_mult;
|
||||
s8 tsc_catchup_shift;
|
||||
bool tsc_always_catchup;
|
||||
s8 virtual_tsc_shift;
|
||||
u32 virtual_tsc_mult;
|
||||
u32 virtual_tsc_khz;
|
||||
|
||||
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
|
||||
unsigned nmi_pending; /* NMI queued after currently running handler */
|
||||
@ -478,6 +475,21 @@ struct kvm_vcpu_arch {
|
||||
u32 id;
|
||||
bool send_user_only;
|
||||
} apf;
|
||||
|
||||
/* OSVW MSRs (AMD only) */
|
||||
struct {
|
||||
u64 length;
|
||||
u64 status;
|
||||
} osvw;
|
||||
};
|
||||
|
||||
struct kvm_lpage_info {
|
||||
unsigned long rmap_pde;
|
||||
int write_count;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
@ -511,8 +523,12 @@ struct kvm_arch {
|
||||
s64 kvmclock_offset;
|
||||
raw_spinlock_t tsc_write_lock;
|
||||
u64 last_tsc_nsec;
|
||||
u64 last_tsc_offset;
|
||||
u64 last_tsc_write;
|
||||
u32 last_tsc_khz;
|
||||
u64 cur_tsc_nsec;
|
||||
u64 cur_tsc_write;
|
||||
u64 cur_tsc_offset;
|
||||
u8 cur_tsc_generation;
|
||||
|
||||
struct kvm_xen_hvm_config xen_hvm_config;
|
||||
|
||||
@ -644,7 +660,7 @@ struct kvm_x86_ops {
|
||||
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
|
||||
int (*get_lpage_level)(void);
|
||||
bool (*rdtscp_supported)(void);
|
||||
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment);
|
||||
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
|
||||
|
||||
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
|
||||
@ -652,7 +668,7 @@ struct kvm_x86_ops {
|
||||
|
||||
bool (*has_wbinvd_exit)(void);
|
||||
|
||||
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
|
||||
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
|
||||
|
||||
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
|
||||
@ -674,6 +690,17 @@ struct kvm_arch_async_pf {
|
||||
|
||||
extern struct kvm_x86_ops *kvm_x86_ops;
|
||||
|
||||
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
|
||||
s64 adjustment)
|
||||
{
|
||||
kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
|
||||
}
|
||||
|
||||
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
{
|
||||
kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
|
||||
}
|
||||
|
||||
int kvm_mmu_module_init(void);
|
||||
void kvm_mmu_module_exit(void);
|
||||
|
||||
@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
|
||||
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
|
||||
|
||||
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
|
||||
bool has_error_code, u32 error_code);
|
||||
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
|
||||
int reason, bool has_error_code, u32 error_code);
|
||||
|
||||
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
|
@ -23,6 +23,7 @@
|
||||
#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
|
||||
#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
|
||||
#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
|
||||
#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19)
|
||||
#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
|
||||
#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
|
||||
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
|
||||
|
@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu);
|
||||
extern void check_tsc_sync_target(void);
|
||||
|
||||
extern int notsc_setup(char *);
|
||||
extern void save_sched_clock_state(void);
|
||||
extern void restore_sched_clock_state(void);
|
||||
extern void tsc_save_sched_clock_state(void);
|
||||
extern void tsc_restore_sched_clock_state(void);
|
||||
|
||||
#endif /* _ASM_X86_TSC_H */
|
||||
|
@ -145,9 +145,11 @@ struct x86_init_ops {
|
||||
/**
|
||||
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
|
||||
* @setup_percpu_clockev: set up the per cpu clock event device
|
||||
* @early_percpu_clock_init: early init of the per cpu clock event device
|
||||
*/
|
||||
struct x86_cpuinit_ops {
|
||||
void (*setup_percpu_clockev)(void);
|
||||
void (*early_percpu_clock_init)(void);
|
||||
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
|
||||
};
|
||||
|
||||
@ -160,6 +162,8 @@ struct x86_cpuinit_ops {
|
||||
* @is_untracked_pat_range exclude from PAT logic
|
||||
* @nmi_init enable NMI on cpus
|
||||
* @i8042_detect pre-detect if i8042 controller exists
|
||||
* @save_sched_clock_state: save state for sched_clock() on suspend
|
||||
* @restore_sched_clock_state: restore state for sched_clock() on resume
|
||||
*/
|
||||
struct x86_platform_ops {
|
||||
unsigned long (*calibrate_tsc)(void);
|
||||
@ -171,6 +175,8 @@ struct x86_platform_ops {
|
||||
void (*nmi_init)(void);
|
||||
unsigned char (*get_nmi_reason)(void);
|
||||
int (*i8042_detect)(void);
|
||||
void (*save_sched_clock_state)(void);
|
||||
void (*restore_sched_clock_state)(void);
|
||||
};
|
||||
|
||||
struct pci_dev;
|
||||
|
@ -136,6 +136,15 @@ int kvm_register_clock(char *txt)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_save_sched_clock_state(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void kvm_restore_sched_clock_state(void)
|
||||
{
|
||||
kvm_register_clock("primary cpu clock, resume");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
static void __cpuinit kvm_setup_secondary_clock(void)
|
||||
{
|
||||
@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
|
||||
* we shouldn't fail.
|
||||
*/
|
||||
WARN_ON(kvm_register_clock("secondary cpu clock"));
|
||||
/* ok, done with our trickery, call native */
|
||||
setup_secondary_APIC_clock();
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -194,9 +201,11 @@ void __init kvmclock_init(void)
|
||||
x86_platform.get_wallclock = kvm_get_wallclock;
|
||||
x86_platform.set_wallclock = kvm_set_wallclock;
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
x86_cpuinit.setup_percpu_clockev =
|
||||
x86_cpuinit.early_percpu_clock_init =
|
||||
kvm_setup_secondary_clock;
|
||||
#endif
|
||||
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
|
||||
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
|
||||
machine_ops.shutdown = kvm_shutdown;
|
||||
#ifdef CONFIG_KEXEC
|
||||
machine_ops.crash_shutdown = kvm_crash_shutdown;
|
||||
|
@ -255,6 +255,7 @@ notrace static void __cpuinit start_secondary(void *unused)
|
||||
* most necessary things.
|
||||
*/
|
||||
cpu_init();
|
||||
x86_cpuinit.early_percpu_clock_init();
|
||||
preempt_disable();
|
||||
smp_callin();
|
||||
|
||||
|
@ -630,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
|
||||
|
||||
static unsigned long long cyc2ns_suspend;
|
||||
|
||||
void save_sched_clock_state(void)
|
||||
void tsc_save_sched_clock_state(void)
|
||||
{
|
||||
if (!sched_clock_stable)
|
||||
return;
|
||||
@ -646,7 +646,7 @@ void save_sched_clock_state(void)
|
||||
* that sched_clock() continues from the point where it was left off during
|
||||
* suspend.
|
||||
*/
|
||||
void restore_sched_clock_state(void)
|
||||
void tsc_restore_sched_clock_state(void)
|
||||
{
|
||||
unsigned long long offset;
|
||||
unsigned long flags;
|
||||
|
@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = {
|
||||
};
|
||||
|
||||
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
|
||||
.early_percpu_clock_init = x86_init_noop,
|
||||
.setup_percpu_clockev = setup_secondary_APIC_clock,
|
||||
.fixup_cpu_id = x86_default_fixup_cpu_id,
|
||||
};
|
||||
@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = {
|
||||
.is_untracked_pat_range = is_ISA_range,
|
||||
.nmi_init = default_nmi_init,
|
||||
.get_nmi_reason = default_get_nmi_reason,
|
||||
.i8042_detect = default_i8042_detect
|
||||
.i8042_detect = default_i8042_detect,
|
||||
.save_sched_clock_state = tsc_save_sched_clock_state,
|
||||
.restore_sched_clock_state = tsc_restore_sched_clock_state,
|
||||
};
|
||||
|
||||
EXPORT_SYMBOL_GPL(x86_platform);
|
||||
|
@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
const u32 kvm_supported_word6_x86_features =
|
||||
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
|
||||
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
|
||||
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
|
||||
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
|
||||
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
|
||||
|
||||
/* cpuid 0xC0000001.edx */
|
||||
|
@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
|
||||
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_OSVW));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -57,6 +57,7 @@
|
||||
#define OpDS 23ull /* DS */
|
||||
#define OpFS 24ull /* FS */
|
||||
#define OpGS 25ull /* GS */
|
||||
#define OpMem8 26ull /* 8-bit zero extended memory operand */
|
||||
|
||||
#define OpBits 5 /* Width of operand field */
|
||||
#define OpMask ((1ull << OpBits) - 1)
|
||||
@ -101,6 +102,7 @@
|
||||
#define SrcAcc (OpAcc << SrcShift)
|
||||
#define SrcImmU16 (OpImmU16 << SrcShift)
|
||||
#define SrcDX (OpDX << SrcShift)
|
||||
#define SrcMem8 (OpMem8 << SrcShift)
|
||||
#define SrcMask (OpMask << SrcShift)
|
||||
#define BitOp (1<<11)
|
||||
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
|
||||
@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
|
||||
}
|
||||
|
||||
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
|
||||
struct operand *op,
|
||||
int inhibit_bytereg)
|
||||
struct operand *op)
|
||||
{
|
||||
unsigned reg = ctxt->modrm_reg;
|
||||
int highbyte_regs = ctxt->rex_prefix == 0;
|
||||
@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
|
||||
}
|
||||
|
||||
op->type = OP_REG;
|
||||
if ((ctxt->d & ByteOp) && !inhibit_bytereg) {
|
||||
if (ctxt->d & ByteOp) {
|
||||
op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
|
||||
op->bytes = 1;
|
||||
} else {
|
||||
@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
u16 index, struct desc_struct *desc)
|
||||
{
|
||||
struct desc_ptr dt;
|
||||
ulong addr;
|
||||
|
||||
ctxt->ops->get_idt(ctxt, &dt);
|
||||
|
||||
if (dt.size < index * 8 + 7)
|
||||
return emulate_gp(ctxt, index << 3 | 0x2);
|
||||
|
||||
addr = dt.address + index * 8;
|
||||
return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
|
||||
&ctxt->exception);
|
||||
}
|
||||
|
||||
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
|
||||
u16 selector, struct desc_ptr *dt)
|
||||
{
|
||||
@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
seg_desc.type = 3;
|
||||
seg_desc.p = 1;
|
||||
seg_desc.s = 1;
|
||||
if (ctxt->mode == X86EMUL_MODE_VM86)
|
||||
seg_desc.dpl = 3;
|
||||
goto load;
|
||||
}
|
||||
|
||||
@ -1891,6 +1910,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
|
||||
ss->p = 1;
|
||||
}
|
||||
|
||||
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
eax = ecx = 0;
|
||||
return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
|
||||
&& ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
|
||||
&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
|
||||
&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
|
||||
}
|
||||
|
||||
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
struct x86_emulate_ops *ops = ctxt->ops;
|
||||
@ -2007,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
|
||||
if (ctxt->mode == X86EMUL_MODE_REAL)
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
/*
|
||||
* Not recognized on AMD in compat mode (but is recognized in legacy
|
||||
* mode).
|
||||
*/
|
||||
if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
|
||||
&& !vendor_intel(ctxt))
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
/* XXX sysenter/sysexit have not been tested in 64bit mode.
|
||||
* Therefore, we inject an #UD.
|
||||
*/
|
||||
@ -2306,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
|
||||
return emulate_gp(ctxt, 0);
|
||||
ctxt->_eip = tss->eip;
|
||||
ctxt->eflags = tss->eflags | 2;
|
||||
|
||||
/* General purpose registers */
|
||||
ctxt->regs[VCPU_REGS_RAX] = tss->eax;
|
||||
ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
|
||||
ctxt->regs[VCPU_REGS_RDX] = tss->edx;
|
||||
@ -2327,6 +2367,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
|
||||
set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
|
||||
set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
|
||||
|
||||
/*
|
||||
* If we're switching between Protected Mode and VM86, we need to make
|
||||
* sure to update the mode before loading the segment descriptors so
|
||||
* that the selectors are interpreted correctly.
|
||||
*
|
||||
* Need to get rflags to the vcpu struct immediately because it
|
||||
* influences the CPL which is checked at least when loading the segment
|
||||
* descriptors and when pushing an error code to the new kernel stack.
|
||||
*
|
||||
* TODO Introduce a separate ctxt->ops->set_cpl callback
|
||||
*/
|
||||
if (ctxt->eflags & X86_EFLAGS_VM)
|
||||
ctxt->mode = X86EMUL_MODE_VM86;
|
||||
else
|
||||
ctxt->mode = X86EMUL_MODE_PROT32;
|
||||
|
||||
ctxt->ops->set_rflags(ctxt, ctxt->eflags);
|
||||
|
||||
/*
|
||||
* Now load segment descriptors. If a fault happens at this stage
|
||||
* it is handled in a context of new task
|
||||
@ -2401,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
|
||||
}
|
||||
|
||||
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
u16 tss_selector, int reason,
|
||||
u16 tss_selector, int idt_index, int reason,
|
||||
bool has_error_code, u32 error_code)
|
||||
{
|
||||
struct x86_emulate_ops *ops = ctxt->ops;
|
||||
@ -2423,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
|
||||
/* FIXME: check that next_tss_desc is tss */
|
||||
|
||||
if (reason != TASK_SWITCH_IRET) {
|
||||
if ((tss_selector & 3) > next_tss_desc.dpl ||
|
||||
ops->cpl(ctxt) > next_tss_desc.dpl)
|
||||
return emulate_gp(ctxt, 0);
|
||||
/*
|
||||
* Check privileges. The three cases are task switch caused by...
|
||||
*
|
||||
* 1. jmp/call/int to task gate: Check against DPL of the task gate
|
||||
* 2. Exception/IRQ/iret: No check is performed
|
||||
* 3. jmp/call to TSS: Check against DPL of the TSS
|
||||
*/
|
||||
if (reason == TASK_SWITCH_GATE) {
|
||||
if (idt_index != -1) {
|
||||
/* Software interrupts */
|
||||
struct desc_struct task_gate_desc;
|
||||
int dpl;
|
||||
|
||||
ret = read_interrupt_descriptor(ctxt, idt_index,
|
||||
&task_gate_desc);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
|
||||
dpl = task_gate_desc.dpl;
|
||||
if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
|
||||
return emulate_gp(ctxt, (idt_index << 3) | 0x2);
|
||||
}
|
||||
} else if (reason != TASK_SWITCH_IRET) {
|
||||
int dpl = next_tss_desc.dpl;
|
||||
if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
|
||||
return emulate_gp(ctxt, tss_selector);
|
||||
}
|
||||
|
||||
|
||||
desc_limit = desc_limit_scaled(&next_tss_desc);
|
||||
if (!next_tss_desc.p ||
|
||||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
|
||||
@ -2481,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
}
|
||||
|
||||
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
u16 tss_selector, int reason,
|
||||
u16 tss_selector, int idt_index, int reason,
|
||||
bool has_error_code, u32 error_code)
|
||||
{
|
||||
int rc;
|
||||
@ -2489,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
ctxt->_eip = ctxt->eip;
|
||||
ctxt->dst.type = OP_NONE;
|
||||
|
||||
rc = emulator_do_task_switch(ctxt, tss_selector, reason,
|
||||
rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
|
||||
has_error_code, error_code);
|
||||
|
||||
if (rc == X86EMUL_CONTINUE)
|
||||
@ -3514,13 +3595,13 @@ static struct opcode twobyte_table[256] = {
|
||||
I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
|
||||
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
|
||||
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
|
||||
D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
|
||||
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
|
||||
/* 0xB8 - 0xBF */
|
||||
N, N,
|
||||
G(BitOp, group8),
|
||||
I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
|
||||
I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
|
||||
D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
|
||||
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
|
||||
/* 0xC0 - 0xCF */
|
||||
D2bv(DstMem | SrcReg | ModRM | Lock),
|
||||
N, D(DstMem | SrcReg | ModRM | Mov),
|
||||
@ -3602,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
|
||||
|
||||
switch (d) {
|
||||
case OpReg:
|
||||
decode_register_operand(ctxt, op,
|
||||
op == &ctxt->dst &&
|
||||
ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
|
||||
decode_register_operand(ctxt, op);
|
||||
break;
|
||||
case OpImmUByte:
|
||||
rc = decode_imm(ctxt, op, 1, false);
|
||||
@ -3656,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
|
||||
case OpImm:
|
||||
rc = decode_imm(ctxt, op, imm_size(ctxt), true);
|
||||
break;
|
||||
case OpMem8:
|
||||
ctxt->memop.bytes = 1;
|
||||
goto mem_common;
|
||||
case OpMem16:
|
||||
ctxt->memop.bytes = 2;
|
||||
goto mem_common;
|
||||
|
@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
|
||||
if (val & 0x10) {
|
||||
s->init4 = val & 1;
|
||||
s->last_irr = 0;
|
||||
s->irr &= s->elcr;
|
||||
s->imr = 0;
|
||||
s->priority_add = 0;
|
||||
s->special_mask = 0;
|
||||
|
@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
break;
|
||||
|
||||
case APIC_DM_INIT:
|
||||
if (level) {
|
||||
if (!trig_mode || level) {
|
||||
result = 1;
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
||||
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
|
||||
u64 ns = 0;
|
||||
struct kvm_vcpu *vcpu = apic->vcpu;
|
||||
unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
|
||||
unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!tscdeadline || !this_tsc_khz))
|
||||
|
@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
|
||||
{
|
||||
unsigned long idx;
|
||||
|
||||
idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
|
||||
(slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
||||
return &slot->lpage_info[level - 2][idx];
|
||||
idx = gfn_to_index(gfn, slot->base_gfn, level);
|
||||
return &slot->arch.lpage_info[level - 2][idx];
|
||||
}
|
||||
|
||||
static void account_shadowed(struct kvm *kvm, gfn_t gfn)
|
||||
@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level,
|
||||
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvm_lpage_info *linfo;
|
||||
@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
return __gfn_to_rmap(kvm, gfn, level, slot);
|
||||
return __gfn_to_rmap(gfn, level, slot);
|
||||
}
|
||||
|
||||
static bool rmap_can_add(struct kvm_vcpu *vcpu)
|
||||
@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
|
||||
return pte_list_add(vcpu, spte, rmapp);
|
||||
}
|
||||
|
||||
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
|
||||
static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
|
||||
{
|
||||
return pte_list_next(rmapp, spte);
|
||||
}
|
||||
@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
|
||||
u64 *spte;
|
||||
int i, write_protected = 0;
|
||||
|
||||
rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot);
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
BUG_ON(!(*spte & PT_PRESENT_MASK));
|
||||
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
|
||||
@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
|
||||
mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
|
||||
write_protected = 1;
|
||||
}
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
|
||||
/* check for huge page mappings */
|
||||
for (i = PT_DIRECTORY_LEVEL;
|
||||
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
|
||||
rmapp = __gfn_to_rmap(kvm, gfn, i, slot);
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
rmapp = __gfn_to_rmap(gfn, i, slot);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
BUG_ON(!(*spte & PT_PRESENT_MASK));
|
||||
BUG_ON(!is_large_pte(*spte));
|
||||
@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
|
||||
spte = NULL;
|
||||
write_protected = 1;
|
||||
}
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
u64 *spte;
|
||||
int need_tlb_flush = 0;
|
||||
|
||||
while ((spte = rmap_next(kvm, rmapp, NULL))) {
|
||||
while ((spte = rmap_next(rmapp, NULL))) {
|
||||
BUG_ON(!(*spte & PT_PRESENT_MASK));
|
||||
rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
|
||||
drop_spte(kvm, spte);
|
||||
@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
|
||||
WARN_ON(pte_huge(*ptep));
|
||||
new_pfn = pte_pfn(*ptep);
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
BUG_ON(!is_shadow_present_pte(*spte));
|
||||
rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
|
||||
need_flush = 1;
|
||||
if (pte_write(*ptep)) {
|
||||
drop_spte(kvm, spte);
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
} else {
|
||||
new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
|
||||
new_spte |= (u64)new_pfn << PAGE_SHIFT;
|
||||
@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
new_spte &= ~shadow_accessed_mask;
|
||||
mmu_spte_clear_track_bits(spte);
|
||||
mmu_spte_set(spte, new_spte);
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
}
|
||||
if (need_flush)
|
||||
@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
if (!shadow_accessed_mask)
|
||||
return kvm_unmap_rmapp(kvm, rmapp, data);
|
||||
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
int _young;
|
||||
u64 _spte = *spte;
|
||||
@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
young = 1;
|
||||
clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
|
||||
}
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
return young;
|
||||
}
|
||||
@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
if (!shadow_accessed_mask)
|
||||
goto out;
|
||||
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
u64 _spte = *spte;
|
||||
BUG_ON(!(_spte & PT_PRESENT_MASK));
|
||||
@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
young = 1;
|
||||
break;
|
||||
}
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
out:
|
||||
return young;
|
||||
@ -1391,11 +1390,6 @@ struct kvm_mmu_pages {
|
||||
unsigned int nr;
|
||||
};
|
||||
|
||||
#define for_each_unsync_children(bitmap, idx) \
|
||||
for (idx = find_first_bit(bitmap, 512); \
|
||||
idx < 512; \
|
||||
idx = find_next_bit(bitmap, 512, idx+1))
|
||||
|
||||
static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
|
||||
int idx)
|
||||
{
|
||||
@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
|
||||
{
|
||||
int i, ret, nr_unsync_leaf = 0;
|
||||
|
||||
for_each_unsync_children(sp->unsync_child_bitmap, i) {
|
||||
for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
|
||||
struct kvm_mmu_page *child;
|
||||
u64 ent = sp->spt[i];
|
||||
|
||||
@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
|
||||
{
|
||||
if (is_large_pte(*sptep)) {
|
||||
drop_spte(vcpu->kvm, sptep);
|
||||
--vcpu->kvm->stat.lpages;
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
}
|
||||
}
|
||||
@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
|
||||
#undef PTTYPE
|
||||
|
||||
static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context,
|
||||
int level)
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
int maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||
u64 exb_bit_rsvd = 0;
|
||||
|
||||
if (!context->nx)
|
||||
exb_bit_rsvd = rsvd_bits(63, 63);
|
||||
switch (level) {
|
||||
switch (context->root_level) {
|
||||
case PT32_ROOT_LEVEL:
|
||||
/* no rsvd bits for 2 level 4K page table entries */
|
||||
context->rsvd_bits_mask[0][1] = 0;
|
||||
@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
|
||||
int level)
|
||||
{
|
||||
context->nx = is_nx(vcpu);
|
||||
context->root_level = level;
|
||||
|
||||
reset_rsvds_bits_mask(vcpu, context, level);
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
|
||||
ASSERT(is_pae(vcpu));
|
||||
context->new_cr3 = paging_new_cr3;
|
||||
@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
|
||||
context->invlpg = paging64_invlpg;
|
||||
context->update_pte = paging64_update_pte;
|
||||
context->free = paging_free;
|
||||
context->root_level = level;
|
||||
context->shadow_root_level = level;
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = false;
|
||||
@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
context->nx = false;
|
||||
context->root_level = PT32_ROOT_LEVEL;
|
||||
|
||||
reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
|
||||
context->new_cr3 = paging_new_cr3;
|
||||
context->page_fault = paging32_page_fault;
|
||||
@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
|
||||
context->sync_page = paging32_sync_page;
|
||||
context->invlpg = paging32_invlpg;
|
||||
context->update_pte = paging32_update_pte;
|
||||
context->root_level = PT32_ROOT_LEVEL;
|
||||
context->shadow_root_level = PT32E_ROOT_LEVEL;
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = false;
|
||||
@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->get_cr3 = get_cr3;
|
||||
context->get_pdptr = kvm_pdptr_read;
|
||||
context->inject_page_fault = kvm_inject_page_fault;
|
||||
context->nx = is_nx(vcpu);
|
||||
|
||||
if (!is_paging(vcpu)) {
|
||||
context->nx = false;
|
||||
@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->root_level = 0;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
context->nx = is_nx(vcpu);
|
||||
reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
context->root_level = PT64_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
} else if (is_pae(vcpu)) {
|
||||
context->nx = is_nx(vcpu);
|
||||
reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
context->root_level = PT32E_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
} else {
|
||||
context->nx = false;
|
||||
reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
|
||||
context->gva_to_gpa = paging32_gva_to_gpa;
|
||||
context->root_level = PT32_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
context->gva_to_gpa = paging32_gva_to_gpa;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
||||
g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
g_context->nx = is_nx(vcpu);
|
||||
reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
|
||||
g_context->root_level = PT64_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, g_context);
|
||||
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
|
||||
} else if (is_pae(vcpu)) {
|
||||
g_context->nx = is_nx(vcpu);
|
||||
reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
|
||||
g_context->root_level = PT32E_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, g_context);
|
||||
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
|
||||
} else {
|
||||
g_context->nx = false;
|
||||
reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
|
||||
g_context->root_level = PT32_ROOT_LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, g_context);
|
||||
g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
|
||||
}
|
||||
|
||||
@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
|
||||
* If we're seeing too many writes to a page, it may no longer be a page table,
|
||||
* or we may be forking, in which case it is better to unmap the page.
|
||||
*/
|
||||
static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
|
||||
static bool detect_write_flooding(struct kvm_mmu_page *sp)
|
||||
{
|
||||
/*
|
||||
* Skip write-flooding detection for the sp whose level is 1, because
|
||||
@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
|
||||
mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
|
||||
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
|
||||
spte = get_written_sptes(sp, gpa, &npte);
|
||||
|
||||
if (detect_write_misaligned(sp, gpa, bytes) ||
|
||||
detect_write_flooding(sp, spte)) {
|
||||
detect_write_flooding(sp)) {
|
||||
zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
|
||||
&invalid_list);
|
||||
++vcpu->kvm->stat.mmu_flooded;
|
||||
|
@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
slot = gfn_to_memslot(kvm, sp->gfn);
|
||||
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
|
||||
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
spte = rmap_next(rmapp, NULL);
|
||||
while (spte) {
|
||||
if (is_writable_pte(*spte))
|
||||
audit_printk(kvm, "shadow page has writable "
|
||||
"mappings: gfn %llx role %x\n",
|
||||
sp->gfn, sp->role.word);
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
spte = rmap_next(rmapp, spte);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping {
|
||||
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
|
||||
};
|
||||
|
||||
/* mapping between fixed pmc index and arch_events array */
|
||||
int fixed_pmc_events[] = {1, 0, 2};
|
||||
int fixed_pmc_events[] = {1, 0, 7};
|
||||
|
||||
static bool pmc_is_gp(struct kvm_pmc *pmc)
|
||||
{
|
||||
@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
unsigned config, type = PERF_TYPE_RAW;
|
||||
u8 event_select, unit_mask;
|
||||
|
||||
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
|
||||
printk_once("kvm pmu: pin control bit is ignored\n");
|
||||
|
||||
pmc->eventsel = eventsel;
|
||||
|
||||
stop_counter(pmc);
|
||||
@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
|
||||
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
|
||||
|
||||
if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE |
|
||||
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
|
||||
ARCH_PERFMON_EVENTSEL_INV |
|
||||
ARCH_PERFMON_EVENTSEL_CMASK))) {
|
||||
config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
|
||||
@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
|
||||
struct kvm_pmc *counters;
|
||||
u64 ctr;
|
||||
|
||||
pmc &= (3u << 30) - 1;
|
||||
pmc &= ~(3u << 30);
|
||||
if (!fixed && pmc >= pmu->nr_arch_gp_counters)
|
||||
return 1;
|
||||
if (fixed && pmc >= pmu->nr_arch_fixed_counters)
|
||||
|
@ -111,6 +111,12 @@ struct nested_state {
|
||||
#define MSRPM_OFFSETS 16
|
||||
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
||||
|
||||
/*
|
||||
* Set osvw_len to a higher value when updated Revision Guides
|
||||
* are published and we know what the new status bits are
|
||||
*/
|
||||
static uint64_t osvw_len = 4, osvw_status;
|
||||
|
||||
struct vcpu_svm {
|
||||
struct kvm_vcpu vcpu;
|
||||
struct vmcb *vmcb;
|
||||
@ -177,11 +183,13 @@ static bool npt_enabled = true;
|
||||
#else
|
||||
static bool npt_enabled;
|
||||
#endif
|
||||
static int npt = 1;
|
||||
|
||||
/* allow nested paging (virtualized MMU) for all guests */
|
||||
static int npt = true;
|
||||
module_param(npt, int, S_IRUGO);
|
||||
|
||||
static int nested = 1;
|
||||
/* allow nested virtualization in KVM/SVM */
|
||||
static int nested = true;
|
||||
module_param(nested, int, S_IRUGO);
|
||||
|
||||
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
|
||||
@ -557,6 +565,27 @@ static void svm_init_erratum_383(void)
|
||||
erratum_383_found = true;
|
||||
}
|
||||
|
||||
static void svm_init_osvw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Guests should see errata 400 and 415 as fixed (assuming that
|
||||
* HLT and IO instructions are intercepted).
|
||||
*/
|
||||
vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
|
||||
vcpu->arch.osvw.status = osvw_status & ~(6ULL);
|
||||
|
||||
/*
|
||||
* By increasing VCPU's osvw.length to 3 we are telling the guest that
|
||||
* all osvw.status bits inside that length, including bit 0 (which is
|
||||
* reserved for erratum 298), are valid. However, if host processor's
|
||||
* osvw_len is 0 then osvw_status[0] carries no information. We need to
|
||||
* be conservative here and therefore we tell the guest that erratum 298
|
||||
* is present (because we really don't know).
|
||||
*/
|
||||
if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
|
||||
vcpu->arch.osvw.status |= 1;
|
||||
}
|
||||
|
||||
static int has_svm(void)
|
||||
{
|
||||
const char *msg;
|
||||
@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage)
|
||||
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get OSVW bits.
|
||||
*
|
||||
* Note that it is possible to have a system with mixed processor
|
||||
* revisions and therefore different OSVW bits. If bits are not the same
|
||||
* on different processors then choose the worst case (i.e. if erratum
|
||||
* is present on one processor and not on another then assume that the
|
||||
* erratum is present everywhere).
|
||||
*/
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
|
||||
uint64_t len, status = 0;
|
||||
int err;
|
||||
|
||||
len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
|
||||
if (!err)
|
||||
status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
|
||||
&err);
|
||||
|
||||
if (err)
|
||||
osvw_status = osvw_len = 0;
|
||||
else {
|
||||
if (len < osvw_len)
|
||||
osvw_len = len;
|
||||
osvw_status |= status;
|
||||
osvw_status &= (1ULL << osvw_len) - 1;
|
||||
}
|
||||
} else
|
||||
osvw_status = osvw_len = 0;
|
||||
|
||||
svm_init_erratum_383();
|
||||
|
||||
amd_pmu_enable_virt();
|
||||
@ -910,23 +969,28 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
|
||||
return _tsc;
|
||||
}
|
||||
|
||||
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
|
||||
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u64 ratio;
|
||||
u64 khz;
|
||||
|
||||
/* TSC scaling supported? */
|
||||
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
|
||||
return;
|
||||
|
||||
/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
|
||||
if (user_tsc_khz == 0) {
|
||||
vcpu->arch.virtual_tsc_khz = 0;
|
||||
/* Guest TSC same frequency as host TSC? */
|
||||
if (!scale) {
|
||||
svm->tsc_ratio = TSC_RATIO_DEFAULT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* TSC scaling supported? */
|
||||
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
|
||||
if (user_tsc_khz > tsc_khz) {
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
vcpu->arch.tsc_always_catchup = 1;
|
||||
} else
|
||||
WARN(1, "user requested TSC rate below hardware speed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
khz = user_tsc_khz;
|
||||
|
||||
/* TSC scaling required - calculate ratio */
|
||||
@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
|
||||
user_tsc_khz);
|
||||
return;
|
||||
}
|
||||
vcpu->arch.virtual_tsc_khz = user_tsc_khz;
|
||||
svm->tsc_ratio = ratio;
|
||||
}
|
||||
|
||||
@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
|
||||
}
|
||||
|
||||
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
WARN_ON(adjustment < 0);
|
||||
if (host)
|
||||
adjustment = svm_scale_tsc(vcpu, adjustment);
|
||||
|
||||
svm->vmcb->control.tsc_offset += adjustment;
|
||||
if (is_guest_mode(vcpu))
|
||||
svm->nested.hsave->control.tsc_offset += adjustment;
|
||||
@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
if (kvm_vcpu_is_bsp(&svm->vcpu))
|
||||
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
|
||||
svm_init_osvw(&svm->vcpu);
|
||||
|
||||
return &svm->vcpu;
|
||||
|
||||
free_page4:
|
||||
@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
|
||||
}
|
||||
|
||||
static void svm_update_cpl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int cpl;
|
||||
|
||||
if (!is_protmode(vcpu))
|
||||
cpl = 0;
|
||||
else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
|
||||
cpl = 3;
|
||||
else
|
||||
cpl = svm->vmcb->save.cs.selector & 0x3;
|
||||
|
||||
svm->vmcb->save.cpl = cpl;
|
||||
}
|
||||
|
||||
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_svm(vcpu)->vmcb->save.rflags;
|
||||
@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
{
|
||||
unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
|
||||
|
||||
to_svm(vcpu)->vmcb->save.rflags = rflags;
|
||||
if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
|
||||
svm_update_cpl(vcpu);
|
||||
}
|
||||
|
||||
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
||||
@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
|
||||
s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
|
||||
}
|
||||
if (seg == VCPU_SREG_CS)
|
||||
svm->vmcb->save.cpl
|
||||
= (svm->vmcb->save.cs.attrib
|
||||
>> SVM_SELECTOR_DPL_SHIFT) & 3;
|
||||
svm_update_cpl(vcpu);
|
||||
|
||||
mark_dirty(svm->vmcb, VMCB_SEG);
|
||||
}
|
||||
@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
|
||||
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
|
||||
if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
|
||||
if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
|
||||
int_vec = -1;
|
||||
|
||||
if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
|
||||
has_error_code, error_code) == EMULATE_FAIL) {
|
||||
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||
|
@ -70,9 +70,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
|
||||
static bool __read_mostly vmm_exclusive = 1;
|
||||
module_param(vmm_exclusive, bool, S_IRUGO);
|
||||
|
||||
static bool __read_mostly yield_on_hlt = 1;
|
||||
module_param(yield_on_hlt, bool, S_IRUGO);
|
||||
|
||||
static bool __read_mostly fasteoi = 1;
|
||||
module_param(fasteoi, bool, S_IRUGO);
|
||||
|
||||
@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
vmx_set_interrupt_shadow(vcpu, 0);
|
||||
}
|
||||
|
||||
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Ensure that we clear the HLT state in the VMCS. We don't need to
|
||||
* explicitly skip the instruction because if the HLT state is set, then
|
||||
* the instruction is already executing and RIP has already been
|
||||
* advanced. */
|
||||
if (!yield_on_hlt &&
|
||||
vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
|
||||
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM wants to inject page-faults which it got to the guest. This function
|
||||
* checks whether in a nested guest, we need to inject them to L1 or L2.
|
||||
@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
|
||||
/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
|
||||
if (!(vmcs12->exception_bitmap & PF_VECTOR))
|
||||
if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
|
||||
return 0;
|
||||
|
||||
nested_vmx_vmexit(vcpu);
|
||||
@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
intr_info |= INTR_TYPE_HARD_EXCEPTION;
|
||||
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
|
||||
vmx_clear_hlt(vcpu);
|
||||
}
|
||||
|
||||
static bool vmx_rdtscp_supported(void)
|
||||
@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
|
||||
* ioctl. In this case the call-back should update internal vmx state to make
|
||||
* the changes effective.
|
||||
* Engage any workarounds for mis-matched TSC rates. Currently limited to
|
||||
* software catchup for faster rates on slower CPUs.
|
||||
*/
|
||||
static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
|
||||
static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
{
|
||||
/* Nothing to do here */
|
||||
if (!scale)
|
||||
return;
|
||||
|
||||
if (user_tsc_khz > tsc_khz) {
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
vcpu->arch.tsc_always_catchup = 1;
|
||||
} else
|
||||
WARN(1, "user requested TSC rate below hardware speed\n");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
|
||||
{
|
||||
u64 offset = vmcs_read64(TSC_OFFSET);
|
||||
vmcs_write64(TSC_OFFSET, offset + adjustment);
|
||||
@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
msr = find_msr_entry(vmx, msr_index);
|
||||
if (msr) {
|
||||
msr->data = data;
|
||||
if (msr - vmx->guest_msrs < vmx->save_nmsrs)
|
||||
kvm_set_shared_msr(msr->index, msr->data,
|
||||
msr->mask);
|
||||
break;
|
||||
}
|
||||
ret = kvm_set_msr_common(vcpu, msr_index, data);
|
||||
@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
&_pin_based_exec_control) < 0)
|
||||
return -EIO;
|
||||
|
||||
min =
|
||||
min = CPU_BASED_HLT_EXITING |
|
||||
#ifdef CONFIG_X86_64
|
||||
CPU_BASED_CR8_LOAD_EXITING |
|
||||
CPU_BASED_CR8_STORE_EXITING |
|
||||
@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
CPU_BASED_INVLPG_EXITING |
|
||||
CPU_BASED_RDPMC_EXITING;
|
||||
|
||||
if (yield_on_hlt)
|
||||
min |= CPU_BASED_HLT_EXITING;
|
||||
|
||||
opt = CPU_BASED_TPR_SHADOW |
|
||||
CPU_BASED_USE_MSR_BITMAPS |
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
|
||||
@ -4003,7 +3994,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
|
||||
} else
|
||||
intr |= INTR_TYPE_EXT_INTR;
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
|
||||
vmx_clear_hlt(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
@ -4035,7 +4025,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
||||
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
|
||||
vmx_clear_hlt(vcpu);
|
||||
}
|
||||
|
||||
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
||||
@ -4672,9 +4661,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
|
||||
bool has_error_code = false;
|
||||
u32 error_code = 0;
|
||||
u16 tss_selector;
|
||||
int reason, type, idt_v;
|
||||
int reason, type, idt_v, idt_index;
|
||||
|
||||
idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
|
||||
idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
|
||||
type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
|
||||
|
||||
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
@ -4712,8 +4702,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
|
||||
type != INTR_TYPE_NMI_INTR))
|
||||
skip_emulated_instruction(vcpu);
|
||||
|
||||
if (kvm_task_switch(vcpu, tss_selector, reason,
|
||||
has_error_code, error_code) == EMULATE_FAIL) {
|
||||
if (kvm_task_switch(vcpu, tss_selector,
|
||||
type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
|
||||
has_error_code, error_code) == EMULATE_FAIL) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||
vcpu->run->internal.ndata = 0;
|
||||
|
@ -97,6 +97,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
|
||||
u32 kvm_max_guest_tsc_khz;
|
||||
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
|
||||
|
||||
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
|
||||
static u32 tsc_tolerance_ppm = 250;
|
||||
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
#define KVM_NR_SHARED_MSRS 16
|
||||
|
||||
struct kvm_shared_msrs_global {
|
||||
@ -969,50 +973,51 @@ static inline u64 get_kernel_ns(void)
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
|
||||
unsigned long max_tsc_khz;
|
||||
|
||||
static inline int kvm_tsc_changes_freq(void)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
|
||||
cpufreq_quick_get(cpu) != 0;
|
||||
put_cpu();
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.virtual_tsc_khz)
|
||||
return vcpu->arch.virtual_tsc_khz;
|
||||
else
|
||||
return __this_cpu_read(cpu_tsc_khz);
|
||||
}
|
||||
|
||||
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
WARN_ON(preemptible());
|
||||
if (kvm_tsc_changes_freq())
|
||||
printk_once(KERN_WARNING
|
||||
"kvm: unreliable cycle conversion on adjustable rate TSC\n");
|
||||
ret = nsec * vcpu_tsc_khz(vcpu);
|
||||
do_div(ret, USEC_PER_SEC);
|
||||
return ret;
|
||||
return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
|
||||
vcpu->arch.virtual_tsc_shift);
|
||||
}
|
||||
|
||||
static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
|
||||
/*
 * Scale a rate in kHz by a parts-per-million delta.
 *
 * Computes @khz * (1000000 + @ppm) / 1000000.  The product is formed in
 * 64 bits and the quotient is truncated to u32 on return.
 *
 * NOTE(review): (1000000 + ppm) is evaluated as an int before being
 * widened, so a @ppm below -1000000 would yield a negative factor; the
 * callers visible in this file pass +/- tsc_tolerance_ppm - confirm the
 * module parameter is never set that large.
 */
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 scaled = khz;

	scaled *= (1000000 + ppm);
	do_div(scaled, 1000000);	/* do_div() divides in place */

	return scaled;
}
|
||||
|
||||
static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
|
||||
{
|
||||
u32 thresh_lo, thresh_hi;
|
||||
int use_scaling = 0;
|
||||
|
||||
/* Compute a scale to convert nanoseconds in TSC cycles */
|
||||
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
|
||||
&vcpu->arch.tsc_catchup_shift,
|
||||
&vcpu->arch.tsc_catchup_mult);
|
||||
&vcpu->arch.virtual_tsc_shift,
|
||||
&vcpu->arch.virtual_tsc_mult);
|
||||
vcpu->arch.virtual_tsc_khz = this_tsc_khz;
|
||||
|
||||
/*
|
||||
* Compute the variation in TSC rate which is acceptable
|
||||
* within the range of tolerance and decide if the
|
||||
* rate being applied is within the bounds of the hardware
|
||||
* rate. If so, no scaling or compensation need be done.
|
||||
*/
|
||||
thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
|
||||
thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
|
||||
if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
|
||||
pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
|
||||
use_scaling = 1;
|
||||
}
|
||||
kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
|
||||
}
|
||||
|
||||
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
|
||||
{
|
||||
u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
|
||||
vcpu->arch.tsc_catchup_mult,
|
||||
vcpu->arch.tsc_catchup_shift);
|
||||
tsc += vcpu->arch.last_tsc_write;
|
||||
u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
|
||||
vcpu->arch.virtual_tsc_mult,
|
||||
vcpu->arch.virtual_tsc_shift);
|
||||
tsc += vcpu->arch.this_tsc_write;
|
||||
return tsc;
|
||||
}
|
||||
|
||||
@ -1021,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
u64 offset, ns, elapsed;
|
||||
unsigned long flags;
|
||||
s64 sdiff;
|
||||
s64 usdiff;
|
||||
|
||||
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
ns = get_kernel_ns();
|
||||
elapsed = ns - kvm->arch.last_tsc_nsec;
|
||||
sdiff = data - kvm->arch.last_tsc_write;
|
||||
if (sdiff < 0)
|
||||
sdiff = -sdiff;
|
||||
|
||||
/* n.b - signed multiplication and division required */
|
||||
usdiff = data - kvm->arch.last_tsc_write;
|
||||
#ifdef CONFIG_X86_64
|
||||
usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
|
||||
#else
|
||||
/* do_div() only does unsigned */
|
||||
asm("idivl %2; xor %%edx, %%edx"
|
||||
: "=A"(usdiff)
|
||||
: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
|
||||
#endif
|
||||
do_div(elapsed, 1000);
|
||||
usdiff -= elapsed;
|
||||
if (usdiff < 0)
|
||||
usdiff = -usdiff;
|
||||
|
||||
/*
|
||||
* Special case: close write to TSC within 5 seconds of
|
||||
* another CPU is interpreted as an attempt to synchronize
|
||||
* The 5 seconds is to accommodate host load / swapping as
|
||||
* well as any reset of TSC during the boot process.
|
||||
*
|
||||
* In that case, for a reliable TSC, we can match TSC offsets,
|
||||
* or make a best guest using elapsed value.
|
||||
*/
|
||||
if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
|
||||
elapsed < 5ULL * NSEC_PER_SEC) {
|
||||
* Special case: TSC write with a small delta (1 second) of virtual
|
||||
* cycle time against real time is interpreted as an attempt to
|
||||
* synchronize the CPU.
|
||||
*
|
||||
* For a reliable TSC, we can match TSC offsets, and for an unstable
|
||||
* TSC, we add elapsed time in this computation. We could let the
|
||||
* compensation code attempt to catch up if we fall behind, but
|
||||
* it's better to try to match offsets from the beginning.
|
||||
*/
|
||||
if (usdiff < USEC_PER_SEC &&
|
||||
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
|
||||
if (!check_tsc_unstable()) {
|
||||
offset = kvm->arch.last_tsc_offset;
|
||||
offset = kvm->arch.cur_tsc_offset;
|
||||
pr_debug("kvm: matched tsc offset for %llu\n", data);
|
||||
} else {
|
||||
u64 delta = nsec_to_cycles(vcpu, elapsed);
|
||||
offset += delta;
|
||||
data += delta;
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
|
||||
}
|
||||
ns = kvm->arch.last_tsc_nsec;
|
||||
} else {
|
||||
/*
|
||||
* We split periods of matched TSC writes into generations.
|
||||
* For each generation, we track the original measured
|
||||
* nanosecond time, offset, and write, so if TSCs are in
|
||||
* sync, we can match exact offset, and if not, we can match
|
||||
* exact software computation in compute_guest_tsc()
|
||||
*
|
||||
* These values are tracked in kvm->arch.cur_xxx variables.
|
||||
*/
|
||||
kvm->arch.cur_tsc_generation++;
|
||||
kvm->arch.cur_tsc_nsec = ns;
|
||||
kvm->arch.cur_tsc_write = data;
|
||||
kvm->arch.cur_tsc_offset = offset;
|
||||
pr_debug("kvm: new tsc generation %u, clock %llu\n",
|
||||
kvm->arch.cur_tsc_generation, data);
|
||||
}
|
||||
|
||||
/*
|
||||
* We also track the most recent recorded KHZ, write and time to
|
||||
* allow the matching interval to be extended at each write.
|
||||
*/
|
||||
kvm->arch.last_tsc_nsec = ns;
|
||||
kvm->arch.last_tsc_write = data;
|
||||
kvm->arch.last_tsc_offset = offset;
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
|
||||
kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
|
||||
|
||||
/* Reset of TSC must disable overshoot protection below */
|
||||
vcpu->arch.hv_clock.tsc_timestamp = 0;
|
||||
vcpu->arch.last_tsc_write = data;
|
||||
vcpu->arch.last_tsc_nsec = ns;
|
||||
vcpu->arch.last_guest_tsc = data;
|
||||
|
||||
/* Keep track of which generation this VCPU has synchronized to */
|
||||
vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
|
||||
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
|
||||
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
|
||||
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(kvm_write_tsc);
|
||||
|
||||
static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
@ -1078,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
local_irq_save(flags);
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
|
||||
kernel_ns = get_kernel_ns();
|
||||
this_tsc_khz = vcpu_tsc_khz(v);
|
||||
this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
|
||||
if (unlikely(this_tsc_khz == 0)) {
|
||||
local_irq_restore(flags);
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
|
||||
@ -1098,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
if (vcpu->tsc_catchup) {
|
||||
u64 tsc = compute_guest_tsc(v, kernel_ns);
|
||||
if (tsc > tsc_timestamp) {
|
||||
kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
|
||||
adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
|
||||
tsc_timestamp = tsc;
|
||||
}
|
||||
}
|
||||
@ -1130,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
* observed by the guest and ensure the new system time is greater.
|
||||
*/
|
||||
max_kernel_ns = 0;
|
||||
if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
|
||||
if (vcpu->hv_clock.tsc_timestamp) {
|
||||
max_kernel_ns = vcpu->last_guest_tsc -
|
||||
vcpu->hv_clock.tsc_timestamp;
|
||||
max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
|
||||
@ -1504,6 +1549,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
case MSR_K7_HWCR:
|
||||
data &= ~(u64)0x40; /* ignore flush filter disable */
|
||||
data &= ~(u64)0x100; /* ignore ignne emulation enable */
|
||||
data &= ~(u64)0x8; /* ignore TLB cache disable */
|
||||
if (data != 0) {
|
||||
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
|
||||
data);
|
||||
@ -1676,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
*/
|
||||
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
|
||||
break;
|
||||
case MSR_AMD64_OSVW_ID_LENGTH:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
return 1;
|
||||
vcpu->arch.osvw.length = data;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_STATUS:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
return 1;
|
||||
vcpu->arch.osvw.status = data;
|
||||
break;
|
||||
default:
|
||||
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
|
||||
return xen_hvm_config(vcpu, data);
|
||||
@ -1960,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
*/
|
||||
data = 0xbe702111;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_ID_LENGTH:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
return 1;
|
||||
data = vcpu->arch.osvw.length;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_STATUS:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
return 1;
|
||||
data = vcpu->arch.osvw.status;
|
||||
break;
|
||||
default:
|
||||
if (kvm_pmu_msr(vcpu, msr))
|
||||
return kvm_pmu_get_msr(vcpu, msr, pdata);
|
||||
@ -2080,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_XSAVE:
|
||||
case KVM_CAP_ASYNC_PF:
|
||||
case KVM_CAP_GET_TSC_KHZ:
|
||||
case KVM_CAP_PCI_2_3:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
@ -2214,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
}
|
||||
|
||||
kvm_x86_ops->vcpu_load(vcpu, cpu);
|
||||
|
||||
/* Apply any externally detected TSC adjustments (due to suspend) */
|
||||
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
|
||||
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
|
||||
vcpu->arch.tsc_offset_adjustment = 0;
|
||||
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
|
||||
}
|
||||
|
||||
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
|
||||
/* Make sure TSC doesn't go backwards */
|
||||
s64 tsc_delta;
|
||||
u64 tsc;
|
||||
|
||||
tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
|
||||
tsc - vcpu->arch.last_guest_tsc;
|
||||
|
||||
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
|
||||
native_read_tsc() - vcpu->arch.last_host_tsc;
|
||||
if (tsc_delta < 0)
|
||||
mark_tsc_unstable("KVM discovered backwards TSC");
|
||||
if (check_tsc_unstable()) {
|
||||
kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
|
||||
u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
|
||||
vcpu->arch.last_guest_tsc);
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
}
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
@ -2243,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_x86_ops->vcpu_put(vcpu);
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
vcpu->arch.last_host_tsc = native_read_tsc();
|
||||
}
|
||||
|
||||
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
|
||||
@ -2785,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
u32 user_tsc_khz;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!kvm_has_tsc_control)
|
||||
break;
|
||||
|
||||
user_tsc_khz = (u32)arg;
|
||||
|
||||
if (user_tsc_khz >= kvm_max_guest_tsc_khz)
|
||||
goto out;
|
||||
|
||||
kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
|
||||
if (user_tsc_khz == 0)
|
||||
user_tsc_khz = tsc_khz;
|
||||
|
||||
kvm_set_tsc_khz(vcpu, user_tsc_khz);
|
||||
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
case KVM_GET_TSC_KHZ: {
|
||||
r = -EIO;
|
||||
if (check_tsc_unstable())
|
||||
goto out;
|
||||
|
||||
r = vcpu_tsc_khz(vcpu);
|
||||
|
||||
r = vcpu->arch.virtual_tsc_khz;
|
||||
goto out;
|
||||
}
|
||||
default:
|
||||
@ -2815,6 +2881,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Arch hook for faults on a vcpu mapping.
 *
 * This implementation inspects neither @vcpu nor @vmf and always
 * returns VM_FAULT_SIGBUS.
 *
 * NOTE(review): presumably called from the generic vcpu mmap fault
 * path for offsets the generic code does not handle - confirm against
 * the caller in virt/kvm/kvm_main.c.
 */
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
{
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
|
||||
{
|
||||
int ret;
|
||||
@ -2998,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm,
|
||||
unsigned long *dirty_bitmap,
|
||||
unsigned long nr_dirty_pages)
|
||||
{
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
/* Not many dirty pages compared to # of shadow pages. */
|
||||
if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
|
||||
unsigned long gfn_offset;
|
||||
@ -3005,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm,
|
||||
for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
|
||||
unsigned long gfn = memslot->base_gfn + gfn_offset;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
} else {
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
} else
|
||||
kvm_mmu_slot_remove_write_access(kvm, memslot->id);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3133,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = -EEXIST;
|
||||
if (kvm->arch.vpic)
|
||||
goto create_irqchip_unlock;
|
||||
r = -EINVAL;
|
||||
if (atomic_read(&kvm->online_vcpus))
|
||||
goto create_irqchip_unlock;
|
||||
r = -ENOMEM;
|
||||
vpic = kvm_create_pic(kvm);
|
||||
if (vpic) {
|
||||
@ -4063,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Emulator callback (.set_rflags in emulate_ops): store @val as the
 * RFLAGS of the vcpu that owns @ctxt by forwarding to kvm_set_rflags().
 */
static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	kvm_set_rflags(vcpu, val);
}
|
||||
|
||||
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
|
||||
@ -4244,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = {
|
||||
.set_idt = emulator_set_idt,
|
||||
.get_cr = emulator_get_cr,
|
||||
.set_cr = emulator_set_cr,
|
||||
.set_rflags = emulator_set_rflags,
|
||||
.cpl = emulator_get_cpl,
|
||||
.get_dr = emulator_get_dr,
|
||||
.set_dr = emulator_set_dr,
|
||||
@ -5288,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
profile_hit(KVM_PROFILING, (void *)rip);
|
||||
}
|
||||
|
||||
if (unlikely(vcpu->arch.tsc_always_catchup))
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
|
||||
kvm_lapic_sync_from_vapic(vcpu);
|
||||
|
||||
@ -5587,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
|
||||
bool has_error_code, u32 error_code)
|
||||
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
|
||||
int reason, bool has_error_code, u32 error_code)
|
||||
{
|
||||
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
||||
int ret;
|
||||
|
||||
init_emulate_ctxt(vcpu);
|
||||
|
||||
ret = emulator_task_switch(ctxt, tss_selector, reason,
|
||||
ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
|
||||
has_error_code, error_code);
|
||||
|
||||
if (ret)
|
||||
@ -5928,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage)
|
||||
struct kvm *kvm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
int ret;
|
||||
u64 local_tsc;
|
||||
u64 max_tsc = 0;
|
||||
bool stable, backwards_tsc = false;
|
||||
|
||||
kvm_shared_msr_cpu_online();
|
||||
list_for_each_entry(kvm, &vm_list, vm_list)
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
if (vcpu->cpu == smp_processor_id())
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
return kvm_x86_ops->hardware_enable(garbage);
|
||||
ret = kvm_x86_ops->hardware_enable(garbage);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
local_tsc = native_read_tsc();
|
||||
stable = !check_tsc_unstable();
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (!stable && vcpu->cpu == smp_processor_id())
|
||||
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
|
||||
if (stable && vcpu->arch.last_host_tsc > local_tsc) {
|
||||
backwards_tsc = true;
|
||||
if (vcpu->arch.last_host_tsc > max_tsc)
|
||||
max_tsc = vcpu->arch.last_host_tsc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Sometimes, even reliable TSCs go backwards. This happens on
|
||||
* platforms that reset TSC during suspend or hibernate actions, but
|
||||
* maintain synchronization. We must compensate. Fortunately, we can
|
||||
* detect that condition here, which happens early in CPU bringup,
|
||||
* before any KVM threads can be running. Unfortunately, we can't
|
||||
* bring the TSCs fully up to date with real time, as we aren't yet far
|
||||
* enough into CPU bringup that we know how much real time has actually
|
||||
* elapsed; our helper function, get_kernel_ns() will be using boot
|
||||
* variables that haven't been updated yet.
|
||||
*
|
||||
* So we simply find the maximum observed TSC above, then record the
|
||||
* adjustment to TSC in each VCPU. When the VCPU later gets loaded,
|
||||
* the adjustment will be applied. Note that we accumulate
|
||||
* adjustments, in case multiple suspend cycles happen before some VCPU
|
||||
* gets a chance to run again. In the event that no KVM threads get a
|
||||
* chance to run, we will miss the entire elapsed period, as we'll have
|
||||
* reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
|
||||
* lose cycle time. This isn't too big a deal, since the loss will be
|
||||
* uniform across all VCPUs (not to mention the scenario is extremely
|
||||
* unlikely). It is possible that a second hibernate recovery happens
|
||||
* much faster than a first, causing the observed TSC here to be
|
||||
* smaller; this would require additional padding adjustment, which is
|
||||
* why we set last_host_tsc to the local tsc observed here.
|
||||
*
|
||||
* N.B. - this code below runs only on platforms with reliable TSC,
|
||||
* as that is the only way backwards_tsc is set above. Also note
|
||||
* that this runs for ALL vcpus, which is not a bug; all VCPUs should
|
||||
* have the same delta_cyc adjustment applied if backwards_tsc
|
||||
* is detected. Note further, this adjustment is only done once,
|
||||
* as we reset last_host_tsc on all VCPUs to stop this from being
|
||||
* called multiple times (one for each physical CPU bringup).
|
||||
*
|
||||
* Platforms with unreliable TSCs don't have to deal with this, they
|
||||
* will be compensated by the logic in vcpu_load, which sets the TSC to
|
||||
* catchup mode. This will catchup all VCPUs to real time, but cannot
|
||||
* guarantee that they stay in perfect synchronization.
|
||||
*/
|
||||
if (backwards_tsc) {
|
||||
u64 delta_cyc = max_tsc - local_tsc;
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.tsc_offset_adjustment += delta_cyc;
|
||||
vcpu->arch.last_host_tsc = local_tsc;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to disable TSC offset matching.. if you were
|
||||
* booting a VM while issuing an S4 host suspend....
|
||||
* you may have some problem. Solving this issue is
|
||||
* left as an exercise to the reader.
|
||||
*/
|
||||
kvm->arch.last_tsc_nsec = 0;
|
||||
kvm->arch.last_tsc_write = 0;
|
||||
}
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void *garbage)
|
||||
@ -5958,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn)
|
||||
kvm_x86_ops->check_processor_compatibility(rtn);
|
||||
}
|
||||
|
||||
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct page *page;
|
||||
@ -5980,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
vcpu->arch.pio_data = page_address(page);
|
||||
|
||||
kvm_init_tsc_catchup(vcpu, max_tsc_khz);
|
||||
kvm_set_tsc_khz(vcpu, max_tsc_khz);
|
||||
|
||||
r = kvm_mmu_create(vcpu);
|
||||
if (r < 0)
|
||||
@ -6032,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
||||
free_page((unsigned long)vcpu->arch.pio_data);
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm)
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
if (type)
|
||||
return -EINVAL;
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
|
||||
|
||||
@ -6093,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
put_page(kvm->arch.ept_identity_pagetable);
|
||||
}
|
||||
|
||||
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
|
||||
vfree(free->arch.lpage_info[i]);
|
||||
free->arch.lpage_info[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
unsigned long ugfn;
|
||||
int lpages;
|
||||
int level = i + 2;
|
||||
|
||||
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
||||
slot->base_gfn, level) + 1;
|
||||
|
||||
slot->arch.lpage_info[i] =
|
||||
vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
|
||||
if (!slot->arch.lpage_info[i])
|
||||
goto out_free;
|
||||
|
||||
if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
slot->arch.lpage_info[i][0].write_count = 1;
|
||||
if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
slot->arch.lpage_info[i][lpages - 1].write_count = 1;
|
||||
ugfn = slot->userspace_addr >> PAGE_SHIFT;
|
||||
/*
|
||||
* If the gfn and userspace address are not aligned wrt each
|
||||
* other, or if explicitly asked to, disable large page
|
||||
* support for this slot
|
||||
*/
|
||||
if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
|
||||
!kvm_largepages_enabled()) {
|
||||
unsigned long j;
|
||||
|
||||
for (j = 0; j < lpages; ++j)
|
||||
slot->arch.lpage_info[i][j].write_count = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
vfree(slot->arch.lpage_info[i]);
|
||||
slot->arch.lpage_info[i] = NULL;
|
||||
}
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_memory_slot old,
|
||||
|
@ -115,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt)
|
||||
void save_processor_state(void)
|
||||
{
|
||||
__save_processor_state(&saved_context);
|
||||
save_sched_clock_state();
|
||||
x86_platform.save_sched_clock_state();
|
||||
}
|
||||
#ifdef CONFIG_X86_32
|
||||
EXPORT_SYMBOL(save_processor_state);
|
||||
@ -231,8 +231,8 @@ static void __restore_processor_state(struct saved_context *ctxt)
|
||||
/* Needed by apm.c */
|
||||
void restore_processor_state(void)
|
||||
{
|
||||
x86_platform.restore_sched_clock_state();
|
||||
__restore_processor_state(&saved_context);
|
||||
restore_sched_clock_state();
|
||||
}
|
||||
#ifdef CONFIG_X86_32
|
||||
EXPORT_SYMBOL(restore_processor_state);
|
||||
|
@ -162,6 +162,7 @@ struct kvm_pit_config {
|
||||
#define KVM_EXIT_INTERNAL_ERROR 17
|
||||
#define KVM_EXIT_OSI 18
|
||||
#define KVM_EXIT_PAPR_HCALL 19
|
||||
#define KVM_EXIT_S390_UCONTROL 20
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
#define KVM_INTERNAL_ERROR_EMULATION 1
|
||||
@ -249,6 +250,11 @@ struct kvm_run {
|
||||
#define KVM_S390_RESET_CPU_INIT 8
|
||||
#define KVM_S390_RESET_IPL 16
|
||||
__u64 s390_reset_flags;
|
||||
/* KVM_EXIT_S390_UCONTROL */
|
||||
struct {
|
||||
__u64 trans_exc_code;
|
||||
__u32 pgm_code;
|
||||
} s390_ucontrol;
|
||||
/* KVM_EXIT_DCR */
|
||||
struct {
|
||||
__u32 dcrn;
|
||||
@ -273,6 +279,20 @@ struct kvm_run {
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
|
||||
/*
|
||||
* shared registers between kvm and userspace.
|
||||
* kvm_valid_regs specifies the register classes set by the host
|
||||
* kvm_dirty_regs specifies the register classes dirtied by userspace
|
||||
* struct kvm_sync_regs is architecture specific, as well as the
|
||||
* bits for kvm_valid_regs and kvm_dirty_regs
|
||||
*/
|
||||
__u64 kvm_valid_regs;
|
||||
__u64 kvm_dirty_regs;
|
||||
union {
|
||||
struct kvm_sync_regs regs;
|
||||
char padding[1024];
|
||||
} s;
|
||||
};
|
||||
|
||||
/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
|
||||
@ -431,6 +451,11 @@ struct kvm_ppc_pvinfo {
|
||||
|
||||
#define KVMIO 0xAE
|
||||
|
||||
/* machine type bits, to be used as argument to KVM_CREATE_VM */
|
||||
#define KVM_VM_S390_UCONTROL 1
|
||||
|
||||
#define KVM_S390_SIE_PAGE_OFFSET 1
|
||||
|
||||
/*
|
||||
* ioctls for /dev/kvm fds:
|
||||
*/
|
||||
@ -555,9 +580,15 @@ struct kvm_ppc_pvinfo {
|
||||
#define KVM_CAP_PPC_SMT 64
|
||||
#define KVM_CAP_PPC_RMA 65
|
||||
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
|
||||
#define KVM_CAP_PPC_HIOR 67
|
||||
#define KVM_CAP_PPC_PAPR 68
|
||||
#define KVM_CAP_SW_TLB 69
|
||||
#define KVM_CAP_ONE_REG 70
|
||||
#define KVM_CAP_S390_GMAP 71
|
||||
#define KVM_CAP_TSC_DEADLINE_TIMER 72
|
||||
#define KVM_CAP_S390_UCONTROL 73
|
||||
#define KVM_CAP_SYNC_REGS 74
|
||||
#define KVM_CAP_PCI_2_3 75
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -637,6 +668,52 @@ struct kvm_clock_data {
|
||||
__u32 pad[9];
|
||||
};
|
||||
|
||||
#define KVM_MMU_FSL_BOOKE_NOHV 0
|
||||
#define KVM_MMU_FSL_BOOKE_HV 1
|
||||
|
||||
/*
 * TLB configuration descriptor exchanged with userspace.
 *
 * NOTE(review): no user of this struct is visible in this hunk.
 * mmu_type presumably takes one of the KVM_MMU_FSL_BOOKE_* values
 * defined just above, params/array presumably carry userspace
 * addresses and array_len the size of the array - confirm against the
 * KVM API documentation before relying on this.
 */
struct kvm_config_tlb {
|
||||
__u64 params;
|
||||
__u64 array;
|
||||
__u32 mmu_type;
|
||||
__u32 array_len;
|
||||
};
|
||||
|
||||
/*
 * Argument of the KVM_DIRTY_TLB ioctl (available with KVM_CAP_SW_TLB).
 *
 * NOTE(review): bitmap is presumably the userspace address of a bitmap
 * of dirty TLB entries and num_dirty the number of bits set in it -
 * confirm against the KVM API documentation.
 */
struct kvm_dirty_tlb {
|
||||
__u64 bitmap;
|
||||
__u32 num_dirty;
|
||||
};
|
||||
|
||||
/* Available with KVM_CAP_ONE_REG */
|
||||
|
||||
#define KVM_REG_ARCH_MASK 0xff00000000000000ULL
|
||||
#define KVM_REG_GENERIC 0x0000000000000000ULL
|
||||
|
||||
/*
|
||||
* Architecture specific registers are to be defined in arch headers and
|
||||
* ORed with the arch identifier.
|
||||
*/
|
||||
#define KVM_REG_PPC 0x1000000000000000ULL
|
||||
#define KVM_REG_X86 0x2000000000000000ULL
|
||||
#define KVM_REG_IA64 0x3000000000000000ULL
|
||||
#define KVM_REG_ARM 0x4000000000000000ULL
|
||||
#define KVM_REG_S390 0x5000000000000000ULL
|
||||
|
||||
#define KVM_REG_SIZE_SHIFT 52
|
||||
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
|
||||
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
|
||||
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
|
||||
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
|
||||
#define KVM_REG_SIZE_U64 0x0030000000000000ULL
|
||||
#define KVM_REG_SIZE_U128 0x0040000000000000ULL
|
||||
#define KVM_REG_SIZE_U256 0x0050000000000000ULL
|
||||
#define KVM_REG_SIZE_U512 0x0060000000000000ULL
|
||||
#define KVM_REG_SIZE_U1024 0x0070000000000000ULL
|
||||
|
||||
/*
 * Argument of the KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls (available
 * with KVM_CAP_ONE_REG).
 *
 * NOTE(review): id is presumably a register identifier composed from
 * the KVM_REG_* architecture and KVM_REG_SIZE_* bits defined above, and
 * addr the userspace address of the value buffer - confirm against the
 * KVM API documentation.
 */
struct kvm_one_reg {
|
||||
__u64 id;
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
/*
|
||||
* ioctls for VM fds
|
||||
*/
|
||||
@ -655,6 +732,17 @@ struct kvm_clock_data {
|
||||
struct kvm_userspace_memory_region)
|
||||
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
|
||||
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
|
||||
|
||||
/* enable ucontrol for s390 */
|
||||
/*
 * Argument of the KVM_S390_UCAS_MAP / KVM_S390_UCAS_UNMAP ioctls.
 *
 * NOTE(review): presumably describes a range of 'length' bytes that
 * ties user_addr (address in the calling process) to vcpu_addr (address
 * as seen by the vcpu) - confirm units and alignment requirements
 * against the KVM API documentation.
 */
struct kvm_s390_ucas_mapping {
|
||||
__u64 user_addr;
|
||||
__u64 vcpu_addr;
|
||||
__u64 length;
|
||||
};
|
||||
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
|
||||
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
|
||||
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
|
||||
|
||||
/* Device model IOC */
|
||||
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
|
||||
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
|
||||
@ -697,6 +785,9 @@ struct kvm_clock_data {
|
||||
/* Available with KVM_CAP_TSC_CONTROL */
|
||||
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
|
||||
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
|
||||
/* Available with KVM_CAP_PCI_2_3 */
|
||||
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
|
||||
/*
|
||||
* ioctls for vcpu fds
|
||||
@ -763,8 +854,15 @@ struct kvm_clock_data {
|
||||
#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
|
||||
/* Available with KVM_CAP_RMA */
|
||||
#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
|
||||
/* Available with KVM_CAP_SW_TLB */
|
||||
#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb)
|
||||
/* Available with KVM_CAP_ONE_REG */
|
||||
#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg)
|
||||
#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
|
||||
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
|
||||
|
||||
struct kvm_assigned_pci_dev {
|
||||
__u32 assigned_dev_id;
|
||||
|
@ -172,11 +172,6 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
|
||||
|
||||
struct kvm_lpage_info {
|
||||
unsigned long rmap_pde;
|
||||
int write_count;
|
||||
};
|
||||
|
||||
struct kvm_memory_slot {
|
||||
gfn_t base_gfn;
|
||||
unsigned long npages;
|
||||
@ -185,7 +180,7 @@ struct kvm_memory_slot {
|
||||
unsigned long *dirty_bitmap;
|
||||
unsigned long *dirty_bitmap_head;
|
||||
unsigned long nr_dirty_pages;
|
||||
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||
struct kvm_arch_memory_slot arch;
|
||||
unsigned long userspace_addr;
|
||||
int user_alloc;
|
||||
int id;
|
||||
@ -377,6 +372,9 @@ int kvm_set_memory_region(struct kvm *kvm,
|
||||
int __kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
int user_alloc);
|
||||
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont);
|
||||
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_memory_slot old,
|
||||
@ -386,6 +384,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
struct kvm_memory_slot old,
|
||||
int user_alloc);
|
||||
bool kvm_largepages_enabled(void);
|
||||
void kvm_disable_largepages(void);
|
||||
void kvm_arch_flush_shadow(struct kvm *kvm);
|
||||
|
||||
@ -451,6 +450,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg);
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg);
|
||||
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
|
||||
|
||||
int kvm_dev_ioctl_check_extension(long ext);
|
||||
|
||||
@ -521,7 +521,7 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
|
||||
}
|
||||
#endif
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm);
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm);
|
||||
void kvm_free_all_assigned_devices(struct kvm *kvm);
|
||||
void kvm_arch_sync_events(struct kvm *kvm);
|
||||
@ -547,6 +547,7 @@ struct kvm_assigned_dev_kernel {
|
||||
unsigned int entries_nr;
|
||||
int host_irq;
|
||||
bool host_irq_disabled;
|
||||
bool pci_2_3;
|
||||
struct msix_entry *host_msix_entries;
|
||||
int guest_irq;
|
||||
struct msix_entry *guest_msix_entries;
|
||||
@ -556,6 +557,7 @@ struct kvm_assigned_dev_kernel {
|
||||
struct pci_dev *dev;
|
||||
struct kvm *kvm;
|
||||
spinlock_t intx_lock;
|
||||
spinlock_t intx_mask_lock;
|
||||
char irq_name[32];
|
||||
struct pci_saved_state *pci_saved_state;
|
||||
};
|
||||
@ -651,11 +653,43 @@ static inline void kvm_guest_exit(void)
|
||||
current->flags &= ~PF_VCPU;
|
||||
}
|
||||
|
||||
/*
|
||||
* search_memslots() and __gfn_to_memslot() are here because they are
|
||||
* used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
|
||||
* gfn_to_memslot() itself isn't here as an inline because that would
|
||||
* bloat other code too much.
|
||||
*/
|
||||
static inline struct kvm_memory_slot *
|
||||
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
if (gfn >= memslot->base_gfn &&
|
||||
gfn < memslot->base_gfn + memslot->npages)
|
||||
return memslot;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Look up the memslot containing @gfn in an explicitly supplied slot set.
 * Thin wrapper: returns exactly what search_memslots(slots, gfn) returns.
 */
static inline struct kvm_memory_slot *
|
||||
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
|
||||
{
|
||||
return search_memslots(slots, gfn);
|
||||
}
|
||||
|
||||
/*
 * Return the id of the memslot that gfn_to_memslot() finds for @gfn.
 *
 * NOTE(review): the lookup result is dereferenced without a NULL check.
 * Callers presumably only pass a gfn known to be backed by a slot --
 * confirm, since the underlying search can return NULL.
 */
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_memslot(kvm, gfn)->id;
|
||||
}
|
||||
|
||||
/*
 * Compute the distance between @gfn and @base_gfn measured in pages of the
 * size selected by @level: both frame numbers are shifted right by
 * KVM_HPAGE_GFN_SHIFT(level) and the results are subtracted.
 */
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
	gfn_t page_nr = gfn >> KVM_HPAGE_GFN_SHIFT(level);
	gfn_t base_nr = base_gfn >> KVM_HPAGE_GFN_SHIFT(level);

	return page_nr - base_nr;
}
|
||||
|
||||
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
|
||||
gfn_t gfn)
|
||||
{
|
||||
@ -702,12 +736,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
|
||||
if (unlikely(vcpu->kvm->mmu_notifier_count))
|
||||
return 1;
|
||||
/*
|
||||
* Both reads happen under the mmu_lock and both values are
|
||||
* modified under mmu_lock, so there's no need of smb_rmb()
|
||||
* here in between, otherwise mmu_notifier_count should be
|
||||
* read before mmu_notifier_seq, see
|
||||
* mmu_notifier_invalidate_range_end write side.
|
||||
* Ensure the read of mmu_notifier_count happens before the read
|
||||
* of mmu_notifier_seq. This interacts with the smp_wmb() in
|
||||
* mmu_notifier_invalidate_range_end to make sure that the caller
|
||||
* either sees the old (non-zero) value of mmu_notifier_count or
|
||||
* the new (incremented) value of mmu_notifier_seq.
|
||||
* PowerPC Book3s HV KVM calls this under a per-page lock
|
||||
* rather than under kvm->mmu_lock, for scalability, so
|
||||
* can't rely on kvm->mmu_lock to keep things ordered.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
|
||||
return 1;
|
||||
return 0;
|
||||
@ -770,6 +808,13 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
|
||||
}
|
||||
|
||||
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
|
||||
|
||||
#else
|
||||
|
||||
/* Fallback for the #else branch: every vcpu is reported as compatible. */
static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
|
||||
|
@ -49,31 +49,73 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
if (index < 0) {
|
||||
if (index < 0)
|
||||
printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
|
||||
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
int ret;
|
||||
|
||||
spin_lock(&assigned_dev->intx_lock);
|
||||
if (pci_check_and_mask_intx(assigned_dev->dev)) {
|
||||
assigned_dev->host_irq_disabled = true;
|
||||
ret = IRQ_WAKE_THREAD;
|
||||
} else
|
||||
ret = IRQ_NONE;
|
||||
spin_unlock(&assigned_dev->intx_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
|
||||
int vector)
|
||||
{
|
||||
if (unlikely(assigned_dev->irq_requested_type &
|
||||
KVM_DEV_IRQ_GUEST_INTX)) {
|
||||
spin_lock(&assigned_dev->intx_mask_lock);
|
||||
if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
|
||||
kvm_set_irq(assigned_dev->kvm,
|
||||
assigned_dev->irq_source_id, vector, 1);
|
||||
spin_unlock(&assigned_dev->intx_mask_lock);
|
||||
} else
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
vector, 1);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) {
|
||||
spin_lock(&assigned_dev->intx_lock);
|
||||
if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
|
||||
spin_lock_irq(&assigned_dev->intx_lock);
|
||||
disable_irq_nosync(irq);
|
||||
assigned_dev->host_irq_disabled = true;
|
||||
spin_unlock(&assigned_dev->intx_lock);
|
||||
spin_unlock_irq(&assigned_dev->intx_lock);
|
||||
}
|
||||
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 1);
|
||||
kvm_assigned_dev_raise_guest_irq(assigned_dev,
|
||||
assigned_dev->guest_irq);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
/*
 * Threaded handler for a host MSI interrupt of an assigned device; @dev_id
 * is the struct kvm_assigned_dev_kernel registered with the IRQ.
 * Forwards the event to the guest by raising the device's guest_irq and
 * always reports the interrupt as handled.
 */
static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
|
||||
kvm_assigned_dev_raise_guest_irq(assigned_dev,
|
||||
assigned_dev->guest_irq);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
|
||||
{
|
||||
@ -83,8 +125,7 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
|
||||
|
||||
if (index >= 0) {
|
||||
vector = assigned_dev->guest_msix_entries[index].vector;
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
vector, 1);
|
||||
kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
@ -100,15 +141,31 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
|
||||
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
|
||||
|
||||
/* The guest irq may be shared so this ack may be
|
||||
* from another device.
|
||||
*/
|
||||
spin_lock(&dev->intx_lock);
|
||||
if (dev->host_irq_disabled) {
|
||||
enable_irq(dev->host_irq);
|
||||
dev->host_irq_disabled = false;
|
||||
spin_lock(&dev->intx_mask_lock);
|
||||
|
||||
if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
|
||||
bool reassert = false;
|
||||
|
||||
spin_lock_irq(&dev->intx_lock);
|
||||
/*
|
||||
* The guest IRQ may be shared so this ack can come from an
|
||||
* IRQ for another guest device.
|
||||
*/
|
||||
if (dev->host_irq_disabled) {
|
||||
if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
|
||||
enable_irq(dev->host_irq);
|
||||
else if (!pci_check_and_unmask_intx(dev->dev))
|
||||
reassert = true;
|
||||
dev->host_irq_disabled = reassert;
|
||||
}
|
||||
spin_unlock_irq(&dev->intx_lock);
|
||||
|
||||
if (reassert)
|
||||
kvm_set_irq(dev->kvm, dev->irq_source_id,
|
||||
dev->guest_irq, 1);
|
||||
}
|
||||
spin_unlock(&dev->intx_lock);
|
||||
|
||||
spin_unlock(&dev->intx_mask_lock);
|
||||
}
|
||||
|
||||
static void deassign_guest_irq(struct kvm *kvm,
|
||||
@ -156,7 +213,15 @@ static void deassign_host_irq(struct kvm *kvm,
|
||||
pci_disable_msix(assigned_dev->dev);
|
||||
} else {
|
||||
/* Deal with MSI and INTx */
|
||||
disable_irq(assigned_dev->host_irq);
|
||||
if ((assigned_dev->irq_requested_type &
|
||||
KVM_DEV_IRQ_HOST_INTX) &&
|
||||
(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
|
||||
spin_lock_irq(&assigned_dev->intx_lock);
|
||||
pci_intx(assigned_dev->dev, false);
|
||||
spin_unlock_irq(&assigned_dev->intx_lock);
|
||||
synchronize_irq(assigned_dev->host_irq);
|
||||
} else
|
||||
disable_irq(assigned_dev->host_irq);
|
||||
|
||||
free_irq(assigned_dev->host_irq, assigned_dev);
|
||||
|
||||
@ -237,15 +302,34 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
|
||||
static int assigned_device_enable_host_intx(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev)
|
||||
{
|
||||
irq_handler_t irq_handler;
|
||||
unsigned long flags;
|
||||
|
||||
dev->host_irq = dev->dev->irq;
|
||||
/* Even though this is PCI, we don't want to use shared
|
||||
* interrupts. Sharing host devices with guest-assigned devices
|
||||
* on the same interrupt line is not a happy situation: there
|
||||
* are going to be long delays in accepting, acking, etc.
|
||||
|
||||
/*
|
||||
* We can only share the IRQ line with other host devices if we are
|
||||
* able to disable the IRQ source at device-level - independently of
|
||||
* the guest driver. Otherwise host devices may suffer from unbounded
|
||||
* IRQ latencies when the guest keeps the line asserted.
|
||||
*/
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
IRQF_ONESHOT, dev->irq_name, dev))
|
||||
if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
|
||||
irq_handler = kvm_assigned_dev_intx;
|
||||
flags = IRQF_SHARED;
|
||||
} else {
|
||||
irq_handler = NULL;
|
||||
flags = IRQF_ONESHOT;
|
||||
}
|
||||
if (request_threaded_irq(dev->host_irq, irq_handler,
|
||||
kvm_assigned_dev_thread_intx, flags,
|
||||
dev->irq_name, dev))
|
||||
return -EIO;
|
||||
|
||||
if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
|
||||
spin_lock_irq(&dev->intx_lock);
|
||||
pci_intx(dev->dev, true);
|
||||
spin_unlock_irq(&dev->intx_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -262,8 +346,9 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
|
||||
}
|
||||
|
||||
dev->host_irq = dev->dev->irq;
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
0, dev->irq_name, dev)) {
|
||||
if (request_threaded_irq(dev->host_irq, NULL,
|
||||
kvm_assigned_dev_thread_msi, 0,
|
||||
dev->irq_name, dev)) {
|
||||
pci_disable_msi(dev->dev);
|
||||
return -EIO;
|
||||
}
|
||||
@ -321,7 +406,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
|
||||
{
|
||||
dev->guest_irq = irq->guest_irq;
|
||||
dev->ack_notifier.gsi = -1;
|
||||
dev->host_irq_disabled = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
@ -333,7 +417,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
|
||||
{
|
||||
dev->guest_irq = irq->guest_irq;
|
||||
dev->ack_notifier.gsi = -1;
|
||||
dev->host_irq_disabled = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
@ -367,6 +450,7 @@ static int assign_host_irq(struct kvm *kvm,
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
dev->host_irq_disabled = false;
|
||||
|
||||
if (!r)
|
||||
dev->irq_requested_type |= host_irq_type;
|
||||
@ -468,6 +552,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
|
||||
{
|
||||
int r = -ENODEV;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
unsigned long irq_type;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
@ -476,7 +561,9 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
|
||||
if (!match)
|
||||
goto out;
|
||||
|
||||
r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
|
||||
irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
|
||||
KVM_DEV_IRQ_GUEST_MASK);
|
||||
r = kvm_deassign_irq(kvm, match, irq_type);
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
@ -609,6 +696,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
||||
if (!match->pci_saved_state)
|
||||
printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
|
||||
__func__, dev_name(&dev->dev));
|
||||
|
||||
if (!pci_intx_mask_supported(dev))
|
||||
assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
|
||||
|
||||
match->assigned_dev_id = assigned_dev->assigned_dev_id;
|
||||
match->host_segnr = assigned_dev->segnr;
|
||||
match->host_busnr = assigned_dev->busnr;
|
||||
@ -616,6 +707,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
||||
match->flags = assigned_dev->flags;
|
||||
match->dev = dev;
|
||||
spin_lock_init(&match->intx_lock);
|
||||
spin_lock_init(&match->intx_mask_lock);
|
||||
match->irq_source_id = -1;
|
||||
match->kvm = kvm;
|
||||
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
|
||||
@ -761,6 +853,55 @@ static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Update the INTx mask state of an assigned PCI device.
 *
 * Looks up the device named by assigned_dev->assigned_dev_id and copies the
 * KVM_DEV_ASSIGN_MASK_INTX bit from the caller-supplied flags into the
 * device's flags.  If the guest side uses INTx, masking additionally
 * de-asserts the guest interrupt line, while unmasking may re-enable a
 * host IRQ line that had been disabled.
 *
 * Locking: kvm->lock is held for the whole operation; intx_mask_lock is
 * held around the flag update and nests outside intx_lock.
 *
 * Returns 0 on success or -ENODEV when no matching device is found.
 */
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
|
||||
struct kvm_assigned_pci_dev *assigned_dev)
|
||||
{
|
||||
int r = 0;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
assigned_dev->assigned_dev_id);
|
||||
if (!match) {
|
||||
r = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&match->intx_mask_lock);
|
||||
|
||||
/* Replace only the MASK_INTX bit; all other device flags are kept. */
match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
|
||||
match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
|
||||
|
||||
if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
|
||||
if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
|
||||
kvm_set_irq(match->kvm, match->irq_source_id,
|
||||
match->guest_irq, 0);
|
||||
/*
|
||||
* Masking at hardware-level is performed on demand,
|
||||
* i.e. when an IRQ actually arrives at the host.
|
||||
*/
|
||||
/*
 * NOTE(review): the PCI_2_3 test below reads the flags supplied with this
 * ioctl, not match->flags as recorded at assignment time -- confirm that
 * this is intended.
 */
} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
|
||||
/*
|
||||
* Unmask the IRQ line if required. Unmasking at
|
||||
* device level will be performed by user space.
|
||||
*/
|
||||
spin_lock_irq(&match->intx_lock);
|
||||
if (match->host_irq_disabled) {
|
||||
enable_irq(match->host_irq);
|
||||
match->host_irq_disabled = false;
|
||||
}
|
||||
spin_unlock_irq(&match->intx_lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&match->intx_mask_lock);
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
unsigned long arg)
|
||||
{
|
||||
@ -868,6 +1009,15 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case KVM_ASSIGN_SET_INTX_MASK: {
|
||||
struct kvm_assigned_pci_dev assigned_dev;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
break;
|
||||
@ -875,4 +1025,3 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -203,7 +203,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
|
||||
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
int dirty_count = kvm->tlbs_dirty;
|
||||
long dirty_count = kvm->tlbs_dirty;
|
||||
|
||||
smp_mb();
|
||||
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
|
||||
*/
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
kvm->mmu_notifier_seq++;
|
||||
need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
/* we've to flush the tlb before the pages can be freed */
|
||||
if (need_tlb_flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
}
|
||||
|
||||
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
|
||||
@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
for (; start < end; start += PAGE_SIZE)
|
||||
need_tlb_flush |= kvm_unmap_hva(kvm, start);
|
||||
need_tlb_flush |= kvm->tlbs_dirty;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
/* we've to flush the tlb before the pages can be freed */
|
||||
if (need_tlb_flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
}
|
||||
|
||||
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
* been freed.
|
||||
*/
|
||||
kvm->mmu_notifier_seq++;
|
||||
smp_wmb();
|
||||
/*
|
||||
* The above sequence increase must be visible before the
|
||||
* below count decrease but both values are read by the kvm
|
||||
* page fault under mmu_lock spinlock so we don't need to add
|
||||
* a smb_wmb() here in between the two.
|
||||
* below count decrease, which is ensured by the smp_wmb above
|
||||
* in conjunction with the smp_rmb in mmu_notifier_retry().
|
||||
*/
|
||||
kvm->mmu_notifier_count--;
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
young = kvm_age_hva(kvm, address);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
young = kvm_age_hva(kvm, address);
|
||||
if (young)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return young;
|
||||
}
|
||||
|
||||
@ -449,7 +450,7 @@ static void kvm_init_memslots_id(struct kvm *kvm)
|
||||
slots->id_to_index[i] = slots->memslots[i].id = i;
|
||||
}
|
||||
|
||||
static struct kvm *kvm_create_vm(void)
|
||||
static struct kvm *kvm_create_vm(unsigned long type)
|
||||
{
|
||||
int r, i;
|
||||
struct kvm *kvm = kvm_arch_alloc_vm();
|
||||
@ -457,7 +458,7 @@ static struct kvm *kvm_create_vm(void)
|
||||
if (!kvm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
r = kvm_arch_init_vm(kvm);
|
||||
r = kvm_arch_init_vm(kvm, type);
|
||||
if (r)
|
||||
goto out_err_nodisable;
|
||||
|
||||
@ -535,21 +536,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!dont || free->rmap != dont->rmap)
|
||||
vfree(free->rmap);
|
||||
|
||||
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
|
||||
kvm_destroy_dirty_bitmap(free);
|
||||
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
|
||||
vfree(free->lpage_info[i]);
|
||||
free->lpage_info[i] = NULL;
|
||||
}
|
||||
}
|
||||
kvm_arch_free_memslot(free, dont);
|
||||
|
||||
free->npages = 0;
|
||||
free->rmap = NULL;
|
||||
@ -616,7 +609,6 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_S390
|
||||
/*
|
||||
* Allocation size is twice as large as the actual dirty bitmap size.
|
||||
* This makes it possible to do double buffering: see x86's
|
||||
@ -624,6 +616,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
|
||||
*/
|
||||
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
#ifndef CONFIG_S390
|
||||
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
|
||||
|
||||
if (dirty_bytes > PAGE_SIZE)
|
||||
@ -636,21 +629,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
|
||||
memslot->dirty_bitmap_head = memslot->dirty_bitmap;
|
||||
memslot->nr_dirty_pages = 0;
|
||||
return 0;
|
||||
}
|
||||
#endif /* !CONFIG_S390 */
|
||||
|
||||
static struct kvm_memory_slot *
|
||||
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
if (gfn >= memslot->base_gfn &&
|
||||
gfn < memslot->base_gfn + memslot->npages)
|
||||
return memslot;
|
||||
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmp_memslot(const void *slot1, const void *slot2)
|
||||
@ -778,69 +758,24 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
r = -ENOMEM;
|
||||
|
||||
/* Allocate if a slot is being created */
|
||||
#ifndef CONFIG_S390
|
||||
if (npages && !new.rmap) {
|
||||
new.rmap = vzalloc(npages * sizeof(*new.rmap));
|
||||
|
||||
if (!new.rmap)
|
||||
goto out_free;
|
||||
|
||||
if (npages && !old.npages) {
|
||||
new.user_alloc = user_alloc;
|
||||
new.userspace_addr = mem->userspace_addr;
|
||||
}
|
||||
if (!npages)
|
||||
goto skip_lpage;
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||
unsigned long ugfn;
|
||||
unsigned long j;
|
||||
int lpages;
|
||||
int level = i + 2;
|
||||
|
||||
/* Avoid unused variable warning if no large pages */
|
||||
(void)level;
|
||||
|
||||
if (new.lpage_info[i])
|
||||
continue;
|
||||
|
||||
lpages = 1 + ((base_gfn + npages - 1)
|
||||
>> KVM_HPAGE_GFN_SHIFT(level));
|
||||
lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
|
||||
|
||||
new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
|
||||
|
||||
if (!new.lpage_info[i])
|
||||
#ifndef CONFIG_S390
|
||||
new.rmap = vzalloc(npages * sizeof(*new.rmap));
|
||||
if (!new.rmap)
|
||||
goto out_free;
|
||||
#endif /* not defined CONFIG_S390 */
|
||||
if (kvm_arch_create_memslot(&new, npages))
|
||||
goto out_free;
|
||||
|
||||
if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
new.lpage_info[i][0].write_count = 1;
|
||||
if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||
new.lpage_info[i][lpages - 1].write_count = 1;
|
||||
ugfn = new.userspace_addr >> PAGE_SHIFT;
|
||||
/*
|
||||
* If the gfn and userspace address are not aligned wrt each
|
||||
* other, or if explicitly asked to, disable large page
|
||||
* support for this slot
|
||||
*/
|
||||
if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
|
||||
!largepages_enabled)
|
||||
for (j = 0; j < lpages; ++j)
|
||||
new.lpage_info[i][j].write_count = 1;
|
||||
}
|
||||
|
||||
skip_lpage:
|
||||
|
||||
/* Allocate page dirty bitmap if needed */
|
||||
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
|
||||
if (kvm_create_dirty_bitmap(&new) < 0)
|
||||
goto out_free;
|
||||
/* destroy any largepage mappings for dirty tracking */
|
||||
}
|
||||
#else /* not defined CONFIG_S390 */
|
||||
new.user_alloc = user_alloc;
|
||||
if (user_alloc)
|
||||
new.userspace_addr = mem->userspace_addr;
|
||||
#endif /* not defined CONFIG_S390 */
|
||||
|
||||
if (!npages) {
|
||||
struct kvm_memory_slot *slot;
|
||||
@ -890,8 +825,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
if (!npages) {
|
||||
new.rmap = NULL;
|
||||
new.dirty_bitmap = NULL;
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
|
||||
new.lpage_info[i] = NULL;
|
||||
memset(&new.arch, 0, sizeof(new.arch));
|
||||
}
|
||||
|
||||
update_memslots(slots, &new);
|
||||
@ -978,6 +912,11 @@ int kvm_get_dirty_log(struct kvm *kvm,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Report the current value of the file-level largepages_enabled flag
 * (the flag that kvm_disable_largepages() clears).
 */
bool kvm_largepages_enabled(void)
|
||||
{
|
||||
return largepages_enabled;
|
||||
}
|
||||
|
||||
void kvm_disable_largepages(void)
|
||||
{
|
||||
largepages_enabled = false;
|
||||
@ -1031,12 +970,6 @@ int kvm_is_error_hva(unsigned long addr)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
|
||||
|
||||
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
|
||||
gfn_t gfn)
|
||||
{
|
||||
return search_memslots(slots, gfn);
|
||||
}
|
||||
|
||||
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
|
||||
@ -1459,7 +1392,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
|
||||
ghc->gpa = gpa;
|
||||
ghc->generation = slots->generation;
|
||||
ghc->memslot = __gfn_to_memslot(slots, gfn);
|
||||
ghc->memslot = gfn_to_memslot(kvm, gfn);
|
||||
ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
|
||||
if (!kvm_is_error_hva(ghc->hva))
|
||||
ghc->hva += offset;
|
||||
@ -1657,7 +1590,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
|
||||
#endif
|
||||
else
|
||||
return VM_FAULT_SIGBUS;
|
||||
return kvm_arch_vcpu_fault(vcpu, vmf);
|
||||
get_page(page);
|
||||
vmf->page = page;
|
||||
return 0;
|
||||
@ -1718,6 +1651,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
|
||||
goto vcpu_destroy;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
if (!kvm_vcpu_compatible(vcpu)) {
|
||||
r = -EINVAL;
|
||||
goto unlock_vcpu_destroy;
|
||||
}
|
||||
if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
|
||||
r = -EINVAL;
|
||||
goto unlock_vcpu_destroy;
|
||||
@ -2198,12 +2135,12 @@ static struct file_operations kvm_vm_fops = {
|
||||
.llseek = noop_llseek,
|
||||
};
|
||||
|
||||
static int kvm_dev_ioctl_create_vm(void)
|
||||
static int kvm_dev_ioctl_create_vm(unsigned long type)
|
||||
{
|
||||
int r;
|
||||
struct kvm *kvm;
|
||||
|
||||
kvm = kvm_create_vm();
|
||||
kvm = kvm_create_vm(type);
|
||||
if (IS_ERR(kvm))
|
||||
return PTR_ERR(kvm);
|
||||
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
|
||||
@ -2254,10 +2191,7 @@ static long kvm_dev_ioctl(struct file *filp,
|
||||
r = KVM_API_VERSION;
|
||||
break;
|
||||
case KVM_CREATE_VM:
|
||||
r = -EINVAL;
|
||||
if (arg)
|
||||
goto out;
|
||||
r = kvm_dev_ioctl_create_vm();
|
||||
r = kvm_dev_ioctl_create_vm(arg);
|
||||
break;
|
||||
case KVM_CHECK_EXTENSION:
|
||||
r = kvm_dev_ioctl_check_extension_generic(arg);
|
||||
|
Loading…
Reference in New Issue
Block a user