Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "Generic:

  - Use memdup_array_user() to harden against overflow.

  - Unconditionally advertise KVM_CAP_DEVICE_CTRL for all architectures.

  - Clean up Kconfigs that all KVM architectures were selecting

  - New functionality around "guest_memfd", a new userspace API that
    creates an anonymous file and returns a file descriptor that refers
    to it. guest_memfd files are bound to their owning virtual machine,
    cannot be mapped, read, or written by userspace, and cannot be
    resized. guest_memfd files do however support PUNCH_HOLE, which can
    be used to switch a memory area between guest_memfd and regular
    anonymous memory.

  - New ioctl KVM_SET_MEMORY_ATTRIBUTES allowing userspace to specify
    per-page attributes for a given page of guest memory; right now the
    only attribute is whether the guest expects to access memory via
    guest_memfd or not, which in Confidential VMs backed by SEV-SNP,
    TDX or ARM64 pKVM is checked by firmware or hypervisor that
    guarantees confidentiality (AMD PSP, Intel TDX module, or EL2 in
    the case of pKVM).

 x86:

  - Support for "software-protected VMs" that can use the new
    guest_memfd and page attributes infrastructure. This is mostly
    useful for testing, since there is no pKVM-like infrastructure to
    provide a meaningfully reduced TCB.

  - Fix a relatively benign off-by-one error when splitting huge pages
    during CLEAR_DIRTY_LOG.

  - Fix a bug where KVM could incorrectly test-and-clear dirty bits in
    non-leaf TDP MMU SPTEs if a racing thread replaces a huge SPTE with
    a non-huge SPTE.

  - Use more generic lockdep assertions in paths that don't actually
    care about whether the caller is a reader or a writer.

  - Let Xen guests opt out of having PV clock reported as "based on a
    stable TSC", because some of them don't expect the "TSC stable" bit
    (added to the pvclock ABI by KVM, but never set by Xen) to be set.

  - Revert a bogus, made-up nested SVM consistency check for
    TLB_CONTROL.

  - Advertise flush-by-ASID support for nSVM unconditionally, as KVM
    always flushes on nested transitions, i.e. always satisfies flush
    requests. This allows running bleeding edge versions of VMware
    Workstation on top of KVM.

  - Sanity check that the CPU supports flush-by-ASID when enabling SEV
    support.

  - On AMD machines with vNMI, always rely on hardware instead of
    intercepting IRET in some cases to detect unmasking of NMIs

  - Support for virtualizing Linear Address Masking (LAM)

  - Fix a variety of vPMU bugs where KVM fails to stop/reset counters
    and other state prior to refreshing the vPMU model.

  - Fix a double-overflow PMU bug by tracking emulated counter events
    using a dedicated field instead of snapshotting the "previous"
    counter. If the hardware PMC count triggers overflow that is
    recognized in the same VM-Exit that KVM manually bumps an event
    count, KVM would pend PMIs for both the hardware-triggered overflow
    and for KVM-triggered overflow.

  - Turn off KVM_WERROR by default for all configs so that it's not
    inadvertently enabled by non-KVM developers, which can be
    problematic for subsystems that require no regressions for W=1
    builds.

  - Advertise all of the host-supported CPUID bits that enumerate
    IA32_SPEC_CTRL "features".

  - Don't force a masterclock update when a vCPU synchronizes to the
    current TSC generation, as updating the masterclock can cause
    kvmclock's time to "jump" unexpectedly, e.g. when userspace
    hotplugs a pre-created vCPU.

  - Use RIP-relative address to read kvm_rebooting in the VM-Enter
    fault paths, partly as a super minor optimization, but mostly to
    make KVM play nice with position independent executable builds.

  - Guard KVM-on-HyperV's range-based TLB flush hooks with an #ifdef on
    CONFIG_HYPERV as a minor optimization, and to self-document the
    code.

  - Add CONFIG_KVM_HYPERV to allow disabling KVM support for HyperV
    "emulation" at build time.

 ARM64:

  - LPA2 support, adding 52bit IPA/PA capability for 4kB and 16kB base
    granule sizes. Branch shared with the arm64 tree.

  - Large Fine-Grained Trap rework, bringing some sanity to the
    feature, although there is more to come. This comes with a prefix
    branch shared with the arm64 tree.

  - Some additional Nested Virtualization groundwork, mostly
    introducing the NV2 VNCR support and retargeting the NV support to
    that version of the architecture.

  - A small set of vgic fixes and associated cleanups.

 Loongarch:

  - Optimization for memslot hugepage checking

  - Cleanup and fix some HW/SW timer issues

  - Add LSX/LASX (128bit/256bit SIMD) support

 RISC-V:

  - KVM_GET_REG_LIST improvement for vector registers

  - Generate ISA extension reg_list using macros in get-reg-list
    selftest

  - Support for reporting steal time along with selftest

 s390:

  - Bugfixes

 Selftests:

  - Fix an annoying goof where the NX hugepage test prints out garbage
    instead of the magic token needed to run the test.

  - Fix build errors when a header is deleted/moved due to a missing
    flag in the Makefile.

  - Detect if KVM bugged/killed a selftest's VM and print out a helpful
    message instead of complaining that a random ioctl() failed.

  - Annotate the guest printf/assert helpers with __printf(), and fix
    the various bugs that were lurking due to lack of said annotation.

  There are two non-KVM patches buried in the middle of guest_memfd
  support:

    fs: Rename anon_inode_getfile_secure() and anon_inode_getfd_secure()
    mm: Add AS_UNMOVABLE to mark mapping as completely unmovable

  The first is small and mostly suggested-by Christian Brauner; the
  second a bit less so but it was written by an mm person (Vlastimil
  Babka)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (185 commits)
  x86/kvm: Do not try to disable kvmclock if it was not enabled
  KVM: x86: add missing "depends on KVM"
  KVM: fix direction of dependency on MMU notifiers
  KVM: introduce CONFIG_KVM_COMMON
  KVM: arm64: Add missing memory barriers when switching to pKVM's hyp pgd
  KVM: arm64: vgic-its: Avoid potential UAF in LPI translation cache
  RISC-V: KVM: selftests: Add get-reg-list test for STA registers
  RISC-V: KVM: selftests: Add steal_time test support
  RISC-V: KVM: selftests: Add guest_sbi_probe_extension
  RISC-V: KVM: selftests: Move sbi_ecall to processor.c
  RISC-V: KVM: Implement SBI STA extension
  RISC-V: KVM: Add support for SBI STA registers
  RISC-V: KVM: Add support for SBI extension registers
  RISC-V: KVM: Add SBI STA info to vcpu_arch
  RISC-V: KVM: Add steal-update vcpu request
  RISC-V: KVM: Add SBI STA extension skeleton
  RISC-V: paravirt: Implement steal-time support
  RISC-V: Add SBI STA extension definitions
  RISC-V: paravirt: Add skeleton for pv-time support
  RISC-V: KVM: Fix indentation in kvm_riscv_vcpu_set_reg_csr()
  ...
commit 09d1c6a80f
@@ -3996,9 +3996,9 @@
 			vulnerability. System may allow data leaks with this
 			option.

-	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
-			steal time accounting. steal time is computed, but
-			won't influence scheduler behaviour
+	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES,RISCV] Disable
+			paravirtualized steal time accounting. steal time is
+			computed, but won't influence scheduler behaviour

 	nosync		[HW,M68K] Disables sync negotiation for all devices.
@@ -147,10 +147,29 @@ described as 'basic' will be available.
The new VM has no virtual cpus and no memory.
You probably want to use 0 as machine type.

X86:
^^^^

Supported X86 VM types can be queried via KVM_CAP_VM_TYPES.

S390:
^^^^^

In order to create user controlled virtual machines on S390, check
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).

MIPS:
^^^^^

To use hardware assisted virtualization on MIPS (VZ ASE) rather than
the default trap & emulate implementation (which changes the virtual
memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
flag KVM_VM_MIPS_VZ.

ARM64:
^^^^^^

On arm64, the physical address size for a VM (IPA Size limit) is limited
to 40bits by default. The limit can be configured if the host supports the
extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
@@ -608,18 +627,6 @@ interrupt number dequeues the interrupt.
 This is an asynchronous vcpu ioctl and can be invoked from any thread.


-4.17 KVM_DEBUG_GUEST
---------------------
-
-:Capability: basic
-:Architectures: none
-:Type: vcpu ioctl
-:Parameters: none)
-:Returns: -1 on error
-
-Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
-
-
 4.18 KVM_GET_MSRS
 -----------------

@@ -6192,6 +6199,130 @@ to know what fields can be changed for the system register described by
``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a
superset of the features supported by the system.

4.140 KVM_SET_USER_MEMORY_REGION2
---------------------------------

:Capability: KVM_CAP_USER_MEMORY2
:Architectures: all
:Type: vm ioctl
:Parameters: struct kvm_userspace_memory_region2 (in)
:Returns: 0 on success, -1 on error

KVM_SET_USER_MEMORY_REGION2 is an extension to KVM_SET_USER_MEMORY_REGION that
allows mapping guest_memfd memory into a guest. All fields shared with
KVM_SET_USER_MEMORY_REGION behave identically. Userspace can set KVM_MEM_GUEST_MEMFD
in flags to have KVM bind the memory region to a given guest_memfd range of
[guest_memfd_offset, guest_memfd_offset + memory_size]. The target guest_memfd
must point at a file created via KVM_CREATE_GUEST_MEMFD on the current VM, and
the target range must not be bound to any other memory region. All standard
bounds checks apply (use common sense).

::

  struct kvm_userspace_memory_region2 {
	__u32 slot;
	__u32 flags;
	__u64 guest_phys_addr;
	__u64 memory_size; /* bytes */
	__u64 userspace_addr; /* start of the userspace allocated memory */
	__u64 guest_memfd_offset;
	__u32 guest_memfd;
	__u32 pad1;
	__u64 pad2[14];
  };

A KVM_MEM_GUEST_MEMFD region _must_ have a valid guest_memfd (private memory) and
userspace_addr (shared memory). However, "valid" for userspace_addr simply
means that the address itself must be a legal userspace address. The backing
mapping for userspace_addr is not required to be valid/populated at the time of
KVM_SET_USER_MEMORY_REGION2, e.g. shared memory can be lazily mapped/allocated
on-demand.

When mapping a gfn into the guest, KVM selects shared vs. private, i.e. consumes
userspace_addr vs. guest_memfd, based on the gfn's KVM_MEMORY_ATTRIBUTE_PRIVATE
state. At VM creation time, all memory is shared, i.e. the PRIVATE attribute
is '0' for all gfns. Userspace can control whether memory is shared/private by
toggling KVM_MEMORY_ATTRIBUTE_PRIVATE via KVM_SET_MEMORY_ATTRIBUTES as needed.
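As an editorial illustration (not part of the patch above), here is one way a
VMM might wire a guest_memfd-backed slot together with an ordinary anonymous
mapping via KVM_SET_USER_MEMORY_REGION2. This is a minimal sketch: it assumes
uapi headers that already carry these definitions (Linux 6.8-era
<linux/kvm.h>), an already-created vm_fd, and does almost no error handling::

  #include <linux/kvm.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>

  /* Illustrative helper: bind 'size' bytes of guest memory at GPA 0, slot 0. */
  static int bind_guest_memfd(int vm_fd, __u64 size)
  {
          struct kvm_create_guest_memfd gmem = { .size = size };
          int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
          void *shared;

          if (gmem_fd < 0)
                  return -1;

          /* Shared half: a plain anonymous mapping, lazily populated. */
          shared = mmap(NULL, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_ANONYMOUS, -1, 0);
          if (shared == MAP_FAILED)
                  return -1;

          struct kvm_userspace_memory_region2 region = {
                  .slot               = 0,
                  .flags              = KVM_MEM_GUEST_MEMFD,
                  .guest_phys_addr    = 0,
                  .memory_size        = size,
                  .userspace_addr     = (__u64)(unsigned long)shared,
                  .guest_memfd        = gmem_fd,
                  .guest_memfd_offset = 0,
          };

          /* Private half comes from gmem_fd, shared half from 'shared'. */
          return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
  }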
4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------

:Capability: KVM_CAP_MEMORY_ATTRIBUTES
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_memory_attributes (in)
:Returns: 0 on success, <0 on error

KVM_SET_MEMORY_ATTRIBUTES allows userspace to set memory attributes for a range
of guest physical memory.

::

  struct kvm_memory_attributes {
	__u64 address;
	__u64 size;
	__u64 attributes;
	__u64 flags;
  };

  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)

The address and size must be page aligned. The supported attributes can be
retrieved via ioctl(KVM_CHECK_EXTENSION) on KVM_CAP_MEMORY_ATTRIBUTES. If
executed on a VM, KVM_CAP_MEMORY_ATTRIBUTES precisely returns the attributes
supported by that VM. If executed at system scope, KVM_CAP_MEMORY_ATTRIBUTES
returns all attributes supported by KVM. The only attribute defined at this
time is KVM_MEMORY_ATTRIBUTE_PRIVATE, which marks the associated gfn as being
guest private memory.

Note, there is no "get" API. Userspace is responsible for explicitly tracking
the state of a gfn/page as needed.

The "flags" field is reserved for future extensions and must be '0'.
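For illustration only, a minimal sketch of flipping a range between shared and
private with KVM_SET_MEMORY_ATTRIBUTES; it assumes the same 6.8-era uapi
definitions as above and leaves capability checks and error handling to the
caller::

  #include <linux/kvm.h>
  #include <stdbool.h>
  #include <sys/ioctl.h>

  static int set_private(int vm_fd, __u64 gpa, __u64 len, bool to_private)
  {
          struct kvm_memory_attributes attr = {
                  .address    = gpa,          /* must be page aligned */
                  .size       = len,          /* must be page aligned */
                  .attributes = to_private ? KVM_MEMORY_ATTRIBUTE_PRIVATE : 0,
                  .flags      = 0,            /* reserved, must be zero */
          };

          return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attr);
  }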
4.142 KVM_CREATE_GUEST_MEMFD
----------------------------

:Capability: KVM_CAP_GUEST_MEMFD
:Architectures: none
:Type: vm ioctl
:Parameters: struct kvm_create_guest_memfd (in)
:Returns: 0 on success, <0 on error

KVM_CREATE_GUEST_MEMFD creates an anonymous file and returns a file descriptor
that refers to it. guest_memfd files are roughly analogous to files created
via memfd_create(), e.g. guest_memfd files live in RAM, have volatile storage,
and are automatically released when the last reference is dropped. Unlike
"regular" memfd_create() files, guest_memfd files are bound to their owning
virtual machine (see below), cannot be mapped, read, or written by userspace,
and cannot be resized (guest_memfd files do however support PUNCH_HOLE).

::

  struct kvm_create_guest_memfd {
	__u64 size;
	__u64 flags;
	__u64 reserved[6];
  };

Conceptually, the inode backing a guest_memfd file represents physical memory,
i.e. is coupled to the virtual machine as a thing, not to a "struct kvm". The
file itself, which is bound to a "struct kvm", is that instance's view of the
underlying memory, e.g. effectively provides the translation of guest addresses
to host memory. This allows for use cases where multiple KVM structures are
used to manage a single virtual machine, e.g. when performing intrahost
migration of a virtual machine.

KVM currently only supports mapping guest_memfd via KVM_SET_USER_MEMORY_REGION2,
and more specifically via the guest_memfd and guest_memfd_offset fields in
"struct kvm_userspace_memory_region2", where guest_memfd_offset is the offset
into the guest_memfd instance. For a given guest_memfd file, there can be at
most one mapping per page, i.e. binding multiple memory regions to a single
guest_memfd range is not allowed (any number of memory regions can be bound to
a single guest_memfd file, but the bound ranges must not overlap).

See KVM_SET_USER_MEMORY_REGION2 for additional details.
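A hedged sketch of the two file-level operations described above: creating the
guest_memfd, and later punching a hole to drop the backing for a range (the
documentation says PUNCH_HOLE is supported; which ranges to punch, and when,
is VMM policy and not shown here)::

  #define _GNU_SOURCE
  #include <fcntl.h>          /* fallocate(), FALLOC_FL_* */
  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  static int make_gmem(int vm_fd, __u64 size)
  {
          struct kvm_create_guest_memfd req = { .size = size, .flags = 0 };

          /* On success, returns a new fd referring to the guest_memfd. */
          return ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &req);
  }

  /* Release the guest_memfd pages backing [offset, offset + len). */
  static int gmem_discard(int gmem_fd, __u64 offset, __u64 len)
  {
          return fallocate(gmem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                           offset, len);
  }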
5. The kvm_run structure
========================

@@ -6824,6 +6955,30 @@ array field represents return values. The userspace should update the return
values of SBI call before resuming the VCPU. For more details on RISC-V SBI
spec refer, https://github.com/riscv/riscv-sbi-doc.

::

  /* KVM_EXIT_MEMORY_FAULT */
  struct {
  #define KVM_MEMORY_EXIT_FLAG_PRIVATE	(1ULL << 3)
	__u64 flags;
	__u64 gpa;
	__u64 size;
  } memory_fault;

KVM_EXIT_MEMORY_FAULT indicates the vCPU has encountered a memory fault that
could not be resolved by KVM. The 'gpa' and 'size' (in bytes) describe the
guest physical address range [gpa, gpa + size) of the fault. The 'flags' field
describes properties of the faulting access that are likely pertinent:

- KVM_MEMORY_EXIT_FLAG_PRIVATE - When set, indicates the memory fault occurred
  on a private memory access. When clear, indicates the fault occurred on a
  shared access.

Note! KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it
accompanies a return code of '-1', not '0'! errno will always be set to EFAULT
or EHWPOISON when KVM exits with KVM_EXIT_MEMORY_FAULT; userspace should assume
kvm_run.exit_reason is stale/undefined for all other error numbers.
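As a rough sketch of the unusual -1/errno convention described above
(convert_range() is a hypothetical VMM helper, e.g. one that toggles
KVM_MEMORY_ATTRIBUTE_PRIVATE for the reported range before the caller
re-enters the guest)::

  #include <errno.h>
  #include <stdbool.h>
  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Hypothetical VMM helper, e.g. flips KVM_MEMORY_ATTRIBUTE_PRIVATE. */
  int convert_range(int vm_fd, __u64 gpa, __u64 size, bool to_private);

  /* 'run' is the mmap()ed struct kvm_run of this vCPU. */
  static int run_once(int vm_fd, int vcpu_fd, struct kvm_run *run)
  {
          int ret = ioctl(vcpu_fd, KVM_RUN, 0);

          if (ret < 0 && (errno == EFAULT || errno == EHWPOISON) &&
              run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
                  bool to_private =
                          run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE;

                  /* Convert to the access type the guest actually used,
                   * then let the caller retry KVM_RUN. */
                  return convert_range(vm_fd, run->memory_fault.gpa,
                                       run->memory_fault.size, to_private);
          }
          return ret;
  }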
::

  /* KVM_EXIT_NOTIFY */
@@ -7858,6 +8013,27 @@ This capability is aimed to mitigate the threat that malicious VMs can
cause CPU stuck (due to event windows don't open up) and make the CPU
unavailable to host or other VMs.

7.34 KVM_CAP_MEMORY_FAULT_INFO
------------------------------

:Architectures: x86
:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.

The presence of this capability indicates that KVM_RUN will fill
kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
there is a valid memslot but no backing VMA for the corresponding host virtual
address.

The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
to KVM_EXIT_MEMORY_FAULT.

Note: Userspaces which attempt to resolve memory faults so that they can retry
KVM_RUN are encouraged to guard against repeatedly receiving the same
error/annotated fault.

See KVM_EXIT_MEMORY_FAULT for more information.
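One possible way to honour the note above about repeated identical faults is to
remember the last annotated fault and bail out if it recurs unchanged; this is
a hypothetical guard, not an API requirement::

  #include <linux/kvm.h>
  #include <stdbool.h>

  /* Returns true if this fault is byte-for-byte identical to the last one. */
  static bool fault_repeats(const struct kvm_run *run)
  {
          static __u64 last_gpa = ~0ULL, last_size, last_flags;
          bool same = run->memory_fault.gpa   == last_gpa &&
                      run->memory_fault.size  == last_size &&
                      run->memory_fault.flags == last_flags;

          last_gpa   = run->memory_fault.gpa;
          last_size  = run->memory_fault.size;
          last_flags = run->memory_fault.flags;
          return same;
  }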
8. Other capabilities.
======================

@@ -8374,6 +8550,7 @@ PVHVM guests. Valid flags are::
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND	(1 << 5)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)
  #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -8417,6 +8594,11 @@ behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
specifically enabled (by the guest making the hypercall, causing the VMM
to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).

The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports
clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
done when the KVM_CAP_XEN_HVM ioctl sets the
KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.

8.31 KVM_CAP_PPC_MULTITCE
-------------------------

@@ -8596,6 +8778,19 @@ block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
64-bit bitmap (each bit describing a block size). The default value is
0, to disable the eager page splitting.

8.41 KVM_CAP_VM_TYPES
---------------------

:Capability: KVM_CAP_VM_TYPES
:Architectures: x86
:Type: system ioctl

This capability returns a bitmap of supported VM types. The 1-setting of bit @n
means the VM type with value @n is supported. Possible values of @n are::

  #define KVM_X86_DEFAULT_VM	0
  #define KVM_X86_SW_PROTECTED_VM	1
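Purely as an illustration of how the bitmap is meant to be consumed, a sketch
that prefers KVM_X86_SW_PROTECTED_VM when the host advertises it (again
assuming 6.8-era uapi headers)::

  #include <fcntl.h>
  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  static int create_preferred_vm(void)
  {
          int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
          int types, type = KVM_X86_DEFAULT_VM;

          if (kvm_fd < 0)
                  return -1;

          /* System-scope query: bit n set => VM type n is supported. */
          types = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
          if (types > 0 && (types & (1 << KVM_X86_SW_PROTECTED_VM)))
                  type = KVM_X86_SW_PROTECTED_VM;

          /* KVM_CREATE_VM takes the machine/VM type as its argument. */
          return ioctl(kvm_fd, KVM_CREATE_VM, type);
  }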
9. Known KVM API problems
=========================

@@ -43,10 +43,9 @@ On x86:

 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock

-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
-  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
-  cannot be taken without already holding kvm->arch.mmu_lock (typically with
-  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
+- kvm->arch.mmu_lock is an rwlock; critical sections for
+  kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock must
+  also take kvm->arch.mmu_lock

 Everything else is a leaf: no other lock is taken inside the critical
 sections.
@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
|
||||
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_permission_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
|
||||
}
|
||||
|
||||
const char *esr_get_class_string(unsigned long esr);
|
||||
#endif /* __ASSEMBLY */
|
||||
|
||||
|
@ -108,6 +108,7 @@
|
||||
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
|
||||
|
||||
/* TCR_EL2 Registers bits */
|
||||
#define TCR_EL2_DS (1UL << 32)
|
||||
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
|
||||
#define TCR_EL2_TBI (1 << 20)
|
||||
#define TCR_EL2_PS_SHIFT 16
|
||||
@ -122,6 +123,7 @@
|
||||
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
|
||||
|
||||
/* VTCR_EL2 Registers bits */
|
||||
#define VTCR_EL2_DS TCR_EL2_DS
|
||||
#define VTCR_EL2_RES1 (1U << 31)
|
||||
#define VTCR_EL2_HD (1 << 22)
|
||||
#define VTCR_EL2_HA (1 << 21)
|
||||
@ -344,36 +346,47 @@
|
||||
* Once we get to a point where the two describe the same thing, we'll
|
||||
* merge the definitions. One day.
|
||||
*/
|
||||
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
|
||||
#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0
|
||||
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
|
||||
#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
|
||||
#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK)
|
||||
|
||||
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
|
||||
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
|
||||
GENMASK(26, 25) | BIT(21) | BIT(18) | \
|
||||
/*
|
||||
* The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any
|
||||
* future additions, define __HFGWTR* macros relative to __HFGRTR* ones.
|
||||
*/
|
||||
#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
|
||||
GENMASK(26, 25) | BIT(21) | BIT(18) | \
|
||||
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
|
||||
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
|
||||
#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
|
||||
#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK)
|
||||
#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK)
|
||||
#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK)
|
||||
|
||||
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
|
||||
#define __HFGITR_EL2_MASK GENMASK(54, 0)
|
||||
#define __HFGITR_EL2_nMASK GENMASK(56, 55)
|
||||
#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0
|
||||
#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0))
|
||||
#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK)
|
||||
|
||||
#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
|
||||
GENMASK(21, 20) | BIT(8))
|
||||
#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
|
||||
#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
|
||||
#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0
|
||||
#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \
|
||||
GENMASK(41, 40) | GENMASK(37, 22) | \
|
||||
GENMASK(19, 9) | GENMASK(7, 0))
|
||||
#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK)
|
||||
|
||||
#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
|
||||
BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
|
||||
BIT(22) | BIT(9) | BIT(6))
|
||||
#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
|
||||
#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
|
||||
#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0
|
||||
#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \
|
||||
GENMASK(46, 44) | GENMASK(42, 41) | \
|
||||
GENMASK(37, 35) | GENMASK(33, 31) | \
|
||||
GENMASK(29, 23) | GENMASK(21, 10) | \
|
||||
GENMASK(8, 7) | GENMASK(5, 0))
|
||||
#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK)
|
||||
|
||||
#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0
|
||||
#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0))
|
||||
#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK)
|
||||
|
||||
/* Similar definitions for HCRX_EL2 */
|
||||
#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
|
||||
#define __HCRX_EL2_MASK (0)
|
||||
#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
|
||||
#define __HCRX_EL2_RES0 HCRX_EL2_RES0
|
||||
#define __HCRX_EL2_MASK (BIT(6))
|
||||
#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK)
|
||||
|
||||
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
|
||||
#define HPFAR_MASK (~UL(0xf))
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <asm/esr.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_nested.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/virt.h>
|
||||
@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
|
||||
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
|
||||
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature)
|
||||
{
|
||||
return test_bit(feature, vcpu->kvm->arch.vcpu_features);
|
||||
}
|
||||
|
||||
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
|
||||
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
|
||||
|
||||
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return __is_hyp_ctxt(&vcpu->arch.ctxt);
|
||||
return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
|
||||
}
|
||||
|
||||
static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
|
||||
static inline
|
||||
bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
|
||||
return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
|
||||
static inline
|
||||
bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
|
||||
return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
static inline
|
||||
u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
BUG_ON(!esr_fsc_is_permission_fault(esr));
|
||||
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
|
||||
}
|
||||
|
||||
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
|
||||
@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
|
||||
* first), then a permission fault to allow the flags
|
||||
* to be set.
|
||||
*/
|
||||
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
|
||||
case ESR_ELx_FSC_PERM:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return kvm_vcpu_trap_is_permission_fault(vcpu);
|
||||
}
|
||||
|
||||
if (kvm_vcpu_trap_is_iabt(vcpu))
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/kvm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/vncr_mapping.h>
|
||||
|
||||
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
||||
|
||||
@ -306,6 +307,7 @@ struct kvm_arch {
|
||||
* Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
|
||||
*/
|
||||
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
|
||||
#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask)
|
||||
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
|
||||
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
|
||||
u64 id_regs[KVM_ARM_ID_REG_NUM];
|
||||
@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info {
|
||||
u64 disr_el1; /* Deferred [SError] Status Register */
|
||||
};
|
||||
|
||||
/*
|
||||
* VNCR() just places the VNCR_capable registers in the enum after
|
||||
* __VNCR_START__, and the value (after correction) to be an 8-byte offset
|
||||
* from the VNCR base. As we don't require the enum to be otherwise ordered,
|
||||
* we need the terrible hack below to ensure that we correctly size the
|
||||
* sys_regs array, no matter what.
|
||||
*
|
||||
* The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
|
||||
* treasure trove of bit hacks:
|
||||
* https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
|
||||
*/
|
||||
#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
|
||||
#define VNCR(r) \
|
||||
__before_##r, \
|
||||
r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
|
||||
__after_##r = __MAX__(__before_##r - 1, r)
|
||||
|
||||
enum vcpu_sysreg {
|
||||
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
|
||||
MPIDR_EL1, /* MultiProcessor Affinity Register */
|
||||
CLIDR_EL1, /* Cache Level ID Register */
|
||||
CSSELR_EL1, /* Cache Size Selection Register */
|
||||
SCTLR_EL1, /* System Control Register */
|
||||
ACTLR_EL1, /* Auxiliary Control Register */
|
||||
CPACR_EL1, /* Coprocessor Access Control */
|
||||
ZCR_EL1, /* SVE Control */
|
||||
TTBR0_EL1, /* Translation Table Base Register 0 */
|
||||
TTBR1_EL1, /* Translation Table Base Register 1 */
|
||||
TCR_EL1, /* Translation Control Register */
|
||||
TCR2_EL1, /* Extended Translation Control Register */
|
||||
ESR_EL1, /* Exception Syndrome Register */
|
||||
AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
|
||||
AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
|
||||
FAR_EL1, /* Fault Address Register */
|
||||
MAIR_EL1, /* Memory Attribute Indirection Register */
|
||||
VBAR_EL1, /* Vector Base Address Register */
|
||||
CONTEXTIDR_EL1, /* Context ID Register */
|
||||
TPIDR_EL0, /* Thread ID, User R/W */
|
||||
TPIDRRO_EL0, /* Thread ID, User R/O */
|
||||
TPIDR_EL1, /* Thread ID, Privileged */
|
||||
AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
|
||||
CNTKCTL_EL1, /* Timer Control Register (EL1) */
|
||||
PAR_EL1, /* Physical Address Register */
|
||||
MDSCR_EL1, /* Monitor Debug System Control Register */
|
||||
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
|
||||
OSLSR_EL1, /* OS Lock Status Register */
|
||||
DISR_EL1, /* Deferred Interrupt Status Register */
|
||||
@ -381,26 +383,11 @@ enum vcpu_sysreg {
|
||||
APGAKEYLO_EL1,
|
||||
APGAKEYHI_EL1,
|
||||
|
||||
ELR_EL1,
|
||||
SP_EL1,
|
||||
SPSR_EL1,
|
||||
|
||||
CNTVOFF_EL2,
|
||||
CNTV_CVAL_EL0,
|
||||
CNTV_CTL_EL0,
|
||||
CNTP_CVAL_EL0,
|
||||
CNTP_CTL_EL0,
|
||||
|
||||
/* Memory Tagging Extension registers */
|
||||
RGSR_EL1, /* Random Allocation Tag Seed Register */
|
||||
GCR_EL1, /* Tag Control Register */
|
||||
TFSR_EL1, /* Tag Fault Status Register (EL1) */
|
||||
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
|
||||
|
||||
/* Permission Indirection Extension registers */
|
||||
PIR_EL1, /* Permission Indirection Register 1 (EL1) */
|
||||
PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */
|
||||
|
||||
/* 32bit specific registers. */
|
||||
DACR32_EL2, /* Domain Access Control Register */
|
||||
IFSR32_EL2, /* Instruction Fault Status Register */
|
||||
@ -408,21 +395,14 @@ enum vcpu_sysreg {
|
||||
DBGVCR32_EL2, /* Debug Vector Catch Register */
|
||||
|
||||
/* EL2 registers */
|
||||
VPIDR_EL2, /* Virtualization Processor ID Register */
|
||||
VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */
|
||||
SCTLR_EL2, /* System Control Register (EL2) */
|
||||
ACTLR_EL2, /* Auxiliary Control Register (EL2) */
|
||||
HCR_EL2, /* Hypervisor Configuration Register */
|
||||
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
|
||||
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
|
||||
HSTR_EL2, /* Hypervisor System Trap Register */
|
||||
HACR_EL2, /* Hypervisor Auxiliary Control Register */
|
||||
HCRX_EL2, /* Extended Hypervisor Configuration Register */
|
||||
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
|
||||
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
|
||||
TCR_EL2, /* Translation Control Register (EL2) */
|
||||
VTTBR_EL2, /* Virtualization Translation Table Base Register */
|
||||
VTCR_EL2, /* Virtualization Translation Control Register */
|
||||
SPSR_EL2, /* EL2 saved program status register */
|
||||
ELR_EL2, /* EL2 exception link register */
|
||||
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
|
||||
@ -435,19 +415,62 @@ enum vcpu_sysreg {
|
||||
VBAR_EL2, /* Vector Base Address Register (EL2) */
|
||||
RVBAR_EL2, /* Reset Vector Base Address Register */
|
||||
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
|
||||
TPIDR_EL2, /* EL2 Software Thread ID Register */
|
||||
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
|
||||
SP_EL2, /* EL2 Stack Pointer */
|
||||
HFGRTR_EL2,
|
||||
HFGWTR_EL2,
|
||||
HFGITR_EL2,
|
||||
HDFGRTR_EL2,
|
||||
HDFGWTR_EL2,
|
||||
CNTHP_CTL_EL2,
|
||||
CNTHP_CVAL_EL2,
|
||||
CNTHV_CTL_EL2,
|
||||
CNTHV_CVAL_EL2,
|
||||
|
||||
__VNCR_START__, /* Any VNCR-capable reg goes after this point */
|
||||
|
||||
VNCR(SCTLR_EL1),/* System Control Register */
|
||||
VNCR(ACTLR_EL1),/* Auxiliary Control Register */
|
||||
VNCR(CPACR_EL1),/* Coprocessor Access Control */
|
||||
VNCR(ZCR_EL1), /* SVE Control */
|
||||
VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
|
||||
VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
|
||||
VNCR(TCR_EL1), /* Translation Control Register */
|
||||
VNCR(TCR2_EL1), /* Extended Translation Control Register */
|
||||
VNCR(ESR_EL1), /* Exception Syndrome Register */
|
||||
VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
|
||||
VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
|
||||
VNCR(FAR_EL1), /* Fault Address Register */
|
||||
VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
|
||||
VNCR(VBAR_EL1), /* Vector Base Address Register */
|
||||
VNCR(CONTEXTIDR_EL1), /* Context ID Register */
|
||||
VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
|
||||
VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
|
||||
VNCR(ELR_EL1),
|
||||
VNCR(SP_EL1),
|
||||
VNCR(SPSR_EL1),
|
||||
VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
|
||||
VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
|
||||
VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
|
||||
VNCR(HCR_EL2), /* Hypervisor Configuration Register */
|
||||
VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
|
||||
VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
|
||||
VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
|
||||
VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
|
||||
VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
|
||||
|
||||
/* Permission Indirection Extension registers */
|
||||
VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
|
||||
VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
|
||||
|
||||
VNCR(HFGRTR_EL2),
|
||||
VNCR(HFGWTR_EL2),
|
||||
VNCR(HFGITR_EL2),
|
||||
VNCR(HDFGRTR_EL2),
|
||||
VNCR(HDFGWTR_EL2),
|
||||
VNCR(HAFGRTR_EL2),
|
||||
|
||||
VNCR(CNTVOFF_EL2),
|
||||
VNCR(CNTV_CVAL_EL0),
|
||||
VNCR(CNTV_CTL_EL0),
|
||||
VNCR(CNTP_CVAL_EL0),
|
||||
VNCR(CNTP_CTL_EL0),
|
||||
|
||||
NR_SYS_REGS /* Nothing after this line! */
|
||||
};
|
||||
|
||||
@ -464,6 +487,9 @@ struct kvm_cpu_context {
|
||||
u64 sys_regs[NR_SYS_REGS];
|
||||
|
||||
struct kvm_vcpu *__hyp_running_vcpu;
|
||||
|
||||
/* This pointer has to be 4kB aligned. */
|
||||
u64 *vncr_array;
|
||||
};
|
||||
|
||||
struct kvm_host_data {
|
||||
@ -826,8 +852,19 @@ struct kvm_vcpu_arch {
|
||||
* accessed by a running VCPU. For example, for userspace access or
|
||||
* for system registers that are never context switched, but only
|
||||
* emulated.
|
||||
*
|
||||
* Don't bother with VNCR-based accesses in the nVHE code, it has no
|
||||
* business dealing with NV.
|
||||
*/
|
||||
#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)])
|
||||
static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
|
||||
{
|
||||
#if !defined (__KVM_NVHE_HYPERVISOR__)
|
||||
if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
|
||||
r >= __VNCR_START__ && ctxt->vncr_array))
|
||||
return &ctxt->vncr_array[r - __VNCR_START__];
|
||||
#endif
|
||||
return (u64 *)&ctxt->sys_regs[r];
|
||||
}
|
||||
|
||||
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
|
||||
|
||||
@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
|
||||
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
|
||||
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
|
||||
case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
|
||||
case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break;
|
||||
case PAR_EL1: *val = read_sysreg_par(); break;
|
||||
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
|
||||
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
|
||||
@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
|
||||
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
|
||||
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
|
||||
case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
|
||||
case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
|
||||
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
|
||||
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
|
||||
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
|
||||
@ -954,8 +993,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
void kvm_arm_halt_guest(struct kvm *kvm);
|
||||
void kvm_arm_resume_guest(struct kvm *kvm);
|
||||
|
||||
@ -1177,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
#define kvm_vm_has_ran_once(kvm) \
|
||||
(test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
|
||||
|
||||
static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
|
||||
{
|
||||
return test_bit(feature, ka->vcpu_features);
|
||||
}
|
||||
|
||||
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
|
||||
|
||||
int kvm_trng_call(struct kvm_vcpu *vcpu);
|
||||
#ifdef CONFIG_KVM
|
||||
extern phys_addr_t hyp_mem_base;
|
||||
|
@ -2,8 +2,9 @@
|
||||
#ifndef __ARM64_KVM_NESTED_H
|
||||
#define __ARM64_KVM_NESTED_H
|
||||
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
|
||||
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
|
||||
vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
|
||||
}
|
||||
|
||||
/* Translation helpers from non-VHE EL2 to EL1 */
|
||||
static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
|
||||
{
|
||||
return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
|
||||
}
|
||||
|
||||
static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
|
||||
{
|
||||
return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
|
||||
((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
|
||||
tcr_el2_ps_to_tcr_el1_ips(tcr) |
|
||||
(tcr & TCR_EL2_TG0_MASK) |
|
||||
(tcr & TCR_EL2_ORGN0_MASK) |
|
||||
(tcr & TCR_EL2_IRGN0_MASK) |
|
||||
(tcr & TCR_EL2_T0SZ_MASK);
|
||||
}
|
||||
|
||||
static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
|
||||
{
|
||||
u64 cpacr_el1 = 0;
|
||||
|
||||
if (cptr_el2 & CPTR_EL2_TTA)
|
||||
cpacr_el1 |= CPACR_ELx_TTA;
|
||||
if (!(cptr_el2 & CPTR_EL2_TFP))
|
||||
cpacr_el1 |= CPACR_ELx_FPEN;
|
||||
if (!(cptr_el2 & CPTR_EL2_TZ))
|
||||
cpacr_el1 |= CPACR_ELx_ZEN;
|
||||
|
||||
return cpacr_el1;
|
||||
}
|
||||
|
||||
static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
|
||||
{
|
||||
/* Only preserve the minimal set of bits we support */
|
||||
val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
|
||||
SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
|
||||
val |= SCTLR_EL1_RES1;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
|
||||
{
|
||||
/* Clear the ASID field */
|
||||
return ttbr0 & ~GENMASK_ULL(63, 48);
|
||||
}
|
||||
|
||||
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
|
||||
|
||||
struct sys_reg_params;
|
||||
struct sys_reg_desc;
|
||||
|
||||
void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r);
|
||||
int kvm_init_nv_sysregs(struct kvm *kvm);
|
||||
|
||||
#endif /* __ARM64_KVM_NESTED_H */
|
||||
|
@ -11,7 +11,8 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define KVM_PGTABLE_MAX_LEVELS 4U
|
||||
#define KVM_PGTABLE_FIRST_LEVEL -1
|
||||
#define KVM_PGTABLE_LAST_LEVEL 3
|
||||
|
||||
/*
|
||||
* The largest supported block sizes for KVM (no 52-bit PA support):
|
||||
@ -20,19 +21,29 @@
|
||||
* - 64K (level 2): 512MB
|
||||
*/
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
|
||||
#else
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
|
||||
#endif
|
||||
|
||||
#define kvm_lpa2_is_enabled() false
|
||||
#define kvm_lpa2_is_enabled() system_supports_lpa2()
|
||||
|
||||
static inline u64 kvm_get_parange_max(void)
|
||||
{
|
||||
if (kvm_lpa2_is_enabled() ||
|
||||
(IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
|
||||
return ID_AA64MMFR0_EL1_PARANGE_52;
|
||||
else
|
||||
return ID_AA64MMFR0_EL1_PARANGE_48;
|
||||
}
|
||||
|
||||
static inline u64 kvm_get_parange(u64 mmfr0)
|
||||
{
|
||||
u64 parange_max = kvm_get_parange_max();
|
||||
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
|
||||
if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
|
||||
parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
|
||||
if (parange > parange_max)
|
||||
parange = parange_max;
|
||||
|
||||
return parange;
|
||||
}
|
||||
@ -43,6 +54,8 @@ typedef u64 kvm_pte_t;
|
||||
|
||||
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
|
||||
#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
|
||||
|
||||
#define KVM_PHYS_INVALID (-1ULL)
|
||||
|
||||
@ -53,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte)
|
||||
|
||||
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
|
||||
{
|
||||
u64 pa = pte & KVM_PTE_ADDR_MASK;
|
||||
u64 pa;
|
||||
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
if (kvm_lpa2_is_enabled()) {
|
||||
pa = pte & KVM_PTE_ADDR_MASK_LPA2;
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
|
||||
} else {
|
||||
pa = pte & KVM_PTE_ADDR_MASK;
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
}
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
|
||||
{
|
||||
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
|
||||
kvm_pte_t pte;
|
||||
|
||||
if (PAGE_SHIFT == 16) {
|
||||
pa &= GENMASK(51, 48);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
|
||||
if (kvm_lpa2_is_enabled()) {
|
||||
pte = pa & KVM_PTE_ADDR_MASK_LPA2;
|
||||
pa &= GENMASK(51, 50);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
|
||||
} else {
|
||||
pte = pa & KVM_PTE_ADDR_MASK;
|
||||
if (PAGE_SHIFT == 16) {
|
||||
pa &= GENMASK(51, 48);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
|
||||
}
|
||||
}
|
||||
|
||||
return pte;
|
||||
@ -78,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
|
||||
return __phys_to_pfn(kvm_pte_to_phys(pte));
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_shift(u32 level)
|
||||
static inline u64 kvm_granule_shift(s8 level)
|
||||
{
|
||||
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
|
||||
/* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
|
||||
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_size(u32 level)
|
||||
static inline u64 kvm_granule_size(s8 level)
|
||||
{
|
||||
return BIT(kvm_granule_shift(level));
|
||||
}
|
||||
|
||||
static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
static inline bool kvm_level_supports_block_mapping(s8 level)
|
||||
{
|
||||
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
}
|
||||
|
||||
static inline u32 kvm_supported_block_sizes(void)
|
||||
{
|
||||
u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
u32 r = 0;
|
||||
|
||||
for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
|
||||
for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
|
||||
r |= BIT(kvm_granule_shift(level));
|
||||
|
||||
return r;
|
||||
@ -144,7 +170,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_unlinked_table)(void *addr, u32 level);
|
||||
void (*free_unlinked_table)(void *addr, s8 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@ -240,7 +266,7 @@ struct kvm_pgtable_visit_ctx {
|
||||
u64 start;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
u32 level;
|
||||
s8 level;
|
||||
enum kvm_pgtable_walk_flags flags;
|
||||
};
|
||||
|
||||
@ -343,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void)
|
||||
*/
|
||||
struct kvm_pgtable {
|
||||
u32 ia_bits;
|
||||
u32 start_level;
|
||||
s8 start_level;
|
||||
kvm_pteref_t pgd;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
|
||||
@ -477,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
|
||||
@ -501,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
|
||||
* an ERR_PTR(error) on failure.
|
||||
*/
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
u64 phys, s8 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte);
|
||||
|
||||
@ -727,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level);
|
||||
kvm_pte_t *ptep, s8 *level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
|
||||
|
@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void)
|
||||
|
||||
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
unsigned long total = 0;
|
||||
int i;
|
||||
|
||||
/* Provision the worst case scenario */
|
||||
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
|
||||
for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
|
||||
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
|
||||
total += nr_pages;
|
||||
}
|
||||
|
arch/arm64/include/asm/vncr_mapping.h (new file, 103 lines)
@@ -0,0 +1,103 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* System register offsets in the VNCR page
|
||||
* All offsets are *byte* displacements!
|
||||
*/
|
||||
|
||||
#ifndef __ARM64_VNCR_MAPPING_H__
|
||||
#define __ARM64_VNCR_MAPPING_H__
|
||||
|
||||
#define VNCR_VTTBR_EL2 0x020
|
||||
#define VNCR_VTCR_EL2 0x040
|
||||
#define VNCR_VMPIDR_EL2 0x050
|
||||
#define VNCR_CNTVOFF_EL2 0x060
|
||||
#define VNCR_HCR_EL2 0x078
|
||||
#define VNCR_HSTR_EL2 0x080
|
||||
#define VNCR_VPIDR_EL2 0x088
|
||||
#define VNCR_TPIDR_EL2 0x090
|
||||
#define VNCR_HCRX_EL2 0x0A0
|
||||
#define VNCR_VNCR_EL2 0x0B0
|
||||
#define VNCR_CPACR_EL1 0x100
|
||||
#define VNCR_CONTEXTIDR_EL1 0x108
|
||||
#define VNCR_SCTLR_EL1 0x110
|
||||
#define VNCR_ACTLR_EL1 0x118
|
||||
#define VNCR_TCR_EL1 0x120
|
||||
#define VNCR_AFSR0_EL1 0x128
|
||||
#define VNCR_AFSR1_EL1 0x130
|
||||
#define VNCR_ESR_EL1 0x138
|
||||
#define VNCR_MAIR_EL1 0x140
|
||||
#define VNCR_AMAIR_EL1 0x148
|
||||
#define VNCR_MDSCR_EL1 0x158
|
||||
#define VNCR_SPSR_EL1 0x160
|
||||
#define VNCR_CNTV_CVAL_EL0 0x168
|
||||
#define VNCR_CNTV_CTL_EL0 0x170
|
||||
#define VNCR_CNTP_CVAL_EL0 0x178
|
||||
#define VNCR_CNTP_CTL_EL0 0x180
|
||||
#define VNCR_SCXTNUM_EL1 0x188
|
||||
#define VNCR_TFSR_EL1 0x190
|
||||
#define VNCR_HFGRTR_EL2 0x1B8
|
||||
#define VNCR_HFGWTR_EL2 0x1C0
|
||||
#define VNCR_HFGITR_EL2 0x1C8
|
||||
#define VNCR_HDFGRTR_EL2 0x1D0
|
||||
#define VNCR_HDFGWTR_EL2 0x1D8
|
||||
#define VNCR_ZCR_EL1 0x1E0
|
||||
#define VNCR_HAFGRTR_EL2 0x1E8
|
||||
#define VNCR_TTBR0_EL1 0x200
|
||||
#define VNCR_TTBR1_EL1 0x210
|
||||
#define VNCR_FAR_EL1 0x220
|
||||
#define VNCR_ELR_EL1 0x230
|
||||
#define VNCR_SP_EL1 0x240
|
||||
#define VNCR_VBAR_EL1 0x250
|
||||
#define VNCR_TCR2_EL1 0x270
|
||||
#define VNCR_PIRE0_EL1 0x290
|
||||
#define VNCR_PIRE0_EL2 0x298
|
||||
#define VNCR_PIR_EL1 0x2A0
|
||||
#define VNCR_ICH_LR0_EL2 0x400
|
||||
#define VNCR_ICH_LR1_EL2 0x408
|
||||
#define VNCR_ICH_LR2_EL2 0x410
|
||||
#define VNCR_ICH_LR3_EL2 0x418
|
||||
#define VNCR_ICH_LR4_EL2 0x420
|
||||
#define VNCR_ICH_LR5_EL2 0x428
|
||||
#define VNCR_ICH_LR6_EL2 0x430
|
||||
#define VNCR_ICH_LR7_EL2 0x438
|
||||
#define VNCR_ICH_LR8_EL2 0x440
|
||||
#define VNCR_ICH_LR9_EL2 0x448
|
||||
#define VNCR_ICH_LR10_EL2 0x450
|
||||
#define VNCR_ICH_LR11_EL2 0x458
|
||||
#define VNCR_ICH_LR12_EL2 0x460
|
||||
#define VNCR_ICH_LR13_EL2 0x468
|
||||
#define VNCR_ICH_LR14_EL2 0x470
|
||||
#define VNCR_ICH_LR15_EL2 0x478
|
||||
#define VNCR_ICH_AP0R0_EL2 0x480
|
||||
#define VNCR_ICH_AP0R1_EL2 0x488
|
||||
#define VNCR_ICH_AP0R2_EL2 0x490
|
||||
#define VNCR_ICH_AP0R3_EL2 0x498
|
||||
#define VNCR_ICH_AP1R0_EL2 0x4A0
|
||||
#define VNCR_ICH_AP1R1_EL2 0x4A8
|
||||
#define VNCR_ICH_AP1R2_EL2 0x4B0
|
||||
#define VNCR_ICH_AP1R3_EL2 0x4B8
|
||||
#define VNCR_ICH_HCR_EL2 0x4C0
|
||||
#define VNCR_ICH_VMCR_EL2 0x4C8
|
||||
#define VNCR_VDISR_EL2 0x500
|
||||
#define VNCR_PMBLIMITR_EL1 0x800
|
||||
#define VNCR_PMBPTR_EL1 0x810
|
||||
#define VNCR_PMBSR_EL1 0x820
|
||||
#define VNCR_PMSCR_EL1 0x828
|
||||
#define VNCR_PMSEVFR_EL1 0x830
|
||||
#define VNCR_PMSICR_EL1 0x838
|
||||
#define VNCR_PMSIRR_EL1 0x840
|
||||
#define VNCR_PMSLATFR_EL1 0x848
|
||||
#define VNCR_TRFCR_EL1 0x880
|
||||
#define VNCR_MPAM1_EL1 0x900
|
||||
#define VNCR_MPAMHCR_EL2 0x930
|
||||
#define VNCR_MPAMVPMV_EL2 0x938
|
||||
#define VNCR_MPAMVPM0_EL2 0x940
|
||||
#define VNCR_MPAMVPM1_EL2 0x948
|
||||
#define VNCR_MPAMVPM2_EL2 0x950
|
||||
#define VNCR_MPAMVPM3_EL2 0x958
|
||||
#define VNCR_MPAMVPM4_EL2 0x960
|
||||
#define VNCR_MPAMVPM5_EL2 0x968
|
||||
#define VNCR_MPAMVPM6_EL2 0x970
|
||||
#define VNCR_MPAMVPM7_EL2 0x978
|
||||
|
||||
#endif /* __ARM64_VNCR_MAPPING_H__ */
|
@ -2341,7 +2341,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.capability = ARM64_HAS_NESTED_VIRT,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = has_nested_virt_support,
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP)
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
|
||||
},
|
||||
{
|
||||
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
|
||||
|
@ -21,16 +21,14 @@ if VIRTUALIZATION
|
||||
menuconfig KVM
|
||||
bool "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on HAVE_KVM
|
||||
select KVM_COMMON
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select KVM_MMIO
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
select HAVE_KVM_MSI
|
||||
@ -41,7 +39,6 @@ menuconfig KVM
|
||||
select HAVE_KVM_VCPU_RUN_PID_CHANGE
|
||||
select SCHED_INFO
|
||||
select GUEST_PERF_EVENTS if PERF_EVENTS
|
||||
select INTERVAL_TREE
|
||||
select XARRAY_MULTI
|
||||
help
|
||||
Support hosting virtualized guest machines.
|
||||
|
@ -295,8 +295,7 @@ static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
|
||||
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
|
||||
struct arch_timer_context *ctx;
|
||||
|
||||
ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
|
||||
: vcpu_vtimer(vcpu);
|
||||
ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
|
||||
|
||||
return kvm_counter_compute_delta(ctx, val);
|
||||
}
|
||||
|
@ -221,7 +221,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = vgic_present;
|
||||
break;
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_USER_MEMORY:
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
|
||||
@ -669,6 +668,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (vcpu_has_nv(vcpu)) {
|
||||
ret = kvm_init_nv_sysregs(vcpu->kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kvm_timer_enable(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1837,6 +1842,7 @@ static int kvm_init_vector_slots(void)
|
||||
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
|
||||
u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
unsigned long tcr;
|
||||
|
||||
/*
|
||||
@ -1859,6 +1865,10 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
|
||||
}
|
||||
tcr &= ~TCR_T0SZ_MASK;
|
||||
tcr |= TCR_T0SZ(hyp_va_bits);
|
||||
tcr &= ~TCR_EL2_PS_MASK;
|
||||
tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0));
|
||||
if (kvm_lpa2_is_enabled())
|
||||
tcr |= TCR_EL2_DS;
|
||||
params->tcr_el2 = tcr;
|
||||
|
||||
params->pgd_pa = kvm_mmu_get_httbr();
|
||||
|
@ -1012,6 +1012,7 @@ enum fgt_group_id {
|
||||
HDFGRTR_GROUP,
|
||||
HDFGWTR_GROUP,
|
||||
HFGITR_GROUP,
|
||||
HAFGRTR_GROUP,
|
||||
|
||||
/* Must be last */
|
||||
__NR_FGT_GROUP_IDS__
|
||||
@ -1042,10 +1043,20 @@ enum fg_filter_id {
|
||||
|
||||
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
|
||||
/* HFGRTR_EL2, HFGWTR_EL2 */
|
||||
SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0),
|
||||
SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0),
|
||||
SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0),
|
||||
SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0),
|
||||
SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0),
|
||||
SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
|
||||
SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
|
||||
SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0),
|
||||
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
|
||||
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
|
||||
SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0),
|
||||
SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0),
|
||||
SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0),
|
||||
SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0),
|
||||
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
|
||||
SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1),
|
||||
SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1),
|
||||
@ -1107,6 +1118,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
|
||||
SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1),
|
||||
SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1),
|
||||
/* HFGITR_EL2 */
|
||||
SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1),
|
||||
SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1),
|
||||
SR_FGT(OP_GCSPUSHX, HFGITR, nGCSEPP, 0),
|
||||
SR_FGT(OP_GCSPOPX, HFGITR, nGCSEPP, 0),
|
||||
SR_FGT(OP_GCSPUSHM, HFGITR, nGCSPUSHM_EL1, 0),
|
||||
SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0),
|
||||
SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0),
|
||||
SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1),
|
||||
@ -1674,6 +1690,49 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
|
||||
SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1),
|
||||
SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1),
|
||||
SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1),
|
||||
/*
|
||||
* HAFGRTR_EL2
|
||||
*/
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(15), HAFGRTR, AMEVTYPER115_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(14), HAFGRTR, AMEVTYPER114_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(13), HAFGRTR, AMEVTYPER113_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(12), HAFGRTR, AMEVTYPER112_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(11), HAFGRTR, AMEVTYPER111_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(10), HAFGRTR, AMEVTYPER110_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(9), HAFGRTR, AMEVTYPER19_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(8), HAFGRTR, AMEVTYPER18_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(7), HAFGRTR, AMEVTYPER17_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(6), HAFGRTR, AMEVTYPER16_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(5), HAFGRTR, AMEVTYPER15_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(4), HAFGRTR, AMEVTYPER14_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(3), HAFGRTR, AMEVTYPER13_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(2), HAFGRTR, AMEVTYPER12_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(1), HAFGRTR, AMEVTYPER11_EL0, 1),
|
||||
SR_FGT(SYS_AMEVTYPER1_EL0(0), HAFGRTR, AMEVTYPER10_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(15), HAFGRTR, AMEVCNTR115_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(14), HAFGRTR, AMEVCNTR114_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(13), HAFGRTR, AMEVCNTR113_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(12), HAFGRTR, AMEVCNTR112_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(11), HAFGRTR, AMEVCNTR111_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(10), HAFGRTR, AMEVCNTR110_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(9), HAFGRTR, AMEVCNTR19_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(8), HAFGRTR, AMEVCNTR18_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(7), HAFGRTR, AMEVCNTR17_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(6), HAFGRTR, AMEVCNTR16_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(5), HAFGRTR, AMEVCNTR15_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(4), HAFGRTR, AMEVCNTR14_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(3), HAFGRTR, AMEVCNTR13_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(2), HAFGRTR, AMEVCNTR12_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(1), HAFGRTR, AMEVCNTR11_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR1_EL0(0), HAFGRTR, AMEVCNTR10_EL0, 1),
|
||||
SR_FGT(SYS_AMCNTENCLR1_EL0, HAFGRTR, AMCNTEN1, 1),
|
||||
SR_FGT(SYS_AMCNTENSET1_EL0, HAFGRTR, AMCNTEN1, 1),
|
||||
SR_FGT(SYS_AMCNTENCLR0_EL0, HAFGRTR, AMCNTEN0, 1),
|
||||
SR_FGT(SYS_AMCNTENSET0_EL0, HAFGRTR, AMCNTEN0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(3), HAFGRTR, AMEVCNTR03_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
|
||||
};
|
||||
|
||||
static union trap_config get_trap_config(u32 sysreg)
|
||||
@ -1894,6 +1953,10 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
|
||||
val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
|
||||
break;
|
||||
|
||||
case HAFGRTR_GROUP:
|
||||
val = sanitised_sys_reg(vcpu, HAFGRTR_EL2);
|
||||
break;
|
||||
|
||||
case HFGITR_GROUP:
|
||||
val = sanitised_sys_reg(vcpu, HFGITR_EL2);
|
||||
switch (tc.fgf) {
|
||||
|
@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
|
||||
*/
|
||||
if (!(esr & ESR_ELx_S1PTW) &&
|
||||
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
|
||||
(esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
|
||||
esr_fsc_is_permission_fault(esr))) {
|
||||
if (!__translate_far_to_hpfar(far, &hpfar))
|
||||
return false;
|
||||
} else {
|
||||
|
@ -79,6 +79,45 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
|
||||
clr |= ~hfg & __ ## reg ## _nMASK; \
|
||||
} while(0)
|
||||
|
||||
#define update_fgt_traps_cs(vcpu, reg, clr, set) \
|
||||
do { \
|
||||
struct kvm_cpu_context *hctxt = \
|
||||
&this_cpu_ptr(&kvm_host_data)->host_ctxt; \
|
||||
u64 c = 0, s = 0; \
|
||||
\
|
||||
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
|
||||
compute_clr_set(vcpu, reg, c, s); \
|
||||
s |= set; \
|
||||
c |= clr; \
|
||||
if (c || s) { \
|
||||
u64 val = __ ## reg ## _nMASK; \
|
||||
val |= s; \
|
||||
val &= ~c; \
|
||||
write_sysreg_s(val, SYS_ ## reg); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define update_fgt_traps(vcpu, reg) \
|
||||
update_fgt_traps_cs(vcpu, reg, 0, 0)
|
||||
|
||||
/*
|
||||
* Validate the fine grain trap masks.
|
||||
* Check that the masks do not overlap and that all bits are accounted for.
|
||||
*/
|
||||
#define CHECK_FGT_MASKS(reg) \
|
||||
do { \
|
||||
BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \
|
||||
BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \
|
||||
(__ ## reg ## _nMASK))); \
|
||||
} while(0)
|
||||
|
||||
static inline bool cpu_has_amu(void)
|
||||
{
|
||||
u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
|
||||
|
||||
return cpuid_feature_extract_unsigned_field(pfr0,
|
||||
ID_AA64PFR0_EL1_AMU_SHIFT);
|
||||
}
|
||||
|
||||
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -86,6 +125,14 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
|
||||
u64 r_val, w_val;
|
||||
|
||||
CHECK_FGT_MASKS(HFGRTR_EL2);
|
||||
CHECK_FGT_MASKS(HFGWTR_EL2);
|
||||
CHECK_FGT_MASKS(HFGITR_EL2);
|
||||
CHECK_FGT_MASKS(HDFGRTR_EL2);
|
||||
CHECK_FGT_MASKS(HDFGWTR_EL2);
|
||||
CHECK_FGT_MASKS(HAFGRTR_EL2);
|
||||
CHECK_FGT_MASKS(HCRX_EL2);
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_FGT))
|
||||
return;
|
||||
|
||||
@ -110,12 +157,15 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
|
||||
}
|
||||
|
||||
/* The default is not to trap anything but ACCDATA_EL1 */
|
||||
r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
|
||||
/* The default to trap everything not handled or supported in KVM. */
|
||||
tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
|
||||
HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
|
||||
|
||||
r_val = __HFGRTR_EL2_nMASK & ~tmp;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
|
||||
w_val = __HFGWTR_EL2_nMASK & ~tmp;
|
||||
w_val |= w_set;
|
||||
w_val &= ~w_clr;
|
||||
|
||||
@ -125,34 +175,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
|
||||
return;
|
||||
|
||||
ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
|
||||
update_fgt_traps(vcpu, HFGITR_EL2);
|
||||
update_fgt_traps(vcpu, HDFGRTR_EL2);
|
||||
update_fgt_traps(vcpu, HDFGWTR_EL2);
|
||||
|
||||
r_set = r_clr = 0;
|
||||
compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
|
||||
r_val = __HFGITR_EL2_nMASK;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
write_sysreg_s(r_val, SYS_HFGITR_EL2);
|
||||
|
||||
ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
|
||||
ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
|
||||
|
||||
r_clr = r_set = w_clr = w_set = 0;
|
||||
|
||||
compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
|
||||
compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
|
||||
|
||||
r_val = __HDFGRTR_EL2_nMASK;
|
||||
r_val |= r_set;
|
||||
r_val &= ~r_clr;
|
||||
|
||||
w_val = __HDFGWTR_EL2_nMASK;
|
||||
w_val |= w_set;
|
||||
w_val &= ~w_clr;
|
||||
|
||||
write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
|
||||
write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
|
||||
if (cpu_has_amu())
|
||||
update_fgt_traps(vcpu, HAFGRTR_EL2);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
@ -171,6 +199,9 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
|
||||
|
||||
if (cpu_has_amu())
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
|
||||
}
|
||||
|
||||
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
||||
@ -591,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
bool valid;
|
||||
|
||||
valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
|
||||
valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
|
||||
kvm_vcpu_dabt_isvalid(vcpu) &&
|
||||
!kvm_vcpu_abt_issea(vcpu) &&
|
||||
!kvm_vcpu_abt_iss1tw(vcpu);
|
||||
|
@ -69,6 +69,8 @@
|
||||
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
|
||||
)
|
||||
|
||||
#define PVM_ID_AA64PFR2_ALLOW 0ULL
|
||||
|
||||
/*
|
||||
* Allow for protected VMs:
|
||||
* - Mixed-endian
|
||||
@ -101,6 +103,7 @@
|
||||
* - Privileged Access Never
|
||||
* - SError interrupt exceptions from speculative reads
|
||||
* - Enhanced Translation Synchronization
|
||||
* - Control for cache maintenance permission
|
||||
*/
|
||||
#define PVM_ID_AA64MMFR1_ALLOW (\
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
|
||||
@ -108,7 +111,8 @@
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
|
||||
)
|
||||
|
||||
/*
|
||||
@ -133,6 +137,8 @@
|
||||
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
|
||||
)
|
||||
|
||||
#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
|
||||
|
||||
/*
|
||||
* No support for Scalable Vectors for protected VMs:
|
||||
* Requires additional support from KVM, e.g., context-switching and
|
||||
@ -178,10 +184,18 @@
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
|
||||
)
|
||||
|
||||
/* Restrict pointer authentication to the basic version. */
|
||||
#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
|
||||
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
|
||||
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
|
||||
)
|
||||
|
||||
#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
|
||||
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
|
||||
)
|
||||
|
||||
#define PVM_ID_AA64ISAR1_ALLOW (\
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
|
||||
@ -196,8 +210,8 @@
|
||||
)
|
||||
|
||||
#define PVM_ID_AA64ISAR2_ALLOW (\
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | \
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
|
||||
)
|
||||
|
||||
|
@ -122,11 +122,7 @@ alternative_if ARM64_HAS_CNP
|
||||
alternative_else_nop_endif
|
||||
msr ttbr0_el2, x2
|
||||
|
||||
/*
|
||||
* Set the PS bits in TCR_EL2.
|
||||
*/
|
||||
ldr x0, [x0, #NVHE_INIT_TCR_EL2]
|
||||
tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
|
||||
msr tcr_el2, x0
|
||||
|
||||
isb
|
||||
@ -292,6 +288,8 @@ alternative_else_nop_endif
|
||||
mov sp, x0
|
||||
|
||||
/* And turn the MMU back on! */
|
||||
dsb nsh
|
||||
isb
|
||||
set_sctlr_el2 x2
|
||||
ret x1
|
||||
SYM_FUNC_END(__pkvm_init_switch_pgd)
|
||||
|
@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr)
|
||||
hyp_put_page(&host_s2_pool, addr);
|
||||
}
|
||||
|
||||
static void host_s2_free_unlinked_table(void *addr, u32 level)
|
||||
static void host_s2_free_unlinked_table(void *addr, s8 level)
|
||||
{
|
||||
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
|
||||
}
|
||||
@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
{
|
||||
struct kvm_mem_range cur;
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
int ret;
|
||||
|
||||
hyp_assert_lock_held(&host_mmu.lock);
|
||||
@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
cur.start = ALIGN_DOWN(addr, granule);
|
||||
cur.end = cur.start + granule;
|
||||
level++;
|
||||
} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
|
||||
} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
|
||||
!(kvm_level_supports_block_mapping(level) &&
|
||||
range_included(&cur, range)));
|
||||
|
||||
|
@ -260,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
|
||||
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
|
||||
*/
|
||||
dsb(ishst);
|
||||
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
|
||||
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
|
||||
dsb(ish);
|
||||
isb();
|
||||
}
|
||||
@ -275,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
{
|
||||
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
|
||||
|
||||
if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
slot->addr = ctx->addr;
|
||||
|
@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
|
||||
cptr_set |= CPTR_EL2_TTA;
|
||||
}
|
||||
|
||||
/* Trap External Trace */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
|
||||
mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
|
||||
|
||||
vcpu->arch.mdcr_el2 |= mdcr_set;
|
||||
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
|
||||
vcpu->arch.cptr_el2 |= cptr_set;
|
||||
|
@ -181,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (!kvm_pte_valid(ctx->old))
|
||||
return 0;
|
||||
|
||||
if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (ctx->level != KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
phys = kvm_pte_to_phys(ctx->old);
|
||||
|
@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
{
|
||||
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
|
||||
u64 parange_max = kvm_get_parange_max();
|
||||
u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
|
||||
|
||||
return phys < BIT(shift);
|
||||
}
|
||||
|
||||
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
|
||||
@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
return IS_ALIGNED(ctx->addr, granule);
|
||||
}
|
||||
|
||||
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
|
||||
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
|
||||
{
|
||||
u64 shift = kvm_granule_shift(level);
|
||||
u64 mask = BIT(PAGE_SHIFT - 3) - 1;
|
||||
@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
|
||||
return (addr & mask) >> shift;
|
||||
}
|
||||
|
||||
static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
|
||||
static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
|
||||
{
|
||||
struct kvm_pgtable pgt = {
|
||||
.ia_bits = ia_bits,
|
||||
@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
|
||||
return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
|
||||
}
|
||||
|
||||
static bool kvm_pte_table(kvm_pte_t pte, u32 level)
|
||||
static bool kvm_pte_table(kvm_pte_t pte, s8 level)
|
||||
{
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (level == KVM_PGTABLE_LAST_LEVEL)
|
||||
return false;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops
|
||||
return pte;
|
||||
}
|
||||
|
||||
static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
|
||||
static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
|
||||
{
|
||||
kvm_pte_t pte = kvm_phys_to_pte(pa);
|
||||
u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
|
||||
KVM_PTE_TYPE_BLOCK;
|
||||
u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
|
||||
KVM_PTE_TYPE_BLOCK;
|
||||
|
||||
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
|
||||
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
|
||||
@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
|
||||
}
|
||||
|
||||
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
|
||||
|
||||
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
kvm_pteref_t pteref, u32 level)
|
||||
kvm_pteref_t pteref, s8 level)
|
||||
{
|
||||
enum kvm_pgtable_walk_flags flags = data->walker->flags;
|
||||
kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
|
||||
@ -272,12 +275,13 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
}
|
||||
|
||||
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
|
||||
{
|
||||
u32 idx;
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
|
||||
if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
|
||||
level > KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
|
||||
@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
|
||||
struct leaf_walk_data {
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
};
|
||||
|
||||
static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
}
|
||||
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level)
|
||||
kvm_pte_t *ptep, s8 *level)
|
||||
{
|
||||
struct leaf_walk_data data;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
@ -408,7 +412,8 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
|
||||
}
|
||||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
if (!kvm_lpa2_is_enabled())
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (hyp_map_walker_try_leaf(ctx, data))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
|
||||
@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
|
||||
s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
|
||||
ARM64_HW_PGTABLE_LEVELS(va_bits);
|
||||
|
||||
if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
|
||||
start_level > KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
|
||||
if (!pgt->pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
pgt->ia_bits = va_bits;
|
||||
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
|
||||
pgt->start_level = start_level;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = NULL;
|
||||
pgt->force_pte_cb = NULL;
|
||||
@ -624,7 +634,7 @@ struct stage2_map_data {
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
{
|
||||
u64 vtcr = VTCR_EL2_FLAGS;
|
||||
u8 lvls;
|
||||
s8 lvls;
|
||||
|
||||
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
|
||||
vtcr |= VTCR_EL2_T0SZ(phys_shift);
|
||||
@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
lvls = stage2_pgtable_levels(phys_shift);
|
||||
if (lvls < 2)
|
||||
lvls = 2;
|
||||
|
||||
/*
|
||||
* When LPA2 is enabled, the HW supports an extra level of translation
|
||||
* (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
|
||||
* to as an addition to SL0 to enable encoding this extra start level.
|
||||
* However, since we always use concatenated pages for the first level
|
||||
* lookup, we will never need this extra level and therefore do not need
|
||||
* to touch SL2.
|
||||
*/
|
||||
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
|
||||
|
||||
#ifdef CONFIG_ARM64_HW_AFDBM
|
||||
@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
vtcr |= VTCR_EL2_HA;
|
||||
#endif /* CONFIG_ARM64_HW_AFDBM */
|
||||
|
||||
if (kvm_lpa2_is_enabled())
|
||||
vtcr |= VTCR_EL2_DS;
|
||||
|
||||
/* Set the vmid bits */
|
||||
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
|
||||
VTCR_EL2_VS_16BIT :
|
||||
@ -711,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
|
||||
if (prot & KVM_PGTABLE_PROT_W)
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
|
||||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
if (!kvm_lpa2_is_enabled())
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
@ -902,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
{
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
|
||||
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
|
||||
if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
|
||||
return false;
|
||||
|
||||
return kvm_block_mapping_supported(ctx, phys);
|
||||
@ -981,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (ret != -E2BIG)
|
||||
return ret;
|
||||
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
if (!data->memcache)
|
||||
@ -1151,7 +1175,7 @@ struct stage2_attr_data {
|
||||
kvm_pte_t attr_set;
|
||||
kvm_pte_t attr_clr;
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
};
|
||||
|
||||
static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
@ -1194,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
|
||||
u64 size, kvm_pte_t attr_set,
|
||||
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
|
||||
u32 *level, enum kvm_pgtable_walk_flags flags)
|
||||
s8 *level, enum kvm_pgtable_walk_flags flags)
|
||||
{
|
||||
int ret;
|
||||
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
|
||||
@ -1296,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int ret;
|
||||
u32 level;
|
||||
s8 level;
|
||||
kvm_pte_t set = 0, clr = 0;
|
||||
|
||||
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
|
||||
@ -1349,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
}
|
||||
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
u64 phys, s8 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte)
|
||||
{
|
||||
@ -1407,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
* fully populated tree up to the PTE entries. Note that @level is
|
||||
* interpreted as in "level @level entry".
|
||||
*/
|
||||
static int stage2_block_get_nr_page_tables(u32 level)
|
||||
static int stage2_block_get_nr_page_tables(s8 level)
|
||||
{
|
||||
switch (level) {
|
||||
case 1:
|
||||
@ -1418,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level)
|
||||
return 0;
|
||||
default:
|
||||
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
|
||||
level >= KVM_PGTABLE_MAX_LEVELS);
|
||||
level > KVM_PGTABLE_LAST_LEVEL);
|
||||
return -EINVAL;
|
||||
};
|
||||
}
|
||||
@ -1431,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct kvm_s2_mmu *mmu;
|
||||
kvm_pte_t pte = ctx->old, new, *childp;
|
||||
enum kvm_pgtable_prot prot;
|
||||
u32 level = ctx->level;
|
||||
s8 level = ctx->level;
|
||||
bool force_pte;
|
||||
int nr_pages;
|
||||
u64 phys;
|
||||
|
||||
/* No huge-pages exist at the last level */
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (level == KVM_PGTABLE_LAST_LEVEL)
|
||||
return 0;
|
||||
|
||||
/* We only split valid block mappings */
|
||||
@ -1514,7 +1538,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
u64 vtcr = mmu->vtcr;
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
|
||||
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
|
||||
@ -1537,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
|
||||
{
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
|
||||
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
}
|
||||
@ -1573,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
|
||||
{
|
||||
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
|
@ -223,12 +223,12 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
|
||||
{
|
||||
struct page *page = container_of(head, struct page, rcu_head);
|
||||
void *pgtable = page_to_virt(page);
|
||||
u32 level = page_private(page);
|
||||
s8 level = page_private(page);
|
||||
|
||||
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
|
||||
}
|
||||
|
||||
static void stage2_free_unlinked_table(void *addr, u32 level)
|
||||
static void stage2_free_unlinked_table(void *addr, s8 level)
|
||||
{
|
||||
struct page *page = virt_to_page(addr);
|
||||
|
||||
@ -804,13 +804,13 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
struct kvm_pgtable pgt = {
|
||||
.pgd = (kvm_pteref_t)kvm->mm->pgd,
|
||||
.ia_bits = vabits_actual,
|
||||
.start_level = (KVM_PGTABLE_MAX_LEVELS -
|
||||
CONFIG_PGTABLE_LEVELS),
|
||||
.start_level = (KVM_PGTABLE_LAST_LEVEL -
|
||||
CONFIG_PGTABLE_LEVELS + 1),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
};
|
||||
unsigned long flags;
|
||||
kvm_pte_t pte = 0; /* Keep GCC quiet... */
|
||||
u32 level = ~0;
|
||||
s8 level = S8_MAX;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -829,7 +829,9 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
* Not seeing an error, but not updating level? Something went
|
||||
* deeply wrong...
|
||||
*/
|
||||
if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
|
||||
if (WARN_ON(level > KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EFAULT;
|
||||
if (WARN_ON(level < KVM_PGTABLE_FIRST_LEVEL))
|
||||
return -EFAULT;
|
||||
|
||||
/* Oops, the userspace PTs are gone... Replay the fault */
|
||||
@ -1374,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
struct kvm_memory_slot *memslot, unsigned long hva,
|
||||
unsigned long fault_status)
|
||||
bool fault_is_perm)
|
||||
{
|
||||
int ret = 0;
|
||||
bool write_fault, writable, force_pte = false;
|
||||
@ -1388,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
gfn_t gfn;
|
||||
kvm_pfn_t pfn;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
|
||||
long vma_pagesize, fault_granule;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt;
|
||||
|
||||
fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
|
||||
if (fault_is_perm)
|
||||
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
|
||||
VM_BUG_ON(write_fault && exec_fault);
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
|
||||
if (fault_is_perm && !write_fault && !exec_fault) {
|
||||
kvm_err("Unexpected L2 read permission error\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
@ -1409,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
if (fault_status != ESR_ELx_FSC_PERM ||
|
||||
(logging_active && write_fault)) {
|
||||
if (!fault_is_perm || (logging_active && write_fault)) {
|
||||
ret = kvm_mmu_topup_memory_cache(memcache,
|
||||
kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
|
||||
if (ret)
|
||||
@ -1527,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* backed by a THP and thus use block mapping if possible.
|
||||
*/
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
|
||||
if (fault_status == ESR_ELx_FSC_PERM &&
|
||||
fault_granule > PAGE_SIZE)
|
||||
if (fault_is_perm && fault_granule > PAGE_SIZE)
|
||||
vma_pagesize = fault_granule;
|
||||
else
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
@ -1541,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
}
|
||||
}
|
||||
|
||||
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (mte_allowed) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
@ -1567,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
* kvm_pgtable_stage2_map() should be called to change block size.
|
||||
*/
|
||||
if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
|
||||
if (fault_is_perm && vma_pagesize == fault_granule)
|
||||
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
|
||||
else
|
||||
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
|
||||
@ -1618,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
|
||||
*/
|
||||
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long fault_status;
|
||||
unsigned long esr;
|
||||
phys_addr_t fault_ipa;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long hva;
|
||||
@ -1626,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
gfn_t gfn;
|
||||
int ret, idx;
|
||||
|
||||
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
|
||||
esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_FAULT) {
|
||||
if (esr_fsc_is_permission_fault(esr)) {
|
||||
/* Beyond sanitised PARange (which is the IPA limit) */
|
||||
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
|
||||
kvm_inject_size_fault(vcpu);
|
||||
@ -1666,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
kvm_vcpu_get_hfar(vcpu), fault_ipa);
|
||||
|
||||
/* Check the stage-2 fault is trans. fault or write fault */
|
||||
if (fault_status != ESR_ELx_FSC_FAULT &&
|
||||
fault_status != ESR_ELx_FSC_PERM &&
|
||||
fault_status != ESR_ELx_FSC_ACCESS) {
|
||||
if (!esr_fsc_is_translation_fault(esr) &&
|
||||
!esr_fsc_is_permission_fault(esr) &&
|
||||
!esr_fsc_is_access_flag_fault(esr)) {
|
||||
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
|
||||
kvm_vcpu_trap_get_class(vcpu),
|
||||
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
|
||||
@ -1730,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
/* Userspace should not be able to register out-of-bounds IPAs */
|
||||
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_ACCESS) {
|
||||
if (esr_fsc_is_access_flag_fault(esr)) {
|
||||
handle_access_fault(vcpu, fault_ipa);
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
|
||||
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
|
||||
esr_fsc_is_permission_fault(esr));
|
||||
if (ret == 0)
|
||||
ret = 1;
|
||||
out:
|
||||
|
@ -23,13 +23,9 @@
|
||||
* This list should get updated as new features get added to the NV
|
||||
* support, and new extension to the architecture.
|
||||
*/
|
||||
void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static u64 limit_nv_id_reg(u32 id, u64 val)
|
||||
{
|
||||
u32 id = reg_to_encoding(r);
|
||||
u64 val, tmp;
|
||||
|
||||
val = p->regval;
|
||||
u64 tmp;
|
||||
|
||||
switch (id) {
|
||||
case SYS_ID_AA64ISAR0_EL1:
|
||||
@ -158,5 +154,17 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
|
||||
break;
|
||||
}
|
||||
|
||||
p->regval = val;
|
||||
return val;
|
||||
}
|
||||
int kvm_init_nv_sysregs(struct kvm *kvm)
|
||||
{
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
|
||||
for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++)
|
||||
kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i),
|
||||
kvm->arch.id_regs[i]);
|
||||
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -280,12 +280,11 @@ int __init kvm_set_ipa_limit(void)
|
||||
parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
|
||||
/*
|
||||
* IPA size beyond 48 bits could not be supported
|
||||
* on either 4K or 16K page size. Hence let's cap
|
||||
* it to 48 bits, in case it's reported as larger
|
||||
* on the system.
|
||||
* IPA size beyond 48 bits for 4K and 16K page size is only supported
|
||||
* when LPA2 is available. So if we have LPA2, enable it, else cap to 48
|
||||
* bits, in case it's reported as larger on the system.
|
||||
*/
|
||||
if (PAGE_SIZE != SZ_64K)
|
||||
if (!kvm_lpa2_is_enabled() && PAGE_SIZE != SZ_64K)
|
||||
parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
|
||||
|
||||
/*
|
||||
|
@ -45,44 +45,170 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
|
||||
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
u64 val);
|
||||
|
||||
static bool bad_trap(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r,
|
||||
const char *msg)
|
||||
{
|
||||
WARN_ONCE(1, "Unexpected %s\n", msg);
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool read_from_write_only(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
return false;
|
||||
return bad_trap(vcpu, params, r,
|
||||
"sys_reg read to write-only register");
|
||||
}
|
||||
|
||||
static bool write_to_read_only(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
return false;
|
||||
return bad_trap(vcpu, params, r,
|
||||
"sys_reg write to read-only register");
|
||||
}
|
||||
|
||||
#define PURE_EL2_SYSREG(el2) \
|
||||
case el2: { \
|
||||
*el1r = el2; \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#define MAPPED_EL2_SYSREG(el2, el1, fn) \
|
||||
case el2: { \
|
||||
*xlate = fn; \
|
||||
*el1r = el1; \
|
||||
return true; \
|
||||
}
|
||||
|
||||
static bool get_el2_to_el1_mapping(unsigned int reg,
|
||||
unsigned int *el1r, u64 (**xlate)(u64))
|
||||
{
|
||||
switch (reg) {
|
||||
PURE_EL2_SYSREG( VPIDR_EL2 );
|
||||
PURE_EL2_SYSREG( VMPIDR_EL2 );
|
||||
PURE_EL2_SYSREG( ACTLR_EL2 );
|
||||
PURE_EL2_SYSREG( HCR_EL2 );
|
||||
PURE_EL2_SYSREG( MDCR_EL2 );
|
||||
PURE_EL2_SYSREG( HSTR_EL2 );
|
||||
PURE_EL2_SYSREG( HACR_EL2 );
|
||||
PURE_EL2_SYSREG( VTTBR_EL2 );
|
||||
PURE_EL2_SYSREG( VTCR_EL2 );
|
||||
PURE_EL2_SYSREG( RVBAR_EL2 );
|
||||
PURE_EL2_SYSREG( TPIDR_EL2 );
|
||||
PURE_EL2_SYSREG( HPFAR_EL2 );
|
||||
PURE_EL2_SYSREG( CNTHCTL_EL2 );
|
||||
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
|
||||
translate_sctlr_el2_to_sctlr_el1 );
|
||||
MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1,
|
||||
translate_cptr_el2_to_cpacr_el1 );
|
||||
MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1,
|
||||
translate_ttbr0_el2_to_ttbr0_el1 );
|
||||
MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1,
|
||||
translate_tcr_el2_to_tcr_el1 );
|
||||
MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
|
||||
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
|
||||
{
|
||||
u64 val = 0x8badf00d8badf00d;
|
||||
u64 (*xlate)(u64) = NULL;
|
||||
unsigned int el1r;
|
||||
|
||||
if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
|
||||
__vcpu_read_sys_reg_from_cpu(reg, &val))
|
||||
if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
|
||||
goto memory_read;
|
||||
|
||||
if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
|
||||
if (!is_hyp_ctxt(vcpu))
|
||||
goto memory_read;
|
||||
|
||||
/*
|
||||
* If this register does not have an EL1 counterpart,
|
||||
* then read the stored EL2 version.
|
||||
*/
|
||||
if (reg == el1r)
|
||||
goto memory_read;
|
||||
|
||||
/*
|
||||
* If we have a non-VHE guest and that the sysreg
|
||||
* requires translation to be used at EL1, use the
|
||||
* in-memory copy instead.
|
||||
*/
|
||||
if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
|
||||
goto memory_read;
|
||||
|
||||
/* Get the current version of the EL1 counterpart. */
|
||||
WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
|
||||
return val;
|
||||
}
|
||||
|
||||
/* EL1 register can't be on the CPU if the guest is in vEL2. */
|
||||
if (unlikely(is_hyp_ctxt(vcpu)))
|
||||
goto memory_read;
|
||||
|
||||
if (__vcpu_read_sys_reg_from_cpu(reg, &val))
|
||||
return val;
|
||||
|
||||
memory_read:
|
||||
return __vcpu_sys_reg(vcpu, reg);
|
||||
}
|
||||
|
||||
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
|
||||
{
|
||||
if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
|
||||
__vcpu_write_sys_reg_to_cpu(val, reg))
|
||||
u64 (*xlate)(u64) = NULL;
|
||||
unsigned int el1r;
|
||||
|
||||
if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
|
||||
goto memory_write;
|
||||
|
||||
if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
|
||||
if (!is_hyp_ctxt(vcpu))
|
||||
goto memory_write;
|
||||
|
||||
/*
|
||||
* Always store a copy of the write to memory to avoid having
|
||||
* to reverse-translate virtual EL2 system registers for a
|
||||
* non-VHE guest hypervisor.
|
||||
*/
|
||||
__vcpu_sys_reg(vcpu, reg) = val;
|
||||
|
||||
/* No EL1 counterpart? We're done here.? */
|
||||
if (reg == el1r)
|
||||
return;
|
||||
|
||||
if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
|
||||
val = xlate(val);
|
||||
|
||||
/* Redirect this to the EL1 version of the register. */
|
||||
WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r));
|
||||
return;
|
||||
}
|
||||
|
||||
/* EL1 register can't be on the CPU if the guest is in vEL2. */
|
||||
if (unlikely(is_hyp_ctxt(vcpu)))
|
||||
goto memory_write;
|
||||
|
||||
if (__vcpu_write_sys_reg_to_cpu(val, reg))
|
||||
return;
|
||||
|
||||
__vcpu_sys_reg(vcpu, reg) = val;
|
||||
memory_write:
|
||||
__vcpu_sys_reg(vcpu, reg) = val;
|
||||
}
|
||||
|
||||
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
|
||||
@ -1505,8 +1631,6 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
|
||||
return write_to_read_only(vcpu, p, r);
|
||||
|
||||
p->regval = read_id_reg(vcpu, r);
|
||||
if (vcpu_has_nv(vcpu))
|
||||
access_nested_id_reg(vcpu, p, r);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1885,6 +2009,32 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
|
||||
return REG_HIDDEN;
|
||||
}
|
||||
|
||||
static bool bad_vncr_trap(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
/*
|
||||
* We really shouldn't be here, and this is likely the result
|
||||
* of a misconfigured trap, as this register should target the
|
||||
* VNCR page, and nothing else.
|
||||
*/
|
||||
return bad_trap(vcpu, p, r,
|
||||
"trap of VNCR-backed register");
|
||||
}
|
||||
|
||||
static bool bad_redir_trap(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
/*
|
||||
* We really shouldn't be here, and this is likely the result
|
||||
* of a misconfigured trap, as this register should target the
|
||||
* corresponding EL1, and nothing else.
|
||||
*/
|
||||
return bad_trap(vcpu, p, r,
|
||||
"trap of EL2 register redirected to EL1");
|
||||
}
|
||||
|
||||
#define EL2_REG(name, acc, rst, v) { \
|
||||
SYS_DESC(SYS_##name), \
|
||||
.access = acc, \
|
||||
@ -1894,6 +2044,9 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
|
||||
.val = v, \
|
||||
}
|
||||
|
||||
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
|
||||
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
|
||||
|
||||
/*
|
||||
* EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
|
||||
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
|
||||
@ -2508,32 +2661,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
|
||||
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
|
||||
|
||||
EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
|
||||
EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
|
||||
EL2_REG_VNCR(VPIDR_EL2, reset_unknown, 0),
|
||||
EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
|
||||
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
|
||||
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HCR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(HCR_EL2, reset_val, 0),
|
||||
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
|
||||
EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HACR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HACR_EL2, reset_val, 0),
|
||||
|
||||
EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(HCRX_EL2, reset_val, 0),
|
||||
|
||||
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
|
||||
EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
|
||||
|
||||
{ SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
|
||||
EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
|
||||
EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
|
||||
EL2_REG_REDIR(SPSR_EL2, reset_val, 0),
|
||||
EL2_REG_REDIR(ELR_EL2, reset_val, 0),
|
||||
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
|
||||
|
||||
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
|
||||
@ -2549,10 +2703,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
|
||||
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(ESR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
|
||||
{ SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
|
||||
|
||||
EL2_REG(FAR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
|
||||
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
|
||||
|
||||
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
|
||||
@ -2565,24 +2719,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
|
||||
|
||||
EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
|
||||
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
|
||||
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
|
||||
|
||||
EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
|
||||
EL12_REG(CPACR, access_rw, reset_val, 0),
|
||||
EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(TCR, access_vm_reg, reset_val, 0),
|
||||
{ SYS_DESC(SYS_SPSR_EL12), access_spsr},
|
||||
{ SYS_DESC(SYS_ELR_EL12), access_elr},
|
||||
EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
|
||||
EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
|
||||
EL12_REG(VBAR, access_rw, reset_val, 0),
|
||||
EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
|
||||
EL12_REG(CNTKCTL, access_rw, reset_val, 0),
|
||||
|
||||
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
|
||||
|
@ -590,7 +590,11 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
|
||||
|
||||
irq = __vgic_its_check_cache(dist, db, devid, eventid);
|
||||
if (irq)
|
||||
vgic_get_irq_kref(irq);
|
||||
|
||||
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
|
||||
|
||||
return irq;
|
||||
@ -769,6 +773,7 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
irq->pending_latch = true;
|
||||
vgic_queue_irq_unlock(kvm, irq, flags);
|
||||
vgic_put_irq(kvm, irq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -357,31 +357,13 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < len * 8; i++) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
ret = vgic_uaccess_write_spending(vcpu, addr, len, val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
if (test_bit(i, &val)) {
|
||||
/*
|
||||
* pending_latch is set irrespective of irq type
|
||||
* (level or edge) to avoid dependency that VM should
|
||||
* restore irq config before pending info.
|
||||
*/
|
||||
irq->pending_latch = true;
|
||||
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
||||
} else {
|
||||
irq->pending_latch = false;
|
||||
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return vgic_uaccess_write_cpending(vcpu, addr, len, ~val);
|
||||
}
|
||||
|
||||
/* We want to avoid outer shareable. */
|
||||
|
@ -301,9 +301,8 @@ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
}
|
||||
|
||||
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
|
||||
unsigned long val, bool is_user)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
@ -312,14 +311,22 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
for_each_set_bit(i, &val, len * 8) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
|
||||
/* GICD_ISPENDR0 SGI bits are WI */
|
||||
if (is_vgic_v2_sgi(vcpu, irq)) {
|
||||
/* GICD_ISPENDR0 SGI bits are WI when written from the guest. */
|
||||
if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
|
||||
/*
|
||||
* GICv2 SGIs are terribly broken. We can't restore
|
||||
* the source of the interrupt, so just pick the vcpu
|
||||
* itself as the source...
|
||||
*/
|
||||
if (is_vgic_v2_sgi(vcpu, irq))
|
||||
irq->source |= BIT(vcpu->vcpu_id);
|
||||
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
/* HW SGI? Ask the GIC to inject it */
|
||||
int err;
|
||||
@ -335,7 +342,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
irq->pending_latch = true;
|
||||
if (irq->hw)
|
||||
if (irq->hw && !is_user)
|
||||
vgic_irq_set_phys_active(irq, true);
|
||||
|
||||
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
||||
@ -343,33 +350,18 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
__set_pending(vcpu, addr, len, val, false);
|
||||
}
|
||||
|
||||
int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_set_bit(i, &val, len * 8) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
irq->pending_latch = true;
|
||||
|
||||
/*
|
||||
* GICv2 SGIs are terribly broken. We can't restore
|
||||
* the source of the interrupt, so just pick the vcpu
|
||||
* itself as the source...
|
||||
*/
|
||||
if (is_vgic_v2_sgi(vcpu, irq))
|
||||
irq->source |= BIT(vcpu->vcpu_id);
|
||||
|
||||
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
__set_pending(vcpu, addr, len, val, true);
|
||||
return 0;
|
||||
}
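The hunks above fold the guest MMIO path and the userspace (uaccess) path into one worker that takes an is_user flag, so the GICv2 SGI write-ignore and hardware-forwarding special cases only apply to guest writes. Below is a minimal standalone sketch of that wrapper pattern; every name is illustrative, and none of the real vgic locking or per-IRQ reference counting is shown.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the shared worker: is_user selects the policy. */
static void set_pending_common(int intid, bool is_user)
{
	/* Guest writes to GICv2 SGI pending bits are write-ignored. */
	if (!is_user && intid < 16) {
		printf("intid %d: ignored (SGI written by guest)\n", intid);
		return;
	}
	printf("intid %d: pending latched (%s)\n",
	       intid, is_user ? "userspace restore" : "guest MMIO");
}

/* Thin wrappers, mirroring the MMIO and uaccess entry points above. */
static void mmio_write_spending(int intid)
{
	set_pending_common(intid, false);
}

static int uaccess_write_spending(int intid)
{
	set_pending_common(intid, true);
	return 0;
}

int main(void)
{
	mmio_write_spending(3);     /* SGI from the guest: write-ignored */
	uaccess_write_spending(3);  /* same SGI from userspace: honoured */
	return 0;
}
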
@ -394,9 +386,9 @@ static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
|
||||
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
static void __clear_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val, bool is_user)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
@ -405,14 +397,22 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
||||
for_each_set_bit(i, &val, len * 8) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
|
||||
/* GICD_ICPENDR0 SGI bits are WI */
|
||||
if (is_vgic_v2_sgi(vcpu, irq)) {
|
||||
/* GICD_ICPENDR0 SGI bits are WI when written from the guest. */
|
||||
if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
|
||||
/*
|
||||
* More fun with GICv2 SGIs! If we're clearing one of them
|
||||
* from userspace, which source vcpu to clear? Let's not
|
||||
* even think of it, and blow the whole set.
|
||||
*/
|
||||
if (is_vgic_v2_sgi(vcpu, irq))
|
||||
irq->source = 0;
|
||||
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
/* HW SGI? Ask the GIC to clear its pending bit */
|
||||
int err;
|
||||
@ -427,7 +427,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (irq->hw)
|
||||
if (irq->hw && !is_user)
|
||||
vgic_hw_irq_cpending(vcpu, irq);
|
||||
else
|
||||
irq->pending_latch = false;
|
||||
@ -437,33 +437,18 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
__clear_pending(vcpu, addr, len, val, false);
|
||||
}
|
||||
|
||||
int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_set_bit(i, &val, len * 8) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
/*
|
||||
* More fun with GICv2 SGIs! If we're clearing one of them
|
||||
* from userspace, which source vcpu to clear? Let's not
|
||||
* even think of it, and blow the whole set.
|
||||
*/
|
||||
if (is_vgic_v2_sgi(vcpu, irq))
|
||||
irq->source = 0;
|
||||
|
||||
irq->pending_latch = false;
|
||||
|
||||
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
__clear_pending(vcpu, addr, len, val, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,10 @@ struct kvm_vcpu_stat {
|
||||
u64 signal_exits;
|
||||
};
|
||||
|
||||
#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
|
||||
#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1)
|
||||
struct kvm_arch_memory_slot {
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
struct kvm_context {
|
||||
@ -92,8 +95,10 @@ enum emulation_result {
|
||||
};
|
||||
|
||||
#define KVM_LARCH_FPU (0x1 << 0)
|
||||
#define KVM_LARCH_SWCSR_LATEST (0x1 << 1)
|
||||
#define KVM_LARCH_HWCSR_USABLE (0x1 << 2)
|
||||
#define KVM_LARCH_LSX (0x1 << 1)
|
||||
#define KVM_LARCH_LASX (0x1 << 2)
|
||||
#define KVM_LARCH_SWCSR_LATEST (0x1 << 3)
|
||||
#define KVM_LARCH_HWCSR_USABLE (0x1 << 4)
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
/*
|
||||
@ -175,6 +180,21 @@ static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned
|
||||
csr->csrs[reg] = val;
|
||||
}
|
||||
|
||||
static inline bool kvm_guest_has_fpu(struct kvm_vcpu_arch *arch)
|
||||
{
|
||||
return arch->cpucfg[2] & CPUCFG2_FP;
|
||||
}
|
||||
|
||||
static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch)
|
||||
{
|
||||
return arch->cpucfg[2] & CPUCFG2_LSX;
|
||||
}
|
||||
|
||||
static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch)
|
||||
{
|
||||
return arch->cpucfg[2] & CPUCFG2_LASX;
|
||||
}
|
||||
|
||||
/* Debug: dump vcpu state */
|
||||
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -183,7 +203,6 @@ void kvm_flush_tlb_all(void);
|
||||
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
|
||||
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
|
@ -55,7 +55,26 @@ void kvm_save_fpu(struct loongarch_fpu *fpu);
|
||||
void kvm_restore_fpu(struct loongarch_fpu *fpu);
|
||||
void kvm_restore_fcsr(struct loongarch_fpu *fpu);
|
||||
|
||||
void kvm_acquire_timer(struct kvm_vcpu *vcpu);
|
||||
#ifdef CONFIG_CPU_HAS_LSX
|
||||
int kvm_own_lsx(struct kvm_vcpu *vcpu);
|
||||
void kvm_save_lsx(struct loongarch_fpu *fpu);
|
||||
void kvm_restore_lsx(struct loongarch_fpu *fpu);
|
||||
#else
|
||||
static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; }
|
||||
static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { }
|
||||
static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_LASX
|
||||
int kvm_own_lasx(struct kvm_vcpu *vcpu);
|
||||
void kvm_save_lasx(struct loongarch_fpu *fpu);
|
||||
void kvm_restore_lasx(struct loongarch_fpu *fpu);
|
||||
#else
|
||||
static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; }
|
||||
static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
|
||||
static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
|
||||
#endif
|
||||
|
||||
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
|
||||
void kvm_reset_timer(struct kvm_vcpu *vcpu);
|
||||
void kvm_save_timer(struct kvm_vcpu *vcpu);
|
||||
|
@ -79,6 +79,7 @@ struct kvm_fpu {
|
||||
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
|
||||
#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
|
||||
#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
|
||||
#define KVM_LOONGARCH_VCPU_CPUCFG 0
|
||||
|
||||
struct kvm_debug_exit_arch {
|
||||
};
|
||||
|
@ -349,6 +349,7 @@ SYM_FUNC_START(_restore_lsx_upper)
|
||||
lsx_restore_all_upper a0 t0 t1
|
||||
jr ra
|
||||
SYM_FUNC_END(_restore_lsx_upper)
|
||||
EXPORT_SYMBOL(_restore_lsx_upper)
|
||||
|
||||
SYM_FUNC_START(_init_lsx_upper)
|
||||
lsx_init_all_upper t1
|
||||
@ -384,6 +385,7 @@ SYM_FUNC_START(_restore_lasx_upper)
|
||||
lasx_restore_all_upper a0 t0 t1
|
||||
jr ra
|
||||
SYM_FUNC_END(_restore_lasx_upper)
|
||||
EXPORT_SYMBOL(_restore_lasx_upper)
|
||||
|
||||
SYM_FUNC_START(_init_lasx_upper)
|
||||
lasx_init_all_upper t1
|
||||
|
@@ -22,14 +22,13 @@ config KVM
	depends on AS_HAS_LVZ_EXTENSION
	depends on HAVE_KVM
	select HAVE_KVM_DIRTY_RING_ACQ_REL
	select HAVE_KVM_EVENTFD
	select HAVE_KVM_VCPU_ASYNC_IOCTL
	select KVM_COMMON
	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
	select KVM_GENERIC_HARDWARE_ENABLING
	select KVM_GENERIC_MMU_NOTIFIER
	select KVM_MMIO
	select KVM_XFER_TO_GUEST_WORK
	select MMU_NOTIFIER
	select PREEMPT_NOTIFIERS
	help
	  Support hosting virtualized guest machines using
	  hardware virtualization extensions. You will need
@ -200,17 +200,8 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
|
||||
++vcpu->stat.idle_exits;
|
||||
trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE);
|
||||
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
/*
|
||||
* Switch to the software timer before halt-polling/blocking as
|
||||
* the guest's timer may be a break event for the vCPU, and the
|
||||
* hypervisor timer runs only when the CPU is in guest mode.
|
||||
* Switch before halt-polling so that KVM recognizes an expired
|
||||
* timer before blocking.
|
||||
*/
|
||||
kvm_save_timer(vcpu);
|
||||
kvm_vcpu_block(vcpu);
|
||||
}
|
||||
if (!kvm_arch_vcpu_runnable(vcpu))
|
||||
kvm_vcpu_halt(vcpu);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
@ -643,6 +634,11 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *run = vcpu->run;
|
||||
|
||||
if (!kvm_guest_has_fpu(&vcpu->arch)) {
|
||||
kvm_queue_exception(vcpu, EXCCODE_INE, 0);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/*
|
||||
* If guest FPU not present, the FPU operation should have been
|
||||
* treated as a reserved instruction!
|
||||
@ -659,6 +655,36 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
|
||||
* @vcpu: Virtual CPU context.
|
||||
*
|
||||
* Handle when the guest attempts to use LSX when it is disabled in the root
|
||||
* context.
|
||||
*/
|
||||
static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_own_lsx(vcpu))
|
||||
kvm_queue_exception(vcpu, EXCCODE_INE, 0);
|
||||
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_handle_lasx_disabled() - Guest used LASX while disabled in root.
|
||||
* @vcpu: Virtual CPU context.
|
||||
*
|
||||
* Handle when the guest attempts to use LASX when it is disabled in the root
|
||||
* context.
|
||||
*/
|
||||
static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_own_lasx(vcpu))
|
||||
kvm_queue_exception(vcpu, EXCCODE_INE, 0);
|
||||
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/*
|
||||
* LoongArch KVM callback handling for unimplemented guest exiting
|
||||
*/
|
||||
@ -687,6 +713,8 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
|
||||
[EXCCODE_TLBS] = kvm_handle_write_fault,
|
||||
[EXCCODE_TLBM] = kvm_handle_write_fault,
|
||||
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
|
||||
[EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
|
||||
[EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
|
||||
[EXCCODE_GSPR] = kvm_handle_gspr,
|
||||
};
|
||||
|
||||
|
@ -287,7 +287,6 @@ int kvm_arch_hardware_enable(void)
|
||||
if (env & CSR_GCFG_MATC_ROOT)
|
||||
gcfg |= CSR_GCFG_MATC_ROOT;
|
||||
|
||||
gcfg |= CSR_GCFG_TIT;
|
||||
write_csr_gcfg(gcfg);
|
||||
|
||||
kvm_flush_tlb_all();
|
||||
|
@ -13,6 +13,16 @@
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot)
|
||||
{
|
||||
return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE;
|
||||
}
|
||||
|
||||
static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot)
|
||||
{
|
||||
return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE;
|
||||
}
|
||||
|
||||
static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
|
||||
{
|
||||
ctx->level = kvm->arch.root_level;
|
||||
@ -365,6 +375,69 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new, enum kvm_mr_change change)
|
||||
{
|
||||
gpa_t gpa_start;
|
||||
hva_t hva_start;
|
||||
size_t size, gpa_offset, hva_offset;
|
||||
|
||||
if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE))
|
||||
return 0;
|
||||
/*
|
||||
* Prevent userspace from creating a memory region outside of the
|
||||
* VM GPA address space
|
||||
*/
|
||||
if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT))
|
||||
return -ENOMEM;
|
||||
|
||||
new->arch.flags = 0;
|
||||
size = new->npages * PAGE_SIZE;
|
||||
gpa_start = new->base_gfn << PAGE_SHIFT;
|
||||
hva_start = new->userspace_addr;
|
||||
if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE)
|
||||
&& IS_ALIGNED(hva_start, PMD_SIZE))
|
||||
new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE;
|
||||
else {
|
||||
/*
|
||||
* Pages belonging to memslots that don't have the same
|
||||
* alignment within a PMD for userspace and GPA cannot be
|
||||
* mapped with PMD entries, because we'll end up mapping
|
||||
* the wrong pages.
|
||||
*
|
||||
* Consider a layout like the following:
|
||||
*
|
||||
* memslot->userspace_addr:
|
||||
* +-----+--------------------+--------------------+---+
|
||||
* |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
|
||||
* +-----+--------------------+--------------------+---+
|
||||
*
|
||||
* memslot->base_gfn << PAGE_SIZE:
|
||||
* +---+--------------------+--------------------+-----+
|
||||
* |abc|def Stage-2 block | Stage-2 block |tvxyz|
|
||||
* +---+--------------------+--------------------+-----+
|
||||
*
|
||||
* If we create those stage-2 blocks, we'll end up with this
|
||||
* incorrect mapping:
|
||||
* d -> f
|
||||
* e -> g
|
||||
* f -> h
|
||||
*/
|
||||
gpa_offset = gpa_start & (PMD_SIZE - 1);
|
||||
hva_offset = hva_start & (PMD_SIZE - 1);
|
||||
if (gpa_offset != hva_offset) {
|
||||
new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
|
||||
} else {
|
||||
if (gpa_offset == 0)
|
||||
gpa_offset = PMD_SIZE;
|
||||
if ((size + gpa_offset) < (PMD_SIZE * 2))
|
||||
new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
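The kvm_arch_prepare_memory_region() hunk above precomputes, per memslot, whether PMD (huge) mappings can ever be used. The sketch below restates roughly the condition under which the slot is not flagged as hugepage-incapable, assuming a 2 MiB PMD granule; the function and constant names are illustrative, not the kernel's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMD_SIZE (1UL << 21)	/* assumed 2 MiB PMD granule for the example */

/*
 * Guest-physical and userspace starts must be congruent modulo PMD_SIZE,
 * and the slot must still span at least one whole, aligned PMD block.
 */
static bool slot_can_use_pmd(uint64_t gpa_start, uint64_t hva_start, uint64_t size)
{
	uint64_t gpa_off = gpa_start & (PMD_SIZE - 1);
	uint64_t hva_off = hva_start & (PMD_SIZE - 1);

	if (gpa_off != hva_off)
		return false;		/* PMD mappings would map the wrong pages */

	if (gpa_off == 0)
		gpa_off = PMD_SIZE;	/* account for one leading block's worth */

	return (size + gpa_off) >= (PMD_SIZE * 2);
}

int main(void)
{
	/* aligned identically within 2 MiB: huge mappings possible */
	printf("%d\n", slot_can_use_pmd(0x40000000, 0x7f0000200000, 64ULL << 20));
	/* offsets differ by 4 KiB: huge mappings ruled out */
	printf("%d\n", slot_can_use_pmd(0x40001000, 0x7f0000200000, 64ULL << 20));
	return 0;
}
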
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
@ -562,47 +635,23 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ
|
||||
}
|
||||
|
||||
static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
|
||||
unsigned long hva, unsigned long map_size, bool write)
|
||||
unsigned long hva, bool write)
|
||||
{
|
||||
size_t size;
|
||||
gpa_t gpa_start;
|
||||
hva_t uaddr_start, uaddr_end;
|
||||
hva_t start, end;
|
||||
|
||||
/* Disable dirty logging on HugePages */
|
||||
if (kvm_slot_dirty_track_enabled(memslot) && write)
|
||||
return false;
|
||||
|
||||
size = memslot->npages * PAGE_SIZE;
|
||||
gpa_start = memslot->base_gfn << PAGE_SHIFT;
|
||||
uaddr_start = memslot->userspace_addr;
|
||||
uaddr_end = uaddr_start + size;
|
||||
if (kvm_hugepage_capable(memslot))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Pages belonging to memslots that don't have the same alignment
|
||||
* within a PMD for userspace and GPA cannot be mapped with stage-2
|
||||
* PMD entries, because we'll end up mapping the wrong pages.
|
||||
*
|
||||
* Consider a layout like the following:
|
||||
*
|
||||
* memslot->userspace_addr:
|
||||
* +-----+--------------------+--------------------+---+
|
||||
* |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
|
||||
* +-----+--------------------+--------------------+---+
|
||||
*
|
||||
* memslot->base_gfn << PAGE_SIZE:
|
||||
* +---+--------------------+--------------------+-----+
|
||||
* |abc|def Stage-2 block | Stage-2 block |tvxyz|
|
||||
* +---+--------------------+--------------------+-----+
|
||||
*
|
||||
* If we create those stage-2 blocks, we'll end up with this incorrect
|
||||
* mapping:
|
||||
* d -> f
|
||||
* e -> g
|
||||
* f -> h
|
||||
*/
|
||||
if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
|
||||
if (kvm_hugepage_incapable(memslot))
|
||||
return false;
|
||||
|
||||
start = memslot->userspace_addr;
|
||||
end = start + memslot->npages * PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Next, let's make sure we're not trying to map anything not covered
|
||||
* by the memslot. This means we have to prohibit block size mappings
|
||||
@ -615,8 +664,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
|
||||
* userspace_addr or the base_gfn, as both are equally aligned (per
|
||||
* the check above) and equally sized.
|
||||
*/
|
||||
return (hva & ~(map_size - 1)) >= uaddr_start &&
|
||||
(hva & ~(map_size - 1)) + map_size <= uaddr_end;
|
||||
return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -842,7 +890,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
|
||||
|
||||
/* Disable dirty logging on HugePages */
|
||||
level = 0;
|
||||
if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) {
|
||||
if (!fault_supports_huge_mapping(memslot, hva, write)) {
|
||||
level = 0;
|
||||
} else {
|
||||
level = host_pfn_mapping_level(kvm, gfn, memslot);
|
||||
@ -901,12 +949,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
{
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new, enum kvm_mr_change change)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
|
@ -245,6 +245,37 @@ SYM_FUNC_START(kvm_restore_fpu)
|
||||
jr ra
|
||||
SYM_FUNC_END(kvm_restore_fpu)
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_LSX
|
||||
SYM_FUNC_START(kvm_save_lsx)
|
||||
fpu_save_csr a0 t1
|
||||
fpu_save_cc a0 t1 t2
|
||||
lsx_save_data a0 t1
|
||||
jr ra
|
||||
SYM_FUNC_END(kvm_save_lsx)
|
||||
|
||||
SYM_FUNC_START(kvm_restore_lsx)
|
||||
lsx_restore_data a0 t1
|
||||
fpu_restore_cc a0 t1 t2
|
||||
fpu_restore_csr a0 t1 t2
|
||||
jr ra
|
||||
SYM_FUNC_END(kvm_restore_lsx)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_LASX
|
||||
SYM_FUNC_START(kvm_save_lasx)
|
||||
fpu_save_csr a0 t1
|
||||
fpu_save_cc a0 t1 t2
|
||||
lasx_save_data a0 t1
|
||||
jr ra
|
||||
SYM_FUNC_END(kvm_save_lasx)
|
||||
|
||||
SYM_FUNC_START(kvm_restore_lasx)
|
||||
lasx_restore_data a0 t1
|
||||
fpu_restore_cc a0 t1 t2
|
||||
fpu_restore_csr a0 t1 t2
|
||||
jr ra
|
||||
SYM_FUNC_END(kvm_restore_lasx)
|
||||
#endif
|
||||
.section ".rodata"
|
||||
SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
|
||||
SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
|
||||
|
@ -64,41 +64,24 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
|
||||
kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TVAL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore hard timer state and enable guest to access timer registers
|
||||
* without trap, should be called with irq disabled
|
||||
*/
|
||||
void kvm_acquire_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long cfg;
|
||||
|
||||
cfg = read_csr_gcfg();
|
||||
if (!(cfg & CSR_GCFG_TIT))
|
||||
return;
|
||||
|
||||
/* Enable guest access to hard timer */
|
||||
write_csr_gcfg(cfg & ~CSR_GCFG_TIT);
|
||||
|
||||
/*
|
||||
* Freeze the soft-timer and sync the guest stable timer with it. We do
|
||||
* this with interrupts disabled to avoid latency.
|
||||
*/
|
||||
hrtimer_cancel(&vcpu->arch.swtimer);
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore soft timer state from saved context.
|
||||
*/
|
||||
void kvm_restore_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long cfg, delta, period;
|
||||
unsigned long cfg, estat;
|
||||
unsigned long ticks, delta, period;
|
||||
ktime_t expire, now;
|
||||
struct loongarch_csrs *csr = vcpu->arch.csr;
|
||||
|
||||
/*
|
||||
* Set guest stable timer cfg csr
|
||||
* Disable timer before restore estat CSR register, avoid to
|
||||
* get invalid timer interrupt for old timer cfg
|
||||
*/
|
||||
cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
|
||||
|
||||
write_gcsr_timercfg(0);
|
||||
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
|
||||
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
|
||||
if (!(cfg & CSR_TCFG_EN)) {
|
||||
@ -107,24 +90,56 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Freeze the soft-timer and sync the guest stable timer with it.
|
||||
*/
|
||||
hrtimer_cancel(&vcpu->arch.swtimer);
|
||||
|
||||
/*
|
||||
* From LoongArch Reference Manual Volume 1 Chapter 7.6.2
|
||||
* If oneshot timer is fired, CSR TVAL will be -1, there are two
|
||||
* conditions:
|
||||
* 1) timer is fired during exiting to host
|
||||
* 2) timer is fired and vm is doing timer irq, and then exiting to
|
||||
* host. Host should not inject timer irq to avoid spurious
|
||||
* timer interrupt again
|
||||
*/
|
||||
ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
|
||||
estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
|
||||
if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) {
|
||||
/*
|
||||
* Writing 0 to LOONGARCH_CSR_TVAL will inject timer irq
|
||||
* and set CSR TVAL with -1
|
||||
*/
|
||||
write_gcsr_timertick(0);
|
||||
|
||||
/*
|
||||
* Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear
|
||||
* timer interrupt, and CSR TVAL keeps unchanged with -1, it
|
||||
* avoids spurious timer interrupt
|
||||
*/
|
||||
if (!(estat & CPU_TIMER))
|
||||
gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set remainder tick value if not expired
|
||||
*/
|
||||
delta = 0;
|
||||
now = ktime_get();
|
||||
expire = vcpu->arch.expire;
|
||||
if (ktime_before(now, expire))
|
||||
delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
|
||||
else {
|
||||
if (cfg & CSR_TCFG_PERIOD) {
|
||||
period = cfg & CSR_TCFG_VAL;
|
||||
delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
|
||||
delta = period - (delta % period);
|
||||
} else
|
||||
delta = 0;
|
||||
else if (cfg & CSR_TCFG_PERIOD) {
|
||||
period = cfg & CSR_TCFG_VAL;
|
||||
delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
|
||||
delta = period - (delta % period);
|
||||
|
||||
/*
|
||||
* Inject timer here though sw timer should inject timer
|
||||
* interrupt async already, since sw timer may be cancelled
|
||||
* during injecting intr async in function kvm_acquire_timer
|
||||
* during injecting intr async
|
||||
*/
|
||||
kvm_queue_irq(vcpu, INT_TI);
|
||||
}
|
||||
@ -139,27 +154,41 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
static void _kvm_save_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long ticks, delta;
|
||||
unsigned long ticks, delta, cfg;
|
||||
ktime_t expire;
|
||||
struct loongarch_csrs *csr = vcpu->arch.csr;
|
||||
|
||||
cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
|
||||
ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
|
||||
delta = tick_to_ns(vcpu, ticks);
|
||||
expire = ktime_add_ns(ktime_get(), delta);
|
||||
vcpu->arch.expire = expire;
|
||||
if (ticks) {
|
||||
|
||||
/*
|
||||
* From LoongArch Reference Manual Volume 1 Chapter 7.6.2
|
||||
* If period timer is fired, CSR TVAL will be reloaded from CSR TCFG
|
||||
* If oneshot timer is fired, CSR TVAL will be -1
|
||||
* Here judge one-shot timer fired by checking whether TVAL is larger
|
||||
* than TCFG
|
||||
*/
|
||||
if (ticks < cfg) {
|
||||
delta = tick_to_ns(vcpu, ticks);
|
||||
expire = ktime_add_ns(ktime_get(), delta);
|
||||
vcpu->arch.expire = expire;
|
||||
|
||||
/*
|
||||
* Update hrtimer to use new timeout
|
||||
* HRTIMER_MODE_PINNED is suggested since vcpu may run in
|
||||
* the same physical cpu in next time
|
||||
*/
|
||||
hrtimer_cancel(&vcpu->arch.swtimer);
|
||||
hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
|
||||
} else
|
||||
} else if (vcpu->stat.generic.blocking) {
|
||||
/*
|
||||
* Inject timer interrupt so that hall polling can dectect and exit
|
||||
		 * Inject timer interrupt so that halt polling can detect and exit.
		 * The vCPU is already scheduled out and sleeps on the rcuwait queue,
		 * so it will not poll pending events again; kvm_queue_irq() alone is
		 * not enough, the hrtimer-based swtimer must be used here.
|
||||
*/
|
||||
kvm_queue_irq(vcpu, INT_TI);
|
||||
expire = ktime_add_ns(ktime_get(), 10);
|
||||
vcpu->arch.expire = expire;
|
||||
hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
|
||||
}
|
||||
}
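The _kvm_save_timer() change above tells a still-running one-shot timer (TVAL counting down, so it reads back below the programmed TCFG value) apart from one that has already fired (TVAL wraps to all-ones). A trivial standalone restatement of that comparison, with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * After a one-shot timer fires, the count register reads back as ~0, which
 * is at least as large as the initial value programmed into the config
 * register, so "ticks < cfg_initial" means the countdown is still live.
 */
static bool oneshot_still_counting(uint64_t ticks, uint64_t cfg_initial)
{
	return ticks < cfg_initial;
}

int main(void)
{
	uint64_t cfg_initial = 1000000;

	printf("%d\n", oneshot_still_counting(12345, cfg_initial));      /* 1 */
	printf("%d\n", oneshot_still_counting(UINT64_MAX, cfg_initial)); /* 0 */
	return 0;
}
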
/*
|
||||
@ -168,21 +197,15 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
void kvm_save_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long cfg;
|
||||
struct loongarch_csrs *csr = vcpu->arch.csr;
|
||||
|
||||
preempt_disable();
|
||||
cfg = read_csr_gcfg();
|
||||
if (!(cfg & CSR_GCFG_TIT)) {
|
||||
/* Disable guest use of hard timer */
|
||||
write_csr_gcfg(cfg | CSR_GCFG_TIT);
|
||||
|
||||
/* Save hard timer state */
|
||||
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
|
||||
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
|
||||
if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
|
||||
_kvm_save_timer(vcpu);
|
||||
}
|
||||
/* Save hard timer state */
|
||||
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
|
||||
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
|
||||
if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
|
||||
_kvm_save_timer(vcpu);
|
||||
|
||||
/* Save timer-related state to vCPU context */
|
||||
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
|
||||
|
@ -102,6 +102,8 @@ TRACE_EVENT(kvm_exit_gspr,
|
||||
#define KVM_TRACE_AUX_DISCARD 4
|
||||
|
||||
#define KVM_TRACE_AUX_FPU 1
|
||||
#define KVM_TRACE_AUX_LSX 2
|
||||
#define KVM_TRACE_AUX_LASX 3
|
||||
|
||||
#define kvm_trace_symbol_aux_op \
|
||||
{ KVM_TRACE_AUX_SAVE, "save" }, \
|
||||
@ -111,7 +113,9 @@ TRACE_EVENT(kvm_exit_gspr,
|
||||
{ KVM_TRACE_AUX_DISCARD, "discard" }
|
||||
|
||||
#define kvm_trace_symbol_aux_state \
|
||||
{ KVM_TRACE_AUX_FPU, "FPU" }
|
||||
{ KVM_TRACE_AUX_FPU, "FPU" }, \
|
||||
{ KVM_TRACE_AUX_LSX, "LSX" }, \
|
||||
{ KVM_TRACE_AUX_LASX, "LASX" }
|
||||
|
||||
TRACE_EVENT(kvm_aux,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
|
||||
|
@ -95,7 +95,6 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
|
||||
* check vmid before vcpu enter guest
|
||||
*/
|
||||
local_irq_disable();
|
||||
kvm_acquire_timer(vcpu);
|
||||
kvm_deliver_intr(vcpu);
|
||||
kvm_deliver_exception(vcpu);
|
||||
/* Make sure the vcpu mode has been written */
|
||||
@ -187,8 +186,15 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
||||
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_pending_timer(vcpu) ||
|
||||
int ret;
|
||||
|
||||
/* Protect from TOD sync and vcpu_load/put() */
|
||||
preempt_disable();
|
||||
ret = kvm_pending_timer(vcpu) ||
|
||||
kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI);
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
|
||||
@ -244,23 +250,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_migrate_count() - Migrate timer.
|
||||
* @vcpu: Virtual CPU.
|
||||
*
|
||||
* Migrate hrtimer to the current CPU by cancelling and restarting it
|
||||
* if the hrtimer is active.
|
||||
*
|
||||
* Must be called when the vCPU is migrated to a different CPU, so that
|
||||
* the timer can interrupt the guest at the new CPU, and the timer irq can
|
||||
* be delivered to the vCPU.
|
||||
*/
|
||||
static void kvm_migrate_count(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (hrtimer_cancel(&vcpu->arch.swtimer))
|
||||
hrtimer_restart(&vcpu->arch.swtimer);
|
||||
}
|
||||
|
||||
static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
|
||||
{
|
||||
unsigned long gintc;
|
||||
@ -309,6 +298,76 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int _kvm_get_cpucfg(int id, u64 *v)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
	if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
|
||||
return -EINVAL;
|
||||
|
||||
switch (id) {
|
||||
case 2:
|
||||
/* Return CPUCFG2 features which have been supported by KVM */
|
||||
*v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
|
||||
CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
|
||||
CPUCFG2_LAM;
|
||||
/*
|
||||
* If LSX is supported by CPU, it is also supported by KVM,
|
||||
* as we implement it.
|
||||
*/
|
||||
if (cpu_has_lsx)
|
||||
*v |= CPUCFG2_LSX;
|
||||
/*
|
||||
		 * If LASX is supported by CPU, it is also supported by KVM,
|
||||
* as we implement it.
|
||||
*/
|
||||
if (cpu_has_lasx)
|
||||
*v |= CPUCFG2_LASX;
|
||||
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_check_cpucfg(int id, u64 val)
|
||||
{
|
||||
u64 mask;
|
||||
int ret = 0;
|
||||
|
||||
if (id < 0 && id >= KVM_MAX_CPUCFG_REGS)
|
||||
return -EINVAL;
|
||||
|
||||
	ret = _kvm_get_cpucfg(id, &mask);
	if (ret)
		return ret;
|
||||
|
||||
switch (id) {
|
||||
case 2:
|
||||
/* CPUCFG2 features checking */
|
||||
if (val & ~mask)
|
||||
/* The unsupported features should not be set */
|
||||
ret = -EINVAL;
|
||||
else if (!(val & CPUCFG2_LLFTP))
|
||||
			/* LLFTP must be set, as the guest must have a constant timer */
|
||||
ret = -EINVAL;
|
||||
else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
|
||||
			/* Single and double precision floating point must both be set when FP is enabled */
|
||||
ret = -EINVAL;
|
||||
else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
|
||||
			/* FP must be set when enabling LSX */
|
||||
ret = -EINVAL;
|
||||
else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
|
||||
			/* LSX and FP must be set when enabling LASX; FP has already been checked above. */
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
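kvm_check_cpucfg() above enforces the CPUCFG2 feature dependencies: the constant timer is mandatory, FP requires both precisions, LSX requires FP, and LASX requires LSX. The following is a compact restatement of those rules as a standalone predicate; the bit positions are placeholders, not the real CPUCFG2 encoding.

#include <stdbool.h>
#include <stdint.h>

/* Placeholder bit assignments, purely for illustration. */
#define FP_BIT    (1u << 0)
#define FPSP_BIT  (1u << 1)
#define FPDP_BIT  (1u << 2)
#define LLFTP_BIT (1u << 3)
#define LSX_BIT   (1u << 4)
#define LASX_BIT  (1u << 5)

static bool cpucfg2_valid(uint32_t val, uint32_t supported_mask)
{
	if (val & ~supported_mask)
		return false;	/* unsupported feature requested */
	if (!(val & LLFTP_BIT))
		return false;	/* constant timer is mandatory */
	if ((val & FP_BIT) && (!(val & FPSP_BIT) || !(val & FPDP_BIT)))
		return false;	/* FP needs single and double precision */
	if ((val & LSX_BIT) && !(val & FP_BIT))
		return false;	/* LSX depends on FP */
	if ((val & LASX_BIT) && !(val & LSX_BIT))
		return false;	/* LASX depends on LSX */
	return true;
}
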
static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg, u64 *v)
|
||||
{
|
||||
@ -378,10 +437,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
case KVM_REG_LOONGARCH_CPUCFG:
|
||||
id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
|
||||
if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
|
||||
vcpu->arch.cpucfg[id] = (u32)v;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
ret = kvm_check_cpucfg(id, v);
|
||||
if (ret)
|
||||
break;
|
||||
vcpu->arch.cpucfg[id] = (u32)v;
|
||||
break;
|
||||
case KVM_REG_LOONGARCH_KVM:
|
||||
switch (reg->id) {
|
||||
@ -471,10 +530,94 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
switch (attr->attr) {
|
||||
case 2:
|
||||
return 0;
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_LOONGARCH_VCPU_CPUCFG:
|
||||
ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = 0;
|
||||
uint64_t val;
|
||||
uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
|
||||
|
||||
ret = _kvm_get_cpucfg(attr->attr, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
put_user(val, uaddr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_LOONGARCH_VCPU_CPUCFG:
|
||||
ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret = -ENXIO;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_LOONGARCH_VCPU_CPUCFG:
|
||||
ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
long r;
|
||||
struct kvm_device_attr attr;
|
||||
void __user *argp = (void __user *)arg;
|
||||
struct kvm_vcpu *vcpu = filp->private_data;
|
||||
|
||||
@ -514,6 +657,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
|
||||
break;
|
||||
}
|
||||
case KVM_HAS_DEVICE_ATTR: {
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attr, argp, sizeof(attr)))
|
||||
break;
|
||||
r = kvm_loongarch_vcpu_has_attr(vcpu, &attr);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_DEVICE_ATTR: {
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attr, argp, sizeof(attr)))
|
||||
break;
|
||||
r = kvm_loongarch_vcpu_get_attr(vcpu, &attr);
|
||||
break;
|
||||
}
|
||||
case KVM_SET_DEVICE_ATTR: {
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attr, argp, sizeof(attr)))
|
||||
break;
|
||||
r = kvm_loongarch_vcpu_set_attr(vcpu, &attr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOIOCTLCMD;
|
||||
break;
|
||||
@ -561,12 +725,96 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_LSX
|
||||
/* Enable LSX and restore context */
|
||||
int kvm_own_lsx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch))
|
||||
return -EINVAL;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* Enable LSX for guest */
|
||||
set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
|
||||
switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
|
||||
case KVM_LARCH_FPU:
|
||||
/*
|
||||
* Guest FPU state already loaded,
|
||||
* only restore upper LSX state
|
||||
*/
|
||||
_restore_lsx_upper(&vcpu->arch.fpu);
|
||||
break;
|
||||
default:
|
||||
		/*
		 * Neither FP nor LSX already active,
		 * restore full LSX state
		 */
|
||||
kvm_restore_lsx(&vcpu->arch.fpu);
|
||||
break;
|
||||
}
|
||||
|
||||
trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
|
||||
vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
|
||||
preempt_enable();
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_HAS_LASX
|
||||
/* Enable LASX and restore context */
|
||||
int kvm_own_lasx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch))
|
||||
return -EINVAL;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
|
||||
switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
|
||||
case KVM_LARCH_LSX:
|
||||
case KVM_LARCH_LSX | KVM_LARCH_FPU:
|
||||
/* Guest LSX state already loaded, only restore upper LASX state */
|
||||
_restore_lasx_upper(&vcpu->arch.fpu);
|
||||
break;
|
||||
case KVM_LARCH_FPU:
|
||||
/* Guest FP state already loaded, only restore upper LSX & LASX state */
|
||||
_restore_lsx_upper(&vcpu->arch.fpu);
|
||||
_restore_lasx_upper(&vcpu->arch.fpu);
|
||||
break;
|
||||
default:
|
||||
		/* Neither FP nor LSX already active, restore full LASX state */
|
||||
kvm_restore_lasx(&vcpu->arch.fpu);
|
||||
break;
|
||||
}
|
||||
|
||||
trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
|
||||
vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
|
||||
preempt_enable();
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Save context and disable FPU */
|
||||
void kvm_lose_fpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
|
||||
if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
|
||||
kvm_save_lasx(&vcpu->arch.fpu);
|
||||
vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
|
||||
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX);
|
||||
|
||||
/* Disable LASX & LSX & FPU */
|
||||
clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
|
||||
} else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
|
||||
kvm_save_lsx(&vcpu->arch.fpu);
|
||||
vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU);
|
||||
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX);
|
||||
|
||||
/* Disable LSX & FPU */
|
||||
clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN);
|
||||
} else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
|
||||
kvm_save_fpu(&vcpu->arch.fpu);
|
||||
vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU;
|
||||
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
|
||||
@ -789,17 +1037,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
if (vcpu->arch.last_sched_cpu != cpu) {
|
||||
kvm_debug("[%d->%d]KVM vCPU[%d] switch\n",
|
||||
vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
|
||||
/*
|
||||
* Migrate the timer interrupt to the current CPU so that it
|
||||
* always interrupts the guest and synchronously triggers a
|
||||
* guest timer interrupt.
|
||||
*/
|
||||
kvm_migrate_count(vcpu);
|
||||
}
|
||||
|
||||
/* Restore guest state to registers */
|
||||
_kvm_vcpu_load(vcpu, cpu);
|
||||
local_irq_restore(flags);
|
||||
|
@ -810,8 +810,6 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
|
||||
pgd_t *kvm_pgd_alloc(void);
|
||||
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
/* Emulation */
|
||||
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
|
||||
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
|
||||
|
@ -20,13 +20,11 @@ config KVM
|
||||
depends on HAVE_KVM
|
||||
depends on MIPS_FP_SUPPORT
|
||||
select EXPORT_UASM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select KVM_COMMON
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select INTERVAL_TREE
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
help
|
||||
Support for hosting Guest kernels.
|
||||
|
@ -63,8 +63,6 @@
|
||||
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
#define HPTEG_CACHE_NUM (1 << 15)
|
||||
#define HPTEG_HASH_BITS_PTE 13
|
||||
#define HPTEG_HASH_BITS_PTE_LONG 12
|
||||
|
@ -19,13 +19,11 @@ if VIRTUALIZATION
|
||||
|
||||
config KVM
|
||||
bool
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_COMMON
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_VFIO
|
||||
select IRQ_BYPASS_MANAGER
|
||||
select HAVE_KVM_IRQ_BYPASS
|
||||
select INTERVAL_TREE
|
||||
|
||||
config KVM_BOOK3S_HANDLER
|
||||
bool
|
||||
@ -42,7 +40,7 @@ config KVM_BOOK3S_64_HANDLER
|
||||
config KVM_BOOK3S_PR_POSSIBLE
|
||||
bool
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
|
||||
config KVM_BOOK3S_HV_POSSIBLE
|
||||
bool
|
||||
@ -85,7 +83,7 @@ config KVM_BOOK3S_64_HV
|
||||
tristate "KVM for POWER7 and later using hypervisor mode in host"
|
||||
depends on KVM_BOOK3S_64 && PPC_POWERNV
|
||||
select KVM_BOOK3S_HV_POSSIBLE
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select CMA
|
||||
help
|
||||
Support running unmodified book3s_64 guest kernels in
|
||||
@ -194,7 +192,7 @@ config KVM_E500V2
|
||||
depends on !CONTEXT_TRACKING_USER
|
||||
select KVM
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
help
|
||||
Support running unmodified E500 guest kernels in virtual machines on
|
||||
E500v2 host processors.
|
||||
@ -211,7 +209,7 @@ config KVM_E500MC
|
||||
select KVM
|
||||
select KVM_MMIO
|
||||
select KVM_BOOKE_HV
|
||||
select MMU_NOTIFIER
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
help
|
||||
Support running unmodified E500MC/E5500/E6500 guest kernels in
|
||||
virtual machines on E500MC/E5500/E6500 host processors.
|
||||
@ -225,7 +223,6 @@ config KVM_MPIC
|
||||
bool "KVM in-kernel MPIC emulation"
|
||||
depends on KVM && PPC_E500
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_MSI
|
||||
help
|
||||
@ -238,7 +235,6 @@ config KVM_XICS
|
||||
bool "KVM in-kernel XICS emulation"
|
||||
depends on KVM_BOOK3S_64 && !KVM_MPIC
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
default y
|
||||
help
|
||||
Include support for the XICS (eXternal Interrupt Controller
|
||||
|
@ -6240,7 +6240,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
|
||||
}
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
|
||||
int bkt;
|
||||
|
@ -528,7 +528,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_ONE_REG:
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_IMMEDIATE_EXIT:
|
||||
case KVM_CAP_SET_GUEST_DEBUG:
|
||||
r = 1;
|
||||
@ -578,7 +577,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
case KVM_CAP_IRQFD_RESAMPLE:
|
||||
r = !xive_enabled();
|
||||
break;
|
||||
@ -632,13 +631,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
r = hv_enabled;
|
||||
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
|
||||
r = 1;
|
||||
#else
|
||||
r = 0;
|
||||
#endif
|
||||
break;
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
case KVM_CAP_PPC_HTAB_FD:
|
||||
|
@ -723,6 +723,25 @@ config COMPAT
|
||||
|
||||
If you want to execute 32-bit userspace applications, say Y.
|
||||
|
||||
config PARAVIRT
|
||||
bool "Enable paravirtualization code"
|
||||
depends on RISCV_SBI
|
||||
help
|
||||
This changes the kernel so it can modify itself when it is run
|
||||
under a hypervisor, potentially improving performance significantly
|
||||
over full virtualization.
|
||||
|
||||
config PARAVIRT_TIME_ACCOUNTING
|
||||
bool "Paravirtual steal time accounting"
|
||||
depends on PARAVIRT
|
||||
help
|
||||
Select this option to enable fine granularity task steal time
|
||||
accounting. Time spent executing other tasks in parallel with
|
||||
the current vCPU is discounted from the vCPU power. To account for
|
||||
that, there can be a small performance impact.
|
||||
|
||||
If in doubt, say N here.
|
||||
|
||||
config RELOCATABLE
|
||||
bool "Build a relocatable kernel"
|
||||
depends on MMU && 64BIT && !XIP_KERNEL
|
||||
|
@ -41,6 +41,7 @@
|
||||
KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_HFENCE \
|
||||
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
|
||||
|
||||
enum kvm_riscv_hfence_type {
|
||||
KVM_RISCV_HFENCE_UNKNOWN = 0,
|
||||
@ -262,13 +263,17 @@ struct kvm_vcpu_arch {
|
||||
|
||||
/* 'static' configurations which are set only once */
|
||||
struct kvm_vcpu_config cfg;
|
||||
|
||||
/* SBI steal-time accounting */
|
||||
struct {
|
||||
gpa_t shmem;
|
||||
u64 last_steal;
|
||||
} sta;
|
||||
};
|
||||
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
|
||||
|
||||
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
|
||||
@ -372,4 +377,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
|
||||
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
|
||||
|
||||
#endif /* __RISCV_KVM_HOST_H__ */
|
||||
|
@ -15,9 +15,10 @@
|
||||
#define KVM_SBI_VERSION_MINOR 0
|
||||
|
||||
enum kvm_riscv_sbi_ext_status {
|
||||
KVM_RISCV_SBI_EXT_UNINITIALIZED,
|
||||
KVM_RISCV_SBI_EXT_AVAILABLE,
|
||||
KVM_RISCV_SBI_EXT_UNAVAILABLE,
|
||||
KVM_RISCV_SBI_EXT_STATUS_UNINITIALIZED,
|
||||
KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE,
|
||||
KVM_RISCV_SBI_EXT_STATUS_ENABLED,
|
||||
KVM_RISCV_SBI_EXT_STATUS_DISABLED,
|
||||
};
|
||||
|
||||
struct kvm_vcpu_sbi_context {
|
||||
@ -36,7 +37,7 @@ struct kvm_vcpu_sbi_extension {
|
||||
unsigned long extid_start;
|
||||
unsigned long extid_end;
|
||||
|
||||
bool default_unavail;
|
||||
bool default_disabled;
|
||||
|
||||
/**
|
||||
* SBI extension handler. It can be defined for a given extension or group of
|
||||
@ -59,11 +60,21 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
|
||||
struct kvm_vcpu *vcpu, unsigned long extid);
|
||||
bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx);
|
||||
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
|
||||
unsigned long *reg_val);
|
||||
int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
|
||||
unsigned long reg_val);
|
||||
|
||||
#ifdef CONFIG_RISCV_SBI_V01
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
|
||||
#endif
|
||||
@ -74,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
|
||||
|
||||
|
arch/riscv/include/asm/paravirt.h (new file, 28 lines)
@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_PARAVIRT_H
#define _ASM_RISCV_PARAVIRT_H

#ifdef CONFIG_PARAVIRT
#include <linux/static_call_types.h>

struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;

u64 dummy_steal_clock(int cpu);

DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);

static inline u64 paravirt_steal_clock(int cpu)
{
	return static_call(pv_steal_clock)(cpu);
}

int __init pv_time_init(void);

#else

#define pv_time_init() do {} while (0)

#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_RISCV_PARAVIRT_H */
arch/riscv/include/asm/paravirt_api_clock.h (new file, 1 line)
@@ -0,0 +1 @@
#include <asm/paravirt.h>
|
@ -31,6 +31,7 @@ enum sbi_ext_id {
|
||||
SBI_EXT_SRST = 0x53525354,
|
||||
SBI_EXT_PMU = 0x504D55,
|
||||
SBI_EXT_DBCN = 0x4442434E,
|
||||
SBI_EXT_STA = 0x535441,
|
||||
|
||||
/* Experimentals extensions must lie within this range */
|
||||
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
|
||||
@ -243,6 +244,22 @@ enum sbi_ext_dbcn_fid {
|
||||
SBI_EXT_DBCN_CONSOLE_WRITE_BYTE = 2,
|
||||
};
|
||||
|
||||
/* SBI STA (steal-time accounting) extension */
|
||||
enum sbi_ext_sta_fid {
|
||||
SBI_EXT_STA_STEAL_TIME_SET_SHMEM = 0,
|
||||
};
|
||||
|
||||
struct sbi_sta_struct {
|
||||
__le32 sequence;
|
||||
__le32 flags;
|
||||
__le64 steal;
|
||||
u8 preempted;
|
||||
u8 pad[47];
|
||||
} __packed;
|
||||
|
||||
#define SBI_STA_SHMEM_DISABLE -1
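For reference, the sbi_sta_struct defined above packs to a single 64-byte record (4 + 4 + 8 + 1 + 47 bytes). A standalone restatement with plain fixed-width types, just to make the size explicit; the kernel itself uses __le32/__le64 fields and __packed.

#include <assert.h>
#include <stdint.h>

struct sta_shmem {			/* mirrors sbi_sta_struct above */
	uint32_t sequence;		/* even/odd update counter */
	uint32_t flags;
	uint64_t steal;			/* stolen time exposed by the hypervisor */
	uint8_t  preempted;
	uint8_t  pad[47];		/* pads the record to 64 bytes */
} __attribute__((packed));

static_assert(sizeof(struct sta_shmem) == 64, "STA record must be 64 bytes");
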
/* SBI spec version fields */
|
||||
#define SBI_SPEC_VERSION_DEFAULT 0x1
|
||||
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
|
||||
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
|
||||
|
@ -157,9 +157,16 @@ enum KVM_RISCV_SBI_EXT_ID {
|
||||
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
|
||||
KVM_RISCV_SBI_EXT_VENDOR,
|
||||
KVM_RISCV_SBI_EXT_DBCN,
|
||||
KVM_RISCV_SBI_EXT_STA,
|
||||
KVM_RISCV_SBI_EXT_MAX,
|
||||
};
|
||||
|
||||
/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_sbi_sta {
|
||||
unsigned long shmem_lo;
|
||||
unsigned long shmem_hi;
|
||||
};
|
||||
|
||||
/* Possible states for kvm_riscv_timer */
|
||||
#define KVM_RISCV_TIMER_STATE_OFF 0
|
||||
#define KVM_RISCV_TIMER_STATE_ON 1
|
||||
@ -241,6 +248,12 @@ enum KVM_RISCV_SBI_EXT_ID {
|
||||
#define KVM_REG_RISCV_VECTOR_REG(n) \
|
||||
((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
|
||||
|
||||
/* Registers for specific SBI extensions are mapped as type 10 */
|
||||
#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_SBI_STA_REG(name) \
|
||||
(offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
|
||||
|
||||
/* Device Control API: RISC-V AIA */
|
||||
#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
|
||||
#define KVM_DEV_RISCV_APLIC_SIZE 0x4000
|
||||
|
@ -86,6 +86,7 @@ obj-$(CONFIG_SMP) += sbi-ipi.o
|
||||
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
|
||||
endif
|
||||
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
|
||||
obj-$(CONFIG_PARAVIRT) += paravirt.o
|
||||
obj-$(CONFIG_KGDB) += kgdb.o
|
||||
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
|
||||
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
|
||||
|
arch/riscv/kernel/paravirt.c (new file, 135 lines)
@@ -0,0 +1,135 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "riscv-pv: " fmt
|
||||
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/kconfig.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/static_call.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/sbi.h>
|
||||
|
||||
struct static_key paravirt_steal_enabled;
|
||||
struct static_key paravirt_steal_rq_enabled;
|
||||
|
||||
static u64 native_steal_clock(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
|
||||
|
||||
static bool steal_acc = true;
|
||||
static int __init parse_no_stealacc(char *arg)
|
||||
{
|
||||
steal_acc = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("no-steal-acc", parse_no_stealacc);
|
||||
|
||||
DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
|
||||
|
||||
static bool __init has_pv_steal_clock(void)
|
||||
{
|
||||
if (sbi_spec_version >= sbi_mk_version(2, 0) &&
|
||||
sbi_probe_extension(SBI_EXT_STA) > 0) {
|
||||
pr_info("SBI STA extension detected\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
|
||||
lo, hi, flags, 0, 0, 0);
|
||||
if (ret.error) {
|
||||
if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
|
||||
pr_warn("Failed to disable steal-time shmem");
|
||||
else
|
||||
pr_warn("Failed to set steal-time shmem");
|
||||
return sbi_err_map_linux_errno(ret.error);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pv_time_cpu_online(unsigned int cpu)
|
||||
{
|
||||
struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
|
||||
phys_addr_t pa = __pa(st);
|
||||
unsigned long lo = (unsigned long)pa;
|
||||
unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
|
||||
|
||||
return sbi_sta_steal_time_set_shmem(lo, hi, 0);
|
||||
}
|
||||
|
||||
static int pv_time_cpu_down_prepare(unsigned int cpu)
|
||||
{
|
||||
return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
|
||||
SBI_STA_SHMEM_DISABLE, 0);
|
||||
}
|
||||
|
||||
static u64 pv_time_steal_clock(int cpu)
|
||||
{
|
||||
struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
|
||||
u32 sequence;
|
||||
u64 steal;
|
||||
|
||||
/*
|
||||
* Check the sequence field before and after reading the steal
|
||||
* field. Repeat the read if it is different or odd.
|
||||
*/
|
||||
do {
|
||||
sequence = READ_ONCE(st->sequence);
|
||||
virt_rmb();
|
||||
steal = READ_ONCE(st->steal);
|
||||
virt_rmb();
|
||||
} while ((le32_to_cpu(sequence) & 1) ||
|
||||
sequence != READ_ONCE(st->sequence));
|
||||
|
||||
return le64_to_cpu(steal);
|
||||
}
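pv_time_steal_clock() above uses the even/odd sequence field of the shared STA page as a lock-free consistency check against a concurrently updating hypervisor. Below is a self-contained sketch of both sides of that protocol; sequentially consistent C11 atomics stand in for READ_ONCE() and the virt_rmb() barriers, and the names are illustrative.

#include <stdatomic.h>
#include <stdint.h>

struct steal_record {
	_Atomic uint32_t sequence;
	_Atomic uint64_t steal;
};

/* Writer: sequence is odd while the record is being updated. */
static void hypervisor_update(struct steal_record *r, uint64_t new_steal)
{
	atomic_fetch_add(&r->sequence, 1);	/* now odd: update in progress */
	atomic_store(&r->steal, new_steal);
	atomic_fetch_add(&r->sequence, 1);	/* even again: update published */
}

/* Reader: retry if the sequence is odd or changed across the read. */
static uint64_t guest_read_steal(struct steal_record *r)
{
	uint32_t seq;
	uint64_t val;

	do {
		seq = atomic_load(&r->sequence);
		val = atomic_load(&r->steal);
	} while ((seq & 1) || seq != atomic_load(&r->sequence));

	return val;
}
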
int __init pv_time_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!has_pv_steal_clock())
|
||||
return 0;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
|
||||
"riscv/pv_time:online",
|
||||
pv_time_cpu_online,
|
||||
pv_time_cpu_down_prepare);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
static_call_update(pv_steal_clock, pv_time_steal_clock);
|
||||
|
||||
	static_key_slow_inc(&paravirt_steal_enabled);
|
||||
if (steal_acc)
|
||||
		static_key_slow_inc(&paravirt_steal_rq_enabled);
|
||||
|
||||
pr_info("Computing paravirt steal-time\n");
|
||||
|
||||
return 0;
|
||||
}
|
@@ -12,6 +12,7 @@
#include <asm/sbi.h>
#include <asm/processor.h>
#include <asm/timex.h>
#include <asm/paravirt.h>

unsigned long riscv_timebase __ro_after_init;
EXPORT_SYMBOL_GPL(riscv_timebase);

@@ -45,4 +46,6 @@ void __init time_init(void)
	timer_probe();

	tick_setup_hrtimer_broadcast();

	pv_time_init();
}
@@ -20,18 +20,17 @@ if VIRTUALIZATION
config KVM
	tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
	depends on RISCV_SBI && MMU
	select HAVE_KVM_EVENTFD
	select HAVE_KVM_IRQCHIP
	select HAVE_KVM_IRQFD
	select HAVE_KVM_IRQ_ROUTING
	select HAVE_KVM_MSI
	select HAVE_KVM_VCPU_ASYNC_IOCTL
	select KVM_COMMON
	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
	select KVM_GENERIC_HARDWARE_ENABLING
	select KVM_MMIO
	select KVM_XFER_TO_GUEST_WORK
	select MMU_NOTIFIER
	select PREEMPT_NOTIFIERS
	select KVM_GENERIC_MMU_NOTIFIER
	select SCHED_INFO
	help
	  Support hosting virtualized guest machines.
@@ -26,6 +26,7 @@ kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
kvm-y += vcpu_sbi_base.o
kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_sbi_sta.o
kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
@ -83,6 +83,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.hfence_tail = 0;
|
||||
memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
|
||||
|
||||
kvm_riscv_vcpu_sbi_sta_reset(vcpu);
|
||||
|
||||
/* Reset the guest CSRs for hotplug usecase */
|
||||
if (loaded)
|
||||
kvm_arch_vcpu_load(vcpu, smp_processor_id());
|
||||
@ -541,6 +543,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
|
||||
kvm_riscv_vcpu_aia_load(vcpu, cpu);
|
||||
|
||||
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
|
||||
|
||||
vcpu->cpu = cpu;
|
||||
}
|
||||
|
||||
@ -614,6 +618,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
|
||||
kvm_riscv_hfence_process(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
|
||||
kvm_riscv_vcpu_record_steal_time(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
@ -757,8 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
/* Update HVIP CSR for current CPU */
|
||||
kvm_riscv_update_hvip(vcpu);
|
||||
|
||||
if (ret <= 0 ||
|
||||
kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
|
||||
if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
|
||||
kvm_request_pending(vcpu) ||
|
||||
xfer_to_guest_mode_work_pending()) {
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
|
@ -485,7 +485,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
|
||||
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
|
||||
rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num,
|
||||
reg_val);
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
rc = -ENOENT;
|
||||
break;
|
||||
@ -931,50 +931,92 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
|
||||
return copy_isa_ext_reg_indices(vcpu, NULL);
|
||||
}
|
||||
|
||||
static inline unsigned long num_sbi_ext_regs(void)
|
||||
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
|
||||
{
|
||||
/*
|
||||
* number of KVM_REG_RISCV_SBI_SINGLE +
|
||||
* 2 x (number of KVM_REG_RISCV_SBI_MULTI)
|
||||
*/
|
||||
return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
|
||||
}
|
||||
unsigned int n = 0;
|
||||
|
||||
static int copy_sbi_ext_reg_indices(u64 __user *uindices)
|
||||
{
|
||||
int n;
|
||||
|
||||
/* copy KVM_REG_RISCV_SBI_SINGLE */
|
||||
n = KVM_RISCV_SBI_EXT_MAX;
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) {
|
||||
u64 size = IS_ENABLED(CONFIG_32BIT) ?
|
||||
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
|
||||
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
|
||||
KVM_REG_RISCV_SBI_SINGLE | i;
|
||||
|
||||
if (!riscv_vcpu_supports_sbi_ext(vcpu, i))
|
||||
continue;
|
||||
|
||||
if (uindices) {
|
||||
if (put_user(reg, uindices))
|
||||
return -EFAULT;
|
||||
uindices++;
|
||||
}
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
/* copy KVM_REG_RISCV_SBI_MULTI */
|
||||
n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
|
||||
for (int i = 0; i < n; i++) {
|
||||
u64 size = IS_ENABLED(CONFIG_32BIT) ?
|
||||
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
|
||||
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
|
||||
KVM_REG_RISCV_SBI_MULTI_EN | i;
|
||||
return n;
|
||||
}
|
||||
|
||||
if (uindices) {
|
||||
if (put_user(reg, uindices))
|
||||
return -EFAULT;
|
||||
uindices++;
|
||||
static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return copy_sbi_ext_reg_indices(vcpu, NULL);
|
||||
}
|
||||
|
||||
static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
|
||||
{
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
int total = 0;
|
||||
|
||||
if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) {
|
||||
u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
|
||||
int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long);
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
u64 reg = KVM_REG_RISCV | size |
|
||||
KVM_REG_RISCV_SBI_STATE |
|
||||
KVM_REG_RISCV_SBI_STA | i;
|
||||
|
||||
if (uindices) {
|
||||
if (put_user(reg, uindices))
|
||||
return -EFAULT;
|
||||
uindices++;
|
||||
}
|
||||
}
|
||||
|
||||
reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
|
||||
KVM_REG_RISCV_SBI_MULTI_DIS | i;
|
||||
total += n;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return copy_sbi_reg_indices(vcpu, NULL);
|
||||
}
|
||||
|
||||
static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!riscv_isa_extension_available(vcpu->arch.isa, v))
|
||||
return 0;
|
||||
|
||||
/* vstart, vl, vtype, vcsr, vlenb and 32 vector regs */
|
||||
return 37;
|
||||
}
|
||||
|
||||
static int copy_vector_reg_indices(const struct kvm_vcpu *vcpu,
|
||||
u64 __user *uindices)
|
||||
{
|
||||
const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
int n = num_vector_regs(vcpu);
|
||||
u64 reg, size;
|
||||
int i;
|
||||
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
/* copy vstart, vl, vtype, vcsr and vlenb */
|
||||
size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
|
||||
for (i = 0; i < 5; i++) {
|
||||
reg = KVM_REG_RISCV | size | KVM_REG_RISCV_VECTOR | i;
|
||||
|
||||
if (uindices) {
|
||||
if (put_user(reg, uindices))
|
||||
@ -983,7 +1025,21 @@ static int copy_sbi_ext_reg_indices(u64 __user *uindices)
|
||||
}
|
||||
}
|
||||
|
||||
return num_sbi_ext_regs();
|
||||
/* vector_regs have a variable 'vlenb' size */
|
||||
size = __builtin_ctzl(cntx->vector.vlenb);
|
||||
size <<= KVM_REG_SIZE_SHIFT;
|
||||
for (i = 0; i < 32; i++) {
|
||||
reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size |
|
||||
KVM_REG_RISCV_VECTOR_REG(i);
|
||||
|
||||
if (uindices) {
|
||||
if (put_user(reg, uindices))
|
||||
return -EFAULT;
|
||||
uindices++;
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1001,8 +1057,10 @@ unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
|
||||
res += num_timer_regs();
|
||||
res += num_fp_f_regs(vcpu);
|
||||
res += num_fp_d_regs(vcpu);
|
||||
res += num_vector_regs(vcpu);
|
||||
res += num_isa_ext_regs(vcpu);
|
||||
res += num_sbi_ext_regs();
|
||||
res += num_sbi_ext_regs(vcpu);
|
||||
res += num_sbi_regs(vcpu);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -1045,14 +1103,25 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
|
||||
return ret;
|
||||
uindices += ret;
|
||||
|
||||
ret = copy_vector_reg_indices(vcpu, uindices);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
uindices += ret;
|
||||
|
||||
ret = copy_isa_ext_reg_indices(vcpu, uindices);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
uindices += ret;
|
||||
|
||||
ret = copy_sbi_ext_reg_indices(uindices);
|
||||
ret = copy_sbi_ext_reg_indices(vcpu, uindices);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
uindices += ret;
|
||||
|
||||
ret = copy_sbi_reg_indices(vcpu, uindices);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
uindices += ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1075,12 +1144,14 @@ int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
|
||||
case KVM_REG_RISCV_FP_D:
|
||||
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
|
||||
case KVM_REG_RISCV_ISA_EXT:
|
||||
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_EXT:
|
||||
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_STATE:
|
||||
return kvm_riscv_vcpu_set_reg_sbi(vcpu, reg);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1106,12 +1177,14 @@ int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
|
||||
case KVM_REG_RISCV_FP_D:
|
||||
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
|
||||
case KVM_REG_RISCV_ISA_EXT:
|
||||
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_EXT:
|
||||
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
|
||||
case KVM_REG_RISCV_VECTOR:
|
||||
return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
|
||||
case KVM_REG_RISCV_SBI_STATE:
|
||||
return kvm_riscv_vcpu_get_reg_sbi(vcpu, reg);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -70,6 +70,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
|
||||
.ext_idx = KVM_RISCV_SBI_EXT_DBCN,
|
||||
.ext_ptr = &vcpu_sbi_ext_dbcn,
|
||||
},
|
||||
{
|
||||
.ext_idx = KVM_RISCV_SBI_EXT_STA,
|
||||
.ext_ptr = &vcpu_sbi_ext_sta,
|
||||
},
|
||||
{
|
||||
.ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
|
||||
.ext_ptr = &vcpu_sbi_ext_experimental,
|
||||
@ -80,6 +84,34 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct kvm_riscv_sbi_extension_entry *
|
||||
riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx)
|
||||
{
|
||||
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
|
||||
|
||||
if (idx >= KVM_RISCV_SBI_EXT_MAX)
|
||||
return NULL;
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
|
||||
if (sbi_ext[i].ext_idx == idx) {
|
||||
sext = &sbi_ext[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return sext;
|
||||
}
|
||||
|
||||
bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx)
|
||||
{
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
const struct kvm_riscv_sbi_extension_entry *sext;
|
||||
|
||||
sext = riscv_vcpu_get_sbi_ext(vcpu, idx);
|
||||
|
||||
return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
|
||||
@ -140,28 +172,19 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
unsigned long reg_val)
|
||||
{
|
||||
unsigned long i;
|
||||
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
|
||||
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
|
||||
return -ENOENT;
|
||||
const struct kvm_riscv_sbi_extension_entry *sext;
|
||||
|
||||
if (reg_val != 1 && reg_val != 0)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
|
||||
if (sbi_ext[i].ext_idx == reg_num) {
|
||||
sext = &sbi_ext[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!sext)
|
||||
sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
|
||||
if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
|
||||
return -ENOENT;
|
||||
|
||||
scontext->ext_status[sext->ext_idx] = (reg_val) ?
|
||||
KVM_RISCV_SBI_EXT_AVAILABLE :
|
||||
KVM_RISCV_SBI_EXT_UNAVAILABLE;
|
||||
KVM_RISCV_SBI_EXT_STATUS_ENABLED :
|
||||
KVM_RISCV_SBI_EXT_STATUS_DISABLED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -170,24 +193,16 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num,
|
||||
unsigned long *reg_val)
|
||||
{
|
||||
unsigned long i;
|
||||
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
|
||||
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
|
||||
const struct kvm_riscv_sbi_extension_entry *sext;
|
||||
|
||||
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
|
||||
return -ENOENT;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
|
||||
if (sbi_ext[i].ext_idx == reg_num) {
|
||||
sext = &sbi_ext[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!sext)
|
||||
sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
|
||||
if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
|
||||
return -ENOENT;
|
||||
|
||||
*reg_val = scontext->ext_status[sext->ext_idx] ==
|
||||
KVM_RISCV_SBI_EXT_AVAILABLE;
|
||||
KVM_RISCV_SBI_EXT_STATUS_ENABLED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -310,6 +325,69 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_SBI_STATE);
|
||||
unsigned long reg_subtype, reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
|
||||
switch (reg_subtype) {
|
||||
case KVM_REG_RISCV_SBI_STA:
|
||||
return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_SBI_STATE);
|
||||
unsigned long reg_subtype, reg_val;
|
||||
int ret;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
|
||||
|
||||
switch (reg_subtype) {
|
||||
case KVM_REG_RISCV_SBI_STA:
|
||||
ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, &reg_val);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
|
||||
struct kvm_vcpu *vcpu, unsigned long extid)
|
||||
{
|
||||
@ -325,7 +403,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
|
||||
if (ext->extid_start <= extid && ext->extid_end >= extid) {
|
||||
if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX ||
|
||||
scontext->ext_status[entry->ext_idx] ==
|
||||
KVM_RISCV_SBI_EXT_AVAILABLE)
|
||||
KVM_RISCV_SBI_EXT_STATUS_ENABLED)
|
||||
return ext;
|
||||
|
||||
return NULL;
|
||||
@ -413,12 +491,12 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (ext->probe && !ext->probe(vcpu)) {
|
||||
scontext->ext_status[entry->ext_idx] =
|
||||
KVM_RISCV_SBI_EXT_UNAVAILABLE;
|
||||
KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
|
||||
continue;
|
||||
}
|
||||
|
||||
scontext->ext_status[entry->ext_idx] = ext->default_unavail ?
|
||||
KVM_RISCV_SBI_EXT_UNAVAILABLE :
|
||||
KVM_RISCV_SBI_EXT_AVAILABLE;
|
||||
scontext->ext_status[entry->ext_idx] = ext->default_disabled ?
|
||||
KVM_RISCV_SBI_EXT_STATUS_DISABLED :
|
||||
KVM_RISCV_SBI_EXT_STATUS_ENABLED;
|
||||
}
|
||||
}
|
||||
|
@ -204,6 +204,6 @@ static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = {
|
||||
.extid_start = SBI_EXT_DBCN,
|
||||
.extid_end = SBI_EXT_DBCN,
|
||||
.default_unavail = true,
|
||||
.default_disabled = true,
|
||||
.handler = kvm_sbi_ext_dbcn_handler,
|
||||
};
|
||||
|
arch/riscv/kvm/vcpu_sbi_sta.c (new file, 208 lines)
@@ -0,0 +1,208 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Ventana Micro Systems Inc.
 */

#include <linux/kconfig.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/sizes.h>

#include <asm/bug.h>
#include <asm/current.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/page.h>
#include <asm/sbi.h>
#include <asm/uaccess.h>

void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sta.shmem = INVALID_GPA;
	vcpu->arch.sta.last_steal = 0;
}

void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
{
	gpa_t shmem = vcpu->arch.sta.shmem;
	u64 last_steal = vcpu->arch.sta.last_steal;
	u32 *sequence_ptr, sequence;
	u64 *steal_ptr, steal;
	unsigned long hva;
	gfn_t gfn;

	if (shmem == INVALID_GPA)
		return;

	/*
	 * shmem is 64-byte aligned (see the enforcement in
	 * kvm_sbi_sta_steal_time_set_shmem()) and the size of sbi_sta_struct
	 * is 64 bytes, so we know all its offsets are in the same page.
	 */
	gfn = shmem >> PAGE_SHIFT;
	hva = kvm_vcpu_gfn_to_hva(vcpu, gfn);

	if (WARN_ON(kvm_is_error_hva(hva))) {
		vcpu->arch.sta.shmem = INVALID_GPA;
		return;
	}

	sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
			       offsetof(struct sbi_sta_struct, sequence));
	steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
			    offsetof(struct sbi_sta_struct, steal));

	if (WARN_ON(get_user(sequence, sequence_ptr)))
		return;

	sequence = le32_to_cpu(sequence);
	sequence += 1;

	if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
		return;

	if (!WARN_ON(get_user(steal, steal_ptr))) {
		steal = le64_to_cpu(steal);
		vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
		steal += vcpu->arch.sta.last_steal - last_steal;
		WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
	}

	sequence += 1;
	WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr));

	kvm_vcpu_mark_page_dirty(vcpu, gfn);
}

static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
	unsigned long shmem_phys_lo = cp->a0;
	unsigned long shmem_phys_hi = cp->a1;
	u32 flags = cp->a2;
	struct sbi_sta_struct zero_sta = {0};
	unsigned long hva;
	bool writable;
	gpa_t shmem;
	int ret;

	if (flags != 0)
		return SBI_ERR_INVALID_PARAM;

	if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
	    shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
		vcpu->arch.sta.shmem = INVALID_GPA;
		return 0;
	}

	if (shmem_phys_lo & (SZ_64 - 1))
		return SBI_ERR_INVALID_PARAM;

	shmem = shmem_phys_lo;

	if (shmem_phys_hi != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			shmem |= ((gpa_t)shmem_phys_hi << 32);
		else
			return SBI_ERR_INVALID_ADDRESS;
	}

	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
	if (kvm_is_error_hva(hva) || !writable)
		return SBI_ERR_INVALID_ADDRESS;

	ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta));
	if (ret)
		return SBI_ERR_FAILURE;

	vcpu->arch.sta.shmem = shmem;
	vcpu->arch.sta.last_steal = current->sched_info.run_delay;

	return 0;
}

static int kvm_sbi_ext_sta_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
				   struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
	unsigned long funcid = cp->a6;
	int ret;

	switch (funcid) {
	case SBI_EXT_STA_STEAL_TIME_SET_SHMEM:
		ret = kvm_sbi_sta_steal_time_set_shmem(vcpu);
		break;
	default:
		ret = SBI_ERR_NOT_SUPPORTED;
		break;
	}

	retdata->err_val = ret;

	return 0;
}

static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu)
{
	return !!sched_info_on();
}

const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = {
	.extid_start = SBI_EXT_STA,
	.extid_end = SBI_EXT_STA,
	.handler = kvm_sbi_ext_sta_handler,
	.probe = kvm_sbi_ext_sta_probe,
};

int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu,
				   unsigned long reg_num,
				   unsigned long *reg_val)
{
	switch (reg_num) {
	case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
		*reg_val = (unsigned long)vcpu->arch.sta.shmem;
		break;
	case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
		if (IS_ENABLED(CONFIG_32BIT))
			*reg_val = upper_32_bits(vcpu->arch.sta.shmem);
		else
			*reg_val = 0;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu,
				   unsigned long reg_num,
				   unsigned long reg_val)
{
	switch (reg_num) {
	case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
		if (IS_ENABLED(CONFIG_32BIT)) {
			gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);

			vcpu->arch.sta.shmem = reg_val;
			vcpu->arch.sta.shmem |= hi << 32;
		} else {
			vcpu->arch.sta.shmem = reg_val;
		}
		break;
	case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
		if (IS_ENABLED(CONFIG_32BIT)) {
			gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);

			vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32);
			vcpu->arch.sta.shmem |= lo;
		} else if (reg_val != 0) {
			return -EINVAL;
		}
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
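As a usage note, the two KVM_REG_RISCV_SBI_STA registers above exist so that a VMM can save and restore the steal-time shared-memory address across migration. A minimal userspace sketch, assuming a 64-bit host and the register-ID macros from the uapi headers added elsewhere in this series (error handling elided; illustrative only, not part of the patch):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>		/* struct kvm_one_reg, KVM_GET_ONE_REG */
#include <asm/kvm.h>		/* KVM_REG_RISCV_* encodings (assumed) */

/* Build the one-reg ID for an SBI STA state register (rv64: U64-sized regs). */
static uint64_t sbi_sta_reg(uint64_t idx)
{
	return KVM_REG_RISCV | KVM_REG_SIZE_U64 |
	       KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | idx;
}

/* Read the guest-physical shmem address so it can be restored after migration. */
static int get_sta_shmem(int vcpu_fd, uint64_t *shmem)
{
	struct kvm_one_reg reg = {
		.id   = sbi_sta_reg(KVM_REG_RISCV_SBI_STA_REG(shmem_lo)),
		.addr = (uintptr_t)shmem,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}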
@ -15,7 +15,7 @@
|
||||
.altmacro
|
||||
.option norelax
|
||||
|
||||
ENTRY(__kvm_riscv_switch_to)
|
||||
SYM_FUNC_START(__kvm_riscv_switch_to)
|
||||
/* Save Host GPRs (except A0 and T0-T6) */
|
||||
REG_S ra, (KVM_ARCH_HOST_RA)(a0)
|
||||
REG_S sp, (KVM_ARCH_HOST_SP)(a0)
|
||||
@ -45,7 +45,7 @@ ENTRY(__kvm_riscv_switch_to)
|
||||
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
|
||||
REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
|
||||
REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
|
||||
la t4, __kvm_switch_return
|
||||
la t4, .Lkvm_switch_return
|
||||
REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
|
||||
|
||||
/* Save Host and Restore Guest SSTATUS */
|
||||
@ -113,7 +113,7 @@ ENTRY(__kvm_riscv_switch_to)
|
||||
|
||||
/* Back to Host */
|
||||
.align 2
|
||||
__kvm_switch_return:
|
||||
.Lkvm_switch_return:
|
||||
/* Swap Guest A0 with SSCRATCH */
|
||||
csrrw a0, CSR_SSCRATCH, a0
|
||||
|
||||
@ -208,9 +208,9 @@ __kvm_switch_return:
|
||||
|
||||
/* Return to C code */
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_switch_to)
|
||||
SYM_FUNC_END(__kvm_riscv_switch_to)
|
||||
|
||||
ENTRY(__kvm_riscv_unpriv_trap)
|
||||
SYM_CODE_START(__kvm_riscv_unpriv_trap)
|
||||
/*
|
||||
* We assume that faulting unpriv load/store instruction is
|
||||
* 4-byte long and blindly increment SEPC by 4.
|
||||
@ -231,12 +231,10 @@ ENTRY(__kvm_riscv_unpriv_trap)
|
||||
csrr a1, CSR_HTINST
|
||||
REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
|
||||
sret
|
||||
ENDPROC(__kvm_riscv_unpriv_trap)
|
||||
SYM_CODE_END(__kvm_riscv_unpriv_trap)
|
||||
|
||||
#ifdef CONFIG_FPU
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_f_save
|
||||
__kvm_riscv_fp_f_save:
|
||||
SYM_FUNC_START(__kvm_riscv_fp_f_save)
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
csrs CSR_SSTATUS, t1
|
||||
@ -276,10 +274,9 @@ __kvm_riscv_fp_f_save:
|
||||
sw t0, KVM_ARCH_FP_F_FCSR(a0)
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
SYM_FUNC_END(__kvm_riscv_fp_f_save)
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_d_save
|
||||
__kvm_riscv_fp_d_save:
|
||||
SYM_FUNC_START(__kvm_riscv_fp_d_save)
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
csrs CSR_SSTATUS, t1
|
||||
@ -319,10 +316,9 @@ __kvm_riscv_fp_d_save:
|
||||
sw t0, KVM_ARCH_FP_D_FCSR(a0)
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
SYM_FUNC_END(__kvm_riscv_fp_d_save)
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_f_restore
|
||||
__kvm_riscv_fp_f_restore:
|
||||
SYM_FUNC_START(__kvm_riscv_fp_f_restore)
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
lw t0, KVM_ARCH_FP_F_FCSR(a0)
|
||||
@ -362,10 +358,9 @@ __kvm_riscv_fp_f_restore:
|
||||
fscsr t0
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
SYM_FUNC_END(__kvm_riscv_fp_f_restore)
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_d_restore
|
||||
__kvm_riscv_fp_d_restore:
|
||||
SYM_FUNC_START(__kvm_riscv_fp_d_restore)
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
lw t0, KVM_ARCH_FP_D_FCSR(a0)
|
||||
@ -405,4 +400,5 @@ __kvm_riscv_fp_d_restore:
|
||||
fscsr t0
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
SYM_FUNC_END(__kvm_riscv_fp_d_restore)
|
||||
#endif
|
||||
|
@ -76,6 +76,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
|
||||
cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL);
|
||||
if (!cntx->vector.datap)
|
||||
return -ENOMEM;
|
||||
cntx->vector.vlenb = riscv_v_vsize / 32;
|
||||
|
||||
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
|
||||
if (!vcpu->arch.host_context.vector.datap)
|
||||
@ -115,6 +116,9 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
|
||||
*reg_addr = &cntx->vector.vcsr;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
|
||||
*reg_addr = &cntx->vector.vlenb;
|
||||
break;
|
||||
case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
|
||||
default:
|
||||
return -ENOENT;
|
||||
@ -173,6 +177,18 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
|
||||
if (!riscv_isa_extension_available(isa, v))
|
||||
return -ENOENT;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)) {
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long reg_val;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, reg_size))
|
||||
return -EFAULT;
|
||||
if (reg_val != cntx->vector.vlenb)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, ®_addr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
@ -179,7 +179,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = kvm_riscv_aia_available();
|
||||
break;
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_USER_MEMORY:
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
|
||||
|
@ -111,4 +111,10 @@ static inline void stfle(u64 *stfle_fac_list, int size)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/**
|
||||
* stfle_size - Actual size of the facility list as specified by stfle
|
||||
* (number of double words)
|
||||
*/
|
||||
unsigned int stfle_size(void);
|
||||
|
||||
#endif /* __ASM_FACILITY_H */
|
||||
|
@ -818,7 +818,7 @@ struct s390_io_adapter {
|
||||
|
||||
struct kvm_s390_cpu_model {
|
||||
/* facility mask supported by kvm & hosting machine */
|
||||
__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
|
||||
__u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64];
|
||||
struct kvm_s390_vm_cpu_subfunc subfuncs;
|
||||
/* facility list requested by guest (in dma page) */
|
||||
__u64 *fac_list;
|
||||
|
@ -41,7 +41,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
|
||||
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
|
||||
obj-y += entry.o reipl.o kdebugfs.o alternative.o
|
||||
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
|
||||
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
|
||||
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
|
||||
|
||||
extra-y += vmlinux.lds
|
||||
|
||||
|
arch/s390/kernel/facility.c (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright IBM Corp. 2023
|
||||
*/
|
||||
|
||||
#include <asm/facility.h>
|
||||
|
||||
unsigned int stfle_size(void)
|
||||
{
|
||||
static unsigned int size;
|
||||
unsigned int r;
|
||||
u64 dummy;
|
||||
|
||||
r = READ_ONCE(size);
|
||||
if (!r) {
|
||||
r = __stfle_asm(&dummy, 1) + 1;
|
||||
WRITE_ONCE(size, r);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL(stfle_size);
|
@ -20,19 +20,16 @@ config KVM
|
||||
def_tristate y
|
||||
prompt "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on HAVE_KVM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_ASYNC_PF
|
||||
select KVM_ASYNC_PF_SYNC
|
||||
select KVM_COMMON
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_INVALID_WAKEUPS
|
||||
select HAVE_KVM_NO_POLL
|
||||
select KVM_VFIO
|
||||
select INTERVAL_TREE
|
||||
select MMU_NOTIFIER
|
||||
help
|
||||
Support hosting paravirtualized guest machines using the SIE
|
||||
|
@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
|
||||
else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
|
||||
return -EINVAL;
|
||||
|
||||
bp_data = memdup_user(dbg->arch.hw_bp,
|
||||
sizeof(*bp_data) * dbg->arch.nr_hw_bp);
|
||||
bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp,
|
||||
sizeof(*bp_data));
|
||||
if (IS_ERR(bp_data))
|
||||
return PTR_ERR(bp_data);
|
||||
|
||||
|
@ -563,7 +563,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_S390_CSS_SUPPORT:
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_S390_IRQCHIP:
|
||||
case KVM_CAP_VM_ATTRIBUTES:
|
||||
case KVM_CAP_MP_STATE:
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/dis.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/facility.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
|
||||
@ -984,12 +985,26 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
|
||||
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
__u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
|
||||
__u32 fac = READ_ONCE(vsie_page->scb_o->fac);
|
||||
|
||||
/*
|
||||
* Alternate-STFLE-Interpretive-Execution facilities are not supported
|
||||
* -> format-0 flcb
|
||||
*/
|
||||
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
|
||||
retry_vsie_icpt(vsie_page);
|
||||
/*
|
||||
* The facility list origin (FLO) is in bits 1 - 28 of the FLD
|
||||
* so we need to mask here before reading.
|
||||
*/
|
||||
fac = fac & 0x7ffffff8U;
|
||||
/*
|
||||
* format-0 -> size of nested guest's facility list == guest's size
|
||||
* guest's size == host's size, since STFLE is interpretatively executed
|
||||
* using a format-0 for the guest, too.
|
||||
*/
|
||||
if (read_guest_real(vcpu, fac, &vsie_page->fac,
|
||||
sizeof(vsie_page->fac)))
|
||||
stfle_size() * sizeof(u64)))
|
||||
return set_validity_icpt(scb_s, 0x1090U);
|
||||
scb_s->fac = (__u32)(__u64) &vsie_page->fac;
|
||||
}
|
||||
|
@ -55,8 +55,10 @@ KVM_X86_OP(set_rflags)
|
||||
KVM_X86_OP(get_if_flag)
|
||||
KVM_X86_OP(flush_tlb_all)
|
||||
KVM_X86_OP(flush_tlb_current)
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
KVM_X86_OP_OPTIONAL(flush_remote_tlbs)
|
||||
KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range)
|
||||
#endif
|
||||
KVM_X86_OP(flush_tlb_gva)
|
||||
KVM_X86_OP(flush_tlb_guest)
|
||||
KVM_X86_OP(vcpu_pre_run)
|
||||
@ -135,6 +137,7 @@ KVM_X86_OP(msr_filter_changed)
|
||||
KVM_X86_OP(complete_emulated_msr)
|
||||
KVM_X86_OP(vcpu_deliver_sipi_vector)
|
||||
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
|
||||
KVM_X86_OP_OPTIONAL(get_untagged_addr)
|
||||
|
||||
#undef KVM_X86_OP
|
||||
#undef KVM_X86_OP_OPTIONAL
|
||||
|
@ -22,7 +22,7 @@ KVM_X86_PMU_OP(get_msr)
|
||||
KVM_X86_PMU_OP(set_msr)
|
||||
KVM_X86_PMU_OP(refresh)
|
||||
KVM_X86_PMU_OP(init)
|
||||
KVM_X86_PMU_OP(reset)
|
||||
KVM_X86_PMU_OP_OPTIONAL(reset)
|
||||
KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
|
||||
KVM_X86_PMU_OP_OPTIONAL(cleanup)
|
||||
|
||||
|
@ -133,7 +133,8 @@
|
||||
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
|
||||
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
|
||||
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
|
||||
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
|
||||
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
|
||||
| X86_CR4_LAM_SUP))
|
||||
|
||||
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
|
||||
|
||||
@ -500,8 +501,23 @@ struct kvm_pmc {
|
||||
u8 idx;
|
||||
bool is_paused;
|
||||
bool intr;
|
||||
/*
|
||||
* Base value of the PMC counter, relative to the *consumed* count in
|
||||
* the associated perf_event. This value includes counter updates from
|
||||
* the perf_event and emulated_count since the last time the counter
|
||||
* was reprogrammed, but it is *not* the current value as seen by the
|
||||
* guest or userspace.
|
||||
*
|
||||
* The count is relative to the associated perf_event so that KVM
|
||||
* doesn't need to reprogram the perf_event every time the guest writes
|
||||
* to the counter.
|
||||
*/
|
||||
u64 counter;
|
||||
u64 prev_counter;
|
||||
/*
|
||||
* PMC events triggered by KVM emulation that haven't been fully
|
||||
* processed, i.e. haven't undergone overflow detection.
|
||||
*/
|
||||
u64 emulated_counter;
|
||||
u64 eventsel;
|
||||
struct perf_event *perf_event;
|
||||
struct kvm_vcpu *vcpu;
|
||||
@ -937,8 +953,10 @@ struct kvm_vcpu_arch {
|
||||
/* used for guest single stepping over the given code position */
|
||||
unsigned long singlestep_rip;
|
||||
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
bool hyperv_enabled;
|
||||
struct kvm_vcpu_hv *hyperv;
|
||||
#endif
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
struct kvm_vcpu_xen xen;
|
||||
#endif
|
||||
@ -1095,6 +1113,7 @@ enum hv_tsc_page_status {
|
||||
HV_TSC_PAGE_BROKEN,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
/* Hyper-V emulation context */
|
||||
struct kvm_hv {
|
||||
struct mutex hv_lock;
|
||||
@ -1125,9 +1144,9 @@ struct kvm_hv {
|
||||
*/
|
||||
unsigned int synic_auto_eoi_used;
|
||||
|
||||
struct hv_partition_assist_pg *hv_pa_pg;
|
||||
struct kvm_hv_syndbg hv_syndbg;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct msr_bitmap_range {
|
||||
u32 flags;
|
||||
@ -1136,6 +1155,7 @@ struct msr_bitmap_range {
|
||||
unsigned long *bitmap;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
/* Xen emulation context */
|
||||
struct kvm_xen {
|
||||
struct mutex xen_lock;
|
||||
@ -1147,6 +1167,7 @@ struct kvm_xen {
|
||||
struct idr evtchn_ports;
|
||||
unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
|
||||
};
|
||||
#endif
|
||||
|
||||
enum kvm_irqchip_mode {
|
||||
KVM_IRQCHIP_NONE,
|
||||
@ -1255,6 +1276,7 @@ enum kvm_apicv_inhibit {
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned long vm_type;
|
||||
unsigned long n_used_mmu_pages;
|
||||
unsigned long n_requested_mmu_pages;
|
||||
unsigned long n_max_mmu_pages;
|
||||
@ -1347,8 +1369,13 @@ struct kvm_arch {
|
||||
/* reads protected by irq_srcu, writes by irq_lock */
|
||||
struct hlist_head mask_notifier_list;
|
||||
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
struct kvm_hv hyperv;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
struct kvm_xen xen;
|
||||
#endif
|
||||
|
||||
bool backwards_tsc_observed;
|
||||
bool boot_vcpu_runs_old_kvmclock;
|
||||
@ -1406,9 +1433,8 @@ struct kvm_arch {
|
||||
* the MMU lock in read mode + RCU or
|
||||
* the MMU lock in write mode
|
||||
*
|
||||
* For writes, this list is protected by:
|
||||
* the MMU lock in read mode + the tdp_mmu_pages_lock or
|
||||
* the MMU lock in write mode
|
||||
* For writes, this list is protected by tdp_mmu_pages_lock; see
|
||||
* below for the details.
|
||||
*
|
||||
* Roots will remain in the list until their tdp_mmu_root_count
|
||||
* drops to zero, at which point the thread that decremented the
|
||||
@ -1425,8 +1451,10 @@ struct kvm_arch {
|
||||
* - possible_nx_huge_pages;
|
||||
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
|
||||
* by the TDP MMU
|
||||
* It is acceptable, but not necessary, to acquire this lock when
|
||||
* the thread holds the MMU lock in write mode.
|
||||
* Because the lock is only taken within the MMU lock, strictly
|
||||
* speaking it is redundant to acquire this lock when the thread
|
||||
* holds the MMU lock in write mode. However it often simplifies
|
||||
* the code to do so.
|
||||
*/
|
||||
spinlock_t tdp_mmu_pages_lock;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
@ -1441,6 +1469,7 @@ struct kvm_arch {
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
hpa_t hv_root_tdp;
|
||||
spinlock_t hv_root_tdp_lock;
|
||||
struct hv_partition_assist_pg *hv_pa_pg;
|
||||
#endif
|
||||
/*
|
||||
* VM-scope maximum vCPU ID. Used to determine the size of structures
|
||||
@ -1613,9 +1642,11 @@ struct kvm_x86_ops {
|
||||
|
||||
void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
|
||||
void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
int (*flush_remote_tlbs)(struct kvm *kvm);
|
||||
int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn,
|
||||
gfn_t nr_pages);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flush any TLB entries associated with the given GVA.
|
||||
@ -1761,6 +1792,8 @@ struct kvm_x86_ops {
|
||||
* Returns vCPU specific APICv inhibit reasons
|
||||
*/
|
||||
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
|
||||
|
||||
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
|
||||
};
|
||||
|
||||
struct kvm_x86_nested_ops {
|
||||
@ -1824,6 +1857,7 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
#define __KVM_HAVE_ARCH_VM_FREE
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
|
||||
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
{
|
||||
@ -1835,6 +1869,15 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
|
||||
}
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
|
||||
static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
|
||||
u64 nr_pages)
|
||||
{
|
||||
if (!kvm_x86_ops.flush_remote_tlbs_range)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
|
||||
}
|
||||
#endif /* CONFIG_HYPERV */
|
||||
|
||||
#define kvm_arch_pmi_in_guest(vcpu) \
|
||||
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
|
||||
@ -1848,6 +1891,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_init_vm(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_vm(struct kvm *kvm);
|
||||
|
||||
void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot);
|
||||
|
||||
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
@ -2086,6 +2132,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
|
||||
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
|
||||
int tdp_max_root_level, int tdp_huge_page_level);
|
||||
|
||||
#ifdef CONFIG_KVM_PRIVATE_MEM
|
||||
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
|
||||
#else
|
||||
#define kvm_arch_has_private_mem(kvm) false
|
||||
#endif
|
||||
|
||||
static inline u16 kvm_read_ldt(void)
|
||||
{
|
||||
u16 ldt;
|
||||
@ -2133,16 +2185,15 @@ enum {
|
||||
#define HF_SMM_MASK (1 << 1)
|
||||
#define HF_SMM_INSIDE_NMI_MASK (1 << 2)
|
||||
|
||||
# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
|
||||
# define KVM_ADDRESS_SPACE_NUM 2
|
||||
# define KVM_MAX_NR_ADDRESS_SPACES 2
|
||||
/* SMM is currently unsupported for guests with private memory. */
|
||||
# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2)
|
||||
# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
|
||||
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
|
||||
#else
|
||||
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
|
||||
#endif
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_has_extint(struct kvm_vcpu *v);
|
||||
|
@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
|
||||
/* x86-specific KVM_EXIT_HYPERCALL flags. */
|
||||
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
|
||||
|
||||
#define KVM_X86_DEFAULT_VM 0
|
||||
#define KVM_X86_SW_PROTECTED_VM 1
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@ -24,8 +24,8 @@
|
||||
|
||||
static int kvmclock __initdata = 1;
|
||||
static int kvmclock_vsyscall __initdata = 1;
|
||||
static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
|
||||
static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
|
||||
static int msr_kvm_system_time __ro_after_init;
|
||||
static int msr_kvm_wall_clock __ro_after_init;
|
||||
static u64 kvm_sched_clock_offset __ro_after_init;
|
||||
|
||||
static int __init parse_no_kvmclock(char *arg)
|
||||
@ -195,7 +195,8 @@ static void kvm_setup_secondary_clock(void)
|
||||
|
||||
void kvmclock_disable(void)
|
||||
{
|
||||
native_write_msr(msr_kvm_system_time, 0, 0);
|
||||
if (msr_kvm_system_time)
|
||||
native_write_msr(msr_kvm_system_time, 0, 0);
|
||||
}
|
||||
|
||||
static void __init kvmclock_init_mem(void)
|
||||
@ -294,7 +295,10 @@ void __init kvmclock_init(void)
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
|
||||
msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
|
||||
msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
|
||||
} else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
|
||||
} else if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
|
||||
msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
|
||||
msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -23,17 +23,15 @@ config KVM
|
||||
depends on HAVE_KVM
|
||||
depends on HIGH_RES_TIMERS
|
||||
depends on X86_LOCAL_APIC
|
||||
select PREEMPT_NOTIFIERS
|
||||
select MMU_NOTIFIER
|
||||
select KVM_COMMON
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_PFNCACHE
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_DIRTY_RING_TSO
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select IRQ_BYPASS_MANAGER
|
||||
select HAVE_KVM_IRQ_BYPASS
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_ASYNC_PF
|
||||
select USER_RETURN_NOTIFIER
|
||||
select KVM_MMIO
|
||||
@ -46,7 +44,6 @@ config KVM
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_VFIO
|
||||
select INTERVAL_TREE
|
||||
select HAVE_KVM_PM_NOTIFIER if PM
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
help
|
||||
@ -65,18 +62,30 @@ config KVM
|
||||
|
||||
config KVM_WERROR
|
||||
bool "Compile KVM with -Werror"
|
||||
# KASAN may cause the build to fail due to larger frames
|
||||
default y if X86_64 && !KASAN
|
||||
# We use the dependency on !COMPILE_TEST to not be enabled
|
||||
# blindly in allmodconfig or allyesconfig configurations
|
||||
depends on KVM
|
||||
depends on (X86_64 && !KASAN) || !COMPILE_TEST
|
||||
depends on EXPERT
|
||||
# Disallow KVM's -Werror if KASAN is enabled, e.g. to guard against
|
||||
# randomized configs from selecting KVM_WERROR=y, which doesn't play
|
||||
# nice with KASAN. KASAN builds generates warnings for the default
|
||||
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
|
||||
# Building KVM with -Werror and KASAN is still doable via enabling
|
||||
# the kernel-wide WERROR=y.
|
||||
depends on KVM && EXPERT && !KASAN
|
||||
help
|
||||
Add -Werror to the build flags for KVM.
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config KVM_SW_PROTECTED_VM
|
||||
bool "Enable support for KVM software-protected VMs"
|
||||
depends on EXPERT
|
||||
depends on KVM && X86_64
|
||||
select KVM_GENERIC_PRIVATE_MEM
|
||||
help
|
||||
Enable support for KVM software-protected VMs. Currently "protected"
|
||||
means the VM can be backed with memory provided by
|
||||
KVM_CREATE_GUEST_MEMFD.
|
||||
|
||||
If unsure, say "N".
|
||||
|
||||
config KVM_INTEL
|
||||
tristate "KVM for Intel (and compatible) processors support"
|
||||
depends on KVM && IA32_FEAT_CTL
|
||||
@ -129,6 +138,20 @@ config KVM_SMM
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config KVM_HYPERV
|
||||
bool "Support for Microsoft Hyper-V emulation"
|
||||
depends on KVM
|
||||
default y
|
||||
help
|
||||
Provides KVM support for emulating Microsoft Hyper-V. This allows KVM
|
||||
to expose a subset of the paravirtualized interfaces defined in the
|
||||
Hyper-V Hypervisor Top-Level Functional Specification (TLFS):
|
||||
https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
|
||||
These interfaces are required for the correct and performant functioning
|
||||
of Windows and Hyper-V guests on KVM.
|
||||
|
||||
If unsure, say "Y".
|
||||
|
||||
config KVM_XEN
|
||||
bool "Support for Xen hypercall interface"
|
||||
depends on KVM
|
||||
|
@ -11,25 +11,27 @@ include $(srctree)/virt/kvm/Makefile.kvm
|
||||
|
||||
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
|
||||
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
|
||||
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
|
||||
debugfs.o mmu/mmu.o mmu/page_track.o \
|
||||
mmu/spte.o
|
||||
|
||||
ifdef CONFIG_HYPERV
|
||||
kvm-y += kvm_onhyperv.o
|
||||
endif
|
||||
|
||||
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
|
||||
kvm-$(CONFIG_KVM_HYPERV) += hyperv.o
|
||||
kvm-$(CONFIG_KVM_XEN) += xen.o
|
||||
kvm-$(CONFIG_KVM_SMM) += smm.o
|
||||
|
||||
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
|
||||
vmx/hyperv.o vmx/nested.o vmx/posted_intr.o
|
||||
vmx/nested.o vmx/posted_intr.o
|
||||
|
||||
kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o
|
||||
kvm-intel-$(CONFIG_KVM_HYPERV) += vmx/hyperv.o vmx/hyperv_evmcs.o
|
||||
|
||||
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
|
||||
svm/sev.o svm/hyperv.o
|
||||
svm/sev.o
|
||||
kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o
|
||||
|
||||
ifdef CONFIG_HYPERV
|
||||
kvm-y += kvm_onhyperv.o
|
||||
kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o
|
||||
kvm-amd-y += svm/svm_onhyperv.o
|
||||
endif
|
||||
|
||||
|
@ -314,11 +314,15 @@ EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
|
||||
|
||||
static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
|
||||
{
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
|
||||
entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
|
||||
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
|
||||
return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
@ -433,11 +437,13 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
if (kvm_cpuid_has_hyperv(e2, nent)) {
|
||||
r = kvm_hv_vcpu_init(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
r = kvm_check_cpuid(vcpu, e2, nent);
|
||||
if (r)
|
||||
@ -469,7 +475,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
|
||||
return -E2BIG;
|
||||
|
||||
if (cpuid->nent) {
|
||||
e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
|
||||
e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e));
|
||||
if (IS_ERR(e))
|
||||
return PTR_ERR(e);
|
||||
|
||||
@ -513,7 +519,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
|
||||
return -E2BIG;
|
||||
|
||||
if (cpuid->nent) {
|
||||
e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
|
||||
e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2));
|
||||
if (IS_ERR(e2))
|
||||
return PTR_ERR(e2);
|
||||
}
|
||||
@ -671,7 +677,7 @@ void kvm_set_cpu_caps(void)
|
||||
kvm_cpu_cap_mask(CPUID_7_1_EAX,
|
||||
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
|
||||
F(FZRM) | F(FSRS) | F(FSRC) |
|
||||
F(AMX_FP16) | F(AVX_IFMA)
|
||||
F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
|
||||
@ -679,6 +685,11 @@ void kvm_set_cpu_caps(void)
|
||||
F(AMX_COMPLEX)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
|
||||
F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
|
||||
F(BHI_CTRL) | F(MCDT_NO)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_mask(CPUID_D_1_EAX,
|
||||
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
|
||||
);
|
||||
@ -960,13 +971,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
break;
|
||||
/* function 7 has additional index. */
|
||||
case 7:
|
||||
entry->eax = min(entry->eax, 1u);
|
||||
max_idx = entry->eax = min(entry->eax, 2u);
|
||||
cpuid_entry_override(entry, CPUID_7_0_EBX);
|
||||
cpuid_entry_override(entry, CPUID_7_ECX);
|
||||
cpuid_entry_override(entry, CPUID_7_EDX);
|
||||
|
||||
/* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
|
||||
if (entry->eax == 1) {
|
||||
/* KVM only supports up to 0x7.2, capped above via min(). */
|
||||
if (max_idx >= 1) {
|
||||
entry = do_host_cpuid(array, function, 1);
|
||||
if (!entry)
|
||||
goto out;
|
||||
@ -976,6 +987,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
entry->ebx = 0;
|
||||
entry->ecx = 0;
|
||||
}
|
||||
if (max_idx >= 2) {
|
||||
entry = do_host_cpuid(array, function, 2);
|
||||
if (!entry)
|
||||
goto out;
|
||||
|
||||
cpuid_entry_override(entry, CPUID_7_2_EDX);
|
||||
entry->ecx = 0;
|
||||
entry->ebx = 0;
|
||||
entry->eax = 0;
|
||||
}
|
||||
break;
|
||||
case 0xa: { /* Architectural Performance Monitoring */
|
||||
union cpuid10_eax eax;
|
||||
|
@ -47,11 +47,6 @@ static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
return !(gpa & vcpu->arch.reserved_gpa_bits);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
|
||||
gpa_t gpa, gpa_t alignment)
|
||||
{
|
||||
@ -279,4 +274,12 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.governed_features.enabled);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
{
|
||||
if (guest_can_use(vcpu, X86_FEATURE_LAM))
|
||||
cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
|
||||
|
||||
return kvm_vcpu_is_legal_gpa(vcpu, cr3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -111,7 +111,7 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
||||
int bkt;
|
||||
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
|
@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
|
||||
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
|
||||
struct segmented_address addr,
|
||||
unsigned *max_size, unsigned size,
|
||||
bool write, bool fetch,
|
||||
enum x86emul_mode mode, ulong *linear)
|
||||
enum x86emul_mode mode, ulong *linear,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
bool usable;
|
||||
@ -701,7 +701,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
|
||||
*max_size = 0;
|
||||
switch (mode) {
|
||||
case X86EMUL_MODE_PROT64:
|
||||
*linear = la;
|
||||
*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
|
||||
va_bits = ctxt_virt_addr_bits(ctxt);
|
||||
if (!__is_canonical_address(la, va_bits))
|
||||
goto bad;
|
||||
@ -717,11 +717,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
|
||||
if (!usable)
|
||||
goto bad;
|
||||
/* code segment in protected mode or read-only data segment */
|
||||
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
|
||||
|| !(desc.type & 2)) && write)
|
||||
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
|
||||
(flags & X86EMUL_F_WRITE))
|
||||
goto bad;
|
||||
/* unreadable code segment */
|
||||
if (!fetch && (desc.type & 8) && !(desc.type & 2))
|
||||
if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
|
||||
goto bad;
|
||||
lim = desc_limit_scaled(&desc);
|
||||
if (!(desc.type & 8) && (desc.type & 4)) {
|
||||
@ -757,8 +757,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
|
||||
ulong *linear)
|
||||
{
|
||||
unsigned max_size;
|
||||
return __linearize(ctxt, addr, &max_size, size, write, false,
|
||||
ctxt->mode, linear);
|
||||
return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
|
||||
write ? X86EMUL_F_WRITE : 0);
|
||||
}
|
||||
|
||||
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
|
||||
@ -771,7 +771,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
|
||||
|
||||
if (ctxt->op_bytes != sizeof(unsigned long))
|
||||
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
|
||||
rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
|
||||
rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
|
||||
X86EMUL_F_FETCH);
|
||||
if (rc == X86EMUL_CONTINUE)
|
||||
ctxt->_eip = addr.ea;
|
||||
return rc;
|
||||
@ -907,8 +908,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
|
||||
* boundary check itself. Instead, we use max_size to check
|
||||
* against op_size.
|
||||
*/
|
||||
rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
|
||||
&linear);
|
||||
rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
|
||||
X86EMUL_F_FETCH);
|
||||
if (unlikely(rc != X86EMUL_CONTINUE))
|
||||
return rc;
|
||||
|
||||
@ -3439,8 +3440,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
int rc;
|
||||
ulong linear;
|
||||
unsigned int max_size;
|
||||
|
||||
rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
|
||||
rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
|
||||
&linear, X86EMUL_F_INVLPG);
|
||||
if (rc == X86EMUL_CONTINUE)
|
||||
ctxt->ops->invlpg(ctxt, linear);
|
||||
/* Disable writeback. */
|
||||
|
@ -16,6 +16,7 @@ KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
|
||||
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
|
||||
KVM_GOVERNED_X86_FEATURE(VGIF)
|
||||
KVM_GOVERNED_X86_FEATURE(VNMI)
|
||||
KVM_GOVERNED_X86_FEATURE(LAM)
|
||||
|
||||
#undef KVM_GOVERNED_X86_FEATURE
|
||||
#undef KVM_GOVERNED_FEATURE
|
||||
|
@@ -24,6 +24,8 @@
#include <linux/kvm_host.h>
#include "x86.h"

#ifdef CONFIG_KVM_HYPERV

/* "Hv#1" signature */
#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648

@@ -105,6 +107,17 @@ int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);

static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
{
	return to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->vec_bitmap);
}

static inline bool kvm_hv_synic_auto_eoi_set(struct kvm_vcpu *vcpu, int vector)
{
	return to_hv_vcpu(vcpu) &&
	       test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap);
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);

bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
@@ -236,6 +249,76 @@ static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
	return kvm_hv_get_assist_page(vcpu);
}

int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu,
						     bool tdp_enabled)
{
	/*
	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
	 * L2's VP_ID upon request from the guest. Make sure we check for
	 * pending entries in the right FIFO upon L1/L2 transition as these
	 * requests are put by other vCPUs asynchronously.
	 */
	if (to_hv_vcpu(vcpu) && tdp_enabled)
		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
}

#endif
int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
#else /* CONFIG_KVM_HYPERV */
static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
					 struct pvclock_vcpu_time_info *hv_clock) {}
static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
static inline void kvm_hv_init_vm(struct kvm *kvm) {}
static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	return 0;
}
static inline void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
{
	return false;
}
static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	return HV_STATUS_ACCESS_DENIED;
}
static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {}
static inline void kvm_hv_free_pa_page(struct kvm *kvm) {}
static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
{
	return false;
}
static inline bool kvm_hv_synic_auto_eoi_set(struct kvm_vcpu *vcpu, int vector)
{
	return false;
}
static inline void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) {}
static inline bool kvm_hv_invtsc_suppressed(struct kvm_vcpu *vcpu)
{
	return false;
}
static inline void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled) {}
static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
{
	return false;
}
static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
{
	return false;
}
static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
{
	return false;
}
static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
{
	return 0;
}
static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
{
	return vcpu->vcpu_idx;
}
static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu, bool tdp_enabled) {}
#endif /* CONFIG_KVM_HYPERV */

#endif /* __ARCH_X86_KVM_HYPERV_H__ */
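
The #else branch above stubs out every Hyper-V helper as a static inline that returns false/0 or does nothing, so most callers can stay free of #ifdef CONFIG_KVM_HYPERV (the lapic.c hunks further down rely on exactly that). A small stand-alone sketch of the same compile-out pattern, with made-up names:

#include <stdio.h>

/* #define HAVE_FEATURE 1  -- toggle to emulate CONFIG_KVM_HYPERV=y */

#ifdef HAVE_FEATURE
static int feature_has_vector(int vector) { return vector == 3; }
#else
/* Stub keeps callers compiling and optimizes away when the feature is off. */
static int feature_has_vector(int vector) { (void)vector; return 0; }
#endif

int main(void)
{
	/* The caller needs no #ifdef of its own. */
	if (feature_has_vector(3))
		printf("deliver to synthetic interrupt controller\n");
	else
		printf("feature compiled out or vector not set\n");
	return 0;
}
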
@@ -118,8 +118,10 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
	if (!lapic_in_kernel(v))
		return v->arch.interrupt.nr;

#ifdef CONFIG_KVM_XEN
	if (kvm_xen_has_interrupt(v))
		return v->kvm->arch.xen.upcall_vector;
#endif

	if (irqchip_split(v->kvm)) {
		int vector = v->arch.pending_external_vector;
@@ -144,7 +144,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
}


#ifdef CONFIG_KVM_HYPERV
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
			   struct kvm *kvm, int irq_source_id, int level,
			   bool line_status)
@@ -154,6 +154,7 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,

	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
#endif

int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
			      struct kvm *kvm, int irq_source_id, int level,
@@ -163,9 +164,11 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
	int r;

	switch (e->type) {
#ifdef CONFIG_KVM_HYPERV
	case KVM_IRQ_ROUTING_HV_SINT:
		return kvm_hv_set_sint(e, kvm, irq_source_id, level,
				       line_status);
#endif

	case KVM_IRQ_ROUTING_MSI:
		if (kvm_msi_route_invalid(kvm, e))
@@ -314,11 +317,13 @@ int kvm_set_routing_entry(struct kvm *kvm,
		if (kvm_msi_route_invalid(kvm, e))
			return -EINVAL;
		break;
#ifdef CONFIG_KVM_HYPERV
	case KVM_IRQ_ROUTING_HV_SINT:
		e->set = kvm_hv_set_sint;
		e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
		e->hv_sint.sint = ue->u.hv_sint.sint;
		break;
#endif
#ifdef CONFIG_KVM_XEN
	case KVM_IRQ_ROUTING_XEN_EVTCHN:
		return kvm_xen_setup_evtchn(kvm, e, ue);
@@ -438,5 +443,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,

void kvm_arch_irq_routing_update(struct kvm *kvm)
{
#ifdef CONFIG_KVM_HYPERV
	kvm_hv_irq_routing_update(kvm);
#endif
}
@@ -88,6 +88,12 @@ struct x86_instruction_info {
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */

/* x86-specific emulation flags */
#define X86EMUL_F_WRITE BIT(0)
#define X86EMUL_F_FETCH BIT(1)
#define X86EMUL_F_IMPLICIT BIT(2)
#define X86EMUL_F_INVLPG BIT(3)

struct x86_emulate_ops {
	void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
	/*
@@ -224,6 +230,9 @@ struct x86_emulate_ops {
	int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);

	gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
				   unsigned int flags);
};

/* Type, address-of, and value of an instruction's operand. */
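
The new get_untagged_addr() hook above lets vendor code strip linear-address tag bits (e.g. for LAM) before the emulator range-checks an address, with the X86EMUL_F_* flags identifying accesses that should not be untagged. A hedged sketch of untagging by sign-extension; the bit width and rules here are simplified and are not KVM's exact LAM behaviour:

#include <stdint.h>
#include <stdio.h>

/* Assumption for illustration: metadata lives above bit 56 (LAM_U57-like),
 * so canonicalize by sign-extending from bit 56 over the upper bits. */
static uint64_t untag_addr(uint64_t addr)
{
	const int shift = 64 - 57;

	return (uint64_t)((int64_t)(addr << shift) >> shift);
}

int main(void)
{
	uint64_t tagged = 0x7e00001234abcdefULL;	/* tag in the high bits */

	printf("untagged: 0x%llx\n", (unsigned long long)untag_addr(tagged));
	return 0;
}
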
@@ -10,6 +10,26 @@
int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, gfn_t nr_pages);
int hv_flush_remote_tlbs(struct kvm *kvm);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
static inline hpa_t hv_get_partition_assist_page(struct kvm_vcpu *vcpu)
{
	/*
	 * Partition assist page is something which Hyper-V running in L0
	 * requires from KVM running in L1 before direct TLB flush for L2
	 * guests can be enabled. KVM doesn't currently use the page but to
	 * comply with TLFS it still needs to be allocated. For now, this
	 * is a single page shared among all vCPUs.
	 */
	struct hv_partition_assist_pg **p_hv_pa_pg =
		&vcpu->kvm->arch.hv_pa_pg;

	if (!*p_hv_pa_pg)
		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);

	if (!*p_hv_pa_pg)
		return INVALID_PAGE;

	return __pa(*p_hv_pa_pg);
}
#else /* !CONFIG_HYPERV */
static inline int hv_flush_remote_tlbs(struct kvm *kvm)
{
@@ -1475,8 +1475,7 @@ static int apic_set_eoi(struct kvm_lapic *apic)
	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (to_hv_vcpu(apic->vcpu) &&
	    test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
	if (kvm_hv_synic_has_vector(apic->vcpu, vector))
		kvm_hv_synic_send_eoi(apic->vcpu, vector);

	kvm_ioapic_send_eoi(apic, vector);
@@ -2905,7 +2904,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
	 */

	apic_clear_irr(vector, apic);
	if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
	if (kvm_hv_synic_auto_eoi_set(vcpu, vector)) {
		/*
		 * For auto-EOI interrupts, there might be another pending
		 * interrupt above PPR, so check whether to raise another
@@ -146,6 +146,14 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
	return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}

static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
{
	if (!guest_can_use(vcpu, X86_FEATURE_LAM))
		return 0;

	return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
}

static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
	u64 root_hpa = vcpu->arch.mmu->root.hpa;
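
kvm_get_active_cr3_lam_bits() above reports which LAM control bits (CR3 bits 61 and 62) the guest has enabled, so MMU code can carry them in the CR3 value it builds while keeping them out of physical-address math (compare the later mmu.c hunk that masks root_pgd with __PT_BASE_ADDR_MASK). A small arithmetic sketch; the bit positions come from the diff, everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

#define CR3_LAM_U48 (1ULL << 61)
#define CR3_LAM_U57 (1ULL << 62)
#define PT_BASE_ADDR_MASK 0x000ffffffffff000ULL	/* bits 51:12, as in the diff */

int main(void)
{
	uint64_t cr3 = 0x1234000ULL | CR3_LAM_U57;

	uint64_t lam_bits = cr3 & (CR3_LAM_U48 | CR3_LAM_U57);	/* preserved in CR3 */
	uint64_t root_gfn = (cr3 & PT_BASE_ADDR_MASK) >> 12;	/* LAM bits stripped */

	printf("lam bits: 0x%llx, root gfn: 0x%llx\n",
	       (unsigned long long)lam_bits, (unsigned long long)root_gfn);
	return 0;
}
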
@@ -271,15 +271,11 @@ static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,

static inline bool kvm_available_flush_remote_tlbs_range(void)
{
#if IS_ENABLED(CONFIG_HYPERV)
	return kvm_x86_ops.flush_remote_tlbs_range;
}

int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
	if (!kvm_x86_ops.flush_remote_tlbs_range)
		return -EOPNOTSUPP;

	return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
#else
	return false;
#endif
}

static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -795,16 +791,26 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
	return &slot->arch.lpage_info[level - 2][idx];
}

/*
 * The most significant bit in disallow_lpage tracks whether or not memory
 * attributes are mixed, i.e. not identical for all gfns at the current level.
 * The lower order bits are used to refcount other cases where a hugepage is
 * disallowed, e.g. if KVM has shadow a page table at the gfn.
 */
#define KVM_LPAGE_MIXED_FLAG BIT(31)

static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
					    gfn_t gfn, int count)
{
	struct kvm_lpage_info *linfo;
	int i;
	int old, i;

	for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
		linfo = lpage_info_slot(gfn, slot, i);

		old = linfo->disallow_lpage;
		linfo->disallow_lpage += count;
		WARN_ON_ONCE(linfo->disallow_lpage < 0);
		WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG);
	}
}

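Per the comment above, disallow_lpage packs a reference count in the low bits and the new "mixed attributes" flag in bit 31, and the added WARN_ON_ONCE checks that bumping the count never corrupts the flag. A tiny self-contained illustration of that packing:

#include <stdio.h>

#define MIXED_FLAG (1u << 31)	/* stands in for KVM_LPAGE_MIXED_FLAG */

int main(void)
{
	unsigned int disallow_lpage = MIXED_FLAG | 2;	/* flag set, refcount 2 */

	unsigned int old = disallow_lpage;
	disallow_lpage += 1;				/* take another reference */

	/* The count changed but the flag must not have. */
	printf("count: %u, flag unchanged: %d\n",
	       disallow_lpage & ~MIXED_FLAG,
	       !((old ^ disallow_lpage) & MIXED_FLAG));
	return 0;
}
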
@@ -1382,7 +1388,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);

		if (READ_ONCE(eager_page_split))
			kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
			kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K);

		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);

@@ -2840,9 +2846,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
			/*
			 * Recheck after taking the spinlock, a different vCPU
			 * may have since marked the page unsync. A false
			 * positive on the unprotected check above is not
			 * negative on the unprotected check above is not
			 * possible as clearing sp->unsync _must_ hold mmu_lock
			 * for write, i.e. unsync cannot transition from 0->1
			 * for write, i.e. unsync cannot transition from 1->0
			 * while this CPU holds mmu_lock for read (or write).
			 */
			if (READ_ONCE(sp->unsync))
@@ -3056,7 +3062,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 *
 * There are several ways to safely use this helper:
 *
 * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
 * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
 *   consuming it. In this case, mmu_lock doesn't need to be held during the
 *   lookup, but it does need to be held while checking the MMU notifier.
 *
@@ -3137,9 +3143,9 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
	return level;
}

int kvm_mmu_max_mapping_level(struct kvm *kvm,
			      const struct kvm_memory_slot *slot, gfn_t gfn,
			      int max_level)
static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
				       const struct kvm_memory_slot *slot,
				       gfn_t gfn, int max_level, bool is_private)
{
	struct kvm_lpage_info *linfo;
	int host_level;
@@ -3151,6 +3157,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
			break;
	}

	if (is_private)
		return max_level;

	if (max_level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

@@ -3158,6 +3167,16 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
	return min(host_level, max_level);
}

int kvm_mmu_max_mapping_level(struct kvm *kvm,
			      const struct kvm_memory_slot *slot, gfn_t gfn,
			      int max_level)
{
	bool is_private = kvm_slot_can_be_private(slot) &&
			  kvm_mem_is_private(kvm, gfn);

	return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
}

void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
	struct kvm_memory_slot *slot = fault->slot;
@@ -3178,8 +3197,9 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
	 * Enforce the iTLB multihit workaround after capturing the requested
	 * level, which will be used to do precise, accurate accounting.
	 */
	fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
						     fault->gfn, fault->max_level);
	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
						       fault->gfn, fault->max_level,
						       fault->is_private);
	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
		return;

@@ -3556,7 +3576,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
		return;

	if (is_tdp_mmu_page(sp))
		kvm_tdp_mmu_put_root(kvm, sp, false);
		kvm_tdp_mmu_put_root(kvm, sp);
	else if (!--sp->root_count && sp->role.invalid)
		kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);

@@ -3739,7 +3759,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
	    kvm_page_track_write_tracking_enabled(kvm))
		goto out_success;

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
		slots = __kvm_memslots(kvm, i);
		kvm_for_each_memslot(slot, bkt, slots) {
			/*
@@ -3782,7 +3802,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
	hpa_t root;

	root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
	root_gfn = root_pgd >> PAGE_SHIFT;
	root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;

	if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
		mmu->root.hpa = kvm_mmu_get_dummy_root();
@@ -4259,6 +4279,55 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
}

static inline u8 kvm_max_level_for_order(int order)
{
	BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);

	KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));

	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
		return PG_LEVEL_1G;

	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
		return PG_LEVEL_2M;

	return PG_LEVEL_4K;
}

static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
					      struct kvm_page_fault *fault)
{
	kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
				      PAGE_SIZE, fault->write, fault->exec,
				      fault->is_private);
}

static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
				   struct kvm_page_fault *fault)
{
	int max_order, r;

	if (!kvm_slot_can_be_private(fault->slot)) {
		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
		return -EFAULT;
	}

	r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
			     &max_order);
	if (r) {
		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
		return r;
	}

	fault->max_level = min(kvm_max_level_for_order(max_order),
			       fault->max_level);
	fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);

	return RET_PF_CONTINUE;
}

static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
	struct kvm_memory_slot *slot = fault->slot;
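kvm_max_level_for_order() above converts the page order reported by kvm_gmem_get_pfn() into a KVM mapping level; on x86 with 4KiB base pages, order 9 corresponds to 2MiB and order 18 to 1GiB. A stand-alone version of that mapping with the level constants spelled out (illustrative, not the kernel code):

#include <stdio.h>

enum level { LEVEL_4K = 1, LEVEL_2M, LEVEL_1G };	/* mirrors PG_LEVEL_* values */

/* order is log2 of the number of base pages backing the folio. */
static enum level max_level_for_order(int order)
{
	if (order >= 18)	/* 2^18 * 4KiB = 1GiB */
		return LEVEL_1G;
	if (order >= 9)		/* 2^9  * 4KiB = 2MiB */
		return LEVEL_2M;
	return LEVEL_4K;
}

int main(void)
{
	printf("order 0 -> %d, order 9 -> %d, order 18 -> %d\n",
	       max_level_for_order(0), max_level_for_order(9),
	       max_level_for_order(18));
	return 0;
}
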
@@ -4291,6 +4360,14 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
			return RET_PF_EMULATE;
	}

	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
		return -EFAULT;
	}

	if (fault->is_private)
		return kvm_faultin_pfn_private(vcpu, fault);

	async = false;
	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
					  fault->write, &fault->map_writable,
@@ -4366,7 +4443,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
		return true;

	return fault->slot &&
	       mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
	       mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}

static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -6228,7 +6305,7 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
	if (!kvm_memslots_have_rmaps(kvm))
		return flush;

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
		slots = __kvm_memslots(kvm, i);

		kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
@@ -6260,7 +6337,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)

	write_lock(&kvm->mmu_lock);

	kvm_mmu_invalidate_begin(kvm, 0, -1ul);
	kvm_mmu_invalidate_begin(kvm);

	kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);

	flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);

@@ -6270,7 +6349,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
	if (flush)
		kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);

	kvm_mmu_invalidate_end(kvm, 0, -1ul);
	kvm_mmu_invalidate_end(kvm);

	write_unlock(&kvm->mmu_lock);
}
@@ -6723,7 +6802,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
	 * modifier prior to checking for a wrap of the MMIO generation so
	 * that a wrap in any address space is detected.
	 */
	gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
	gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1);

	/*
	 * The very rare case: if the MMIO generation number has wrapped,
@@ -7176,3 +7255,163 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
	if (kvm->arch.nx_huge_page_recovery_thread)
		kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
}

#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
					struct kvm_gfn_range *range)
{
	/*
	 * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
	 * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
	 * can simply ignore such slots. But if userspace is making memory
	 * PRIVATE, then KVM must prevent the guest from accessing the memory
	 * as shared. And if userspace is making memory SHARED and this point
	 * is reached, then at least one page within the range was previously
	 * PRIVATE, i.e. the slot's possible hugepage ranges are changing.
	 * Zapping SPTEs in this case ensures KVM will reassess whether or not
	 * a hugepage can be used for affected ranges.
	 */
	if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
		return false;

	return kvm_unmap_gfn_range(kvm, range);
}

static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
				int level)
{
	return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
}

static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
				 int level)
{
	lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
}

static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
			       int level)
{
	lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
}

static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
			       gfn_t gfn, int level, unsigned long attrs)
{
	const unsigned long start = gfn;
	const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);

	if (level == PG_LEVEL_2M)
		return kvm_range_has_memory_attributes(kvm, start, end, attrs);

	for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
		if (hugepage_test_mixed(slot, gfn, level - 1) ||
		    attrs != kvm_get_memory_attributes(kvm, gfn))
			return false;
	}
	return true;
}

bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
					 struct kvm_gfn_range *range)
{
	unsigned long attrs = range->arg.attributes;
	struct kvm_memory_slot *slot = range->slot;
	int level;

	lockdep_assert_held_write(&kvm->mmu_lock);
	lockdep_assert_held(&kvm->slots_lock);

	/*
	 * Calculate which ranges can be mapped with hugepages even if the slot
	 * can't map memory PRIVATE. KVM mustn't create a SHARED hugepage over
	 * a range that has PRIVATE GFNs, and conversely converting a range to
	 * SHARED may now allow hugepages.
	 */
	if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
		return false;

	/*
	 * The sequence matters here: upper levels consume the result of lower
	 * level's scanning.
	 */
	for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
		gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
		gfn_t gfn = gfn_round_for_level(range->start, level);

		/* Process the head page if it straddles the range. */
		if (gfn != range->start || gfn + nr_pages > range->end) {
			/*
			 * Skip mixed tracking if the aligned gfn isn't covered
			 * by the memslot, KVM can't use a hugepage due to the
			 * misaligned address regardless of memory attributes.
			 */
			if (gfn >= slot->base_gfn) {
				if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
					hugepage_clear_mixed(slot, gfn, level);
				else
					hugepage_set_mixed(slot, gfn, level);
			}
			gfn += nr_pages;
		}

		/*
		 * Pages entirely covered by the range are guaranteed to have
		 * only the attributes which were just set.
		 */
		for ( ; gfn + nr_pages <= range->end; gfn += nr_pages)
			hugepage_clear_mixed(slot, gfn, level);

		/*
		 * Process the last tail page if it straddles the range and is
		 * contained by the memslot. Like the head page, KVM can't
		 * create a hugepage if the slot size is misaligned.
		 */
		if (gfn < range->end &&
		    (gfn + nr_pages) <= (slot->base_gfn + slot->npages)) {
			if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
				hugepage_clear_mixed(slot, gfn, level);
			else
				hugepage_set_mixed(slot, gfn, level);
		}
	}
	return false;
}

void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
					    struct kvm_memory_slot *slot)
{
	int level;

	if (!kvm_arch_has_private_mem(kvm))
		return;

	for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
		/*
		 * Don't bother tracking mixed attributes for pages that can't
		 * be huge due to alignment, i.e. process only pages that are
		 * entirely contained by the memslot.
		 */
		gfn_t end = gfn_round_for_level(slot->base_gfn + slot->npages, level);
		gfn_t start = gfn_round_for_level(slot->base_gfn, level);
		gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
		gfn_t gfn;

		if (start < slot->base_gfn)
			start += nr_pages;

		/*
		 * Unlike setting attributes, every potential hugepage needs to
		 * be manually checked as the attributes may already be mixed.
		 */
		for (gfn = start; gfn < end; gfn += nr_pages) {
			unsigned long attrs = kvm_get_memory_attributes(kvm, gfn);

			if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
				hugepage_clear_mixed(slot, gfn, level);
			else
				hugepage_set_mixed(slot, gfn, level);
		}
	}
}
#endif
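
kvm_arch_post_set_memory_attributes() above only re-scans the head and tail hugepages that straddle the converted range; everything fully inside the range is known to carry exactly the new attributes. A small numeric illustration of that head/body/tail split (the gfn numbers are invented):

#include <stdio.h>

int main(void)
{
	/* Example: a 2MiB level covers 512 gfns; convert gfns [300, 1500). */
	unsigned long start = 300, end = 1500, nr = 512;
	unsigned long head = start & ~(nr - 1);			/* 0 */
	unsigned long body = (head == start) ? head : head + nr;	/* 512 */
	unsigned long tail = end & ~(nr - 1);			/* 1024 */

	if (head != start)
		printf("head page at %lu straddles the range, rescan it\n", head);
	for (unsigned long gfn = body; gfn + nr <= end; gfn += nr)
		printf("page at %lu is fully covered, takes the new attributes\n", gfn);
	if (end % nr)
		printf("tail page at %lu straddles the range, rescan it\n", tail);
	return 0;
}
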
@@ -13,6 +13,7 @@
#endif

/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
#define __PT_BASE_ADDR_MASK GENMASK_ULL(51, 12)
#define __PT_LEVEL_SHIFT(level, bits_per_level) \
	(PAGE_SHIFT + ((level) - 1) * (bits_per_level))
#define __PT_INDEX(address, level, bits_per_level) \
@@ -201,6 +202,7 @@ struct kvm_page_fault {

	/* Derived from mmu and global state. */
	const bool is_tdp;
	const bool is_private;
	const bool nx_huge_page_workaround_enabled;

	/*
@@ -296,6 +298,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
		.req_level = PG_LEVEL_4K,
		.goal_level = PG_LEVEL_4K,
		.is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
	};
	int r;

@@ -62,7 +62,7 @@
#endif

/* Common logic, but per-type values. These also need to be undefined. */
#define PT_BASE_ADDR_MASK ((pt_element_t)(((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
#define PT_BASE_ADDR_MASK ((pt_element_t)__PT_BASE_ADDR_MASK)
#define PT_LVL_ADDR_MASK(lvl) __PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_LVL_OFFSET_MASK(lvl) __PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_INDEX(addr, lvl) __PT_INDEX(addr, lvl, PT_LEVEL_BITS)
Some files were not shown because too many files have changed in this diff.