Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:

 "ARM:

   - More progress on the protected VM front, now with the full fixed
     feature set as well as the limitation of some hypercalls after
     initialisation.

   - Cleanup of the RAZ/WI sysreg handling, which was pointlessly
     complicated

   - Fixes for the vgic placement in the IPA space, together with a
     bunch of selftests

   - More memcg accounting of the memory allocated on behalf of a guest

   - Timer and vgic selftests

   - Workarounds for the Apple M1 broken vgic implementation

   - KConfig cleanups

   - New kvmarm.mode=none option, for those who really dislike us

  RISC-V:

   - New KVM port.

  x86:

   - New API to control TSC offset from userspace

   - TSC scaling for nested hypervisors on SVM

   - Switch masterclock protection from raw_spin_lock to seqcount

   - Clean up function prototypes in the page fault code and avoid
     repeated memslot lookups

   - Convey the exit reason to userspace on emulation failure

   - Configure time between NX page recovery iterations

   - Expose Predictive Store Forwarding Disable CPUID leaf

   - Allocate page tracking data structures lazily (if the i915 KVM-GT
     functionality is not compiled in)

   - Cleanups, fixes and optimizations for the shadow MMU code

  s390:

   - SIGP Fixes

   - initial preparations for lazy destroy of secure VMs

   - storage key improvements/fixes

   - Log the guest CPNC

  Starting from this release, KVM-PPC patches will come from Michael
  Ellerman's PPC tree"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits)
  RISC-V: KVM: fix boolreturn.cocci warnings
  RISC-V: KVM: remove unneeded semicolon
  RISC-V: KVM: Fix GPA passed to __kvm_riscv_hfence_gvma_xyz() functions
  RISC-V: KVM: Factor-out FP virtualization into separate sources
  KVM: s390: add debug statement for diag 318 CPNC data
  KVM: s390: pv: properly handle page flags for protected guests
  KVM: s390: Fix handle_sske page fault handling
  KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol
  KVM: x86: On emulation failure, convey the exit reason, etc. to userspace
  KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info
  KVM: x86: Clarify the kvm_run.emulation_failure structure layout
  KVM: s390: Add a routine for setting userspace CPU state
  KVM: s390: Simplify SIGP Set Arch handling
  KVM: s390: pv: avoid stalls when making pages secure
  KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm
  KVM: s390: pv: avoid double free of sida page
  KVM: s390: pv: add macros for UVC CC values
  s390/mm: optimize reset_guest_reference_bit()
  s390/mm: optimize set_guest_storage_key()
  s390/mm: no need for pte_alloc_map_lock() if we know the pmd is present
  ...
This commit is contained in: commit d7e0a795bf
Documentation/admin-guide/kernel-parameters.txt

@@ -2353,7 +2353,14 @@
			[KVM] Controls how many 4KiB pages are periodically zapped
			back to huge pages. 0 disables the recovery, otherwise if
			the value is N KVM will zap 1/Nth of the 4KiB pages every
			minute. The default is 60.
			period (see below). The default is 60.

	kvm.nx_huge_pages_recovery_period_ms=
			[KVM] Controls the time period at which KVM zaps 4KiB pages
			back to huge pages. If the value is a non-zero N, KVM will
			zap a portion (see ratio above) of the pages every N msecs.
			If the value is 0 (the default), KVM will pick a period based
			on the ratio, such that a page is zapped after 1 hour on average.
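For reference, both recovery knobs are ordinary kvm module parameters, so they can also be inspected at run time through sysfs. A minimal C sketch (assuming the usual /sys/module/kvm/parameters/ layout; the exact path and writability are not guaranteed on every kernel)::

        #include <stdio.h>

        /* Sketch: print the current NX huge page recovery period. */
        int main(void)
        {
                char buf[64];
                FILE *f = fopen("/sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms", "r");

                if (!f)
                        return 1;
                if (fgets(buf, sizeof(buf), f))
                        printf("recovery period (ms): %s", buf);
                fclose(f);
                return 0;
        }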
	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
			Default is 1 (enabled)

@@ -2365,6 +2372,8 @@
	kvm-arm.mode=
			[KVM,ARM] Select one of KVM/arm64's modes of operation.

			none: Forcefully disable KVM.

			nvhe: Standard nVHE-based mode, without support for
			protected guests.

@@ -2372,7 +2381,9 @@
			state is kept private from the host.
			Not valid if the kernel is running in EL2.

			Defaults to VHE/nVHE based on hardware support.
			Defaults to VHE/nVHE based on hardware support. Setting
			mode to "protected" will disable kexec and hibernation
			for the host.

	kvm-arm.vgic_v3_group0_trap=
			[KVM,ARM] Trap guest accesses to GICv3 group-0
Documentation/virt/kvm/api.rst

@@ -532,7 +532,7 @@ translation mode.
------------------

:Capability: basic
:Architectures: x86, ppc, mips
:Architectures: x86, ppc, mips, riscv
:Type: vcpu ioctl
:Parameters: struct kvm_interrupt (in)
:Returns: 0 on success, negative on failure.

@@ -601,6 +601,23 @@ interrupt number dequeues the interrupt.
This is an asynchronous vcpu ioctl and can be invoked from any thread.

RISC-V:
^^^^^^^

Queues an external interrupt to be injected into the virtual CPU. This ioctl
is overloaded with 2 different irq values:

a) KVM_INTERRUPT_SET

   This sets external interrupt for a virtual CPU and it will receive
   once it is ready.

b) KVM_INTERRUPT_UNSET

   This clears pending external interrupt for a virtual CPU.

This is an asynchronous vcpu ioctl and can be invoked from any thread.
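Illustrative sketch of how a RISC-V VMM might drive this ioctl (assumes a RISC-V host whose <asm/kvm.h> defines KVM_INTERRUPT_SET/KVM_INTERRUPT_UNSET, and an already-open vCPU file descriptor)::

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Sketch: assert or clear the guest's external interrupt line. */
        static int riscv_set_ext_irq(int vcpu_fd, int pending)
        {
                struct kvm_interrupt irq = {
                        .irq = pending ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET,
                };

                return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
        }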
4.17 KVM_DEBUG_GUEST
--------------------
@@ -993,20 +1010,37 @@ such as migration.
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
set of bits that KVM can return in struct kvm_clock_data's flag member.

The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
value is the exact kvmclock value seen by all VCPUs at the instant
when KVM_GET_CLOCK was called. If clear, the returned value is simply
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
but the exact value read by each VCPU could differ, because the host
TSC is not stable.
The following flags are defined:

KVM_CLOCK_TSC_STABLE
  If set, the returned value is the exact kvmclock
  value seen by all VCPUs at the instant when KVM_GET_CLOCK was called.
  If clear, the returned value is simply CLOCK_MONOTONIC plus a constant
  offset; the offset can be modified with KVM_SET_CLOCK. KVM will try
  to make all VCPUs follow this clock, but the exact value read by each
  VCPU could differ, because the host TSC is not stable.

KVM_CLOCK_REALTIME
  If set, the `realtime` field in the kvm_clock_data
  structure is populated with the value of the host's real time
  clocksource at the instant when KVM_GET_CLOCK was called. If clear,
  the `realtime` field does not contain a value.

KVM_CLOCK_HOST_TSC
  If set, the `host_tsc` field in the kvm_clock_data
  structure is populated with the value of the host's timestamp counter (TSC)
  at the instant when KVM_GET_CLOCK was called. If clear, the `host_tsc` field
  does not contain a value.

::

  struct kvm_clock_data {
        __u64 clock;  /* kvmclock current value */
        __u32 flags;
        __u32 pad[9];
        __u32 pad0;
        __u64 realtime;
        __u64 host_tsc;
        __u32 pad[4];
  };
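A short sketch of how userspace might consume the new fields (assumes kernel headers that already carry the extended kvm_clock_data layout shown above)::

        #include <linux/kvm.h>
        #include <sys/ioctl.h>
        #include <stdio.h>

        /* Sketch: query kvmclock and report which optional fields are valid. */
        static int dump_kvmclock(int vm_fd)
        {
                struct kvm_clock_data data = { 0 };

                if (ioctl(vm_fd, KVM_GET_CLOCK, &data) < 0)
                        return -1;

                printf("kvmclock: %llu ns\n", (unsigned long long)data.clock);
                if (data.flags & KVM_CLOCK_REALTIME)
                        printf("host CLOCK_REALTIME: %llu ns\n",
                               (unsigned long long)data.realtime);
                if (data.flags & KVM_CLOCK_HOST_TSC)
                        printf("host TSC: %llu\n",
                               (unsigned long long)data.host_tsc);
                return 0;
        }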
@@ -1023,12 +1057,25 @@ Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.

The following flags can be passed:

KVM_CLOCK_REALTIME
  If set, KVM will compare the value of the `realtime` field
  with the value of the host's real time clocksource at the instant when
  KVM_SET_CLOCK was called. The difference in elapsed time is added to the final
  kvmclock value that will be provided to guests.

Other flags returned by ``KVM_GET_CLOCK`` are accepted but ignored.

::

  struct kvm_clock_data {
        __u64 clock;  /* kvmclock current value */
        __u32 flags;
        __u32 pad[9];
        __u32 pad0;
        __u64 realtime;
        __u64 host_tsc;
        __u32 pad[4];
  };
@@ -1399,7 +1446,7 @@ for vm-wide capabilities.
---------------------

:Capability: KVM_CAP_MP_STATE
:Architectures: x86, s390, arm, arm64
:Architectures: x86, s390, arm, arm64, riscv
:Type: vcpu ioctl
:Parameters: struct kvm_mp_state (out)
:Returns: 0 on success; -1 on error

@@ -1416,7 +1463,8 @@ uniprocessor guests).
Possible values are:

   ==========================    ===============================================
   KVM_MP_STATE_RUNNABLE         the vcpu is currently running [x86,arm/arm64]
   KVM_MP_STATE_RUNNABLE         the vcpu is currently running
                                 [x86,arm/arm64,riscv]
   KVM_MP_STATE_UNINITIALIZED    the vcpu is an application processor (AP)
                                 which has not yet received an INIT signal [x86]
   KVM_MP_STATE_INIT_RECEIVED    the vcpu has received an INIT signal, and is

@@ -1425,7 +1473,7 @@ Possible values are:
                                 is waiting for an interrupt [x86]
   KVM_MP_STATE_SIPI_RECEIVED    the vcpu has just received a SIPI (vector
                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm/arm64]
   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm/arm64,riscv]
   KVM_MP_STATE_CHECK_STOP       the vcpu is in a special error state [s390]
   KVM_MP_STATE_OPERATING        the vcpu is operating (running or halted)
                                 [s390]

@@ -1437,8 +1485,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.

For arm/arm64:
^^^^^^^^^^^^^^
For arm/arm64/riscv:
^^^^^^^^^^^^^^^^^^^^

The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.

@@ -1447,7 +1495,7 @@ KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
---------------------

:Capability: KVM_CAP_MP_STATE
:Architectures: x86, s390, arm, arm64
:Architectures: x86, s390, arm, arm64, riscv
:Type: vcpu ioctl
:Parameters: struct kvm_mp_state (in)
:Returns: 0 on success; -1 on error

@@ -1459,8 +1507,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.

For arm/arm64:
^^^^^^^^^^^^^^
For arm/arm64/riscv:
^^^^^^^^^^^^^^^^^^^^

The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
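For illustration, checking a vCPU's multiprocessing state from userspace (a sketch only; error handling trimmed)::

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Sketch: report whether a vCPU is runnable rather than stopped. */
        static int vcpu_is_runnable(int vcpu_fd)
        {
                struct kvm_mp_state mp_state;

                if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp_state) < 0)
                        return -1;

                return mp_state.mp_state == KVM_MP_STATE_RUNNABLE;
        }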
@@ -2577,6 +2625,144 @@ following id bit patterns::

  0x7020 0000 0003 02 <0:3> <reg:5>

RISC-V registers are mapped using the lower 32 bits. The upper 8 bits of
that is the register group type.

RISC-V config registers are meant for configuring a Guest VCPU and it has
the following id bit patterns::

  0x8020 0000 01 <index into the kvm_riscv_config struct:24> (32bit Host)
  0x8030 0000 01 <index into the kvm_riscv_config struct:24> (64bit Host)

Following are the RISC-V config registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x80x0 0000 0100 0000   isa       ISA feature bitmap of Guest VCPU
  ======================= ========= =============================================

The isa config register can be read anytime but can only be written before
a Guest VCPU runs. It will have ISA feature bits matching underlying host
set by default.
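As an illustration, the documented encoding can be used directly to read the isa register on a 64-bit host (a sketch; the RISC-V uapi header also provides KVM_REG_RISCV_* helper macros for building these ids)::

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /*
         * Sketch: read the Guest VCPU ISA bitmap using the documented
         * 64-bit-host encoding 0x8030 0000 01 <index>; index 0 is "isa".
         */
        static int riscv_get_guest_isa(int vcpu_fd, unsigned long *isa)
        {
                struct kvm_one_reg reg = {
                        .id   = 0x8030000001000000ULL,  /* config group, isa */
                        .addr = (unsigned long)isa,
                };

                return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
        }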
RISC-V core registers represent the general execution state of a Guest VCPU
and it has the following id bit patterns::

  0x8020 0000 02 <index into the kvm_riscv_core struct:24> (32bit Host)
  0x8030 0000 02 <index into the kvm_riscv_core struct:24> (64bit Host)

Following are the RISC-V core registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x80x0 0000 0200 0000   regs.pc   Program counter
  0x80x0 0000 0200 0001   regs.ra   Return address
  0x80x0 0000 0200 0002   regs.sp   Stack pointer
  0x80x0 0000 0200 0003   regs.gp   Global pointer
  0x80x0 0000 0200 0004   regs.tp   Task pointer
  0x80x0 0000 0200 0005   regs.t0   Caller saved register 0
  0x80x0 0000 0200 0006   regs.t1   Caller saved register 1
  0x80x0 0000 0200 0007   regs.t2   Caller saved register 2
  0x80x0 0000 0200 0008   regs.s0   Callee saved register 0
  0x80x0 0000 0200 0009   regs.s1   Callee saved register 1
  0x80x0 0000 0200 000a   regs.a0   Function argument (or return value) 0
  0x80x0 0000 0200 000b   regs.a1   Function argument (or return value) 1
  0x80x0 0000 0200 000c   regs.a2   Function argument 2
  0x80x0 0000 0200 000d   regs.a3   Function argument 3
  0x80x0 0000 0200 000e   regs.a4   Function argument 4
  0x80x0 0000 0200 000f   regs.a5   Function argument 5
  0x80x0 0000 0200 0010   regs.a6   Function argument 6
  0x80x0 0000 0200 0011   regs.a7   Function argument 7
  0x80x0 0000 0200 0012   regs.s2   Callee saved register 2
  0x80x0 0000 0200 0013   regs.s3   Callee saved register 3
  0x80x0 0000 0200 0014   regs.s4   Callee saved register 4
  0x80x0 0000 0200 0015   regs.s5   Callee saved register 5
  0x80x0 0000 0200 0016   regs.s6   Callee saved register 6
  0x80x0 0000 0200 0017   regs.s7   Callee saved register 7
  0x80x0 0000 0200 0018   regs.s8   Callee saved register 8
  0x80x0 0000 0200 0019   regs.s9   Callee saved register 9
  0x80x0 0000 0200 001a   regs.s10  Callee saved register 10
  0x80x0 0000 0200 001b   regs.s11  Callee saved register 11
  0x80x0 0000 0200 001c   regs.t3   Caller saved register 3
  0x80x0 0000 0200 001d   regs.t4   Caller saved register 4
  0x80x0 0000 0200 001e   regs.t5   Caller saved register 5
  0x80x0 0000 0200 001f   regs.t6   Caller saved register 6
  0x80x0 0000 0200 0020   mode      Privilege mode (1 = S-mode or 0 = U-mode)
  ======================= ========= =============================================

RISC-V csr registers represent the supervisor mode control/status registers
of a Guest VCPU and it has the following id bit patterns::

  0x8020 0000 03 <index into the kvm_riscv_csr struct:24> (32bit Host)
  0x8030 0000 03 <index into the kvm_riscv_csr struct:24> (64bit Host)

Following are the RISC-V csr registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x80x0 0000 0300 0000   sstatus   Supervisor status
  0x80x0 0000 0300 0001   sie       Supervisor interrupt enable
  0x80x0 0000 0300 0002   stvec     Supervisor trap vector base
  0x80x0 0000 0300 0003   sscratch  Supervisor scratch register
  0x80x0 0000 0300 0004   sepc      Supervisor exception program counter
  0x80x0 0000 0300 0005   scause    Supervisor trap cause
  0x80x0 0000 0300 0006   stval     Supervisor bad address or instruction
  0x80x0 0000 0300 0007   sip       Supervisor interrupt pending
  0x80x0 0000 0300 0008   satp      Supervisor address translation and protection
  ======================= ========= =============================================

RISC-V timer registers represent the timer state of a Guest VCPU and it has
the following id bit patterns::

  0x8030 0000 04 <index into the kvm_riscv_timer struct:24>

Following are the RISC-V timer registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x8030 0000 0400 0000   frequency Time base frequency (read-only)
  0x8030 0000 0400 0001   time      Time value visible to Guest
  0x8030 0000 0400 0002   compare   Time compare programmed by Guest
  0x8030 0000 0400 0003   state     Time compare state (1 = ON or 0 = OFF)
  ======================= ========= =============================================

RISC-V F-extension registers represent the single precision floating point
state of a Guest VCPU and it has the following id bit patterns::

  0x8020 0000 05 <index into the __riscv_f_ext_state struct:24>

Following are the RISC-V F-extension registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x8020 0000 0500 0000   f[0]      Floating point register 0
  ...
  0x8020 0000 0500 001f   f[31]     Floating point register 31
  0x8020 0000 0500 0020   fcsr      Floating point control and status register
  ======================= ========= =============================================

RISC-V D-extension registers represent the double precision floating point
state of a Guest VCPU and it has the following id bit patterns::

  0x8020 0000 06 <index into the __riscv_d_ext_state struct:24> (fcsr)
  0x8030 0000 06 <index into the __riscv_d_ext_state struct:24> (non-fcsr)

Following are the RISC-V D-extension registers:

  ======================= ========= =============================================
  Encoding                Register  Description
  ======================= ========= =============================================
  0x8030 0000 0600 0000   f[0]      Floating point register 0
  ...
  0x8030 0000 0600 001f   f[31]     Floating point register 31
  0x8020 0000 0600 0020   fcsr      Floating point control and status register
  ======================= ========= =============================================


4.69 KVM_GET_ONE_REG
--------------------
@@ -5848,6 +6034,25 @@ Valid values for 'type' are:
Userspace is expected to place the hypercall result into the appropriate
field before invoking KVM_RUN again.

::

  /* KVM_EXIT_RISCV_SBI */
  struct {
        unsigned long extension_id;
        unsigned long function_id;
        unsigned long args[6];
        unsigned long ret[2];
  } riscv_sbi;

If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has
done an SBI call which is not handled by KVM RISC-V kernel module. The details
of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The
'extension_id' field of 'riscv_sbi' represents SBI extension ID whereas the
'function_id' field represents function ID of given SBI extension. The 'args'
array field of 'riscv_sbi' represents parameters for the SBI call and 'ret'
array field represents return values. The userspace should update the return
values of SBI call before resuming the VCPU. For more details on the RISC-V SBI
spec refer to https://github.com/riscv/riscv-sbi-doc.
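A minimal sketch of userspace handling for this exit reason (illustrative only; a real VMM would dispatch on extension_id/function_id, and -1 is used here merely as a stand-in for an SBI "not supported" error code)::

        #include <linux/kvm.h>

        /* Sketch: fail every forwarded SBI call before the next KVM_RUN. */
        static void handle_sbi_exit(struct kvm_run *run)
        {
                if (run->exit_reason != KVM_EXIT_RISCV_SBI)
                        return;

                /* run->riscv_sbi.extension_id / function_id identify the call. */
                run->riscv_sbi.ret[0] = -1;     /* a0: error code */
                run->riscv_sbi.ret[1] = 0;      /* a1: value */
        }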
::

  /* Fix the size of the union. */
Documentation/virt/kvm/devices/vcpu.rst

@@ -161,3 +161,73 @@ Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
region. See Documentation/virt/kvm/arm/pvtime.rst for more information
including the layout of the stolen time structure.

4. GROUP: KVM_VCPU_TSC_CTRL
===========================

:Architectures: x86

4.1 ATTRIBUTE: KVM_VCPU_TSC_OFFSET

:Parameters: 64-bit unsigned TSC offset

Returns:

   ======= ======================================
   -EFAULT Error reading/writing the provided
           parameter address.
   -ENXIO  Attribute not supported
   ======= ======================================

Specifies the guest's TSC offset relative to the host's TSC. The guest's
TSC is then derived by the following equation:

  guest_tsc = host_tsc + KVM_VCPU_TSC_OFFSET

This attribute is useful to adjust the guest's TSC on live migration,
so that the TSC counts the time during which the VM was paused. The
following describes a possible algorithm to use for this purpose.

From the source VMM process:

1. Invoke the KVM_GET_CLOCK ioctl to record the host TSC (tsc_src),
   kvmclock nanoseconds (guest_src), and host CLOCK_REALTIME nanoseconds
   (host_src).

2. Read the KVM_VCPU_TSC_OFFSET attribute for every vCPU to record the
   guest TSC offset (ofs_src[i]).

3. Invoke the KVM_GET_TSC_KHZ ioctl to record the frequency of the
   guest's TSC (freq).

From the destination VMM process:

4. Invoke the KVM_SET_CLOCK ioctl, providing the source nanoseconds from
   kvmclock (guest_src) and CLOCK_REALTIME (host_src) in their respective
   fields. Ensure that the KVM_CLOCK_REALTIME flag is set in the provided
   structure.

   KVM will advance the VM's kvmclock to account for elapsed time since
   recording the clock values. Note that this will cause problems in
   the guest (e.g., timeouts) unless CLOCK_REALTIME is synchronized
   between the source and destination, and a reasonably short time passes
   between the source pausing the VMs and the destination executing
   steps 4-7.

5. Invoke the KVM_GET_CLOCK ioctl to record the host TSC (tsc_dest) and
   kvmclock nanoseconds (guest_dest).

6. Adjust the guest TSC offsets for every vCPU to account for (1) time
   elapsed since recording state and (2) difference in TSCs between the
   source and destination machine:

     ofs_dst[i] = ofs_src[i] -
         (guest_src - guest_dest) * freq +
         (tsc_src - tsc_dest)

   ("ofs[i] + tsc - guest * freq" is the guest TSC value corresponding to
   a time of 0 in kvmclock. The above formula ensures that it is the
   same on the destination as it was on the source).

7. Write the KVM_VCPU_TSC_OFFSET attribute for every vCPU with the
   respective value derived in the previous step.
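A sketch of steps 6 and 7 for a single vCPU, using the vCPU device-attribute interface. This is illustrative only: it assumes the inputs were recorded as described above, that freq is the KVM_GET_TSC_KHZ value in kHz (hence the ns-to-ticks conversion), and that the unsigned wrap-around of the tsc term is acceptable because TSC offsets are modular::

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Sketch: recompute and install one vCPU's TSC offset on the destination. */
        static int restore_tsc_offset(int vcpu_fd, __u64 ofs_src,
                                      __u64 guest_src, __u64 guest_dest,
                                      __u64 tsc_src, __u64 tsc_dest,
                                      __u64 freq_khz)
        {
                /* elapsed kvmclock ns converted to guest TSC ticks */
                __u64 elapsed_ticks = (guest_dest - guest_src) * freq_khz / 1000000;
                __u64 ofs_dst = ofs_src + elapsed_ticks + (tsc_src - tsc_dest);
                struct kvm_device_attr attr = {
                        .group = KVM_VCPU_TSC_CTRL,
                        .attr  = KVM_VCPU_TSC_OFFSET,
                        .addr  = (unsigned long)&ofs_dst,
                };

                return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
        }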
@@ -22,7 +22,7 @@ Groups:
  Errors:

  ======= ==========================================
  -EINVAL Value greater than KVM_MAX_VCPU_ID.
  -EINVAL Value greater than KVM_MAX_VCPU_IDS.
  -EFAULT Invalid user pointer for attr->addr.
  -EBUSY  A vcpu is already connected to the device.
  ======= ==========================================

@@ -91,7 +91,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
  Errors:

  ======= ==========================================
  -EINVAL Value greater than KVM_MAX_VCPU_ID.
  -EINVAL Value greater than KVM_MAX_VCPU_IDS.
  -EFAULT Invalid user pointer for attr->addr.
  -EBUSY  A vCPU is already connected to the device.
  ======= ==========================================
MAINTAINERS

@@ -10342,6 +10342,18 @@ F: arch/powerpc/include/uapi/asm/kvm*
F:	arch/powerpc/kernel/kvm*
F:	arch/powerpc/kvm/

KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
M:	Anup Patel <anup.patel@wdc.com>
R:	Atish Patra <atish.patra@wdc.com>
L:	kvm@vger.kernel.org
L:	kvm-riscv@lists.infradead.org
L:	linux-riscv@lists.infradead.org
S:	Maintained
T:	git git://github.com/kvm-riscv/linux.git
F:	arch/riscv/include/asm/kvm*
F:	arch/riscv/include/uapi/asm/kvm*
F:	arch/riscv/kvm/

KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M:	Christian Borntraeger <borntraeger@de.ibm.com>
M:	Janosch Frank <frankja@linux.ibm.com>
arch/arm64/Kconfig

@@ -185,6 +185,7 @@ config ARM64
	select HAVE_GCC_PLUGINS
	select HAVE_HW_BREAKPOINT if PERF_EVENTS
	select HAVE_IRQ_TIME_ACCOUNTING
	select HAVE_KVM
	select HAVE_NMI
	select HAVE_PATA_PLATFORM
	select HAVE_PERF_EVENTS
arch/arm64/include/asm/kvm_arm.h

@@ -295,6 +295,7 @@
#define MDCR_EL2_HPMFZO		(UL(1) << 29)
#define MDCR_EL2_MTPME		(UL(1) << 28)
#define MDCR_EL2_TDCC		(UL(1) << 27)
#define MDCR_EL2_HLP		(UL(1) << 26)
#define MDCR_EL2_HCCD		(UL(1) << 23)
#define MDCR_EL2_TTRF		(UL(1) << 19)
#define MDCR_EL2_HPMD		(UL(1) << 17)
arch/arm64/include/asm/kvm_asm.h

@@ -44,31 +44,39 @@
#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name)

#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init			0
#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run			1
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context		2
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa		3
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid		4
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context		5
#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff		6
#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs			7
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config		8
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr		9
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr		10
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs		11
#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2		12
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs		13
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs		14
#define __KVM_HOST_SMCCC_FUNC___pkvm_init			15
#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp		16
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping	17
#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			20

#ifndef __ASSEMBLY__

#include <linux/mm.h>

enum __kvm_host_smccc_func {
	/* Hypercalls available only prior to pKVM finalisation */
	/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
	__KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
	__KVM_HOST_SMCCC_FUNC___pkvm_init,
	__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
	__KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
	__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
	__KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,

	/* Hypercalls available after pKVM finalisation */
	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
	__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
	__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
	__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
};

#define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
#define DECLARE_KVM_NVHE_SYM(sym)	extern char kvm_nvhe_sym(sym)[]
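Illustrative only: because the enum is split into "before" and "after" pKVM finalisation groups, the nVHE host hypercall dispatcher can refuse the early, privileged hypercalls once __pkvm_prot_finalize has run by comparing the requested function id against that boundary. A rough sketch of the idea (not the actual dispatcher code):

        /* Sketch: gate early hypercalls after protected mode is finalised. */
        static bool host_hcall_allowed(unsigned long id, bool protected_finalised)
        {
                if (!protected_finalised)
                        return true;

                /* Only ids ordered after the finalisation call remain usable. */
                return id > __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
        }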
arch/arm64/include/asm/kvm_emulate.h

@@ -396,7 +396,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
	if (vcpu_mode_is_32bit(vcpu))
		return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);

	return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
	if (vcpu_mode_priv(vcpu))
		return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE);
	else
		return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E);
}

static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
arch/arm64/include/asm/kvm_host.h

@@ -58,6 +58,7 @@
enum kvm_mode {
	KVM_MODE_DEFAULT,
	KVM_MODE_PROTECTED,
	KVM_MODE_NONE,
};
enum kvm_mode kvm_get_mode(void);

@@ -771,7 +772,6 @@ int kvm_set_ipa_limit(void);

#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);

int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);

@@ -780,6 +780,8 @@ static inline bool kvm_vm_is_protected(struct kvm *kvm)
	return false;
}

void kvm_init_protected_traps(struct kvm_vcpu *vcpu);

int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
arch/arm64/include/asm/kvm_hyp.h

@@ -115,7 +115,12 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
#endif

extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);

#endif /* __ARM64_KVM_HYP_H__ */
arch/arm64/include/asm/sysreg.h

@@ -1160,6 +1160,7 @@
#define ICH_HCR_TC		(1 << 10)
#define ICH_HCR_TALL0		(1 << 11)
#define ICH_HCR_TALL1		(1 << 12)
#define ICH_HCR_TDIR		(1 << 14)
#define ICH_HCR_EOIcount_SHIFT	27
#define ICH_HCR_EOIcount_MASK	(0x1f << ICH_HCR_EOIcount_SHIFT)

@@ -1192,6 +1193,8 @@
#define ICH_VTR_SEIS_MASK	(1 << ICH_VTR_SEIS_SHIFT)
#define ICH_VTR_A3V_SHIFT	21
#define ICH_VTR_A3V_MASK	(1 << ICH_VTR_A3V_SHIFT)
#define ICH_VTR_TDS_SHIFT	19
#define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)

#define ARM64_FEATURE_FIELD_BITS	4
arch/arm64/kernel/smp.c

@@ -1128,5 +1128,6 @@ bool cpus_are_stuck_in_kernel(void)
{
	bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());

	return !!cpus_stuck_in_kernel || smp_spin_tables;
	return !!cpus_stuck_in_kernel || smp_spin_tables ||
	       is_protected_kvm_enabled();
}
arch/arm64/kvm/Kconfig

@@ -4,6 +4,7 @@
#

source "virt/lib/Kconfig"
source "virt/kvm/Kconfig"

menuconfig VIRTUALIZATION
	bool "Virtualization"

@@ -19,7 +20,7 @@ if VIRTUALIZATION

menuconfig KVM
	bool "Kernel-based Virtual Machine (KVM) support"
	depends on OF
	depends on HAVE_KVM
	select MMU_NOTIFIER
	select PREEMPT_NOTIFIERS
	select HAVE_KVM_CPU_RELAX_INTERCEPT

@@ -43,12 +44,9 @@ menuconfig KVM

	  If unsure, say N.

if KVM

source "virt/kvm/Kconfig"

config NVHE_EL2_DEBUG
	bool "Debug mode for non-VHE EL2 object"
	depends on KVM
	help
	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
	  Failure reports will BUG() in the hypervisor. This is intended for

@@ -56,6 +54,4 @@ config NVHE_EL2_DEBUG

	  If unsure, say N.

endif # KVM

endif # VIRTUALIZATION
arch/arm64/kvm/arm.c

@@ -291,18 +291,12 @@ long kvm_arch_dev_ioctl(struct file *filp,

struct kvm *kvm_arch_alloc_vm(void)
{
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);
	size_t sz = sizeof(struct kvm);

	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
	return kzalloc(sz, GFP_KERNEL_ACCOUNT);

	return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)

@@ -620,6 +614,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)

	ret = kvm_arm_pmu_v3_enable(vcpu);

	/*
	 * Initialize traps for protected VMs.
	 * NOTE: Move to run in EL2 directly, rather than via a hypercall, once
	 * the code is in place for first run initialization at EL2.
	 */
	if (kvm_vm_is_protected(kvm))
		kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);

	return ret;
}

@@ -1579,25 +1581,33 @@ static void cpu_set_hyp_vector(void)
	kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
}

static void cpu_hyp_reinit(void)
static void cpu_hyp_init_context(void)
{
	kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);

	cpu_hyp_reset();
	if (!is_kernel_in_hyp_mode())
		cpu_init_hyp_mode();
}

static void cpu_hyp_init_features(void)
{
	cpu_set_hyp_vector();
	kvm_arm_init_debug();

	if (is_kernel_in_hyp_mode())
		kvm_timer_init_vhe();
	else
		cpu_init_hyp_mode();

	cpu_set_hyp_vector();

	kvm_arm_init_debug();

	if (vgic_present)
		kvm_vgic_init_cpu_hardware();
}

static void cpu_hyp_reinit(void)
{
	cpu_hyp_reset();
	cpu_hyp_init_context();
	cpu_hyp_init_features();
}

static void _kvm_arch_hardware_enable(void *discard)
{
	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {

@@ -1788,10 +1798,17 @@ static int do_pkvm_init(u32 hyp_va_bits)
	int ret;

	preempt_disable();
	hyp_install_host_vector();
	cpu_hyp_init_context();
	ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
				num_possible_cpus(), kern_hyp_va(per_cpu_base),
				hyp_va_bits);
	cpu_hyp_init_features();

	/*
	 * The stub hypercalls are now disabled, so set our local flag to
	 * prevent a later re-init attempt in kvm_arch_hardware_enable().
	 */
	__this_cpu_write(kvm_arm_hardware_enabled, 1);
	preempt_enable();

	return ret;

@@ -1802,8 +1819,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
	void *addr = phys_to_virt(hyp_mem_base);
	int ret;

	kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
	kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
	kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
	kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);

	ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
	if (ret)

@@ -1971,9 +1993,25 @@ static int init_hyp_mode(void)
	return err;
}

static void _kvm_host_prot_finalize(void *discard)
static void _kvm_host_prot_finalize(void *arg)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int finalize_hyp_mode(void)

@@ -1987,15 +2025,7 @@ static int finalize_hyp_mode(void)
	 * None of other sections should ever be introspected.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, NULL, 1);

	return 0;
	return pkvm_drop_host_privileges();
}

struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)

@@ -2064,6 +2094,11 @@ int kvm_arch_init(void *opaque)
		return -ENODEV;
	}

	if (kvm_get_mode() == KVM_MODE_NONE) {
		kvm_info("KVM disabled from command line\n");
		return -ENODEV;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||

@@ -2137,8 +2172,15 @@ static int __init early_kvm_mode_cfg(char *arg)
		return 0;
	}

	if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
	if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
		kvm_mode = KVM_MODE_DEFAULT;
		return 0;
	}

	if (strcmp(arg, "none") == 0) {
		kvm_mode = KVM_MODE_NONE;
		return 0;
	}

	return -EINVAL;
}
arch/arm64/kvm/hyp/include/hyp/fault.h (new file, 75 lines)

@@ -0,0 +1,75 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_FAULT_H__
#define __ARM64_KVM_HYP_FAULT_H__

#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	u64 par, tmp;

	/*
	 * Resolve the IPA the hard way using the guest VA.
	 *
	 * Stage-1 translation already validated the memory access
	 * rights. As such, we can use the EL1 translation regime, and
	 * don't have to distinguish between EL0 and EL1 access.
	 *
	 * We do need to save/restore PAR_EL1 though, as we haven't
	 * saved the guest context yet, and we may return early...
	 */
	par = read_sysreg_par();
	if (!__kvm_at("s1e1r", far))
		tmp = read_sysreg_par();
	else
		tmp = SYS_PAR_EL1_F; /* back to the guest */
	write_sysreg(par, par_el1);

	if (unlikely(tmp & SYS_PAR_EL1_F))
		return false; /* Translation failed, back to guest */

	/* Convert PAR to HPFAR format */
	*hpfar = PAR_TO_HPFAR(tmp);
	return true;
}

static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
	u64 hpfar, far;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases are true:
	 * 1. The fault was due to a permission fault
	 * 2. The processor carries errata 834220
	 *
	 * Therefore, for all non S1PTW faults where we either have a
	 * permission fault or the errata workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);
	}

	fault->far_el2 = far;
	fault->hpfar_el2 = hpfar;
	return true;
}

#endif
arch/arm64/kvm/hyp/include/hyp/switch.h

@@ -8,6 +8,7 @@
#define __ARM64_KVM_HYP_SWITCH_H__

#include <hyp/adjust_pc.h>
#include <hyp/fault.h>

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>

@@ -137,78 +138,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
	}
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	u64 par, tmp;

	/*
	 * Resolve the IPA the hard way using the guest VA.
	 *
	 * Stage-1 translation already validated the memory access
	 * rights. As such, we can use the EL1 translation regime, and
	 * don't have to distinguish between EL0 and EL1 access.
	 *
	 * We do need to save/restore PAR_EL1 though, as we haven't
	 * saved the guest context yet, and we may return early...
	 */
	par = read_sysreg_par();
	if (!__kvm_at("s1e1r", far))
		tmp = read_sysreg_par();
	else
		tmp = SYS_PAR_EL1_F; /* back to the guest */
	write_sysreg(par, par_el1);

	if (unlikely(tmp & SYS_PAR_EL1_F))
		return false; /* Translation failed, back to guest */

	/* Convert PAR to HPFAR format */
	*hpfar = PAR_TO_HPFAR(tmp);
	return true;
}

static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
	u64 hpfar, far;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases are true:
	 * 1. The fault was due to a permission fault
	 * 2. The processor carries errata 834220
	 *
	 * Therefore, for all non S1PTW faults where we either have a
	 * permission fault or the errata workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);
	}

	fault->far_el2 = far;
	fault->hpfar_el2 = hpfar;
	return true;
}

static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
	u8 ec;
	u64 esr;

	esr = vcpu->arch.fault.esr_el2;
	ec = ESR_ELx_EC(esr);

	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
		return true;

	return __get_fault_info(esr, &vcpu->arch.fault);
	return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}

static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)

@@ -229,8 +161,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}

/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
/*
 * We trap the first access to the FP/SIMD to save the host context and
 * restore the guest context lazily.
 * If FP/SIMD is not implemented, handle the trap and inject an undefined
 * instruction exception to the guest. Similarly for trapped SVE accesses.
 */
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	bool sve_guest, sve_host;
	u8 esr_ec;

@@ -248,9 +185,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
	}

	esr_ec = kvm_vcpu_trap_get_class(vcpu);
	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
	    esr_ec != ESR_ELx_EC_SVE)
		return false;

	/* Don't handle SVE traps for non-SVE vcpus here: */
	if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)

@@ -352,14 +286,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)

static inline bool esr_is_ptrauth_trap(u32 esr)
{
	u32 ec = ESR_ELx_EC(esr);

	if (ec == ESR_ELx_EC_PAC)
		return true;

	if (ec != ESR_ELx_EC_SYS64)
		return false;

	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_APIAKEYLO_EL1:
	case SYS_APIAKEYHI_EL1:

@@ -388,13 +314,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr)

DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	struct kvm_cpu_context *ctxt;
	u64 val;

	if (!vcpu_has_ptrauth(vcpu) ||
	    !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
	if (!vcpu_has_ptrauth(vcpu))
		return false;

	ctxt = this_cpu_ptr(&kvm_hyp_ctxt);

@@ -413,6 +338,90 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
	return true;
}

static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    handle_tx2_tvm(vcpu))
		return true;

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
		return true;

	if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
		return kvm_hyp_handle_ptrauth(vcpu, exit_code);

	return false;
}

static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
		return true;

	return false;
}

static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (!__populate_fault_info(vcpu))
		return true;

	return false;
}

static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (!__populate_fault_info(vcpu))
		return true;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				return true;

			/* Promote an illegal access to an SError.*/
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;
		}
	}

	return false;
}

typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);

static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);

/*
 * Allow the hypervisor to handle the exit with an exit handler if it has one.
 *
 * Returns true if the hypervisor handled the exit, and control should go back
 * to the guest, or false if it hasn't.
 */
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
	exit_handler_fn fn;

	fn = handlers[kvm_vcpu_trap_get_class(vcpu)];

	if (fn)
		return fn(vcpu, exit_code);

	return false;
}
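Illustrative only: kvm_get_exit_handler_array() returns a table of this shape, indexed by the ESR_ELx exception class; the real tables live in the VHE and nVHE copies of switch.c and differ per mode, so treat the entries below as a sketch rather than the definitive contents:

        /* Sketch of an exit handler table indexed by exception class. */
        static const exit_handler_fn hyp_exit_handlers[] = {
                [0 ... ESR_ELx_EC_MAX]          = NULL,
                [ESR_ELx_EC_CP15_32]            = kvm_hyp_handle_cp15_32,
                [ESR_ELx_EC_SYS64]              = kvm_hyp_handle_sysreg,
                [ESR_ELx_EC_SVE]                = kvm_hyp_handle_fpsimd,
                [ESR_ELx_EC_FP_ASIMD]           = kvm_hyp_handle_fpsimd,
                [ESR_ELx_EC_IABT_LOW]           = kvm_hyp_handle_iabt_low,
                [ESR_ELx_EC_DABT_LOW]           = kvm_hyp_handle_dabt_low,
                [ESR_ELx_EC_PAC]                = kvm_hyp_handle_ptrauth,
        };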
/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the

@@ -447,59 +456,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
	if (*exit_code != ARM_EXCEPTION_TRAP)
		goto exit;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
	    handle_tx2_tvm(vcpu))
	/* Check if there's an exit handler and allow it to handle the exit. */
	if (kvm_hyp_handle_exit(vcpu, exit_code))
		goto guest;

	/*
	 * We trap the first access to the FP/SIMD to save the host context
	 * and restore the guest context lazily.
	 * If FP/SIMD is not implemented, handle the trap and inject an
	 * undefined instruction exception to the guest.
	 * Similarly for trapped SVE accesses.
	 */
	if (__hyp_handle_fpsimd(vcpu))
		goto guest;

	if (__hyp_handle_ptrauth(vcpu))
		goto guest;

	if (!__populate_fault_info(vcpu))
		goto guest;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
			kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				goto guest;

			/* Promote an illegal access to an SError.*/
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;

			goto exit;
		}
	}

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
		int ret = __vgic_v3_perform_cpuif_access(vcpu);

		if (ret == 1)
			goto guest;
	}

exit:
	/* Return to the host kernel and handle the exit */
	return false;
arch/arm64/kvm/hyp/include/nvhe/fixed_config.h (new file, 200 lines)

@@ -0,0 +1,200 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2021 Google LLC
 * Author: Fuad Tabba <tabba@google.com>
 */

#ifndef __ARM64_KVM_FIXED_CONFIG_H__
#define __ARM64_KVM_FIXED_CONFIG_H__

#include <asm/sysreg.h>

/*
 * This file contains definitions for features to be allowed or restricted for
 * guest virtual machines, depending on the mode KVM is running in and on the
 * type of guest that is running.
 *
 * The ALLOW masks represent a bitmask of feature fields that are allowed
 * without any restrictions as long as they are supported by the system.
 *
 * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
 * features that are restricted to support at most the specified feature.
 *
 * If a feature field is not present in either, than it is not supported.
 *
 * The approach taken for protected VMs is to allow features that are:
 * - Needed by common Linux distributions (e.g., floating point)
 * - Trivial to support, e.g., supporting the feature does not introduce or
 *   require tracking of additional state in KVM
 * - Cannot be trapped or prevent the guest from using anyway
 */
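Illustrative only: the intent of an ALLOW mask is that feature ID fields outside it read as zero ("not implemented") for a protected guest. A rough sketch of that filtering idea (the real logic lives in the pKVM sys_regs handling and additionally applies the RESTRICT_UNSIGNED caps):

        /* Sketch: hide all feature fields that are not explicitly allowed. */
        static inline u64 pvm_filter_id_reg(u64 hw_val, u64 allow_mask)
        {
                return hw_val & allow_mask;
        }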
/*
 * Allow for protected VMs:
 * - Floating-point and Advanced SIMD
 * - Data Independent Timing
 */
#define PVM_ID_AA64PFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - AArch64 guests only (no support for AArch32 guests):
 *	AArch32 adds complexity in trap handling, emulation, condition codes,
 *	etc...
 * - RAS (v1)
 *	Supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
	)

/*
 * Allow for protected VMs:
 * - Branch Target Identification
 * - Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
	ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
	)

/*
 * Allow for protected VMs:
 * - Mixed-endian
 * - Distinction between Secure and Non-secure Memory
 * - Mixed-endian at EL0 only
 * - Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - 40-bit IPA
 * - 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
	)

/*
 * Allow for protected VMs:
 * - Hardware translation table updates to Access flag and Dirty state
 * - Number of VMID bits from CPU
 * - Hierarchical Permission Disables
 * - Privileged Access Never
 * - SError interrupt exceptions from speculative reads
 * - Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
	)

/*
 * Allow for protected VMs:
 * - Common not Private translations
 * - User Access Override
 * - IESB bit in the SCTLR_ELx registers
 * - Unaligned single-copy atomicity and atomic functions
 * - ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL field in address operations.
 * - Break-before-make sequences when changing translation block size
 * - E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
	)

/*
 * No support for Scalable Vectors for protected VMs:
 *	Requires additional support from KVM, e.g., context-switching and
 *	trapping at EL2
 */
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)

/*
 * No support for debug, including breakpoints, and watchpoints for protected
 * VMs:
 *	The Arm architecture mandates support for at least the Armv8 debug
 *	architecture, which would include at least 2 hardware breakpoints and
 *	watchpoints. Providing that support to protected guests adds
 *	considerable state and complexity. Therefore, the reserved value of 0 is
 *	used for debug-related fields.
 */
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)

/*
 * No support for implementation defined features.
 */
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)

/*
 * No restrictions on instructions implemented in AArch64.
 */
#define PVM_ID_AA64ISAR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
	)

#define PVM_ID_AA64ISAR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
	)

u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
int kvm_check_pvm_sysreg_table(void);
|
||||
|
||||
#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
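The ALLOW and RESTRICT_UNSIGNED masks defined above are consumed by the hyp ID-register accessors added later in this series (get_pvm_id_aa64pfr0() and friends): allowed fields are passed through from the host's sanitized value, while restricted unsigned fields are capped per field. Below is a rough stand-alone sketch of that combining step; the 4-bit toy field layout, the FIELD_MASK()/FIELD_VAL() helpers and the main() harness are invented for illustration and are not the kernel's ARM64_FEATURE_MASK()/FIELD_PREP() API. In the real code the capped half is the per-field unsigned minimum of the host value and the cap, as sketched again after get_restricted_features_unsigned() further down.

/* Example sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

/* Toy 4-bit field helpers; illustrative only, not the kernel macros. */
#define FIELD_MASK(shift)	(0xfULL << (shift))
#define FIELD_VAL(shift, v)	(((uint64_t)(v) & 0xf) << (shift))

#define TOY_FP_SHIFT	16	/* an "allowed" field */
#define TOY_EL1_SHIFT	4	/* a "restricted unsigned" field */

int main(void)
{
	/* Pretend host sanitized value: FP level 1, EL1 supporting AArch32+AArch64 (2). */
	uint64_t sys_val = FIELD_VAL(TOY_FP_SHIFT, 1) | FIELD_VAL(TOY_EL1_SHIFT, 2);

	/* ALLOW mask: FP is passed through as the host reports it. */
	uint64_t allow = FIELD_MASK(TOY_FP_SHIFT);

	/* Restricted field capped at "AArch64 only" (1); min(2, 1) == 1. */
	uint64_t capped = FIELD_VAL(TOY_EL1_SHIFT, 1);

	/* Same shape as the pKVM accessors: masked pass-through plus capped fields. */
	uint64_t guest_view = (sys_val & allow) | capped;

	printf("host %#llx -> guest %#llx\n",
	       (unsigned long long)sys_val, (unsigned long long)guest_view);
	return 0;
}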
@ -15,4 +15,6 @@
#define DECLARE_REG(type, name, ctxt, reg) \
	type name = (type)cpu_reg(ctxt, (reg))

void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);

#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
|
||||
cache.o setup.o mm.o mem_protect.o
|
||||
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
|
||||
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
|
||||
obj-y += $(lib-objs)
|
||||
|
@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic)
|
||||
b __host_enter_for_panic
|
||||
SYM_FUNC_END(__hyp_do_panic)
|
||||
|
||||
.macro host_el1_sync_vect
|
||||
.align 7
|
||||
.L__vect_start\@:
|
||||
stp x0, x1, [sp, #-16]!
|
||||
mrs x0, esr_el2
|
||||
lsr x0, x0, #ESR_ELx_EC_SHIFT
|
||||
cmp x0, #ESR_ELx_EC_HVC64
|
||||
b.ne __host_exit
|
||||
|
||||
SYM_FUNC_START(__host_hvc)
|
||||
ldp x0, x1, [sp] // Don't fixup the stack yet
|
||||
|
||||
/* No stub for you, sonny Jim */
|
||||
alternative_if ARM64_KVM_PROTECTED_MODE
|
||||
b __host_exit
|
||||
alternative_else_nop_endif
|
||||
|
||||
/* Check for a stub HVC call */
|
||||
cmp x0, #HVC_STUB_HCALL_NR
|
||||
b.hs __host_exit
|
||||
@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic)
|
||||
ldr x5, =__kvm_handle_stub_hvc
|
||||
hyp_pa x5, x6
|
||||
br x5
|
||||
SYM_FUNC_END(__host_hvc)
|
||||
|
||||
.macro host_el1_sync_vect
|
||||
.align 7
|
||||
.L__vect_start\@:
|
||||
stp x0, x1, [sp, #-16]!
|
||||
mrs x0, esr_el2
|
||||
lsr x0, x0, #ESR_ELx_EC_SHIFT
|
||||
cmp x0, #ESR_ELx_EC_HVC64
|
||||
b.eq __host_hvc
|
||||
b __host_exit
|
||||
.L__vect_end\@:
|
||||
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
|
||||
.error "host_el1_sync_vect larger than vector entry"
|
||||
|
@ -4,7 +4,7 @@
|
||||
* Author: Andrew Scull <ascull@google.com>
|
||||
*/
|
||||
|
||||
#include <hyp/switch.h>
|
||||
#include <hyp/adjust_pc.h>
|
||||
|
||||
#include <asm/pgtable-types.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
@ -160,41 +160,65 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
|
||||
}
|
||||
|
||||
static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
|
||||
|
||||
__pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
|
||||
}
|
||||
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
|
||||
static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
/* ___kvm_hyp_init */
|
||||
HANDLE_FUNC(__kvm_get_mdcr_el2),
|
||||
HANDLE_FUNC(__pkvm_init),
|
||||
HANDLE_FUNC(__pkvm_create_private_mapping),
|
||||
HANDLE_FUNC(__pkvm_cpu_set_vector),
|
||||
HANDLE_FUNC(__kvm_enable_ssbs),
|
||||
HANDLE_FUNC(__vgic_v3_init_lrs),
|
||||
HANDLE_FUNC(__vgic_v3_get_gic_config),
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__kvm_adjust_pc),
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__kvm_flush_cpu_context),
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
HANDLE_FUNC(__kvm_enable_ssbs),
|
||||
HANDLE_FUNC(__vgic_v3_get_gic_config),
|
||||
HANDLE_FUNC(__vgic_v3_read_vmcr),
|
||||
HANDLE_FUNC(__vgic_v3_write_vmcr),
|
||||
HANDLE_FUNC(__vgic_v3_init_lrs),
|
||||
HANDLE_FUNC(__kvm_get_mdcr_el2),
|
||||
HANDLE_FUNC(__vgic_v3_save_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_restore_aprs),
|
||||
HANDLE_FUNC(__pkvm_init),
|
||||
HANDLE_FUNC(__pkvm_cpu_set_vector),
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__pkvm_create_private_mapping),
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
HANDLE_FUNC(__pkvm_vcpu_init_traps),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, id, host_ctxt, 0);
|
||||
unsigned long hcall_min = 0;
|
||||
hcall_t hfn;
|
||||
|
||||
/*
|
||||
* If pKVM has been initialised then reject any calls to the
|
||||
* early "privileged" hypercalls. Note that we cannot reject
|
||||
* calls to __pkvm_prot_finalize for two reasons: (1) The static
|
||||
* key used to determine initialisation must be toggled prior to
|
||||
* finalisation and (2) finalisation is performed on a per-CPU
|
||||
* basis. This is all fine, however, since __pkvm_prot_finalize
|
||||
* returns -EPERM after the first call for a given CPU.
|
||||
*/
|
||||
if (static_branch_unlikely(&kvm_protected_mode_initialized))
|
||||
hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
|
||||
|
||||
id -= KVM_HOST_SMCCC_ID(0);
|
||||
|
||||
if (unlikely(id >= ARRAY_SIZE(host_hcall)))
|
||||
if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
|
||||
goto inval;
|
||||
|
||||
hfn = host_hcall[id];
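The range check above implements the "raise the floor" idea from the comment: once pKVM is initialised, hcall_min moves past the early privileged entries, so their ids fall out of the accepted range. A stand-alone sketch of that dispatch pattern follows; the table contents and the initialised flag are invented for the example and are not KVM symbols.

/* Example sketch, not part of the patch. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef void (*hcall_fn)(void);

static void early_init(void) { puts("early_init"); }
static void finalize(void)   { puts("finalize"); }
static void run_vcpu(void)   { puts("run_vcpu"); }

/* Early, privileged entries come first; 'finalize' and later stay callable. */
static const hcall_fn table[] = { early_init, finalize, run_vcpu };
#define FIRST_LATE_HCALL 1	/* index of 'finalize' */

static bool initialised;	/* stands in for the static key */

static bool dispatch(size_t id)
{
	size_t min = initialised ? FIRST_LATE_HCALL : 0;

	if (id < min || id >= sizeof(table) / sizeof(table[0]))
		return false;		/* same effect as "goto inval" */

	table[id]();
	return true;
}

int main(void)
{
	dispatch(0);			/* allowed before initialisation */
	initialised = true;
	if (!dispatch(0))		/* id 0 is now below the floor */
		puts("early_init rejected after init");
	dispatch(2);			/* late hypercalls keep working */
	return 0;
}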
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
#include <hyp/switch.h>
|
||||
#include <hyp/fault.h>
|
||||
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
@ -25,12 +25,6 @@ struct host_kvm host_kvm;
|
||||
|
||||
static struct hyp_pool host_s2_pool;
|
||||
|
||||
/*
|
||||
* Copies of the host's CPU features registers holding sanitized values.
|
||||
*/
|
||||
u64 id_aa64mmfr0_el1_sys_val;
|
||||
u64 id_aa64mmfr1_el1_sys_val;
|
||||
|
||||
const u8 pkvm_hyp_id = 1;
|
||||
|
||||
static void *host_s2_zalloc_pages_exact(size_t size)
|
||||
@ -134,6 +128,9 @@ int __pkvm_prot_finalize(void)
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
if (params->hcr_el2 & HCR_VM)
|
||||
return -EPERM;
|
||||
|
||||
params->vttbr = kvm_get_vttbr(mmu);
|
||||
params->vtcr = host_kvm.arch.vtcr;
|
||||
params->hcr_el2 |= HCR_VM;
|
||||
|
arch/arm64/kvm/hyp/nvhe/pkvm.c (new file, 185 lines)
@ -0,0 +1,185 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2021 Google LLC
|
||||
* Author: Fuad Tabba <tabba@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mm.h>
|
||||
#include <nvhe/fixed_config.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64PFR0.
|
||||
*/
|
||||
static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
|
||||
u64 hcr_set = HCR_RW;
|
||||
u64 hcr_clear = 0;
|
||||
u64 cptr_set = 0;
|
||||
|
||||
/* Protected KVM does not support AArch32 guests. */
|
||||
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
|
||||
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
|
||||
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
|
||||
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
|
||||
|
||||
/*
|
||||
* Linux guests assume support for floating-point and Advanced SIMD. Do
|
||||
* not change the trapping behavior for these from the KVM default.
|
||||
*/
|
||||
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
|
||||
PVM_ID_AA64PFR0_ALLOW));
|
||||
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
|
||||
PVM_ID_AA64PFR0_ALLOW));
|
||||
|
||||
/* Trap RAS unless all current versions are supported */
|
||||
if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
|
||||
ID_AA64PFR0_RAS_V1P1) {
|
||||
hcr_set |= HCR_TERR | HCR_TEA;
|
||||
hcr_clear |= HCR_FIEN;
|
||||
}
|
||||
|
||||
/* Trap AMU */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
|
||||
hcr_clear |= HCR_AMVOFFEN;
|
||||
cptr_set |= CPTR_EL2_TAM;
|
||||
}
|
||||
|
||||
/* Trap SVE */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
|
||||
cptr_set |= CPTR_EL2_TZ;
|
||||
|
||||
vcpu->arch.hcr_el2 |= hcr_set;
|
||||
vcpu->arch.hcr_el2 &= ~hcr_clear;
|
||||
vcpu->arch.cptr_el2 |= cptr_set;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64PFR1.
|
||||
*/
|
||||
static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
|
||||
u64 hcr_set = 0;
|
||||
u64 hcr_clear = 0;
|
||||
|
||||
/* Memory Tagging: Trap and Treat as Untagged if not supported. */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
|
||||
hcr_set |= HCR_TID5;
|
||||
hcr_clear |= HCR_DCT | HCR_ATA;
|
||||
}
|
||||
|
||||
vcpu->arch.hcr_el2 |= hcr_set;
|
||||
vcpu->arch.hcr_el2 &= ~hcr_clear;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64DFR0.
|
||||
*/
|
||||
static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
|
||||
u64 mdcr_set = 0;
|
||||
u64 mdcr_clear = 0;
|
||||
u64 cptr_set = 0;
|
||||
|
||||
/* Trap/constrain PMU */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
|
||||
mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
|
||||
mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
|
||||
MDCR_EL2_HPMN_MASK;
|
||||
}
|
||||
|
||||
/* Trap Debug */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
|
||||
mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
|
||||
|
||||
/* Trap OS Double Lock */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
|
||||
mdcr_set |= MDCR_EL2_TDOSA;
|
||||
|
||||
/* Trap SPE */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
|
||||
mdcr_set |= MDCR_EL2_TPMS;
|
||||
mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
|
||||
}
|
||||
|
||||
/* Trap Trace Filter */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
|
||||
mdcr_set |= MDCR_EL2_TTRF;
|
||||
|
||||
/* Trap Trace */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
|
||||
cptr_set |= CPTR_EL2_TTA;
|
||||
|
||||
vcpu->arch.mdcr_el2 |= mdcr_set;
|
||||
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
|
||||
vcpu->arch.cptr_el2 |= cptr_set;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64MMFR0.
|
||||
*/
|
||||
static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
|
||||
u64 mdcr_set = 0;
|
||||
|
||||
/* Trap Debug Communications Channel registers */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
|
||||
mdcr_set |= MDCR_EL2_TDCC;
|
||||
|
||||
vcpu->arch.mdcr_el2 |= mdcr_set;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64MMFR1.
|
||||
*/
|
||||
static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
|
||||
u64 hcr_set = 0;
|
||||
|
||||
/* Trap LOR */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
|
||||
hcr_set |= HCR_TLOR;
|
||||
|
||||
vcpu->arch.hcr_el2 |= hcr_set;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set baseline trap register values.
|
||||
*/
|
||||
static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const u64 hcr_trap_feat_regs = HCR_TID3;
|
||||
const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
|
||||
|
||||
/*
|
||||
* Always trap:
|
||||
* - Feature id registers: to control features exposed to guests
|
||||
* - Implementation-defined features
|
||||
*/
|
||||
vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
|
||||
|
||||
/* Clear res0 and set res1 bits to trap potential new features. */
|
||||
vcpu->arch.hcr_el2 &= ~(HCR_RES0);
|
||||
vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
|
||||
vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
|
||||
vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize trap register values for protected VMs.
|
||||
*/
|
||||
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
pvm_init_trap_regs(vcpu);
|
||||
pvm_init_traps_aa64pfr0(vcpu);
|
||||
pvm_init_traps_aa64pfr1(vcpu);
|
||||
pvm_init_traps_aa64dfr0(vcpu);
|
||||
pvm_init_traps_aa64mmfr0(vcpu);
|
||||
pvm_init_traps_aa64mmfr1(vcpu);
|
||||
}
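Each pvm_init_traps_*() helper above follows the same pattern: read the protected VM's view of an ID register, extract one 4-bit field, and tighten HCR_EL2/MDCR_EL2/CPTR_EL2 when the feature is absent or below the supported version. A stand-alone sketch of that pattern follows; the field offset and trap bit values are invented for the example.

/* Example sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

/* Invented field offset and trap bits, standing in for FIELD_GET()/HCR_* flags. */
#define TOY_RAS_SHIFT	28
#define TOY_RAS_MASK	(0xfULL << TOY_RAS_SHIFT)
#define TOY_HCR_TERR	(1ULL << 0)
#define TOY_HCR_TEA	(1ULL << 1)

static uint64_t ras_field(uint64_t id_reg)
{
	return (id_reg & TOY_RAS_MASK) >> TOY_RAS_SHIFT;
}

int main(void)
{
	uint64_t id_reg = 1ULL << TOY_RAS_SHIFT;	/* guest view reports RAS v1 */
	uint64_t hcr = 0;

	/* Same shape as pvm_init_traps_aa64pfr0(): trap RAS unless v1p1 (2) or better. */
	if (ras_field(id_reg) < 2)
		hcr |= TOY_HCR_TERR | TOY_HCR_TEA;

	printf("hcr bits to set: %#llx\n", (unsigned long long)hcr);
	return 0;
}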
|
@ -10,6 +10,7 @@
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/fixed_config.h>
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
|
||||
int ret;
|
||||
|
||||
BUG_ON(kvm_check_pvm_sysreg_table());
|
||||
|
||||
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#include <nvhe/fixed_config.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
|
||||
/* Non-VHE specific context */
|
||||
@ -158,6 +159,101 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
|
||||
write_sysreg(pmu->events_host, pmcntenset_el0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler for protected VM MSR, MRS or System instruction execution in AArch64.
|
||||
*
|
||||
* Returns true if the hypervisor has handled the exit, and control should go
|
||||
* back to the guest, or false if it hasn't.
|
||||
*/
|
||||
static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
/*
|
||||
* Make sure we handle the exit for workarounds and ptrauth
|
||||
* before the pKVM handling, as the latter could decide to
|
||||
* UNDEF.
|
||||
*/
|
||||
return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
|
||||
kvm_handle_pvm_sysreg(vcpu, exit_code));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler for protected floating-point and Advanced SIMD accesses.
|
||||
*
|
||||
* Returns true if the hypervisor has handled the exit, and control should go
|
||||
* back to the guest, or false if it hasn't.
|
||||
*/
|
||||
static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
/* Linux guests assume support for floating-point and Advanced SIMD. */
|
||||
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
|
||||
PVM_ID_AA64PFR0_ALLOW));
|
||||
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
|
||||
PVM_ID_AA64PFR0_ALLOW));
|
||||
|
||||
return kvm_hyp_handle_fpsimd(vcpu, exit_code);
|
||||
}
|
||||
|
||||
static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
|
||||
[ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
|
||||
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
static const exit_handler_fn pvm_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
|
||||
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
|
||||
return pvm_exit_handlers;
|
||||
|
||||
return hyp_exit_handlers;
|
||||
}
|
||||
|
||||
/*
 * Some guests (e.g., protected VMs) are not allowed to run in AArch32.
 * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
 * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
 * hypervisor spots a guest in such a state, ensure it is handled and don't
 * trust the host to spot or fix it. The check below is based on the one in
 * kvm_arch_vcpu_ioctl_run().
 *
 * Returns false if the guest ran in AArch32 when it shouldn't have, and
 * thus should exit to the host, or true if the guest run loop can continue.
 */
|
||||
static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
|
||||
|
||||
if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
|
||||
/*
|
||||
* As we have caught the guest red-handed, decide that it isn't
|
||||
* fit for purpose anymore by making the vcpu invalid. The VMM
|
||||
* can try and fix it by re-initializing the vcpu with
|
||||
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
|
||||
* protected VMs.
|
||||
*/
|
||||
vcpu->arch.target = -1;
|
||||
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
|
||||
*exit_code |= ARM_EXCEPTION_IL;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Switch to the guest for legacy non-VHE systems */
|
||||
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -220,6 +316,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
/* Jump in the fire! */
|
||||
exit_code = __guest_enter(vcpu);
|
||||
|
||||
if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
|
||||
break;
|
||||
|
||||
/* And we're baaack! */
|
||||
} while (fixup_guest_exit(vcpu, &exit_code));
|
||||
|
||||
|
arch/arm64/kvm/hyp/nvhe/sys_regs.c (new file, 487 lines)
@ -0,0 +1,487 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2021 Google LLC
|
||||
* Author: Fuad Tabba <tabba@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/irqchip/arm-gic-v3.h>
|
||||
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include <hyp/adjust_pc.h>
|
||||
|
||||
#include <nvhe/fixed_config.h>
|
||||
|
||||
#include "../../sys_regs.h"
|
||||
|
||||
/*
|
||||
* Copies of the host's CPU features registers holding sanitized values at hyp.
|
||||
*/
|
||||
u64 id_aa64pfr0_el1_sys_val;
|
||||
u64 id_aa64pfr1_el1_sys_val;
|
||||
u64 id_aa64isar0_el1_sys_val;
|
||||
u64 id_aa64isar1_el1_sys_val;
|
||||
u64 id_aa64mmfr0_el1_sys_val;
|
||||
u64 id_aa64mmfr1_el1_sys_val;
|
||||
u64 id_aa64mmfr2_el1_sys_val;
|
||||
|
||||
/*
|
||||
* Inject an unknown/undefined exception to an AArch64 guest while most of its
|
||||
* sysregs are live.
|
||||
*/
|
||||
static void inject_undef64(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
|
||||
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
|
||||
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
|
||||
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
|
||||
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
|
||||
__kvm_adjust_pc(vcpu);
|
||||
|
||||
write_sysreg_el1(esr, SYS_ESR);
|
||||
write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
|
||||
write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
|
||||
write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the restricted features values of the feature register based on the
|
||||
* limitations in restrict_fields.
|
||||
* A feature id field value of 0b0000 does not impose any restrictions.
|
||||
* Note: Use only for unsigned feature field values.
|
||||
*/
|
||||
static u64 get_restricted_features_unsigned(u64 sys_reg_val,
|
||||
u64 restrict_fields)
|
||||
{
|
||||
u64 value = 0UL;
|
||||
u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
|
||||
|
||||
/*
|
||||
* According to the Arm Architecture Reference Manual, feature fields
|
||||
* use increasing values to indicate increases in functionality.
|
||||
* Iterate over the restricted feature fields and calculate the minimum
|
||||
* unsigned value between the one supported by the system, and what the
|
||||
* value is being restricted to.
|
||||
*/
|
||||
while (sys_reg_val && restrict_fields) {
|
||||
value |= min(sys_reg_val & mask, restrict_fields & mask);
|
||||
sys_reg_val &= ~mask;
|
||||
restrict_fields &= ~mask;
|
||||
mask <<= ARM64_FEATURE_FIELD_BITS;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
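A concrete run of the loop above makes the per-field minimum easier to see. The sketch below repeats the algorithm stand-alone, assuming ARM64_FEATURE_FIELD_BITS is 4; for a host value of 0x021 and caps of 0x013, the two populated fields are reduced independently and the result is 0x011.

/* Example sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 4	/* assumed value of ARM64_FEATURE_FIELD_BITS */

/* Same loop shape as get_restricted_features_unsigned(). */
static uint64_t restrict_unsigned(uint64_t sys_reg_val, uint64_t restrict_fields)
{
	uint64_t value = 0, mask = (1ULL << FIELD_BITS) - 1;

	while (sys_reg_val && restrict_fields) {
		uint64_t a = sys_reg_val & mask, b = restrict_fields & mask;

		value |= a < b ? a : b;	/* per-field unsigned minimum */
		sys_reg_val &= ~mask;
		restrict_fields &= ~mask;
		mask <<= FIELD_BITS;
	}
	return value;
}

int main(void)
{
	/* Host reports 0x021, caps are 0x013: result is the field-wise min 0x011. */
	printf("%#llx\n", (unsigned long long)restrict_unsigned(0x021, 0x013));
	return 0;
}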
|
||||
|
||||
/*
|
||||
* Functions that return the value of feature id registers for protected VMs
|
||||
* based on allowed features, system features, and KVM support.
|
||||
*/
|
||||
|
||||
static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
|
||||
u64 set_mask = 0;
|
||||
u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
|
||||
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
|
||||
|
||||
set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
|
||||
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
|
||||
|
||||
/* Spectre and Meltdown mitigation in KVM */
|
||||
set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
|
||||
(u64)kvm->arch.pfr0_csv2);
|
||||
set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
|
||||
(u64)kvm->arch.pfr0_csv3);
|
||||
|
||||
return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
|
||||
u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
|
||||
|
||||
if (!kvm_has_mte(kvm))
|
||||
allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
|
||||
|
||||
return id_aa64pfr1_el1_sys_val & allow_mask;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* No support for Scalable Vectors, therefore, hyp has no sanitized
|
||||
* copy of the feature id register.
|
||||
*/
|
||||
BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* No support for debug, including breakpoints, and watchpoints,
|
||||
* therefore, pKVM has no sanitized copy of the feature id register.
|
||||
*/
|
||||
BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* No support for debug, therefore, hyp has no sanitized copy of the
|
||||
* feature id register.
|
||||
*/
|
||||
BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* No support for implementation defined features, therefore, hyp has no
|
||||
* sanitized copy of the feature id register.
|
||||
*/
|
||||
BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* No support for implementation defined features, therefore, hyp has no
|
||||
* sanitized copy of the feature id register.
|
||||
*/
|
||||
BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
|
||||
|
||||
if (!vcpu_has_ptrauth(vcpu))
|
||||
allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
|
||||
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
|
||||
|
||||
return id_aa64isar1_el1_sys_val & allow_mask;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 set_mask;
|
||||
|
||||
set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
|
||||
PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
|
||||
|
||||
return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
|
||||
}
|
||||
|
||||
static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
|
||||
}
|
||||
|
||||
/* Read a sanitized cpufeature ID register by its encoding */
|
||||
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
|
||||
{
|
||||
switch (id) {
|
||||
case SYS_ID_AA64PFR0_EL1:
|
||||
return get_pvm_id_aa64pfr0(vcpu);
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
return get_pvm_id_aa64pfr1(vcpu);
|
||||
case SYS_ID_AA64ZFR0_EL1:
|
||||
return get_pvm_id_aa64zfr0(vcpu);
|
||||
case SYS_ID_AA64DFR0_EL1:
|
||||
return get_pvm_id_aa64dfr0(vcpu);
|
||||
case SYS_ID_AA64DFR1_EL1:
|
||||
return get_pvm_id_aa64dfr1(vcpu);
|
||||
case SYS_ID_AA64AFR0_EL1:
|
||||
return get_pvm_id_aa64afr0(vcpu);
|
||||
case SYS_ID_AA64AFR1_EL1:
|
||||
return get_pvm_id_aa64afr1(vcpu);
|
||||
case SYS_ID_AA64ISAR0_EL1:
|
||||
return get_pvm_id_aa64isar0(vcpu);
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
return get_pvm_id_aa64isar1(vcpu);
|
||||
case SYS_ID_AA64MMFR0_EL1:
|
||||
return get_pvm_id_aa64mmfr0(vcpu);
|
||||
case SYS_ID_AA64MMFR1_EL1:
|
||||
return get_pvm_id_aa64mmfr1(vcpu);
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
return get_pvm_id_aa64mmfr2(vcpu);
|
||||
default:
|
||||
/*
|
||||
* Should never happen because all cases are covered in
|
||||
* pvm_sys_reg_descs[].
|
||||
*/
|
||||
WARN_ON(1);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_desc const *r)
|
||||
{
|
||||
return pvm_read_id_reg(vcpu, reg_to_encoding(r));
|
||||
}
|
||||
|
||||
/* Handler to RAZ/WI sysregs */
|
||||
static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (!p->is_write)
|
||||
p->regval = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessor for AArch32 feature id registers.
|
||||
*
|
||||
* The value of these registers is "unknown" according to the spec if AArch32
|
||||
* isn't supported.
|
||||
*/
|
||||
static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (p->is_write) {
|
||||
inject_undef64(vcpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* No support for AArch32 guests, therefore, pKVM has no sanitized copy
|
||||
* of AArch32 feature id registers.
|
||||
*/
|
||||
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
|
||||
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
|
||||
|
||||
return pvm_access_raz_wi(vcpu, p, r);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessor for AArch64 feature id registers.
|
||||
*
|
||||
* If access is allowed, set the regval to the protected VM's view of the
|
||||
* register and return true.
|
||||
* Otherwise, inject an undefined exception and return false.
|
||||
*/
|
||||
static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (p->is_write) {
|
||||
inject_undef64(vcpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
p->regval = read_id_reg(vcpu, r);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
/* pVMs only support GICv3. 'nuf said. */
|
||||
if (!p->is_write)
|
||||
p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Mark the specified system register as an AArch32 feature id register. */
|
||||
#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }
|
||||
|
||||
/* Mark the specified system register as an AArch64 feature id register. */
|
||||
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
|
||||
|
||||
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
|
||||
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
|
||||
|
||||
/* Mark the specified system register as not being handled in hyp. */
|
||||
#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL }
|
||||
|
||||
/*
|
||||
* Architected system registers.
|
||||
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
|
||||
*
|
||||
* NOTE: Anything not explicitly listed here is *restricted by default*, i.e.,
|
||||
* it will lead to injecting an exception into the guest.
|
||||
*/
|
||||
static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
/* Cache maintenance by set/way operations are restricted. */
|
||||
|
||||
/* Debug and Trace Registers are restricted. */
|
||||
|
||||
/* AArch64 mappings of the AArch32 ID registers */
|
||||
/* CRm=1 */
|
||||
AARCH32(SYS_ID_PFR0_EL1),
|
||||
AARCH32(SYS_ID_PFR1_EL1),
|
||||
AARCH32(SYS_ID_DFR0_EL1),
|
||||
AARCH32(SYS_ID_AFR0_EL1),
|
||||
AARCH32(SYS_ID_MMFR0_EL1),
|
||||
AARCH32(SYS_ID_MMFR1_EL1),
|
||||
AARCH32(SYS_ID_MMFR2_EL1),
|
||||
AARCH32(SYS_ID_MMFR3_EL1),
|
||||
|
||||
/* CRm=2 */
|
||||
AARCH32(SYS_ID_ISAR0_EL1),
|
||||
AARCH32(SYS_ID_ISAR1_EL1),
|
||||
AARCH32(SYS_ID_ISAR2_EL1),
|
||||
AARCH32(SYS_ID_ISAR3_EL1),
|
||||
AARCH32(SYS_ID_ISAR4_EL1),
|
||||
AARCH32(SYS_ID_ISAR5_EL1),
|
||||
AARCH32(SYS_ID_MMFR4_EL1),
|
||||
AARCH32(SYS_ID_ISAR6_EL1),
|
||||
|
||||
/* CRm=3 */
|
||||
AARCH32(SYS_MVFR0_EL1),
|
||||
AARCH32(SYS_MVFR1_EL1),
|
||||
AARCH32(SYS_MVFR2_EL1),
|
||||
AARCH32(SYS_ID_PFR2_EL1),
|
||||
AARCH32(SYS_ID_DFR1_EL1),
|
||||
AARCH32(SYS_ID_MMFR5_EL1),
|
||||
|
||||
/* AArch64 ID registers */
|
||||
/* CRm=4 */
|
||||
AARCH64(SYS_ID_AA64PFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64PFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64ZFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64DFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64DFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64AFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64AFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR0_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR1_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR2_EL1),
|
||||
|
||||
/* Scalable Vector Registers are restricted. */
|
||||
|
||||
RAZ_WI(SYS_ERRIDR_EL1),
|
||||
RAZ_WI(SYS_ERRSELR_EL1),
|
||||
RAZ_WI(SYS_ERXFR_EL1),
|
||||
RAZ_WI(SYS_ERXCTLR_EL1),
|
||||
RAZ_WI(SYS_ERXSTATUS_EL1),
|
||||
RAZ_WI(SYS_ERXADDR_EL1),
|
||||
RAZ_WI(SYS_ERXMISC0_EL1),
|
||||
RAZ_WI(SYS_ERXMISC1_EL1),
|
||||
|
||||
/* Performance Monitoring Registers are restricted. */
|
||||
|
||||
/* Limited Ordering Regions Registers are restricted. */
|
||||
|
||||
HOST_HANDLED(SYS_ICC_SGI1R_EL1),
|
||||
HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
|
||||
HOST_HANDLED(SYS_ICC_SGI0R_EL1),
|
||||
{ SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
|
||||
|
||||
HOST_HANDLED(SYS_CCSIDR_EL1),
|
||||
HOST_HANDLED(SYS_CLIDR_EL1),
|
||||
HOST_HANDLED(SYS_CSSELR_EL1),
|
||||
HOST_HANDLED(SYS_CTR_EL0),
|
||||
|
||||
/* Performance Monitoring Registers are restricted. */
|
||||
|
||||
/* Activity Monitoring Registers are restricted. */
|
||||
|
||||
HOST_HANDLED(SYS_CNTP_TVAL_EL0),
|
||||
HOST_HANDLED(SYS_CNTP_CTL_EL0),
|
||||
HOST_HANDLED(SYS_CNTP_CVAL_EL0),
|
||||
|
||||
/* Performance Monitoring Registers are restricted. */
|
||||
};
|
||||
|
||||
/*
|
||||
* Checks that the sysreg table is unique and in-order.
|
||||
*
|
||||
* Returns 0 if the table is consistent, or 1 otherwise.
|
||||
*/
|
||||
int kvm_check_pvm_sysreg_table(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) {
|
||||
if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for protected VM MSR, MRS or System instruction execution.
|
||||
*
|
||||
* Returns true if the hypervisor has handled the exit, and control should go
|
||||
* back to the guest, or false if it hasn't, to be handled by the host.
|
||||
*/
|
||||
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
const struct sys_reg_desc *r;
|
||||
struct sys_reg_params params;
|
||||
unsigned long esr = kvm_vcpu_get_esr(vcpu);
|
||||
int Rt = kvm_vcpu_sys_get_rt(vcpu);
|
||||
|
||||
params = esr_sys64_to_params(esr);
|
||||
params.regval = vcpu_get_reg(vcpu, Rt);
|
||||
|
||||
r = find_reg(¶ms, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs));
|
||||
|
||||
/* Undefined (RESTRICTED). */
|
||||
if (r == NULL) {
|
||||
inject_undef64(vcpu);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Handled by the host (HOST_HANDLED) */
|
||||
if (r->access == NULL)
|
||||
return false;
|
||||
|
||||
/* Handled by hyp: skip instruction if instructed to do so. */
|
||||
if (r->access(vcpu, ¶ms, r))
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
if (!params.is_write)
|
||||
vcpu_set_reg(vcpu, Rt, params.regval);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler for protected VM restricted exceptions.
|
||||
*
|
||||
* Inject an undefined exception into the guest and return true to indicate that
|
||||
* the hypervisor has handled the exit, and control should go back to the guest.
|
||||
*/
|
||||
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
inject_undef64(vcpu);
|
||||
return true;
|
||||
}
|
@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
goto spurious;
|
||||
|
||||
lr_val &= ~ICH_LR_STATE;
|
||||
/* No active state for LPIs */
|
||||
if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
|
||||
lr_val |= ICH_LR_ACTIVE_BIT;
|
||||
lr_val |= ICH_LR_ACTIVE_BIT;
|
||||
__gic_v3_set_lr(lr_val, lr);
|
||||
__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
|
||||
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
|
||||
@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
/* Drop priority in any case */
|
||||
act_prio = __vgic_v3_clear_highest_active_priority();
|
||||
|
||||
/* If EOIing an LPI, no deactivate to be performed */
|
||||
if (vid >= VGIC_MIN_LPI)
|
||||
return;
|
||||
|
||||
/* EOImode == 1, nothing to be done here */
|
||||
if (vmcr & ICH_VMCR_EOIM_MASK)
|
||||
return;
|
||||
|
||||
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
|
||||
if (lr == -1) {
|
||||
__vgic_v3_bump_eoicount();
|
||||
/* Do not bump EOIcount for LPIs that aren't in the LRs */
|
||||
if (!(vid >= VGIC_MIN_LPI))
|
||||
__vgic_v3_bump_eoicount();
|
||||
return;
|
||||
}
|
||||
|
||||
/* EOImode == 1 and not an LPI, nothing to be done here */
|
||||
if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI))
|
||||
return;
|
||||
|
||||
lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
|
||||
|
||||
/* If priorities or group do not match, the guest has fscked-up. */
|
||||
@ -987,8 +983,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
|
||||
/* IDbits */
|
||||
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
/* SEIS */
|
||||
val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
|
||||
/* A3V */
|
||||
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
|
||||
/* EOImode */
|
||||
|
@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
|
||||
__deactivate_traps_common(vcpu);
|
||||
}
|
||||
|
||||
static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
|
||||
[ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
|
||||
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return hyp_exit_handlers;
|
||||
}
|
||||
|
||||
/* Switch to the guest for VHE systems running in EL2 */
|
||||
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -512,7 +512,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
|
||||
pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
|
||||
if (!pgt)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -978,7 +978,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
|
||||
if (!vcpu->kvm->arch.pmu_filter) {
|
||||
vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
|
||||
vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
|
||||
if (!vcpu->kvm->arch.pmu_filter) {
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
return -ENOMEM;
|
||||
|
@ -106,7 +106,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
|
||||
vl > SVE_VL_ARCH_MAX))
|
||||
return -EIO;
|
||||
|
||||
buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
|
||||
buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1064,7 +1064,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_desc const *r, bool raz)
|
||||
{
|
||||
u32 id = reg_to_encoding(r);
|
||||
u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
|
||||
u64 val;
|
||||
|
||||
if (raz)
|
||||
return 0;
|
||||
|
||||
val = read_sanitised_ftr_reg(id);
|
||||
|
||||
switch (id) {
|
||||
case SYS_ID_AA64PFR0_EL1:
|
||||
@ -1075,16 +1080,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
|
||||
if (irqchip_in_kernel(vcpu->kvm) &&
|
||||
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
|
||||
}
|
||||
break;
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
|
||||
if (kvm_has_mte(vcpu->kvm)) {
|
||||
u64 pfr, mte;
|
||||
|
||||
pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
|
||||
mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
|
||||
}
|
||||
if (!kvm_has_mte(vcpu->kvm))
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
|
||||
break;
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
if (!vcpu_has_ptrauth(vcpu))
|
||||
@ -1268,18 +1272,21 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
return __set_id_reg(vcpu, rd, uaddr, raz);
|
||||
}
|
||||
|
||||
static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
{
|
||||
return __get_id_reg(vcpu, rd, uaddr, true);
|
||||
}
|
||||
|
||||
static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
{
|
||||
return __set_id_reg(vcpu, rd, uaddr, true);
|
||||
}
|
||||
|
||||
static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
{
|
||||
const u64 id = sys_reg_to_index(rd);
|
||||
const u64 val = 0;
|
||||
|
||||
return reg_to_user(uaddr, &val, id);
|
||||
}
|
||||
|
||||
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
{
|
||||
@ -1388,7 +1395,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
|
||||
#define ID_UNALLOCATED(crm, op2) { \
|
||||
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
|
||||
.access = access_raz_id_reg, \
|
||||
.get_user = get_raz_id_reg, \
|
||||
.get_user = get_raz_reg, \
|
||||
.set_user = set_raz_id_reg, \
|
||||
}
|
||||
|
||||
@ -1400,7 +1407,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
|
||||
#define ID_HIDDEN(name) { \
|
||||
SYS_DESC(SYS_##name), \
|
||||
.access = access_raz_id_reg, \
|
||||
.get_user = get_raz_id_reg, \
|
||||
.get_user = get_raz_reg, \
|
||||
.set_user = set_raz_id_reg, \
|
||||
}
|
||||
|
||||
@ -1642,7 +1649,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
* previously (and pointlessly) advertised in the past...
|
||||
*/
|
||||
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
|
||||
.get_user = get_raz_id_reg, .set_user = set_wi_reg,
|
||||
.get_user = get_raz_reg, .set_user = set_wi_reg,
|
||||
.access = access_pmswinc, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMSELR_EL0),
|
||||
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
|
||||
|
@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
|
||||
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
|
||||
int i;
|
||||
|
||||
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
|
||||
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
|
||||
if (!dist->spis)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
|
||||
u32 nr = dist->nr_spis;
|
||||
int i, ret;
|
||||
|
||||
entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
|
||||
entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT);
|
||||
if (!entries)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
|
||||
if (irq)
|
||||
return irq;
|
||||
|
||||
irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
|
||||
irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
|
||||
if (!irq)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
|
||||
* we must be careful not to overrun the array.
|
||||
*/
|
||||
irq_count = READ_ONCE(dist->lpi_list_count);
|
||||
intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
|
||||
intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
|
||||
if (!intids)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
|
||||
if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
|
||||
return E_ITS_MAPC_COLLECTION_OOR;
|
||||
|
||||
collection = kzalloc(sizeof(*collection), GFP_KERNEL);
|
||||
collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
|
||||
if (!collection)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
|
||||
{
|
||||
struct its_ite *ite;
|
||||
|
||||
ite = kzalloc(sizeof(*ite), GFP_KERNEL);
|
||||
ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT);
|
||||
if (!ite)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
|
||||
{
|
||||
struct its_device *device;
|
||||
|
||||
device = kzalloc(sizeof(*device), GFP_KERNEL);
|
||||
device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT);
|
||||
if (!device)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm)
|
||||
struct vgic_translation_cache_entry *cte;
|
||||
|
||||
/* An allocation failure is not fatal */
|
||||
cte = kzalloc(sizeof(*cte), GFP_KERNEL);
|
||||
cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
|
||||
if (WARN_ON(!cte))
|
||||
break;
|
||||
|
||||
@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
|
||||
return -ENODEV;
|
||||
|
||||
its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
|
||||
its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT);
|
||||
if (!its)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev,
|
||||
if (copy_from_user(&addr, uaddr, sizeof(addr)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
|
||||
addr, SZ_64K);
|
||||
ret = vgic_check_iorange(dev->kvm, its->vgic_its_base,
|
||||
addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -14,17 +14,21 @@
|
||||
|
||||
/* common helpers */
|
||||
|
||||
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
|
||||
phys_addr_t addr, phys_addr_t alignment)
|
||||
int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
|
||||
phys_addr_t addr, phys_addr_t alignment,
|
||||
phys_addr_t size)
|
||||
{
|
||||
if (addr & ~kvm_phys_mask(kvm))
|
||||
return -E2BIG;
|
||||
if (!IS_VGIC_ADDR_UNDEF(ioaddr))
|
||||
return -EEXIST;
|
||||
|
||||
if (!IS_ALIGNED(addr, alignment))
|
||||
if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment))
|
||||
return -EINVAL;
|
||||
|
||||
if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
|
||||
return -EEXIST;
|
||||
if (addr + size < addr)
|
||||
return -EINVAL;
|
||||
|
||||
if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
|
||||
return -E2BIG;
|
||||
|
||||
return 0;
|
||||
}
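The new helper rejects a window that is misaligned, that wraps around the top of the address space (addr + size < addr), or that extends past the guest's IPA range. A stand-alone sketch follows, with a 40-bit IPA space assumed purely for the example.

/* Example sketch, not part of the patch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool iorange_ok(uint64_t addr, uint64_t size, uint64_t align, uint64_t phys_size)
{
	if (addr % align || size % align)
		return false;		/* misaligned base or size */
	if (addr + size < addr)
		return false;		/* wraps past the end of the address space */
	if (addr + size > phys_size)
		return false;		/* sticks out of the guest's IPA space */
	return true;
}

int main(void)
{
	uint64_t ipa = 1ULL << 40;	/* assumed 40-bit guest physical space */

	printf("%d\n", iorange_ok(0x10000, 0x20000, 0x10000, ipa));		/* 1: fits */
	printf("%d\n", iorange_ok(ipa - 0x10000, 0x20000, 0x10000, ipa));	/* 0: overruns IPA */
	printf("%d\n", iorange_ok(UINT64_MAX - 0xffff, 0x20000, 0x10000, ipa));	/* 0: wraps */
	return 0;
}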
|
||||
@ -57,7 +61,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
{
|
||||
int r = 0;
|
||||
struct vgic_dist *vgic = &kvm->arch.vgic;
|
||||
phys_addr_t *addr_ptr, alignment;
|
||||
phys_addr_t *addr_ptr, alignment, size;
|
||||
u64 undef_value = VGIC_ADDR_UNDEF;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
@ -66,16 +70,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
addr_ptr = &vgic->vgic_dist_base;
|
||||
alignment = SZ_4K;
|
||||
size = KVM_VGIC_V2_DIST_SIZE;
|
||||
break;
|
||||
case KVM_VGIC_V2_ADDR_TYPE_CPU:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
addr_ptr = &vgic->vgic_cpu_base;
|
||||
alignment = SZ_4K;
|
||||
size = KVM_VGIC_V2_CPU_SIZE;
|
||||
break;
|
||||
case KVM_VGIC_V3_ADDR_TYPE_DIST:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
|
||||
addr_ptr = &vgic->vgic_dist_base;
|
||||
alignment = SZ_64K;
|
||||
size = KVM_VGIC_V3_DIST_SIZE;
|
||||
break;
|
||||
case KVM_VGIC_V3_ADDR_TYPE_REDIST: {
|
||||
struct vgic_redist_region *rdreg;
|
||||
@ -140,7 +147,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
goto out;
|
||||
|
||||
if (write) {
|
||||
r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
|
||||
r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size);
|
||||
if (!r)
|
||||
*addr_ptr = *addr;
|
||||
} else {
|
||||
|
@ -796,7 +796,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
|
||||
struct vgic_dist *d = &kvm->arch.vgic;
|
||||
struct vgic_redist_region *rdreg;
|
||||
struct list_head *rd_regions = &d->rd_regions;
|
||||
size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
|
||||
int nr_vcpus = atomic_read(&kvm->online_vcpus);
|
||||
size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE
|
||||
: nr_vcpus * KVM_VGIC_V3_REDIST_SIZE;
|
||||
int ret;
|
||||
|
||||
/* cross the end of memory ? */
|
||||
@ -834,13 +836,13 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
|
||||
if (vgic_v3_rdist_overlap(kvm, base, size))
|
||||
return -EINVAL;
|
||||
|
||||
rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
|
||||
rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT);
|
||||
if (!rdreg)
|
||||
return -ENOMEM;
|
||||
|
||||
rdreg->base = VGIC_ADDR_UNDEF;
|
||||
|
||||
ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
|
||||
ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size);
|
||||
if (ret)
|
||||
goto free;
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
static bool group0_trap;
|
||||
static bool group1_trap;
|
||||
static bool common_trap;
|
||||
static bool dir_trap;
|
||||
static bool gicv4_enable;
|
||||
|
||||
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
|
||||
@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
|
||||
if (common_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_TC;
|
||||
if (dir_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_TDIR;
|
||||
}
|
||||
|
||||
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
|
||||
@ -483,8 +486,10 @@ bool vgic_v3_check_base(struct kvm *kvm)
|
||||
return false;
|
||||
|
||||
list_for_each_entry(rdreg, &d->rd_regions, list) {
|
||||
if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) <
|
||||
rdreg->base)
|
||||
size_t sz = vgic_v3_rd_region_size(kvm, rdreg);
|
||||
|
||||
if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
|
||||
rdreg->base, SZ_64K, sz))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -671,11 +676,23 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
group1_trap = true;
|
||||
}
|
||||
|
||||
if (group0_trap || group1_trap || common_trap) {
|
||||
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
|
||||
if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
|
||||
kvm_info("GICv3 with locally generated SEI\n");
|
||||
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
|
||||
dir_trap = true;
|
||||
else
|
||||
common_trap = true;
|
||||
}
|
||||
|
||||
if (group0_trap || group1_trap || common_trap | dir_trap) {
|
||||
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
|
||||
group0_trap ? "G0" : "",
|
||||
group1_trap ? "G1" : "",
|
||||
common_trap ? "C" : "");
|
||||
common_trap ? "C" : "",
|
||||
dir_trap ? "D" : "");
|
||||
static_branch_enable(&vgic_v3_cpuif_trap);
|
||||
}
|
||||
|
||||
|
@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm)
|
||||
nr_vcpus = atomic_read(&kvm->online_vcpus);
|
||||
|
||||
dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
|
||||
GFP_KERNEL);
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!dist->its_vm.vpes)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -172,8 +172,9 @@ void vgic_kick_vcpus(struct kvm *kvm);
|
||||
void vgic_irq_handle_resampling(struct vgic_irq *irq,
|
||||
bool lr_deactivated, bool lr_pending);
|
||||
|
||||
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
|
||||
phys_addr_t addr, phys_addr_t alignment);
|
||||
int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
|
||||
phys_addr_t addr, phys_addr_t alignment,
|
||||
phys_addr_t size);
|
||||
|
||||
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
|
||||
|
@ -1073,7 +1073,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = KVM_MAX_VCPUS;
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPU_ID:
|
||||
r = KVM_MAX_VCPU_ID;
|
||||
r = KVM_MAX_VCPU_IDS;
|
||||
break;
|
||||
case KVM_CAP_MIPS_FPU:
|
||||
/* We don't handle systems with inconsistent cpu_has_fpu */
|
||||
|
@ -434,7 +434,7 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
|
||||
#define SPLIT_HACK_OFFS 0xfb000000
|
||||
|
||||
/*
|
||||
* This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
|
||||
* This packs a VCPU ID from the [0..KVM_MAX_VCPU_IDS) space down to the
|
||||
* [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
|
||||
* (but not its actual threading mode, which is not available) to avoid
|
||||
* collisions.
|
||||
|
@ -33,11 +33,11 @@
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
|
||||
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
|
||||
#define KVM_MAX_VCPU_IDS (MAX_SMT_THREADS * KVM_MAX_VCORES)
|
||||
#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
|
||||
|
||||
#else
|
||||
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
|
||||
#define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
|
||||
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
||||
|
@ -1928,7 +1928,7 @@ int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
|
||||
|
||||
pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
|
||||
|
||||
if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
|
||||
if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&xive->lock);
|
||||
|
@@ -649,7 +649,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
-		r = KVM_MAX_VCPU_ID;
+		r = KVM_MAX_VCPU_IDS;
 		break;
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_PPC_GET_SMMU_INFO:
@@ -566,3 +566,5 @@ menu "Power management options"
 source "kernel/power/Kconfig"

 endmenu
+
+source "arch/riscv/kvm/Kconfig"
@@ -100,6 +100,7 @@ endif
 head-y := arch/riscv/kernel/head.o

 core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/
+core-$(CONFIG_KVM) += arch/riscv/kvm/

 libs-y += arch/riscv/lib/
 libs-$(CONFIG_EFI_STUB)	+= $(objtree)/drivers/firmware/efi/libstub/lib.a
@@ -58,22 +58,32 @@

/* Interrupt causes (minus the high bit) */
#define IRQ_S_SOFT 1
#define IRQ_VS_SOFT 2
#define IRQ_M_SOFT 3
#define IRQ_S_TIMER 5
#define IRQ_VS_TIMER 6
#define IRQ_M_TIMER 7
#define IRQ_S_EXT 9
#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11

/* Exception causes */
#define EXC_INST_MISALIGNED 0
#define EXC_INST_ACCESS 1
#define EXC_INST_ILLEGAL 2
#define EXC_BREAKPOINT 3
#define EXC_LOAD_ACCESS 5
#define EXC_STORE_ACCESS 7
#define EXC_SYSCALL 8
#define EXC_HYPERVISOR_SYSCALL 9
#define EXC_SUPERVISOR_SYSCALL 10
#define EXC_INST_PAGE_FAULT 12
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
#define EXC_INST_GUEST_PAGE_FAULT 20
#define EXC_LOAD_GUEST_PAGE_FAULT 21
#define EXC_VIRTUAL_INST_FAULT 22
#define EXC_STORE_GUEST_PAGE_FAULT 23

/* PMP configuration */
#define PMP_R 0x01
@@ -85,6 +95,58 @@
#define PMP_A_NAPOT 0x18
#define PMP_L 0x80

/* HSTATUS flags */
#ifdef CONFIG_64BIT
#define HSTATUS_VSXL _AC(0x300000000, UL)
#define HSTATUS_VSXL_SHIFT 32
#endif
#define HSTATUS_VTSR _AC(0x00400000, UL)
#define HSTATUS_VTW _AC(0x00200000, UL)
#define HSTATUS_VTVM _AC(0x00100000, UL)
#define HSTATUS_VGEIN _AC(0x0003f000, UL)
#define HSTATUS_VGEIN_SHIFT 12
#define HSTATUS_HU _AC(0x00000200, UL)
#define HSTATUS_SPVP _AC(0x00000100, UL)
#define HSTATUS_SPV _AC(0x00000080, UL)
#define HSTATUS_GVA _AC(0x00000040, UL)
#define HSTATUS_VSBE _AC(0x00000020, UL)

/* HGATP flags */
#define HGATP_MODE_OFF _AC(0, UL)
#define HGATP_MODE_SV32X4 _AC(1, UL)
#define HGATP_MODE_SV39X4 _AC(8, UL)
#define HGATP_MODE_SV48X4 _AC(9, UL)

#define HGATP32_MODE_SHIFT 31
#define HGATP32_VMID_SHIFT 22
#define HGATP32_VMID_MASK _AC(0x1FC00000, UL)
#define HGATP32_PPN _AC(0x003FFFFF, UL)

#define HGATP64_MODE_SHIFT 60
#define HGATP64_VMID_SHIFT 44
#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL)
#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL)

#define HGATP_PAGE_SHIFT 12

#ifdef CONFIG_64BIT
#define HGATP_PPN HGATP64_PPN
#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT
#define HGATP_VMID_MASK HGATP64_VMID_MASK
#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT
#else
#define HGATP_PPN HGATP32_PPN
#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT
#define HGATP_VMID_MASK HGATP32_VMID_MASK
#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT
#endif

/* VSIP & HVIP relation */
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
			 (_AC(1, UL) << IRQ_S_TIMER) | \
			 (_AC(1, UL) << IRQ_S_EXT))

/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
@@ -104,6 +166,31 @@
#define CSR_SIP 0x144
#define CSR_SATP 0x180

#define CSR_VSSTATUS 0x200
#define CSR_VSIE 0x204
#define CSR_VSTVEC 0x205
#define CSR_VSSCRATCH 0x240
#define CSR_VSEPC 0x241
#define CSR_VSCAUSE 0x242
#define CSR_VSTVAL 0x243
#define CSR_VSIP 0x244
#define CSR_VSATP 0x280

#define CSR_HSTATUS 0x600
#define CSR_HEDELEG 0x602
#define CSR_HIDELEG 0x603
#define CSR_HIE 0x604
#define CSR_HTIMEDELTA 0x605
#define CSR_HCOUNTEREN 0x606
#define CSR_HGEIE 0x607
#define CSR_HTIMEDELTAH 0x615
#define CSR_HTVAL 0x643
#define CSR_HIP 0x644
#define CSR_HVIP 0x645
#define CSR_HTINST 0x64a
#define CSR_HGATP 0x680
#define CSR_HGEIP 0xe12

#define CSR_MSTATUS 0x300
#define CSR_MISA 0x301
#define CSR_MIE 0x304
arch/riscv/include/asm/kvm_host.h (new file, 264 lines)
@@ -0,0 +1,264 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#ifndef __RISCV_KVM_HOST_H__
|
||||
#define __RISCV_KVM_HOST_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_types.h>
|
||||
#include <asm/kvm_vcpu_fp.h>
|
||||
#include <asm/kvm_vcpu_timer.h>
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define KVM_MAX_VCPUS (1U << 16)
|
||||
#else
|
||||
#define KVM_MAX_VCPUS (1U << 9)
|
||||
#endif
|
||||
|
||||
#define KVM_HALT_POLL_NS_DEFAULT 500000
|
||||
|
||||
#define KVM_VCPU_MAX_FEATURES 0
|
||||
|
||||
#define KVM_REQ_SLEEP \
|
||||
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
|
||||
#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)
|
||||
|
||||
struct kvm_vm_stat {
|
||||
struct kvm_vm_stat_generic generic;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
struct kvm_vcpu_stat_generic generic;
|
||||
u64 ecall_exit_stat;
|
||||
u64 wfi_exit_stat;
|
||||
u64 mmio_exit_user;
|
||||
u64 mmio_exit_kernel;
|
||||
u64 exits;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
struct kvm_vmid {
|
||||
/*
|
||||
* Writes to vmid_version and vmid happen with vmid_lock held
|
||||
* whereas reads happen without any lock held.
|
||||
*/
|
||||
unsigned long vmid_version;
|
||||
unsigned long vmid;
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
/* stage2 vmid */
|
||||
struct kvm_vmid vmid;
|
||||
|
||||
/* stage2 page table */
|
||||
pgd_t *pgd;
|
||||
phys_addr_t pgd_phys;
|
||||
|
||||
/* Guest Timer */
|
||||
struct kvm_guest_timer timer;
|
||||
};
|
||||
|
||||
struct kvm_mmio_decode {
|
||||
unsigned long insn;
|
||||
int insn_len;
|
||||
int len;
|
||||
int shift;
|
||||
int return_handled;
|
||||
};
|
||||
|
||||
struct kvm_sbi_context {
|
||||
int return_handled;
|
||||
};
|
||||
|
||||
#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
|
||||
|
||||
struct kvm_mmu_page_cache {
|
||||
int nobjs;
|
||||
void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
|
||||
};
|
||||
|
||||
struct kvm_cpu_trap {
|
||||
unsigned long sepc;
|
||||
unsigned long scause;
|
||||
unsigned long stval;
|
||||
unsigned long htval;
|
||||
unsigned long htinst;
|
||||
};
|
||||
|
||||
struct kvm_cpu_context {
|
||||
unsigned long zero;
|
||||
unsigned long ra;
|
||||
unsigned long sp;
|
||||
unsigned long gp;
|
||||
unsigned long tp;
|
||||
unsigned long t0;
|
||||
unsigned long t1;
|
||||
unsigned long t2;
|
||||
unsigned long s0;
|
||||
unsigned long s1;
|
||||
unsigned long a0;
|
||||
unsigned long a1;
|
||||
unsigned long a2;
|
||||
unsigned long a3;
|
||||
unsigned long a4;
|
||||
unsigned long a5;
|
||||
unsigned long a6;
|
||||
unsigned long a7;
|
||||
unsigned long s2;
|
||||
unsigned long s3;
|
||||
unsigned long s4;
|
||||
unsigned long s5;
|
||||
unsigned long s6;
|
||||
unsigned long s7;
|
||||
unsigned long s8;
|
||||
unsigned long s9;
|
||||
unsigned long s10;
|
||||
unsigned long s11;
|
||||
unsigned long t3;
|
||||
unsigned long t4;
|
||||
unsigned long t5;
|
||||
unsigned long t6;
|
||||
unsigned long sepc;
|
||||
unsigned long sstatus;
|
||||
unsigned long hstatus;
|
||||
union __riscv_fp_state fp;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_csr {
|
||||
unsigned long vsstatus;
|
||||
unsigned long vsie;
|
||||
unsigned long vstvec;
|
||||
unsigned long vsscratch;
|
||||
unsigned long vsepc;
|
||||
unsigned long vscause;
|
||||
unsigned long vstval;
|
||||
unsigned long hvip;
|
||||
unsigned long vsatp;
|
||||
unsigned long scounteren;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
/* VCPU ran at least once */
|
||||
bool ran_atleast_once;
|
||||
|
||||
/* ISA feature bits (similar to MISA) */
|
||||
unsigned long isa;
|
||||
|
||||
/* SSCRATCH, STVEC, and SCOUNTEREN of Host */
|
||||
unsigned long host_sscratch;
|
||||
unsigned long host_stvec;
|
||||
unsigned long host_scounteren;
|
||||
|
||||
/* CPU context of Host */
|
||||
struct kvm_cpu_context host_context;
|
||||
|
||||
/* CPU context of Guest VCPU */
|
||||
struct kvm_cpu_context guest_context;
|
||||
|
||||
/* CPU CSR context of Guest VCPU */
|
||||
struct kvm_vcpu_csr guest_csr;
|
||||
|
||||
/* CPU context upon Guest VCPU reset */
|
||||
struct kvm_cpu_context guest_reset_context;
|
||||
|
||||
/* CPU CSR context upon Guest VCPU reset */
|
||||
struct kvm_vcpu_csr guest_reset_csr;
|
||||
|
||||
/*
|
||||
* VCPU interrupts
|
||||
*
|
||||
* We have a lockless approach for tracking pending VCPU interrupts
|
||||
* implemented using atomic bitops. The irqs_pending bitmap represent
|
||||
* pending interrupts whereas irqs_pending_mask represent bits changed
|
||||
* in irqs_pending. Our approach is modeled around multiple producer
|
||||
* and single consumer problem where the consumer is the VCPU itself.
|
||||
*/
|
||||
unsigned long irqs_pending;
|
||||
unsigned long irqs_pending_mask;
|
||||
|
||||
/* VCPU Timer */
|
||||
struct kvm_vcpu_timer timer;
|
||||
|
||||
/* MMIO instruction details */
|
||||
struct kvm_mmio_decode mmio_decode;
|
||||
|
||||
/* SBI context */
|
||||
struct kvm_sbi_context sbi_context;
|
||||
|
||||
/* Cache pages needed to program page tables with spinlock held */
|
||||
struct kvm_mmu_page_cache mmu_page_cache;
|
||||
|
||||
/* VCPU power-off state */
|
||||
bool power_off;
|
||||
|
||||
/* Don't run the VCPU (blocked) */
|
||||
bool pause;
|
||||
|
||||
/* SRCU lock index for in-kernel run loop */
|
||||
int srcu_idx;
|
||||
};
|
||||
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa_divby_4,
|
||||
unsigned long vmid);
|
||||
void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
|
||||
void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa_divby_4);
|
||||
void __kvm_riscv_hfence_gvma_all(void);
|
||||
|
||||
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *memslot,
|
||||
gpa_t gpa, unsigned long hva, bool is_write);
|
||||
void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
|
||||
void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
|
||||
void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_stage2_mode_detect(void);
|
||||
unsigned long kvm_riscv_stage2_mode(void);
|
||||
|
||||
void kvm_riscv_stage2_vmid_detect(void);
|
||||
unsigned long kvm_riscv_stage2_vmid_bits(void);
|
||||
int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
|
||||
bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
|
||||
void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
|
||||
|
||||
void __kvm_riscv_unpriv_trap(void);
|
||||
|
||||
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
|
||||
bool read_insn,
|
||||
unsigned long guest_addr,
|
||||
struct kvm_cpu_trap *trap);
|
||||
void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpu_trap *trap);
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap);
|
||||
|
||||
void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
|
||||
|
||||
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
|
||||
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
|
||||
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
|
||||
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask);
|
||||
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
|
||||
#endif /* __RISCV_KVM_HOST_H__ */
|
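The comment in kvm_vcpu_arch above describes a lockless multiple-producer/single-consumer scheme built on irqs_pending and irqs_pending_mask. The following is a simplified sketch of that idea using atomic bitops; the demo_* names are hypothetical and this illustrates the pattern rather than reproducing the vcpu code itself.

#include <linux/atomic.h>
#include <linux/bitops.h>

struct demo_vcpu {
	unsigned long irqs_pending;
	unsigned long irqs_pending_mask;
	unsigned long hvip_shadow;
};

/* Any context (producer): mark the IRQ pending and record that it changed. */
static void demo_set_interrupt(struct demo_vcpu *v, unsigned int irq)
{
	set_bit(irq, &v->irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, &v->irqs_pending_mask);
	/* real code would also kick the VCPU out of guest mode here */
}

/* VCPU context (single consumer): fold only the bits that changed. */
static void demo_flush_interrupts(struct demo_vcpu *v)
{
	unsigned long mask, val;

	if (READ_ONCE(v->irqs_pending_mask)) {
		mask = xchg_acquire(&v->irqs_pending_mask, 0);
		val = READ_ONCE(v->irqs_pending) & mask;

		v->hvip_shadow &= ~mask;
		v->hvip_shadow |= val;
	}
}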
arch/riscv/include/asm/kvm_types.h (new file, 7 lines)
@@ -0,0 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_RISCV_KVM_TYPES_H
|
||||
#define _ASM_RISCV_KVM_TYPES_H
|
||||
|
||||
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
|
||||
|
||||
#endif /* _ASM_RISCV_KVM_TYPES_H */
|
arch/riscv/include/asm/kvm_vcpu_fp.h (new file, 59 lines)
@@ -0,0 +1,59 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Atish Patra <atish.patra@wdc.com>
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_VCPU_RISCV_FP_H
|
||||
#define __KVM_VCPU_RISCV_FP_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct kvm_cpu_context;
|
||||
|
||||
#ifdef CONFIG_FPU
|
||||
void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
|
||||
void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
|
||||
void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
|
||||
void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
|
||||
|
||||
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa);
|
||||
void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa);
|
||||
void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx);
|
||||
void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx);
|
||||
#else
|
||||
static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_guest_fp_restore(
|
||||
struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_host_fp_restore(
|
||||
struct kvm_cpu_context *cntx)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype);
|
||||
int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg,
|
||||
unsigned long rtype);
|
||||
|
||||
#endif
|
arch/riscv/include/asm/kvm_vcpu_timer.h (new file, 44 lines)
@@ -0,0 +1,44 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Atish Patra <atish.patra@wdc.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_VCPU_RISCV_TIMER_H
|
||||
#define __KVM_VCPU_RISCV_TIMER_H
|
||||
|
||||
#include <linux/hrtimer.h>
|
||||
|
||||
struct kvm_guest_timer {
|
||||
/* Mult & Shift values to get nanoseconds from cycles */
|
||||
u32 nsec_mult;
|
||||
u32 nsec_shift;
|
||||
/* Time delta value */
|
||||
u64 time_delta;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_timer {
|
||||
/* Flag for whether init is done */
|
||||
bool init_done;
|
||||
/* Flag for whether timer event is configured */
|
||||
bool next_set;
|
||||
/* Next timer event cycles */
|
||||
u64 next_cycles;
|
||||
/* Underlying hrtimer instance */
|
||||
struct hrtimer hrt;
|
||||
};
|
||||
|
||||
int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
|
||||
int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg);
|
||||
int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_guest_timer_init(struct kvm *kvm);
|
||||
|
||||
#endif
|
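nsec_mult and nsec_shift in kvm_guest_timer above are the usual clocksource-style fixed-point factors for converting timer cycles into nanoseconds. A hedged sketch of that conversion follows; the helper name is illustrative, not the kernel's function.

#include <linux/types.h>

/*
 * ns = cycles * mult >> shift, where mult / 2^shift approximates
 * NSEC_PER_SEC / timer_frequency.
 */
static inline u64 demo_cycles_to_ns(u64 cycles, u32 nsec_mult, u32 nsec_shift)
{
	return (cycles * nsec_mult) >> nsec_shift;
}

Real code must also choose mult and shift so that the 64-bit product cannot overflow for the cycle deltas it expects to see.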
arch/riscv/include/uapi/asm/kvm.h (new file, 128 lines)
@@ -0,0 +1,128 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#ifndef __LINUX_KVM_RISCV_H
|
||||
#define __LINUX_KVM_RISCV_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
#define __KVM_HAVE_READONLY_MEM
|
||||
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
|
||||
#define KVM_INTERRUPT_SET -1U
|
||||
#define KVM_INTERRUPT_UNSET -2U
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
};
|
||||
|
||||
/* for KVM_GET_FPU and KVM_SET_FPU */
|
||||
struct kvm_fpu {
|
||||
};
|
||||
|
||||
/* KVM Debug exit structure */
|
||||
struct kvm_debug_exit_arch {
|
||||
};
|
||||
|
||||
/* for KVM_SET_GUEST_DEBUG */
|
||||
struct kvm_guest_debug_arch {
|
||||
};
|
||||
|
||||
/* definition of registers in kvm_run */
|
||||
struct kvm_sync_regs {
|
||||
};
|
||||
|
||||
/* for KVM_GET_SREGS and KVM_SET_SREGS */
|
||||
struct kvm_sregs {
|
||||
};
|
||||
|
||||
/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_config {
|
||||
unsigned long isa;
|
||||
};
|
||||
|
||||
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_core {
|
||||
struct user_regs_struct regs;
|
||||
unsigned long mode;
|
||||
};
|
||||
|
||||
/* Possible privilege modes for kvm_riscv_core */
|
||||
#define KVM_RISCV_MODE_S 1
|
||||
#define KVM_RISCV_MODE_U 0
|
||||
|
||||
/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_csr {
|
||||
unsigned long sstatus;
|
||||
unsigned long sie;
|
||||
unsigned long stvec;
|
||||
unsigned long sscratch;
|
||||
unsigned long sepc;
|
||||
unsigned long scause;
|
||||
unsigned long stval;
|
||||
unsigned long sip;
|
||||
unsigned long satp;
|
||||
unsigned long scounteren;
|
||||
};
|
||||
|
||||
/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_timer {
|
||||
__u64 frequency;
|
||||
__u64 time;
|
||||
__u64 compare;
|
||||
__u64 state;
|
||||
};
|
||||
|
||||
/* Possible states for kvm_riscv_timer */
|
||||
#define KVM_RISCV_TIMER_STATE_OFF 0
|
||||
#define KVM_RISCV_TIMER_STATE_ON 1
|
||||
|
||||
#define KVM_REG_SIZE(id) \
|
||||
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
|
||||
|
||||
/* If you need to interpret the index values, here is the key: */
|
||||
#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
|
||||
#define KVM_REG_RISCV_TYPE_SHIFT 24
|
||||
|
||||
/* Config registers are mapped as type 1 */
|
||||
#define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_CONFIG_REG(name) \
|
||||
(offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
|
||||
|
||||
/* Core registers are mapped as type 2 */
|
||||
#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_CORE_REG(name) \
|
||||
(offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
|
||||
|
||||
/* Control and status registers are mapped as type 3 */
|
||||
#define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_CSR_REG(name) \
|
||||
(offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
|
||||
|
||||
/* Timer registers are mapped as type 4 */
|
||||
#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_TIMER_REG(name) \
|
||||
(offsetof(struct kvm_riscv_timer, name) / sizeof(__u64))
|
||||
|
||||
/* F extension registers are mapped as type 5 */
|
||||
#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_FP_F_REG(name) \
|
||||
(offsetof(struct __riscv_f_ext_state, name) / sizeof(__u32))
|
||||
|
||||
/* D extension registers are mapped as type 6 */
|
||||
#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_FP_D_REG(name) \
|
||||
(offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64))
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* __LINUX_KVM_RISCV_H */
|
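The KVM_REG_RISCV_* macros above turn structure members into ONE_REG indices. As a sketch of how a userspace VMM might compose such an id and read the ISA config register: it assumes an RV64 host (so unsigned long registers are 64-bit) and an already created vcpu_fd, and omits error handling. KVM_REG_RISCV and KVM_REG_SIZE_U64 come from <linux/kvm.h>; the per-register macros come from the header listed above.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint64_t read_isa_reg(int vcpu_fd)
{
	uint64_t isa = 0;
	struct kvm_one_reg reg = {
		.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		      KVM_REG_RISCV_CONFIG |
		      KVM_REG_RISCV_CONFIG_REG(isa),
		.addr = (uint64_t)&isa,	/* kernel copies the value here */
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return isa;
}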
@ -7,7 +7,9 @@
|
||||
#define GENERATING_ASM_OFFSETS
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
@ -110,6 +112,160 @@ void asm_offsets(void)
|
||||
OFFSET(PT_BADADDR, pt_regs, badaddr);
|
||||
OFFSET(PT_CAUSE, pt_regs, cause);
|
||||
|
||||
OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
|
||||
OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
|
||||
OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
|
||||
OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
|
||||
OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
|
||||
OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
|
||||
OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
|
||||
OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
|
||||
OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
|
||||
OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
|
||||
OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
|
||||
OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
|
||||
OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
|
||||
OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
|
||||
OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
|
||||
OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
|
||||
OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
|
||||
OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
|
||||
OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
|
||||
OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
|
||||
OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
|
||||
OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
|
||||
OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
|
||||
OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
|
||||
OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
|
||||
OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
|
||||
OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
|
||||
OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
|
||||
OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
|
||||
OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
|
||||
OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
|
||||
OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
|
||||
OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
|
||||
OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
|
||||
OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
|
||||
OFFSET(KVM_ARCH_GUEST_SCOUNTEREN, kvm_vcpu_arch, guest_csr.scounteren);
|
||||
|
||||
OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
|
||||
OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
|
||||
OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp);
|
||||
OFFSET(KVM_ARCH_HOST_GP, kvm_vcpu_arch, host_context.gp);
|
||||
OFFSET(KVM_ARCH_HOST_TP, kvm_vcpu_arch, host_context.tp);
|
||||
OFFSET(KVM_ARCH_HOST_T0, kvm_vcpu_arch, host_context.t0);
|
||||
OFFSET(KVM_ARCH_HOST_T1, kvm_vcpu_arch, host_context.t1);
|
||||
OFFSET(KVM_ARCH_HOST_T2, kvm_vcpu_arch, host_context.t2);
|
||||
OFFSET(KVM_ARCH_HOST_S0, kvm_vcpu_arch, host_context.s0);
|
||||
OFFSET(KVM_ARCH_HOST_S1, kvm_vcpu_arch, host_context.s1);
|
||||
OFFSET(KVM_ARCH_HOST_A0, kvm_vcpu_arch, host_context.a0);
|
||||
OFFSET(KVM_ARCH_HOST_A1, kvm_vcpu_arch, host_context.a1);
|
||||
OFFSET(KVM_ARCH_HOST_A2, kvm_vcpu_arch, host_context.a2);
|
||||
OFFSET(KVM_ARCH_HOST_A3, kvm_vcpu_arch, host_context.a3);
|
||||
OFFSET(KVM_ARCH_HOST_A4, kvm_vcpu_arch, host_context.a4);
|
||||
OFFSET(KVM_ARCH_HOST_A5, kvm_vcpu_arch, host_context.a5);
|
||||
OFFSET(KVM_ARCH_HOST_A6, kvm_vcpu_arch, host_context.a6);
|
||||
OFFSET(KVM_ARCH_HOST_A7, kvm_vcpu_arch, host_context.a7);
|
||||
OFFSET(KVM_ARCH_HOST_S2, kvm_vcpu_arch, host_context.s2);
|
||||
OFFSET(KVM_ARCH_HOST_S3, kvm_vcpu_arch, host_context.s3);
|
||||
OFFSET(KVM_ARCH_HOST_S4, kvm_vcpu_arch, host_context.s4);
|
||||
OFFSET(KVM_ARCH_HOST_S5, kvm_vcpu_arch, host_context.s5);
|
||||
OFFSET(KVM_ARCH_HOST_S6, kvm_vcpu_arch, host_context.s6);
|
||||
OFFSET(KVM_ARCH_HOST_S7, kvm_vcpu_arch, host_context.s7);
|
||||
OFFSET(KVM_ARCH_HOST_S8, kvm_vcpu_arch, host_context.s8);
|
||||
OFFSET(KVM_ARCH_HOST_S9, kvm_vcpu_arch, host_context.s9);
|
||||
OFFSET(KVM_ARCH_HOST_S10, kvm_vcpu_arch, host_context.s10);
|
||||
OFFSET(KVM_ARCH_HOST_S11, kvm_vcpu_arch, host_context.s11);
|
||||
OFFSET(KVM_ARCH_HOST_T3, kvm_vcpu_arch, host_context.t3);
|
||||
OFFSET(KVM_ARCH_HOST_T4, kvm_vcpu_arch, host_context.t4);
|
||||
OFFSET(KVM_ARCH_HOST_T5, kvm_vcpu_arch, host_context.t5);
|
||||
OFFSET(KVM_ARCH_HOST_T6, kvm_vcpu_arch, host_context.t6);
|
||||
OFFSET(KVM_ARCH_HOST_SEPC, kvm_vcpu_arch, host_context.sepc);
|
||||
OFFSET(KVM_ARCH_HOST_SSTATUS, kvm_vcpu_arch, host_context.sstatus);
|
||||
OFFSET(KVM_ARCH_HOST_HSTATUS, kvm_vcpu_arch, host_context.hstatus);
|
||||
OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
|
||||
OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
|
||||
OFFSET(KVM_ARCH_HOST_SCOUNTEREN, kvm_vcpu_arch, host_scounteren);
|
||||
|
||||
OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc);
|
||||
OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause);
|
||||
OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval);
|
||||
OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval);
|
||||
OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst);
|
||||
|
||||
/* F extension */
|
||||
|
||||
OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
|
||||
OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
|
||||
OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
|
||||
OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
|
||||
OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
|
||||
OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
|
||||
OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
|
||||
OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
|
||||
OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
|
||||
OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
|
||||
OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
|
||||
OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
|
||||
OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
|
||||
OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
|
||||
OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
|
||||
OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
|
||||
OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
|
||||
OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
|
||||
OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
|
||||
OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
|
||||
OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
|
||||
OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
|
||||
OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
|
||||
OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
|
||||
OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
|
||||
OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
|
||||
OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
|
||||
OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
|
||||
OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
|
||||
OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
|
||||
OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
|
||||
OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
|
||||
OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
|
||||
|
||||
/* D extension */
|
||||
|
||||
OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
|
||||
OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
|
||||
OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
|
||||
OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
|
||||
OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
|
||||
OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
|
||||
OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
|
||||
OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
|
||||
OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
|
||||
OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
|
||||
OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
|
||||
OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]);
|
||||
OFFSET(KVM_ARCH_FP_D_F12, kvm_cpu_context, fp.d.f[12]);
|
||||
OFFSET(KVM_ARCH_FP_D_F13, kvm_cpu_context, fp.d.f[13]);
|
||||
OFFSET(KVM_ARCH_FP_D_F14, kvm_cpu_context, fp.d.f[14]);
|
||||
OFFSET(KVM_ARCH_FP_D_F15, kvm_cpu_context, fp.d.f[15]);
|
||||
OFFSET(KVM_ARCH_FP_D_F16, kvm_cpu_context, fp.d.f[16]);
|
||||
OFFSET(KVM_ARCH_FP_D_F17, kvm_cpu_context, fp.d.f[17]);
|
||||
OFFSET(KVM_ARCH_FP_D_F18, kvm_cpu_context, fp.d.f[18]);
|
||||
OFFSET(KVM_ARCH_FP_D_F19, kvm_cpu_context, fp.d.f[19]);
|
||||
OFFSET(KVM_ARCH_FP_D_F20, kvm_cpu_context, fp.d.f[20]);
|
||||
OFFSET(KVM_ARCH_FP_D_F21, kvm_cpu_context, fp.d.f[21]);
|
||||
OFFSET(KVM_ARCH_FP_D_F22, kvm_cpu_context, fp.d.f[22]);
|
||||
OFFSET(KVM_ARCH_FP_D_F23, kvm_cpu_context, fp.d.f[23]);
|
||||
OFFSET(KVM_ARCH_FP_D_F24, kvm_cpu_context, fp.d.f[24]);
|
||||
OFFSET(KVM_ARCH_FP_D_F25, kvm_cpu_context, fp.d.f[25]);
|
||||
OFFSET(KVM_ARCH_FP_D_F26, kvm_cpu_context, fp.d.f[26]);
|
||||
OFFSET(KVM_ARCH_FP_D_F27, kvm_cpu_context, fp.d.f[27]);
|
||||
OFFSET(KVM_ARCH_FP_D_F28, kvm_cpu_context, fp.d.f[28]);
|
||||
OFFSET(KVM_ARCH_FP_D_F29, kvm_cpu_context, fp.d.f[29]);
|
||||
OFFSET(KVM_ARCH_FP_D_F30, kvm_cpu_context, fp.d.f[30]);
|
||||
OFFSET(KVM_ARCH_FP_D_F31, kvm_cpu_context, fp.d.f[31]);
|
||||
OFFSET(KVM_ARCH_FP_D_FCSR, kvm_cpu_context, fp.d.fcsr);
|
||||
|
||||
/*
|
||||
* THREAD_{F,X}* might be larger than a S-type offset can handle, but
|
||||
* these are used in performance-sensitive assembly so we can't resort
|
||||
|
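Each OFFSET() entry above records offsetof(struct, member) as an assembler-visible constant via the kbuild asm-offsets mechanism (linux/kbuild.h), which is what lets the low-level switch code address guest_context fields by name from assembly. A tiny illustration of the underlying idea, using a hypothetical struct rather than the kernel macros themselves:

#include <stddef.h>

struct demo_context {
	unsigned long zero;
	unsigned long ra;
	unsigned long sp;
};

/* e.g. OFFSET(DEMO_CTX_SP, demo_context, sp) would record this value: */
enum { DEMO_CTX_SP = offsetof(struct demo_context, sp) };	/* 16 on 64-bit */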
arch/riscv/kvm/Kconfig (new file, 35 lines)
@@ -0,0 +1,35 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# KVM configuration
|
||||
#
|
||||
|
||||
source "virt/kvm/Kconfig"
|
||||
|
||||
menuconfig VIRTUALIZATION
|
||||
bool "Virtualization"
|
||||
help
|
||||
Say Y here to get to see options for using your Linux host to run
|
||||
other operating systems inside virtual machines (guests).
|
||||
This option alone does not add any kernel code.
|
||||
|
||||
If you say N, all options in this submenu will be skipped and
|
||||
disabled.
|
||||
|
||||
if VIRTUALIZATION
|
||||
|
||||
config KVM
|
||||
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
|
||||
depends on RISCV_SBI && MMU
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select KVM_MMIO
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select HAVE_KVM_EVENTFD
|
||||
select SRCU
|
||||
help
|
||||
Support hosting virtualized guest machines.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endif # VIRTUALIZATION
|
arch/riscv/kvm/Makefile (new file, 26 lines)
@@ -0,0 +1,26 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Makefile for RISC-V KVM support
|
||||
#
|
||||
|
||||
ccflags-y += -I $(srctree)/$(src)
|
||||
|
||||
KVM := ../../../virt/kvm
|
||||
|
||||
obj-$(CONFIG_KVM) += kvm.o
|
||||
|
||||
kvm-y += $(KVM)/kvm_main.o
|
||||
kvm-y += $(KVM)/coalesced_mmio.o
|
||||
kvm-y += $(KVM)/binary_stats.o
|
||||
kvm-y += $(KVM)/eventfd.o
|
||||
kvm-y += main.o
|
||||
kvm-y += vm.o
|
||||
kvm-y += vmid.o
|
||||
kvm-y += tlb.o
|
||||
kvm-y += mmu.o
|
||||
kvm-y += vcpu.o
|
||||
kvm-y += vcpu_exit.o
|
||||
kvm-y += vcpu_fp.o
|
||||
kvm-y += vcpu_switch.o
|
||||
kvm-y += vcpu_sbi.o
|
||||
kvm-y += vcpu_timer.o
|
arch/riscv/kvm/main.c (new file, 118 lines)
@@ -0,0 +1,118 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/sbi.h>
|
||||
|
||||
long kvm_arch_dev_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_check_processor_compat(void *opaque)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_setup(void *opaque)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
{
|
||||
unsigned long hideleg, hedeleg;
|
||||
|
||||
hedeleg = 0;
|
||||
hedeleg |= (1UL << EXC_INST_MISALIGNED);
|
||||
hedeleg |= (1UL << EXC_BREAKPOINT);
|
||||
hedeleg |= (1UL << EXC_SYSCALL);
|
||||
hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
|
||||
hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
|
||||
hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
|
||||
csr_write(CSR_HEDELEG, hedeleg);
|
||||
|
||||
hideleg = 0;
|
||||
hideleg |= (1UL << IRQ_VS_SOFT);
|
||||
hideleg |= (1UL << IRQ_VS_TIMER);
|
||||
hideleg |= (1UL << IRQ_VS_EXT);
|
||||
csr_write(CSR_HIDELEG, hideleg);
|
||||
|
||||
csr_write(CSR_HCOUNTEREN, -1UL);
|
||||
|
||||
csr_write(CSR_HVIP, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
{
|
||||
csr_write(CSR_HEDELEG, 0);
|
||||
csr_write(CSR_HIDELEG, 0);
|
||||
}
|
||||
|
||||
int kvm_arch_init(void *opaque)
|
||||
{
|
||||
const char *str;
|
||||
|
||||
if (!riscv_isa_extension_available(NULL, h)) {
|
||||
kvm_info("hypervisor extension not available\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (sbi_spec_is_0_1()) {
|
||||
kvm_info("require SBI v0.2 or higher\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (sbi_probe_extension(SBI_EXT_RFENCE) <= 0) {
|
||||
kvm_info("require SBI RFENCE extension\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
kvm_riscv_stage2_mode_detect();
|
||||
|
||||
kvm_riscv_stage2_vmid_detect();
|
||||
|
||||
kvm_info("hypervisor extension available\n");
|
||||
|
||||
switch (kvm_riscv_stage2_mode()) {
|
||||
case HGATP_MODE_SV32X4:
|
||||
str = "Sv32x4";
|
||||
break;
|
||||
case HGATP_MODE_SV39X4:
|
||||
str = "Sv39x4";
|
||||
break;
|
||||
case HGATP_MODE_SV48X4:
|
||||
str = "Sv48x4";
|
||||
break;
|
||||
default:
|
||||
return -ENODEV;
|
||||
}
|
||||
kvm_info("using %s G-stage page table format\n", str);
|
||||
|
||||
kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
static int riscv_kvm_init(void)
|
||||
{
|
||||
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
|
||||
}
|
||||
module_init(riscv_kvm_init);
|
arch/riscv/kvm/mmu.c (new file, 802 lines)
@@ -0,0 +1,802 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/sbi.h>
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
|
||||
static unsigned long stage2_pgd_levels = 3;
|
||||
#define stage2_index_bits 9
|
||||
#else
|
||||
static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
|
||||
static unsigned long stage2_pgd_levels = 2;
|
||||
#define stage2_index_bits 10
|
||||
#endif
|
||||
|
||||
#define stage2_pgd_xbits 2
|
||||
#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits))
|
||||
#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \
|
||||
(stage2_pgd_levels * stage2_index_bits) + \
|
||||
stage2_pgd_xbits)
|
||||
#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits))
|
||||
|
||||
#define stage2_pte_leaf(__ptep) \
|
||||
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
|
||||
|
||||
static inline unsigned long stage2_pte_index(gpa_t addr, u32 level)
|
||||
{
|
||||
unsigned long mask;
|
||||
unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level);
|
||||
|
||||
if (level == (stage2_pgd_levels - 1))
|
||||
mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1;
|
||||
else
|
||||
mask = PTRS_PER_PTE - 1;
|
||||
|
||||
return (addr >> shift) & mask;
|
||||
}
|
||||
|
||||
static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
|
||||
{
|
||||
return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
|
||||
}
|
||||
|
||||
static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
|
||||
{
|
||||
u32 i;
|
||||
unsigned long psz = 1UL << 12;
|
||||
|
||||
for (i = 0; i < stage2_pgd_levels; i++) {
|
||||
if (page_size == (psz << (i * stage2_index_bits))) {
|
||||
*out_level = i;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
|
||||
{
|
||||
if (stage2_pgd_levels < level)
|
||||
return -EINVAL;
|
||||
|
||||
*out_pgsize = 1UL << (12 + (level * stage2_index_bits));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
|
||||
int min, int max)
|
||||
{
|
||||
void *page;
|
||||
|
||||
BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
|
||||
if (pcache->nobjs >= min)
|
||||
return 0;
|
||||
while (pcache->nobjs < max) {
|
||||
page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
pcache->objects[pcache->nobjs++] = page;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
|
||||
{
|
||||
while (pcache && pcache->nobjs)
|
||||
free_page((unsigned long)pcache->objects[--pcache->nobjs]);
|
||||
}
|
||||
|
||||
static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
|
||||
{
|
||||
void *p;
|
||||
|
||||
if (!pcache)
|
||||
return NULL;
|
||||
|
||||
BUG_ON(!pcache->nobjs);
|
||||
p = pcache->objects[--pcache->nobjs];
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
|
||||
pte_t **ptepp, u32 *ptep_level)
|
||||
{
|
||||
pte_t *ptep;
|
||||
u32 current_level = stage2_pgd_levels - 1;
|
||||
|
||||
*ptep_level = current_level;
|
||||
ptep = (pte_t *)kvm->arch.pgd;
|
||||
ptep = &ptep[stage2_pte_index(addr, current_level)];
|
||||
while (ptep && pte_val(*ptep)) {
|
||||
if (stage2_pte_leaf(ptep)) {
|
||||
*ptep_level = current_level;
|
||||
*ptepp = ptep;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (current_level) {
|
||||
current_level--;
|
||||
*ptep_level = current_level;
|
||||
ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
|
||||
ptep = &ptep[stage2_pte_index(addr, current_level)];
|
||||
} else {
|
||||
ptep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
|
||||
{
|
||||
struct cpumask hmask;
|
||||
unsigned long size = PAGE_SIZE;
|
||||
struct kvm_vmid *vmid = &kvm->arch.vmid;
|
||||
|
||||
if (stage2_level_to_page_size(level, &size))
|
||||
return;
|
||||
addr &= ~(size - 1);
|
||||
|
||||
/*
|
||||
* TODO: Instead of cpu_online_mask, we should only target CPUs
|
||||
* where the Guest/VM is running.
|
||||
*/
|
||||
preempt_disable();
|
||||
riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
|
||||
sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
|
||||
READ_ONCE(vmid->vmid));
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static int stage2_set_pte(struct kvm *kvm, u32 level,
|
||||
struct kvm_mmu_page_cache *pcache,
|
||||
gpa_t addr, const pte_t *new_pte)
|
||||
{
|
||||
u32 current_level = stage2_pgd_levels - 1;
|
||||
pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
|
||||
pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)];
|
||||
|
||||
if (current_level < level)
|
||||
return -EINVAL;
|
||||
|
||||
while (current_level != level) {
|
||||
if (stage2_pte_leaf(ptep))
|
||||
return -EEXIST;
|
||||
|
||||
if (!pte_val(*ptep)) {
|
||||
next_ptep = stage2_cache_alloc(pcache);
|
||||
if (!next_ptep)
|
||||
return -ENOMEM;
|
||||
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
|
||||
__pgprot(_PAGE_TABLE));
|
||||
} else {
|
||||
if (stage2_pte_leaf(ptep))
|
||||
return -EEXIST;
|
||||
next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
|
||||
}
|
||||
|
||||
current_level--;
|
||||
ptep = &next_ptep[stage2_pte_index(addr, current_level)];
|
||||
}
|
||||
|
||||
*ptep = *new_pte;
|
||||
if (stage2_pte_leaf(ptep))
|
||||
stage2_remote_tlb_flush(kvm, current_level, addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stage2_map_page(struct kvm *kvm,
|
||||
struct kvm_mmu_page_cache *pcache,
|
||||
gpa_t gpa, phys_addr_t hpa,
|
||||
unsigned long page_size,
|
||||
bool page_rdonly, bool page_exec)
|
||||
{
|
||||
int ret;
|
||||
u32 level = 0;
|
||||
pte_t new_pte;
|
||||
pgprot_t prot;
|
||||
|
||||
ret = stage2_page_size_to_level(page_size, &level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* A RISC-V implementation can choose to either:
|
||||
* 1) Update 'A' and 'D' PTE bits in hardware
|
||||
* 2) Generate page fault when 'A' and/or 'D' bits are not set
|
||||
* PTE so that software can update these bits.
|
||||
*
|
||||
* We support both options mentioned above. To achieve this, we
|
||||
* always set 'A' and 'D' PTE bits at time of creating stage2
|
||||
* mapping. To support KVM dirty page logging with both options
|
||||
* mentioned above, we will write-protect stage2 PTEs to track
|
||||
* dirty pages.
|
||||
*/
|
||||
|
||||
if (page_exec) {
|
||||
if (page_rdonly)
|
||||
prot = PAGE_READ_EXEC;
|
||||
else
|
||||
prot = PAGE_WRITE_EXEC;
|
||||
} else {
|
||||
if (page_rdonly)
|
||||
prot = PAGE_READ;
|
||||
else
|
||||
prot = PAGE_WRITE;
|
||||
}
|
||||
new_pte = pfn_pte(PFN_DOWN(hpa), prot);
|
||||
new_pte = pte_mkdirty(new_pte);
|
||||
|
||||
return stage2_set_pte(kvm, level, pcache, gpa, &new_pte);
|
||||
}
|
||||
|
||||
enum stage2_op {
|
||||
STAGE2_OP_NOP = 0, /* Nothing */
|
||||
STAGE2_OP_CLEAR, /* Clear/Unmap */
|
||||
STAGE2_OP_WP, /* Write-protect */
|
||||
};
|
||||
|
||||
static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
|
||||
pte_t *ptep, u32 ptep_level, enum stage2_op op)
|
||||
{
|
||||
int i, ret;
|
||||
pte_t *next_ptep;
|
||||
u32 next_ptep_level;
|
||||
unsigned long next_page_size, page_size;
|
||||
|
||||
ret = stage2_level_to_page_size(ptep_level, &page_size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
BUG_ON(addr & (page_size - 1));
|
||||
|
||||
if (!pte_val(*ptep))
|
||||
return;
|
||||
|
||||
if (ptep_level && !stage2_pte_leaf(ptep)) {
|
||||
next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
|
||||
next_ptep_level = ptep_level - 1;
|
||||
ret = stage2_level_to_page_size(next_ptep_level,
|
||||
&next_page_size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
if (op == STAGE2_OP_CLEAR)
|
||||
set_pte(ptep, __pte(0));
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
stage2_op_pte(kvm, addr + i * next_page_size,
|
||||
&next_ptep[i], next_ptep_level, op);
|
||||
if (op == STAGE2_OP_CLEAR)
|
||||
put_page(virt_to_page(next_ptep));
|
||||
} else {
|
||||
if (op == STAGE2_OP_CLEAR)
|
||||
set_pte(ptep, __pte(0));
|
||||
else if (op == STAGE2_OP_WP)
|
||||
set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
|
||||
stage2_remote_tlb_flush(kvm, ptep_level, addr);
|
||||
}
|
||||
}
|
||||
|
||||
static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
|
||||
gpa_t size, bool may_block)
|
||||
{
|
||||
int ret;
|
||||
pte_t *ptep;
|
||||
u32 ptep_level;
|
||||
bool found_leaf;
|
||||
unsigned long page_size;
|
||||
gpa_t addr = start, end = start + size;
|
||||
|
||||
while (addr < end) {
|
||||
found_leaf = stage2_get_leaf_entry(kvm, addr,
|
||||
&ptep, &ptep_level);
|
||||
ret = stage2_level_to_page_size(ptep_level, &page_size);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!found_leaf)
|
||||
goto next;
|
||||
|
||||
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
|
||||
stage2_op_pte(kvm, addr, ptep,
|
||||
ptep_level, STAGE2_OP_CLEAR);
|
||||
|
||||
next:
|
||||
addr += page_size;
|
||||
|
||||
/*
|
||||
* If the range is too large, release the kvm->mmu_lock
|
||||
* to prevent starvation and lockup detector warnings.
|
||||
*/
|
||||
if (may_block && addr < end)
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
|
||||
{
|
||||
int ret;
|
||||
pte_t *ptep;
|
||||
u32 ptep_level;
|
||||
bool found_leaf;
|
||||
gpa_t addr = start;
|
||||
unsigned long page_size;
|
||||
|
||||
while (addr < end) {
|
||||
found_leaf = stage2_get_leaf_entry(kvm, addr,
|
||||
&ptep, &ptep_level);
|
||||
ret = stage2_level_to_page_size(ptep_level, &page_size);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!found_leaf)
|
||||
goto next;
|
||||
|
||||
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
|
||||
stage2_op_pte(kvm, addr, ptep,
|
||||
ptep_level, STAGE2_OP_WP);
|
||||
|
||||
next:
|
||||
addr += page_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void stage2_wp_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
|
||||
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
|
||||
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
stage2_wp_range(kvm, start, end);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
|
||||
unsigned long size, bool writable)
|
||||
{
|
||||
pte_t pte;
|
||||
int ret = 0;
|
||||
unsigned long pfn;
|
||||
phys_addr_t addr, end;
|
||||
struct kvm_mmu_page_cache pcache = { 0, };
|
||||
|
||||
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
|
||||
pfn = __phys_to_pfn(hpa);
|
||||
|
||||
for (addr = gpa; addr < end; addr += PAGE_SIZE) {
|
||||
pte = pfn_pte(pfn, PAGE_KERNEL);
|
||||
|
||||
if (!writable)
|
||||
pte = pte_wrprotect(pte);
|
||||
|
||||
ret = stage2_cache_topup(&pcache,
|
||||
stage2_pgd_levels,
|
||||
KVM_MMU_PAGE_CACHE_NR_OBJS);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pfn++;
|
||||
}
|
||||
|
||||
out:
|
||||
stage2_cache_flush(&pcache);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset,
|
||||
unsigned long mask)
|
||||
{
|
||||
phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
|
||||
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
|
||||
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
|
||||
|
||||
stage2_wp_range(kvm, start, end);
|
||||
}
|
||||
|
||||
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
kvm_riscv_stage2_free_pgd(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
/*
|
||||
* At this point memslot has been committed and there is an
|
||||
* allocated dirty_bitmap[], dirty pages will be tracked while
|
||||
* the memory slot is write protected.
|
||||
*/
|
||||
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
|
||||
stage2_wp_memory_region(kvm, mem->slot);
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
hva_t hva = mem->userspace_addr;
|
||||
hva_t reg_end = hva + mem->memory_size;
|
||||
bool writable = !(mem->flags & KVM_MEM_READONLY);
|
||||
int ret = 0;
|
||||
|
||||
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
|
||||
change != KVM_MR_FLAGS_ONLY)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Prevent userspace from creating a memory region outside of the GPA
|
||||
* space addressable by the KVM guest GPA space.
|
||||
*/
|
||||
if ((memslot->base_gfn + memslot->npages) >=
|
||||
(stage2_gpa_size >> PAGE_SHIFT))
|
||||
return -EFAULT;
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
|
||||
/*
|
||||
* A memory region could potentially cover multiple VMAs, and
|
||||
* any holes between them, so iterate over all of them to find
|
||||
* out if we can map any of them right now.
|
||||
*
|
||||
* +--------------------------------------------+
|
||||
* +---------------+----------------+ +----------------+
|
||||
* | : VMA 1 | VMA 2 | | VMA 3 : |
|
||||
* +---------------+----------------+ +----------------+
|
||||
* | memory region |
|
||||
* +--------------------------------------------+
|
||||
*/
|
||||
do {
|
||||
struct vm_area_struct *vma = find_vma(current->mm, hva);
|
||||
hva_t vm_start, vm_end;
|
||||
|
||||
if (!vma || vma->vm_start >= reg_end)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Mapping a read-only VMA is only allowed if the
|
||||
* memory region is configured as read-only.
|
||||
*/
|
||||
if (writable && !(vma->vm_flags & VM_WRITE)) {
|
||||
ret = -EPERM;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Take the intersection of this VMA with the memory region */
|
||||
vm_start = max(hva, vma->vm_start);
|
||||
vm_end = min(reg_end, vma->vm_end);
|
||||
|
||||
if (vma->vm_flags & VM_PFNMAP) {
|
||||
gpa_t gpa = mem->guest_phys_addr +
|
||||
(vm_start - mem->userspace_addr);
|
||||
phys_addr_t pa;
|
||||
|
||||
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
|
||||
pa += vm_start - vma->vm_start;
|
||||
|
||||
/* IO region dirty page logging not allowed */
|
||||
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = stage2_ioremap(kvm, gpa, pa,
|
||||
vm_end - vm_start, writable);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
hva = vm_end;
|
||||
} while (hva < reg_end);
|
||||
|
||||
if (change == KVM_MR_FLAGS_ONLY)
|
||||
goto out;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
stage2_unmap_range(kvm, mem->guest_phys_addr,
|
||||
mem->memory_size, false);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
out:
|
||||
mmap_read_unlock(current->mm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
if (!kvm->arch.pgd)
|
||||
return false;
|
||||
|
||||
stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
|
||||
(range->end - range->start) << PAGE_SHIFT,
|
||||
range->may_block);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
int ret;
|
||||
kvm_pfn_t pfn = pte_pfn(range->pte);
|
||||
|
||||
if (!kvm->arch.pgd)
|
||||
return false;
|
||||
|
||||
WARN_ON(range->end - range->start != 1);
|
||||
|
||||
ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
|
||||
__pfn_to_phys(pfn), PAGE_SIZE, true, true);
|
||||
if (ret) {
|
||||
kvm_debug("Failed to map stage2 page (error %d)\n", ret);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
pte_t *ptep;
|
||||
u32 ptep_level = 0;
|
||||
u64 size = (range->end - range->start) << PAGE_SHIFT;
|
||||
|
||||
if (!kvm->arch.pgd)
|
||||
return false;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
|
||||
|
||||
if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
|
||||
&ptep, &ptep_level))
|
||||
return false;
|
||||
|
||||
return ptep_test_and_clear_young(NULL, 0, ptep);
|
||||
}
|
||||
|
||||
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
pte_t *ptep;
|
||||
u32 ptep_level = 0;
|
||||
u64 size = (range->end - range->start) << PAGE_SHIFT;
|
||||
|
||||
if (!kvm->arch.pgd)
|
||||
return false;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
|
||||
|
||||
if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
|
||||
&ptep, &ptep_level))
|
||||
return false;
|
||||
|
||||
return pte_young(*ptep);
|
||||
}
|
||||
|
||||
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *memslot,
|
||||
gpa_t gpa, unsigned long hva, bool is_write)
|
||||
{
|
||||
int ret;
|
||||
kvm_pfn_t hfn;
|
||||
bool writeable;
|
||||
short vma_pageshift;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct vm_area_struct *vma;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
|
||||
bool logging = (memslot->dirty_bitmap &&
|
||||
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
|
||||
unsigned long vma_pagesize, mmu_seq;
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
|
||||
vma = find_vma_intersection(current->mm, hva, hva + 1);
|
||||
if (unlikely(!vma)) {
|
||||
kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
|
||||
mmap_read_unlock(current->mm);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
vma_pageshift = huge_page_shift(hstate_vma(vma));
|
||||
else
|
||||
vma_pageshift = PAGE_SHIFT;
|
||||
vma_pagesize = 1ULL << vma_pageshift;
|
||||
if (logging || (vma->vm_flags & VM_PFNMAP))
|
||||
vma_pagesize = PAGE_SIZE;
|
||||
|
||||
if (vma_pagesize == PMD_SIZE || vma_pagesize == PGDIR_SIZE)
|
||||
gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
|
||||
|
||||
mmap_read_unlock(current->mm);
|
||||
|
||||
if (vma_pagesize != PGDIR_SIZE &&
|
||||
vma_pagesize != PMD_SIZE &&
|
||||
vma_pagesize != PAGE_SIZE) {
|
||||
kvm_err("Invalid VMA page size 0x%lx\n", vma_pagesize);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/* We need minimum second+third level pages */
|
||||
ret = stage2_cache_topup(pcache, stage2_pgd_levels,
|
||||
KVM_MMU_PAGE_CACHE_NR_OBJS);
|
||||
if (ret) {
|
||||
kvm_err("Failed to topup stage2 cache\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
|
||||
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
|
||||
if (hfn == KVM_PFN_ERR_HWPOISON) {
|
||||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
|
||||
vma_pageshift, current);
|
||||
return 0;
|
||||
}
|
||||
if (is_error_noslot_pfn(hfn))
|
||||
return -EFAULT;
|
||||
|
||||
/*
|
||||
* If logging is active then we allow writable pages only
|
||||
* for write faults.
|
||||
*/
|
||||
if (logging && !is_write)
|
||||
writeable = false;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
if (mmu_notifier_retry(kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
if (writeable) {
|
||||
kvm_set_pfn_dirty(hfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
|
||||
vma_pagesize, false, true);
|
||||
} else {
|
||||
ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
|
||||
vma_pagesize, true, true);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
kvm_err("Failed to map in stage2\n");
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_set_pfn_accessed(hfn);
|
||||
kvm_release_pfn_clean(hfn);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
stage2_cache_flush(&vcpu->arch.mmu_page_cache);
|
||||
}
|
||||
|
||||
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
|
||||
{
|
||||
struct page *pgd_page;
|
||||
|
||||
if (kvm->arch.pgd != NULL) {
|
||||
kvm_err("kvm_arch already initialized?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(stage2_pgd_size));
|
||||
if (!pgd_page)
|
||||
return -ENOMEM;
|
||||
kvm->arch.pgd = page_to_virt(pgd_page);
|
||||
kvm->arch.pgd_phys = page_to_phys(pgd_page);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
|
||||
{
|
||||
void *pgd = NULL;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (kvm->arch.pgd) {
|
||||
stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
|
||||
pgd = READ_ONCE(kvm->arch.pgd);
|
||||
kvm->arch.pgd = NULL;
|
||||
kvm->arch.pgd_phys = 0;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (pgd)
|
||||
free_pages((unsigned long)pgd, get_order(stage2_pgd_size));
|
||||
}
|
||||
|
||||
void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long hgatp = stage2_mode;
|
||||
struct kvm_arch *k = &vcpu->kvm->arch;
|
||||
|
||||
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) &
|
||||
HGATP_VMID_MASK;
|
||||
hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
|
||||
|
||||
csr_write(CSR_HGATP, hgatp);
|
||||
|
||||
if (!kvm_riscv_stage2_vmid_bits())
|
||||
__kvm_riscv_hfence_gvma_all();
|
||||
}
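For readers new to the hgatp layout used by kvm_riscv_stage2_update_hgatp() above: the CSR packs the translation mode, the VMID, and the PPN of the stage2 root page table into one register. Below is a minimal sketch of that composition, reusing the HGATP_* and PAGE_SHIFT definitions the function itself uses; the helper name and the unshifted mode argument are illustrative only (in the real code, stage2_mode already carries the MODE field pre-shifted).

/*
 * Illustration only: compose an hgatp value the same way the
 * function above does.
 */
static unsigned long make_hgatp(unsigned long mode, unsigned long vmid,
                                unsigned long pgd_phys)
{
        unsigned long hgatp = mode << HGATP_MODE_SHIFT;

        hgatp |= (vmid << HGATP_VMID_SHIFT) & HGATP_VMID_MASK;  /* VMID field */
        hgatp |= (pgd_phys >> PAGE_SHIFT) & HGATP_PPN;          /* root table PPN */

        return hgatp;
}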
|
||||
|
||||
void kvm_riscv_stage2_mode_detect(void)
|
||||
{
|
||||
#ifdef CONFIG_64BIT
|
||||
/* Try Sv48x4 stage2 mode */
|
||||
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
|
||||
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
|
||||
stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
|
||||
stage2_pgd_levels = 4;
|
||||
}
|
||||
csr_write(CSR_HGATP, 0);
|
||||
|
||||
__kvm_riscv_hfence_gvma_all();
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned long kvm_riscv_stage2_mode(void)
|
||||
{
|
||||
return stage2_mode >> HGATP_MODE_SHIFT;
|
||||
}
|
74
arch/riscv/kvm/tlb.S
Normal file
@@ -0,0 +1,74 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm.h>
|
||||
|
||||
.text
|
||||
.altmacro
|
||||
.option norelax
|
||||
|
||||
/*
|
||||
* Instruction encoding of hfence.gvma is:
|
||||
* HFENCE.GVMA rs1, rs2
|
||||
* HFENCE.GVMA zero, rs2
|
||||
* HFENCE.GVMA rs1
|
||||
* HFENCE.GVMA
|
||||
*
|
||||
* rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
|
||||
* rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
|
||||
* rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
|
||||
* rs1==zero and rs2==zero ==> HFENCE.GVMA
|
||||
*
|
||||
* Instruction encoding of HFENCE.GVMA is:
|
||||
* 0110001 rs2(5) rs1(5) 000 00000 1110011
|
||||
*/
|
||||
|
||||
ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
|
||||
/*
|
||||
* rs1 = a0 (GPA >> 2)
|
||||
* rs2 = a1 (VMID)
|
||||
* HFENCE.GVMA a0, a1
|
||||
* 0110001 01011 01010 000 00000 1110011
|
||||
*/
|
||||
.word 0x62b50073
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
|
||||
|
||||
ENTRY(__kvm_riscv_hfence_gvma_vmid)
|
||||
/*
|
||||
* rs1 = zero
|
||||
* rs2 = a0 (VMID)
|
||||
* HFENCE.GVMA zero, a0
|
||||
* 0110001 01010 00000 000 00000 1110011
|
||||
*/
|
||||
.word 0x62a00073
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_hfence_gvma_vmid)
|
||||
|
||||
ENTRY(__kvm_riscv_hfence_gvma_gpa)
|
||||
/*
|
||||
* rs1 = a0 (GPA >> 2)
|
||||
* rs2 = zero
|
||||
* HFENCE.GVMA a0
|
||||
* 0110001 00000 01010 000 00000 1110011
|
||||
*/
|
||||
.word 0x62050073
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_hfence_gvma_gpa)
|
||||
|
||||
ENTRY(__kvm_riscv_hfence_gvma_all)
|
||||
/*
|
||||
* rs1 = zero
|
||||
* rs2 = zero
|
||||
* HFENCE.GVMA
|
||||
* 0110001 00000 00000 000 00000 1110011
|
||||
*/
|
||||
.word 0x62000073
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_hfence_gvma_all)
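The .word values above are hand-assembled because the assembler may not know the hypervisor extension. As a sanity check, the R-type encoding given in the header comment can be rebuilt in plain C; this is an illustrative host-side snippet, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* HFENCE.GVMA: funct7=0110001, funct3=000, rd=00000, opcode=1110011 */
static uint32_t hfence_gvma(uint32_t rs1, uint32_t rs2)
{
        return (0x31u << 25) | (rs2 << 20) | (rs1 << 15) | 0x73u;
}

int main(void)
{
        printf("%#010x\n", hfence_gvma(10, 11)); /* a0, a1   -> 0x62b50073 */
        printf("%#010x\n", hfence_gvma(0, 10));  /* zero, a0 -> 0x62a00073 */
        printf("%#010x\n", hfence_gvma(10, 0));  /* a0, zero -> 0x62050073 */
        printf("%#010x\n", hfence_gvma(0, 0));   /* all      -> 0x62000073 */
        return 0;
}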
|
825
arch/riscv/kvm/vcpu.c
Normal file
@@ -0,0 +1,825 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
||||
KVM_GENERIC_VCPU_STATS(),
|
||||
STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
|
||||
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
|
||||
STATS_DESC_COUNTER(VCPU, exits)
|
||||
};
|
||||
|
||||
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
.name_size = KVM_STATS_NAME_SIZE,
|
||||
.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
|
||||
.id_offset = sizeof(struct kvm_stats_header),
|
||||
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
|
||||
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
|
||||
sizeof(kvm_vcpu_stats_desc),
|
||||
};
|
||||
|
||||
#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \
|
||||
riscv_isa_extension_mask(c) | \
|
||||
riscv_isa_extension_mask(d) | \
|
||||
riscv_isa_extension_mask(f) | \
|
||||
riscv_isa_extension_mask(i) | \
|
||||
riscv_isa_extension_mask(m) | \
|
||||
riscv_isa_extension_mask(s) | \
|
||||
riscv_isa_extension_mask(u))
|
||||
|
||||
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
|
||||
|
||||
memcpy(csr, reset_csr, sizeof(*csr));
|
||||
|
||||
memcpy(cntx, reset_cntx, sizeof(*cntx));
|
||||
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
|
||||
kvm_riscv_vcpu_timer_reset(vcpu);
|
||||
|
||||
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
|
||||
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *cntx;
|
||||
|
||||
/* Mark this VCPU never ran */
|
||||
vcpu->arch.ran_atleast_once = false;
|
||||
|
||||
/* Setup ISA features available to VCPU */
|
||||
vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
|
||||
|
||||
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
|
||||
cntx = &vcpu->arch.guest_reset_context;
|
||||
cntx->sstatus = SR_SPP | SR_SPIE;
|
||||
cntx->hstatus = 0;
|
||||
cntx->hstatus |= HSTATUS_VTW;
|
||||
cntx->hstatus |= HSTATUS_SPVP;
|
||||
cntx->hstatus |= HSTATUS_SPV;
|
||||
|
||||
/* Setup VCPU timer */
|
||||
kvm_riscv_vcpu_timer_init(vcpu);
|
||||
|
||||
/* Reset VCPU */
|
||||
kvm_riscv_reset_vcpu(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Cleanup VCPU timer */
|
||||
kvm_riscv_vcpu_timer_deinit(vcpu);
|
||||
|
||||
/* Flush the pages pre-allocated for Stage2 page table mappings */
|
||||
kvm_riscv_stage2_flush_cache(vcpu);
|
||||
}
|
||||
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
|
||||
!vcpu->arch.power_off && !vcpu->arch.pause);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
|
||||
}
|
||||
|
||||
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
{
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CONFIG);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
reg_val = vcpu->arch.isa;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
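A hedged sketch of the userspace side of this handler on a 64-bit host: reading the ISA config register with KVM_GET_ONE_REG. The reg id is built from the same KVM_REG_RISCV_CONFIG / KVM_REG_RISCV_CONFIG_REG(isa) macros the handler decodes; KVM_REG_RISCV and the size field are assumed to come from the uapi headers added by this series, and vcpu_fd from KVM_CREATE_VCPU.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

/* Illustration: fetch the guest ISA bitmap exposed by the handler above. */
static int read_guest_isa(int vcpu_fd, uint64_t *isa)
{
        struct kvm_one_reg reg = {
                .id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
                      KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
                .addr = (uint64_t)(unsigned long)isa,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}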
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CONFIG);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
vcpu->arch.isa = reg_val;
|
||||
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
|
||||
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CORE);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
|
||||
reg_val = cntx->sepc;
|
||||
else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
|
||||
reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
|
||||
reg_val = ((unsigned long *)cntx)[reg_num];
|
||||
else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
|
||||
reg_val = (cntx->sstatus & SR_SPP) ?
|
||||
KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CORE);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
|
||||
cntx->sepc = reg_val;
|
||||
else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
|
||||
reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
|
||||
((unsigned long *)cntx)[reg_num] = reg_val;
|
||||
else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
|
||||
if (reg_val == KVM_RISCV_MODE_S)
|
||||
cntx->sstatus |= SR_SPP;
|
||||
else
|
||||
cntx->sstatus &= ~SR_SPP;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CSR);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
|
||||
kvm_riscv_vcpu_flush_interrupts(vcpu);
|
||||
reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
|
||||
} else
|
||||
reg_val = ((unsigned long *)csr)[reg_num];
|
||||
|
||||
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CSR);
|
||||
unsigned long reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
|
||||
reg_val &= VSIP_VALID_MASK;
|
||||
reg_val <<= VSIP_TO_HVIP_SHIFT;
|
||||
}
|
||||
|
||||
((unsigned long *)csr)[reg_num] = reg_val;
|
||||
|
||||
if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
|
||||
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
|
||||
return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
|
||||
return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
|
||||
return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
|
||||
return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
|
||||
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_F);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
|
||||
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
|
||||
return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
|
||||
return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
|
||||
return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
|
||||
return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
|
||||
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_F);
|
||||
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
|
||||
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
|
||||
KVM_REG_RISCV_FP_D);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = filp->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
|
||||
if (ioctl == KVM_INTERRUPT) {
|
||||
struct kvm_interrupt irq;
|
||||
|
||||
if (copy_from_user(&irq, argp, sizeof(irq)))
|
||||
return -EFAULT;
|
||||
|
||||
if (irq.irq == KVM_INTERRUPT_SET)
|
||||
return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
|
||||
else
|
||||
return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
|
||||
}
|
||||
|
||||
return -ENOIOCTLCMD;
|
||||
}
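For completeness, a hedged userspace sketch of the KVM_INTERRUPT path handled above; KVM_INTERRUPT_SET is assumed to come from the uapi headers, vcpu_fd from KVM_CREATE_VCPU, and (as the handler shows) any other value de-asserts the line.

#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

/* Assert the guest external interrupt (IRQ_VS_EXT) from the VMM. */
static int assert_guest_ext_irq(int vcpu_fd)
{
        struct kvm_interrupt irq = { .irq = KVM_INTERRUPT_SET };

        return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}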
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = filp->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
long r = -EINVAL;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_SET_ONE_REG:
|
||||
case KVM_GET_ONE_REG: {
|
||||
struct kvm_one_reg reg;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&reg, argp, sizeof(reg)))
|
||||
break;
|
||||
|
||||
if (ioctl == KVM_SET_ONE_REG)
|
||||
r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
|
||||
else
|
||||
r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_sregs *sregs)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_sregs *sregs)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_translation *tr)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
unsigned long mask, val;
|
||||
|
||||
if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
|
||||
mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
|
||||
val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
|
||||
|
||||
csr->hvip &= ~mask;
|
||||
csr->hvip |= val;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long hvip;
|
||||
struct kvm_vcpu_arch *v = &vcpu->arch;
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
/* Read current HVIP and VSIE CSRs */
|
||||
csr->vsie = csr_read(CSR_VSIE);
|
||||
|
||||
/* Sync up HVIP.VSSIP bit changes done by the Guest */
|
||||
hvip = csr_read(CSR_HVIP);
|
||||
if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
|
||||
if (hvip & (1UL << IRQ_VS_SOFT)) {
|
||||
if (!test_and_set_bit(IRQ_VS_SOFT,
|
||||
&v->irqs_pending_mask))
|
||||
set_bit(IRQ_VS_SOFT, &v->irqs_pending);
|
||||
} else {
|
||||
if (!test_and_set_bit(IRQ_VS_SOFT,
|
||||
&v->irqs_pending_mask))
|
||||
clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
|
||||
{
|
||||
if (irq != IRQ_VS_SOFT &&
|
||||
irq != IRQ_VS_TIMER &&
|
||||
irq != IRQ_VS_EXT)
|
||||
return -EINVAL;
|
||||
|
||||
set_bit(irq, &vcpu->arch.irqs_pending);
|
||||
smp_mb__before_atomic();
|
||||
set_bit(irq, &vcpu->arch.irqs_pending_mask);
|
||||
|
||||
kvm_vcpu_kick(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
|
||||
{
|
||||
if (irq != IRQ_VS_SOFT &&
|
||||
irq != IRQ_VS_TIMER &&
|
||||
irq != IRQ_VS_EXT)
|
||||
return -EINVAL;
|
||||
|
||||
clear_bit(irq, &vcpu->arch.irqs_pending);
|
||||
smp_mb__before_atomic();
|
||||
set_bit(irq, &vcpu->arch.irqs_pending_mask);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
|
||||
{
|
||||
unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
|
||||
<< VSIP_TO_HVIP_SHIFT) & mask;
|
||||
|
||||
return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.power_off = true;
|
||||
kvm_make_request(KVM_REQ_SLEEP, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.power_off = false;
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mp_state *mp_state)
|
||||
{
|
||||
if (vcpu->arch.power_off)
|
||||
mp_state->mp_state = KVM_MP_STATE_STOPPED;
|
||||
else
|
||||
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mp_state *mp_state)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (mp_state->mp_state) {
|
||||
case KVM_MP_STATE_RUNNABLE:
|
||||
vcpu->arch.power_off = false;
|
||||
break;
|
||||
case KVM_MP_STATE_STOPPED:
|
||||
kvm_riscv_vcpu_power_off(vcpu);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg)
|
||||
{
|
||||
/* TODO: To be implemented later. */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
csr_write(CSR_VSSTATUS, csr->vsstatus);
|
||||
csr_write(CSR_VSIE, csr->vsie);
|
||||
csr_write(CSR_VSTVEC, csr->vstvec);
|
||||
csr_write(CSR_VSSCRATCH, csr->vsscratch);
|
||||
csr_write(CSR_VSEPC, csr->vsepc);
|
||||
csr_write(CSR_VSCAUSE, csr->vscause);
|
||||
csr_write(CSR_VSTVAL, csr->vstval);
|
||||
csr_write(CSR_HVIP, csr->hvip);
|
||||
csr_write(CSR_VSATP, csr->vsatp);
|
||||
|
||||
kvm_riscv_stage2_update_hgatp(vcpu);
|
||||
|
||||
kvm_riscv_vcpu_timer_restore(vcpu);
|
||||
|
||||
kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
|
||||
kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
|
||||
vcpu->arch.isa);
|
||||
|
||||
vcpu->cpu = cpu;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
vcpu->cpu = -1;
|
||||
|
||||
kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
|
||||
vcpu->arch.isa);
|
||||
kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
|
||||
|
||||
csr_write(CSR_HGATP, 0);
|
||||
|
||||
csr->vsstatus = csr_read(CSR_VSSTATUS);
|
||||
csr->vsie = csr_read(CSR_VSIE);
|
||||
csr->vstvec = csr_read(CSR_VSTVEC);
|
||||
csr->vsscratch = csr_read(CSR_VSSCRATCH);
|
||||
csr->vsepc = csr_read(CSR_VSEPC);
|
||||
csr->vscause = csr_read(CSR_VSCAUSE);
|
||||
csr->vstval = csr_read(CSR_VSTVAL);
|
||||
csr->hvip = csr_read(CSR_HVIP);
|
||||
csr->vsatp = csr_read(CSR_VSATP);
|
||||
}
|
||||
|
||||
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
|
||||
|
||||
if (kvm_request_pending(vcpu)) {
|
||||
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
|
||||
rcuwait_wait_event(wait,
|
||||
(!vcpu->arch.power_off) && (!vcpu->arch.pause),
|
||||
TASK_INTERRUPTIBLE);
|
||||
|
||||
if (vcpu->arch.power_off || vcpu->arch.pause) {
|
||||
/*
|
||||
* Awaken to handle a signal, request to
|
||||
* sleep again later.
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_SLEEP, vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
|
||||
kvm_riscv_reset_vcpu(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
|
||||
kvm_riscv_stage2_update_hgatp(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
|
||||
__kvm_riscv_hfence_gvma_all();
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
|
||||
csr_write(CSR_HVIP, csr->hvip);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
struct kvm_cpu_trap trap;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
|
||||
/* Mark this VCPU ran at least once */
|
||||
vcpu->arch.ran_atleast_once = true;
|
||||
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
/* Process MMIO value returned from user-space */
|
||||
if (run->exit_reason == KVM_EXIT_MMIO) {
|
||||
ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* Process SBI value returned from user-space */
|
||||
if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
|
||||
ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (run->immediate_exit) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
kvm_sigset_activate(vcpu);
|
||||
|
||||
ret = 1;
|
||||
run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
while (ret > 0) {
|
||||
/* Check conditions before entering the guest */
|
||||
cond_resched();
|
||||
|
||||
kvm_riscv_stage2_vmid_update(vcpu);
|
||||
|
||||
kvm_riscv_check_vcpu_requests(vcpu);
|
||||
|
||||
preempt_disable();
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
* Exit if we have a signal pending so that we can deliver
|
||||
* the signal to user space.
|
||||
*/
|
||||
if (signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure we set mode to IN_GUEST_MODE after we disable
|
||||
* interrupts and before the final VCPU requests check.
|
||||
* See the comment in kvm_vcpu_exiting_guest_mode() and
|
||||
* Documentation/virtual/kvm/vcpu-requests.rst
|
||||
*/
|
||||
vcpu->mode = IN_GUEST_MODE;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
smp_mb__after_srcu_read_unlock();
|
||||
|
||||
/*
|
||||
* We might have got VCPU interrupts updated asynchronously
|
||||
* so update it in HW.
|
||||
*/
|
||||
kvm_riscv_vcpu_flush_interrupts(vcpu);
|
||||
|
||||
/* Update HVIP CSR for current CPU */
|
||||
kvm_riscv_update_hvip(vcpu);
|
||||
|
||||
if (ret <= 0 ||
|
||||
kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
|
||||
kvm_request_pending(vcpu)) {
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
continue;
|
||||
}
|
||||
|
||||
guest_enter_irqoff();
|
||||
|
||||
__kvm_riscv_switch_to(&vcpu->arch);
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
vcpu->stat.exits++;
|
||||
|
||||
/*
|
||||
* Save SCAUSE, STVAL, HTVAL, and HTINST because we might
|
||||
* get an interrupt between __kvm_riscv_switch_to() and
|
||||
* local_irq_enable() which can potentially change CSRs.
|
||||
*/
|
||||
trap.sepc = vcpu->arch.guest_context.sepc;
|
||||
trap.scause = csr_read(CSR_SCAUSE);
|
||||
trap.stval = csr_read(CSR_STVAL);
|
||||
trap.htval = csr_read(CSR_HTVAL);
|
||||
trap.htinst = csr_read(CSR_HTINST);
|
||||
|
||||
/* Sync up interrupt state with HW */
|
||||
kvm_riscv_vcpu_sync_interrupts(vcpu);
|
||||
|
||||
/*
|
||||
* We may have taken a host interrupt in VS/VU-mode (i.e.
|
||||
* while executing the guest). This interrupt is still
|
||||
* pending, as we haven't serviced it yet!
|
||||
*
|
||||
* We're now back in HS-mode with interrupts disabled
|
||||
* so enabling the interrupts now will have the effect
|
||||
* of taking the interrupt again, in HS-mode this time.
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* We do local_irq_enable() before calling guest_exit() so
|
||||
* that if a timer interrupt hits while running the guest
|
||||
* we account that tick as being spent in the guest. We
|
||||
* enable preemption after calling guest_exit() so that if
|
||||
* we get preempted we make sure ticks after that is not
|
||||
* counted as guest time.
|
||||
*/
|
||||
guest_exit();
|
||||
|
||||
preempt_enable();
|
||||
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
|
||||
}
|
||||
|
||||
kvm_sigset_deactivate(vcpu);
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
701
arch/riscv/kvm/vcpu_exit.c
Normal file
@@ -0,0 +1,701 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/csr.h>
|
||||
|
||||
#define INSN_OPCODE_MASK 0x007c
|
||||
#define INSN_OPCODE_SHIFT 2
|
||||
#define INSN_OPCODE_SYSTEM 28
|
||||
|
||||
#define INSN_MASK_WFI 0xffffffff
|
||||
#define INSN_MATCH_WFI 0x10500073
|
||||
|
||||
#define INSN_MATCH_LB 0x3
|
||||
#define INSN_MASK_LB 0x707f
|
||||
#define INSN_MATCH_LH 0x1003
|
||||
#define INSN_MASK_LH 0x707f
|
||||
#define INSN_MATCH_LW 0x2003
|
||||
#define INSN_MASK_LW 0x707f
|
||||
#define INSN_MATCH_LD 0x3003
|
||||
#define INSN_MASK_LD 0x707f
|
||||
#define INSN_MATCH_LBU 0x4003
|
||||
#define INSN_MASK_LBU 0x707f
|
||||
#define INSN_MATCH_LHU 0x5003
|
||||
#define INSN_MASK_LHU 0x707f
|
||||
#define INSN_MATCH_LWU 0x6003
|
||||
#define INSN_MASK_LWU 0x707f
|
||||
#define INSN_MATCH_SB 0x23
|
||||
#define INSN_MASK_SB 0x707f
|
||||
#define INSN_MATCH_SH 0x1023
|
||||
#define INSN_MASK_SH 0x707f
|
||||
#define INSN_MATCH_SW 0x2023
|
||||
#define INSN_MASK_SW 0x707f
|
||||
#define INSN_MATCH_SD 0x3023
|
||||
#define INSN_MASK_SD 0x707f
|
||||
|
||||
#define INSN_MATCH_C_LD 0x6000
|
||||
#define INSN_MASK_C_LD 0xe003
|
||||
#define INSN_MATCH_C_SD 0xe000
|
||||
#define INSN_MASK_C_SD 0xe003
|
||||
#define INSN_MATCH_C_LW 0x4000
|
||||
#define INSN_MASK_C_LW 0xe003
|
||||
#define INSN_MATCH_C_SW 0xc000
|
||||
#define INSN_MASK_C_SW 0xe003
|
||||
#define INSN_MATCH_C_LDSP 0x6002
|
||||
#define INSN_MASK_C_LDSP 0xe003
|
||||
#define INSN_MATCH_C_SDSP 0xe002
|
||||
#define INSN_MASK_C_SDSP 0xe003
|
||||
#define INSN_MATCH_C_LWSP 0x4002
|
||||
#define INSN_MASK_C_LWSP 0xe003
|
||||
#define INSN_MATCH_C_SWSP 0xc002
|
||||
#define INSN_MASK_C_SWSP 0xe003
|
||||
|
||||
#define INSN_16BIT_MASK 0x3
|
||||
|
||||
#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
|
||||
|
||||
#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define LOG_REGBYTES 3
|
||||
#else
|
||||
#define LOG_REGBYTES 2
|
||||
#endif
|
||||
#define REGBYTES (1 << LOG_REGBYTES)
|
||||
|
||||
#define SH_RD 7
|
||||
#define SH_RS1 15
|
||||
#define SH_RS2 20
|
||||
#define SH_RS2C 2
|
||||
|
||||
#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
|
||||
#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
|
||||
(RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 1) << 6))
|
||||
#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 2) << 6))
|
||||
#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 2) << 6))
|
||||
#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 3) << 6))
|
||||
#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
|
||||
(RV_X(x, 7, 2) << 6))
|
||||
#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 7, 3) << 6))
|
||||
#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
|
||||
#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
|
||||
#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
|
||||
|
||||
#define SHIFT_RIGHT(x, y) \
|
||||
((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
|
||||
|
||||
#define REG_MASK \
|
||||
((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
|
||||
|
||||
#define REG_OFFSET(insn, pos) \
|
||||
(SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
|
||||
|
||||
#define REG_PTR(insn, pos, regs) \
|
||||
((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
|
||||
|
||||
#define GET_RM(insn) (((insn) >> 12) & 7)
|
||||
|
||||
#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
|
||||
#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
|
||||
#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
|
||||
#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
|
||||
#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
|
||||
#define GET_SP(regs) (*REG_PTR(2, 0, regs))
|
||||
#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
|
||||
#define IMM_I(insn) ((s32)(insn) >> 20)
|
||||
#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
|
||||
(s32)(((insn) >> 7) & 0x1f))
|
||||
#define MASK_FUNCT3 0x7000
|
||||
|
||||
static int truly_illegal_insn(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
|
||||
/* Redirect trap to Guest VCPU */
|
||||
utrap.sepc = vcpu->arch.guest_context.sepc;
|
||||
utrap.scause = EXC_INST_ILLEGAL;
|
||||
utrap.stval = insn;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int system_opcode_insn(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
}
|
||||
vcpu->arch.guest_context.sepc += INSN_LEN(insn);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
|
||||
static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
unsigned long insn = trap->stval;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct;
|
||||
|
||||
if (unlikely(INSN_IS_16BIT(insn))) {
|
||||
if (insn == 0) {
|
||||
ct = &vcpu->arch.guest_context;
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
|
||||
ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (INSN_IS_16BIT(insn))
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
|
||||
switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
|
||||
case INSN_OPCODE_SYSTEM:
|
||||
return system_opcode_insn(vcpu, run, insn);
|
||||
default:
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
}
|
||||
|
||||
static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr, unsigned long htinst)
|
||||
{
|
||||
u8 data_buf[8];
|
||||
unsigned long insn;
|
||||
int shift = 0, len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
/* Decode length of MMIO and shift */
|
||||
if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
|
||||
len = 4;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
|
||||
len = 2;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = shift;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = false;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
memcpy(run->mmio.data, data_buf, len);
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
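When emulate_load() cannot complete the access in the kernel it fills run->mmio and exits to userspace with KVM_EXIT_MMIO. A hedged sketch of the VMM side of that contract (device_read() is a hypothetical device-model helper, not part of the patch):

#include <stdint.h>
#include <string.h>
#include <linux/kvm.h>

/* Hypothetical device-model read hook. */
extern void device_read(uint64_t addr, void *data, unsigned int len);

/*
 * VMM side of the contract set up above: called after KVM_RUN returns
 * with run->exit_reason == KVM_EXIT_MMIO.
 */
static void handle_mmio_exit(struct kvm_run *run)
{
        if (!run->mmio.is_write) {
                uint8_t buf[8];

                device_read(run->mmio.phys_addr, buf, run->mmio.len);
                memcpy(run->mmio.data, buf, run->mmio.len);
                /*
                 * On the next KVM_RUN, kvm_riscv_vcpu_mmio_return() writes
                 * this value into rd and steps sepc past the instruction.
                 */
        }
        /* For stores, run->mmio.data was already filled by emulate_store(). */
}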
|
||||
|
||||
static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr, unsigned long htinst)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong data;
|
||||
unsigned long insn;
|
||||
int len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
data = GET_RS2(insn, &vcpu->arch.guest_context);
|
||||
data8 = data16 = data32 = data64 = data;
|
||||
|
||||
if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
|
||||
len = 4;
|
||||
} else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
|
||||
len = 1;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
|
||||
len = 8;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
|
||||
len = 8;
|
||||
data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
|
||||
len = 4;
|
||||
data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = 0;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Copy data to kvm_run instance */
|
||||
switch (len) {
|
||||
case 1:
|
||||
*((u8 *)run->mmio.data) = data8;
|
||||
break;
|
||||
case 2:
|
||||
*((u16 *)run->mmio.data) = data16;
|
||||
break;
|
||||
case 4:
|
||||
*((u32 *)run->mmio.data) = data32;
|
||||
break;
|
||||
case 8:
|
||||
*((u64 *)run->mmio.data) = data64;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = true;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
|
||||
fault_addr, len, run->mmio.data)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long hva, fault_addr;
|
||||
bool writeable;
|
||||
gfn_t gfn;
|
||||
int ret;
|
||||
|
||||
fault_addr = (trap->htval << 2) | (trap->stval & 0x3);
|
||||
gfn = fault_addr >> PAGE_SHIFT;
|
||||
memslot = gfn_to_memslot(vcpu->kvm, gfn);
|
||||
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
|
||||
|
||||
if (kvm_is_error_hva(hva) ||
|
||||
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) {
|
||||
switch (trap->scause) {
|
||||
case EXC_LOAD_GUEST_PAGE_FAULT:
|
||||
return emulate_load(vcpu, run, fault_addr,
|
||||
trap->htinst);
|
||||
case EXC_STORE_GUEST_PAGE_FAULT:
|
||||
return emulate_store(vcpu, run, fault_addr,
|
||||
trap->htinst);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
};
|
||||
}
|
||||
|
||||
ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva,
|
||||
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return 1;
|
||||
}
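A short worked example of the address reconstruction at the top of stage2_page_fault(): per the hypervisor extension, htval holds the guest physical address shifted right by 2, so the faulting GPA is rebuilt from htval and the two low bits of stval. The trap values below are hypothetical:

/* Hypothetical trap state for illustration only. */
unsigned long htval = 0x20001;  /* guest physical address >> 2 */
unsigned long stval = 0x2;      /* carries the two low GPA bits */
unsigned long fault_addr = (htval << 2) | (stval & 0x3);  /* 0x80006 */
unsigned long gfn = fault_addr >> PAGE_SHIFT;             /* 0x80 with 4 KiB pages */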
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @read_insn: Flag representing whether we are reading instruction
|
||||
* @guest_addr: Guest address to read
|
||||
* @trap: Output pointer to trap details
|
||||
*/
|
||||
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
|
||||
bool read_insn,
|
||||
unsigned long guest_addr,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
register unsigned long taddr asm("a0") = (unsigned long)trap;
|
||||
register unsigned long ttmp asm("a1");
|
||||
register unsigned long val asm("t0");
|
||||
register unsigned long tmp asm("t1");
|
||||
register unsigned long addr asm("t2") = guest_addr;
|
||||
unsigned long flags;
|
||||
unsigned long old_stvec, old_hstatus;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
old_hstatus = csr_swap(CSR_HSTATUS, vcpu->arch.guest_context.hstatus);
|
||||
old_stvec = csr_swap(CSR_STVEC, (ulong)&__kvm_riscv_unpriv_trap);
|
||||
|
||||
if (read_insn) {
|
||||
/*
|
||||
* HLVX.HU instruction
|
||||
* 0110010 00011 rs1 100 rd 1110011
|
||||
*/
|
||||
asm volatile ("\n"
|
||||
".option push\n"
|
||||
".option norvc\n"
|
||||
"add %[ttmp], %[taddr], 0\n"
|
||||
/*
|
||||
* HLVX.HU %[val], (%[addr])
|
||||
* HLVX.HU t0, (t2)
|
||||
* 0110010 00011 00111 100 00101 1110011
|
||||
*/
|
||||
".word 0x6433c2f3\n"
|
||||
"andi %[tmp], %[val], 3\n"
|
||||
"addi %[tmp], %[tmp], -3\n"
|
||||
"bne %[tmp], zero, 2f\n"
|
||||
"addi %[addr], %[addr], 2\n"
|
||||
/*
|
||||
* HLVX.HU %[tmp], (%[addr])
|
||||
* HLVX.HU t1, (t2)
|
||||
* 0110010 00011 00111 100 00110 1110011
|
||||
*/
|
||||
".word 0x6433c373\n"
|
||||
"sll %[tmp], %[tmp], 16\n"
|
||||
"add %[val], %[val], %[tmp]\n"
|
||||
"2:\n"
|
||||
".option pop"
|
||||
: [val] "=&r" (val), [tmp] "=&r" (tmp),
|
||||
[taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp),
|
||||
[addr] "+&r" (addr) : : "memory");
|
||||
|
||||
if (trap->scause == EXC_LOAD_PAGE_FAULT)
|
||||
trap->scause = EXC_INST_PAGE_FAULT;
|
||||
} else {
|
||||
/*
|
||||
* HLV.D instruction
|
||||
* 0110110 00000 rs1 100 rd 1110011
|
||||
*
|
||||
* HLV.W instruction
|
||||
* 0110100 00000 rs1 100 rd 1110011
|
||||
*/
|
||||
asm volatile ("\n"
|
||||
".option push\n"
|
||||
".option norvc\n"
|
||||
"add %[ttmp], %[taddr], 0\n"
|
||||
#ifdef CONFIG_64BIT
|
||||
/*
|
||||
* HLV.D %[val], (%[addr])
|
||||
* HLV.D t0, (t2)
|
||||
* 0110110 00000 00111 100 00101 1110011
|
||||
*/
|
||||
".word 0x6c03c2f3\n"
|
||||
#else
|
||||
/*
|
||||
* HLV.W %[val], (%[addr])
|
||||
* HLV.W t0, (t2)
|
||||
* 0110100 00000 00111 100 00101 1110011
|
||||
*/
|
||||
".word 0x6803c2f3\n"
|
||||
#endif
|
||||
".option pop"
|
||||
: [val] "=&r" (val),
|
||||
[taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp)
|
||||
: [addr] "r" (addr) : "memory");
|
||||
}
|
||||
|
||||
csr_write(CSR_STVEC, old_stvec);
|
||||
csr_write(CSR_HSTATUS, old_hstatus);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_trap_redirect -- Redirect trap to Guest
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @trap: Trap details
|
||||
*/
|
||||
void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
unsigned long vsstatus = csr_read(CSR_VSSTATUS);
|
||||
|
||||
/* Change Guest SSTATUS.SPP bit */
|
||||
vsstatus &= ~SR_SPP;
|
||||
if (vcpu->arch.guest_context.sstatus & SR_SPP)
|
||||
vsstatus |= SR_SPP;
|
||||
|
||||
/* Change Guest SSTATUS.SPIE bit */
|
||||
vsstatus &= ~SR_SPIE;
|
||||
if (vsstatus & SR_SIE)
|
||||
vsstatus |= SR_SPIE;
|
||||
|
||||
/* Clear Guest SSTATUS.SIE bit */
|
||||
vsstatus &= ~SR_SIE;
|
||||
|
||||
/* Update Guest SSTATUS */
|
||||
csr_write(CSR_VSSTATUS, vsstatus);
|
||||
|
||||
/* Update Guest SCAUSE, STVAL, and SEPC */
|
||||
csr_write(CSR_VSCAUSE, trap->scause);
|
||||
csr_write(CSR_VSTVAL, trap->stval);
|
||||
csr_write(CSR_VSEPC, trap->sepc);
|
||||
|
||||
/* Set Guest PC to Guest exception vector */
|
||||
vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
|
||||
* or in-kernel IO emulation
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
*/
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong insn;
|
||||
int len, shift;
|
||||
|
||||
if (vcpu->arch.mmio_decode.return_handled)
|
||||
return 0;
|
||||
|
||||
vcpu->arch.mmio_decode.return_handled = 1;
|
||||
insn = vcpu->arch.mmio_decode.insn;
|
||||
|
||||
if (run->mmio.is_write)
|
||||
goto done;
|
||||
|
||||
len = vcpu->arch.mmio_decode.len;
|
||||
shift = vcpu->arch.mmio_decode.shift;
|
||||
|
||||
switch (len) {
|
||||
case 1:
|
||||
data8 = *((u8 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data8 << shift >> shift);
|
||||
break;
|
||||
case 2:
|
||||
data16 = *((u16 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data16 << shift >> shift);
|
||||
break;
|
||||
case 4:
|
||||
data32 = *((u32 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data32 << shift >> shift);
|
||||
break;
|
||||
case 8:
|
||||
data64 = *((u64 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data64 << shift >> shift);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
done:
|
||||
/* Move to next instruction */
|
||||
vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
|
||||
* proper exit to userspace.
|
||||
*/
|
||||
int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* If we got host interrupt then do nothing */
|
||||
if (trap->scause & CAUSE_IRQ_FLAG)
|
||||
return 1;
|
||||
|
||||
/* Handle guest traps */
|
||||
ret = -EFAULT;
|
||||
run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
switch (trap->scause) {
|
||||
case EXC_VIRTUAL_INST_FAULT:
|
||||
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
|
||||
ret = virtual_inst_fault(vcpu, run, trap);
|
||||
break;
|
||||
case EXC_INST_GUEST_PAGE_FAULT:
|
||||
case EXC_LOAD_GUEST_PAGE_FAULT:
|
||||
case EXC_STORE_GUEST_PAGE_FAULT:
|
||||
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
|
||||
ret = stage2_page_fault(vcpu, run, trap);
|
||||
break;
|
||||
case EXC_SUPERVISOR_SYSCALL:
|
||||
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
|
||||
ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Print details in-case of error */
|
||||
if (ret < 0) {
|
||||
kvm_err("VCPU exit error %d\n", ret);
|
||||
kvm_err("SEPC=0x%lx SSTATUS=0x%lx HSTATUS=0x%lx\n",
|
||||
vcpu->arch.guest_context.sepc,
|
||||
vcpu->arch.guest_context.sstatus,
|
||||
vcpu->arch.guest_context.hstatus);
|
||||
kvm_err("SCAUSE=0x%lx STVAL=0x%lx HTVAL=0x%lx HTINST=0x%lx\n",
|
||||
trap->scause, trap->stval, trap->htval, trap->htinst);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
167
arch/riscv/kvm/vcpu_fp.c
Normal file
@@ -0,0 +1,167 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Atish Patra <atish.patra@wdc.com>
 *	Anup Patel <anup.patel@wdc.com>
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>

#ifdef CONFIG_FPU
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
{
	unsigned long isa = vcpu->arch.isa;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;

	cntx->sstatus &= ~SR_FS;
	if (riscv_isa_extension_available(&isa, f) ||
	    riscv_isa_extension_available(&isa, d))
		cntx->sstatus |= SR_FS_INITIAL;
	else
		cntx->sstatus |= SR_FS_OFF;
}

void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
{
	cntx->sstatus &= ~SR_FS;
	cntx->sstatus |= SR_FS_CLEAN;
}

void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
				  unsigned long isa)
{
	if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
		if (riscv_isa_extension_available(&isa, d))
			__kvm_riscv_fp_d_save(cntx);
		else if (riscv_isa_extension_available(&isa, f))
			__kvm_riscv_fp_f_save(cntx);
		kvm_riscv_vcpu_fp_clean(cntx);
	}
}

void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
				     unsigned long isa)
{
	if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
		if (riscv_isa_extension_available(&isa, d))
			__kvm_riscv_fp_d_restore(cntx);
		else if (riscv_isa_extension_available(&isa, f))
			__kvm_riscv_fp_f_restore(cntx);
		kvm_riscv_vcpu_fp_clean(cntx);
	}
}

void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
{
	/* No need to check host sstatus as it can be modified outside */
	if (riscv_isa_extension_available(NULL, d))
		__kvm_riscv_fp_d_save(cntx);
	else if (riscv_isa_extension_available(NULL, f))
		__kvm_riscv_fp_f_save(cntx);
}

void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
{
	if (riscv_isa_extension_available(NULL, d))
		__kvm_riscv_fp_d_restore(cntx);
	else if (riscv_isa_extension_available(NULL, f))
		__kvm_riscv_fp_f_restore(cntx);
}
#endif

int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
			      const struct kvm_one_reg *reg,
			      unsigned long rtype)
{
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	unsigned long isa = vcpu->arch.isa;
	unsigned long __user *uaddr =
			(unsigned long __user *)(unsigned long)reg->addr;
	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
					    KVM_REG_SIZE_MASK |
					    rtype);
	void *reg_val;

	if ((rtype == KVM_REG_RISCV_FP_F) &&
	    riscv_isa_extension_available(&isa, f)) {
		if (KVM_REG_SIZE(reg->id) != sizeof(u32))
			return -EINVAL;
		if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
			reg_val = &cntx->fp.f.fcsr;
		else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
			 reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
			reg_val = &cntx->fp.f.f[reg_num];
		else
			return -EINVAL;
	} else if ((rtype == KVM_REG_RISCV_FP_D) &&
		   riscv_isa_extension_available(&isa, d)) {
		if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
			if (KVM_REG_SIZE(reg->id) != sizeof(u32))
				return -EINVAL;
			reg_val = &cntx->fp.d.fcsr;
		} else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
			   reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
			if (KVM_REG_SIZE(reg->id) != sizeof(u64))
				return -EINVAL;
			reg_val = &cntx->fp.d.f[reg_num];
		} else
			return -EINVAL;
	} else
		return -EINVAL;

	if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	return 0;
}

int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
			      const struct kvm_one_reg *reg,
			      unsigned long rtype)
{
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	unsigned long isa = vcpu->arch.isa;
	unsigned long __user *uaddr =
			(unsigned long __user *)(unsigned long)reg->addr;
	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
					    KVM_REG_SIZE_MASK |
					    rtype);
	void *reg_val;

	if ((rtype == KVM_REG_RISCV_FP_F) &&
	    riscv_isa_extension_available(&isa, f)) {
		if (KVM_REG_SIZE(reg->id) != sizeof(u32))
			return -EINVAL;
		if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
			reg_val = &cntx->fp.f.fcsr;
		else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
			 reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
			reg_val = &cntx->fp.f.f[reg_num];
		else
			return -EINVAL;
	} else if ((rtype == KVM_REG_RISCV_FP_D) &&
		   riscv_isa_extension_available(&isa, d)) {
		if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
			if (KVM_REG_SIZE(reg->id) != sizeof(u32))
				return -EINVAL;
			reg_val = &cntx->fp.d.fcsr;
		} else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
			   reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
			if (KVM_REG_SIZE(reg->id) != sizeof(u64))
				return -EINVAL;
			reg_val = &cntx->fp.d.f[reg_num];
		} else
			return -EINVAL;
	} else
		return -EINVAL;

	if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	return 0;
}

185	arch/riscv/kvm/vcpu_sbi.c	Normal file
@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0
/**
 * Copyright (c) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Atish Patra <atish.patra@wdc.com>
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/sbi.h>
#include <asm/kvm_vcpu_timer.h>

#define SBI_VERSION_MAJOR 0
#define SBI_VERSION_MINOR 1

static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
				       struct kvm_run *run)
{
	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;

	vcpu->arch.sbi_context.return_handled = 0;
	vcpu->stat.ecall_exit_stat++;
	run->exit_reason = KVM_EXIT_RISCV_SBI;
	run->riscv_sbi.extension_id = cp->a7;
	run->riscv_sbi.function_id = cp->a6;
	run->riscv_sbi.args[0] = cp->a0;
	run->riscv_sbi.args[1] = cp->a1;
	run->riscv_sbi.args[2] = cp->a2;
	run->riscv_sbi.args[3] = cp->a3;
	run->riscv_sbi.args[4] = cp->a4;
	run->riscv_sbi.args[5] = cp->a5;
	run->riscv_sbi.ret[0] = cp->a0;
	run->riscv_sbi.ret[1] = cp->a1;
}

int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;

	/* Handle SBI return only once */
	if (vcpu->arch.sbi_context.return_handled)
		return 0;
	vcpu->arch.sbi_context.return_handled = 1;

	/* Update return values */
	cp->a0 = run->riscv_sbi.ret[0];
	cp->a1 = run->riscv_sbi.ret[1];

	/* Move to next instruction */
	vcpu->arch.guest_context.sepc += 4;

	return 0;
}

#ifdef CONFIG_RISCV_SBI_V01

static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
				    struct kvm_run *run, u32 type)
{
	int i;
	struct kvm_vcpu *tmp;

	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
		tmp->arch.power_off = true;
	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);

	memset(&run->system_event, 0, sizeof(run->system_event));
	run->system_event.type = type;
	run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}

int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	ulong hmask;
	int i, ret = 1;
	u64 next_cycle;
	struct kvm_vcpu *rvcpu;
	bool next_sepc = true;
	struct cpumask cm, hm;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_cpu_trap utrap = { 0 };
	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;

	if (!cp)
		return -EINVAL;

	switch (cp->a7) {
	case SBI_EXT_0_1_CONSOLE_GETCHAR:
	case SBI_EXT_0_1_CONSOLE_PUTCHAR:
		/*
		 * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
		 * handled in kernel so we forward these to user-space
		 */
		kvm_riscv_vcpu_sbi_forward(vcpu, run);
		next_sepc = false;
		ret = 0;
		break;
	case SBI_EXT_0_1_SET_TIMER:
#if __riscv_xlen == 32
		next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
#else
		next_cycle = (u64)cp->a0;
#endif
		kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
		break;
	case SBI_EXT_0_1_CLEAR_IPI:
		kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
		break;
	case SBI_EXT_0_1_SEND_IPI:
		if (cp->a0)
			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
							   &utrap);
		else
			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
		if (utrap.scause) {
			utrap.sepc = cp->sepc;
			kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
			next_sepc = false;
			break;
		}
		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
			kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
		}
		break;
	case SBI_EXT_0_1_SHUTDOWN:
		kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
		next_sepc = false;
		ret = 0;
		break;
	case SBI_EXT_0_1_REMOTE_FENCE_I:
	case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
	case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
		if (cp->a0)
			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
							   &utrap);
		else
			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
		if (utrap.scause) {
			utrap.sepc = cp->sepc;
			kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
			next_sepc = false;
			break;
		}
		cpumask_clear(&cm);
		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
			if (rvcpu->cpu < 0)
				continue;
			cpumask_set_cpu(rvcpu->cpu, &cm);
		}
		riscv_cpuid_to_hartid_mask(&cm, &hm);
		if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
			sbi_remote_fence_i(cpumask_bits(&hm));
		else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
			sbi_remote_hfence_vvma(cpumask_bits(&hm),
					       cp->a1, cp->a2);
		else
			sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
						    cp->a1, cp->a2, cp->a3);
		break;
	default:
		/* Return error for unsupported SBI calls */
		cp->a0 = SBI_ERR_NOT_SUPPORTED;
		break;
	}

	if (next_sepc)
		cp->sepc += 4;

	return ret;
}

#else

int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	kvm_riscv_vcpu_sbi_forward(vcpu, run);
	return 0;
}

#endif

400	arch/riscv/kvm/vcpu_switch.S	Normal file
@@ -0,0 +1,400 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/csr.h>
|
||||
|
||||
.text
|
||||
.altmacro
|
||||
.option norelax
|
||||
|
||||
ENTRY(__kvm_riscv_switch_to)
|
||||
/* Save Host GPRs (except A0 and T0-T6) */
|
||||
REG_S ra, (KVM_ARCH_HOST_RA)(a0)
|
||||
REG_S sp, (KVM_ARCH_HOST_SP)(a0)
|
||||
REG_S gp, (KVM_ARCH_HOST_GP)(a0)
|
||||
REG_S tp, (KVM_ARCH_HOST_TP)(a0)
|
||||
REG_S s0, (KVM_ARCH_HOST_S0)(a0)
|
||||
REG_S s1, (KVM_ARCH_HOST_S1)(a0)
|
||||
REG_S a1, (KVM_ARCH_HOST_A1)(a0)
|
||||
REG_S a2, (KVM_ARCH_HOST_A2)(a0)
|
||||
REG_S a3, (KVM_ARCH_HOST_A3)(a0)
|
||||
REG_S a4, (KVM_ARCH_HOST_A4)(a0)
|
||||
REG_S a5, (KVM_ARCH_HOST_A5)(a0)
|
||||
REG_S a6, (KVM_ARCH_HOST_A6)(a0)
|
||||
REG_S a7, (KVM_ARCH_HOST_A7)(a0)
|
||||
REG_S s2, (KVM_ARCH_HOST_S2)(a0)
|
||||
REG_S s3, (KVM_ARCH_HOST_S3)(a0)
|
||||
REG_S s4, (KVM_ARCH_HOST_S4)(a0)
|
||||
REG_S s5, (KVM_ARCH_HOST_S5)(a0)
|
||||
REG_S s6, (KVM_ARCH_HOST_S6)(a0)
|
||||
REG_S s7, (KVM_ARCH_HOST_S7)(a0)
|
||||
REG_S s8, (KVM_ARCH_HOST_S8)(a0)
|
||||
REG_S s9, (KVM_ARCH_HOST_S9)(a0)
|
||||
REG_S s10, (KVM_ARCH_HOST_S10)(a0)
|
||||
REG_S s11, (KVM_ARCH_HOST_S11)(a0)
|
||||
|
||||
/* Save Host and Restore Guest SSTATUS */
|
||||
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
|
||||
csrrw t0, CSR_SSTATUS, t0
|
||||
REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
|
||||
|
||||
/* Save Host and Restore Guest HSTATUS */
|
||||
REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
|
||||
csrrw t1, CSR_HSTATUS, t1
|
||||
REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
|
||||
|
||||
/* Save Host and Restore Guest SCOUNTEREN */
|
||||
REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
|
||||
csrrw t2, CSR_SCOUNTEREN, t2
|
||||
REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
|
||||
|
||||
/* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
|
||||
csrrw t3, CSR_SSCRATCH, a0
|
||||
REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
|
||||
|
||||
/* Save Host STVEC and change it to return path */
|
||||
la t4, __kvm_switch_return
|
||||
csrrw t4, CSR_STVEC, t4
|
||||
REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
|
||||
|
||||
/* Restore Guest SEPC */
|
||||
REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0)
|
||||
csrw CSR_SEPC, t0
|
||||
|
||||
/* Restore Guest GPRs (except A0) */
|
||||
REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
|
||||
REG_L sp, (KVM_ARCH_GUEST_SP)(a0)
|
||||
REG_L gp, (KVM_ARCH_GUEST_GP)(a0)
|
||||
REG_L tp, (KVM_ARCH_GUEST_TP)(a0)
|
||||
REG_L t0, (KVM_ARCH_GUEST_T0)(a0)
|
||||
REG_L t1, (KVM_ARCH_GUEST_T1)(a0)
|
||||
REG_L t2, (KVM_ARCH_GUEST_T2)(a0)
|
||||
REG_L s0, (KVM_ARCH_GUEST_S0)(a0)
|
||||
REG_L s1, (KVM_ARCH_GUEST_S1)(a0)
|
||||
REG_L a1, (KVM_ARCH_GUEST_A1)(a0)
|
||||
REG_L a2, (KVM_ARCH_GUEST_A2)(a0)
|
||||
REG_L a3, (KVM_ARCH_GUEST_A3)(a0)
|
||||
REG_L a4, (KVM_ARCH_GUEST_A4)(a0)
|
||||
REG_L a5, (KVM_ARCH_GUEST_A5)(a0)
|
||||
REG_L a6, (KVM_ARCH_GUEST_A6)(a0)
|
||||
REG_L a7, (KVM_ARCH_GUEST_A7)(a0)
|
||||
REG_L s2, (KVM_ARCH_GUEST_S2)(a0)
|
||||
REG_L s3, (KVM_ARCH_GUEST_S3)(a0)
|
||||
REG_L s4, (KVM_ARCH_GUEST_S4)(a0)
|
||||
REG_L s5, (KVM_ARCH_GUEST_S5)(a0)
|
||||
REG_L s6, (KVM_ARCH_GUEST_S6)(a0)
|
||||
REG_L s7, (KVM_ARCH_GUEST_S7)(a0)
|
||||
REG_L s8, (KVM_ARCH_GUEST_S8)(a0)
|
||||
REG_L s9, (KVM_ARCH_GUEST_S9)(a0)
|
||||
REG_L s10, (KVM_ARCH_GUEST_S10)(a0)
|
||||
REG_L s11, (KVM_ARCH_GUEST_S11)(a0)
|
||||
REG_L t3, (KVM_ARCH_GUEST_T3)(a0)
|
||||
REG_L t4, (KVM_ARCH_GUEST_T4)(a0)
|
||||
REG_L t5, (KVM_ARCH_GUEST_T5)(a0)
|
||||
REG_L t6, (KVM_ARCH_GUEST_T6)(a0)
|
||||
|
||||
/* Restore Guest A0 */
|
||||
REG_L a0, (KVM_ARCH_GUEST_A0)(a0)
|
||||
|
||||
/* Resume Guest */
|
||||
sret
|
||||
|
||||
/* Back to Host */
|
||||
.align 2
|
||||
__kvm_switch_return:
|
||||
/* Swap Guest A0 with SSCRATCH */
|
||||
csrrw a0, CSR_SSCRATCH, a0
|
||||
|
||||
/* Save Guest GPRs (except A0) */
|
||||
REG_S ra, (KVM_ARCH_GUEST_RA)(a0)
|
||||
REG_S sp, (KVM_ARCH_GUEST_SP)(a0)
|
||||
REG_S gp, (KVM_ARCH_GUEST_GP)(a0)
|
||||
REG_S tp, (KVM_ARCH_GUEST_TP)(a0)
|
||||
REG_S t0, (KVM_ARCH_GUEST_T0)(a0)
|
||||
REG_S t1, (KVM_ARCH_GUEST_T1)(a0)
|
||||
REG_S t2, (KVM_ARCH_GUEST_T2)(a0)
|
||||
REG_S s0, (KVM_ARCH_GUEST_S0)(a0)
|
||||
REG_S s1, (KVM_ARCH_GUEST_S1)(a0)
|
||||
REG_S a1, (KVM_ARCH_GUEST_A1)(a0)
|
||||
REG_S a2, (KVM_ARCH_GUEST_A2)(a0)
|
||||
REG_S a3, (KVM_ARCH_GUEST_A3)(a0)
|
||||
REG_S a4, (KVM_ARCH_GUEST_A4)(a0)
|
||||
REG_S a5, (KVM_ARCH_GUEST_A5)(a0)
|
||||
REG_S a6, (KVM_ARCH_GUEST_A6)(a0)
|
||||
REG_S a7, (KVM_ARCH_GUEST_A7)(a0)
|
||||
REG_S s2, (KVM_ARCH_GUEST_S2)(a0)
|
||||
REG_S s3, (KVM_ARCH_GUEST_S3)(a0)
|
||||
REG_S s4, (KVM_ARCH_GUEST_S4)(a0)
|
||||
REG_S s5, (KVM_ARCH_GUEST_S5)(a0)
|
||||
REG_S s6, (KVM_ARCH_GUEST_S6)(a0)
|
||||
REG_S s7, (KVM_ARCH_GUEST_S7)(a0)
|
||||
REG_S s8, (KVM_ARCH_GUEST_S8)(a0)
|
||||
REG_S s9, (KVM_ARCH_GUEST_S9)(a0)
|
||||
REG_S s10, (KVM_ARCH_GUEST_S10)(a0)
|
||||
REG_S s11, (KVM_ARCH_GUEST_S11)(a0)
|
||||
REG_S t3, (KVM_ARCH_GUEST_T3)(a0)
|
||||
REG_S t4, (KVM_ARCH_GUEST_T4)(a0)
|
||||
REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
|
||||
REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
|
||||
|
||||
/* Save Guest SEPC */
|
||||
csrr t0, CSR_SEPC
|
||||
REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
|
||||
|
||||
/* Restore Host STVEC */
|
||||
REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
|
||||
csrw CSR_STVEC, t1
|
||||
|
||||
/* Save Guest A0 and Restore Host SSCRATCH */
|
||||
REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
|
||||
csrrw t2, CSR_SSCRATCH, t2
|
||||
REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
|
||||
|
||||
/* Save Guest and Restore Host SCOUNTEREN */
|
||||
REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
|
||||
csrrw t3, CSR_SCOUNTEREN, t3
|
||||
REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
|
||||
|
||||
/* Save Guest and Restore Host HSTATUS */
|
||||
REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
|
||||
csrrw t4, CSR_HSTATUS, t4
|
||||
REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
|
||||
|
||||
/* Save Guest and Restore Host SSTATUS */
|
||||
REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
|
||||
csrrw t5, CSR_SSTATUS, t5
|
||||
REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
|
||||
|
||||
/* Restore Host GPRs (except A0 and T0-T6) */
|
||||
REG_L ra, (KVM_ARCH_HOST_RA)(a0)
|
||||
REG_L sp, (KVM_ARCH_HOST_SP)(a0)
|
||||
REG_L gp, (KVM_ARCH_HOST_GP)(a0)
|
||||
REG_L tp, (KVM_ARCH_HOST_TP)(a0)
|
||||
REG_L s0, (KVM_ARCH_HOST_S0)(a0)
|
||||
REG_L s1, (KVM_ARCH_HOST_S1)(a0)
|
||||
REG_L a1, (KVM_ARCH_HOST_A1)(a0)
|
||||
REG_L a2, (KVM_ARCH_HOST_A2)(a0)
|
||||
REG_L a3, (KVM_ARCH_HOST_A3)(a0)
|
||||
REG_L a4, (KVM_ARCH_HOST_A4)(a0)
|
||||
REG_L a5, (KVM_ARCH_HOST_A5)(a0)
|
||||
REG_L a6, (KVM_ARCH_HOST_A6)(a0)
|
||||
REG_L a7, (KVM_ARCH_HOST_A7)(a0)
|
||||
REG_L s2, (KVM_ARCH_HOST_S2)(a0)
|
||||
REG_L s3, (KVM_ARCH_HOST_S3)(a0)
|
||||
REG_L s4, (KVM_ARCH_HOST_S4)(a0)
|
||||
REG_L s5, (KVM_ARCH_HOST_S5)(a0)
|
||||
REG_L s6, (KVM_ARCH_HOST_S6)(a0)
|
||||
REG_L s7, (KVM_ARCH_HOST_S7)(a0)
|
||||
REG_L s8, (KVM_ARCH_HOST_S8)(a0)
|
||||
REG_L s9, (KVM_ARCH_HOST_S9)(a0)
|
||||
REG_L s10, (KVM_ARCH_HOST_S10)(a0)
|
||||
REG_L s11, (KVM_ARCH_HOST_S11)(a0)
|
||||
|
||||
/* Return to C code */
|
||||
ret
|
||||
ENDPROC(__kvm_riscv_switch_to)
|
||||
|
||||
ENTRY(__kvm_riscv_unpriv_trap)
|
||||
/*
|
||||
* We assume that faulting unpriv load/store instruction is
|
||||
* 4-byte long and blindly increment SEPC by 4.
|
||||
*
|
||||
* The trap details will be saved at address pointed by 'A0'
|
||||
* register and we use 'A1' register as temporary.
|
||||
*/
|
||||
csrr a1, CSR_SEPC
|
||||
REG_S a1, (KVM_ARCH_TRAP_SEPC)(a0)
|
||||
addi a1, a1, 4
|
||||
csrw CSR_SEPC, a1
|
||||
csrr a1, CSR_SCAUSE
|
||||
REG_S a1, (KVM_ARCH_TRAP_SCAUSE)(a0)
|
||||
csrr a1, CSR_STVAL
|
||||
REG_S a1, (KVM_ARCH_TRAP_STVAL)(a0)
|
||||
csrr a1, CSR_HTVAL
|
||||
REG_S a1, (KVM_ARCH_TRAP_HTVAL)(a0)
|
||||
csrr a1, CSR_HTINST
|
||||
REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
|
||||
sret
|
||||
ENDPROC(__kvm_riscv_unpriv_trap)
|
||||
|
||||
#ifdef CONFIG_FPU
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_f_save
|
||||
__kvm_riscv_fp_f_save:
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
csrs CSR_SSTATUS, t1
|
||||
frcsr t0
|
||||
fsw f0, KVM_ARCH_FP_F_F0(a0)
|
||||
fsw f1, KVM_ARCH_FP_F_F1(a0)
|
||||
fsw f2, KVM_ARCH_FP_F_F2(a0)
|
||||
fsw f3, KVM_ARCH_FP_F_F3(a0)
|
||||
fsw f4, KVM_ARCH_FP_F_F4(a0)
|
||||
fsw f5, KVM_ARCH_FP_F_F5(a0)
|
||||
fsw f6, KVM_ARCH_FP_F_F6(a0)
|
||||
fsw f7, KVM_ARCH_FP_F_F7(a0)
|
||||
fsw f8, KVM_ARCH_FP_F_F8(a0)
|
||||
fsw f9, KVM_ARCH_FP_F_F9(a0)
|
||||
fsw f10, KVM_ARCH_FP_F_F10(a0)
|
||||
fsw f11, KVM_ARCH_FP_F_F11(a0)
|
||||
fsw f12, KVM_ARCH_FP_F_F12(a0)
|
||||
fsw f13, KVM_ARCH_FP_F_F13(a0)
|
||||
fsw f14, KVM_ARCH_FP_F_F14(a0)
|
||||
fsw f15, KVM_ARCH_FP_F_F15(a0)
|
||||
fsw f16, KVM_ARCH_FP_F_F16(a0)
|
||||
fsw f17, KVM_ARCH_FP_F_F17(a0)
|
||||
fsw f18, KVM_ARCH_FP_F_F18(a0)
|
||||
fsw f19, KVM_ARCH_FP_F_F19(a0)
|
||||
fsw f20, KVM_ARCH_FP_F_F20(a0)
|
||||
fsw f21, KVM_ARCH_FP_F_F21(a0)
|
||||
fsw f22, KVM_ARCH_FP_F_F22(a0)
|
||||
fsw f23, KVM_ARCH_FP_F_F23(a0)
|
||||
fsw f24, KVM_ARCH_FP_F_F24(a0)
|
||||
fsw f25, KVM_ARCH_FP_F_F25(a0)
|
||||
fsw f26, KVM_ARCH_FP_F_F26(a0)
|
||||
fsw f27, KVM_ARCH_FP_F_F27(a0)
|
||||
fsw f28, KVM_ARCH_FP_F_F28(a0)
|
||||
fsw f29, KVM_ARCH_FP_F_F29(a0)
|
||||
fsw f30, KVM_ARCH_FP_F_F30(a0)
|
||||
fsw f31, KVM_ARCH_FP_F_F31(a0)
|
||||
sw t0, KVM_ARCH_FP_F_FCSR(a0)
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_d_save
|
||||
__kvm_riscv_fp_d_save:
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
csrs CSR_SSTATUS, t1
|
||||
frcsr t0
|
||||
fsd f0, KVM_ARCH_FP_D_F0(a0)
|
||||
fsd f1, KVM_ARCH_FP_D_F1(a0)
|
||||
fsd f2, KVM_ARCH_FP_D_F2(a0)
|
||||
fsd f3, KVM_ARCH_FP_D_F3(a0)
|
||||
fsd f4, KVM_ARCH_FP_D_F4(a0)
|
||||
fsd f5, KVM_ARCH_FP_D_F5(a0)
|
||||
fsd f6, KVM_ARCH_FP_D_F6(a0)
|
||||
fsd f7, KVM_ARCH_FP_D_F7(a0)
|
||||
fsd f8, KVM_ARCH_FP_D_F8(a0)
|
||||
fsd f9, KVM_ARCH_FP_D_F9(a0)
|
||||
fsd f10, KVM_ARCH_FP_D_F10(a0)
|
||||
fsd f11, KVM_ARCH_FP_D_F11(a0)
|
||||
fsd f12, KVM_ARCH_FP_D_F12(a0)
|
||||
fsd f13, KVM_ARCH_FP_D_F13(a0)
|
||||
fsd f14, KVM_ARCH_FP_D_F14(a0)
|
||||
fsd f15, KVM_ARCH_FP_D_F15(a0)
|
||||
fsd f16, KVM_ARCH_FP_D_F16(a0)
|
||||
fsd f17, KVM_ARCH_FP_D_F17(a0)
|
||||
fsd f18, KVM_ARCH_FP_D_F18(a0)
|
||||
fsd f19, KVM_ARCH_FP_D_F19(a0)
|
||||
fsd f20, KVM_ARCH_FP_D_F20(a0)
|
||||
fsd f21, KVM_ARCH_FP_D_F21(a0)
|
||||
fsd f22, KVM_ARCH_FP_D_F22(a0)
|
||||
fsd f23, KVM_ARCH_FP_D_F23(a0)
|
||||
fsd f24, KVM_ARCH_FP_D_F24(a0)
|
||||
fsd f25, KVM_ARCH_FP_D_F25(a0)
|
||||
fsd f26, KVM_ARCH_FP_D_F26(a0)
|
||||
fsd f27, KVM_ARCH_FP_D_F27(a0)
|
||||
fsd f28, KVM_ARCH_FP_D_F28(a0)
|
||||
fsd f29, KVM_ARCH_FP_D_F29(a0)
|
||||
fsd f30, KVM_ARCH_FP_D_F30(a0)
|
||||
fsd f31, KVM_ARCH_FP_D_F31(a0)
|
||||
sw t0, KVM_ARCH_FP_D_FCSR(a0)
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_f_restore
|
||||
__kvm_riscv_fp_f_restore:
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
lw t0, KVM_ARCH_FP_F_FCSR(a0)
|
||||
csrs CSR_SSTATUS, t1
|
||||
flw f0, KVM_ARCH_FP_F_F0(a0)
|
||||
flw f1, KVM_ARCH_FP_F_F1(a0)
|
||||
flw f2, KVM_ARCH_FP_F_F2(a0)
|
||||
flw f3, KVM_ARCH_FP_F_F3(a0)
|
||||
flw f4, KVM_ARCH_FP_F_F4(a0)
|
||||
flw f5, KVM_ARCH_FP_F_F5(a0)
|
||||
flw f6, KVM_ARCH_FP_F_F6(a0)
|
||||
flw f7, KVM_ARCH_FP_F_F7(a0)
|
||||
flw f8, KVM_ARCH_FP_F_F8(a0)
|
||||
flw f9, KVM_ARCH_FP_F_F9(a0)
|
||||
flw f10, KVM_ARCH_FP_F_F10(a0)
|
||||
flw f11, KVM_ARCH_FP_F_F11(a0)
|
||||
flw f12, KVM_ARCH_FP_F_F12(a0)
|
||||
flw f13, KVM_ARCH_FP_F_F13(a0)
|
||||
flw f14, KVM_ARCH_FP_F_F14(a0)
|
||||
flw f15, KVM_ARCH_FP_F_F15(a0)
|
||||
flw f16, KVM_ARCH_FP_F_F16(a0)
|
||||
flw f17, KVM_ARCH_FP_F_F17(a0)
|
||||
flw f18, KVM_ARCH_FP_F_F18(a0)
|
||||
flw f19, KVM_ARCH_FP_F_F19(a0)
|
||||
flw f20, KVM_ARCH_FP_F_F20(a0)
|
||||
flw f21, KVM_ARCH_FP_F_F21(a0)
|
||||
flw f22, KVM_ARCH_FP_F_F22(a0)
|
||||
flw f23, KVM_ARCH_FP_F_F23(a0)
|
||||
flw f24, KVM_ARCH_FP_F_F24(a0)
|
||||
flw f25, KVM_ARCH_FP_F_F25(a0)
|
||||
flw f26, KVM_ARCH_FP_F_F26(a0)
|
||||
flw f27, KVM_ARCH_FP_F_F27(a0)
|
||||
flw f28, KVM_ARCH_FP_F_F28(a0)
|
||||
flw f29, KVM_ARCH_FP_F_F29(a0)
|
||||
flw f30, KVM_ARCH_FP_F_F30(a0)
|
||||
flw f31, KVM_ARCH_FP_F_F31(a0)
|
||||
fscsr t0
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
|
||||
.align 3
|
||||
.global __kvm_riscv_fp_d_restore
|
||||
__kvm_riscv_fp_d_restore:
|
||||
csrr t2, CSR_SSTATUS
|
||||
li t1, SR_FS
|
||||
lw t0, KVM_ARCH_FP_D_FCSR(a0)
|
||||
csrs CSR_SSTATUS, t1
|
||||
fld f0, KVM_ARCH_FP_D_F0(a0)
|
||||
fld f1, KVM_ARCH_FP_D_F1(a0)
|
||||
fld f2, KVM_ARCH_FP_D_F2(a0)
|
||||
fld f3, KVM_ARCH_FP_D_F3(a0)
|
||||
fld f4, KVM_ARCH_FP_D_F4(a0)
|
||||
fld f5, KVM_ARCH_FP_D_F5(a0)
|
||||
fld f6, KVM_ARCH_FP_D_F6(a0)
|
||||
fld f7, KVM_ARCH_FP_D_F7(a0)
|
||||
fld f8, KVM_ARCH_FP_D_F8(a0)
|
||||
fld f9, KVM_ARCH_FP_D_F9(a0)
|
||||
fld f10, KVM_ARCH_FP_D_F10(a0)
|
||||
fld f11, KVM_ARCH_FP_D_F11(a0)
|
||||
fld f12, KVM_ARCH_FP_D_F12(a0)
|
||||
fld f13, KVM_ARCH_FP_D_F13(a0)
|
||||
fld f14, KVM_ARCH_FP_D_F14(a0)
|
||||
fld f15, KVM_ARCH_FP_D_F15(a0)
|
||||
fld f16, KVM_ARCH_FP_D_F16(a0)
|
||||
fld f17, KVM_ARCH_FP_D_F17(a0)
|
||||
fld f18, KVM_ARCH_FP_D_F18(a0)
|
||||
fld f19, KVM_ARCH_FP_D_F19(a0)
|
||||
fld f20, KVM_ARCH_FP_D_F20(a0)
|
||||
fld f21, KVM_ARCH_FP_D_F21(a0)
|
||||
fld f22, KVM_ARCH_FP_D_F22(a0)
|
||||
fld f23, KVM_ARCH_FP_D_F23(a0)
|
||||
fld f24, KVM_ARCH_FP_D_F24(a0)
|
||||
fld f25, KVM_ARCH_FP_D_F25(a0)
|
||||
fld f26, KVM_ARCH_FP_D_F26(a0)
|
||||
fld f27, KVM_ARCH_FP_D_F27(a0)
|
||||
fld f28, KVM_ARCH_FP_D_F28(a0)
|
||||
fld f29, KVM_ARCH_FP_D_F29(a0)
|
||||
fld f30, KVM_ARCH_FP_D_F30(a0)
|
||||
fld f31, KVM_ARCH_FP_D_F31(a0)
|
||||
fscsr t0
|
||||
csrw CSR_SSTATUS, t2
|
||||
ret
|
||||
#endif
|
225	arch/riscv/kvm/vcpu_timer.c	Normal file
@@ -0,0 +1,225 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Atish Patra <atish.patra@wdc.com>
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <clocksource/timer-riscv.h>
#include <asm/csr.h>
#include <asm/delay.h>
#include <asm/kvm_vcpu_timer.h>

static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt)
{
	return get_cycles64() + gt->time_delta;
}

static u64 kvm_riscv_delta_cycles2ns(u64 cycles,
				     struct kvm_guest_timer *gt,
				     struct kvm_vcpu_timer *t)
{
	unsigned long flags;
	u64 cycles_now, cycles_delta, delta_ns;

	local_irq_save(flags);
	cycles_now = kvm_riscv_current_cycles(gt);
	if (cycles_now < cycles)
		cycles_delta = cycles - cycles_now;
	else
		cycles_delta = 0;
	delta_ns = (cycles_delta * gt->nsec_mult) >> gt->nsec_shift;
	local_irq_restore(flags);

	return delta_ns;
}

static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h)
{
	u64 delta_ns;
	struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
	struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
	struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;

	if (kvm_riscv_current_cycles(gt) < t->next_cycles) {
		delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
		hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns));
		return HRTIMER_RESTART;
	}

	t->next_set = false;
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_TIMER);

	return HRTIMER_NORESTART;
}

static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
{
	if (!t->init_done || !t->next_set)
		return -EINVAL;

	hrtimer_cancel(&t->hrt);
	t->next_set = false;

	return 0;
}

int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
{
	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
	struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
	u64 delta_ns;

	if (!t->init_done)
		return -EINVAL;

	kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_TIMER);

	delta_ns = kvm_riscv_delta_cycles2ns(ncycles, gt, t);
	t->next_cycles = ncycles;
	hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
	t->next_set = true;

	return 0;
}

int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
				 const struct kvm_one_reg *reg)
{
	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
	struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
	u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
					    KVM_REG_SIZE_MASK |
					    KVM_REG_RISCV_TIMER);
	u64 reg_val;

	if (KVM_REG_SIZE(reg->id) != sizeof(u64))
		return -EINVAL;
	if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
		return -EINVAL;

	switch (reg_num) {
	case KVM_REG_RISCV_TIMER_REG(frequency):
		reg_val = riscv_timebase;
		break;
	case KVM_REG_RISCV_TIMER_REG(time):
		reg_val = kvm_riscv_current_cycles(gt);
		break;
	case KVM_REG_RISCV_TIMER_REG(compare):
		reg_val = t->next_cycles;
		break;
	case KVM_REG_RISCV_TIMER_REG(state):
		reg_val = (t->next_set) ? KVM_RISCV_TIMER_STATE_ON :
					  KVM_RISCV_TIMER_STATE_OFF;
		break;
	default:
		return -EINVAL;
	}

	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	return 0;
}

int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
				 const struct kvm_one_reg *reg)
{
	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
	struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
	u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
					    KVM_REG_SIZE_MASK |
					    KVM_REG_RISCV_TIMER);
	u64 reg_val;
	int ret = 0;

	if (KVM_REG_SIZE(reg->id) != sizeof(u64))
		return -EINVAL;
	if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
		return -EINVAL;

	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	switch (reg_num) {
	case KVM_REG_RISCV_TIMER_REG(frequency):
		ret = -EOPNOTSUPP;
		break;
	case KVM_REG_RISCV_TIMER_REG(time):
		gt->time_delta = reg_val - get_cycles64();
		break;
	case KVM_REG_RISCV_TIMER_REG(compare):
		t->next_cycles = reg_val;
		break;
	case KVM_REG_RISCV_TIMER_REG(state):
		if (reg_val == KVM_RISCV_TIMER_STATE_ON)
			ret = kvm_riscv_vcpu_timer_next_event(vcpu, reg_val);
		else
			ret = kvm_riscv_vcpu_timer_cancel(t);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_timer *t = &vcpu->arch.timer;

	if (t->init_done)
		return -EINVAL;

	hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
	t->init_done = true;
	t->next_set = false;

	return 0;
}

int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
	vcpu->arch.timer.init_done = false;

	return ret;
}

int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
}

void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
{
	struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;

#ifdef CONFIG_64BIT
	csr_write(CSR_HTIMEDELTA, gt->time_delta);
#else
	csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
	csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
#endif
}

int kvm_riscv_guest_timer_init(struct kvm *kvm)
{
	struct kvm_guest_timer *gt = &kvm->arch.timer;

	riscv_cs_get_mult_shift(&gt->nsec_mult, &gt->nsec_shift);
	gt->time_delta = -get_cycles64();

	return 0;
}

97	arch/riscv/kvm/vm.c	Normal file
@@ -0,0 +1,97 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Anup Patel <anup.patel@wdc.com>
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/kvm_host.h>

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS()
};
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
		sizeof(struct kvm_vm_stat) / sizeof(u64));

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int r;

	r = kvm_riscv_stage2_alloc_pgd(kvm);
	if (r)
		return r;

	r = kvm_riscv_stage2_vmid_init(kvm);
	if (r) {
		kvm_riscv_stage2_free_pgd(kvm);
		return r;
	}

	return kvm_riscv_guest_timer_init(kvm);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	int i;

	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_vcpu_destroy(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}
	atomic_set(&kvm->online_vcpus, 0);
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		r = num_online_cpus();
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	default:
		r = 0;
		break;
	}

	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

120	arch/riscv/kvm/vmid.c	Normal file
@@ -0,0 +1,120 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/sbi.h>

static unsigned long vmid_version = 1;
static unsigned long vmid_next;
static unsigned long vmid_bits;
static DEFINE_SPINLOCK(vmid_lock);

void kvm_riscv_stage2_vmid_detect(void)
{
	unsigned long old;

	/* Figure-out number of VMID bits in HW */
	old = csr_read(CSR_HGATP);
	csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
	vmid_bits = csr_read(CSR_HGATP);
	vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT;
	vmid_bits = fls_long(vmid_bits);
	csr_write(CSR_HGATP, old);

	/* We polluted local TLB so flush all guest TLB */
	__kvm_riscv_hfence_gvma_all();

	/* We don't use VMID bits if they are not sufficient */
	if ((1UL << vmid_bits) < num_possible_cpus())
		vmid_bits = 0;
}

unsigned long kvm_riscv_stage2_vmid_bits(void)
{
	return vmid_bits;
}

int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
{
	/* Mark the initial VMID and VMID version invalid */
	kvm->arch.vmid.vmid_version = 0;
	kvm->arch.vmid.vmid = 0;

	return 0;
}

bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
{
	if (!vmid_bits)
		return false;

	return unlikely(READ_ONCE(vmid->vmid_version) !=
			READ_ONCE(vmid_version));
}

void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_vcpu *v;
	struct cpumask hmask;
	struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;

	if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
		return;

	spin_lock(&vmid_lock);

	/*
	 * We need to re-check the vmid_version here to ensure that if
	 * another vcpu already allocated a valid vmid for this vm.
	 */
	if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
		spin_unlock(&vmid_lock);
		return;
	}

	/* First user of a new VMID version? */
	if (unlikely(vmid_next == 0)) {
		WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
		vmid_next = 1;

		/*
		 * We ran out of VMIDs so we increment vmid_version and
		 * start assigning VMIDs from 1.
		 *
		 * This also means existing VMIDs assignement to all Guest
		 * instances is invalid and we have force VMID re-assignement
		 * for all Guest instances. The Guest instances that were not
		 * running will automatically pick-up new VMIDs because will
		 * call kvm_riscv_stage2_vmid_update() whenever they enter
		 * in-kernel run loop. For Guest instances that are already
		 * running, we force VM exits on all host CPUs using IPI and
		 * flush all Guest TLBs.
		 */
		riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
		sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
	}

	vmid->vmid = vmid_next;
	vmid_next++;
	vmid_next &= (1 << vmid_bits) - 1;

	WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version));

	spin_unlock(&vmid_lock);

	/* Request stage2 page table update for all VCPUs */
	kvm_for_each_vcpu(i, v, vcpu->kvm)
		kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
}

@@ -1074,8 +1074,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
||||
pte_t res;
|
||||
|
||||
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(mm) && pte_present(res))
|
||||
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
|
||||
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1091,8 +1092,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
|
||||
pte_t res;
|
||||
|
||||
res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(vma->vm_mm) && pte_present(res))
|
||||
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
|
||||
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1116,8 +1118,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
|
||||
} else {
|
||||
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
}
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(mm) && pte_present(res))
|
||||
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
|
||||
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,11 @@
|
||||
#include <asm/page.h>
|
||||
#include <asm/gmap.h>
|
||||
|
||||
#define UVC_CC_OK 0
|
||||
#define UVC_CC_ERROR 1
|
||||
#define UVC_CC_BUSY 2
|
||||
#define UVC_CC_PARTIAL 3
|
||||
|
||||
#define UVC_RC_EXECUTED 0x0001
|
||||
#define UVC_RC_INV_CMD 0x0002
|
||||
#define UVC_RC_INV_STATE 0x0003
|
||||
@ -351,8 +356,9 @@ static inline int is_prot_virt_host(void)
|
||||
}
|
||||
|
||||
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
|
||||
int uv_destroy_page(unsigned long paddr);
|
||||
int uv_destroy_owned_page(unsigned long paddr);
|
||||
int uv_convert_from_secure(unsigned long paddr);
|
||||
int uv_convert_owned_from_secure(unsigned long paddr);
|
||||
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
|
||||
|
||||
void setup_uv(void);
|
||||
@ -360,7 +366,7 @@ void setup_uv(void);
|
||||
#define is_prot_virt_host() 0
|
||||
static inline void setup_uv(void) {}
|
||||
|
||||
static inline int uv_destroy_page(unsigned long paddr)
|
||||
static inline int uv_destroy_owned_page(unsigned long paddr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -369,6 +375,11 @@ static inline int uv_convert_from_secure(unsigned long paddr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int uv_convert_owned_from_secure(unsigned long paddr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_S390_UV_H */
|
||||
|
@ -100,7 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
|
||||
*
|
||||
* @paddr: Absolute host address of page to be destroyed
|
||||
*/
|
||||
int uv_destroy_page(unsigned long paddr)
|
||||
static int uv_destroy_page(unsigned long paddr)
|
||||
{
|
||||
struct uv_cb_cfs uvcb = {
|
||||
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
|
||||
@ -120,6 +120,22 @@ int uv_destroy_page(unsigned long paddr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must already hold a reference to the page
|
||||
*/
|
||||
int uv_destroy_owned_page(unsigned long paddr)
|
||||
{
|
||||
struct page *page = phys_to_page(paddr);
|
||||
int rc;
|
||||
|
||||
get_page(page);
|
||||
rc = uv_destroy_page(paddr);
|
||||
if (!rc)
|
||||
clear_bit(PG_arch_1, &page->flags);
|
||||
put_page(page);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Requests the Ultravisor to encrypt a guest page and make it
|
||||
* accessible to the host for paging (export).
|
||||
@ -139,6 +155,22 @@ int uv_convert_from_secure(unsigned long paddr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must already hold a reference to the page
|
||||
*/
|
||||
int uv_convert_owned_from_secure(unsigned long paddr)
|
||||
{
|
||||
struct page *page = phys_to_page(paddr);
|
||||
int rc;
|
||||
|
||||
get_page(page);
|
||||
rc = uv_convert_from_secure(paddr);
|
||||
if (!rc)
|
||||
clear_bit(PG_arch_1, &page->flags);
|
||||
put_page(page);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the expected ref_count for a page that would otherwise have no
|
||||
* further pins. This was cribbed from similar functions in other places in
|
||||
@ -165,7 +197,7 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
|
||||
{
|
||||
pte_t entry = READ_ONCE(*ptep);
|
||||
struct page *page;
|
||||
int expected, rc = 0;
|
||||
int expected, cc = 0;
|
||||
|
||||
if (!pte_present(entry))
|
||||
return -ENXIO;
|
||||
@ -181,12 +213,25 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
|
||||
if (!page_ref_freeze(page, expected))
|
||||
return -EBUSY;
|
||||
set_bit(PG_arch_1, &page->flags);
|
||||
rc = uv_call(0, (u64)uvcb);
|
||||
/*
|
||||
* If the UVC does not succeed or fail immediately, we don't want to
|
||||
* loop for long, or we might get stall notifications.
|
||||
* On the other hand, this is a complex scenario and we are holding a lot of
|
||||
* locks, so we can't easily sleep and reschedule. We try only once,
|
||||
* and if the UVC returned busy or partial completion, we return
|
||||
* -EAGAIN and we let the callers deal with it.
|
||||
*/
|
||||
cc = __uv_call(0, (u64)uvcb);
|
||||
page_ref_unfreeze(page, expected);
|
||||
/* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
|
||||
if (rc)
|
||||
rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
|
||||
return rc;
|
||||
/*
|
||||
* Return -ENXIO if the page was not mapped, -EINVAL for other errors.
|
||||
* If busy or partially completed, return -EAGAIN.
|
||||
*/
|
||||
if (cc == UVC_CC_OK)
|
||||
return 0;
|
||||
else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
|
||||
return -EAGAIN;
|
||||
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -212,7 +257,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
|
||||
uaddr = __gmap_translate(gmap, gaddr);
|
||||
if (IS_ERR_VALUE(uaddr))
|
||||
goto out;
|
||||
vma = find_vma(gmap->mm, uaddr);
|
||||
vma = vma_lookup(gmap->mm, uaddr);
|
||||
if (!vma)
|
||||
goto out;
|
||||
/*
|
||||
@ -239,6 +284,10 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
|
||||
mmap_read_unlock(gmap->mm);
|
||||
|
||||
if (rc == -EAGAIN) {
|
||||
/*
|
||||
* If we are here because the UVC returned busy or partial
|
||||
* completion, this is just a useless check, but it is safe.
|
||||
*/
|
||||
wait_on_page_writeback(page);
|
||||
} else if (rc == -EBUSY) {
|
||||
/*
|
||||
|
@ -518,6 +518,11 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
if (rc == -EINVAL)
|
||||
return 0;
|
||||
/*
|
||||
* If we got -EAGAIN here, we simply return it. It will eventually
|
||||
* get propagated all the way to userspace, which should then try
|
||||
* again.
|
||||
*/
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -2487,8 +2487,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
case KVM_S390_PV_COMMAND: {
|
||||
struct kvm_pv_cmd args;
|
||||
|
||||
/* protvirt means user sigp */
|
||||
kvm->arch.user_cpu_state_ctrl = 1;
|
||||
/* protvirt means user cpu state */
|
||||
kvm_s390_set_user_cpu_state_ctrl(kvm);
|
||||
r = 0;
|
||||
if (!is_prot_virt_host()) {
|
||||
r = -EINVAL;
|
||||
@ -3802,7 +3802,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
vcpu_load(vcpu);
|
||||
|
||||
/* user space knows about this interface - let it control the state */
|
||||
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
|
||||
kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
|
||||
|
||||
switch (mp_state->mp_state) {
|
||||
case KVM_MP_STATE_STOPPED:
|
||||
@ -4255,6 +4255,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
|
||||
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
|
||||
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
|
||||
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
|
||||
VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
|
||||
}
|
||||
/*
|
||||
* If userspace sets the riccb (e.g. after migration) to a valid state,
|
||||
|
@ -208,6 +208,15 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
|
||||
return kvm->arch.user_cpu_state_ctrl != 0;
|
||||
}
|
||||
|
||||
static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
|
||||
{
|
||||
if (kvm->arch.user_cpu_state_ctrl)
|
||||
return;
|
||||
|
||||
VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control");
|
||||
kvm->arch.user_cpu_state_ctrl = 1;
|
||||
}
|
||||
|
||||
/* implemented in pv.c */
|
||||
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
|
||||
|
@ -397,6 +397,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
||||
mmap_read_unlock(current->mm);
|
||||
if (rc == -EFAULT)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
if (rc == -EAGAIN)
|
||||
continue;
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
start += PAGE_SIZE;
|
||||
|
@ -16,18 +16,17 @@
|
||||
|
||||
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
|
||||
{
|
||||
int cc = 0;
|
||||
int cc;
|
||||
|
||||
if (kvm_s390_pv_cpu_get_handle(vcpu)) {
|
||||
cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
|
||||
UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
|
||||
if (!kvm_s390_pv_cpu_get_handle(vcpu))
|
||||
return 0;
|
||||
|
||||
cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
|
||||
|
||||
KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
|
||||
vcpu->vcpu_id, *rc, *rrc);
|
||||
WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
|
||||
|
||||
KVM_UV_EVENT(vcpu->kvm, 3,
|
||||
"PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
|
||||
vcpu->vcpu_id, *rc, *rrc);
|
||||
WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
|
||||
*rc, *rrc);
|
||||
}
|
||||
/* Intended memory leak for something that should never happen. */
|
||||
if (!cc)
|
||||
free_pages(vcpu->arch.pv.stor_base,
|
||||
@ -196,7 +195,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
|
||||
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
|
||||
|
||||
cc = uv_call(0, (u64)&uvcb);
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
|
||||
|
@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
|
||||
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
|
||||
u64 *status_reg)
|
||||
{
|
||||
unsigned int i;
|
||||
struct kvm_vcpu *v;
|
||||
bool all_stopped = true;
|
||||
|
||||
kvm_for_each_vcpu(i, v, vcpu->kvm) {
|
||||
if (v == vcpu)
|
||||
continue;
|
||||
if (!is_vcpu_stopped(v))
|
||||
all_stopped = false;
|
||||
}
|
||||
|
||||
*status_reg &= 0xffffffff00000000UL;
|
||||
|
||||
/* Reject set arch order, with czam we're always in z/Arch mode. */
|
||||
*status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
|
||||
SIGP_STATUS_INCORRECT_STATE);
|
||||
*status_reg |= SIGP_STATUS_INVALID_PARAMETER;
|
||||
return SIGP_CC_STATUS_STORED;
|
||||
}
|
||||
|
||||
|
@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault);
|
||||
*/
|
||||
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long vmaddr;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
@ -681,11 +682,17 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
|
||||
gaddr >> PMD_SHIFT);
|
||||
if (vmaddr) {
|
||||
vmaddr |= gaddr & ~PMD_MASK;
|
||||
|
||||
vma = vma_lookup(gmap->mm, vmaddr);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return;
|
||||
|
||||
/* Get pointer to the page table entry */
|
||||
ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
|
||||
if (likely(ptep))
|
||||
if (likely(ptep)) {
|
||||
ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__gmap_zap);
|
||||
@ -2677,8 +2684,10 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
|
||||
{
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
|
||||
/* There is a reference through the mapping */
|
||||
if (pte_present(pte))
|
||||
WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK));
|
||||
WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PGSTE
|
||||
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
|
||||
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
/* We need a valid VMA, otherwise this is clearly a fault. */
|
||||
vma = vma_lookup(mm, addr);
|
||||
if (!vma)
|
||||
return -EFAULT;
|
||||
|
||||
pgd = pgd_offset(mm, addr);
|
||||
p4d = p4d_alloc(mm, pgd, addr);
|
||||
if (!p4d)
|
||||
return NULL;
|
||||
pud = pud_alloc(mm, p4d, addr);
|
||||
if (!pud)
|
||||
return NULL;
|
||||
pmd = pmd_alloc(mm, pud, addr);
|
||||
return pmd;
|
||||
if (!pgd_present(*pgd))
|
||||
return -ENOENT;
|
||||
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (!p4d_present(*p4d))
|
||||
return -ENOENT;
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
if (!pud_present(*pud))
|
||||
return -ENOENT;
|
||||
|
||||
/* Large PUDs are not supported yet. */
|
||||
if (pud_large(*pud))
|
||||
return -EFAULT;
|
||||
|
||||
*pmdp = pmd_offset(pud, addr);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -778,14 +792,23 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
pmdp = pmd_alloc_map(mm, addr);
|
||||
if (unlikely(!pmdp))
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* we can ignore attempts to set the key to 0, because it already is 0.
|
||||
*/
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return key ? -EFAULT : 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
return -EFAULT;
|
||||
return key ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
if (pmd_large(*pmdp)) {
|
||||
@ -801,10 +824,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
new = old = pgste_get_lock(ptep);
|
||||
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
|
||||
PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
@ -881,14 +901,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *ptep;
|
||||
int cc = 0;
|
||||
|
||||
pmdp = pmd_alloc_map(mm, addr);
|
||||
if (unlikely(!pmdp))
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* the storage key is 0 and there is nothing for us to do.
|
||||
*/
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pmd_large(*pmdp)) {
|
||||
@ -900,10 +929,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
new = old = pgste_get_lock(ptep);
|
||||
/* Reset guest reference bit only */
|
||||
pgste_val(new) &= ~PGSTE_GR_BIT;
|
||||
@ -935,15 +961,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
pmdp = pmd_alloc_map(mm, addr);
|
||||
if (unlikely(!pmdp))
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* the storage key is 0.
|
||||
*/
|
||||
*key = 0;
|
||||
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
/* Not yet mapped memory has a zero key */
|
||||
spin_unlock(ptl);
|
||||
*key = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -956,10 +991,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
pgste = pgste_get_lock(ptep);
|
||||
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||
@ -988,6 +1020,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
|
||||
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||
unsigned long *oldpte, unsigned long *oldpgste)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long pgstev;
|
||||
spinlock_t *ptl;
|
||||
pgste_t pgste;
|
||||
@ -997,6 +1030,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||
WARN_ON_ONCE(orc > ESSA_MAX);
|
||||
if (unlikely(orc > ESSA_MAX))
|
||||
return -EINVAL;
|
||||
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return -EFAULT;
|
||||
ptep = get_locked_pte(mm, hva, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
@ -1089,10 +1126,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
|
||||
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
|
||||
unsigned long bits, unsigned long value)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pgste_t new;
|
||||
pte_t *ptep;
|
||||
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return -EFAULT;
|
||||
ptep = get_locked_pte(mm, hva, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
@ -1117,9 +1158,13 @@ EXPORT_SYMBOL(set_pgste_bits);
|
||||
*/
|
||||
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return -EFAULT;
|
||||
ptep = get_locked_pte(mm, hva, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
|
@ -50,7 +50,7 @@
|
||||
* so ratio of 4 should be enough.
|
||||
*/
|
||||
#define KVM_VCPU_ID_RATIO 4
|
||||
#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
|
||||
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
|
||||
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 3
|
||||
@ -407,6 +407,7 @@ struct kvm_mmu_root_info {
|
||||
#define KVM_HAVE_MMU_RWLOCK
|
||||
|
||||
struct kvm_mmu_page;
|
||||
struct kvm_page_fault;
|
||||
|
||||
/*
|
||||
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
|
||||
@ -416,8 +417,7 @@ struct kvm_mmu_page;
|
||||
struct kvm_mmu {
|
||||
unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
|
||||
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
|
||||
bool prefault);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
|
||||
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
|
||||
@ -499,7 +499,6 @@ struct kvm_pmu {
|
||||
u64 fixed_ctr_ctrl;
|
||||
u64 global_ctrl;
|
||||
u64 global_status;
|
||||
u64 global_ovf_ctrl;
|
||||
u64 counter_bitmask[2];
|
||||
u64 global_ctrl_mask;
|
||||
u64 global_ovf_ctrl_mask;
|
||||
@ -581,7 +580,6 @@ struct kvm_vcpu_hv {
|
||||
struct kvm_hyperv_exit exit;
|
||||
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
|
||||
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
|
||||
cpumask_t tlb_flush;
|
||||
bool enforce_cpuid;
|
||||
struct {
|
||||
u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
|
||||
@ -1073,7 +1071,7 @@ struct kvm_arch {
|
||||
atomic_t apic_map_dirty;
|
||||
|
||||
/* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
|
||||
struct mutex apicv_update_lock;
|
||||
struct rw_semaphore apicv_update_lock;
|
||||
|
||||
bool apic_access_memslot_enabled;
|
||||
unsigned long apicv_inhibit_reasons;
|
||||
@ -1087,17 +1085,23 @@ struct kvm_arch {
|
||||
|
||||
unsigned long irq_sources_bitmap;
|
||||
s64 kvmclock_offset;
|
||||
|
||||
/*
|
||||
* This also protects nr_vcpus_matched_tsc which is read from a
|
||||
* preemption-disabled region, so it must be a raw spinlock.
|
||||
*/
|
||||
raw_spinlock_t tsc_write_lock;
|
||||
u64 last_tsc_nsec;
|
||||
u64 last_tsc_write;
|
||||
u32 last_tsc_khz;
|
||||
u64 last_tsc_offset;
|
||||
u64 cur_tsc_nsec;
|
||||
u64 cur_tsc_write;
|
||||
u64 cur_tsc_offset;
|
||||
u64 cur_tsc_generation;
|
||||
int nr_vcpus_matched_tsc;
|
||||
|
||||
raw_spinlock_t pvclock_gtod_sync_lock;
|
||||
seqcount_raw_spinlock_t pvclock_sc;
|
||||
bool use_master_clock;
|
||||
u64 master_kernel_ns;
|
||||
u64 master_cycle_now;
|
||||
@ -1207,10 +1211,11 @@ struct kvm_arch {
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* If set, rmaps have been allocated for all memslots and should be
|
||||
* allocated for any newly created or modified memslots.
|
||||
* If set, at least one shadow root has been allocated. This flag
|
||||
* is used as one input when determining whether certain memslot
|
||||
* related allocations are necessary.
|
||||
*/
|
||||
bool memslots_have_rmaps;
|
||||
bool shadow_root_allocated;
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
hpa_t hv_root_tdp;
|
||||
@ -1296,6 +1301,8 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
|
||||
}
|
||||
|
||||
struct kvm_x86_ops {
|
||||
const char *name;
|
||||
|
||||
int (*hardware_enable)(void);
|
||||
void (*hardware_disable)(void);
|
||||
void (*hardware_unsetup)(void);
|
||||
@ -1405,10 +1412,11 @@ struct kvm_x86_ops {
|
||||
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
|
||||
|
||||
/*
|
||||
* Retrieve somewhat arbitrary exit information. Intended to be used
|
||||
* only from within tracepoints to avoid VMREADs when tracing is off.
|
||||
* Retrieve somewhat arbitrary exit information. Intended to
|
||||
* be used only from within tracepoints or error paths.
|
||||
*/
|
||||
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
|
||||
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
|
||||
u64 *info1, u64 *info2,
|
||||
u32 *exit_int_info, u32 *exit_int_info_err_code);
|
||||
|
||||
int (*check_intercept)(struct kvm_vcpu *vcpu,
|
||||
@ -1541,6 +1549,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
{
|
||||
return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
||||
}
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_FREE
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
@ -1657,6 +1667,9 @@ extern u64 kvm_mce_cap_supported;
|
||||
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
|
||||
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
|
||||
void *insn, int insn_len);
|
||||
void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
|
||||
u64 *data, u8 ndata);
|
||||
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_enable_efer_bits(u64);
|
||||
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
@ -1713,9 +1726,6 @@ void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
|
||||
bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault);
|
||||
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
gfn_t gfn, void *data, int offset, int len,
|
||||
u32 access);
|
||||
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
|
||||
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
|
||||
|
||||
@ -1864,7 +1874,6 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
|
||||
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
|
||||
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
|
||||
|
||||
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
|
||||
void kvm_make_scan_ioapic_request(struct kvm *kvm);
|
||||
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
|
||||
unsigned long *vcpu_bitmap);
|
||||
@ -1933,6 +1942,9 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
|
||||
|
||||
int kvm_cpu_dirty_log_size(void);
|
||||
|
||||
int alloc_all_memslots_rmaps(struct kvm *kvm);
|
||||
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
|
||||
|
||||
#define KVM_CLOCK_VALID_FLAGS \
|
||||
(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
@ -49,8 +49,12 @@ struct kvm_page_track_notifier_node {
|
||||
int kvm_page_track_init(struct kvm *kvm);
|
||||
void kvm_page_track_cleanup(struct kvm *kvm);
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
|
||||
int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages);
|
||||
|
||||
void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
@ -59,8 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode);
|
||||
|
||||
void
|
||||
kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
|
@ -504,4 +504,8 @@ struct kvm_pmu_event_filter {
|
||||
#define KVM_PMU_EVENT_ALLOW 0
|
||||
#define KVM_PMU_EVENT_DENY 1
|
||||
|
||||
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
|
||||
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
|
||||
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@ -291,8 +291,10 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
|
||||
{
|
||||
if (handler)
|
||||
kvm_posted_intr_wakeup_handler = handler;
|
||||
else
|
||||
else {
|
||||
kvm_posted_intr_wakeup_handler = dummy_handler;
|
||||
synchronize_rcu();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
|
||||
|
||||
|
@ -129,4 +129,7 @@ config KVM_MMU_AUDIT
|
||||
This option adds a R/W kVM module parameter 'mmu_audit', which allows
|
||||
auditing of KVM MMU events at runtime.
|
||||
|
||||
config KVM_EXTERNAL_WRITE_TRACKING
|
||||
bool
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -53,9 +53,16 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This one is tied to SSB in the user API, and not
|
||||
* visible in /proc/cpuinfo.
|
||||
*/
|
||||
#define KVM_X86_FEATURE_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
|
||||
|
||||
#define F feature_bit
|
||||
#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
|
||||
|
||||
|
||||
static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
|
||||
struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
|
||||
{
|
||||
@ -500,7 +507,8 @@ void kvm_set_cpu_caps(void)
|
||||
kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
|
||||
F(CLZERO) | F(XSAVEERPTR) |
|
||||
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
|
||||
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON)
|
||||
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
|
||||
__feature_bit(KVM_X86_FEATURE_PSFD)
|
||||
);
|
||||
|
||||
/*
|
||||
|
@ -4222,6 +4222,11 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
|
||||
if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
/*
|
||||
* If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
|
||||
* check however is unnecessary because CPL is always 0 outside
|
||||
* protected mode.
|
||||
*/
|
||||
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
|
||||
ctxt->ops->check_pmc(ctxt, rcx))
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
@ -112,7 +112,7 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
|
||||
if (!!auto_eoi_old == !!auto_eoi_new)
|
||||
return;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
down_write(&vcpu->kvm->arch.apicv_update_lock);
|
||||
|
||||
if (auto_eoi_new)
|
||||
hv->synic_auto_eoi_used++;
|
||||
@ -123,7 +123,7 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
|
||||
!hv->synic_auto_eoi_used,
|
||||
APICV_INHIBIT_REASON_HYPERV);
|
||||
|
||||
mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
|
||||
up_write(&vcpu->kvm->arch.apicv_update_lock);
|
||||
}
|
||||
|
||||
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
|
||||
@ -1754,7 +1754,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
|
||||
int i;
|
||||
gpa_t gpa;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
struct hv_tlb_flush_ex flush_ex;
|
||||
struct hv_tlb_flush flush;
|
||||
u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
|
||||
@ -1836,18 +1835,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
|
||||
}
|
||||
}
|
||||
|
||||
cpumask_clear(&hv_vcpu->tlb_flush);
|
||||
|
||||
vcpu_mask = all_cpus ? NULL :
|
||||
sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
|
||||
vp_bitmap, vcpu_bitmap);
|
||||
|
||||
/*
|
||||
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
|
||||
* analyze it here, flush TLB regardless of the specified address space.
|
||||
*/
|
||||
kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
|
||||
NULL, vcpu_mask, &hv_vcpu->tlb_flush);
|
||||
if (all_cpus) {
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
|
||||
} else {
|
||||
vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
|
||||
vp_bitmap, vcpu_bitmap);
|
||||
|
||||
kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
|
||||
vcpu_mask);
|
||||
}
|
||||
|
||||
ret_success:
|
||||
/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
|
||||
|
@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
|
||||
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
|
||||
{
|
||||
ioapic->rtc_status.pending_eoi = 0;
|
||||
bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
|
||||
bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS);
|
||||
}
|
||||
|
||||
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
|
||||
|
@ -39,13 +39,13 @@ struct kvm_vcpu;
|
||||
|
||||
struct dest_map {
|
||||
/* vcpu bitmap where IRQ has been sent */
|
||||
DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
|
||||
DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS);
|
||||
|
||||
/*
|
||||
* Vector sent to a given vcpu, only valid when
|
||||
* the vcpu's bit in map is set
|
||||
*/
|
||||
u8 vectors[KVM_MAX_VCPU_ID + 1];
|
||||
u8 vectors[KVM_MAX_VCPU_IDS];
|
||||
};
|
||||
|
||||
|
||||
|
@ -44,9 +44,8 @@
|
||||
#define PT32_ROOT_LEVEL 2
|
||||
#define PT32E_ROOT_LEVEL 3
|
||||
|
||||
#define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | \
|
||||
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | \
|
||||
X86_CR4_LA57)
|
||||
#define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \
|
||||
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
|
||||
|
||||
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
|
||||
|
||||
@ -80,6 +79,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
||||
int kvm_mmu_load(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -114,17 +114,91 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.mmu->shadow_root_level);
|
||||
}
|
||||
|
||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
bool prefault);
|
||||
struct kvm_page_fault {
|
||||
/* arguments to kvm_mmu_do_page_fault. */
|
||||
const gpa_t addr;
|
||||
const u32 error_code;
|
||||
const bool prefetch;
|
||||
|
||||
/* Derived from error_code. */
|
||||
const bool exec;
|
||||
const bool write;
|
||||
const bool present;
|
||||
const bool rsvd;
|
||||
const bool user;
|
||||
|
||||
/* Derived from mmu and global state. */
|
||||
const bool is_tdp;
|
||||
const bool nx_huge_page_workaround_enabled;
|
||||
|
||||
/*
|
||||
* Whether a >4KB mapping can be created or is forbidden due to NX
|
||||
* hugepages.
|
||||
*/
|
||||
bool huge_page_disallowed;
|
||||
|
||||
/*
|
||||
* Maximum page size that can be created for this fault; input to
|
||||
* FNAME(fetch), __direct_map and kvm_tdp_mmu_map.
|
||||
*/
|
||||
u8 max_level;
|
||||
|
||||
/*
|
||||
* Page size that can be created based on the max_level and the
|
||||
* page size used by the host mapping.
|
||||
*/
|
||||
u8 req_level;
|
||||
|
||||
/*
|
||||
* Page size that will be created based on the req_level and
|
||||
* huge_page_disallowed.
|
||||
*/
|
||||
u8 goal_level;
|
||||
|
||||
/* Shifted addr, or result of guest page table walk if addr is a gva. */
|
||||
gfn_t gfn;
|
||||
|
||||
/* The memslot containing gfn. May be NULL. */
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
/* Outputs of kvm_faultin_pfn. */
|
||||
kvm_pfn_t pfn;
|
||||
hva_t hva;
|
||||
bool map_writable;
|
||||
};
|
||||
|
||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
|
||||
|
||||
extern int nx_huge_pages;
|
||||
static inline bool is_nx_huge_page_enabled(void)
|
||||
{
|
||||
return READ_ONCE(nx_huge_pages);
|
||||
}
|
||||
|
||||
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
u32 err, bool prefault)
|
||||
u32 err, bool prefetch)
|
||||
{
|
||||
struct kvm_page_fault fault = {
|
||||
.addr = cr2_or_gpa,
|
||||
.error_code = err,
|
||||
.exec = err & PFERR_FETCH_MASK,
|
||||
.write = err & PFERR_WRITE_MASK,
|
||||
.present = err & PFERR_PRESENT_MASK,
|
||||
.rsvd = err & PFERR_RSVD_MASK,
|
||||
.user = err & PFERR_USER_MASK,
|
||||
.prefetch = prefetch,
|
||||
.is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
|
||||
.nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(),
|
||||
|
||||
.max_level = KVM_MAX_HUGEPAGE_LEVEL,
|
||||
.req_level = PG_LEVEL_4K,
|
||||
.goal_level = PG_LEVEL_4K,
|
||||
};
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
if (likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault))
|
||||
return kvm_tdp_page_fault(vcpu, cr2_or_gpa, err, prefault);
|
||||
if (fault.is_tdp)
|
||||
return kvm_tdp_page_fault(vcpu, &fault);
|
||||
#endif
|
||||
return vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, err, prefault);
|
||||
return vcpu->arch.mmu->page_fault(vcpu, &fault);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -230,14 +304,26 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
|
||||
int kvm_mmu_post_init_vm(struct kvm *kvm);
|
||||
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
|
||||
|
||||
static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
|
||||
static inline bool kvm_shadow_root_allocated(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* Read memslot_have_rmaps before rmap pointers. Hence, threads reading
|
||||
* memslots_have_rmaps in any lock context are guaranteed to see the
|
||||
* pointers. Pairs with smp_store_release in alloc_all_memslots_rmaps.
|
||||
* Read shadow_root_allocated before related pointers. Hence, threads
|
||||
* reading shadow_root_allocated in any lock context are guaranteed to
|
||||
* see the pointers. Pairs with smp_store_release in
|
||||
* mmu_first_shadow_root_alloc.
|
||||
*/
|
||||
return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
|
||||
return smp_load_acquire(&kvm->arch.shadow_root_allocated);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
|
||||
#else
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
|
||||
#endif
|
||||
|
||||
static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
|
||||
{
|
||||
return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm);
|
||||
}
|
||||
|
||||
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -118,13 +118,8 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
|
||||
kvm_x86_ops.cpu_dirty_log_size;
|
||||
}
|
||||
|
||||
extern int nx_huge_pages;
|
||||
static inline bool is_nx_huge_page_enabled(void)
|
||||
{
|
||||
return READ_ONCE(nx_huge_pages);
|
||||
}
|
||||
|
||||
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync);
|
||||
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, bool can_unsync, bool prefetch);
|
||||
|
||||
void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
@ -155,19 +150,11 @@ enum {
|
||||
RET_PF_SPURIOUS,
|
||||
};
|
||||
|
||||
/* Bits which may be returned by set_spte() */
|
||||
#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
|
||||
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
|
||||
#define SET_SPTE_SPURIOUS BIT(2)
|
||||
|
||||
int kvm_mmu_max_mapping_level(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
kvm_pfn_t pfn, int max_level);
|
||||
int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
int max_level, kvm_pfn_t *pfnp,
|
||||
bool huge_page_disallowed, int *req_level);
|
||||
void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
|
||||
kvm_pfn_t *pfnp, int *goal_levelp);
|
||||
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
|
||||
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
|
||||
|
||||
void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
|
||||
|
||||
|
@ -252,9 +252,9 @@ TRACE_EVENT(
|
||||
|
||||
TRACE_EVENT(
|
||||
fast_page_fault,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
u64 *sptep, u64 old_spte, int ret),
|
||||
TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, ret),
|
||||
TP_ARGS(vcpu, fault, sptep, old_spte, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, vcpu_id)
|
||||
@ -268,8 +268,8 @@ TRACE_EVENT(
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu->vcpu_id;
|
||||
__entry->cr2_or_gpa = cr2_or_gpa;
|
||||
__entry->error_code = error_code;
|
||||
__entry->cr2_or_gpa = fault->addr;
|
||||
__entry->error_code = fault->error_code;
|
||||
__entry->sptep = sptep;
|
||||
__entry->old_spte = old_spte;
|
||||
__entry->new_spte = *sptep;
|
||||
@ -367,8 +367,8 @@ TRACE_EVENT(
|
||||
|
||||
TRACE_EVENT(
|
||||
kvm_mmu_spte_requested,
|
||||
TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
|
||||
TP_ARGS(addr, level, pfn),
|
||||
TP_PROTO(struct kvm_page_fault *fault),
|
||||
TP_ARGS(fault),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, gfn)
|
||||
@ -377,9 +377,9 @@ TRACE_EVENT(
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->gfn = addr >> PAGE_SHIFT;
|
||||
__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
|
||||
__entry->level = level;
|
||||
__entry->gfn = fault->gfn;
|
||||
__entry->pfn = fault->pfn | (fault->gfn & (KVM_PAGES_PER_HPAGE(fault->goal_level) - 1));
|
||||
__entry->level = fault->goal_level;
|
||||
),
|
||||
|
||||
TP_printk("gfn %llx pfn %llx level %d",
|
||||
|
@ -19,6 +19,12 @@
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
|
||||
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
|
||||
!tdp_enabled || kvm_shadow_root_allocated(kvm);
|
||||
}
|
||||
|
||||
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
{
|
||||
int i;
|
||||
@ -29,12 +35,17 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
|
||||
int kvm_page_track_create_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
|
||||
if (i == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm))
|
||||
continue;
|
||||
|
||||
slot->arch.gfn_track[i] =
|
||||
kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
@ -57,6 +68,21 @@ static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
|
||||
return true;
|
||||
}
|
||||
|
||||
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned short *gfn_track;
|
||||
|
||||
if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
|
||||
return 0;
|
||||
|
||||
gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track), GFP_KERNEL_ACCOUNT);
|
||||
if (gfn_track == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode, short count)
|
||||
{
|
||||
@ -92,6 +118,10 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, 1);
|
||||
|
||||
/*
|
||||
@ -126,6 +156,10 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
return;
|
||||
|
||||
if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(kvm)))
|
||||
return;
|
||||
|
||||
update_gfn_track(slot, gfn, mode, -1);
|
||||
|
||||
/*
|
||||
@ -139,19 +173,22 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
|
||||
/*
|
||||
* check if the corresponding access on the specified guest page is tracked.
|
||||
*/
|
||||
bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
enum kvm_page_track_mode mode)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
int index;
|
||||
|
||||
if (WARN_ON(!page_track_mode_is_valid(mode)))
|
||||
return false;
|
||||
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
if (!slot)
|
||||
return false;
|
||||
|
||||
if (mode == KVM_PAGE_TRACK_WRITE &&
|
||||
!kvm_page_track_write_tracking_enabled(vcpu->kvm))
|
||||
return false;
|
||||
|
||||
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
|
||||
return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
|
||||
}
|
||||
|
@ -561,6 +561,7 @@ static bool
|
||||
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, pt_element_t gpte, bool no_dirty_log)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned pte_access;
|
||||
gfn_t gfn;
|
||||
kvm_pfn_t pfn;
|
||||
@ -573,30 +574,21 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
|
||||
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
|
||||
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
||||
|
||||
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn,
|
||||
no_dirty_log && (pte_access & ACC_WRITE_MASK));
|
||||
if (!slot)
|
||||
return false;
|
||||
|
||||
pfn = gfn_to_pfn_memslot_atomic(slot, gfn);
|
||||
if (is_error_pfn(pfn))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* we call mmu_set_spte() with host_writable = true because
|
||||
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
|
||||
*/
|
||||
mmu_set_spte(vcpu, spte, pte_access, false, PG_LEVEL_4K, gfn, pfn,
|
||||
true, true);
|
||||
|
||||
mmu_set_spte(vcpu, slot, spte, pte_access, gfn, pfn, NULL);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, const void *pte)
|
||||
{
|
||||
pt_element_t gpte = *(const pt_element_t *)pte;
|
||||
|
||||
FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
|
||||
}
|
||||
|
||||
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
|
||||
struct guest_walker *gw, int level)
|
||||
{
|
||||
@ -663,21 +655,16 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
|
||||
* If the guest tries to write a write-protected page, we need to
|
||||
* emulate this operation, return 1 to indicate this case.
|
||||
*/
|
||||
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
struct guest_walker *gw, u32 error_code,
|
||||
int max_level, kvm_pfn_t pfn, bool map_writable,
|
||||
bool prefault)
|
||||
static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
struct guest_walker *gw)
|
||||
{
|
||||
bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
|
||||
bool write_fault = error_code & PFERR_WRITE_MASK;
|
||||
bool exec = error_code & PFERR_FETCH_MASK;
|
||||
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
|
||||
struct kvm_mmu_page *sp = NULL;
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
unsigned int direct_access, access;
|
||||
int top_level, level, req_level, ret;
|
||||
gfn_t base_gfn = gw->gfn;
|
||||
int top_level, ret;
|
||||
gfn_t base_gfn = fault->gfn;
|
||||
|
||||
WARN_ON_ONCE(gw->gfn != base_gfn);
|
||||
direct_access = gw->pte_access;
|
||||
|
||||
top_level = vcpu->arch.mmu->root_level;
|
||||
@ -695,7 +682,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
|
||||
goto out_gpte_changed;
|
||||
|
||||
for (shadow_walk_init(&it, vcpu, addr);
|
||||
for (shadow_walk_init(&it, vcpu, fault->addr);
|
||||
shadow_walk_okay(&it) && it.level > gw->level;
|
||||
shadow_walk_next(&it)) {
|
||||
gfn_t table_gfn;
|
||||
@ -707,7 +694,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
if (!is_shadow_present_pte(*it.sptep)) {
|
||||
table_gfn = gw->table_gfn[it.level - 2];
|
||||
access = gw->pt_access[it.level - 2];
|
||||
sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
|
||||
sp = kvm_mmu_get_page(vcpu, table_gfn, fault->addr,
|
||||
it.level-1, false, access);
|
||||
/*
|
||||
* We must synchronize the pagetable before linking it
|
||||
@ -741,10 +728,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
link_shadow_page(vcpu, it.sptep, sp);
|
||||
}
|
||||
|
||||
level = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn,
|
||||
huge_page_disallowed, &req_level);
|
||||
kvm_mmu_hugepage_adjust(vcpu, fault);
|
||||
|
||||
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
|
||||
trace_kvm_mmu_spte_requested(fault);
|
||||
|
||||
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
|
||||
clear_sp_write_flooding_count(it.sptep);
|
||||
@ -753,12 +739,11 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
* We cannot overwrite existing page tables with an NX
|
||||
* large page, as the leaf could be executable.
|
||||
*/
|
||||
if (nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(*it.sptep, gw->gfn, it.level,
|
||||
&pfn, &level);
|
||||
if (fault->nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(fault, *it.sptep, it.level);
|
||||
|
||||
base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
||||
if (it.level == level)
|
||||
base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
||||
if (it.level == fault->goal_level)
|
||||
break;
|
||||
|
||||
validate_direct_spte(vcpu, it.sptep, direct_access);
|
||||
@ -766,16 +751,20 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
drop_large_spte(vcpu, it.sptep);
|
||||
|
||||
if (!is_shadow_present_pte(*it.sptep)) {
|
||||
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
|
||||
sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
|
||||
it.level - 1, true, direct_access);
|
||||
link_shadow_page(vcpu, it.sptep, sp);
|
||||
if (huge_page_disallowed && req_level >= it.level)
|
||||
if (fault->huge_page_disallowed &&
|
||||
fault->req_level >= it.level)
|
||||
account_huge_nx_page(vcpu->kvm, sp);
|
||||
}
|
||||
}
|
||||
|
||||
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
|
||||
it.level, base_gfn, pfn, prefault, map_writable);
|
||||
if (WARN_ON_ONCE(it.level != fault->goal_level))
|
||||
return -EFAULT;
|
||||
|
||||
ret = mmu_set_spte(vcpu, fault->slot, it.sptep, gw->pte_access,
|
||||
base_gfn, fault->pfn, fault);
|
||||
if (ret == RET_PF_SPURIOUS)
|
||||
return ret;
|
||||
|
||||
@ -841,45 +830,40 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
|
||||
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
|
||||
* a negative value on error.
|
||||
*/
|
||||
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
||||
bool prefault)
|
||||
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
bool write_fault = error_code & PFERR_WRITE_MASK;
|
||||
bool user_fault = error_code & PFERR_USER_MASK;
|
||||
struct guest_walker walker;
|
||||
int r;
|
||||
kvm_pfn_t pfn;
|
||||
hva_t hva;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable, is_self_change_mapping;
|
||||
int max_level;
|
||||
bool is_self_change_mapping;
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
||||
|
||||
/*
|
||||
* If PFEC.RSVD is set, this is a shadow page fault.
|
||||
* The bit needs to be cleared before walking guest page tables.
|
||||
*/
|
||||
error_code &= ~PFERR_RSVD_MASK;
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
|
||||
WARN_ON_ONCE(fault->is_tdp);
|
||||
|
||||
/*
|
||||
* Look up the guest pte for the faulting address.
|
||||
* If PFEC.RSVD is set, this is a shadow page fault.
|
||||
* The bit needs to be cleared before walking guest page tables.
|
||||
*/
|
||||
r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
|
||||
r = FNAME(walk_addr)(&walker, vcpu, fault->addr,
|
||||
fault->error_code & ~PFERR_RSVD_MASK);
|
||||
|
||||
/*
|
||||
* The page is not mapped by the guest. Let the guest handle it.
|
||||
*/
|
||||
if (!r) {
|
||||
pgprintk("%s: guest page fault\n", __func__);
|
||||
if (!prefault)
|
||||
if (!fault->prefetch)
|
||||
kvm_inject_emulated_page_fault(vcpu, &walker.fault);
|
||||
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) {
|
||||
shadow_page_table_clear_flood(vcpu, addr);
|
||||
fault->gfn = walker.gfn;
|
||||
fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
|
||||
|
||||
if (page_fault_handle_page_track(vcpu, fault)) {
|
||||
shadow_page_table_clear_flood(vcpu, fault->addr);
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
@ -890,29 +874,28 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
||||
vcpu->arch.write_fault_to_shadow_pgtable = false;
|
||||
|
||||
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
|
||||
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
|
||||
&walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
|
||||
|
||||
if (is_self_change_mapping)
|
||||
max_level = PG_LEVEL_4K;
|
||||
fault->max_level = PG_LEVEL_4K;
|
||||
else
|
||||
max_level = walker.level;
|
||||
fault->max_level = walker.level;
|
||||
|
||||
mmu_seq = vcpu->kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
|
||||
write_fault, &map_writable, &r))
|
||||
if (kvm_faultin_pfn(vcpu, fault, &r))
|
||||
return r;
|
||||
|
||||
if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
|
||||
if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
|
||||
return r;
|
||||
|
||||
/*
|
||||
* Do not change pte_access if the pfn is a mmio page, otherwise
|
||||
* we will cache the incorrect access into mmio spte.
|
||||
*/
|
||||
if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
|
||||
!is_cr0_wp(vcpu->arch.mmu) && !user_fault && !is_noslot_pfn(pfn)) {
|
||||
if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) &&
|
||||
!is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) {
|
||||
walker.pte_access |= ACC_WRITE_MASK;
|
||||
walker.pte_access &= ~ACC_USER_MASK;
|
||||
|
||||
@ -928,20 +911,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
|
||||
if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
|
||||
goto out_unlock;
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
|
||||
r = make_mmu_pages_available(vcpu);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
r = FNAME(fetch)(vcpu, addr, &walker, error_code, max_level, pfn,
|
||||
map_writable, prefault);
|
||||
r = FNAME(fetch)(vcpu, fault, &walker);
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
||||
|
||||
out_unlock:
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
kvm_release_pfn_clean(fault->pfn);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -1007,10 +989,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
|
||||
sizeof(pt_element_t)))
|
||||
break;
|
||||
|
||||
FNAME(update_pte)(vcpu, sp, sptep, &gpte);
|
||||
FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
|
||||
}
|
||||
|
||||
if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
|
||||
if (!sp->unsync_children)
|
||||
break;
|
||||
}
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
@ -1066,14 +1048,19 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
|
||||
* Using the cached information from sp->gfns is safe because:
|
||||
* - The spte has a reference to the struct page, so the pfn for a given gfn
|
||||
* can't change unless all sptes pointing to it are nuked first.
|
||||
*
|
||||
* Returns
|
||||
* < 0: the sp should be zapped
|
||||
* 0: the sp is synced and no tlb flushing is required
|
||||
* > 0: the sp is synced and tlb flushing is required
|
||||
*/
|
||||
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
{
|
||||
union kvm_mmu_page_role mmu_role = vcpu->arch.mmu->mmu_role.base;
|
||||
int i, nr_present = 0;
|
||||
int i;
|
||||
bool host_writable;
|
||||
gpa_t first_pte_gpa;
|
||||
int set_spte_ret = 0;
|
||||
bool flush = false;
|
||||
|
||||
/*
|
||||
* Ignore various flags when verifying that it's safe to sync a shadow
|
||||
@ -1098,11 +1085,13 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
*/
|
||||
if (WARN_ON_ONCE(sp->role.direct ||
|
||||
(sp->role.word ^ mmu_role.word) & ~sync_role_ign.word))
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
|
||||
u64 *sptep, spte;
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned pte_access;
|
||||
pt_element_t gpte;
|
||||
gpa_t pte_gpa;
|
||||
@ -1115,10 +1104,10 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
|
||||
if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
|
||||
sizeof(pt_element_t)))
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
|
||||
set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
|
||||
flush = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1127,30 +1116,27 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
pte_access &= FNAME(gpte_access)(gpte);
|
||||
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
|
||||
|
||||
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
|
||||
&nr_present))
|
||||
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
|
||||
continue;
|
||||
|
||||
if (gfn != sp->gfns[i]) {
|
||||
drop_spte(vcpu->kvm, &sp->spt[i]);
|
||||
set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
|
||||
flush = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
nr_present++;
|
||||
sptep = &sp->spt[i];
|
||||
spte = *sptep;
|
||||
host_writable = spte & shadow_host_writable_mask;
|
||||
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
make_spte(vcpu, sp, slot, pte_access, gfn,
|
||||
spte_to_pfn(spte), spte, true, false,
|
||||
host_writable, &spte);
|
||||
|
||||
host_writable = sp->spt[i] & shadow_host_writable_mask;
|
||||
|
||||
set_spte_ret |= set_spte(vcpu, &sp->spt[i],
|
||||
pte_access, PG_LEVEL_4K,
|
||||
gfn, spte_to_pfn(sp->spt[i]),
|
||||
true, false, host_writable);
|
||||
flush |= mmu_spte_update(sptep, spte);
|
||||
}
|
||||
|
||||
if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
|
||||
return nr_present;
|
||||
return flush;
|
||||
}
|
||||
|
||||
#undef pt_element_t
|
||||
|
@ -89,15 +89,17 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
|
||||
E820_TYPE_RAM);
|
||||
}
|
||||
|
||||
int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
|
||||
bool can_unsync, bool host_writable, bool ad_disabled,
|
||||
u64 *new_spte)
|
||||
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
|
||||
u64 old_spte, bool prefetch, bool can_unsync,
|
||||
bool host_writable, u64 *new_spte)
|
||||
{
|
||||
int level = sp->role.level;
|
||||
u64 spte = SPTE_MMU_PRESENT_MASK;
|
||||
int ret = 0;
|
||||
bool wrprot = false;
|
||||
|
||||
if (ad_disabled)
|
||||
if (sp->role.ad_disabled)
|
||||
spte |= SPTE_TDP_AD_DISABLED_MASK;
|
||||
else if (kvm_vcpu_ad_need_write_protect(vcpu))
|
||||
spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
|
||||
@ -109,7 +111,7 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
* read access. See FNAME(gpte_access) in paging_tmpl.h.
|
||||
*/
|
||||
spte |= shadow_present_mask;
|
||||
if (!speculative)
|
||||
if (!prefetch)
|
||||
spte |= spte_shadow_accessed_mask(spte);
|
||||
|
||||
if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
|
||||
@ -150,7 +152,7 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
* is responsibility of kvm_mmu_get_page / kvm_mmu_sync_roots.
|
||||
* Same reasoning can be applied to dirty page accounting.
|
||||
*/
|
||||
if (!can_unsync && is_writable_pte(old_spte))
|
||||
if (is_writable_pte(old_spte))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
@ -159,10 +161,10 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
* e.g. it's write-tracked (upper-level SPs) or has one or more
|
||||
* shadow pages and unsync'ing pages is not allowed.
|
||||
*/
|
||||
if (mmu_try_to_unsync_pages(vcpu, gfn, can_unsync)) {
|
||||
if (mmu_try_to_unsync_pages(vcpu, slot, gfn, can_unsync, prefetch)) {
|
||||
pgprintk("%s: found shadow page for %llx, marking ro\n",
|
||||
__func__, gfn);
|
||||
ret |= SET_SPTE_WRITE_PROTECTED_PT;
|
||||
wrprot = true;
|
||||
pte_access &= ~ACC_WRITE_MASK;
|
||||
spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
|
||||
}
|
||||
@ -171,16 +173,22 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
if (pte_access & ACC_WRITE_MASK)
|
||||
spte |= spte_shadow_dirty_mask(spte);
|
||||
|
||||
if (speculative)
|
||||
out:
|
||||
if (prefetch)
|
||||
spte = mark_spte_for_access_track(spte);
|
||||
|
||||
out:
|
||||
WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->shadow_zero_check, spte, level),
|
||||
"spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
|
||||
get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));
|
||||
|
||||
if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
|
||||
/* Enforced by kvm_mmu_hugepage_adjust. */
|
||||
WARN_ON(level > PG_LEVEL_4K);
|
||||
mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
|
||||
}
|
||||
|
||||
*new_spte = spte;
|
||||
return ret;
|
||||
return wrprot;
|
||||
}
|
||||
|
||||
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
|
||||
|
@ -310,12 +310,7 @@ static inline bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check,
|
||||
static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
|
||||
u64 spte, int level)
|
||||
{
|
||||
/*
|
||||
* Use a bitwise-OR instead of a logical-OR to aggregate the reserved
|
||||
* bits and EPT's invalid memtype/XWR checks to avoid an extra Jcc
|
||||
* (this is extremely unlikely to be short-circuited as true).
|
||||
*/
|
||||
return __is_bad_mt_xwr(rsvd_check, spte) |
|
||||
return __is_bad_mt_xwr(rsvd_check, spte) ||
|
||||
__is_rsvd_bits_set(rsvd_check, spte, level);
|
||||
}
|
||||
|
||||
@ -334,15 +329,11 @@ static inline u64 get_mmio_spte_generation(u64 spte)
|
||||
return gen;
|
||||
}
|
||||
|
||||
/* Bits which may be returned by set_spte() */
|
||||
#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
|
||||
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
|
||||
#define SET_SPTE_SPURIOUS BIT(2)
|
||||
|
||||
int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
|
||||
bool can_unsync, bool host_writable, bool ad_disabled,
|
||||
u64 *new_spte);
|
||||
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
struct kvm_memory_slot *slot,
|
||||
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
|
||||
u64 old_spte, bool prefetch, bool can_unsync,
|
||||
bool host_writable, u64 *new_spte);
|
||||
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled);
|
||||
u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access);
|
||||
u64 mark_spte_for_access_track(u64 spte);
|
||||
|
@ -167,6 +167,7 @@ static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
|
||||
role.direct = true;
|
||||
role.gpte_is_8_bytes = true;
|
||||
role.access = ACC_ALL;
|
||||
role.ad_disabled = !shadow_accessed_mask;
|
||||
|
||||
return role;
|
||||
}
|
||||
@ -489,8 +490,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
}
|
||||
|
||||
/*
|
||||
* tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically
|
||||
* and handle the associated bookkeeping, but do not mark the page dirty
|
||||
* tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
|
||||
* and handle the associated bookkeeping. Do not mark the page dirty
|
||||
* in KVM's dirty bitmaps.
|
||||
*
|
||||
* @kvm: kvm instance
|
||||
@ -499,9 +500,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
* Returns: true if the SPTE was set, false if it was not. If false is returned,
|
||||
* this function will have no side-effects.
|
||||
*/
|
||||
static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
@ -527,43 +528,6 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* tdp_mmu_map_set_spte_atomic - Set a leaf TDP MMU SPTE atomically to resolve a
|
||||
* TDP page fault.
|
||||
*
|
||||
* @vcpu: The vcpu instance that took the TDP page fault.
|
||||
* @iter: a tdp_iter instance currently on the SPTE that should be set
|
||||
* @new_spte: The value the SPTE should be set to
|
||||
*
|
||||
* Returns: true if the SPTE was set, false if it was not. If false is returned,
|
||||
* this function will have no side-effects.
|
||||
*/
|
||||
static inline bool tdp_mmu_map_set_spte_atomic(struct kvm_vcpu *vcpu,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Use kvm_vcpu_gfn_to_memslot() instead of going through
|
||||
* handle_changed_spte_dirty_log() to leverage vcpu->last_used_slot.
|
||||
*/
|
||||
if (is_writable_pte(new_spte)) {
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, iter->gfn);
|
||||
|
||||
if (slot && kvm_slot_dirty_track_enabled(slot)) {
|
||||
/* Enforced by kvm_mmu_hugepage_adjust. */
|
||||
WARN_ON_ONCE(iter->level > PG_LEVEL_4K);
|
||||
mark_page_dirty_in_slot(kvm, slot, iter->gfn);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
||||
struct tdp_iter *iter)
|
||||
{
|
||||
@ -573,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
||||
* immediately installing a present entry in its place
|
||||
* before the TLBs are flushed.
|
||||
*/
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, REMOVED_SPTE))
|
||||
if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
|
||||
return false;
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
|
||||
@ -929,26 +893,26 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
|
||||
* Installs a last-level SPTE to handle a TDP page fault.
|
||||
* (NPT/EPT violation/misconfiguration)
|
||||
*/
|
||||
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
||||
int map_writable,
|
||||
struct tdp_iter *iter,
|
||||
kvm_pfn_t pfn, bool prefault)
|
||||
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault,
|
||||
struct tdp_iter *iter)
|
||||
{
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
|
||||
u64 new_spte;
|
||||
int ret = RET_PF_FIXED;
|
||||
int make_spte_ret = 0;
|
||||
bool wrprot = false;
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn)))
|
||||
WARN_ON(sp->role.level != fault->goal_level);
|
||||
if (unlikely(!fault->slot))
|
||||
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
|
||||
else
|
||||
make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
|
||||
pfn, iter->old_spte, prefault, true,
|
||||
map_writable, !shadow_accessed_mask,
|
||||
&new_spte);
|
||||
wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn,
|
||||
fault->pfn, iter->old_spte, fault->prefetch, true,
|
||||
fault->map_writable, &new_spte);
|
||||
|
||||
if (new_spte == iter->old_spte)
|
||||
ret = RET_PF_SPURIOUS;
|
||||
else if (!tdp_mmu_map_set_spte_atomic(vcpu, iter, new_spte))
|
||||
else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
/*
|
||||
@ -956,10 +920,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
||||
* protected, emulation is needed. If the emulation was skipped,
|
||||
* the vCPU would have the same fault again.
|
||||
*/
|
||||
if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
|
||||
if (write)
|
||||
if (wrprot) {
|
||||
if (fault->write)
|
||||
ret = RET_PF_EMULATE;
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
|
||||
}
|
||||
|
||||
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
|
||||
@ -986,37 +949,26 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
||||
* Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
|
||||
* page tables and SPTEs to translate the faulting guest physical address.
|
||||
*/
|
||||
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
int map_writable, int max_level, kvm_pfn_t pfn,
|
||||
bool prefault)
|
||||
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
|
||||
bool write = error_code & PFERR_WRITE_MASK;
|
||||
bool exec = error_code & PFERR_FETCH_MASK;
|
||||
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
|
||||
struct kvm_mmu *mmu = vcpu->arch.mmu;
|
||||
struct tdp_iter iter;
|
||||
struct kvm_mmu_page *sp;
|
||||
u64 *child_pt;
|
||||
u64 new_spte;
|
||||
int ret;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
int level;
|
||||
int req_level;
|
||||
|
||||
level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
|
||||
huge_page_disallowed, &req_level);
|
||||
kvm_mmu_hugepage_adjust(vcpu, fault);
|
||||
|
||||
trace_kvm_mmu_spte_requested(gpa, level, pfn);
|
||||
trace_kvm_mmu_spte_requested(fault);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
|
||||
if (nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(iter.old_spte, gfn,
|
||||
iter.level, &pfn, &level);
|
||||
tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
|
||||
if (fault->nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
|
||||
|
||||
if (iter.level == level)
|
||||
if (iter.level == fault->goal_level)
|
||||
break;
|
||||
|
||||
/*
|
||||
@ -1052,10 +1004,10 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
new_spte = make_nonleaf_spte(child_pt,
|
||||
!shadow_accessed_mask);
|
||||
|
||||
if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) {
|
||||
if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, new_spte)) {
|
||||
tdp_mmu_link_page(vcpu->kvm, sp,
|
||||
huge_page_disallowed &&
|
||||
req_level >= iter.level);
|
||||
fault->huge_page_disallowed &&
|
||||
fault->req_level >= iter.level);
|
||||
|
||||
trace_kvm_mmu_get_page(sp, true);
|
||||
} else {
|
||||
@ -1065,13 +1017,12 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
}
|
||||
}
|
||||
|
||||
if (iter.level != level) {
|
||||
if (iter.level != fault->goal_level) {
|
||||
rcu_read_unlock();
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
|
||||
pfn, prefault);
|
||||
ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
@ -1241,8 +1192,7 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
|
||||
new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
|
||||
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
|
||||
new_spte)) {
|
||||
if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
|
||||
/*
|
||||
* The iter must explicitly re-read the SPTE because
|
||||
* the atomic cmpxchg failed.
|
||||
@ -1310,8 +1260,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
|
||||
new_spte)) {
|
||||
if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
|
||||
/*
|
||||
* The iter must explicitly re-read the SPTE because
|
||||
* the atomic cmpxchg failed.
|
||||
|
@ -48,9 +48,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm);
|
||||
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
|
||||
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm);
|
||||
|
||||
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
int map_writable, int max_level, kvm_pfn_t pfn,
|
||||
bool prefault);
|
||||
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
|
||||
|
||||
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
|
||||
bool flush);
|
||||
@ -92,7 +90,6 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
|
||||
#ifdef CONFIG_X86_64
|
||||
bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
|
||||
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
|
||||
|
||||
static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
|
||||
@ -114,7 +111,6 @@ static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
|
||||
#else
|
||||
static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
|
||||
static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
|
||||
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
|
||||
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
|
||||
static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; }
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user