Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2025-01-01 10:45:49 +00:00)

commit 85c4efbe60

Merge v6.12-rc6 into usb-next

We need the USB fixes in here as well, and this resolves a merge conflict in:
  drivers/usb/typec/tcpm/tcpm.c

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/r/20241101150730.090dc30f@canb.auug.org.au
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

.mailmap | 10
@@ -199,7 +199,8 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
 Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
 Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
 Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
-Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
+Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
+Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
 Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
@@ -282,7 +283,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
 Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
 Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
 Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
-Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
@@ -306,6 +307,11 @@ Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
 Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
 Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
+Jesper Dangaard Brouer <hawk@kernel.org> <brouer@redhat.com>
+Jesper Dangaard Brouer <hawk@kernel.org> <hawk@comx.dk>
+Jesper Dangaard Brouer <hawk@kernel.org> <jbrouer@redhat.com>
+Jesper Dangaard Brouer <hawk@kernel.org> <jdb@comx.dk>
+Jesper Dangaard Brouer <hawk@kernel.org> <netoptimizer@brouer.com>
 Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
 Jilai Wang <quic_jilaiw@quicinc.com> <jilaiw@codeaurora.org>
 Jiri Kosina <jikos@kernel.org> <jikos@jikos.cz>
@@ -425,8 +425,8 @@ This governor exposes only one tunable:
 
 ``rate_limit_us``
 	Minimum time (in microseconds) that has to pass between two consecutive
-	runs of governor computations (default: 1000 times the scaling driver's
-	transition latency).
+	runs of governor computations (default: 1.5 times the scaling driver's
+	transition latency or the maximum 2ms).
 
 	The purpose of this tunable is to reduce the scheduler context overhead
 	of the governor which might be excessive without it.

@@ -474,17 +474,17 @@ This governor exposes the following tunables:
 	This is how often the governor's worker routine should run, in
 	microseconds.
 
-	Typically, it is set to values of the order of 10000 (10 ms). Its
-	default value is equal to the value of ``cpuinfo_transition_latency``
-	for each policy this governor is attached to (but since the unit here
-	is greater by 1000, this means that the time represented by
-	``sampling_rate`` is 1000 times greater than the transition latency by
-	default).
+	Typically, it is set to values of the order of 2000 (2 ms). Its
+	default value is to add a 50% breathing room
+	to ``cpuinfo_transition_latency`` on each policy this governor is
+	attached to. The minimum is typically the length of two scheduler
+	ticks.
 
 	If this tunable is per-policy, the following shell command sets the time
-	represented by it to be 750 times as high as the transition latency::
+	represented by it to be 1.5 times as high as the transition latency
+	(the default)::
 
-	# echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
+	# echo `$(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
 
 ``up_threshold``
 	If the estimated CPU load is above this value (in percent), the governor
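As an aside (not part of this merge), the same *3/2 arithmetic as the shell one-liner above can be done from C. This is a minimal sketch under stated assumptions: the ``policy0`` path, write permissions, and the presence of the ondemand governor are all assumptions that vary per system::

    /*
     * Sketch: set ondemand's sampling_rate to the value read from
     * cpuinfo_transition_latency multiplied by 3/2, mirroring the
     * documented shell one-liner. Paths are illustrative only.
     */
    #include <stdio.h>

    int main(void)
    {
    	const char *base = "/sys/devices/system/cpu/cpufreq/policy0";
    	char path[256];
    	unsigned long latency;
    	FILE *f;

    	snprintf(path, sizeof(path), "%s/cpuinfo_transition_latency", base);
    	f = fopen(path, "r");
    	if (!f || fscanf(f, "%lu", &latency) != 1)
    		return 1;
    	fclose(f);

    	snprintf(path, sizeof(path), "%s/ondemand/sampling_rate", base);
    	f = fopen(path, "w");	/* requires root */
    	if (!f)
    		return 1;
    	fprintf(f, "%lu\n", latency * 3 / 2);	/* same arithmetic as the echo */
    	fclose(f);
    	return 0;
    }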
@@ -63,6 +63,16 @@ properties:
       - const: sleep
 
+  power-domains:
+    description: |
+      The MediaTek DPI module is typically associated with one of the
+      following multimedia power domains:
+        POWER_DOMAIN_DISPLAY
+        POWER_DOMAIN_VDOSYS
+        POWER_DOMAIN_MM
+      The specific power domain used varies depending on the SoC design.
+
+      It is recommended to explicitly add the appropriate power domain
+      property to the DPI node in the device tree.
+    maxItems: 1
 
   port:

@@ -79,6 +89,20 @@ required:
   - clock-names
   - port
 
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - mediatek,mt6795-dpi
+                - mediatek,mt8173-dpi
+                - mediatek,mt8186-dpi
+    then:
+      properties:
+        power-domains: false
+
 additionalProperties: false
 
 examples:
@@ -38,6 +38,7 @@ properties:
     description: A phandle and PM domain specifier as defined by bindings of
       the power controller specified by phandle. See
       Documentation/devicetree/bindings/power/power-domain.yaml for details.
+    maxItems: 1
 
   mediatek,gce-client-reg:
     description:

@@ -57,6 +58,9 @@ properties:
   clocks:
     items:
       - description: SPLIT Clock
+      - description: Used for interfacing with the HDMI RX signal source.
+      - description: Paired with receiving HDMI RX metadata.
+    minItems: 1
 
 required:
   - compatible

@@ -72,9 +76,24 @@ allOf:
           const: mediatek,mt8195-mdp3-split
 
     then:
+      properties:
+        clocks:
+          minItems: 3
+
       required:
         - mediatek,gce-client-reg
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: mediatek,mt8173-disp-split
+
+    then:
+      properties:
+        clocks:
+          maxItems: 1
+
 additionalProperties: false
 
 examples:
@@ -67,6 +67,10 @@ properties:
       A 2.5V to 3.3V supply for the external reference voltage. When omitted,
       the internal 2.5V reference is used.
 
+  refin-supply:
+    description:
+      A 2.5V to 3.3V supply for external reference voltage, for ad7380-4 only.
+
   aina-supply:
     description:
       The common mode voltage supply for the AINA- pin on pseudo-differential

@@ -135,6 +139,23 @@ allOf:
           ainc-supply: false
           aind-supply: false
 
+  # ad7380-4 uses refin-supply as external reference.
+  # All other chips from ad738x family use refio as optional external reference.
+  # When refio-supply is omitted, internal reference is used.
+  - if:
+      properties:
+        compatible:
+          enum:
+            - adi,ad7380-4
+    then:
+      properties:
+        refio-supply: false
+      required:
+        - refin-supply
+    else:
+      properties:
+        refin-supply: false
+
 examples:
   - |
     #include <dt-bindings/interrupt-controller/irq.h>
@@ -154,8 +154,6 @@ allOf:
               - qcom,sm8550-qmp-gen4x2-pcie-phy
               - qcom,sm8650-qmp-gen3x2-pcie-phy
               - qcom,sm8650-qmp-gen4x2-pcie-phy
-              - qcom,x1e80100-qmp-gen3x2-pcie-phy
-              - qcom,x1e80100-qmp-gen4x2-pcie-phy
     then:
       properties:
         clocks:

@@ -171,6 +169,8 @@ allOf:
               - qcom,sc8280xp-qmp-gen3x1-pcie-phy
               - qcom,sc8280xp-qmp-gen3x2-pcie-phy
               - qcom,sc8280xp-qmp-gen3x4-pcie-phy
+              - qcom,x1e80100-qmp-gen3x2-pcie-phy
+              - qcom,x1e80100-qmp-gen4x2-pcie-phy
               - qcom,x1e80100-qmp-gen4x4-pcie-phy
     then:
       properties:

@@ -201,6 +201,7 @@ allOf:
               - qcom,sm8550-qmp-gen4x2-pcie-phy
               - qcom,sm8650-qmp-gen4x2-pcie-phy
               - qcom,x1e80100-qmp-gen4x2-pcie-phy
+              - qcom,x1e80100-qmp-gen4x4-pcie-phy
     then:
       properties:
         resets:
@@ -102,21 +102,21 @@ properties:
     default: 2
 
   interrupts:
-    oneOf:
-      - minItems: 1
-        items:
-          - description: TX interrupt
-          - description: RX interrupt
-      - items:
-          - description: common/combined interrupt
+    minItems: 1
+    maxItems: 2
 
   interrupt-names:
     oneOf:
-      - minItems: 1
+      - description: TX interrupt
+        const: tx
+      - description: RX interrupt
+        const: rx
+      - description: TX and RX interrupts
         items:
           - const: tx
           - const: rx
-      - const: common
+      - description: Common/combined interrupt
+        const: common
 
   fck_parent:
     $ref: /schemas/types.yaml#/definitions/string
@@ -48,6 +48,10 @@ properties:
       - const: mclk_rx
       - const: hclk
 
+  port:
+    $ref: audio-graph-port.yaml#
+    unevaluatedProperties: false
+
   resets:
     maxItems: 1
 
@@ -115,7 +115,7 @@ set up cache ready for use. The following script commands are available:
 
 This mask can also be set through sysfs, eg::
 
-	echo 5 >/sys/modules/cachefiles/parameters/debug
+	echo 5 > /sys/module/cachefiles/parameters/debug
 
 
 Starting the Cache
@@ -592,4 +592,3 @@ API Function Reference
 
 .. kernel-doc:: include/linux/netfs.h
 .. kernel-doc:: fs/netfs/buffered_read.c
-.. kernel-doc:: fs/netfs/io.c
@@ -41,13 +41,22 @@ supports only 1 SDO line.
 Reference voltage
 -----------------
 
-2 possible reference voltage sources are supported:
+ad7380-4
+~~~~~~~~
+
+ad7380-4 supports only an external reference voltage (2.5V to 3.3V). It must be
+declared in the device tree as ``refin-supply``.
+
+All other devices from ad738x family
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All other devices from ad738x support 2 possible reference voltage sources:
 
 - Internal reference (2.5V)
 - External reference (2.5V to 3.3V)
 
 The source is determined by the device tree. If ``refio-supply`` is present,
-then the external reference is used, else the internal reference is used.
+then it is used as external reference, else the internal reference is used.
 
 Oversampling and resolution boost
 ---------------------------------
@@ -16,7 +16,7 @@ ii) transmit network traffic, or any other that needs raw
 
 Howto can be found at:
 
-    https://sites.google.com/site/packetmmap/
+    https://web.archive.org/web/20220404160947/https://sites.google.com/site/packetmmap/
 
 Please send your comments to
     Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>

@@ -166,7 +166,8 @@ As capture, each frame contains two parts::
     /* bind socket to eth0 */
     bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
 
-A complete tutorial is available at: https://sites.google.com/site/packetmmap/
+A complete tutorial is available at:
+https://web.archive.org/web/20220404160947/https://sites.google.com/site/packetmmap/
 
 By default, the user should put data at::
 
@@ -17,7 +17,7 @@ Architecture   Level of support  Constraints
 =============  ================  ==============================================
 ``arm64``      Maintained        Little Endian only.
 ``loongarch``  Maintained        \-
-``riscv``      Maintained        ``riscv64`` only.
+``riscv``      Maintained        ``riscv64`` and LLVM/Clang only.
 ``um``         Maintained        \-
 ``x86``        Maintained        ``x86_64`` only.
 =============  ================  ==============================================
@@ -23,177 +23,166 @@ applications can additionally seal security critical data at runtime.
 A similar feature already exists in the XNU kernel with the
 VM_FLAGS_PERMANENT flag [1] and on OpenBSD with the mimmutable syscall [2].
 
-User API
-========
-mseal()
------------
-The mseal() syscall has the following signature:
-
-``int mseal(void addr, size_t len, unsigned long flags)``
-
-**addr/len**: virtual memory address range.
-The ``len`` will be paged aligned implicitly by the kernel.
-
-The address range set by ``addr``/``len`` must meet:
-   - The start address must be in an allocated VMA.
-   - The start address must be page aligned.
-   - The end address (``addr`` + ``len``) must be in an allocated VMA.
-   - no gap (unallocated memory) between start and end address.
-
-**flags**: reserved for future use.
-
-**return values**:
-
-- ``0``: Success.
-
-- ``-EINVAL``:
-   - Invalid input ``flags``.
-   - The start address (``addr``) is not page aligned.
-   - Address range (``addr`` + ``len``) overflow.
-
-- ``-ENOMEM``:
-   - The start address (``addr``) is not allocated.
-   - The end address (``addr`` + ``len``) is not allocated.
-   - A gap (unallocated memory) between start and end address.
-
-- ``-EPERM``:
-   - sealing is supported only on 64-bit CPUs, 32-bit is not supported.
-
-- For above error cases, users can expect the given memory range is
-  unmodified, i.e. no partial update.
-
-- There might be other internal errors/cases not listed here, e.g.
-  error during merging/splitting VMAs, or the process reaching the max
-  number of supported VMAs. In those cases, partial updates to the given
-  memory range could happen. However, those cases should be rare.
-
-**Blocked operations after sealing**:
-   Unmapping, moving to another location, and shrinking the size,
-   via munmap() and mremap(), can leave an empty space, therefore
-   can be replaced with a VMA with a new set of attributes.
-
-   Moving or expanding a different VMA into the current location,
-   via mremap().
-
-   Modifying a VMA via mmap(MAP_FIXED).
-
-   Size expansion, via mremap(), does not appear to pose any
-   specific risks to sealed VMAs. It is included anyway because
-   the use case is unclear. In any case, users can rely on
-   merging to expand a sealed VMA.
-
-   mprotect() and pkey_mprotect().
-
-   Some destructive madvice() behaviors (e.g. MADV_DONTNEED)
-   for anonymous memory, when users don't have write permission to the
-   memory. Those behaviors can alter region contents by discarding pages,
-   effectively a memset(0) for anonymous memory.
-
-Kernel will return -EPERM for blocked operations.
-
-For blocked operations, one can expect the given address is unmodified,
-i.e. no partial update. Note, this is different from existing mm
-system call behaviors, where partial updates are made till an error is
-found and returned to userspace. To give an example:
-
-Assume following code sequence:
-
-- ptr = mmap(null, 8192, PROT_NONE);
-- munmap(ptr + 4096, 4096);
-- ret1 = mprotect(ptr, 8192, PROT_READ);
-- mseal(ptr, 4096);
-- ret2 = mprotect(ptr, 8192, PROT_NONE);
-
-ret1 will be -ENOMEM, the page from ptr is updated to PROT_READ.
-
-ret2 will be -EPERM, the page remains to be PROT_READ.
-
-**Note**:
-
-- mseal() only works on 64-bit CPUs, not 32-bit CPU.
-
-- users can call mseal() multiple times, mseal() on an already sealed memory
-  is a no-action (not error).
-
-- munseal() is not supported.
-
-Use cases:
-==========
+SYSCALL
+=======
+mseal syscall signature
+-----------------------
+   ``int mseal(void \* addr, size_t len, unsigned long flags)``
+
+   **addr**/**len**: virtual memory address range.
+      The address range set by **addr**/**len** must meet:
+         - The start address must be in an allocated VMA.
+         - The start address must be page aligned.
+         - The end address (**addr** + **len**) must be in an allocated VMA.
+         - no gap (unallocated memory) between start and end address.
+
+      The ``len`` will be paged aligned implicitly by the kernel.
+
+   **flags**: reserved for future use.
+
+   **Return values**:
+      - **0**: Success.
+      - **-EINVAL**:
+         * Invalid input ``flags``.
+         * The start address (``addr``) is not page aligned.
+         * Address range (``addr`` + ``len``) overflow.
+      - **-ENOMEM**:
+         * The start address (``addr``) is not allocated.
+         * The end address (``addr`` + ``len``) is not allocated.
+         * A gap (unallocated memory) between start and end address.
+      - **-EPERM**:
+         * sealing is supported only on 64-bit CPUs, 32-bit is not supported.
+
+   **Note about error return**:
+      - For above error cases, users can expect the given memory range is
+        unmodified, i.e. no partial update.
+      - There might be other internal errors/cases not listed here, e.g.
+        error during merging/splitting VMAs, or the process reaching the maximum
+        number of supported VMAs. In those cases, partial updates to the given
+        memory range could happen. However, those cases should be rare.
+
+   **Architecture support**:
+      mseal only works on 64-bit CPUs, not 32-bit CPUs.
+
+   **Idempotent**:
+      users can call mseal multiple times. mseal on an already sealed memory
+      is a no-action (not error).
+
+   **no munseal**
+      Once mapping is sealed, it can't be unsealed. The kernel should never
+      have munseal, this is consistent with other sealing feature, e.g.
+      F_SEAL_SEAL for file.
+
+Blocked mm syscall for sealed mapping
+-------------------------------------
+   It might be important to note: **once the mapping is sealed, it will
+   stay in the process's memory until the process terminates**.
+
+   Example::
+
+         *ptr = mmap(0, 4096, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+         rc = mseal(ptr, 4096, 0);
+         /* munmap will fail */
+         rc = munmap(ptr, 4096);
+         assert(rc < 0);
+
+   Blocked mm syscall:
+      - munmap
+      - mmap
+      - mremap
+      - mprotect and pkey_mprotect
+      - some destructive madvise behaviors: MADV_DONTNEED, MADV_FREE,
+        MADV_DONTNEED_LOCKED, MADV_FREE, MADV_DONTFORK, MADV_WIPEONFORK
+
+   The first set of syscalls to block is munmap, mremap, mmap. They can
+   either leave an empty space in the address space, therefore allowing
+   replacement with a new mapping with new set of attributes, or can
+   overwrite the existing mapping with another mapping.
+
+   mprotect and pkey_mprotect are blocked because they changes the
+   protection bits (RWX) of the mapping.
+
+   Certain destructive madvise behaviors, specifically MADV_DONTNEED,
+   MADV_FREE, MADV_DONTNEED_LOCKED, and MADV_WIPEONFORK, can introduce
+   risks when applied to anonymous memory by threads lacking write
+   permissions. Consequently, these operations are prohibited under such
+   conditions. The aforementioned behaviors have the potential to modify
+   region contents by discarding pages, effectively performing a memset(0)
+   operation on the anonymous memory.
+
+   Kernel will return -EPERM for blocked syscalls.
+
+   When blocked syscall return -EPERM due to sealing, the memory regions may
+   or may not be changed, depends on the syscall being blocked:
+
+      - munmap: munmap is atomic. If one of VMAs in the given range is
+        sealed, none of VMAs are updated.
+      - mprotect, pkey_mprotect, madvise: partial update might happen, e.g.
+        when mprotect over multiple VMAs, mprotect might update the beginning
+        VMAs before reaching the sealed VMA and return -EPERM.
+      - mmap and mremap: undefined behavior.
+
+Use cases
+=========
 - glibc:
   The dynamic linker, during loading ELF executables, can apply sealing to
-  non-writable memory segments.
+  mapping segments.
 
-- Chrome browser: protect some security sensitive data-structures.
+- Chrome browser: protect some security sensitive data structures.
 
-Notes on which memory to seal:
-==============================
-
-It might be important to note that sealing changes the lifetime of a mapping,
-i.e. the sealed mapping won’t be unmapped till the process terminates or the
-exec system call is invoked. Applications can apply sealing to any virtual
-memory region from userspace, but it is crucial to thoroughly analyze the
-mapping's lifetime prior to apply the sealing.
+When not to use mseal
+=====================
+Applications can apply sealing to any virtual memory region from userspace,
+but it is *crucial to thoroughly analyze the mapping's lifetime* prior to
+apply the sealing. This is because the sealed mapping *won’t be unmapped*
+until the process terminates or the exec system call is invoked.
 
 For example:
+   - aio/shm
+     aio/shm can call mmap and munmap on behalf of userspace, e.g.
+     ksys_shmdt() in shm.c. The lifetimes of those mapping are not tied to
+     the lifetime of the process. If those memories are sealed from userspace,
+     then munmap will fail, causing leaks in VMA address space during the
+     lifetime of the process.
 
-- aio/shm
-
-  aio/shm can call mmap()/munmap() on behalf of userspace, e.g. ksys_shmdt() in
-  shm.c. The lifetime of those mapping are not tied to the lifetime of the
-  process. If those memories are sealed from userspace, then munmap() will fail,
-  causing leaks in VMA address space during the lifetime of the process.
-
-- Brk (heap)
-
-  Currently, userspace applications can seal parts of the heap by calling
-  malloc() and mseal().
-  let's assume following calls from user space:
-
-  - ptr = malloc(size);
-  - mprotect(ptr, size, RO);
-  - mseal(ptr, size);
-  - free(ptr);
-
-  Technically, before mseal() is added, the user can change the protection of
-  the heap by calling mprotect(RO). As long as the user changes the protection
-  back to RW before free(), the memory range can be reused.
-
-  Adding mseal() into the picture, however, the heap is then sealed partially,
-  the user can still free it, but the memory remains to be RO. If the address
-  is re-used by the heap manager for another malloc, the process might crash
-  soon after. Therefore, it is important not to apply sealing to any memory
-  that might get recycled.
-
-  Furthermore, even if the application never calls the free() for the ptr,
-  the heap manager may invoke the brk system call to shrink the size of the
-  heap. In the kernel, the brk-shrink will call munmap(). Consequently,
-  depending on the location of the ptr, the outcome of brk-shrink is
-  nondeterministic.
+   - ptr allocated by malloc (heap)
+     Don't use mseal on the memory ptr return from malloc().
+     malloc() is implemented by allocator, e.g. by glibc. Heap manager might
+     allocate a ptr from brk or mapping created by mmap.
+     If an app calls mseal on a ptr returned from malloc(), this can affect
+     the heap manager's ability to manage the mappings; the outcome is
+     non-deterministic.
 
+     Example::
 
-Additional notes:
-=================
+        ptr = malloc(size);
+        /* don't call mseal on ptr return from malloc. */
+        mseal(ptr, size);
+        /* free will success, allocator can't shrink heap lower than ptr */
+        free(ptr);
+
+mseal doesn't block
+===================
+In a nutshell, mseal blocks certain mm syscall from modifying some of VMA's
+attributes, such as protection bits (RWX). Sealed mappings doesn't mean the
+memory is immutable.
+
 As Jann Horn pointed out in [3], there are still a few ways to write
-to RO memory, which is, in a way, by design. Those cases are not covered
-by mseal(). If applications want to block such cases, sandbox tools (such as
-seccomp, LSM, etc) might be considered.
+to RO memory, which is, in a way, by design. And those could be blocked
+by different security measures.
 
 Those cases are:
 
-- Write to read-only memory through /proc/self/mem interface.
+- Write to read-only memory through /proc/self/mem interface (FOLL_FORCE).
 - Write to read-only memory through ptrace (such as PTRACE_POKETEXT).
 - userfaultfd.
 
 The idea that inspired this patch comes from Stephen Röttger’s work in V8
 CFI [4]. Chrome browser in ChromeOS will be the first user of this API.
 
-Reference:
-==========
-[1] https://github.com/apple-oss-distributions/xnu/blob/1031c584a5e37aff177559b9f69dbd3c8c3fd30a/osfmk/mach/vm_statistics.h#L274
-
-[2] https://man.openbsd.org/mimmutable.2
-
-[3] https://lore.kernel.org/lkml/CAG48ez3ShUYey+ZAFsU2i1RpQn0a5eOs2hzQ426FkcgnfUGLvA@mail.gmail.com
-
-[4] https://docs.google.com/document/d/1O2jwK4dxI3nRcOJuPYkonhTkNQfbmwdvxQMyXgeaRHo/edit#heading=h.bvaojj9fu6hc
+Reference
+=========
+- [1] https://github.com/apple-oss-distributions/xnu/blob/1031c584a5e37aff177559b9f69dbd3c8c3fd30a/osfmk/mach/vm_statistics.h#L274
+- [2] https://man.openbsd.org/mimmutable.2
+- [3] https://lore.kernel.org/lkml/CAG48ez3ShUYey+ZAFsU2i1RpQn0a5eOs2hzQ426FkcgnfUGLvA@mail.gmail.com
+- [4] https://docs.google.com/document/d/1O2jwK4dxI3nRcOJuPYkonhTkNQfbmwdvxQMyXgeaRHo/edit#heading=h.bvaojj9fu6hc
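The sealed-mapping behaviour documented above can be observed with a short self-contained program. This is a hedged sketch, not part of this merge: the ``SYS_mseal`` fallback number (462, the value assigned when the syscall landed in v6.10) and the use of ``syscall(2)`` instead of a libc wrapper are assumptions about the build environment::

    /* Seal an anonymous mapping, then show that munmap() is refused. */
    #include <assert.h>
    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SYS_mseal
    #define SYS_mseal 462	/* assumption: syscall number on v6.10+ */
    #endif

    int main(void)
    {
    	size_t len = 4096;
    	void *ptr = mmap(NULL, len, PROT_READ,
    			 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    	assert(ptr != MAP_FAILED);

    	/* flags must be 0; mseal is 64-bit only (-EPERM otherwise) */
    	if (syscall(SYS_mseal, ptr, len, 0) != 0) {
    		perror("mseal");
    		return 1;
    	}

    	/* The mapping is now permanent for the life of the process. */
    	int rc = munmap(ptr, len);
    	assert(rc == -1 && errno == EPERM);
    	printf("munmap on sealed mapping failed with EPERM, as documented\n");
    	return 0;
    }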
@@ -8098,13 +8098,15 @@ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
                                     KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
                                     disabled.
 
-KVM_X86_QUIRK_SLOT_ZAP_ALL          By default, KVM invalidates all SPTEs in
-                                    fast way for memslot deletion when VM type
-                                    is KVM_X86_DEFAULT_VM.
-                                    When this quirk is disabled or when VM type
-                                    is other than KVM_X86_DEFAULT_VM, KVM zaps
-                                    only leaf SPTEs that are within the range of
-                                    the memslot being deleted.
+KVM_X86_QUIRK_SLOT_ZAP_ALL          By default, for KVM_X86_DEFAULT_VM VMs, KVM
+                                    invalidates all SPTEs in all memslots and
+                                    address spaces when a memslot is deleted or
+                                    moved. When this quirk is disabled (or the
+                                    VM type isn't KVM_X86_DEFAULT_VM), KVM only
+                                    ensures the backing memory of the deleted
+                                    or moved memslot isn't reachable, i.e KVM
+                                    _may_ invalidate only SPTEs related to the
+                                    memslot.
 =================================== ============================================
 
 7.32 KVM_CAP_MAX_VCPU_ID
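For context, quirks in this table are opted out of through the generic ``KVM_CAP_DISABLE_QUIRKS2`` mechanism. A minimal sketch (not from this merge; error handling trimmed, and it assumes a ``<linux/kvm.h>`` recent enough to define ``KVM_X86_QUIRK_SLOT_ZAP_ALL``)::

    /* Disable KVM_X86_QUIRK_SLOT_ZAP_ALL for a freshly created VM. */
    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    int main(void)
    {
    	int kvm = open("/dev/kvm", O_RDWR);
    	if (kvm < 0) {
    		perror("open /dev/kvm");
    		return 1;
    	}

    	int vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* KVM_X86_DEFAULT_VM */
    	struct kvm_enable_cap cap = {
    		.cap = KVM_CAP_DISABLE_QUIRKS2,
    		.args[0] = KVM_X86_QUIRK_SLOT_ZAP_ALL,	/* bitmask of quirks */
    	};

    	if (vm < 0 || ioctl(vm, KVM_ENABLE_CAP, &cap))
    		perror("disable SLOT_ZAP_ALL");
    	else
    		printf("memslot deletion now zaps only related SPTEs\n");
    	return 0;
    }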
@@ -136,7 +136,7 @@ For direct sp, we can easily avoid it since the spte of direct sp is fixed
 to gfn. For indirect sp, we disabled fast page fault for simplicity.
 
 A solution for indirect sp could be to pin the gfn, for example via
-kvm_vcpu_gfn_to_pfn_atomic, before the cmpxchg. After the pinning:
+gfn_to_pfn_memslot_atomic, before the cmpxchg. After the pinning:
 
 - We have held the refcount of pfn; that means the pfn can not be freed and
   be reused for another gfn.
MAINTAINERS | 62

@@ -9723,6 +9723,7 @@ F:	include/dt-bindings/gpio/
 F:	include/linux/gpio.h
 F:	include/linux/gpio/
 F:	include/linux/of_gpio.h
+K:	(devm_)?gpio_(request|free|direction|get|set)
 
 GPIO UAPI
 M:	Bartosz Golaszewski <brgl@bgdev.pl>

@@ -14140,6 +14141,15 @@ S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/platform/nxp/imx-pxp.[ch]
 
+MEDIA DRIVERS FOR ASCOT2E
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/dvb-frontends/ascot2e*
+
 MEDIA DRIVERS FOR CXD2099AR CI CONTROLLERS
 M:	Jasmin Jessich <jasmin@anw.at>
 L:	linux-media@vger.kernel.org

@@ -14148,6 +14158,15 @@ W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/dvb-frontends/cxd2099*
 
+MEDIA DRIVERS FOR CXD2841ER
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/dvb-frontends/cxd2841er*
+
 MEDIA DRIVERS FOR CXD2880
 M:	Yasunari Takiguchi <Yasunari.Takiguchi@sony.com>
 L:	linux-media@vger.kernel.org

@@ -14192,6 +14211,33 @@ F:	drivers/media/platform/nxp/imx-mipi-csis.c
 F:	drivers/media/platform/nxp/imx7-media-csi.c
 F:	drivers/media/platform/nxp/imx8mq-mipi-csi2.c
 
+MEDIA DRIVERS FOR HELENE
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/dvb-frontends/helene*
+
+MEDIA DRIVERS FOR HORUS3A
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/dvb-frontends/horus3a*
+
+MEDIA DRIVERS FOR LNBH25
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/dvb-frontends/lnbh25*
+
 MEDIA DRIVERS FOR MXL5XX TUNER DEMODULATORS
 L:	linux-media@vger.kernel.org
 S:	Orphan

@@ -14199,6 +14245,15 @@ W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/dvb-frontends/mxl5xx*
 
+MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices
+M:	Abylay Ospan <aospan@amazon.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/pci/netup_unidvb/*
+
 MEDIA DRIVERS FOR NVIDIA TEGRA - VDE
 M:	Dmitry Osipenko <digetx@gmail.com>
 L:	linux-media@vger.kernel.org

@@ -14986,6 +15041,7 @@ F:	drivers/spi/spi-at91-usart.c
 
 MICROCHIP AUDIO ASOC DRIVERS
 M:	Claudiu Beznea <claudiu.beznea@tuxon.dev>
+M:	Andrei Simion <andrei.simion@microchip.com>
 L:	linux-sound@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/sound/atmel*

@@ -15094,6 +15150,7 @@ F:	include/video/atmel_lcdc.h
 
 MICROCHIP MCP16502 PMIC DRIVER
 M:	Claudiu Beznea <claudiu.beznea@tuxon.dev>
+M:	Andrei Simion <andrei.simion@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml

@@ -15224,6 +15281,7 @@ F:	drivers/spi/spi-atmel.*
 
 MICROCHIP SSC DRIVER
 M:	Claudiu Beznea <claudiu.beznea@tuxon.dev>
+M:	Andrei Simion <andrei.simion@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	Documentation/devicetree/bindings/misc/atmel-ssc.txt

@@ -16042,6 +16100,7 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Eric Dumazet <edumazet@google.com>
 M:	Jakub Kicinski <kuba@kernel.org>
 M:	Paolo Abeni <pabeni@redhat.com>
+R:	Simon Horman <horms@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
 P:	Documentation/process/maintainer-netdev.rst

@@ -16084,6 +16143,7 @@ F:	include/uapi/linux/rtnetlink.h
 F:	lib/net_utils.c
 F:	lib/random32.c
 F:	net/
+F:	samples/pktgen/
 F:	tools/net/
 F:	tools/testing/selftests/net/
 X:	Documentation/networking/mac80211-injection.rst

@@ -23143,7 +23203,7 @@ F:	Documentation/devicetree/bindings/iio/adc/ti,lmp92064.yaml
 F:	drivers/iio/adc/ti-lmp92064.c
 
 TI PCM3060 ASOC CODEC DRIVER
-M:	Kirill Marinushkin <kmarinushkin@birdec.com>
+M:	Kirill Marinushkin <k.marinushkin@gmail.com>
 L:	linux-sound@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/sound/pcm3060.txt
Makefile | 2

@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc6
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
@@ -855,14 +855,14 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG
 	def_bool y
 	depends on $(cc-option,-fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers)
 	# With GCOV/KASAN we need this fix: https://github.com/llvm/llvm-project/pull/104826
-	depends on CLANG_VERSION >= 190000 || (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS)
+	depends on CLANG_VERSION >= 190103 || (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS)
 
 config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
 	def_bool y
 	depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG
 	depends on RUSTC_VERSION >= 107900
 	# With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373
-	depends on (RUSTC_LLVM_VERSION >= 190000 && RUSTC_VERSION >= 108200) || \
+	depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \
 		   (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS)
 
 config CFI_PERMISSIVE
@@ -178,6 +178,7 @@ struct kvm_nvhe_init_params {
 	unsigned long hcr_el2;
 	unsigned long vttbr;
 	unsigned long vtcr;
+	unsigned long tmp;
 };
 
 /*
@@ -51,6 +51,7 @@
 #define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
 #define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP	KVM_ARCH_REQ(8)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS	(KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
 					 KVM_DIRTY_LOG_INITIALLY_SET)

@@ -211,6 +212,12 @@ struct kvm_s2_mmu {
 	 */
 	bool	nested_stage2_enabled;
 
+	/*
+	 * true when this MMU needs to be unmapped before being used for a new
+	 * purpose.
+	 */
+	bool	pending_unmap;
+
 	/*
 	 * 0: Nobody is currently using this, check vttbr for validity
 	 * >0: Somebody is actively using this.
@@ -166,7 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+			    u64 size, bool may_block);
 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
@@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
 extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
 extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
 
+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+
 struct kvm_s2_trans {
 	phys_addr_t output;
 	unsigned long block_size;

@@ -124,7 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
 				    struct kvm_s2_trans *trans);
 extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 extern void kvm_nested_s2_wp(struct kvm *kvm);
-extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
 extern void kvm_nested_s2_flush(struct kvm *kvm);
 
 unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
@@ -146,6 +146,7 @@ int main(void)
   DEFINE(NVHE_INIT_HCR_EL2,	offsetof(struct kvm_nvhe_init_params, hcr_el2));
   DEFINE(NVHE_INIT_VTTBR,	offsetof(struct kvm_nvhe_init_params, vttbr));
   DEFINE(NVHE_INIT_VTCR,	offsetof(struct kvm_nvhe_init_params, vtcr));
+  DEFINE(NVHE_INIT_TMP,	offsetof(struct kvm_nvhe_init_params, tmp));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
@@ -19,6 +19,7 @@
 #include <linux/ratelimit.h>
 #include <linux/rseq.h>
 #include <linux/syscalls.h>
+#include <linux/pkeys.h>
 
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>

@@ -66,10 +67,63 @@ struct rt_sigframe_user_layout {
 	unsigned long end_offset;
 };
 
+/*
+ * Holds any EL0-controlled state that influences unprivileged memory accesses.
+ * This includes both accesses done in userspace and uaccess done in the kernel.
+ *
+ * This state needs to be carefully managed to ensure that it doesn't cause
+ * uaccess to fail when setting up the signal frame, and the signal handler
+ * itself also expects a well-defined state when entered.
+ */
+struct user_access_state {
+	u64 por_el0;
+};
+
 #define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
 #define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
 #define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
 
+/*
+ * Save the user access state into ua_state and reset it to disable any
+ * restrictions.
+ */
+static void save_reset_user_access_state(struct user_access_state *ua_state)
+{
+	if (system_supports_poe()) {
+		u64 por_enable_all = 0;
+
+		for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
+			por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY);
+
+		ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
+		write_sysreg_s(por_enable_all, SYS_POR_EL0);
+		/* Ensure that any subsequent uaccess observes the updated value */
+		isb();
+	}
+}
+
+/*
+ * Set the user access state for invoking the signal handler.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void set_handler_user_access_state(void)
+{
+	if (system_supports_poe())
+		write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+/*
+ * Restore the user access state to the values saved in ua_state.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void restore_user_access_state(const struct user_access_state *ua_state)
+{
+	if (system_supports_poe())
+		write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
+}
+
 static void init_user_layout(struct rt_sigframe_user_layout *user)
 {
 	const size_t reserved_size =

@@ -261,18 +315,20 @@ static int restore_fpmr_context(struct user_ctxs *user)
 	return err;
 }
 
-static int preserve_poe_context(struct poe_context __user *ctx)
+static int preserve_poe_context(struct poe_context __user *ctx,
+				const struct user_access_state *ua_state)
 {
 	int err = 0;
 
 	__put_user_error(POE_MAGIC, &ctx->head.magic, err);
 	__put_user_error(sizeof(*ctx), &ctx->head.size, err);
-	__put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
+	__put_user_error(ua_state->por_el0, &ctx->por_el0, err);
 
 	return err;
 }
 
-static int restore_poe_context(struct user_ctxs *user)
+static int restore_poe_context(struct user_ctxs *user,
+			       struct user_access_state *ua_state)
 {
 	u64 por_el0;
 	int err = 0;

@@ -282,7 +338,7 @@ static int restore_poe_context(struct user_ctxs *user)
 
 	__get_user_error(por_el0, &(user->poe->por_el0), err);
 	if (!err)
-		write_sysreg_s(por_el0, SYS_POR_EL0);
+		ua_state->por_el0 = por_el0;
 
 	return err;
 }

@@ -850,7 +906,8 @@ static int parse_user_sigframe(struct user_ctxs *user,
 }
 
 static int restore_sigframe(struct pt_regs *regs,
-			    struct rt_sigframe __user *sf)
+			    struct rt_sigframe __user *sf,
+			    struct user_access_state *ua_state)
 {
 	sigset_t set;
 	int i, err;

@@ -899,7 +956,7 @@ static int restore_sigframe(struct pt_regs *regs,
 		err = restore_zt_context(&user);
 
 	if (err == 0 && system_supports_poe() && user.poe)
-		err = restore_poe_context(&user);
+		err = restore_poe_context(&user, ua_state);
 
 	return err;
 }

@@ -908,6 +965,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
+	struct user_access_state ua_state;
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;

@@ -924,12 +982,14 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (!access_ok(frame, sizeof (*frame)))
 		goto badframe;
 
-	if (restore_sigframe(regs, frame))
+	if (restore_sigframe(regs, frame, &ua_state))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
+	restore_user_access_state(&ua_state);
+
 	return regs->regs[0];
 
 badframe:

@@ -1035,7 +1095,8 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
 }
 
 static int setup_sigframe(struct rt_sigframe_user_layout *user,
-			  struct pt_regs *regs, sigset_t *set)
+			  struct pt_regs *regs, sigset_t *set,
+			  const struct user_access_state *ua_state)
 {
 	int i, err = 0;
 	struct rt_sigframe __user *sf = user->sigframe;

@@ -1097,10 +1158,9 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
 		struct poe_context __user *poe_ctx =
 			apply_user_offset(user, user->poe_offset);
 
-		err |= preserve_poe_context(poe_ctx);
+		err |= preserve_poe_context(poe_ctx, ua_state);
 	}
 
-
 	/* ZA state if present */
 	if (system_supports_sme() && err == 0 && user->za_offset) {
 		struct za_context __user *za_ctx =

@@ -1237,9 +1297,6 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 		sme_smstop();
 	}
 
-	if (system_supports_poe())
-		write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
-
 	if (ka->sa.sa_flags & SA_RESTORER)
 		sigtramp = ka->sa.sa_restorer;
 	else

@@ -1253,6 +1310,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
 {
 	struct rt_sigframe_user_layout user;
 	struct rt_sigframe __user *frame;
+	struct user_access_state ua_state;
 	int err = 0;
 
 	fpsimd_signal_preserve_current_state();

@@ -1260,13 +1318,14 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
 	if (get_sigframe(&user, ksig, regs))
 		return 1;
 
+	save_reset_user_access_state(&ua_state);
 	frame = user.sigframe;
 
 	__put_user_error(0, &frame->uc.uc_flags, err);
 	__put_user_error(NULL, &frame->uc.uc_link, err);
 
 	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
-	err |= setup_sigframe(&user, regs, set);
+	err |= setup_sigframe(&user, regs, set, &ua_state);
 	if (err == 0) {
 		setup_return(regs, &ksig->ka, &user, usig);
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO) {

@@ -1276,6 +1335,11 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
 		}
 	}
 
+	if (err == 0)
+		set_handler_user_access_state();
+	else
+		restore_user_access_state(&ua_state);
+
 	return err;
 }
 
@@ -997,6 +997,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
 static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 {
 	if (kvm_request_pending(vcpu)) {
+		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+			return -EIO;
+
 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
 			kvm_vcpu_sleep(vcpu);
 

@@ -1031,6 +1034,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 
 		if (kvm_dirty_ring_check_request(vcpu))
 			return 0;
+
+		check_nested_vcpu_requests(vcpu);
 	}
 
 	return 1;
@@ -24,28 +24,25 @@
 	.align	11
 
 SYM_CODE_START(__kvm_hyp_init)
-	ventry	__invalid		// Synchronous EL2t
-	ventry	__invalid		// IRQ EL2t
-	ventry	__invalid		// FIQ EL2t
-	ventry	__invalid		// Error EL2t
+	ventry	.			// Synchronous EL2t
+	ventry	.			// IRQ EL2t
+	ventry	.			// FIQ EL2t
+	ventry	.			// Error EL2t
 
-	ventry	__invalid		// Synchronous EL2h
-	ventry	__invalid		// IRQ EL2h
-	ventry	__invalid		// FIQ EL2h
-	ventry	__invalid		// Error EL2h
+	ventry	.			// Synchronous EL2h
+	ventry	.			// IRQ EL2h
+	ventry	.			// FIQ EL2h
+	ventry	.			// Error EL2h
 
 	ventry	__do_hyp_init		// Synchronous 64-bit EL1
-	ventry	__invalid		// IRQ 64-bit EL1
-	ventry	__invalid		// FIQ 64-bit EL1
-	ventry	__invalid		// Error 64-bit EL1
+	ventry	.			// IRQ 64-bit EL1
+	ventry	.			// FIQ 64-bit EL1
+	ventry	.			// Error 64-bit EL1
 
-	ventry	__invalid		// Synchronous 32-bit EL1
-	ventry	__invalid		// IRQ 32-bit EL1
-	ventry	__invalid		// FIQ 32-bit EL1
-	ventry	__invalid		// Error 32-bit EL1
-
-__invalid:
-	b	.
+	ventry	.			// Synchronous 32-bit EL1
+	ventry	.			// IRQ 32-bit EL1
+	ventry	.			// FIQ 32-bit EL1
+	ventry	.			// Error 32-bit EL1
 
 	/*
 	 * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.

@@ -76,6 +73,13 @@ __do_hyp_init:
 	eret
 SYM_CODE_END(__kvm_hyp_init)
 
+SYM_CODE_START_LOCAL(__kvm_init_el2_state)
+	/* Initialize EL2 CPU state to sane values. */
+	init_el2_state				// Clobbers x0..x2
+	finalise_el2_state
+	ret
+SYM_CODE_END(__kvm_init_el2_state)
+
 /*
  * Initialize the hypervisor in EL2.
  *

@@ -102,9 +106,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
 	// TPIDR_EL2 is used to preserve x0 across the macro maze...
 	isb
 	msr	tpidr_el2, x0
-	init_el2_state
-	finalise_el2_state
+	str	lr, [x0, #NVHE_INIT_TMP]
 
+	bl	__kvm_init_el2_state
+
+	mrs	x0, tpidr_el2
+	ldr	lr, [x0, #NVHE_INIT_TMP]
 
 1:
 	ldr	x1, [x0, #NVHE_INIT_TPIDR_EL2]

@@ -199,9 +206,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
 
 2:	msr	SPsel, #1			// We want to use SP_EL{1,2}
 
-	/* Initialize EL2 CPU state to sane values. */
-	init_el2_state				// Clobbers x0..x2
-	finalise_el2_state
+	bl	__kvm_init_el2_state
 
 	__init_el2_nvhe_prepare_eret
 
 	/* Enable MMU, set vectors and stack. */
@@ -317,7 +317,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
 			 * to the guest, and hide SSBS so that the
 			 * guest stays protected.
 			 */
-			if (cpus_have_final_cap(ARM64_SSBS))
+			if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
 				break;
 			fallthrough;
 		case SPECTRE_UNAFFECTED:

@@ -428,7 +428,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
  * Convert the workaround level into an easy-to-compare number, where higher
  * values mean better protection.
  */
-static int get_kernel_wa_level(u64 regid)
+static int get_kernel_wa_level(struct kvm_vcpu *vcpu, u64 regid)
 {
 	switch (regid) {
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:

@@ -449,7 +449,7 @@ static int get_kernel_wa_level(u64 regid)
 			 * don't have any FW mitigation if SSBS is there at
 			 * all times.
 			 */
-			if (cpus_have_final_cap(ARM64_SSBS))
+			if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
 				return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
 			fallthrough;
 		case SPECTRE_UNAFFECTED:

@@ -486,7 +486,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
-		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+		val = get_kernel_wa_level(vcpu, reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
 		break;
 	case KVM_REG_ARM_STD_BMAP:
 		val = READ_ONCE(smccc_feat->std_bmap);

@@ -588,7 +588,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
 			return -EINVAL;
 
-		if (get_kernel_wa_level(reg->id) < val)
+		if (get_kernel_wa_level(vcpu, reg->id) < val)
 			return -EINVAL;
 
 		return 0;

@@ -624,7 +624,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
 		 * other way around.
 		 */
-		if (get_kernel_wa_level(reg->id) < wa_level)
+		if (get_kernel_wa_level(vcpu, reg->id) < wa_level)
 			return -EINVAL;
 
 		return 0;
@@ -328,9 +328,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
 				   may_block));
 }
 
-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+			    u64 size, bool may_block)
 {
-	__unmap_stage2_range(mmu, start, size, true);
+	__unmap_stage2_range(mmu, start, size, may_block);
 }
 
 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)

@@ -1015,7 +1016,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
 
 		if (!(vma->vm_flags & VM_PFNMAP)) {
 			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
-			kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
+			kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start, true);
 		}
 		hva = vm_end;
 	} while (hva < reg_end);

@@ -1042,7 +1043,7 @@ void stage2_unmap_vm(struct kvm *kvm)
 	kvm_for_each_memslot(memslot, bkt, slots)
 		stage2_unmap_memslot(kvm, memslot);
 
-	kvm_nested_s2_unmap(kvm);
+	kvm_nested_s2_unmap(kvm, true);
 
 	write_unlock(&kvm->mmu_lock);
 	mmap_read_unlock(current->mm);

@@ -1912,7 +1913,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 			     (range->end - range->start) << PAGE_SHIFT,
 			     range->may_block);
 
-	kvm_nested_s2_unmap(kvm);
+	kvm_nested_s2_unmap(kvm, range->may_block);
 	return false;
 }
 

@@ -2179,8 +2180,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	phys_addr_t size = slot->npages << PAGE_SHIFT;
 
 	write_lock(&kvm->mmu_lock);
-	kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
-	kvm_nested_s2_unmap(kvm);
+	kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true);
+	kvm_nested_s2_unmap(kvm, true);
 	write_unlock(&kvm->mmu_lock);
 }
 
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
 	/* Set the scene for the next search */
 	kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
 
-	/* Clear the old state */
+	/* Make sure we don't forget to do the laundry */
 	if (kvm_s2_mmu_valid(s2_mmu))
-		kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
+		s2_mmu->pending_unmap = true;
 
 	/*
 	 * The virtual VMID (modulo CnP) will be used as a key when matching

@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
 
 out:
 	atomic_inc(&s2_mmu->refcnt);
+
+	/*
+	 * Set the vCPU request to perform an unmap, even if the pending unmap
+	 * originates from another vCPU. This guarantees that the MMU has been
+	 * completely unmapped before any vCPU actually uses it, and allows
+	 * multiple vCPUs to lend a hand with completing the unmap.
+	 */
+	if (s2_mmu->pending_unmap)
+		kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);
+
 	return s2_mmu;
 }
 

@@ -663,6 +673,13 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 
 void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The vCPU kept its reference on the MMU after the last put, keep
+	 * rolling with it.
+	 */
+	if (vcpu->arch.hw_mmu)
+		return;
+
 	if (is_hyp_ctxt(vcpu)) {
 		vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
 	} else {

@@ -674,10 +691,18 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 {
-	if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
+	/*
+	 * Keep a reference on the associated stage-2 MMU if the vCPU is
+	 * scheduling out and not in WFI emulation, suggesting it is likely to
+	 * reuse the MMU sometime soon.
+	 */
+	if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI))
+		return;
+
+	if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu))
 		atomic_dec(&vcpu->arch.hw_mmu->refcnt);
-		vcpu->arch.hw_mmu = NULL;
-	}
+
+	vcpu->arch.hw_mmu = NULL;
 }
 
 /*

@@ -730,7 +755,7 @@ void kvm_nested_s2_wp(struct kvm *kvm)
 	}
 }
 
-void kvm_nested_s2_unmap(struct kvm *kvm)
+void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
 {
 	int i;
 

@@ -740,7 +765,7 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
 		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 
 		if (kvm_s2_mmu_valid(mmu))
-			kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
+			kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
 	}
 }
 

@@ -1184,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
 
 	return 0;
 }
+
+void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+	if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) {
+		struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
+
+		write_lock(&vcpu->kvm->mmu_lock);
+		if (mmu->pending_unmap) {
+			kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true);
+			mmu->pending_unmap = false;
+		}
+		write_unlock(&vcpu->kvm->mmu_lock);
+	}
+}
@@ -1527,6 +1527,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
 		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
 
 		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
 		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
 		break;
 	case SYS_ID_AA64PFR2_EL1:
 		/* We only expose FPMR */
@@ -1550,7 +1558,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
 		val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
 		break;
 	case SYS_ID_AA64MMFR3_EL1:
-		val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+		val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
+			ID_AA64MMFR3_EL1_S1PIE;
 		break;
 	case SYS_ID_MMFR4_EL1:
 		val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -1985,7 +1994,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	 * one cache line.
 	 */
 	if (kvm_has_mte(vcpu->kvm))
-		clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
+		clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc);
 
 	__vcpu_sys_reg(vcpu, r->reg) = clidr;
 
@@ -2376,7 +2385,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 					ID_AA64PFR0_EL1_RAS |
 					ID_AA64PFR0_EL1_AdvSIMD |
 					ID_AA64PFR0_EL1_FP), },
-	ID_SANITISED(ID_AA64PFR1_EL1),
+	ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR |
+				       ID_AA64PFR1_EL1_DF2 |
+				       ID_AA64PFR1_EL1_MTEX |
+				       ID_AA64PFR1_EL1_THE |
+				       ID_AA64PFR1_EL1_GCS |
+				       ID_AA64PFR1_EL1_MTE_frac |
+				       ID_AA64PFR1_EL1_NMI |
+				       ID_AA64PFR1_EL1_RNDR_trap |
+				       ID_AA64PFR1_EL1_SME |
+				       ID_AA64PFR1_EL1_RES0 |
+				       ID_AA64PFR1_EL1_MPAM_frac |
+				       ID_AA64PFR1_EL1_RAS_frac |
+				       ID_AA64PFR1_EL1_MTE)),
 	ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
 	ID_UNALLOCATED(4,3),
 	ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
@@ -2390,7 +2411,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  .get_user = get_id_reg,
 	  .set_user = set_id_aa64dfr0_el1,
 	  .reset = read_sanitised_id_aa64dfr0_el1,
-	  .val = ID_AA64DFR0_EL1_PMUVer_MASK |
+	/*
+	 * Prior to FEAT_Debugv8.9, the architecture defines context-aware
+	 * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs).
+	 * KVM does not trap + emulate the breakpoint registers, and as such
+	 * cannot support a layout that misaligns with the underlying hardware.
+	 * While it may be possible to describe a subset that aligns with
+	 * hardware, just prevent changes to BRPs and CTX_CMPs altogether for
+	 * simplicity.
+	 *
+	 * See DDI0487K.a, section D2.8.3 Breakpoint types and linking
+	 * of breakpoints for more details.
+	 */
+	  .val = ID_AA64DFR0_EL1_DoubleLock_MASK |
+		 ID_AA64DFR0_EL1_WRPs_MASK |
+		 ID_AA64DFR0_EL1_PMUVer_MASK |
 		 ID_AA64DFR0_EL1_DebugVer_MASK, },
 	ID_SANITISED(ID_AA64DFR1_EL1),
 	ID_UNALLOCATED(5,2),
@@ -2433,6 +2468,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 				   ID_AA64MMFR2_EL1_NV |
 				   ID_AA64MMFR2_EL1_CCIDX)),
 	ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+					ID_AA64MMFR3_EL1_S1PIE |
 					ID_AA64MMFR3_EL1_S1POE)),
 	ID_SANITISED(ID_AA64MMFR4_EL1),
 	ID_UNALLOCATED(7,5),
@@ -2903,7 +2939,7 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	 * Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the
 	 * corresponding VMIDs.
 	 */
-	kvm_nested_s2_unmap(vcpu->kvm);
+	kvm_nested_s2_unmap(vcpu->kvm, true);
 
 	write_unlock(&vcpu->kvm->mmu_lock);
 
@@ -2955,7 +2991,30 @@ union tlbi_info {
 static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu,
 			       const union tlbi_info *info)
 {
-	kvm_stage2_unmap_range(mmu, info->range.start, info->range.size);
+	/*
+	 * The unmap operation is allowed to drop the MMU lock and block, which
+	 * means that @mmu could be used for a different context than the one
+	 * currently being invalidated.
+	 *
+	 * This behavior is still safe, as:
+	 *
+	 * 1) The vCPU(s) that recycled the MMU are responsible for invalidating
+	 *    the entire MMU before reusing it, which still honors the intent
+	 *    of a TLBI.
+	 *
+	 * 2) Until the guest TLBI instruction is 'retired' (i.e. increment PC
+	 *    and ERET to the guest), other vCPUs are allowed to use stale
+	 *    translations.
+	 *
+	 * 3) Accidentally unmapping an unrelated MMU context is nonfatal, and
+	 *    at worst may cause more aborts for shadow stage-2 fills.
+	 *
+	 * Dropping the MMU lock also implies that shadow stage-2 fills could
+	 * happen behind the back of the TLBI. This is still safe, though, as
+	 * the L1 needs to put its stage-2 in a consistent state before doing
+	 * the TLBI.
+	 */
+	kvm_stage2_unmap_range(mmu, info->range.start, info->range.size, true);
 }
 
 static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -3050,7 +3109,11 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu,
 	max_size = compute_tlb_inval_range(mmu, info->ipa.addr);
 	base_addr &= ~(max_size - 1);
 
-	kvm_stage2_unmap_range(mmu, base_addr, max_size);
+	/*
+	 * See comment in s2_mmu_unmap_range() for why this is allowed to
+	 * reschedule.
+	 */
+	kvm_stage2_unmap_range(mmu, base_addr, max_size, true);
 }
 
 static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -417,8 +417,28 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kfree(vgic_cpu->private_irqs);
 	vgic_cpu->private_irqs = NULL;
 
-	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+		/*
+		 * If this vCPU is being destroyed because of a failed creation
+		 * then unregister the redistributor to avoid leaving behind a
+		 * dangling pointer to the vCPU struct.
+		 *
+		 * vCPUs that have been successfully created (i.e. added to
+		 * kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
+		 * this function gets called while holding kvm->arch.config_lock
+		 * in the VM teardown path and would otherwise introduce a lock
+		 * inversion w.r.t. kvm->srcu.
+		 *
+		 * vCPUs that failed creation are torn down outside of the
+		 * kvm->arch.config_lock and do not get unregistered in
+		 * kvm_vgic_destroy(), meaning it is both safe and necessary to
+		 * do so here.
+		 */
+		if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
+			vgic_unregister_redist_iodev(vcpu);
+
 		vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+	}
 }
 
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -524,22 +544,31 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 	if (ret)
 		goto out;
 
-	dist->ready = true;
 	dist_base = dist->vgic_dist_base;
 	mutex_unlock(&kvm->arch.config_lock);
 
 	ret = vgic_register_dist_iodev(kvm, dist_base, type);
-	if (ret)
+	if (ret) {
 		kvm_err("Unable to register VGIC dist MMIO regions\n");
+		goto out_slots;
+	}
 
+	/*
+	 * kvm_io_bus_register_dev() guarantees all readers see the new MMIO
+	 * registration before returning through synchronize_srcu(), which also
+	 * implies a full memory barrier. As such, marking the distributor as
+	 * 'ready' here is guaranteed to be ordered after all vCPUs having seen
+	 * a completely configured distributor.
+	 */
+	dist->ready = true;
 	goto out_slots;
 out:
 	mutex_unlock(&kvm->arch.config_lock);
 out_slots:
-	mutex_unlock(&kvm->slots_lock);
-
 	if (ret)
-		kvm_vgic_destroy(kvm);
+		kvm_vm_dead(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
 
 	return ret;
 }
@@ -236,7 +236,12 @@ static int vgic_set_common_attr(struct kvm_device *dev,
 
 		mutex_lock(&dev->kvm->arch.config_lock);
 
-		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
+		/*
+		 * Either userspace has already configured NR_IRQS or
+		 * the vgic has already been initialized and vgic_init()
+		 * supplied a default amount of SPIs.
+		 */
+		if (dev->kvm->arch.vgic.nr_spis)
 			ret = -EBUSY;
 		else
 			dev->kvm->arch.vgic.nr_spis =
@@ -2220,7 +2220,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+		/* for the first pass, assume the worst case */
+		if (!ctx->image)
+			ctx->idx += 4;
+		else
+			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
 		emit_call((const u64)__bpf_tramp_enter, ctx);
 	}
 
@@ -2264,7 +2268,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		im->ip_epilogue = ctx->ro_image + ctx->idx;
-		emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+		/* for the first pass, assume the worst case */
+		if (!ctx->image)
+			ctx->idx += 4;
+		else
+			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
 		emit_call((const u64)__bpf_tramp_exit, ctx);
 	}
 
@@ -26,6 +26,10 @@ struct loongson_board_info {
 
 #define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG)
 
+/*
+ * The "core" of cores_per_node and cores_per_package stands for a
+ * logical core, which means in a SMT system it stands for a thread.
+ */
 struct loongson_system_configuration {
 	int nr_cpus;
 	int nr_nodes;
@@ -16,7 +16,7 @@
 #define XRANGE_SHIFT (48)
 
 /* Valid address length */
-#define XRANGE_SHADOW_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define XRANGE_SHADOW_SHIFT	min(cpu_vabits, VA_BITS)
 /* Used for taking out the valid address */
 #define XRANGE_SHADOW_MASK	GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
 /* One segment whole address space size */
@@ -250,7 +250,7 @@
 #define  CSR_ESTAT_IS_WIDTH		15
 #define  CSR_ESTAT_IS			(_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
 
-#define LOONGARCH_CSR_ERA		0x6	/* ERA */
+#define LOONGARCH_CSR_ERA		0x6	/* Exception return address */
 
 #define LOONGARCH_CSR_BADV		0x7	/* Bad virtual address */
 
@@ -10,6 +10,7 @@
 
 #define __HAVE_ARCH_PMD_ALLOC_ONE
 #define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -44,6 +45,16 @@ extern void pagetable_init(void);
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+	pte_t *pte = __pte_alloc_one_kernel(mm);
+
+	if (pte)
+		kernel_pte_init(pte);
+
+	return pte;
+}
+
 #define __pte_free_tlb(tlb, pte, address)			\
 do {								\
 	pagetable_pte_dtor(page_ptdesc(pte));			\
@@ -269,6 +269,7 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
 extern void pgd_init(void *addr);
 extern void pud_init(void *addr);
 extern void pmd_init(void *addr);
+extern void kernel_pte_init(void *addr);
 
 /*
  * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
@@ -325,39 +326,17 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
 	WRITE_ONCE(*ptep, pteval);
 
-	if (pte_val(pteval) & _PAGE_GLOBAL) {
-		pte_t *buddy = ptep_buddy(ptep);
-		/*
-		 * Make sure the buddy is global too (if it's !none,
-		 * it better already be global)
-		 */
-		if (pte_none(ptep_get(buddy))) {
 #ifdef CONFIG_SMP
-			/*
-			 * For SMP, multiple CPUs can race, so we need
-			 * to do this atomically.
-			 */
-			__asm__ __volatile__(
-			__AMOR "$zero, %[global], %[buddy] \n"
-			: [buddy] "+ZB" (buddy->pte)
-			: [global] "r" (_PAGE_GLOBAL)
-			: "memory");
-
-			DBAR(0b11000); /* o_wrw = 0b11000 */
-#else /* !CONFIG_SMP */
-			WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
-#endif /* CONFIG_SMP */
-		}
-	}
+	if (pte_val(pteval) & _PAGE_GLOBAL)
+		DBAR(0b11000); /* o_wrw = 0b11000 */
+#endif
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	/* Preserve global status for the pair */
-	if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL)
-		set_pte(ptep, __pte(_PAGE_GLOBAL));
-	else
-		set_pte(ptep, __pte(0));
+	pte_t pte = ptep_get(ptep);
+
+	pte_val(pte) &= _PAGE_GLOBAL;
+	set_pte(ptep, pte);
 }
 
 #define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
@@ -293,13 +293,15 @@ unsigned long stack_top(void)
 {
 	unsigned long top = TASK_SIZE & PAGE_MASK;
 
-	/* Space for the VDSO & data page */
-	top -= PAGE_ALIGN(current->thread.vdso->size);
-	top -= VVAR_SIZE;
+	if (current->thread.vdso) {
+		/* Space for the VDSO & data page */
+		top -= PAGE_ALIGN(current->thread.vdso->size);
+		top -= VVAR_SIZE;
 
-	/* Space to randomize the VDSO base */
-	if (current->flags & PF_RANDOMIZE)
-		top -= VDSO_RANDOMIZE_SIZE;
+		/* Space to randomize the VDSO base */
+		if (current->flags & PF_RANDOMIZE)
+			top -= VDSO_RANDOMIZE_SIZE;
+	}
 
 	return top;
 }
@@ -55,6 +55,7 @@
 #define SMBIOS_FREQHIGH_OFFSET		0x17
 #define SMBIOS_FREQLOW_MASK		0xFF
 #define SMBIOS_CORE_PACKAGE_OFFSET	0x23
+#define SMBIOS_THREAD_PACKAGE_OFFSET	0x25
 #define LOONGSON_EFI_ENABLE		(1 << 3)
 
 unsigned long fw_arg0, fw_arg1, fw_arg2;
@@ -125,7 +126,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm)
 	cpu_clock_freq = freq_temp * 1000000;
 
 	loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]);
-	loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET);
+	loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET);
 
 	pr_info("CpuClock = %llu\n", cpu_clock_freq);
 }
@@ -555,6 +555,9 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 #else
 	unsigned int *pc;
 
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_enable();
+
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
 
 	/*
@@ -579,6 +582,8 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 	die_if_kernel("Kernel ale access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
 out:
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_disable();
 #endif
 	irqentry_exit(regs, state);
 }
@@ -34,7 +34,6 @@ static union {
 	struct loongarch_vdso_data vdata;
 } loongarch_vdso_data __page_aligned_data;
 
-static struct page *vdso_pages[] = { NULL };
 struct vdso_data *vdso_data = generic_vdso_data.data;
 struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata;
 struct vdso_rng_data *vdso_rng_data = &loongarch_vdso_data.vdata.rng_data;
@@ -85,10 +84,8 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 
 struct loongarch_vdso_info vdso_info = {
 	.vdso = vdso_start,
-	.size = PAGE_SIZE,
 	.code_mapping = {
 		.name = "[vdso]",
-		.pages = vdso_pages,
 		.mremap = vdso_mremap,
 	},
 	.data_mapping = {
@@ -103,11 +100,14 @@ static int __init init_vdso(void)
 	unsigned long i, cpu, pfn;
 
 	BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
-	BUG_ON(!PAGE_ALIGNED(vdso_info.size));
 
 	for_each_possible_cpu(cpu)
 		vdso_pdata[cpu].node = cpu_to_node(cpu);
 
+	vdso_info.size = PAGE_ALIGN(vdso_end - vdso_start);
+	vdso_info.code_mapping.pages =
+		kcalloc(vdso_info.size / PAGE_SIZE, sizeof(struct page *), GFP_KERNEL);
+
 	pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
 	for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
 		vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
@@ -161,10 +161,11 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_is_blocking(vcpu)) {
 
 		/*
-		 * HRTIMER_MODE_PINNED is suggested since vcpu may run in
-		 * the same physical cpu in next time
+		 * HRTIMER_MODE_PINNED_HARD is suggested since vcpu may run in
+		 * the same physical cpu in next time, and the timer should run
+		 * in hardirq context even in the PREEMPT_RT case.
 		 */
-		hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 }
 
@@ -1457,7 +1457,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.vpid = 0;
 	vcpu->arch.flush_gpa = INVALID_GPA;
 
-	hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
 	vcpu->arch.swtimer.function = kvm_swtimer_wakeup;
 
 	vcpu->arch.handle_exit = kvm_handle_exit;
@@ -201,7 +201,9 @@ pte_t * __init populate_kernel_pte(unsigned long addr)
 		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		if (!pte)
 			panic("%s: Failed to allocate memory\n", __func__);
+
 		pmd_populate_kernel(&init_mm, pmd, pte);
+		kernel_pte_init(pte);
 	}
 
 	return pte_offset_kernel(pmd, addr);
@@ -116,6 +116,26 @@ void pud_init(void *addr)
 EXPORT_SYMBOL_GPL(pud_init);
 #endif
 
+void kernel_pte_init(void *addr)
+{
+	unsigned long *p, *end;
+
+	p = (unsigned long *)addr;
+	end = p + PTRS_PER_PTE;
+
+	do {
+		p[0] = _PAGE_GLOBAL;
+		p[1] = _PAGE_GLOBAL;
+		p[2] = _PAGE_GLOBAL;
+		p[3] = _PAGE_GLOBAL;
+		p[4] = _PAGE_GLOBAL;
+		p += 8;
+		p[-3] = _PAGE_GLOBAL;
+		p[-2] = _PAGE_GLOBAL;
+		p[-1] = _PAGE_GLOBAL;
+	} while (p != end);
+}
+
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t pmd;
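The unrolled loop in kernel_pte_init() above stores _PAGE_GLOBAL into all PTRS_PER_PTE entries, eight per iteration: indices 0-4 are written before the pointer is advanced by 8, and indices 5-7 are reached as p[-3]..p[-1] afterwards. A behaviorally equivalent plain loop, as a sketch for readability only:

	void kernel_pte_init_unoptimized(void *addr)
	{
		unsigned long *p = (unsigned long *)addr;
		int i;

		/* mark every empty kernel PTE as global */
		for (i = 0; i < PTRS_PER_PTE; i++)
			p[i] = _PAGE_GLOBAL;
	}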
@@ -102,3 +102,4 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
 		return old;
 	}
 }
+EXPORT_SYMBOL(__cmpxchg_small);
@@ -177,7 +177,7 @@ config RISCV
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RETHOOK if !XIP_KERNEL
 	select HAVE_RSEQ
-	select HAVE_RUST if RUSTC_SUPPORTS_RISCV
+	select HAVE_RUST if RUSTC_SUPPORTS_RISCV && CC_IS_CLANG
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_STACKPROTECTOR
@@ -2,6 +2,12 @@ ifdef CONFIG_RELOCATABLE
 KBUILD_CFLAGS += -fno-pie
 endif
 
+ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
+ifdef CONFIG_FORTIFY_SOURCE
+KBUILD_CFLAGS += -D__NO_FORTIFY
+endif
+endif
+
 obj-$(CONFIG_ERRATA_ANDES) += andes/
 obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
 obj-$(CONFIG_ERRATA_THEAD) += thead/
@@ -36,6 +36,11 @@ KASAN_SANITIZE_alternative.o := n
 KASAN_SANITIZE_cpufeature.o := n
 KASAN_SANITIZE_sbi_ecall.o := n
 endif
+ifdef CONFIG_FORTIFY_SOURCE
+CFLAGS_alternative.o += -D__NO_FORTIFY
+CFLAGS_cpufeature.o += -D__NO_FORTIFY
+CFLAGS_sbi_ecall.o += -D__NO_FORTIFY
+endif
 endif
 
 extra-y += vmlinux.lds
@@ -210,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
 	if (!size)
 		return NULL;
 
-	return early_ioremap(phys, size);
+	return early_memremap(phys, size);
 }
 
 void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
@@ -218,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
 	if (!map || !size)
 		return;
 
-	early_iounmap(map, size);
+	early_memunmap(map, size);
 }
 
 void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
@@ -4,8 +4,6 @@
  * Copyright (C) 2017 SiFive
  */
 
-#define GENERATING_ASM_OFFSETS
-
 #include <linux/kbuild.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -80,8 +80,7 @@ int populate_cache_leaves(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-	struct device_node *np = of_cpu_device_node_get(cpu);
-	struct device_node *prev = NULL;
+	struct device_node *np, *prev;
 	int levels = 1, level = 1;
 
 	if (!acpi_disabled) {
@@ -105,6 +104,10 @@ int populate_cache_leaves(unsigned int cpu)
 		return 0;
 	}
 
+	np = of_cpu_device_node_get(cpu);
+	if (!np)
+		return -ENOENT;
+
 	if (of_property_read_bool(np, "cache-size"))
 		ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
 	if (of_property_read_bool(np, "i-cache-size"))
@@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 	if (cpu_ops->cpu_is_stopped)
 		ret = cpu_ops->cpu_is_stopped(cpu);
 	if (ret)
-		pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
+		pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
 }
 
 /*
@@ -64,7 +64,7 @@ extra_header_fields:
 	.long	efi_header_end - _start			// SizeOfHeaders
 	.long	0					// CheckSum
 	.short	IMAGE_SUBSYSTEM_EFI_APPLICATION		// Subsystem
-	.short	0					// DllCharacteristics
+	.short	IMAGE_DLL_CHARACTERISTICS_NX_COMPAT	// DllCharacteristics
 	.quad	0					// SizeOfStackReserve
 	.quad	0					// SizeOfStackCommit
 	.quad	0					// SizeOfHeapReserve
@@ -16,8 +16,12 @@ KBUILD_CFLAGS	:= $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
 KBUILD_CFLAGS	+= -mcmodel=medany
 
 CFLAGS_cmdline_early.o += -D__NO_FORTIFY
-CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY
 CFLAGS_fdt_early.o += -D__NO_FORTIFY
+# lib/string.c already defines __NO_FORTIFY
+CFLAGS_ctype.o += -D__NO_FORTIFY
+CFLAGS_lib-fdt.o += -D__NO_FORTIFY
+CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY
+CFLAGS_archrandom_early.o += -D__NO_FORTIFY
 
 $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
 			       --remove-section=.note.gnu.property \
@@ -136,8 +136,6 @@
 #define REG_PTR(insn, pos, regs)	\
 	(ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
 
-#define GET_RM(insn)		(((insn) >> 12) & 7)
-
 #define GET_RS1(insn, regs)	(*REG_PTR(insn, SH_RS1, regs))
 #define GET_RS2(insn, regs)	(*REG_PTR(insn, SH_RS2, regs))
 #define GET_RS1S(insn, regs)	(*REG_PTR(RVC_RS1S(insn), 0, regs))
@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
 
 ccflags-y := -fno-stack-protector
 ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -fno-builtin
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
@@ -55,7 +55,7 @@ struct imsic {
 	/* IMSIC SW-file */
 	struct imsic_mrif *swfile;
 	phys_addr_t swfile_pa;
-	spinlock_t swfile_extirq_lock;
+	raw_spinlock_t swfile_extirq_lock;
 };
 
 #define imsic_vs_csr_read(__c)			\
@@ -622,7 +622,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
 	 * interruptions between reading topei and updating pending status.
 	 */
 
-	spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
+	raw_spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
 
 	if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
 	    imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
@@ -630,7 +630,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
 	else
 		kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
 
-	spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
+	raw_spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
 }
 
 static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
@@ -1051,7 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
 	}
 	imsic->swfile = page_to_virt(swfile_page);
 	imsic->swfile_pa = page_to_phys(swfile_page);
-	spin_lock_init(&imsic->swfile_extirq_lock);
+	raw_spin_lock_init(&imsic->swfile_extirq_lock);
 
 	/* Setup IO device */
 	kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
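The spinlock_t to raw_spinlock_t conversion matters on PREEMPT_RT, where plain spinlocks become sleeping locks; a lock taken on a path that cannot sleep (here, while updating external-interrupt state for the vCPU) must stay a true spinning lock. The calling pattern is unchanged apart from the raw_ prefix, e.g. (generic sketch, not IMSIC-specific):

	static raw_spinlock_t state_lock;	/* raw_spin_lock_init(&state_lock) at setup */

	static void touch_irq_state(void)
	{
		unsigned long flags;

		/* never sleeps, even on PREEMPT_RT, so safe in hardirq context */
		raw_spin_lock_irqsave(&state_lock, flags);
		/* ... update state shared with the interrupt path ... */
		raw_spin_unlock_irqrestore(&state_lock, flags);
	}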
@@ -2257,6 +2257,7 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
 config ADDRESS_MASKING
 	bool "Linear Address Masking support"
 	depends on X86_64
+	depends on COMPILE_TEST || !CPU_MITIGATIONS # wait for LASS
 	help
 	  Linear Address Masking (LAM) modifies the checking that is applied
 	  to 64-bit linear addresses, allowing software to use of the
@@ -116,7 +116,10 @@ static inline bool amd_gart_present(void)
 
 #define amd_nb_num(x)		0
 #define amd_nb_has_feature(x)	false
-#define node_to_amd_nb(x)	NULL
+static inline struct amd_northbridge *node_to_amd_nb(int node)
+{
+	return NULL;
+}
 #define amd_gart_present(x)	false
 
 #endif
@@ -6,7 +6,7 @@
 	typeof(sym) __ret;					\
 	asm_inline("mov %1,%0\n1:\n"				\
 		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
-		".long 1b - %c2 - .\n\t"			\
+		".long 1b - %c2 - .\n"				\
 		".popsection"					\
 		:"=r" (__ret)					\
 		:"i" ((unsigned long)0x0123456789abcdefull),	\
@@ -20,7 +20,7 @@
 	typeof(0u+(val)) __ret = (val);				\
 	asm_inline("shrl $12,%k0\n1:\n"				\
 		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
-		".long 1b - 1 - .\n\t"				\
+		".long 1b - 1 - .\n"				\
 		".popsection"					\
 		:"+r" (__ret));					\
 	__ret; })
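Both macros emit the instruction with a placeholder immediate (the 0x0123456789abcdef constant, or a shift of 12) and record the location of that immediate in a dedicated section; at boot, runtime_const_init() walks the section and patches the real value into the code. Conceptually the fixup loop looks like the sketch below (simplified and assumed; the real kernel patches through its text-poking machinery):

	/* Each record is a 32-bit offset from the record to the immediate. */
	static void patch_runtime_const(s32 *start, s32 *end, unsigned long val)
	{
		s32 *rec;

		for (rec = start; rec < end; rec++) {
			unsigned long *imm = (unsigned long *)((unsigned long)rec + *rec);

			*imm = val;	/* overwrite the placeholder immediate */
		}
	}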
@@ -12,6 +12,13 @@
 #include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
 
 #ifdef CONFIG_ADDRESS_MASKING
 /*
@@ -46,19 +53,24 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
 
 #endif
 
-/*
- * The virtual address space space is logically divided into a kernel
- * half and a user half.  When cast to a signed type, user pointers
- * are positive and kernel pointers are negative.
- */
-#define valid_user_address(x) ((__force long)(x) >= 0)
+#define valid_user_address(x) \
+	((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
 
 /*
  * Masking the user address is an alternative to a conditional
  * user_access_begin that can avoid the fencing. This only works
  * for dense accesses starting at the address.
  */
-#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	unsigned long mask;
+	asm("cmp %1,%0\n\t"
+	    "sbb %0,%0"
+		:"=r" (mask)
+		:"r" (ptr),
+		 "0" (runtime_const_ptr(USER_PTR_MAX)));
+	return (__force void __user *)(mask | (__force unsigned long)ptr);
+}
 #define masked_user_access_begin(x) ({				\
 	__auto_type __masked_ptr = (x);				\
 	__masked_ptr = mask_user_address(__masked_ptr);		\
@@ -69,23 +81,16 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
  * arbitrary values in those bits rather then masking them off.
  *
  * Enforce two rules:
- * 1. 'ptr' must be in the user half of the address space
+ * 1. 'ptr' must be in the user part of the address space
 * 2. 'ptr+size' must not overflow into kernel addresses
 *
- * Note that addresses around the sign change are not valid addresses,
- * and will GP-fault even with LAM enabled if the sign bit is set (see
- * "CR3.LAM_SUP" that can narrow the canonicality check if we ever
- * enable it, but not remove it entirely).
- *
- * So the "overflow into kernel addresses" does not imply some sudden
- * exact boundary at the sign bit, and we can allow a lot of slop on the
- * size check.
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
 *
 * In fact, we could probably remove the size check entirely, since
 * any kernel accesses will be in increasing address order starting
- * at 'ptr', and even if the end might be in kernel space, we'll
- * hit the GP faults for non-canonical accesses before we ever get
- * there.
+ * at 'ptr'.
 *
 * That's a separate optimization, for now just handle the small
 * constant case.
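The cmp/sbb pair in mask_user_address() computes, without a branch, an all-ones mask when the pointer is above USER_PTR_MAX and zero otherwise; OR-ing the mask in turns any kernel-range pointer into an all-ones non-canonical address that faults on access, closing the speculation window a conditional check would leave open. The arithmetic, spelled out in plain C (sketch; the asm form exists precisely to keep it branch-free):

	static unsigned long mask_user_ptr(unsigned long ptr, unsigned long user_ptr_max)
	{
		/* the borrow from (user_ptr_max - ptr) replicated into every bit */
		unsigned long mask = (ptr > user_ptr_max) ? ~0UL : 0UL;

		return ptr | mask;	/* user pointers unchanged; kernel pointers -> ~0UL */
	}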
@@ -69,6 +69,7 @@
 #include <asm/sev.h>
 #include <asm/tdx.h>
 #include <asm/posted_intr.h>
+#include <asm/runtime-const.h>
 
 #include "cpu.h"
 
@@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1;
+
+		/*
+		 * Enable this when LAM is gated on LASS support
+		if (cpu_feature_enabled(X86_FEATURE_LAM))
+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1;
+		 */
+		runtime_const_init(ptr, USER_PTR_MAX);
+
 		/*
 		 * Make sure the first 2MB area is not mapped by huge pages
 		 * There are typically fixed size MTRRs in there and overlapping
@@ -584,7 +584,7 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
 }
 
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size);
 
 static int __init save_microcode_in_initrd(void)
 {
@@ -605,7 +605,7 @@ static int __init save_microcode_in_initrd(void)
 	if (!desc.mc)
 		return -EINVAL;
 
-	ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+	ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
 	if (ret > UCODE_UPDATED)
 		return -EINVAL;
 
@@ -613,16 +613,19 @@ static int __init save_microcode_in_initrd(void)
 }
 early_initcall(save_microcode_in_initrd);
 
-static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n)
+static inline bool patch_cpus_equivalent(struct ucode_patch *p,
+					 struct ucode_patch *n,
+					 bool ignore_stepping)
 {
 	/* Zen and newer hardcode the f/m/s in the patch ID */
 	if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
 		union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id);
 		union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id);
 
-		/* Zap stepping */
-		p_cid.stepping = 0;
-		n_cid.stepping = 0;
+		if (ignore_stepping) {
+			p_cid.stepping = 0;
+			n_cid.stepping = 0;
+		}
 
 		return p_cid.full == n_cid.full;
 	} else {
@@ -644,13 +647,13 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi
 	WARN_ON_ONCE(!n.patch_id);
 
 	list_for_each_entry(p, &microcode_cache, plist)
-		if (patch_cpus_equivalent(p, &n))
+		if (patch_cpus_equivalent(p, &n, false))
 			return p;
 
 	return NULL;
 }
 
-static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
+static inline int patch_newer(struct ucode_patch *p, struct ucode_patch *n)
 {
 	/* Zen and newer hardcode the f/m/s in the patch ID */
 	if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
@@ -659,6 +662,9 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
 		zp.ucode_rev = p->patch_id;
 		zn.ucode_rev = n->patch_id;
 
+		if (zn.stepping != zp.stepping)
+			return -1;
+
 		return zn.rev > zp.rev;
 	} else {
 		return n->patch_id > p->patch_id;
@@ -668,10 +674,14 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
 static void update_cache(struct ucode_patch *new_patch)
 {
 	struct ucode_patch *p;
+	int ret;
 
 	list_for_each_entry(p, &microcode_cache, plist) {
-		if (patch_cpus_equivalent(p, new_patch)) {
-			if (!patch_newer(p, new_patch)) {
+		if (patch_cpus_equivalent(p, new_patch, true)) {
+			ret = patch_newer(p, new_patch);
+			if (ret < 0)
+				continue;
+			else if (!ret) {
 				/* we already have the latest patch */
 				kfree(new_patch->data);
 				kfree(new_patch);
@@ -944,6 +954,20 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 	return UCODE_OK;
 }
 
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+	enum ucode_state ret;
+
+	/* free old equiv table */
+	free_equiv_cpu_table();
+
+	ret = __load_microcode_amd(family, data, size);
+	if (ret != UCODE_OK)
+		cleanup();
+
+	return ret;
+}
+
 static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
 {
 	struct cpuinfo_x86 *c;
@@ -951,14 +975,9 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
 	struct ucode_patch *p;
 	enum ucode_state ret;
 
-	/* free old equiv table */
-	free_equiv_cpu_table();
-
-	ret = __load_microcode_amd(family, data, size);
-	if (ret != UCODE_OK) {
-		cleanup();
+	ret = _load_microcode_amd(family, data, size);
+	if (ret != UCODE_OK)
 		return ret;
-	}
 
 	for_each_node(nid) {
 		cpu = cpumask_first(cpumask_of_node(nid));
@@ -37,6 +37,7 @@
 #include <asm/apic.h>
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
+#include <asm/mtrr.h>
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
@@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
 	}
 	kvmclock_init();
 	x86_platform.apic_post_init = kvm_apic_init;
+
+	/* Set WB as the default cache mode for SEV-SNP and TDX */
+	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #if defined(CONFIG_AMD_MEM_ENCRYPT)
@@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 	int ud_type;
 	u32 imm;
 
-	/*
-	 * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
-	 * is a rare case that uses @regs without passing them to
-	 * irqentry_enter().
-	 */
-	kmsan_unpoison_entry_regs(regs);
 	ud_type = decode_bug(regs->ip, &imm);
 	if (ud_type == BUG_NONE)
 		return handled;
@@ -275,6 +269,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 	 * All lies, just get the WARN/BUG out.
 	 */
 	instrumentation_begin();
+	/*
+	 * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+	 * is a rare case that uses @regs without passing them to
+	 * irqentry_enter().
+	 */
+	kmsan_unpoison_entry_regs(regs);
 	/*
 	 * Since we're emulating a CALL with exceptions, restore the interrupt
 	 * state to what it was at the exception site.
@@ -358,6 +358,7 @@ SECTIONS
 #endif
 
 	RUNTIME_CONST_VARIABLES
+	RUNTIME_CONST(ptr, USER_PTR_MAX)
 
 	. = ALIGN(PAGE_SIZE);
 
@@ -1556,6 +1556,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool flush = false;
 
+	/*
+	 * To prevent races with vCPUs faulting in a gfn using stale data,
+	 * zapping a gfn range must be protected by mmu_invalidate_in_progress
+	 * (and mmu_invalidate_seq).  The only exception is memslot deletion;
+	 * in that case, SRCU synchronization ensures that SPTEs are zapped
+	 * after all vCPUs have unlocked SRCU, guaranteeing that vCPUs see the
+	 * invalid slot.
+	 */
+	lockdep_assert_once(kvm->mmu_invalidate_in_progress ||
+			    lockdep_is_held(&kvm->slots_lock));
+
 	if (kvm_memslots_have_rmaps(kvm))
 		flush = __kvm_rmap_zap_gfn_range(kvm, range->slot,
 						 range->start, range->end,
@@ -1884,14 +1895,10 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
 	if (is_obsolete_sp((_kvm), (_sp))) {			\
 	} else
 
-#define for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
+#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)		\
 	for_each_valid_sp(_kvm, _sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])	\
-		if ((_sp)->gfn != (_gfn)) {} else
-
-#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)		\
-	for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
-		if (!sp_has_gptes(_sp)) {} else
+		if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
 
 static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -7063,15 +7070,15 @@ static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm,
 
 	/*
 	 * Since accounting information is stored in struct kvm_arch_memory_slot,
-	 * shadow pages deletion (e.g. unaccount_shadowed()) requires that all
-	 * gfns with a shadow page have a corresponding memslot. Do so before
-	 * the memslot goes away.
+	 * all MMU pages that are shadowing guest PTEs must be zapped before the
+	 * memslot is deleted, as freeing such pages after the memslot is freed
+	 * will result in use-after-free, e.g. in unaccount_shadowed().
 	 */
 	for (i = 0; i < slot->npages; i++) {
 		struct kvm_mmu_page *sp;
 		gfn_t gfn = slot->base_gfn + i;
 
-		for_each_gfn_valid_sp(kvm, sp, gfn)
+		for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn)
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
@@ -63,8 +63,12 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
 	u64 pdpte;
 	int ret;
 
+	/*
+	 * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores
+	 * nCR3[4:0] when loading PDPTEs from memory.
+	 */
 	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
-				       offset_in_page(cr3) + index * 8, 8);
+				       (cr3 & GENMASK(11, 5)) + index * 8, 8);
 	if (ret)
 		return 0;
 	return pdpte;
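The masking change follows from the alignment note in the new comment: with nCR3 assumed 32-byte aligned, the PDPTE table's offset within its page is nCR3[11:5], so the read must drop bits 4:0 rather than keep the full page offset as offset_in_page() did. A quick sketch of the two offset computations (hypothetical helper names, for illustration):

	/* old: keeps nCR3[11:0], including the bits the CPU ignores */
	static unsigned long pdpte_off_old(u64 cr3, int index)
	{
		return offset_in_page(cr3) + index * 8;
	}

	/* new: keeps only nCR3[11:5], matching the hardware's table base */
	static unsigned long pdpte_off_new(u64 cr3, int index)
	{
		return (cr3 & GENMASK(11, 5)) + index * 8;
	}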
@@ -4888,9 +4888,6 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx->hv_deadline_tsc = -1;
 	kvm_set_cr8(vcpu, 0);
 
-	vmx_segment_cache_clear(vmx);
-	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
-
 	seg_setup(VCPU_SREG_CS);
 	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
 	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
@@ -4917,6 +4914,9 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmcs_writel(GUEST_IDTR_BASE, 0);
 	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
+	vmx_segment_cache_clear(vmx);
+	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
+
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
@@ -39,8 +39,13 @@
 
 .macro check_range size:req
 .if IS_ENABLED(CONFIG_X86_64)
-	mov %rax, %rdx
-	sar $63, %rdx
+	movq $0x0123456789abcdef,%rdx
+1:
+.pushsection runtime_ptr_USER_PTR_MAX,"a"
+	.long 1b - 8 - .
+.popsection
+	cmp %rax, %rdx
+	sbb %rdx, %rdx
 	or %rdx, %rax
 .else
 	cmp $TASK_SIZE_MAX-\size+1, %eax
@@ -173,6 +173,8 @@ static void __init __snp_fixup_e820_tables(u64 pa)
 		e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
 		e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
 		e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+		if (!memblock_is_region_reserved(pa, PMD_SIZE))
+			memblock_reserve(pa, PMD_SIZE);
 	}
 }
 
@@ -561,57 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
 /* Prepare bio for passthrough IO given ITER_BVEC iter */
 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
 {
-	struct request_queue *q = rq->q;
-	size_t nr_iter = iov_iter_count(iter);
-	size_t nr_segs = iter->nr_segs;
-	struct bio_vec *bvecs, *bvprvp = NULL;
-	const struct queue_limits *lim = &q->limits;
-	unsigned int nsegs = 0, bytes = 0;
+	const struct queue_limits *lim = &rq->q->limits;
+	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
+	unsigned int nsegs;
 	struct bio *bio;
-	size_t i;
+	int ret;
 
-	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
-		return -EINVAL;
-	if (nr_segs > queue_max_segments(q))
+	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
 		return -EINVAL;
 
-	/* no iovecs to alloc, as we already have a BVEC iterator */
+	/* reuse the bvecs from the iterator instead of allocating new ones */
 	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
-	if (bio == NULL)
+	if (!bio)
 		return -ENOMEM;
-
 	bio_iov_bvec_set(bio, (struct iov_iter *)iter);
-	blk_rq_bio_prep(rq, bio, nr_segs);
-
-	/* loop to perform a bunch of sanity checks */
-	bvecs = (struct bio_vec *)iter->bvec;
-	for (i = 0; i < nr_segs; i++) {
-		struct bio_vec *bv = &bvecs[i];
-
-		/*
-		 * If the queue doesn't support SG gaps and adding this
-		 * offset would create a gap, fallback to copy.
-		 */
-		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
-			blk_mq_map_bio_put(bio);
-			return -EREMOTEIO;
-		}
-		/* check full condition */
-		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
-			goto put_bio;
-		if (bytes + bv->bv_len > nr_iter)
-			goto put_bio;
-		if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
-			goto put_bio;
-
-		nsegs++;
-		bytes += bv->bv_len;
-		bvprvp = bv;
+	/* check that the data layout matches the hardware restrictions */
+	ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
+	if (ret) {
+		/* if we would have to split the bio, copy instead */
+		if (ret > 0)
+			ret = -EREMOTEIO;
+		blk_mq_map_bio_put(bio);
+		return ret;
 	}
 
+	blk_rq_bio_prep(rq, bio, nsegs);
 	return 0;
-put_bio:
-	blk_mq_map_bio_put(bio);
-	return -EINVAL;
 }
 
 /**
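The rewrite delegates the per-segment sanity checking to bio_split_rw_at(), whose return value encodes three cases the caller must distinguish; the passthrough path cannot split a bio, so the "split needed" case degrades to -EREMOTEIO and the caller falls back to a copy. A sketch of the decision logic, assuming that return convention:

	static int check_passthrough_bio(struct request *rq, struct bio *bio,
					 unsigned int max_bytes)
	{
		unsigned int nsegs;
		int ret = bio_split_rw_at(bio, &rq->q->limits, &nsegs, max_bytes);

		if (ret < 0)
			return ret;		/* bio violates a hard queue limit */
		if (ret > 0)
			return -EREMOTEIO;	/* would need a split: copy instead */
		blk_rq_bio_prep(rq, bio, nsegs);	/* fits as-is, nsegs segments */
		return 0;
	}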
@@ -108,6 +108,14 @@ static int reset_pending_show(struct seq_file *s, void *v)
 	return 0;
 }
 
+static int firewall_irq_counter_show(struct seq_file *s, void *v)
+{
+	struct ivpu_device *vdev = seq_to_ivpu(s);
+
+	seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter));
+	return 0;
+}
+
 static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"bo_list", bo_list_show, 0},
 	{"fw_name", fw_name_show, 0},
@@ -116,6 +124,7 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"last_bootmode", last_bootmode_show, 0},
 	{"reset_counter", reset_counter_show, 0},
 	{"reset_pending", reset_pending_show, 0},
+	{"firewall_irq_counter", firewall_irq_counter_show, 0},
 };
 
 static ssize_t
@@ -249,6 +249,7 @@ int ivpu_hw_init(struct ivpu_device *vdev)
 	platform_init(vdev);
 	wa_init(vdev);
 	timeouts_init(vdev);
+	atomic_set(&vdev->hw->firewall_irq_counter, 0);
 
 	return 0;
 }
@@ -52,6 +52,7 @@ struct ivpu_hw_info {
 	int dma_bits;
 	ktime_t d0i3_entry_host_ts;
 	u64 d0i3_entry_vpu_ts;
+	atomic_t firewall_irq_counter;
 };
 
 int ivpu_hw_init(struct ivpu_device *vdev);
@@ -1062,7 +1062,10 @@ static void irq_wdt_mss_handler(struct ivpu_device *vdev)
 
 static void irq_noc_firewall_handler(struct ivpu_device *vdev)
 {
-	ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
+	atomic_inc(&vdev->hw->firewall_irq_counter);
+
+	ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n",
+		 atomic_read(&vdev->hw->firewall_irq_counter));
 }
 
 /* Handler for IRQs from NPU core */
@@ -130,6 +130,17 @@ static const struct dmi_system_id dmi_lid_quirks[] = {
 		},
 		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
 	},
+	{
+		/*
+		 * Samsung galaxybook2 ,initial _LID device notification returns
+		 * lid closed.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "750XED"),
+		},
+		.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN,
+	},
 	{}
 };
 
@@ -867,7 +867,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 
 	/* Store CPU Logical ID */
 	cpc_ptr->cpu_id = pr->id;
-	spin_lock_init(&cpc_ptr->rmw_lock);
+	raw_spin_lock_init(&cpc_ptr->rmw_lock);
 
 	/* Parse PSD data for this CPU */
 	ret = acpi_get_psd(cpc_ptr, handle);
@@ -1087,6 +1087,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
 	struct cpc_reg *reg = &reg_res->cpc_entry.reg;
 	struct cpc_desc *cpc_desc;
+	unsigned long flags;
 
 	size = GET_BIT_WIDTH(reg);
 
@@ -1126,7 +1127,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 			return -ENODEV;
 		}
 
-		spin_lock(&cpc_desc->rmw_lock);
+		raw_spin_lock_irqsave(&cpc_desc->rmw_lock, flags);
 		switch (size) {
 		case 8:
 			prev_val = readb_relaxed(vaddr);
@@ -1141,7 +1142,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 			prev_val = readq_relaxed(vaddr);
 			break;
 		default:
-			spin_unlock(&cpc_desc->rmw_lock);
+			raw_spin_unlock_irqrestore(&cpc_desc->rmw_lock, flags);
 			return -EFAULT;
 		}
 		val = MASK_VAL_WRITE(reg, prev_val, val);
@@ -1174,7 +1175,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	}
 
 	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
-		spin_unlock(&cpc_desc->rmw_lock);
+		raw_spin_unlock_irqrestore(&cpc_desc->rmw_lock, flags);
 
 	return ret_val;
 }
@@ -1916,9 +1917,15 @@ unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf)
 	u64 mul, div;
 
 	if (caps->lowest_freq && caps->nominal_freq) {
-		mul = caps->nominal_freq - caps->lowest_freq;
+		/* Avoid special case when nominal_freq is equal to lowest_freq */
+		if (caps->lowest_freq == caps->nominal_freq) {
+			mul = caps->nominal_freq;
+			div = caps->nominal_perf;
+		} else {
+			mul = caps->nominal_freq - caps->lowest_freq;
+			div = caps->nominal_perf - caps->lowest_perf;
+		}
 		mul *= KHZ_PER_MHZ;
-		div = caps->nominal_perf - caps->lowest_perf;
 		offset = caps->nominal_freq * KHZ_PER_MHZ -
 			 div64_u64(caps->nominal_perf * mul, div);
 	} else {
@@ -1939,11 +1946,17 @@ unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq)
 {
 	s64 retval, offset = 0;
 	static u64 max_khz;
 	u64 mul, div;
 
 	if (caps->lowest_freq && caps->nominal_freq) {
-		mul = caps->nominal_perf - caps->lowest_perf;
-		div = caps->nominal_freq - caps->lowest_freq;
+		/* Avoid special case when nominal_freq is equal to lowest_freq */
+		if (caps->lowest_freq == caps->nominal_freq) {
+			mul = caps->nominal_perf;
+			div = caps->nominal_freq;
+		} else {
+			mul = caps->nominal_perf - caps->lowest_perf;
+			div = caps->nominal_freq - caps->lowest_freq;
+		}
 		/*
 		 * We don't need to convert to kHz for computing offset and can
 		 * directly use nominal_freq and lowest_freq as the div64_u64
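Both converters implement the same linear interpolation between the (lowest_perf, lowest_freq) and (nominal_perf, nominal_freq) calibration points, freq = offset + perf * mul / div; when the two points coincide, mul/div would be 0/0, hence the new fallback to a straight ratio through the origin. A worked example with made-up calibration values:

	static unsigned int example_perf_to_mhz(unsigned int perf)
	{
		/* illustrative calibration: lowest = (10, 1000 MHz), nominal = (20, 2000 MHz) */
		u64 mul = 2000 - 1000;				/* frequency span */
		u64 div = 20 - 10;				/* performance span */
		s64 offset = 2000 - (s64)(20 * mul) / div;	/* 0 for these colinear points */

		return offset + perf * mul / div;		/* perf 15 -> 1500 MHz */
	}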
@@ -52,7 +52,7 @@ struct prm_context_buffer {
 static LIST_HEAD(prm_module_list);
 
 struct prm_handler_info {
-	guid_t guid;
+	efi_guid_t guid;
 	efi_status_t (__efiapi *handler_addr)(u64, void *);
 	u64 static_data_buffer_addr;
 	u64 acpi_param_buffer_addr;
@@ -72,17 +72,21 @@ struct prm_module_info {
 	struct prm_handler_info handlers[] __counted_by(handler_count);
 };
 
-static u64 efi_pa_va_lookup(u64 pa)
+static u64 efi_pa_va_lookup(efi_guid_t *guid, u64 pa)
 {
 	efi_memory_desc_t *md;
 	u64 pa_offset = pa & ~PAGE_MASK;
 	u64 page = pa & PAGE_MASK;
 
 	for_each_efi_memory_desc(md) {
-		if (md->phys_addr < pa && pa < md->phys_addr + PAGE_SIZE * md->num_pages)
+		if ((md->attribute & EFI_MEMORY_RUNTIME) &&
+		    (md->phys_addr < pa && pa < md->phys_addr + PAGE_SIZE * md->num_pages)) {
 			return pa_offset + md->virt_addr + page - md->phys_addr;
+		}
 	}
 
+	pr_warn("Failed to find VA for GUID: %pUL, PA: 0x%llx", guid, pa);
+
 	return 0;
 }
 
@@ -148,9 +152,15 @@ acpi_parse_prmt(union acpi_subtable_headers *header, const unsigned long end)
 		th = &tm->handlers[cur_handler];
 
 		guid_copy(&th->guid, (guid_t *)handler_info->handler_guid);
-		th->handler_addr = (void *)efi_pa_va_lookup(handler_info->handler_address);
-		th->static_data_buffer_addr = efi_pa_va_lookup(handler_info->static_data_buffer_address);
-		th->acpi_param_buffer_addr = efi_pa_va_lookup(handler_info->acpi_param_buffer_address);
+		th->handler_addr =
+			(void *)efi_pa_va_lookup(&th->guid, handler_info->handler_address);
+
+		th->static_data_buffer_addr =
+			efi_pa_va_lookup(&th->guid, handler_info->static_data_buffer_address);
+
+		th->acpi_param_buffer_addr =
+			efi_pa_va_lookup(&th->guid, handler_info->acpi_param_buffer_address);
+
 	} while (++cur_handler < tm->handler_count && (handler_info = get_next_handler(handler_info)));
 
 	return 0;
@@ -277,6 +287,13 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
 		if (!handler || !module)
 			goto invalid_guid;
 
+		if (!handler->handler_addr ||
+		    !handler->static_data_buffer_addr ||
+		    !handler->acpi_param_buffer_addr) {
+			buffer->prm_status = PRM_HANDLER_ERROR;
+			return AE_OK;
+		}
+
 		ACPI_COPY_NAMESEG(context.signature, "PRMC");
 		context.revision = 0x0;
 		context.reserved = 0x0;
@@ -503,6 +503,13 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "17U70P"),
 		},
 	},
+	{
+		/* LG Electronics 16T90SP */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
+			DMI_MATCH(DMI_BOARD_NAME, "16T90SP"),
+		},
+	},
 	{ }
 };
 
@@ -651,6 +651,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
 			/* the scmd has an associated qc */
 			if (!(qc->flags & ATA_QCFLAG_EH)) {
 				/* which hasn't failed yet, timeout */
+				set_host_byte(scmd, DID_TIME_OUT);
 				qc->err_mask |= AC_ERR_TIMEOUT;
 				qc->flags |= ATA_QCFLAG_EH;
 				nr_timedout++;
@@ -26,7 +26,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/rcupdate.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
@@ -2634,7 +2633,6 @@ static const char *dev_uevent_name(const struct kobject *kobj)
 static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
 {
 	const struct device *dev = kobj_to_dev(kobj);
-	struct device_driver *driver;
 	int retval = 0;
 
 	/* add device node properties if present */
@@ -2663,12 +2661,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
 	if (dev->type && dev->type->name)
 		add_uevent_var(env, "DEVTYPE=%s", dev->type->name);
 
-	/* Synchronize with module_remove_driver() */
-	rcu_read_lock();
-	driver = READ_ONCE(dev->driver);
-	if (driver)
-		add_uevent_var(env, "DRIVER=%s", driver->name);
-	rcu_read_unlock();
+	if (dev->driver)
+		add_uevent_var(env, "DRIVER=%s", dev->driver->name);
 
 	/* Add common DT information about the device */
 	of_device_uevent(dev, env);
@@ -2738,8 +2732,11 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr,
 	if (!env)
 		return -ENOMEM;
 
+	/* Synchronize with really_probe() */
+	device_lock(dev);
 	/* let the kset specific function add its keys */
 	retval = kset->uevent_ops->uevent(&dev->kobj, env);
+	device_unlock(dev);
 	if (retval)
 		goto out;
 
@@ -4037,6 +4034,41 @@ int device_for_each_child_reverse(struct device *parent, void *data,
 }
 EXPORT_SYMBOL_GPL(device_for_each_child_reverse);
 
+/**
+ * device_for_each_child_reverse_from - device child iterator in reversed order.
+ * @parent: parent struct device.
+ * @from: optional starting point in child list
+ * @fn: function to be called for each device.
+ * @data: data for the callback.
+ *
+ * Iterate over @parent's child devices, starting at @from, and call @fn
+ * for each, passing it @data. This helper is identical to
+ * device_for_each_child_reverse() when @from is NULL.
+ *
+ * @fn is checked each iteration. If it returns anything other than 0,
+ * iteration stop and that value is returned to the caller of
+ * device_for_each_child_reverse_from();
+ */
+int device_for_each_child_reverse_from(struct device *parent,
+				       struct device *from, const void *data,
+				       int (*fn)(struct device *, const void *))
+{
+	struct klist_iter i;
+	struct device *child;
+	int error = 0;
+
+	if (!parent->p)
+		return 0;
+
+	klist_iter_init_node(&parent->p->klist_children, &i,
+			     (from ? &from->p->knode_parent : NULL));
+	while ((child = prev_device(&i)) && !error)
+		error = fn(child, data);
+	klist_iter_exit(&i);
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from);
+
 /**
  * device_find_child - device iterator for locating a particular device.
  * @parent: parent struct device
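A natural use of the new reverse-from iterator is unwinding work applied to children when a loop over them fails partway. A hypothetical caller (callback and names invented for illustration):

	static int undo_one(struct device *dev, const void *data)
	{
		/* revert whatever was applied to this child; return 0 to keep going */
		return 0;
	}

	static void unwind_children(struct device *parent, struct device *failed)
	{
		/* walk backwards starting at 'failed'; from == NULL walks all children */
		device_for_each_child_reverse_from(parent, failed, NULL, undo_one);
	}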
@@ -7,7 +7,6 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/rcupdate.h>
 #include "base.h"
 
 static char *make_driver_name(const struct device_driver *drv)
@@ -102,9 +101,6 @@ void module_remove_driver(const struct device_driver *drv)
 	if (!drv)
 		return;
 
-	/* Synchronize with dev_uevent() */
-	synchronize_rcu();
-
 	sysfs_remove_link(&drv->p->kobj, "module");
 
 	if (drv->owner)
@@ -674,6 +674,16 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
 */
 void tpm_chip_unregister(struct tpm_chip *chip)
 {
+#ifdef CONFIG_TCG_TPM2_HMAC
+	int rc;
+
+	rc = tpm_try_get_ops(chip);
+	if (!rc) {
+		tpm2_end_auth_session(chip);
+		tpm_put_ops(chip);
+	}
+#endif
+
 	tpm_del_legacy_sysfs(chip);
 	if (tpm_is_hwrng_enabled(chip))
 		hwrng_unregister(&chip->hwrng);
drivers/char/tpm/tpm-dev-common.c
@@ -27,6 +27,9 @@ static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space,
 	struct tpm_header *header = (void *)buf;
 	ssize_t ret, len;
 
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		tpm2_end_auth_session(chip);
+
 	ret = tpm2_prepare_space(chip, space, buf, bufsiz);
 	/* If the command is not implemented by the TPM, synthesize a
 	 * response with a TPM2_RC_COMMAND_CODE return for user-space.
drivers/char/tpm/tpm-interface.c
@@ -379,10 +379,12 @@ int tpm_pm_suspend(struct device *dev)
 
 	rc = tpm_try_get_ops(chip);
 	if (!rc) {
-		if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+			tpm2_end_auth_session(chip);
 			tpm2_shutdown(chip, TPM2_SU_STATE);
-		else
+		} else {
 			rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+		}
 
 		tpm_put_ops(chip);
 	}
drivers/char/tpm/tpm2-sessions.c
@@ -333,6 +333,9 @@ void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
 	}
 
 #ifdef CONFIG_TCG_TPM2_HMAC
+	/* The first write to /dev/tpm{rm0} will flush the session. */
+	attributes |= TPM2_SA_CONTINUE_SESSION;
+
 	/*
 	 * The Architecture Guide requires us to strip trailing zeros
 	 * before computing the HMAC
@@ -484,7 +487,8 @@ static void tpm2_KDFe(u8 z[EC_PT_SZ], const char *str, u8 *pt_u, u8 *pt_v,
 	sha256_final(&sctx, out);
 }
 
-static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
+static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip,
+				struct tpm2_auth *auth)
 {
 	struct crypto_kpp *kpp;
 	struct kpp_request *req;
@@ -543,7 +547,7 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
 	sg_set_buf(&s[0], chip->null_ec_key_x, EC_PT_SZ);
 	sg_set_buf(&s[1], chip->null_ec_key_y, EC_PT_SZ);
 	kpp_request_set_input(req, s, EC_PT_SZ*2);
-	sg_init_one(d, chip->auth->salt, EC_PT_SZ);
+	sg_init_one(d, auth->salt, EC_PT_SZ);
 	kpp_request_set_output(req, d, EC_PT_SZ);
 	crypto_kpp_compute_shared_secret(req);
 	kpp_request_free(req);
@@ -554,8 +558,7 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
 	 * This works because KDFe fully consumes the secret before it
 	 * writes the salt
 	 */
-	tpm2_KDFe(chip->auth->salt, "SECRET", x, chip->null_ec_key_x,
-		  chip->auth->salt);
+	tpm2_KDFe(auth->salt, "SECRET", x, chip->null_ec_key_x, auth->salt);
 
 out:
 	crypto_free_kpp(kpp);
@@ -853,7 +856,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
 		if (rc)
 			/* manually close the session if it wasn't consumed */
 			tpm2_flush_context(chip, auth->handle);
-		memzero_explicit(auth, sizeof(*auth));
+
+		kfree_sensitive(auth);
+		chip->auth = NULL;
 	} else {
 		/* reset for next use */
 		auth->session = TPM_HEADER_SIZE;
@@ -881,7 +886,8 @@ void tpm2_end_auth_session(struct tpm_chip *chip)
 		return;
 
 	tpm2_flush_context(chip, auth->handle);
-	memzero_explicit(auth, sizeof(*auth));
+	kfree_sensitive(auth);
+	chip->auth = NULL;
 }
 EXPORT_SYMBOL(tpm2_end_auth_session);
 
@@ -915,33 +921,37 @@ static int tpm2_parse_start_auth_session(struct tpm2_auth *auth,
 
 static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
 {
-	int rc;
 	unsigned int offset = 0; /* dummy offset for null seed context */
 	u8 name[SHA256_DIGEST_SIZE + 2];
+	u32 tmp_null_key;
+	int rc;
 
 	rc = tpm2_load_context(chip, chip->null_key_context, &offset,
-			       null_key);
-	if (rc != -EINVAL)
-		return rc;
+			       &tmp_null_key);
+	if (rc != -EINVAL) {
+		if (!rc)
+			*null_key = tmp_null_key;
+		goto err;
+	}
 
-	/* an integrity failure may mean the TPM has been reset */
-	dev_err(&chip->dev, "NULL key integrity failure!\n");
-	/* check the null name against what we know */
-	tpm2_create_primary(chip, TPM2_RH_NULL, NULL, name);
-	if (memcmp(name, chip->null_key_name, sizeof(name)) == 0)
-		/* name unchanged, assume transient integrity failure */
-		return rc;
-	/*
-	 * Fatal TPM failure: the NULL seed has actually changed, so
-	 * the TPM must have been illegally reset. All in-kernel TPM
-	 * operations will fail because the NULL primary can't be
-	 * loaded to salt the sessions, but disable the TPM anyway so
-	 * userspace programmes can't be compromised by it.
-	 */
-	dev_err(&chip->dev, "NULL name has changed, disabling TPM due to interference\n");
+	/* Try to re-create null key, given the integrity failure: */
+	rc = tpm2_create_primary(chip, TPM2_RH_NULL, &tmp_null_key, name);
+	if (rc)
+		goto err;
+
+	/* Return null key if the name has not been changed: */
+	if (!memcmp(name, chip->null_key_name, sizeof(name))) {
+		*null_key = tmp_null_key;
+		return 0;
+	}
+
+	/* Deduce from the name change TPM interference: */
+	dev_err(&chip->dev, "null key integrity check failed\n");
+	tpm2_flush_context(chip, tmp_null_key);
 	chip->flags |= TPM_CHIP_FLAG_DISABLE;
 
-	return rc;
+err:
+	return rc ? -ENODEV : 0;
 }
 
 /**
@@ -958,16 +968,20 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
  */
 int tpm2_start_auth_session(struct tpm_chip *chip)
 {
+	struct tpm2_auth *auth;
 	struct tpm_buf buf;
-	struct tpm2_auth *auth = chip->auth;
-	int rc;
 	u32 null_key;
+	int rc;
 
-	if (!auth) {
-		dev_warn_once(&chip->dev, "auth session is not active\n");
+	if (chip->auth) {
+		dev_warn_once(&chip->dev, "auth session is active\n");
 		return 0;
 	}
 
+	auth = kzalloc(sizeof(*auth), GFP_KERNEL);
+	if (!auth)
+		return -ENOMEM;
+
 	rc = tpm2_load_null(chip, &null_key);
 	if (rc)
 		goto out;
@@ -988,7 +1002,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
 	tpm_buf_append(&buf, auth->our_nonce, sizeof(auth->our_nonce));
 
 	/* append encrypted salt and squirrel away unencrypted in auth */
-	tpm_buf_append_salt(&buf, chip);
+	tpm_buf_append_salt(&buf, chip, auth);
 	/* session type (HMAC, audit or policy) */
 	tpm_buf_append_u8(&buf, TPM2_SE_HMAC);
 
@@ -1010,10 +1024,13 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
 
 	tpm_buf_destroy(&buf);
 
-	if (rc)
-		goto out;
+	if (rc == TPM2_RC_SUCCESS) {
+		chip->auth = auth;
+		return 0;
+	}
 
-out:
+out:
+	kfree_sensitive(auth);
 	return rc;
 }
 EXPORT_SYMBOL(tpm2_start_auth_session);
@@ -1347,18 +1364,21 @@ static int tpm2_create_null_primary(struct tpm_chip *chip)
  *
 * Derive and context save the null primary and allocate memory in the
 * struct tpm_chip for the authorizations.
+ *
+ * Return:
+ * * 0 - OK
+ * * -errno - A system error
+ * * TPM_RC - A TPM error
 */
 int tpm2_sessions_init(struct tpm_chip *chip)
 {
 	int rc;
 
 	rc = tpm2_create_null_primary(chip);
-	if (rc)
-		dev_err(&chip->dev, "TPM: security failed (NULL seed derivation): %d\n", rc);
-
-	chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL);
-	if (!chip->auth)
-		return -ENOMEM;
+	if (rc) {
+		dev_err(&chip->dev, "null key creation failed with %d\n", rc);
+		return rc;
+	}
 
 	return rc;
 }
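Net effect of the tpm2-sessions.c hunks: chip->auth is no longer a long-lived buffer allocated in tpm2_sessions_init(); it is allocated per session and freed when the session ends. A rough lifecycle sketch (hypothetical caller, error handling trimmed, based only on the hunks above):

/* Sketch: the auth session now owns its struct tpm2_auth allocation. */
static int tpm2_session_roundtrip(struct tpm_chip *chip)
{
	int rc;

	/* kzalloc()s a fresh tpm2_auth and publishes it as chip->auth
	 * only once TPM2_CC_START_AUTH_SESSION returns TPM2_RC_SUCCESS. */
	rc = tpm2_start_auth_session(chip);
	if (rc)
		return rc;

	/* ... issue HMAC-protected commands using chip->auth ... */

	/* Flushes the session handle and kfree_sensitive()s the auth
	 * structure, leaving chip->auth == NULL for the next session. */
	tpm2_end_auth_session(chip);
	return 0;
}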
drivers/cxl/Kconfig
@@ -60,6 +60,7 @@ config CXL_ACPI
 	default CXL_BUS
 	select ACPI_TABLE_LIB
 	select ACPI_HMAT
+	select CXL_PORT
 	help
 	  Enable support for host managed device memory (HDM) resources
 	  published by a platform's ACPI CXL memory layout description. See
drivers/cxl/Makefile
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: GPL-2.0
+
+# Order is important here for the built-in case:
+# - 'core' first for fundamental init
+# - 'port' before platform root drivers like 'acpi' so that CXL-root ports
+#   are immediately enabled
+# - 'mem' and 'pmem' before endpoint drivers so that memdevs are
+#   immediately enabled
+# - 'pci' last, also mirrors the hardware enumeration hierarchy
 obj-y += core/
-obj-$(CONFIG_CXL_PCI) += cxl_pci.o
-obj-$(CONFIG_CXL_MEM) += cxl_mem.o
+obj-$(CONFIG_CXL_PORT) += cxl_port.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
-obj-$(CONFIG_CXL_PORT) += cxl_port.o
+obj-$(CONFIG_CXL_MEM) += cxl_mem.o
+obj-$(CONFIG_CXL_PCI) += cxl_pci.o
 
-cxl_mem-y := mem.o
-cxl_pci-y := pci.o
+cxl_port-y := port.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o security.o
-cxl_port-y := port.o
+cxl_mem-y := mem.o
+cxl_pci-y := pci.o
drivers/cxl/acpi.c
@@ -924,6 +924,13 @@ static void __exit cxl_acpi_exit(void)
 
 /* load before dax_hmem sees 'Soft Reserved' CXL ranges */
 subsys_initcall(cxl_acpi_init);
+
+/*
+ * Arrange for host-bridge ports to be active synchronous with
+ * cxl_acpi_probe() exit.
+ */
+MODULE_SOFTDEP("pre: cxl_port");
+
 module_exit(cxl_acpi_exit);
 MODULE_DESCRIPTION("CXL ACPI: Platform Support");
 MODULE_LICENSE("GPL v2");
drivers/cxl/core/cdat.c
@@ -641,6 +641,9 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
 	void *ptr;
 	int rc;
 
+	if (!dev_is_pci(cxlds->dev))
+		return -ENODEV;
+
 	if (cxlds->rcd)
 		return -ENODEV;
 
drivers/cxl/core/hdm.c
@@ -712,7 +712,44 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
 	return 0;
 }
 
-static int cxl_decoder_reset(struct cxl_decoder *cxld)
+static int commit_reap(struct device *dev, const void *data)
+{
+	struct cxl_port *port = to_cxl_port(dev->parent);
+	struct cxl_decoder *cxld;
+
+	if (!is_switch_decoder(dev) && !is_endpoint_decoder(dev))
+		return 0;
+
+	cxld = to_cxl_decoder(dev);
+	if (port->commit_end == cxld->id &&
+	    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
+		port->commit_end--;
+		dev_dbg(&port->dev, "reap: %s commit_end: %d\n",
+			dev_name(&cxld->dev), port->commit_end);
+	}
+
+	return 0;
+}
+
+void cxl_port_commit_reap(struct cxl_decoder *cxld)
+{
+	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+
+	lockdep_assert_held_write(&cxl_region_rwsem);
+
+	/*
+	 * Once the highest committed decoder is disabled, free any other
+	 * decoders that were pinned allocated by out-of-order release.
+	 */
+	port->commit_end--;
+	dev_dbg(&port->dev, "reap: %s commit_end: %d\n", dev_name(&cxld->dev),
+		port->commit_end);
+	device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL,
+					   commit_reap);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_commit_reap, CXL);
+
+static void cxl_decoder_reset(struct cxl_decoder *cxld)
 {
 	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
 	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
@@ -721,14 +758,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
 	u32 ctrl;
 
 	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
-		return 0;
+		return;
 
-	if (port->commit_end != id) {
+	if (port->commit_end == id)
+		cxl_port_commit_reap(cxld);
+	else
 		dev_dbg(&port->dev,
 			"%s: out of order reset, expected decoder%d.%d\n",
 			dev_name(&cxld->dev), port->id, port->commit_end);
-		return -EBUSY;
-	}
 
 	down_read(&cxl_dpa_rwsem);
 	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
@@ -741,7 +778,6 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
 	writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
 	up_read(&cxl_dpa_rwsem);
 
-	port->commit_end--;
 	cxld->flags &= ~CXL_DECODER_F_ENABLE;
 
 	/* Userspace is now responsible for reconfiguring this decoder */
@@ -751,8 +787,6 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
 		cxled = to_cxl_endpoint_decoder(&cxld->dev);
 		cxled->state = CXL_DECODER_STATE_MANUAL;
 	}
-
-	return 0;
 }
 
 static int cxl_setup_hdm_decoder_from_dvsec(
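The reap logic above keeps port->commit_end (the id of the highest committed decoder) honest when decoders are shut down out of order. A hedged, userspace toy model of just that bookkeeping (the names and fixed-size array are hypothetical, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Toy port: decoders are ids 0..3 with an enabled flag; commit_end
 * tracks the highest committed id (-1 means none committed). */
struct toy_port {
	bool enabled[4];
	int commit_end;
};

/* Mirror of the reap rule: only when the topmost committed decoder is
 * disabled, walk downward and retire every contiguous disabled decoder. */
static void toy_commit_reap(struct toy_port *port, int id)
{
	if (port->commit_end != id)
		return; /* out-of-order reset: bookkeeping deferred */

	for (int i = id; i >= 0 && !port->enabled[i]; i--)
		port->commit_end--;
}

int main(void)
{
	struct toy_port port = { .enabled = { true, true, true, true },
				 .commit_end = 3 };

	port.enabled[1] = false;	/* out-of-order reset */
	toy_commit_reap(&port, 1);	/* commit_end stays 3 */
	port.enabled[3] = false;
	toy_commit_reap(&port, 3);	/* reaps 3, stops at enabled 2 */
	printf("commit_end=%d\n", port.commit_end);	/* prints 2 */
	port.enabled[2] = false;
	toy_commit_reap(&port, 2);	/* reaps 2 and the stale 1 */
	printf("commit_end=%d\n", port.commit_end);	/* prints 0 */
	return 0;
}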
drivers/cxl/core/port.c
@@ -2084,11 +2084,18 @@ static void cxl_bus_remove(struct device *dev)
 
 static struct workqueue_struct *cxl_bus_wq;
 
+static int cxl_rescan_attach(struct device *dev, void *data)
+{
+	int rc = device_attach(dev);
+
+	dev_vdbg(dev, "rescan: %s\n", rc ? "attach" : "detached");
+
+	return 0;
+}
+
 static void cxl_bus_rescan_queue(struct work_struct *w)
 {
-	int rc = bus_rescan_devices(&cxl_bus_type);
-
-	pr_debug("CXL bus rescan result: %d\n", rc);
+	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_rescan_attach);
 }
 
 void cxl_bus_rescan(void)
drivers/cxl/core/region.c
@@ -232,8 +232,8 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
 				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
 			return 0;
 		} else {
-			dev_err(&cxlr->dev,
-				"Failed to synchronize CPU cache state\n");
+			dev_WARN(&cxlr->dev,
+				 "Failed to synchronize CPU cache state\n");
 			return -ENXIO;
 		}
 	}
@@ -242,19 +242,17 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
 	return 0;
 }
 
-static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
+static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 {
 	struct cxl_region_params *p = &cxlr->params;
-	int i, rc = 0;
+	int i;
 
 	/*
-	 * Before region teardown attempt to flush, and if the flush
-	 * fails cancel the region teardown for data consistency
-	 * concerns
+	 * Before region teardown attempt to flush, evict any data cached for
+	 * this region, or scream loudly about missing arch / platform support
+	 * for CXL teardown.
 	 */
-	rc = cxl_region_invalidate_memregion(cxlr);
-	if (rc)
-		return rc;
+	cxl_region_invalidate_memregion(cxlr);
 
 	for (i = count - 1; i >= 0; i--) {
 		struct cxl_endpoint_decoder *cxled = p->targets[i];
@@ -277,23 +275,17 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 			cxl_rr = cxl_rr_load(iter, cxlr);
 			cxld = cxl_rr->decoder;
 			if (cxld->reset)
-				rc = cxld->reset(cxld);
-			if (rc)
-				return rc;
+				cxld->reset(cxld);
 			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
 		}
 
 endpoint_reset:
-		rc = cxled->cxld.reset(&cxled->cxld);
-		if (rc)
-			return rc;
+		cxled->cxld.reset(&cxled->cxld);
 		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
 	}
 
 	/* all decoders associated with this region have been torn down */
 	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
-
-	return 0;
 }
 
 static int commit_decoder(struct cxl_decoder *cxld)
@@ -409,16 +401,8 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
 	 * still pending.
 	 */
 	if (p->state == CXL_CONFIG_RESET_PENDING) {
-		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
-		/*
-		 * Revert to committed since there may still be active
-		 * decoders associated with this region, or move forward
-		 * to active to mark the reset successful
-		 */
-		if (rc)
-			p->state = CXL_CONFIG_COMMIT;
-		else
-			p->state = CXL_CONFIG_ACTIVE;
+		cxl_region_decode_reset(cxlr, p->interleave_ways);
+		p->state = CXL_CONFIG_ACTIVE;
 	}
 }
 
@@ -794,26 +778,50 @@ static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
 	return rc;
 }
 
+static int check_commit_order(struct device *dev, const void *data)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+	/*
+	 * if port->commit_end is not the only free decoder, then out of
+	 * order shutdown has occurred, block further allocations until
+	 * that is resolved
+	 */
+	if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0))
+		return -EBUSY;
+	return 0;
+}
+
 static int match_free_decoder(struct device *dev, void *data)
 {
+	struct cxl_port *port = to_cxl_port(dev->parent);
 	struct cxl_decoder *cxld;
-	int *id = data;
+	int rc;
 
 	if (!is_switch_decoder(dev))
 		return 0;
 
 	cxld = to_cxl_decoder(dev);
 
-	/* enforce ordered allocation */
-	if (cxld->id != *id)
+	if (cxld->id != port->commit_end + 1)
 		return 0;
 
-	if (!cxld->region)
-		return 1;
+	if (cxld->region) {
+		dev_dbg(dev->parent,
+			"next decoder to commit (%s) is already reserved (%s)\n",
+			dev_name(dev), dev_name(&cxld->region->dev));
+		return 0;
+	}
 
-	(*id)++;
-
-	return 0;
+	rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
+						check_commit_order);
+	if (rc) {
+		dev_dbg(dev->parent,
+			"unable to allocate %s due to out of order shutdown\n",
+			dev_name(dev));
+		return 0;
+	}
+	return 1;
 }
 
 static int match_auto_decoder(struct device *dev, void *data)
@@ -840,7 +848,6 @@ cxl_region_find_decoder(struct cxl_port *port,
 			struct cxl_region *cxlr)
 {
 	struct device *dev;
-	int id = 0;
 
 	if (port == cxled_to_port(cxled))
 		return &cxled->cxld;
@@ -849,7 +856,7 @@ cxl_region_find_decoder(struct cxl_port *port,
 		dev = device_find_child(&port->dev, &cxlr->params,
 					match_auto_decoder);
 	else
-		dev = device_find_child(&port->dev, &id, match_free_decoder);
+		dev = device_find_child(&port->dev, NULL, match_free_decoder);
 	if (!dev)
 		return NULL;
 	/*
@@ -2054,13 +2061,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
 	get_device(&cxlr->dev);
 
 	if (p->state > CXL_CONFIG_ACTIVE) {
-		/*
-		 * TODO: tear down all impacted regions if a device is
-		 * removed out of order
-		 */
-		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
-		if (rc)
-			goto out;
+		cxl_region_decode_reset(cxlr, p->interleave_ways);
 		p->state = CXL_CONFIG_ACTIVE;
 	}
 
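match_free_decoder() now derives the only legal candidate from port->commit_end instead of threading an id counter through the callback, and it refuses to hand out a decoder while an out-of-order shutdown is pending below it. A hedged restatement of the rule with plain arrays (hypothetical helper, not kernel code):

#include <stdbool.h>

/* Toy restatement: the only decoder eligible for a new region is
 * commit_end + 1, it must not already be reserved, and no lower-numbered
 * decoder may still be sitting disabled/unreaped. */
static bool toy_can_allocate(const bool enabled[], const bool reserved[],
			     int commit_end, int id)
{
	if (id != commit_end + 1)
		return false;	/* enforce in-order allocation */
	if (reserved[id])
		return false;	/* already claimed by another region */
	for (int i = id - 1; i >= 0; i--)
		if (!enabled[i])
			return false;	/* out-of-order shutdown pending */
	return true;
}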
drivers/cxl/core/trace.h
@@ -279,7 +279,7 @@ TRACE_EVENT(cxl_generic_event,
 #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR			0x00
 #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR			0x01
 #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x02
-#define show_mem_event_type(type)	__print_symbolic(type,		\
+#define show_gmer_mem_event_type(type)	__print_symbolic(type,		\
 	{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },	\
 	{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" }, \
 	{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,	"Data Path Error" } \
@@ -373,7 +373,7 @@ TRACE_EVENT(cxl_general_media,
 		"hpa=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
-		show_mem_event_type(__entry->type),
+		show_gmer_mem_event_type(__entry->type),
 		show_trans_type(__entry->transaction_type),
 		__entry->channel, __entry->rank, __entry->device,
 		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
@@ -391,6 +391,17 @@ TRACE_EVENT(cxl_general_media,
  * DRAM Event Record defines many fields the same as the General Media Event
  * Record. Reuse those definitions as appropriate.
  */
+#define CXL_DER_MEM_EVT_TYPE_ECC_ERROR			0x00
+#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR	0x01
+#define CXL_DER_MEM_EVT_TYPE_INV_ADDR			0x02
+#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x03
+#define show_dram_mem_event_type(type)	__print_symbolic(type,		\
+	{ CXL_DER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },	\
+	{ CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR,	"Scrub Media ECC Error" }, \
+	{ CXL_DER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" }, \
+	{ CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR,		"Data Path Error" } \
+)
+
 #define CXL_DER_VALID_CHANNEL			BIT(0)
 #define CXL_DER_VALID_RANK			BIT(1)
 #define CXL_DER_VALID_NIBBLE			BIT(2)
@@ -477,7 +488,7 @@ TRACE_EVENT(cxl_dram,
 		"hpa=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
-		show_mem_event_type(__entry->type),
+		show_dram_mem_event_type(__entry->type),
 		show_trans_type(__entry->transaction_type),
 		__entry->channel, __entry->rank, __entry->nibble_mask,
 		__entry->bank_group, __entry->bank,
drivers/cxl/cxl.h
@@ -359,7 +359,7 @@ struct cxl_decoder {
 	struct cxl_region *region;
 	unsigned long flags;
 	int (*commit)(struct cxl_decoder *cxld);
-	int (*reset)(struct cxl_decoder *cxld);
+	void (*reset)(struct cxl_decoder *cxld);
 };
 
 /*
@@ -730,6 +730,7 @@ static inline bool is_cxl_root(struct cxl_port *port)
 int cxl_num_decoders_committed(struct cxl_port *port);
 bool is_cxl_port(const struct device *dev);
 struct cxl_port *to_cxl_port(const struct device *dev);
+void cxl_port_commit_reap(struct cxl_decoder *cxld);
 struct pci_bus;
 int devm_cxl_register_pci_bus(struct device *host, struct device *uport_dev,
 			      struct pci_bus *bus);
drivers/cxl/port.c
@@ -208,7 +208,22 @@ static struct cxl_driver cxl_port_driver = {
 	},
 };
 
-module_cxl_driver(cxl_port_driver);
+static int __init cxl_port_init(void)
+{
+	return cxl_driver_register(&cxl_port_driver);
+}
+/*
+ * Be ready to immediately enable ports emitted by the platform CXL root
+ * (e.g. cxl_acpi) when CONFIG_CXL_PORT=y.
+ */
+subsys_initcall(cxl_port_init);
+
+static void __exit cxl_port_exit(void)
+{
+	cxl_driver_unregister(&cxl_port_driver);
+}
+module_exit(cxl_port_exit);
+
 MODULE_DESCRIPTION("CXL: Port enumeration and services");
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(CXL);
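module_cxl_driver() expands to a module_init() registration, which for built-in code runs at device initcall level (6); subsys_initcall() moves the registration up to level 4, so the port driver is ready before cxl_acpi probes. (The MODULE_SOFTDEP added in drivers/cxl/acpi.c above covers the modular case.) A generic sketch of the pattern, with hypothetical names:

#include <linux/init.h>
#include <linux/module.h>

static int __init example_driver_init(void)
{
	/* Level 4: runs before ordinary module_init()/device_initcall()
	 * (level 6) code when built into the kernel. */
	return 0;
}
subsys_initcall(example_driver_init);

static void __exit example_driver_exit(void)
{
}
module_exit(example_driver_exit);

MODULE_DESCRIPTION("Hypothetical example of early driver registration");
MODULE_LICENSE("GPL");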
Some files were not shown because too many files have changed in this diff.