mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 02:36:21 +00:00
Merge branch 'mana-shared' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Leon Romanovsky says: ==================== net: mana: Allow variable size indirection table Like we talked, I created new shared branch for this patch: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/log/?h=mana-shared * 'mana-shared' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: net: mana: Allow variable size indirection table ==================== Link: https://lore.kernel.org/all/20240612183051.GE4966@unreal Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
commit
ef5513526b
9
.mailmap
9
.mailmap
@ -337,10 +337,11 @@ Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
|
||||
Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
|
||||
Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
|
||||
Kay Sievers <kay.sievers@vrfy.org>
|
||||
Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
|
||||
Kees Cook <keescook@chromium.org> <keescook@google.com>
|
||||
Kees Cook <keescook@chromium.org> <kees@outflux.net>
|
||||
Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
||||
Kees Cook <kees@kernel.org> <kees.cook@canonical.com>
|
||||
Kees Cook <kees@kernel.org> <keescook@chromium.org>
|
||||
Kees Cook <kees@kernel.org> <keescook@google.com>
|
||||
Kees Cook <kees@kernel.org> <kees@outflux.net>
|
||||
Kees Cook <kees@kernel.org> <kees@ubuntu.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||
|
@ -9,8 +9,8 @@ TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
|
||||
|
||||
LiveCD-based tutorials are available at
|
||||
|
||||
http://tomoyo.sourceforge.jp/1.8/ubuntu12.04-live.html
|
||||
http://tomoyo.sourceforge.jp/1.8/centos6-live.html
|
||||
https://tomoyo.sourceforge.net/1.8/ubuntu12.04-live.html
|
||||
https://tomoyo.sourceforge.net/1.8/centos6-live.html
|
||||
|
||||
Though these tutorials use non-LSM version of TOMOYO, they are useful for you
|
||||
to know what TOMOYO is.
|
||||
@ -21,45 +21,32 @@ How to enable TOMOYO?
|
||||
Build the kernel with ``CONFIG_SECURITY_TOMOYO=y`` and pass ``security=tomoyo`` on
|
||||
kernel's command line.
|
||||
|
||||
Please see http://tomoyo.osdn.jp/2.5/ for details.
|
||||
Please see https://tomoyo.sourceforge.net/2.6/ for details.
|
||||
|
||||
Where is documentation?
|
||||
=======================
|
||||
|
||||
User <-> Kernel interface documentation is available at
|
||||
https://tomoyo.osdn.jp/2.5/policy-specification/index.html .
|
||||
https://tomoyo.sourceforge.net/2.6/policy-specification/index.html .
|
||||
|
||||
Materials we prepared for seminars and symposiums are available at
|
||||
https://osdn.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/ .
|
||||
Below lists are chosen from three aspects.
|
||||
|
||||
What is TOMOYO?
|
||||
TOMOYO Linux Overview
|
||||
https://osdn.jp/projects/tomoyo/docs/lca2009-takeda.pdf
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/lca2009-takeda.pdf
|
||||
TOMOYO Linux: pragmatic and manageable security for Linux
|
||||
https://osdn.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/freedomhectaipei-tomoyo.pdf
|
||||
TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
|
||||
https://osdn.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/PacSec2007-en-no-demo.pdf
|
||||
|
||||
What can TOMOYO do?
|
||||
Deep inside TOMOYO Linux
|
||||
https://osdn.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/lca2009-kumaneko.pdf
|
||||
The role of "pathname based access control" in security.
|
||||
https://osdn.jp/projects/tomoyo/docs/lfj2008-bof.pdf
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/lfj2008-bof.pdf
|
||||
|
||||
History of TOMOYO?
|
||||
Realities of Mainlining
|
||||
https://osdn.jp/projects/tomoyo/docs/lfj2008.pdf
|
||||
|
||||
What is future plan?
|
||||
====================
|
||||
|
||||
We believe that inode based security and name based security are complementary
|
||||
and both should be used together. But unfortunately, so far, we cannot enable
|
||||
multiple LSM modules at the same time. We feel sorry that you have to give up
|
||||
SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
|
||||
|
||||
We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
|
||||
version of TOMOYO, available at http://tomoyo.osdn.jp/1.8/ .
|
||||
LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
|
||||
to port non-LSM version's functionalities to LSM versions.
|
||||
https://sourceforge.net/projects/tomoyo/files/docs/lfj2008.pdf
|
||||
|
@ -1921,6 +1921,28 @@
|
||||
Format:
|
||||
<bus_id>,<clkrate>
|
||||
|
||||
i2c_touchscreen_props= [HW,ACPI,X86]
|
||||
Set device-properties for ACPI-enumerated I2C-attached
|
||||
touchscreen, to e.g. fix coordinates of upside-down
|
||||
mounted touchscreens. If you need this option please
|
||||
submit a drivers/platform/x86/touchscreen_dmi.c patch
|
||||
adding a DMI quirk for this.
|
||||
|
||||
Format:
|
||||
<ACPI_HW_ID>:<prop_name>=<val>[:prop_name=val][:...]
|
||||
Where <val> is one of:
|
||||
Omit "=<val>" entirely Set a boolean device-property
|
||||
Unsigned number Set a u32 device-property
|
||||
Anything else Set a string device-property
|
||||
|
||||
Examples (split over multiple lines):
|
||||
i2c_touchscreen_props=GDIX1001:touchscreen-inverted-x:
|
||||
touchscreen-inverted-y
|
||||
|
||||
i2c_touchscreen_props=MSSL1680:touchscreen-size-x=1920:
|
||||
touchscreen-size-y=1080:touchscreen-inverted-y:
|
||||
firmware-name=gsl1680-vendor-model.fw:silead,home-button
|
||||
|
||||
i8042.debug [HW] Toggle i8042 debug mode
|
||||
i8042.unmask_kbd_data
|
||||
[HW] Enable printing of interrupt data from the KBD port
|
||||
|
@ -467,11 +467,11 @@ anon_fault_fallback_charge
|
||||
instead falls back to using huge pages with lower orders or
|
||||
small pages even though the allocation was successful.
|
||||
|
||||
anon_swpout
|
||||
swpout
|
||||
is incremented every time a huge page is swapped out in one
|
||||
piece without splitting.
|
||||
|
||||
anon_swpout_fallback
|
||||
swpout_fallback
|
||||
is incremented if a huge page has to be split before swapout.
|
||||
Usually because failed to allocate some continuous swap space
|
||||
for the huge page.
|
||||
|
@ -65,4 +65,6 @@ the extension, or may have deliberately removed it from the listing.
|
||||
Misaligned accesses
|
||||
-------------------
|
||||
|
||||
Misaligned accesses are supported in userspace, but they may perform poorly.
|
||||
Misaligned scalar accesses are supported in userspace, but they may perform
|
||||
poorly. Misaligned vector accesses are only supported if the Zicclsm extension
|
||||
is supported.
|
||||
|
@ -217,7 +217,7 @@ current *struct* is::
|
||||
int (*media_changed)(struct cdrom_device_info *, int);
|
||||
int (*tray_move)(struct cdrom_device_info *, int);
|
||||
int (*lock_door)(struct cdrom_device_info *, int);
|
||||
int (*select_speed)(struct cdrom_device_info *, int);
|
||||
int (*select_speed)(struct cdrom_device_info *, unsigned long);
|
||||
int (*get_last_session) (struct cdrom_device_info *,
|
||||
struct cdrom_multisession *);
|
||||
int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
|
||||
@ -396,7 +396,7 @@ action need be taken, and the return value should be 0.
|
||||
|
||||
::
|
||||
|
||||
int select_speed(struct cdrom_device_info *cdi, int speed)
|
||||
int select_speed(struct cdrom_device_info *cdi, unsigned long speed)
|
||||
|
||||
Some CD-ROM drives are capable of changing their head-speed. There
|
||||
are several reasons for changing the speed of a CD-ROM drive. Badly
|
||||
|
@ -192,7 +192,7 @@ alignment larger than PAGE_SIZE.
|
||||
|
||||
Dynamic swiotlb
|
||||
---------------
|
||||
When CONFIG_DYNAMIC_SWIOTLB is enabled, swiotlb can do on-demand expansion of
|
||||
When CONFIG_SWIOTLB_DYNAMIC is enabled, swiotlb can do on-demand expansion of
|
||||
the amount of memory available for allocation as bounce buffers. If a bounce
|
||||
buffer request fails due to lack of available space, an asynchronous background
|
||||
task is kicked off to allocate memory from general system memory and turn it
|
||||
|
@ -54,11 +54,10 @@ unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
mlahb: ahb@38000000 {
|
||||
ahb {
|
||||
compatible = "st,mlahb", "simple-bus";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
reg = <0x10000000 0x40000>;
|
||||
ranges;
|
||||
dma-ranges = <0x00000000 0x38000000 0x10000>,
|
||||
<0x10000000 0x10000000 0x60000>,
|
||||
|
@ -57,17 +57,17 @@ properties:
|
||||
- const: allwinner,sun8i-v3s
|
||||
|
||||
- description: Anbernic RG35XX (2024)
|
||||
- items:
|
||||
items:
|
||||
- const: anbernic,rg35xx-2024
|
||||
- const: allwinner,sun50i-h700
|
||||
|
||||
- description: Anbernic RG35XX Plus
|
||||
- items:
|
||||
items:
|
||||
- const: anbernic,rg35xx-plus
|
||||
- const: allwinner,sun50i-h700
|
||||
|
||||
- description: Anbernic RG35XX H
|
||||
- items:
|
||||
items:
|
||||
- const: anbernic,rg35xx-h
|
||||
- const: allwinner,sun50i-h700
|
||||
|
||||
|
@ -18,9 +18,12 @@ allOf:
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- elan,ekth6915
|
||||
- ilitek,ili2901
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- elan,ekth5015m
|
||||
- const: elan,ekth6915
|
||||
- const: elan,ekth6915
|
||||
|
||||
reg:
|
||||
const: 0x10
|
||||
@ -33,6 +36,12 @@ properties:
|
||||
reset-gpios:
|
||||
description: Reset GPIO; not all touchscreens using eKTH6915 hook this up.
|
||||
|
||||
no-reset-on-power-off:
|
||||
type: boolean
|
||||
description:
|
||||
Reset line is wired so that it can (and should) be left deasserted when
|
||||
the power supply is off.
|
||||
|
||||
vcc33-supply:
|
||||
description: The 3.3V supply to the touchscreen.
|
||||
|
||||
@ -58,8 +67,8 @@ examples:
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ap_ts: touchscreen@10 {
|
||||
compatible = "elan,ekth6915";
|
||||
touchscreen@10 {
|
||||
compatible = "elan,ekth5015m", "elan,ekth6915";
|
||||
reg = <0x10>;
|
||||
|
||||
interrupt-parent = <&tlmm>;
|
||||
|
66
Documentation/devicetree/bindings/input/ilitek,ili2901.yaml
Normal file
66
Documentation/devicetree/bindings/input/ilitek,ili2901.yaml
Normal file
@ -0,0 +1,66 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/input/ilitek,ili2901.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Ilitek ILI2901 touchscreen controller
|
||||
|
||||
maintainers:
|
||||
- Jiri Kosina <jkosina@suse.com>
|
||||
|
||||
description:
|
||||
Supports the Ilitek ILI2901 touchscreen controller.
|
||||
This touchscreen controller uses the i2c-hid protocol with a reset GPIO.
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/input/touchscreen/touchscreen.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- ilitek,ili2901
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
panel: true
|
||||
|
||||
reset-gpios:
|
||||
maxItems: 1
|
||||
|
||||
vcc33-supply: true
|
||||
|
||||
vccio-supply: true
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- vcc33-supply
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
|
||||
i2c {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
touchscreen@41 {
|
||||
compatible = "ilitek,ili2901";
|
||||
reg = <0x41>;
|
||||
|
||||
interrupt-parent = <&tlmm>;
|
||||
interrupts = <9 IRQ_TYPE_LEVEL_LOW>;
|
||||
|
||||
reset-gpios = <&tlmm 8 GPIO_ACTIVE_LOW>;
|
||||
vcc33-supply = <&pp3300_ts>;
|
||||
};
|
||||
};
|
@ -24,6 +24,7 @@ properties:
|
||||
|
||||
managers:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
description:
|
||||
List of the PD69208T4/PD69204T4/PD69208M PSE managers. Each manager
|
||||
have 4 or 8 physical ports according to the chip version. No need to
|
||||
@ -47,8 +48,9 @@ properties:
|
||||
- "#size-cells"
|
||||
|
||||
patternProperties:
|
||||
"^manager@0[0-9a-b]$":
|
||||
"^manager@[0-9a-b]$":
|
||||
type: object
|
||||
additionalProperties: false
|
||||
description:
|
||||
PD69208T4/PD69204T4/PD69208M PSE manager exposing 4 or 8 physical
|
||||
ports.
|
||||
@ -69,9 +71,14 @@ properties:
|
||||
patternProperties:
|
||||
'^port@[0-7]$':
|
||||
type: object
|
||||
additionalProperties: false
|
||||
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- reg
|
||||
additionalProperties: false
|
||||
|
||||
required:
|
||||
- reg
|
||||
|
@ -29,13 +29,31 @@ properties:
|
||||
of the ports conversion matrix that establishes relationship between
|
||||
the logical ports and the physical channels.
|
||||
type: object
|
||||
additionalProperties: false
|
||||
|
||||
properties:
|
||||
"#address-cells":
|
||||
const: 1
|
||||
|
||||
"#size-cells":
|
||||
const: 0
|
||||
|
||||
patternProperties:
|
||||
'^channel@[0-7]$':
|
||||
type: object
|
||||
additionalProperties: false
|
||||
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- reg
|
||||
|
||||
required:
|
||||
- "#address-cells"
|
||||
- "#size-cells"
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
required:
|
||||
|
@ -150,6 +150,12 @@ applicable everywhere (see syntax).
|
||||
That will limit the usefulness but on the other hand avoid
|
||||
the illegal configurations all over.
|
||||
|
||||
If "select" <symbol> is followed by "if" <expr>, <symbol> will be
|
||||
selected by the logical AND of the value of the current menu symbol
|
||||
and <expr>. This means, the lower limit can be downgraded due to the
|
||||
presence of "if" <expr>. This behavior may seem weird, but we rely on
|
||||
it. (The future of this behavior is undecided.)
|
||||
|
||||
- weak reverse dependencies: "imply" <symbol> ["if" <expr>]
|
||||
|
||||
This is similar to "select" as it enforces a lower limit on another
|
||||
@ -184,7 +190,7 @@ applicable everywhere (see syntax).
|
||||
ability to hook into a secondary subsystem while allowing the user to
|
||||
configure that subsystem out without also having to unset these drivers.
|
||||
|
||||
Note: If the combination of FOO=y and BAR=m causes a link error,
|
||||
Note: If the combination of FOO=y and BAZ=m causes a link error,
|
||||
you can guard the function call with IS_REACHABLE()::
|
||||
|
||||
foo_init()
|
||||
@ -202,6 +208,10 @@ applicable everywhere (see syntax).
|
||||
imply BAR
|
||||
imply BAZ
|
||||
|
||||
Note: If "imply" <symbol> is followed by "if" <expr>, the default of <symbol>
|
||||
will be the logical AND of the value of the current menu symbol and <expr>.
|
||||
(The future of this behavior is undecided.)
|
||||
|
||||
- limiting menu display: "visible if" <expr>
|
||||
|
||||
This attribute is only applicable to menu blocks, if the condition is
|
||||
|
@ -349,6 +349,10 @@ attribute-sets:
|
||||
Number of packets dropped due to transient lack of resources, such as
|
||||
buffer space, host descriptors etc.
|
||||
type: uint
|
||||
-
|
||||
name: rx-csum-complete
|
||||
doc: Number of packets that were marked as CHECKSUM_COMPLETE.
|
||||
type: uint
|
||||
-
|
||||
name: rx-csum-unnecessary
|
||||
doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
|
||||
|
@ -329,24 +329,23 @@ XDP_SHARED_UMEM option and provide the initial socket's fd in the
|
||||
sxdp_shared_umem_fd field as you registered the UMEM on that
|
||||
socket. These two sockets will now share one and the same UMEM.
|
||||
|
||||
In this case, it is possible to use the NIC's packet steering
|
||||
capabilities to steer the packets to the right queue. This is not
|
||||
possible in the previous example as there is only one queue shared
|
||||
among sockets, so the NIC cannot do this steering as it can only steer
|
||||
between queues.
|
||||
There is no need to supply an XDP program like the one in the previous
|
||||
case where sockets were bound to the same queue id and
|
||||
device. Instead, use the NIC's packet steering capabilities to steer
|
||||
the packets to the right queue. In the previous example, there is only
|
||||
one queue shared among sockets, so the NIC cannot do this steering. It
|
||||
can only steer between queues.
|
||||
|
||||
In libxdp (or libbpf prior to version 1.0), you need to use the
|
||||
xsk_socket__create_shared() API as it takes a reference to a FILL ring
|
||||
and a COMPLETION ring that will be created for you and bound to the
|
||||
shared UMEM. You can use this function for all the sockets you create,
|
||||
or you can use it for the second and following ones and use
|
||||
xsk_socket__create() for the first one. Both methods yield the same
|
||||
result.
|
||||
In libbpf, you need to use the xsk_socket__create_shared() API as it
|
||||
takes a reference to a FILL ring and a COMPLETION ring that will be
|
||||
created for you and bound to the shared UMEM. You can use this
|
||||
function for all the sockets you create, or you can use it for the
|
||||
second and following ones and use xsk_socket__create() for the first
|
||||
one. Both methods yield the same result.
|
||||
|
||||
Note that a UMEM can be shared between sockets on the same queue id
|
||||
and device, as well as between queues on the same device and between
|
||||
devices at the same time. It is also possible to redirect to any
|
||||
socket as long as it is bound to the same umem with XDP_SHARED_UMEM.
|
||||
devices at the same time.
|
||||
|
||||
XDP_USE_NEED_WAKEUP bind flag
|
||||
-----------------------------
|
||||
@ -823,10 +822,6 @@ A: The short answer is no, that is not supported at the moment. The
|
||||
switch, or other distribution mechanism, in your NIC to direct
|
||||
traffic to the correct queue id and socket.
|
||||
|
||||
Note that if you are using the XDP_SHARED_UMEM option, it is
|
||||
possible to switch traffic between any socket bound to the same
|
||||
umem.
|
||||
|
||||
Q: My packets are sometimes corrupted. What is wrong?
|
||||
|
||||
A: Care has to be taken not to feed the same buffer in the UMEM into
|
||||
|
@ -227,7 +227,7 @@ preferably including links to previous postings, for example::
|
||||
The amount of mooing will depend on packet rate so should match
|
||||
the diurnal cycle quite well.
|
||||
|
||||
Signed-of-by: Joe Defarmer <joe@barn.org>
|
||||
Signed-off-by: Joe Defarmer <joe@barn.org>
|
||||
---
|
||||
v3:
|
||||
- add a note about time-of-day mooing fluctuation to the commit message
|
||||
|
@ -582,7 +582,7 @@ depending on the hardware. In all cases, however, only routes that have the
|
||||
Devices generating the streams may allow enabling and disabling some of the
|
||||
routes or have a fixed routing configuration. If the routes can be disabled, not
|
||||
declaring the routes (or declaring them without
|
||||
``VIDIOC_SUBDEV_STREAM_FL_ACTIVE`` flag set) in ``VIDIOC_SUBDEV_S_ROUTING`` will
|
||||
``V4L2_SUBDEV_STREAM_FL_ACTIVE`` flag set) in ``VIDIOC_SUBDEV_S_ROUTING`` will
|
||||
disable the routes. ``VIDIOC_SUBDEV_S_ROUTING`` will still return such routes
|
||||
back to the user in the routes array, with the ``V4L2_SUBDEV_STREAM_FL_ACTIVE``
|
||||
flag unset.
|
||||
|
@ -1107,7 +1107,6 @@ L: linux-pm@vger.kernel.org
|
||||
S: Supported
|
||||
F: Documentation/admin-guide/pm/amd-pstate.rst
|
||||
F: drivers/cpufreq/amd-pstate*
|
||||
F: include/linux/amd-pstate.h
|
||||
F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
|
||||
|
||||
AMD PTDMA DRIVER
|
||||
@ -3854,6 +3853,7 @@ BPF JIT for ARM64
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Alexei Starovoitov <ast@kernel.org>
|
||||
M: Puranjay Mohan <puranjay@kernel.org>
|
||||
R: Xu Kuohai <xukuohai@huaweicloud.com>
|
||||
L: bpf@vger.kernel.org
|
||||
S: Supported
|
||||
F: arch/arm64/net/
|
||||
@ -5187,7 +5187,6 @@ F: Documentation/devicetree/bindings/media/i2c/chrontel,ch7322.yaml
|
||||
F: drivers/media/cec/i2c/ch7322.c
|
||||
|
||||
CIRRUS LOGIC AUDIO CODEC DRIVERS
|
||||
M: James Schulman <james.schulman@cirrus.com>
|
||||
M: David Rhodes <david.rhodes@cirrus.com>
|
||||
M: Richard Fitzgerald <rf@opensource.cirrus.com>
|
||||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
@ -15238,7 +15237,6 @@ F: drivers/staging/most/
|
||||
F: include/linux/most.h
|
||||
|
||||
MOTORCOMM PHY DRIVER
|
||||
M: Peter Geis <pgwipeout@gmail.com>
|
||||
M: Frank <Frank.Sae@motor-comm.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -21316,7 +21314,7 @@ F: arch/riscv/boot/dts/starfive/
|
||||
|
||||
STARFIVE DWMAC GLUE LAYER
|
||||
M: Emil Renner Berthing <kernel@esmil.dk>
|
||||
M: Samin Guo <samin.guo@starfivetech.com>
|
||||
M: Minda Chen <minda.chen@starfivetech.com>
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
|
||||
F: drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
|
||||
@ -22679,7 +22677,7 @@ L: tomoyo-users-en@lists.osdn.me (subscribers-only, for users in English)
|
||||
L: tomoyo-dev@lists.osdn.me (subscribers-only, for developers in Japanese)
|
||||
L: tomoyo-users@lists.osdn.me (subscribers-only, for users in Japanese)
|
||||
S: Maintained
|
||||
W: https://tomoyo.osdn.jp/
|
||||
W: https://tomoyo.sourceforge.net/
|
||||
F: security/tomoyo/
|
||||
|
||||
TOPSTAR LAPTOP EXTRAS DRIVER
|
||||
|
2
Makefile
2
Makefile
@ -2,7 +2,7 @@
|
||||
VERSION = 6
|
||||
PATCHLEVEL = 10
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc1
|
||||
EXTRAVERSION = -rc3
|
||||
NAME = Baby Opossum Posse
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -39,7 +39,7 @@
|
||||
|
||||
/************** Functions that the back-end must provide **************/
|
||||
/* Extension for 32-bit operations. */
|
||||
inline u8 zext(u8 *buf, u8 rd);
|
||||
u8 zext(u8 *buf, u8 rd);
|
||||
/***** Moves *****/
|
||||
u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
|
||||
u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
|
||||
|
@ -62,7 +62,7 @@ enum {
|
||||
* If/when we decide to add ARCv2 instructions that do use register pairs,
|
||||
* the mapping, hopefully, doesn't need to be revisited.
|
||||
*/
|
||||
const u8 bpf2arc[][2] = {
|
||||
static const u8 bpf2arc[][2] = {
|
||||
/* Return value from in-kernel function, and exit value from eBPF */
|
||||
[BPF_REG_0] = {ARC_R_8, ARC_R_9},
|
||||
/* Arguments from eBPF program to in-kernel function */
|
||||
@ -1302,7 +1302,7 @@ static u8 arc_b(u8 *buf, s32 offset)
|
||||
|
||||
/************* Packers (Deal with BPF_REGs) **************/
|
||||
|
||||
inline u8 zext(u8 *buf, u8 rd)
|
||||
u8 zext(u8 *buf, u8 rd)
|
||||
{
|
||||
if (rd != BPF_REG_FP)
|
||||
return arc_movi_r(buf, REG_HI(rd), 0);
|
||||
@ -2235,6 +2235,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
|
||||
break;
|
||||
default:
|
||||
/* The caller must have handled this. */
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
@ -2253,6 +2254,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
|
||||
break;
|
||||
default:
|
||||
/* The caller must have handled this. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2517,7 +2519,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
|
||||
#define JCC64_NR_OF_JMPS 3 /* Number of jumps in jcc64 template. */
|
||||
#define JCC64_INSNS_TO_END 3 /* Number of insn. inclusive the 2nd jmp to end. */
|
||||
#define JCC64_SKIP_JMP 1 /* Index of the "skip" jump to "end". */
|
||||
const struct {
|
||||
static const struct {
|
||||
/*
|
||||
* "jit_off" is common between all "jmp[]" and is coupled with
|
||||
* "cond" of each "jmp[]" instance. e.g.:
|
||||
@ -2883,7 +2885,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
|
||||
* The "ARC_CC_SET" becomes "CC_unequal" because of the "tst"
|
||||
* instruction that precedes the conditional branch.
|
||||
*/
|
||||
const u8 arcv2_32_jmps[ARC_CC_LAST] = {
|
||||
static const u8 arcv2_32_jmps[ARC_CC_LAST] = {
|
||||
[ARC_CC_UGT] = CC_great_u,
|
||||
[ARC_CC_UGE] = CC_great_eq_u,
|
||||
[ARC_CC_ULT] = CC_less_u,
|
||||
|
@ -159,7 +159,7 @@ static void jit_dump(const struct jit_context *ctx)
|
||||
/* Initialise the context so there's no garbage. */
|
||||
static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
|
||||
{
|
||||
memset(ctx, 0, sizeof(ctx));
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
||||
ctx->orig_prog = prog;
|
||||
|
||||
@ -167,7 +167,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
|
||||
ctx->prog = bpf_jit_blind_constants(prog);
|
||||
if (IS_ERR(ctx->prog))
|
||||
return PTR_ERR(ctx->prog);
|
||||
ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true);
|
||||
ctx->blinded = (ctx->prog != ctx->orig_prog);
|
||||
|
||||
/* If the verifier doesn't zero-extend, then we have to do it. */
|
||||
ctx->do_zext = !ctx->prog->aux->verifier_zext;
|
||||
@ -1182,12 +1182,12 @@ static int jit_prepare(struct jit_context *ctx)
|
||||
}
|
||||
|
||||
/*
|
||||
* All the "handle_*()" functions have been called before by the
|
||||
* "jit_prepare()". If there was an error, we would know by now.
|
||||
* Therefore, no extra error checking at this point, other than
|
||||
* a sanity check at the end that expects the calculated length
|
||||
* (jit.len) to be equal to the length of generated instructions
|
||||
* (jit.index).
|
||||
* jit_compile() is the real compilation phase. jit_prepare() is
|
||||
* invoked before jit_compile() as a dry-run to make sure everything
|
||||
* will go OK and allocate the necessary memory.
|
||||
*
|
||||
* In the end, jit_compile() checks if it has produced the same number
|
||||
* of instructions as jit_prepare() would.
|
||||
*/
|
||||
static int jit_compile(struct jit_context *ctx)
|
||||
{
|
||||
@ -1407,9 +1407,9 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
|
||||
|
||||
/*
|
||||
* This function may be invoked twice for the same stream of BPF
|
||||
* instructions. The "extra pass" happens, when there are "call"s
|
||||
* involved that their addresses are not known during the first
|
||||
* invocation.
|
||||
* instructions. The "extra pass" happens, when there are
|
||||
* (re)locations involved that their addresses are not known
|
||||
* during the first run.
|
||||
*/
|
||||
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
|
||||
{
|
||||
|
@ -146,7 +146,7 @@
|
||||
/* Coprocessor traps */
|
||||
.macro __init_el2_cptr
|
||||
__check_hvhe .LnVHE_\@, x1
|
||||
mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
|
||||
mov x0, #CPACR_ELx_FPEN
|
||||
msr cpacr_el1, x0
|
||||
b .Lskip_set_cptr_\@
|
||||
.LnVHE_\@:
|
||||
@ -277,7 +277,7 @@
|
||||
|
||||
// (h)VHE case
|
||||
mrs x0, cpacr_el1 // Disable SVE traps
|
||||
orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
|
||||
orr x0, x0, #CPACR_ELx_ZEN
|
||||
msr cpacr_el1, x0
|
||||
b .Lskip_set_cptr_\@
|
||||
|
||||
@ -298,7 +298,7 @@
|
||||
|
||||
// (h)VHE case
|
||||
mrs x0, cpacr_el1 // Disable SME traps
|
||||
orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
|
||||
orr x0, x0, #CPACR_ELx_SMEN
|
||||
msr cpacr_el1, x0
|
||||
b .Lskip_set_cptr_sme_\@
|
||||
|
||||
|
@ -153,8 +153,9 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
|
||||
* emit the large TLP from the CPU.
|
||||
*/
|
||||
|
||||
static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
|
||||
const u32 *from, size_t count)
|
||||
static __always_inline void
|
||||
__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
|
||||
size_t count)
|
||||
{
|
||||
switch (count) {
|
||||
case 8:
|
||||
@ -196,24 +197,22 @@ static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
|
||||
|
||||
void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
|
||||
|
||||
static inline void __const_iowrite32_copy(void __iomem *to, const void *from,
|
||||
size_t count)
|
||||
static __always_inline void
|
||||
__iowrite32_copy(void __iomem *to, const void *from, size_t count)
|
||||
{
|
||||
if (count == 8 || count == 4 || count == 2 || count == 1) {
|
||||
if (__builtin_constant_p(count) &&
|
||||
(count == 8 || count == 4 || count == 2 || count == 1)) {
|
||||
__const_memcpy_toio_aligned32(to, from, count);
|
||||
dgh();
|
||||
} else {
|
||||
__iowrite32_copy_full(to, from, count);
|
||||
}
|
||||
}
|
||||
#define __iowrite32_copy __iowrite32_copy
|
||||
|
||||
#define __iowrite32_copy(to, from, count) \
|
||||
(__builtin_constant_p(count) ? \
|
||||
__const_iowrite32_copy(to, from, count) : \
|
||||
__iowrite32_copy_full(to, from, count))
|
||||
|
||||
static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
|
||||
const u64 *from, size_t count)
|
||||
static __always_inline void
|
||||
__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
|
||||
size_t count)
|
||||
{
|
||||
switch (count) {
|
||||
case 8:
|
||||
@ -255,21 +254,18 @@ static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
|
||||
|
||||
void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
|
||||
|
||||
static inline void __const_iowrite64_copy(void __iomem *to, const void *from,
|
||||
size_t count)
|
||||
static __always_inline void
|
||||
__iowrite64_copy(void __iomem *to, const void *from, size_t count)
|
||||
{
|
||||
if (count == 8 || count == 4 || count == 2 || count == 1) {
|
||||
if (__builtin_constant_p(count) &&
|
||||
(count == 8 || count == 4 || count == 2 || count == 1)) {
|
||||
__const_memcpy_toio_aligned64(to, from, count);
|
||||
dgh();
|
||||
} else {
|
||||
__iowrite64_copy_full(to, from, count);
|
||||
}
|
||||
}
|
||||
|
||||
#define __iowrite64_copy(to, from, count) \
|
||||
(__builtin_constant_p(count) ? \
|
||||
__const_iowrite64_copy(to, from, count) : \
|
||||
__iowrite64_copy_full(to, from, count))
|
||||
#define __iowrite64_copy __iowrite64_copy
|
||||
|
||||
/*
|
||||
* I/O memory mapping functions.
|
||||
|
@ -305,6 +305,12 @@
|
||||
GENMASK(19, 14) | \
|
||||
BIT(11))
|
||||
|
||||
#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \
|
||||
GENMASK(27, 26) | \
|
||||
GENMASK(23, 22) | \
|
||||
GENMASK(19, 18) | \
|
||||
GENMASK(15, 0))
|
||||
|
||||
/* Hyp Debug Configuration Register bits */
|
||||
#define MDCR_EL2_E2TB_MASK (UL(0x3))
|
||||
#define MDCR_EL2_E2TB_SHIFT (UL(24))
|
||||
|
@ -557,6 +557,68 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
|
||||
vcpu_set_flag((v), e); \
|
||||
} while (0)
|
||||
|
||||
#define __build_check_all_or_none(r, bits) \
|
||||
BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
|
||||
|
||||
#define __cpacr_to_cptr_clr(clr, set) \
|
||||
({ \
|
||||
u64 cptr = 0; \
|
||||
\
|
||||
if ((set) & CPACR_ELx_FPEN) \
|
||||
cptr |= CPTR_EL2_TFP; \
|
||||
if ((set) & CPACR_ELx_ZEN) \
|
||||
cptr |= CPTR_EL2_TZ; \
|
||||
if ((set) & CPACR_ELx_SMEN) \
|
||||
cptr |= CPTR_EL2_TSM; \
|
||||
if ((clr) & CPACR_ELx_TTA) \
|
||||
cptr |= CPTR_EL2_TTA; \
|
||||
if ((clr) & CPTR_EL2_TAM) \
|
||||
cptr |= CPTR_EL2_TAM; \
|
||||
if ((clr) & CPTR_EL2_TCPAC) \
|
||||
cptr |= CPTR_EL2_TCPAC; \
|
||||
\
|
||||
cptr; \
|
||||
})
|
||||
|
||||
#define __cpacr_to_cptr_set(clr, set) \
|
||||
({ \
|
||||
u64 cptr = 0; \
|
||||
\
|
||||
if ((clr) & CPACR_ELx_FPEN) \
|
||||
cptr |= CPTR_EL2_TFP; \
|
||||
if ((clr) & CPACR_ELx_ZEN) \
|
||||
cptr |= CPTR_EL2_TZ; \
|
||||
if ((clr) & CPACR_ELx_SMEN) \
|
||||
cptr |= CPTR_EL2_TSM; \
|
||||
if ((set) & CPACR_ELx_TTA) \
|
||||
cptr |= CPTR_EL2_TTA; \
|
||||
if ((set) & CPTR_EL2_TAM) \
|
||||
cptr |= CPTR_EL2_TAM; \
|
||||
if ((set) & CPTR_EL2_TCPAC) \
|
||||
cptr |= CPTR_EL2_TCPAC; \
|
||||
\
|
||||
cptr; \
|
||||
})
|
||||
|
||||
#define cpacr_clear_set(clr, set) \
|
||||
do { \
|
||||
BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
|
||||
BUILD_BUG_ON((clr) & CPACR_ELx_E0POE); \
|
||||
__build_check_all_or_none((clr), CPACR_ELx_FPEN); \
|
||||
__build_check_all_or_none((set), CPACR_ELx_FPEN); \
|
||||
__build_check_all_or_none((clr), CPACR_ELx_ZEN); \
|
||||
__build_check_all_or_none((set), CPACR_ELx_ZEN); \
|
||||
__build_check_all_or_none((clr), CPACR_ELx_SMEN); \
|
||||
__build_check_all_or_none((set), CPACR_ELx_SMEN); \
|
||||
\
|
||||
if (has_vhe() || has_hvhe()) \
|
||||
sysreg_clear_set(cpacr_el1, clr, set); \
|
||||
else \
|
||||
sysreg_clear_set(cptr_el2, \
|
||||
__cpacr_to_cptr_clr(clr, set), \
|
||||
__cpacr_to_cptr_set(clr, set));\
|
||||
} while (0)
|
||||
|
||||
static __always_inline void kvm_write_cptr_el2(u64 val)
|
||||
{
|
||||
if (has_vhe() || has_hvhe())
|
||||
@ -570,17 +632,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
|
||||
u64 val;
|
||||
|
||||
if (has_vhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
|
||||
CPACR_EL1_ZEN_EL1EN);
|
||||
val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
val |= CPACR_EL1_SMEN_EL1EN;
|
||||
} else if (has_hvhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
|
||||
val = CPACR_ELx_FPEN;
|
||||
|
||||
if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
|
||||
val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
|
||||
val |= CPACR_ELx_ZEN;
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
|
||||
val |= CPACR_ELx_SMEN;
|
||||
} else {
|
||||
val = CPTR_NVHE_EL2_RES1;
|
||||
|
||||
|
@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
|
||||
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
|
||||
|
||||
extern unsigned int __ro_after_init kvm_sve_max_vl;
|
||||
extern unsigned int __ro_after_init kvm_host_sve_max_vl;
|
||||
int __init kvm_arm_init_sve(void);
|
||||
|
||||
u32 __attribute_const__ kvm_target_cpu(void);
|
||||
@ -521,6 +522,20 @@ struct kvm_cpu_context {
|
||||
u64 *vncr_array;
|
||||
};
|
||||
|
||||
struct cpu_sve_state {
|
||||
__u64 zcr_el1;
|
||||
|
||||
/*
|
||||
* Ordering is important since __sve_save_state/__sve_restore_state
|
||||
* relies on it.
|
||||
*/
|
||||
__u32 fpsr;
|
||||
__u32 fpcr;
|
||||
|
||||
/* Must be SVE_VQ_BYTES (128 bit) aligned. */
|
||||
__u8 sve_regs[];
|
||||
};
|
||||
|
||||
/*
|
||||
* This structure is instantiated on a per-CPU basis, and contains
|
||||
* data that is:
|
||||
@ -534,7 +549,15 @@ struct kvm_cpu_context {
|
||||
*/
|
||||
struct kvm_host_data {
|
||||
struct kvm_cpu_context host_ctxt;
|
||||
struct user_fpsimd_state *fpsimd_state; /* hyp VA */
|
||||
|
||||
/*
|
||||
* All pointers in this union are hyp VA.
|
||||
* sve_state is only used in pKVM and if system_supports_sve().
|
||||
*/
|
||||
union {
|
||||
struct user_fpsimd_state *fpsimd_state;
|
||||
struct cpu_sve_state *sve_state;
|
||||
};
|
||||
|
||||
/* Ownership of the FP regs */
|
||||
enum {
|
||||
|
@ -111,7 +111,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
|
||||
|
||||
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
|
||||
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
|
||||
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
|
||||
void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
|
||||
void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
|
||||
|
||||
u64 __guest_enter(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -142,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
|
||||
|
||||
extern unsigned long kvm_nvhe_sym(__icache_flags);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
|
||||
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline size_t pkvm_host_sve_state_size(void)
|
||||
{
|
||||
if (!system_supports_sve())
|
||||
return 0;
|
||||
|
||||
return size_add(sizeof(struct cpu_sve_state),
|
||||
SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_PKVM_H__ */
|
||||
|
@ -462,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu)
|
||||
for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
|
||||
struct insn_emulation *insn = insn_emulations[i];
|
||||
bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
|
||||
if (insn->status == INSN_UNAVAILABLE)
|
||||
continue;
|
||||
|
||||
if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
|
||||
pr_warn("CPU[%u] cannot support the emulation of %s",
|
||||
cpu, insn->name);
|
||||
|
@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
|
||||
return size ? get_order(size) : 0;
|
||||
}
|
||||
|
||||
static size_t pkvm_host_sve_state_order(void)
|
||||
{
|
||||
return get_order(pkvm_host_sve_state_size());
|
||||
}
|
||||
|
||||
/* A lookup table holding the hypervisor VA for each vector slot */
|
||||
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
|
||||
|
||||
@ -2310,12 +2315,20 @@ static void __init teardown_subsystems(void)
|
||||
|
||||
static void __init teardown_hyp_mode(void)
|
||||
{
|
||||
bool free_sve = system_supports_sve() && is_protected_kvm_enabled();
|
||||
int cpu;
|
||||
|
||||
free_hyp_pgds();
|
||||
for_each_possible_cpu(cpu) {
|
||||
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
|
||||
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
|
||||
|
||||
if (free_sve) {
|
||||
struct cpu_sve_state *sve_state;
|
||||
|
||||
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
|
||||
free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2398,6 +2411,58 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_pkvm_host_sve_state(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!system_supports_sve())
|
||||
return 0;
|
||||
|
||||
/* Allocate pages for host sve state in protected mode. */
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
|
||||
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't map the pages in hyp since these are only used in protected
|
||||
* mode, which will (re)create its own mapping when initialized.
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalizes the initialization of hyp mode, once everything else is initialized
|
||||
* and the initialziation process cannot fail.
|
||||
*/
|
||||
static void finalize_init_hyp_mode(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (system_supports_sve() && is_protected_kvm_enabled()) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpu_sve_state *sve_state;
|
||||
|
||||
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
|
||||
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
|
||||
kern_hyp_va(sve_state);
|
||||
}
|
||||
} else {
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct user_fpsimd_state *fpsimd_state;
|
||||
|
||||
fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
|
||||
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
|
||||
kern_hyp_va(fpsimd_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void pkvm_hyp_init_ptrauth(void)
|
||||
{
|
||||
struct kvm_cpu_context *hyp_ctxt;
|
||||
@ -2566,6 +2631,10 @@ static int __init init_hyp_mode(void)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
err = init_pkvm_host_sve_state();
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
err = kvm_hyp_init_protection(hyp_va_bits);
|
||||
if (err) {
|
||||
kvm_err("Failed to init hyp memory protection\n");
|
||||
@ -2730,6 +2799,13 @@ static __init int kvm_arm_init(void)
|
||||
if (err)
|
||||
goto out_subs;
|
||||
|
||||
/*
|
||||
* This should be called after initialization is done and failure isn't
|
||||
* possible anymore.
|
||||
*/
|
||||
if (!in_hyp_mode)
|
||||
finalize_init_hyp_mode();
|
||||
|
||||
kvm_arm_initialised = true;
|
||||
|
||||
return 0;
|
||||
|
@ -2181,16 +2181,23 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
|
||||
if (forward_traps(vcpu, HCR_NV))
|
||||
return;
|
||||
|
||||
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
|
||||
spsr = kvm_check_illegal_exception_return(vcpu, spsr);
|
||||
|
||||
/* Check for an ERETAx */
|
||||
esr = kvm_vcpu_get_esr(vcpu);
|
||||
if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
|
||||
/*
|
||||
* Oh no, ERETAx failed to authenticate. If we have
|
||||
* FPACCOMBINE, deliver an exception right away. If we
|
||||
* don't, then let the mangled ELR value trickle down the
|
||||
* Oh no, ERETAx failed to authenticate.
|
||||
*
|
||||
* If we have FPACCOMBINE and we don't have a pending
|
||||
* Illegal Execution State exception (which has priority
|
||||
* over FPAC), deliver an exception right away.
|
||||
*
|
||||
* Otherwise, let the mangled ELR value trickle down the
|
||||
* ERET handling, and the guest will have a little surprise.
|
||||
*/
|
||||
if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
|
||||
if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE) && !(spsr & PSR_IL_BIT)) {
|
||||
esr &= ESR_ELx_ERET_ISS_ERETA;
|
||||
esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
|
||||
kvm_inject_nested_sync(vcpu, esr);
|
||||
@ -2201,17 +2208,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
|
||||
preempt_disable();
|
||||
kvm_arch_vcpu_put(vcpu);
|
||||
|
||||
spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
|
||||
spsr = kvm_check_illegal_exception_return(vcpu, spsr);
|
||||
if (!esr_iss_is_eretax(esr))
|
||||
elr = __vcpu_sys_reg(vcpu, ELR_EL2);
|
||||
|
||||
trace_kvm_nested_eret(vcpu, elr, spsr);
|
||||
|
||||
/*
|
||||
* Note that the current exception level is always the virtual EL2,
|
||||
* since we set HCR_EL2.NV bit only when entering the virtual EL2.
|
||||
*/
|
||||
*vcpu_pc(vcpu) = elr;
|
||||
*vcpu_cpsr(vcpu) = spsr;
|
||||
|
||||
|
@ -90,6 +90,13 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If normal guests gain SME support, maintain this behavior for pKVM
|
||||
* guests, which don't support SME.
|
||||
*/
|
||||
WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
|
||||
read_sysreg_s(SYS_SVCR));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -161,9 +168,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
if (has_vhe() && system_supports_sme()) {
|
||||
/* Also restore EL0 state seen on entry */
|
||||
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
|
||||
sysreg_clear_set(CPACR_EL1, 0,
|
||||
CPACR_EL1_SMEN_EL0EN |
|
||||
CPACR_EL1_SMEN_EL1EN);
|
||||
sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
|
||||
else
|
||||
sysreg_clear_set(CPACR_EL1,
|
||||
CPACR_EL1_SMEN_EL0EN,
|
||||
|
@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
|
||||
case PSR_AA32_MODE_SVC:
|
||||
case PSR_AA32_MODE_ABT:
|
||||
case PSR_AA32_MODE_UND:
|
||||
case PSR_AA32_MODE_SYS:
|
||||
if (!vcpu_el1_is_32bit(vcpu))
|
||||
return -EINVAL;
|
||||
break;
|
||||
@ -276,7 +277,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
|
||||
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
|
||||
int i, nr_reg;
|
||||
|
||||
switch (*vcpu_cpsr(vcpu)) {
|
||||
switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
|
||||
/*
|
||||
* Either we are dealing with user mode, and only the
|
||||
* first 15 registers (+ PC) must be narrowed to 32bit.
|
||||
|
@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
|
||||
u32 cpsr_cond;
|
||||
int cond;
|
||||
|
||||
/* Top two bits non-zero? Unconditional. */
|
||||
if (kvm_vcpu_get_esr(vcpu) >> 30)
|
||||
/*
|
||||
* These are the exception classes that could fire with a
|
||||
* conditional instruction.
|
||||
*/
|
||||
switch (kvm_vcpu_trap_get_class(vcpu)) {
|
||||
case ESR_ELx_EC_CP15_32:
|
||||
case ESR_ELx_EC_CP15_64:
|
||||
case ESR_ELx_EC_CP14_MR:
|
||||
case ESR_ELx_EC_CP14_LS:
|
||||
case ESR_ELx_EC_FP_ASIMD:
|
||||
case ESR_ELx_EC_CP10_ID:
|
||||
case ESR_ELx_EC_CP14_64:
|
||||
case ESR_ELx_EC_SVC32:
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Is condition field valid? */
|
||||
cond = kvm_vcpu_get_condition(vcpu);
|
||||
|
@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
|
||||
sve_load 0, x1, x2, 3
|
||||
ret
|
||||
SYM_FUNC_END(__sve_restore_state)
|
||||
|
||||
SYM_FUNC_START(__sve_save_state)
|
||||
mov x2, #1
|
||||
sve_save 0, x1, x2, 3
|
||||
ret
|
||||
SYM_FUNC_END(__sve_save_state)
|
||||
|
@ -316,10 +316,24 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
|
||||
__sve_restore_state(vcpu_sve_pffr(vcpu),
|
||||
&vcpu->arch.ctxt.fp_regs.fpsr);
|
||||
&vcpu->arch.ctxt.fp_regs.fpsr,
|
||||
true);
|
||||
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
|
||||
}
|
||||
|
||||
static inline void __hyp_sve_save_host(void)
|
||||
{
|
||||
struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
|
||||
|
||||
sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
|
||||
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
|
||||
__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
|
||||
&sve_state->fpsr,
|
||||
true);
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
* We trap the first access to the FP/SIMD to save the host context and
|
||||
* restore the guest context lazily.
|
||||
@ -330,7 +344,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
bool sve_guest;
|
||||
u8 esr_ec;
|
||||
u64 reg;
|
||||
|
||||
if (!system_supports_fpsimd())
|
||||
return false;
|
||||
@ -353,24 +366,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
/* Valid trap. Switch the context: */
|
||||
|
||||
/* First disable enough traps to allow us to update the registers */
|
||||
if (has_vhe() || has_hvhe()) {
|
||||
reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
|
||||
if (sve_guest)
|
||||
reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
|
||||
|
||||
sysreg_clear_set(cpacr_el1, 0, reg);
|
||||
} else {
|
||||
reg = CPTR_EL2_TFP;
|
||||
if (sve_guest)
|
||||
reg |= CPTR_EL2_TZ;
|
||||
|
||||
sysreg_clear_set(cptr_el2, reg, 0);
|
||||
}
|
||||
if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
|
||||
cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
|
||||
else
|
||||
cpacr_clear_set(0, CPACR_ELx_FPEN);
|
||||
isb();
|
||||
|
||||
/* Write out the host state if it's in the registers */
|
||||
if (host_owns_fp_regs())
|
||||
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
|
||||
kvm_hyp_save_fpsimd_host(vcpu);
|
||||
|
||||
/* Restore the guest state */
|
||||
if (sve_guest)
|
||||
|
@ -59,7 +59,6 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
}
|
||||
|
||||
void pkvm_hyp_vm_table_init(void *tbl);
|
||||
void pkvm_host_fpsimd_state_init(void);
|
||||
|
||||
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
|
||||
unsigned long pgd_hva);
|
||||
|
@ -23,20 +23,80 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
|
||||
/*
|
||||
* On saving/restoring guest sve state, always use the maximum VL for
|
||||
* the guest. The layout of the data when saving the sve state depends
|
||||
* on the VL, so use a consistent (i.e., the maximum) guest VL.
|
||||
*/
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
|
||||
__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
|
||||
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
|
||||
}
|
||||
|
||||
static void __hyp_sve_restore_host(void)
|
||||
{
|
||||
struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
|
||||
|
||||
/*
|
||||
* On saving/restoring host sve state, always use the maximum VL for
|
||||
* the host. The layout of the data when saving the sve state depends
|
||||
* on the VL, so use a consistent (i.e., the maximum) host VL.
|
||||
*
|
||||
* Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
|
||||
* supported by the system (or limited at EL3).
|
||||
*/
|
||||
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
|
||||
__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
|
||||
&sve_state->fpsr,
|
||||
true);
|
||||
write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
|
||||
}
|
||||
|
||||
static void fpsimd_sve_flush(void)
|
||||
{
|
||||
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
|
||||
}
|
||||
|
||||
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!guest_owns_fp_regs())
|
||||
return;
|
||||
|
||||
cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
|
||||
isb();
|
||||
|
||||
if (vcpu_has_sve(vcpu))
|
||||
__hyp_sve_save_guest(vcpu);
|
||||
else
|
||||
__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
|
||||
|
||||
if (system_supports_sve())
|
||||
__hyp_sve_restore_host();
|
||||
else
|
||||
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
|
||||
|
||||
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
|
||||
}
|
||||
|
||||
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
|
||||
|
||||
fpsimd_sve_flush();
|
||||
|
||||
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
|
||||
|
||||
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
|
||||
hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
|
||||
/* Limit guest vector length to the maximum supported by the host. */
|
||||
hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
|
||||
|
||||
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
|
||||
|
||||
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
|
||||
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
|
||||
hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
|
||||
|
||||
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
|
||||
|
||||
@ -54,10 +114,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
|
||||
unsigned int i;
|
||||
|
||||
fpsimd_sve_sync(&hyp_vcpu->vcpu);
|
||||
|
||||
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
|
||||
|
||||
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
|
||||
host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
|
||||
|
||||
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
|
||||
|
||||
@ -79,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
struct kvm *host_kvm;
|
||||
|
||||
/*
|
||||
* KVM (and pKVM) doesn't support SME guests for now, and
|
||||
* ensures that SME features aren't enabled in pstate when
|
||||
* loading a vcpu. Therefore, if SME features enabled the host
|
||||
* is misbehaving.
|
||||
*/
|
||||
if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
host_kvm = kern_hyp_va(host_vcpu->kvm);
|
||||
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
|
||||
host_vcpu->vcpu_idx);
|
||||
@ -405,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
||||
handle_host_smc(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SVE:
|
||||
if (has_hvhe())
|
||||
sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
|
||||
CPACR_EL1_ZEN_EL0EN));
|
||||
else
|
||||
sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
|
||||
cpacr_clear_set(0, CPACR_ELx_ZEN);
|
||||
isb();
|
||||
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
|
||||
break;
|
||||
|
@ -18,6 +18,8 @@ unsigned long __icache_flags;
|
||||
/* Used by kvm_get_vttbr(). */
|
||||
unsigned int kvm_arm_vmid_bits;
|
||||
|
||||
unsigned int kvm_host_sve_max_vl;
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64PFR0.
|
||||
*/
|
||||
@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
|
||||
/* Trap SVE */
|
||||
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
|
||||
if (has_hvhe())
|
||||
cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
|
||||
cptr_clear |= CPACR_ELx_ZEN;
|
||||
else
|
||||
cptr_set |= CPTR_EL2_TZ;
|
||||
}
|
||||
@ -247,17 +249,6 @@ void pkvm_hyp_vm_table_init(void *tbl)
|
||||
vm_table = tbl;
|
||||
}
|
||||
|
||||
void pkvm_host_fpsimd_state_init(void)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
|
||||
|
||||
host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the hyp vm structure corresponding to the handle.
|
||||
*/
|
||||
@ -586,6 +577,8 @@ unlock:
|
||||
if (ret)
|
||||
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
|
||||
|
||||
hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pkvm_create_host_sve_mappings(void)
|
||||
{
|
||||
void *start, *end;
|
||||
int ret, i;
|
||||
|
||||
if (!system_supports_sve())
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
|
||||
struct cpu_sve_state *sve_state = host_data->sve_state;
|
||||
|
||||
start = kern_hyp_va(sve_state);
|
||||
end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
|
||||
ret = pkvm_create_mappings(start, end, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
unsigned long *per_cpu_base,
|
||||
u32 hyp_va_bits)
|
||||
@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
return ret;
|
||||
}
|
||||
|
||||
pkvm_create_host_sve_mappings();
|
||||
|
||||
/*
|
||||
* Map the host sections RO in the hypervisor, but transfer the
|
||||
* ownership from the host to the hypervisor itself to make sure they
|
||||
@ -300,7 +324,6 @@ void __noreturn __pkvm_init_finalise(void)
|
||||
goto out;
|
||||
|
||||
pkvm_hyp_vm_table_init(vm_table_base);
|
||||
pkvm_host_fpsimd_state_init();
|
||||
out:
|
||||
/*
|
||||
* We tail-called to here from handle___pkvm_init() and will not return,
|
||||
|
@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
|
||||
if (cpus_have_final_cap(ARM64_SME)) {
|
||||
if (has_hvhe())
|
||||
val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
|
||||
val &= ~CPACR_ELx_SMEN;
|
||||
else
|
||||
val |= CPTR_EL2_TSM;
|
||||
}
|
||||
|
||||
if (!guest_owns_fp_regs()) {
|
||||
if (has_hvhe())
|
||||
val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
|
||||
CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
|
||||
val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
|
||||
else
|
||||
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
|
||||
|
||||
@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
kvm_handle_pvm_sysreg(vcpu, exit_code));
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Non-protected kvm relies on the host restoring its sve state.
|
||||
* Protected kvm restores the host's sve state as not to reveal that
|
||||
* fpsimd was used by a guest nor leak upper sve bits.
|
||||
*/
|
||||
if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
|
||||
__hyp_sve_save_host();
|
||||
|
||||
/* Re-enable SVE traps if not supported for the guest vcpu. */
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
cpacr_clear_set(CPACR_ELx_ZEN, 0);
|
||||
|
||||
} else {
|
||||
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
|
||||
}
|
||||
}
|
||||
|
||||
static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
|
||||
|
@ -93,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
val = read_sysreg(cpacr_el1);
|
||||
val |= CPACR_ELx_TTA;
|
||||
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
|
||||
CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
|
||||
val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
|
||||
|
||||
/*
|
||||
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
|
||||
@ -109,9 +108,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (guest_owns_fp_regs()) {
|
||||
if (vcpu_has_sve(vcpu))
|
||||
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
|
||||
val |= CPACR_ELx_ZEN;
|
||||
} else {
|
||||
val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
|
||||
val &= ~CPACR_ELx_FPEN;
|
||||
__activate_traps_fpsimd32(vcpu);
|
||||
}
|
||||
|
||||
@ -262,6 +261,11 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
|
||||
}
|
||||
|
||||
static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
|
||||
|
@ -58,8 +58,10 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
/* Only support SSBS */
|
||||
val &= NV_FTR(PFR1, SSBS);
|
||||
/* Only support BTI, SSBS, CSV2_frac */
|
||||
val &= (NV_FTR(PFR1, BT) |
|
||||
NV_FTR(PFR1, SSBS) |
|
||||
NV_FTR(PFR1, CSV2_frac));
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR0_EL1:
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
/* Maximum phys_shift supported for any VM on this host */
|
||||
static u32 __ro_after_init kvm_ipa_limit;
|
||||
unsigned int __ro_after_init kvm_host_sve_max_vl;
|
||||
|
||||
/*
|
||||
* ARMv8 Reset Values
|
||||
@ -51,6 +52,8 @@ int __init kvm_arm_init_sve(void)
|
||||
{
|
||||
if (system_supports_sve()) {
|
||||
kvm_sve_max_vl = sve_max_virtualisable_vl();
|
||||
kvm_host_sve_max_vl = sve_max_vl();
|
||||
kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
|
||||
|
||||
/*
|
||||
* The get_sve_reg()/set_sve_reg() ioctl interface will need
|
||||
|
@ -376,7 +376,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
|
||||
* clearing access/dirty for the whole block.
|
||||
*/
|
||||
unsigned long start = addr;
|
||||
unsigned long end = start + nr;
|
||||
unsigned long end = start + nr * PAGE_SIZE;
|
||||
|
||||
if (pte_cont(__ptep_get(ptep + nr - 1)))
|
||||
end = ALIGN(end, CONT_PTE_SIZE);
|
||||
@ -386,7 +386,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
|
||||
ptep = contpte_align_down(ptep);
|
||||
}
|
||||
|
||||
__clear_young_dirty_ptes(vma, start, ptep, end - start, flags);
|
||||
__clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
|
||||
|
||||
|
@ -44,14 +44,14 @@
|
||||
&gmac0 {
|
||||
status = "okay";
|
||||
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
bus_id = <0x0>;
|
||||
};
|
||||
|
||||
&gmac1 {
|
||||
status = "okay";
|
||||
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
bus_id = <0x1>;
|
||||
};
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
&gmac0 {
|
||||
status = "okay";
|
||||
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <&phy0>;
|
||||
mdio {
|
||||
compatible = "snps,dwmac-mdio";
|
||||
@ -58,7 +58,7 @@
|
||||
&gmac1 {
|
||||
status = "okay";
|
||||
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <&phy1>;
|
||||
mdio {
|
||||
compatible = "snps,dwmac-mdio";
|
||||
|
@ -92,7 +92,7 @@
|
||||
&gmac2 {
|
||||
status = "okay";
|
||||
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <&phy2>;
|
||||
mdio {
|
||||
compatible = "snps,dwmac-mdio";
|
||||
|
@ -56,6 +56,7 @@ extern int early_cpu_to_node(int cpu);
|
||||
static inline void early_numa_add_cpu(int cpuid, s16 node) { }
|
||||
static inline void numa_add_cpu(unsigned int cpu) { }
|
||||
static inline void numa_remove_cpu(unsigned int cpu) { }
|
||||
static inline void set_cpuid_to_node(int cpuid, s16 node) { }
|
||||
|
||||
static inline int early_cpu_to_node(int cpu)
|
||||
{
|
||||
|
@ -42,7 +42,7 @@
|
||||
.macro JUMP_VIRT_ADDR temp1 temp2
|
||||
li.d \temp1, CACHE_BASE
|
||||
pcaddi \temp2, 0
|
||||
or \temp1, \temp1, \temp2
|
||||
bstrins.d \temp1, \temp2, (DMW_PABITS - 1), 0
|
||||
jirl zero, \temp1, 0xc
|
||||
.endm
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
_head:
|
||||
.word MZ_MAGIC /* "MZ", MS-DOS header */
|
||||
.org 0x8
|
||||
.dword kernel_entry /* Kernel entry point */
|
||||
.dword _kernel_entry /* Kernel entry point (physical address) */
|
||||
.dword _kernel_asize /* Kernel image effective size */
|
||||
.quad PHYS_LINK_KADDR /* Kernel image load offset from start of RAM */
|
||||
.org 0x38 /* 0x20 ~ 0x37 reserved */
|
||||
|
@ -282,7 +282,7 @@ static void __init fdt_setup(void)
|
||||
return;
|
||||
|
||||
/* Prefer to use built-in dtb, checking its legality first. */
|
||||
if (!fdt_check_header(__dtb_start))
|
||||
if (IS_ENABLED(CONFIG_BUILTIN_DTB) && !fdt_check_header(__dtb_start))
|
||||
fdt_pointer = __dtb_start;
|
||||
else
|
||||
fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
|
||||
@ -351,10 +351,8 @@ void __init platform_init(void)
|
||||
arch_reserve_vmcore();
|
||||
arch_reserve_crashkernel();
|
||||
|
||||
#ifdef CONFIG_ACPI_TABLE_UPGRADE
|
||||
acpi_table_upgrade();
|
||||
#endif
|
||||
#ifdef CONFIG_ACPI
|
||||
acpi_table_upgrade();
|
||||
acpi_gbl_use_default_register_widths = false;
|
||||
acpi_boot_table_init();
|
||||
#endif
|
||||
|
@ -273,7 +273,6 @@ static void __init fdt_smp_setup(void)
|
||||
|
||||
if (cpuid == loongson_sysconf.boot_cpu_id) {
|
||||
cpu = 0;
|
||||
numa_add_cpu(cpu);
|
||||
} else {
|
||||
cpu = cpumask_next_zero(-1, cpu_present_mask);
|
||||
}
|
||||
@ -283,6 +282,9 @@ static void __init fdt_smp_setup(void)
|
||||
set_cpu_present(cpu, true);
|
||||
__cpu_number_map[cpuid] = cpu;
|
||||
__cpu_logical_map[cpu] = cpuid;
|
||||
|
||||
early_numa_add_cpu(cpu, 0);
|
||||
set_cpuid_to_node(cpuid, 0);
|
||||
}
|
||||
|
||||
loongson_sysconf.nr_cpus = num_processors;
|
||||
@ -468,6 +470,7 @@ void smp_prepare_boot_cpu(void)
|
||||
set_cpu_possible(0, true);
|
||||
set_cpu_online(0, true);
|
||||
set_my_cpu_offset(per_cpu_offset(0));
|
||||
numa_add_cpu(0);
|
||||
|
||||
rr_node = first_node(node_online_map);
|
||||
for_each_possible_cpu(cpu) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#define PAGE_SIZE _PAGE_SIZE
|
||||
#define RO_EXCEPTION_TABLE_ALIGN 4
|
||||
#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */
|
||||
|
||||
/*
|
||||
* Put .bss..swapper_pg_dir as the first thing in .bss. This will
|
||||
@ -142,10 +143,11 @@ SECTIONS
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
/* header symbols */
|
||||
_kernel_asize = _end - _text;
|
||||
_kernel_fsize = _edata - _text;
|
||||
_kernel_vsize = _end - __initdata_begin;
|
||||
_kernel_rsize = _edata - __initdata_begin;
|
||||
_kernel_entry = ABSOLUTE(kernel_entry & PHYSADDR_MASK);
|
||||
_kernel_asize = ABSOLUTE(_end - _text);
|
||||
_kernel_fsize = ABSOLUTE(_edata - _text);
|
||||
_kernel_vsize = ABSOLUTE(_end - __initdata_begin);
|
||||
_kernel_rsize = ABSOLUTE(_edata - __initdata_begin);
|
||||
#endif
|
||||
|
||||
.gptab.sdata : {
|
||||
|
@ -137,7 +137,7 @@ config PPC
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU
|
||||
select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU
|
||||
select ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||
select ARCH_HAS_MEMBARRIER_SYNC_CORE
|
||||
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
|
||||
|
@ -92,9 +92,25 @@ __pu_failed: \
|
||||
: label)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CC_IS_CLANG
|
||||
#define DS_FORM_CONSTRAINT "Z<>"
|
||||
#else
|
||||
#define DS_FORM_CONSTRAINT "YZ<>"
|
||||
#endif
|
||||
|
||||
#ifdef __powerpc64__
|
||||
#ifdef CONFIG_PPC_KERNEL_PREFIXED
|
||||
#define __put_user_asm2_goto(x, ptr, label) \
|
||||
__put_user_asm_goto(x, ptr, label, "std")
|
||||
#else
|
||||
#define __put_user_asm2_goto(x, addr, label) \
|
||||
asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \
|
||||
EX_TABLE(1b, %l2) \
|
||||
: \
|
||||
: "r" (x), DS_FORM_CONSTRAINT (*addr) \
|
||||
: \
|
||||
: label)
|
||||
#endif // CONFIG_PPC_KERNEL_PREFIXED
|
||||
#else /* __powerpc64__ */
|
||||
#define __put_user_asm2_goto(x, addr, label) \
|
||||
asm goto( \
|
||||
@ -165,8 +181,19 @@ do { \
|
||||
#endif
|
||||
|
||||
#ifdef __powerpc64__
|
||||
#ifdef CONFIG_PPC_KERNEL_PREFIXED
|
||||
#define __get_user_asm2_goto(x, addr, label) \
|
||||
__get_user_asm_goto(x, addr, label, "ld")
|
||||
#else
|
||||
#define __get_user_asm2_goto(x, addr, label) \
|
||||
asm_goto_output( \
|
||||
"1: ld%U1%X1 %0, %1 # get_user\n" \
|
||||
EX_TABLE(1b, %l2) \
|
||||
: "=r" (x) \
|
||||
: DS_FORM_CONSTRAINT (*addr) \
|
||||
: \
|
||||
: label)
|
||||
#endif // CONFIG_PPC_KERNEL_PREFIXED
|
||||
#else /* __powerpc64__ */
|
||||
#define __get_user_asm2_goto(x, addr, label) \
|
||||
asm_goto_output( \
|
||||
|
@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
|
||||
|
||||
/* Get offset into TMP_REG */
|
||||
EMIT(PPC_RAW_LI(tmp_reg, off));
|
||||
/*
|
||||
* Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
|
||||
* before and after the operation.
|
||||
*
|
||||
* This is a requirement in the Linux Kernel Memory Model.
|
||||
* See __cmpxchg_u32() in asm/cmpxchg.h as an example.
|
||||
*/
|
||||
if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
|
||||
EMIT(PPC_RAW_SYNC());
|
||||
tmp_idx = ctx->idx * 4;
|
||||
/* load value from memory into r0 */
|
||||
EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
|
||||
@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
|
||||
|
||||
/* For the BPF_FETCH variant, get old data into src_reg */
|
||||
if (imm & BPF_FETCH) {
|
||||
/* Emit 'sync' to enforce full ordering */
|
||||
if (IS_ENABLED(CONFIG_SMP))
|
||||
EMIT(PPC_RAW_SYNC());
|
||||
EMIT(PPC_RAW_MR(ret_reg, ax_reg));
|
||||
if (!fp->aux->verifier_zext)
|
||||
EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
|
||||
|
@ -846,6 +846,15 @@ emit_clear:
|
||||
|
||||
/* Get offset into TMP_REG_1 */
|
||||
EMIT(PPC_RAW_LI(tmp1_reg, off));
|
||||
/*
|
||||
* Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
|
||||
* before and after the operation.
|
||||
*
|
||||
* This is a requirement in the Linux Kernel Memory Model.
|
||||
* See __cmpxchg_u64() in asm/cmpxchg.h as an example.
|
||||
*/
|
||||
if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
|
||||
EMIT(PPC_RAW_SYNC());
|
||||
tmp_idx = ctx->idx * 4;
|
||||
/* load value from memory into TMP_REG_2 */
|
||||
if (size == BPF_DW)
|
||||
@ -908,6 +917,9 @@ emit_clear:
|
||||
PPC_BCC_SHORT(COND_NE, tmp_idx);
|
||||
|
||||
if (imm & BPF_FETCH) {
|
||||
/* Emit 'sync' to enforce full ordering */
|
||||
if (IS_ENABLED(CONFIG_SMP))
|
||||
EMIT(PPC_RAW_SYNC());
|
||||
EMIT(PPC_RAW_MR(ret_reg, _R0));
|
||||
/*
|
||||
* Skip unnecessary zero-extension for 32-bit cmpxchg.
|
||||
|
@ -371,8 +371,8 @@ static int read_dt_lpar_name(struct seq_file *m)
|
||||
|
||||
static void read_lpar_name(struct seq_file *m)
|
||||
{
|
||||
if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
|
||||
pr_err_once("Error can't get the LPAR name");
|
||||
if (read_rtas_lpar_name(m))
|
||||
read_dt_lpar_name(m);
|
||||
}
|
||||
|
||||
#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
|
||||
|
@ -106,7 +106,7 @@ config RISCV
|
||||
select HAS_IOPORT if MMU
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
|
||||
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
|
||||
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
|
||||
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
|
||||
select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
|
||||
select HAVE_ARCH_KASAN if MMU && 64BIT
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <asm/fence.h>
|
||||
|
||||
#define __arch_xchg_masked(prepend, append, r, p, n) \
|
||||
#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
|
||||
({ \
|
||||
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
|
||||
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
|
||||
@ -25,7 +25,7 @@
|
||||
"0: lr.w %0, %2\n" \
|
||||
" and %1, %0, %z4\n" \
|
||||
" or %1, %1, %z3\n" \
|
||||
" sc.w %1, %1, %2\n" \
|
||||
" sc.w" sc_sfx " %1, %1, %2\n" \
|
||||
" bnez %1, 0b\n" \
|
||||
append \
|
||||
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
|
||||
@ -46,7 +46,8 @@
|
||||
: "memory"); \
|
||||
})
|
||||
|
||||
#define _arch_xchg(ptr, new, sfx, prepend, append) \
|
||||
#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
|
||||
sc_append, swap_append) \
|
||||
({ \
|
||||
__typeof__(ptr) __ptr = (ptr); \
|
||||
__typeof__(*(__ptr)) __new = (new); \
|
||||
@ -55,15 +56,15 @@
|
||||
switch (sizeof(*__ptr)) { \
|
||||
case 1: \
|
||||
case 2: \
|
||||
__arch_xchg_masked(prepend, append, \
|
||||
__arch_xchg_masked(sc_sfx, prepend, sc_append, \
|
||||
__ret, __ptr, __new); \
|
||||
break; \
|
||||
case 4: \
|
||||
__arch_xchg(".w" sfx, prepend, append, \
|
||||
__arch_xchg(".w" swap_sfx, prepend, swap_append, \
|
||||
__ret, __ptr, __new); \
|
||||
break; \
|
||||
case 8: \
|
||||
__arch_xchg(".d" sfx, prepend, append, \
|
||||
__arch_xchg(".d" swap_sfx, prepend, swap_append, \
|
||||
__ret, __ptr, __new); \
|
||||
break; \
|
||||
default: \
|
||||
@ -73,16 +74,17 @@
|
||||
})
|
||||
|
||||
#define arch_xchg_relaxed(ptr, x) \
|
||||
_arch_xchg(ptr, x, "", "", "")
|
||||
_arch_xchg(ptr, x, "", "", "", "", "")
|
||||
|
||||
#define arch_xchg_acquire(ptr, x) \
|
||||
_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
|
||||
_arch_xchg(ptr, x, "", "", "", \
|
||||
RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
|
||||
|
||||
#define arch_xchg_release(ptr, x) \
|
||||
_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
|
||||
_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
|
||||
|
||||
#define arch_xchg(ptr, x) \
|
||||
_arch_xchg(ptr, x, ".aqrl", "", "")
|
||||
_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
|
||||
|
||||
#define xchg32(ptr, x) \
|
||||
({ \
|
||||
|
@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
|
||||
/* Make sure tidle is updated */
|
||||
smp_mb();
|
||||
bdata->task_ptr = tidle;
|
||||
bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
|
||||
bdata->stack_ptr = task_pt_regs(tidle);
|
||||
/* Make sure boot data is updated */
|
||||
smp_mb();
|
||||
hsm_data = __pa(bdata);
|
||||
|
@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
|
||||
|
||||
/* Make sure tidle is updated */
|
||||
smp_mb();
|
||||
WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
|
||||
task_stack_page(tidle) + THREAD_SIZE);
|
||||
WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
|
||||
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
|
||||
}
|
||||
|
||||
|
@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
|
||||
|
||||
static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
|
||||
{
|
||||
u32 hart, group = 0;
|
||||
u32 hart = 0, group = 0;
|
||||
|
||||
hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
|
||||
GENMASK_ULL(aia->nr_hart_bits - 1, 0);
|
||||
if (aia->nr_hart_bits)
|
||||
hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
|
||||
GENMASK_ULL(aia->nr_hart_bits - 1, 0);
|
||||
if (aia->nr_group_bits)
|
||||
group = (addr >> aia->nr_group_shift) &
|
||||
GENMASK_ULL(aia->nr_group_bits - 1, 0);
|
||||
|
@ -724,9 +724,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
switch (reg_subtype) {
|
||||
case KVM_REG_RISCV_ISA_SINGLE:
|
||||
return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
|
||||
case KVM_REG_RISCV_SBI_MULTI_EN:
|
||||
case KVM_REG_RISCV_ISA_MULTI_EN:
|
||||
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
|
||||
case KVM_REG_RISCV_SBI_MULTI_DIS:
|
||||
case KVM_REG_RISCV_ISA_MULTI_DIS:
|
||||
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
|
||||
default:
|
||||
return -ENOENT;
|
||||
|
@ -293,8 +293,8 @@ void handle_page_fault(struct pt_regs *regs)
|
||||
if (unlikely(access_error(cause, vma))) {
|
||||
vma_end_read(vma);
|
||||
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
|
||||
tsk->thread.bad_cause = SEGV_ACCERR;
|
||||
bad_area_nosemaphore(regs, code, addr);
|
||||
tsk->thread.bad_cause = cause;
|
||||
bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -250,18 +250,19 @@ static void __init setup_bootmem(void)
|
||||
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
|
||||
|
||||
/*
|
||||
* memblock allocator is not aware of the fact that last 4K bytes of
|
||||
* the addressable memory can not be mapped because of IS_ERR_VALUE
|
||||
* macro. Make sure that last 4k bytes are not usable by memblock
|
||||
* if end of dram is equal to maximum addressable memory. For 64-bit
|
||||
* kernel, this problem can't happen here as the end of the virtual
|
||||
* address space is occupied by the kernel mapping then this check must
|
||||
* be done as soon as the kernel mapping base address is determined.
|
||||
* Reserve physical address space that would be mapped to virtual
|
||||
* addresses greater than (void *)(-PAGE_SIZE) because:
|
||||
* - This memory would overlap with ERR_PTR
|
||||
* - This memory belongs to high memory, which is not supported
|
||||
*
|
||||
* This is not applicable to 64-bit kernel, because virtual addresses
|
||||
* after (void *)(-PAGE_SIZE) are not linearly mapped: they are
|
||||
* occupied by kernel mapping. Also it is unrealistic for high memory
|
||||
* to exist on 64-bit platforms.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_64BIT)) {
|
||||
max_mapped_addr = __pa(~(ulong)0);
|
||||
if (max_mapped_addr == (phys_ram_end - 1))
|
||||
memblock_set_current_limit(max_mapped_addr - 4096);
|
||||
max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
|
||||
memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
|
||||
}
|
||||
|
||||
min_low_pfn = PFN_UP(phys_ram_base);
|
||||
|
@ -451,7 +451,7 @@ static void *nt_final(void *ptr)
|
||||
/*
|
||||
* Initialize ELF header (new kernel)
|
||||
*/
|
||||
static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
|
||||
static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
|
||||
{
|
||||
memset(ehdr, 0, sizeof(*ehdr));
|
||||
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
|
||||
@ -465,11 +465,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
|
||||
ehdr->e_phoff = sizeof(Elf64_Ehdr);
|
||||
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
|
||||
ehdr->e_phentsize = sizeof(Elf64_Phdr);
|
||||
/*
|
||||
* Number of memory chunk PT_LOAD program headers plus one kernel
|
||||
* image PT_LOAD program header plus one PT_NOTE program header.
|
||||
*/
|
||||
ehdr->e_phnum = mem_chunk_cnt + 1 + 1;
|
||||
/* Number of PT_LOAD program headers plus PT_NOTE program header */
|
||||
ehdr->e_phnum = phdr_count + 1;
|
||||
return ehdr + 1;
|
||||
}
|
||||
|
||||
@ -503,12 +500,14 @@ static int get_mem_chunk_cnt(void)
|
||||
/*
|
||||
* Initialize ELF loads (new kernel)
|
||||
*/
|
||||
static void loads_init(Elf64_Phdr *phdr)
|
||||
static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
|
||||
{
|
||||
unsigned long old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
|
||||
unsigned long old_identity_base = 0;
|
||||
phys_addr_t start, end;
|
||||
u64 idx;
|
||||
|
||||
if (os_info_has_vm)
|
||||
old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
|
||||
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
|
||||
phdr->p_type = PT_LOAD;
|
||||
phdr->p_vaddr = old_identity_base + start;
|
||||
@ -522,6 +521,11 @@ static void loads_init(Elf64_Phdr *phdr)
|
||||
}
|
||||
}
|
||||
|
||||
static bool os_info_has_vm(void)
|
||||
{
|
||||
return os_info_old_value(OS_INFO_KASLR_OFFSET);
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare PT_LOAD type program header for kernel image region
|
||||
*/
|
||||
@ -566,7 +570,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static size_t get_elfcorehdr_size(int mem_chunk_cnt)
|
||||
static size_t get_elfcorehdr_size(int phdr_count)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
@ -581,10 +585,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
|
||||
size += nt_vmcoreinfo_size();
|
||||
/* nt_final */
|
||||
size += sizeof(Elf64_Nhdr);
|
||||
/* PT_LOAD type program header for kernel text region */
|
||||
size += sizeof(Elf64_Phdr);
|
||||
/* PT_LOADS */
|
||||
size += mem_chunk_cnt * sizeof(Elf64_Phdr);
|
||||
size += phdr_count * sizeof(Elf64_Phdr);
|
||||
|
||||
return size;
|
||||
}
|
||||
@ -595,8 +597,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
|
||||
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
|
||||
{
|
||||
Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
|
||||
int mem_chunk_cnt, phdr_text_cnt;
|
||||
size_t alloc_size;
|
||||
int mem_chunk_cnt;
|
||||
void *ptr, *hdr;
|
||||
u64 hdr_off;
|
||||
|
||||
@ -615,12 +617,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
|
||||
}
|
||||
|
||||
mem_chunk_cnt = get_mem_chunk_cnt();
|
||||
phdr_text_cnt = os_info_has_vm() ? 1 : 0;
|
||||
|
||||
alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
|
||||
alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
|
||||
|
||||
hdr = kzalloc(alloc_size, GFP_KERNEL);
|
||||
|
||||
/* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
|
||||
/*
|
||||
* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
|
||||
* a dump with this crash kernel will fail. Panic now to allow other
|
||||
* dump mechanisms to take over.
|
||||
*/
|
||||
@ -628,21 +632,23 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
|
||||
panic("s390 kdump allocating elfcorehdr failed");
|
||||
|
||||
/* Init elf header */
|
||||
ptr = ehdr_init(hdr, mem_chunk_cnt);
|
||||
phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
|
||||
/* Init program headers */
|
||||
phdr_notes = ptr;
|
||||
ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
|
||||
phdr_text = ptr;
|
||||
ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
|
||||
phdr_loads = ptr;
|
||||
ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
|
||||
if (phdr_text_cnt) {
|
||||
phdr_text = phdr_notes + 1;
|
||||
phdr_loads = phdr_text + 1;
|
||||
} else {
|
||||
phdr_loads = phdr_notes + 1;
|
||||
}
|
||||
ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
|
||||
/* Init notes */
|
||||
hdr_off = PTR_DIFF(ptr, hdr);
|
||||
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
|
||||
/* Init kernel text program header */
|
||||
text_init(phdr_text);
|
||||
if (phdr_text_cnt)
|
||||
text_init(phdr_text);
|
||||
/* Init loads */
|
||||
loads_init(phdr_loads);
|
||||
loads_init(phdr_loads, phdr_text_cnt);
|
||||
/* Finalize program headers */
|
||||
hdr_off = PTR_DIFF(ptr, hdr);
|
||||
*addr = (unsigned long long) hdr;
|
||||
|
@ -114,6 +114,7 @@
|
||||
#include "../perf_event.h"
|
||||
#include "../probe.h"
|
||||
|
||||
MODULE_DESCRIPTION("Support for Intel cstate performance events");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
|
||||
|
@ -34,6 +34,7 @@ static struct event_constraint uncore_constraint_fixed =
|
||||
struct event_constraint uncore_constraint_empty =
|
||||
EVENT_CONSTRAINT(0, 0, 0);
|
||||
|
||||
MODULE_DESCRIPTION("Support for Intel uncore performance events");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
int uncore_pcibus_to_dieid(struct pci_bus *bus)
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include "perf_event.h"
|
||||
#include "probe.h"
|
||||
|
||||
MODULE_DESCRIPTION("Support Intel/AMD RAPL energy consumption counters");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
/*
|
||||
|
@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
|
||||
void *insn, int insn_len);
|
||||
void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
|
||||
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
|
||||
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
u64 addr, unsigned long roots);
|
||||
|
@ -77,7 +77,7 @@
|
||||
#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
|
||||
#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
|
||||
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
|
||||
#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
|
||||
#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
|
||||
#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
|
||||
#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
|
||||
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
|
||||
|
@ -215,7 +215,14 @@ out:
|
||||
|
||||
int amd_smn_read(u16 node, u32 address, u32 *value)
|
||||
{
|
||||
return __amd_smn_rw(node, address, value, false);
|
||||
int err = __amd_smn_rw(node, address, value, false);
|
||||
|
||||
if (PCI_POSSIBLE_ERROR(*value)) {
|
||||
err = -ENODEV;
|
||||
*value = 0;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_smn_read);
|
||||
|
||||
|
@ -345,6 +345,7 @@ static DECLARE_WORK(disable_freq_invariance_work,
|
||||
disable_freq_invariance_workfn);
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
|
||||
|
||||
static void scale_freq_tick(u64 acnt, u64 mcnt)
|
||||
{
|
||||
|
@ -1075,6 +1075,10 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
|
||||
|
||||
c->x86_virt_bits = (eax >> 8) & 0xff;
|
||||
c->x86_phys_bits = eax & 0xff;
|
||||
|
||||
/* Provide a sane default if not enumerated: */
|
||||
if (!c->x86_clflush_size)
|
||||
c->x86_clflush_size = 32;
|
||||
}
|
||||
|
||||
c->x86_cache_bits = c->x86_phys_bits;
|
||||
@ -1585,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
|
||||
if (have_cpuid_p()) {
|
||||
cpu_detect(c);
|
||||
get_cpu_vendor(c);
|
||||
intel_unlock_cpuid_leafs(c);
|
||||
get_cpu_cap(c);
|
||||
setup_force_cpu_cap(X86_FEATURE_CPUID);
|
||||
get_cpu_address_sizes(c);
|
||||
@ -1744,7 +1749,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
|
||||
cpu_detect(c);
|
||||
|
||||
get_cpu_vendor(c);
|
||||
|
||||
intel_unlock_cpuid_leafs(c);
|
||||
get_cpu_cap(c);
|
||||
|
||||
get_cpu_address_sizes(c);
|
||||
|
@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
|
||||
|
||||
extern void __init tsx_init(void);
|
||||
void tsx_ap_init(void);
|
||||
void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
|
||||
#else
|
||||
static inline void tsx_init(void) { }
|
||||
static inline void tsx_ap_init(void) { }
|
||||
static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
|
||||
#endif /* CONFIG_CPU_SUP_INTEL */
|
||||
|
||||
extern void init_spectral_chicken(struct cpuinfo_x86 *c);
|
||||
|
@ -269,19 +269,26 @@ detect_keyid_bits:
|
||||
c->x86_phys_bits -= keyid_bits;
|
||||
}
|
||||
|
||||
void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return;
|
||||
|
||||
if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The BIOS can have limited CPUID to leaf 2, which breaks feature
|
||||
* enumeration. Unlock it and update the maximum leaf info.
|
||||
*/
|
||||
if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
|
||||
c->cpuid_level = cpuid_eax(0);
|
||||
}
|
||||
|
||||
static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 misc_enable;
|
||||
|
||||
/* Unmask CPUID levels if masked: */
|
||||
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
|
||||
if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
|
||||
c->cpuid_level = cpuid_eax(0);
|
||||
get_cpu_cap(c);
|
||||
}
|
||||
}
|
||||
|
||||
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
||||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
|
||||
|
||||
/*
|
||||
* If leaf 0xb is available, then the domain shifts are set
|
||||
* already and nothing to do here.
|
||||
* already and nothing to do here. Only valid for family >= 0x17.
|
||||
*/
|
||||
if (!has_topoext) {
|
||||
if (!has_topoext && tscan->c->x86 >= 0x17) {
|
||||
/*
|
||||
* Leaf 0x80000008 set the CORE domain shift already.
|
||||
* Update the SMT domain, but do not propagate it.
|
||||
|
@ -295,8 +295,15 @@ void machine_kexec_cleanup(struct kimage *image)
|
||||
void machine_kexec(struct kimage *image)
|
||||
{
|
||||
unsigned long page_list[PAGES_NR];
|
||||
void *control_page;
|
||||
unsigned int host_mem_enc_active;
|
||||
int save_ftrace_enabled;
|
||||
void *control_page;
|
||||
|
||||
/*
|
||||
* This must be done before load_segments() since if call depth tracking
|
||||
* is used then GS must be valid to make any function calls.
|
||||
*/
|
||||
host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (image->preserve_context)
|
||||
@ -358,7 +365,7 @@ void machine_kexec(struct kimage *image)
|
||||
(unsigned long)page_list,
|
||||
image->start,
|
||||
image->preserve_context,
|
||||
cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
|
||||
host_mem_enc_active);
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (image->preserve_context)
|
||||
|
@ -44,6 +44,7 @@ config KVM
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_PM_NOTIFIER if PM
|
||||
select KVM_GENERIC_HARDWARE_ENABLING
|
||||
select KVM_WERROR if WERROR
|
||||
help
|
||||
Support hosting fully virtualized guest machines using hardware
|
||||
virtualization extensions. You will need a fairly recent
|
||||
@ -66,7 +67,7 @@ config KVM_WERROR
|
||||
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
|
||||
# Building KVM with -Werror and KASAN is still doable via enabling
|
||||
# the kernel-wide WERROR=y.
|
||||
depends on KVM && EXPERT && !KASAN
|
||||
depends on KVM && ((EXPERT && !KASAN) || WERROR)
|
||||
help
|
||||
Add -Werror to the build flags for KVM.
|
||||
|
||||
@ -97,15 +98,17 @@ config KVM_INTEL
|
||||
|
||||
config KVM_INTEL_PROVE_VE
|
||||
bool "Check that guests do not receive #VE exceptions"
|
||||
default KVM_PROVE_MMU || DEBUG_KERNEL
|
||||
depends on KVM_INTEL
|
||||
depends on KVM_INTEL && EXPERT
|
||||
help
|
||||
|
||||
Checks that KVM's page table management code will not incorrectly
|
||||
let guests receive a virtualization exception. Virtualization
|
||||
exceptions will be trapped by the hypervisor rather than injected
|
||||
in the guest.
|
||||
|
||||
Note: some CPUs appear to generate spurious EPT Violations #VEs
|
||||
that trigger KVM's WARN, in particular with eptad=0 and/or nested
|
||||
virtualization.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config X86_SGX_KVM
|
||||
|
@ -59,7 +59,17 @@
|
||||
#define MAX_APIC_VECTOR 256
|
||||
#define APIC_VECTORS_PER_REG 32
|
||||
|
||||
static bool lapic_timer_advance_dynamic __read_mostly;
|
||||
/*
|
||||
* Enable local APIC timer advancement (tscdeadline mode only) with adaptive
|
||||
* tuning. When enabled, KVM programs the host timer event to fire early, i.e.
|
||||
* before the deadline expires, to account for the delay between taking the
|
||||
* VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
|
||||
* the guest, i.e. so that the interrupt arrives in the guest with minimal
|
||||
* latency relative to the deadline programmed by the guest.
|
||||
*/
|
||||
static bool lapic_timer_advance __read_mostly = true;
|
||||
module_param(lapic_timer_advance, bool, 0444);
|
||||
|
||||
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
|
||||
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
|
||||
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
|
||||
@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
|
||||
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
|
||||
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
|
||||
|
||||
if (lapic_timer_advance_dynamic) {
|
||||
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
|
||||
/*
|
||||
* If the timer fired early, reread the TSC to account for the
|
||||
* overhead of the above adjustment to avoid waiting longer
|
||||
* than is necessary.
|
||||
*/
|
||||
if (guest_tsc < tsc_deadline)
|
||||
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
|
||||
}
|
||||
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
|
||||
|
||||
/*
|
||||
* If the timer fired early, reread the TSC to account for the overhead
|
||||
* of the above adjustment to avoid waiting longer than is necessary.
|
||||
*/
|
||||
if (guest_tsc < tsc_deadline)
|
||||
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
|
||||
|
||||
if (guest_tsc < tsc_deadline)
|
||||
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
|
||||
@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
|
||||
int kvm_create_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic;
|
||||
|
||||
@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
|
||||
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_ABS_HARD);
|
||||
apic->lapic_timer.timer.function = apic_timer_fn;
|
||||
if (timer_advance_ns == -1) {
|
||||
if (lapic_timer_advance)
|
||||
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
|
||||
lapic_timer_advance_dynamic = true;
|
||||
} else {
|
||||
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
|
||||
lapic_timer_advance_dynamic = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing
|
||||
|
@ -85,7 +85,7 @@ struct kvm_lapic {
|
||||
|
||||
struct dest_map;
|
||||
|
||||
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
|
||||
int kvm_create_lapic(struct kvm_vcpu *vcpu);
|
||||
void kvm_free_lapic(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
|
@ -336,16 +336,19 @@ static int is_cpuid_PSE36(void)
|
||||
#ifdef CONFIG_X86_64
|
||||
static void __set_spte(u64 *sptep, u64 spte)
|
||||
{
|
||||
KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
|
||||
WRITE_ONCE(*sptep, spte);
|
||||
}
|
||||
|
||||
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
|
||||
{
|
||||
KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
|
||||
WRITE_ONCE(*sptep, spte);
|
||||
}
|
||||
|
||||
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
|
||||
{
|
||||
KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
|
||||
return xchg(sptep, spte);
|
||||
}
|
||||
|
||||
@ -4101,6 +4104,22 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
|
||||
return leaf;
|
||||
}
|
||||
|
||||
static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
||||
int *root_level)
|
||||
{
|
||||
int leaf;
|
||||
|
||||
walk_shadow_page_lockless_begin(vcpu);
|
||||
|
||||
if (is_tdp_mmu_active(vcpu))
|
||||
leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level);
|
||||
else
|
||||
leaf = get_walk(vcpu, addr, sptes, root_level);
|
||||
|
||||
walk_shadow_page_lockless_end(vcpu);
|
||||
return leaf;
|
||||
}
|
||||
|
||||
/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
|
||||
static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
|
||||
{
|
||||
@ -4109,15 +4128,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
|
||||
int root, leaf, level;
|
||||
bool reserved = false;
|
||||
|
||||
walk_shadow_page_lockless_begin(vcpu);
|
||||
|
||||
if (is_tdp_mmu_active(vcpu))
|
||||
leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
|
||||
else
|
||||
leaf = get_walk(vcpu, addr, sptes, &root);
|
||||
|
||||
walk_shadow_page_lockless_end(vcpu);
|
||||
|
||||
leaf = get_sptes_lockless(vcpu, addr, sptes, &root);
|
||||
if (unlikely(leaf < 0)) {
|
||||
*sptep = 0ull;
|
||||
return reserved;
|
||||
@ -4400,9 +4411,6 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
smp_rmb();
|
||||
|
||||
/*
|
||||
* Check for a relevant mmu_notifier invalidation event before getting
|
||||
* the pfn from the primary MMU, and before acquiring mmu_lock.
|
||||
@ -5921,6 +5929,22 @@ emulate:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
|
||||
|
||||
void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
|
||||
{
|
||||
u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
|
||||
int root_level, leaf, level;
|
||||
|
||||
leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level);
|
||||
if (unlikely(leaf < 0))
|
||||
return;
|
||||
|
||||
pr_err("%s %llx", msg, gpa);
|
||||
for (level = root_level; level >= leaf; level--)
|
||||
pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
|
||||
pr_cont("\n");
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
|
||||
|
||||
static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
u64 addr, hpa_t root_hpa)
|
||||
{
|
||||
|
@ -3,6 +3,8 @@
|
||||
#ifndef KVM_X86_MMU_SPTE_H
|
||||
#define KVM_X86_MMU_SPTE_H
|
||||
|
||||
#include <asm/vmx.h>
|
||||
|
||||
#include "mmu.h"
|
||||
#include "mmu_internal.h"
|
||||
|
||||
@ -276,6 +278,13 @@ static inline bool is_shadow_present_pte(u64 pte)
|
||||
return !!(pte & SPTE_MMU_PRESENT_MASK);
|
||||
}
|
||||
|
||||
static inline bool is_ept_ve_possible(u64 spte)
|
||||
{
|
||||
return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) &&
|
||||
!(spte & VMX_EPT_SUPPRESS_VE_BIT) &&
|
||||
(spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if A/D bits are supported in hardware and are enabled by KVM.
|
||||
* When enabled, KVM uses A/D bits for all non-nested MMUs. Because L1 can
|
||||
|
@ -21,11 +21,13 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
|
||||
|
||||
static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
|
||||
{
|
||||
KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
|
||||
return xchg(rcu_dereference(sptep), new_spte);
|
||||
}
|
||||
|
||||
static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
|
||||
{
|
||||
KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
|
||||
WRITE_ONCE(*rcu_dereference(sptep), new_spte);
|
||||
}
|
||||
|
||||
|
@ -626,7 +626,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
||||
* SPTEs.
|
||||
*/
|
||||
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
|
||||
0, iter->level, true);
|
||||
SHADOW_NONPRESENT_VALUE, iter->level, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -779,6 +779,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
fpstate_set_confidential(&vcpu->arch.guest_fpu);
|
||||
vcpu->arch.guest_state_protected = true;
|
||||
|
||||
/*
|
||||
* SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
|
||||
* only after setting guest_state_protected because KVM_SET_MSRS allows
|
||||
* dynamic toggling of LBRV (for performance reason) on write access to
|
||||
* MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
|
||||
*/
|
||||
svm_enable_lbrv(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2406,6 +2414,12 @@ void __init sev_hardware_setup(void)
|
||||
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
|
||||
goto out;
|
||||
|
||||
if (!lbrv) {
|
||||
WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
|
||||
"LBRV must be present for SEV-ES support");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Has the system been allocated ASIDs for SEV-ES? */
|
||||
if (min_sev_asid == 1)
|
||||
goto out;
|
||||
@ -3216,7 +3230,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
|
||||
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
|
||||
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
|
||||
|
||||
/*
|
||||
* An SEV-ES guest requires a VMSA area that is a separate from the
|
||||
@ -3268,10 +3281,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
/* Clear intercepts on selected MSRs */
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
|
||||
}
|
||||
|
||||
void sev_init_vmcb(struct vcpu_svm *svm)
|
||||
|
@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
|
||||
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
|
||||
{ .index = MSR_IA32_PRED_CMD, .always = false },
|
||||
{ .index = MSR_IA32_FLUSH_CMD, .always = false },
|
||||
{ .index = MSR_IA32_DEBUGCTLMSR, .always = false },
|
||||
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
|
||||
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
|
||||
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
|
||||
@ -215,7 +216,7 @@ int vgif = true;
|
||||
module_param(vgif, int, 0444);
|
||||
|
||||
/* enable/disable LBR virtualization */
|
||||
static int lbrv = true;
|
||||
int lbrv = true;
|
||||
module_param(lbrv, int, 0444);
|
||||
|
||||
static int tsc_scaling = true;
|
||||
@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
|
||||
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
|
||||
}
|
||||
|
||||
static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
|
||||
void svm_enable_lbrv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
|
||||
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
|
||||
|
||||
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
|
||||
if (is_guest_mode(vcpu))
|
||||
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
|
||||
@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
|
||||
|
||||
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
|
||||
@ -2822,10 +2828,24 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
return sev_es_guest(vcpu->kvm) &&
|
||||
vcpu->arch.guest_state_protected &&
|
||||
svm_msrpm_offset(msr_info->index) != MSR_INVALID &&
|
||||
!msr_write_intercepted(vcpu, msr_info->index);
|
||||
}
|
||||
|
||||
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (sev_es_prevent_msr_access(vcpu, msr_info)) {
|
||||
msr_info->data = 0;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (msr_info->index) {
|
||||
case MSR_AMD64_TSC_RATIO:
|
||||
if (!msr_info->host_initiated &&
|
||||
@ -2976,6 +2996,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
|
||||
u32 ecx = msr->index;
|
||||
u64 data = msr->data;
|
||||
|
||||
if (sev_es_prevent_msr_access(vcpu, msr))
|
||||
return -EINVAL;
|
||||
|
||||
switch (ecx) {
|
||||
case MSR_AMD64_TSC_RATIO:
|
||||
|
||||
@ -3846,16 +3870,27 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/*
|
||||
* KVM should never request an NMI window when vNMI is enabled, as KVM
|
||||
* allows at most one to-be-injected NMI and one pending NMI, i.e. if
|
||||
* two NMIs arrive simultaneously, KVM will inject one and set
|
||||
* V_NMI_PENDING for the other. WARN, but continue with the standard
|
||||
* single-step approach to try and salvage the pending NMI.
|
||||
* If NMIs are outright masked, i.e. the vCPU is already handling an
|
||||
* NMI, and KVM has not yet intercepted an IRET, then there is nothing
|
||||
* more to do at this time as KVM has already enabled IRET intercepts.
|
||||
* If KVM has already intercepted IRET, then single-step over the IRET,
|
||||
* as NMIs aren't architecturally unmasked until the IRET completes.
|
||||
*
|
||||
* If vNMI is enabled, KVM should never request an NMI window if NMIs
|
||||
* are masked, as KVM allows at most one to-be-injected NMI and one
|
||||
* pending NMI. If two NMIs arrive simultaneously, KVM will inject one
|
||||
* NMI and set V_NMI_PENDING for the other, but if and only if NMIs are
|
||||
* unmasked. KVM _will_ request an NMI window in some situations, e.g.
|
||||
* if the vCPU is in an STI shadow or if GIF=0, KVM can't immediately
|
||||
* inject the NMI. In those situations, KVM needs to single-step over
|
||||
* the STI shadow or intercept STGI.
|
||||
*/
|
||||
WARN_ON_ONCE(is_vnmi_enabled(svm));
|
||||
if (svm_get_nmi_mask(vcpu)) {
|
||||
WARN_ON_ONCE(is_vnmi_enabled(svm));
|
||||
|
||||
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
|
||||
return; /* IRET will cause a vm exit */
|
||||
if (!svm->awaiting_iret_completion)
|
||||
return; /* IRET will cause a vm exit */
|
||||
}
|
||||
|
||||
/*
|
||||
* SEV-ES guests are responsible for signaling when a vCPU is ready to
|
||||
@ -5265,6 +5300,12 @@ static __init int svm_hardware_setup(void)
|
||||
|
||||
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
|
||||
|
||||
if (lbrv) {
|
||||
if (!boot_cpu_has(X86_FEATURE_LBRV))
|
||||
lbrv = false;
|
||||
else
|
||||
pr_info("LBR virtualization supported\n");
|
||||
}
|
||||
/*
|
||||
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
|
||||
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
|
||||
@ -5318,14 +5359,6 @@ static __init int svm_hardware_setup(void)
|
||||
svm_x86_ops.set_vnmi_pending = NULL;
|
||||
}
|
||||
|
||||
|
||||
if (lbrv) {
|
||||
if (!boot_cpu_has(X86_FEATURE_LBRV))
|
||||
lbrv = false;
|
||||
else
|
||||
pr_info("LBR virtualization supported\n");
|
||||
}
|
||||
|
||||
if (!enable_pmu)
|
||||
pr_info("PMU virtualization is disabled\n");
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
#define IOPM_SIZE PAGE_SIZE * 3
|
||||
#define MSRPM_SIZE PAGE_SIZE * 2
|
||||
|
||||
#define MAX_DIRECT_ACCESS_MSRS 47
|
||||
#define MAX_DIRECT_ACCESS_MSRS 48
|
||||
#define MSRPM_OFFSETS 32
|
||||
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
||||
extern bool npt_enabled;
|
||||
@ -39,6 +39,7 @@ extern int vgif;
|
||||
extern bool intercept_smi;
|
||||
extern bool x2avic_enabled;
|
||||
extern bool vnmi;
|
||||
extern int lbrv;
|
||||
|
||||
/*
|
||||
* Clean bits in VMCB.
|
||||
@ -552,6 +553,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
|
||||
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
|
||||
void svm_vcpu_free_msrpm(u32 *msrpm);
|
||||
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
|
||||
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
|
||||
void svm_update_lbrv(struct kvm_vcpu *vcpu);
|
||||
|
||||
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
|
@ -2242,6 +2242,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
|
||||
vmcs_write64(EPT_POINTER,
|
||||
construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
|
||||
|
||||
if (vmx->ve_info)
|
||||
vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));
|
||||
|
||||
/* All VMFUNCs are currently emulated through L0 vmexits. */
|
||||
if (cpu_has_vmx_vmfunc())
|
||||
vmcs_write64(VM_FUNCTION_CONTROL, 0);
|
||||
@ -6230,6 +6233,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
|
||||
else if (is_alignment_check(intr_info) &&
|
||||
!vmx_guest_inject_ac(vcpu))
|
||||
return true;
|
||||
else if (is_ve_fault(intr_info))
|
||||
return true;
|
||||
return false;
|
||||
case EXIT_REASON_EXTERNAL_INTERRUPT:
|
||||
return true;
|
||||
|
@ -5218,8 +5218,15 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
if (is_invalid_opcode(intr_info))
|
||||
return handle_ud(vcpu);
|
||||
|
||||
if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
|
||||
return -EIO;
|
||||
if (WARN_ON_ONCE(is_ve_fault(intr_info))) {
|
||||
struct vmx_ve_information *ve_info = vmx->ve_info;
|
||||
|
||||
WARN_ONCE(ve_info->exit_reason != EXIT_REASON_EPT_VIOLATION,
|
||||
"Unexpected #VE on VM-Exit reason 0x%x", ve_info->exit_reason);
|
||||
dump_vmcs(vcpu);
|
||||
kvm_mmu_print_sptes(vcpu, ve_info->guest_physical_address, "#VE");
|
||||
return 1;
|
||||
}
|
||||
|
||||
error_code = 0;
|
||||
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
|
||||
|
@ -164,15 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
|
||||
static u32 __read_mostly tsc_tolerance_ppm = 250;
|
||||
module_param(tsc_tolerance_ppm, uint, 0644);
|
||||
|
||||
/*
|
||||
* lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
|
||||
* adaptive tuning starting from default advancement of 1000ns. '0' disables
|
||||
* advancement entirely. Any other value is used as-is and disables adaptive
|
||||
* tuning, i.e. allows privileged userspace to set an exact advancement time.
|
||||
*/
|
||||
static int __read_mostly lapic_timer_advance_ns = -1;
|
||||
module_param(lapic_timer_advance_ns, int, 0644);
|
||||
|
||||
static bool __read_mostly vector_hashing = true;
|
||||
module_param(vector_hashing, bool, 0444);
|
||||
|
||||
@ -12169,7 +12160,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
|
||||
r = kvm_create_lapic(vcpu);
|
||||
if (r < 0)
|
||||
goto fail_mmu_destroy;
|
||||
|
||||
|
@ -104,6 +104,7 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
static int blk_validate_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_hw_sectors;
|
||||
unsigned int logical_block_sectors;
|
||||
|
||||
/*
|
||||
* Unless otherwise specified, default to 512 byte logical blocks and a
|
||||
@ -134,8 +135,11 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
|
||||
return -EINVAL;
|
||||
logical_block_sectors = lim->logical_block_size >> SECTOR_SHIFT;
|
||||
if (WARN_ON_ONCE(logical_block_sectors > lim->max_hw_sectors))
|
||||
return -EINVAL;
|
||||
lim->max_hw_sectors = round_down(lim->max_hw_sectors,
|
||||
lim->logical_block_size >> SECTOR_SHIFT);
|
||||
logical_block_sectors);
|
||||
|
||||
/*
|
||||
* The actual max_sectors value is a complex beast and also takes the
|
||||
@ -153,7 +157,7 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
|
||||
}
|
||||
lim->max_sectors = round_down(lim->max_sectors,
|
||||
lim->logical_block_size >> SECTOR_SHIFT);
|
||||
logical_block_sectors);
|
||||
|
||||
/*
|
||||
* Random default for the maximum number of segments. Driver should not
|
||||
@ -611,6 +615,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
unsigned int top, bottom, alignment, ret = 0;
|
||||
|
||||
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
|
||||
t->max_user_sectors = min_not_zero(t->max_user_sectors,
|
||||
b->max_user_sectors);
|
||||
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
|
||||
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
|
||||
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
|
||||
|
@ -64,7 +64,6 @@ struct blk_stat_callback {
|
||||
|
||||
struct blk_queue_stats *blk_alloc_queue_stats(void);
|
||||
void blk_free_queue_stats(struct blk_queue_stats *);
|
||||
bool blk_stats_alloc_enable(struct request_queue *q);
|
||||
|
||||
void blk_stat_add(struct request *rq, u64 now);
|
||||
|
||||
|
@ -1399,32 +1399,32 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
bps_dft = U64_MAX;
|
||||
iops_dft = UINT_MAX;
|
||||
|
||||
if (tg->bps_conf[READ] == bps_dft &&
|
||||
tg->bps_conf[WRITE] == bps_dft &&
|
||||
tg->iops_conf[READ] == iops_dft &&
|
||||
tg->iops_conf[WRITE] == iops_dft)
|
||||
if (tg->bps[READ] == bps_dft &&
|
||||
tg->bps[WRITE] == bps_dft &&
|
||||
tg->iops[READ] == iops_dft &&
|
||||
tg->iops[WRITE] == iops_dft)
|
||||
return 0;
|
||||
|
||||
seq_printf(sf, "%s", dname);
|
||||
if (tg->bps_conf[READ] == U64_MAX)
|
||||
if (tg->bps[READ] == U64_MAX)
|
||||
seq_printf(sf, " rbps=max");
|
||||
else
|
||||
seq_printf(sf, " rbps=%llu", tg->bps_conf[READ]);
|
||||
seq_printf(sf, " rbps=%llu", tg->bps[READ]);
|
||||
|
||||
if (tg->bps_conf[WRITE] == U64_MAX)
|
||||
if (tg->bps[WRITE] == U64_MAX)
|
||||
seq_printf(sf, " wbps=max");
|
||||
else
|
||||
seq_printf(sf, " wbps=%llu", tg->bps_conf[WRITE]);
|
||||
seq_printf(sf, " wbps=%llu", tg->bps[WRITE]);
|
||||
|
||||
if (tg->iops_conf[READ] == UINT_MAX)
|
||||
if (tg->iops[READ] == UINT_MAX)
|
||||
seq_printf(sf, " riops=max");
|
||||
else
|
||||
seq_printf(sf, " riops=%u", tg->iops_conf[READ]);
|
||||
seq_printf(sf, " riops=%u", tg->iops[READ]);
|
||||
|
||||
if (tg->iops_conf[WRITE] == UINT_MAX)
|
||||
if (tg->iops[WRITE] == UINT_MAX)
|
||||
seq_printf(sf, " wiops=max");
|
||||
else
|
||||
seq_printf(sf, " wiops=%u", tg->iops_conf[WRITE]);
|
||||
seq_printf(sf, " wiops=%u", tg->iops[WRITE]);
|
||||
|
||||
seq_printf(sf, "\n");
|
||||
return 0;
|
||||
|
@ -95,15 +95,11 @@ struct throtl_grp {
|
||||
bool has_rules_bps[2];
|
||||
bool has_rules_iops[2];
|
||||
|
||||
/* internally used bytes per second rate limits */
|
||||
/* bytes per second rate limits */
|
||||
uint64_t bps[2];
|
||||
/* user configured bps limits */
|
||||
uint64_t bps_conf[2];
|
||||
|
||||
/* internally used IOPS limits */
|
||||
/* IOPS limits */
|
||||
unsigned int iops[2];
|
||||
/* user configured IOPS limits */
|
||||
unsigned int iops_conf[2];
|
||||
|
||||
/* Number of bytes dispatched in current slice */
|
||||
uint64_t bytes_disp[2];
|
||||
|
@ -450,6 +450,25 @@ static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
|
||||
return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
|
||||
}
|
||||
|
||||
static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
|
||||
{
|
||||
return zone->start + zone->len >= get_capacity(disk);
|
||||
}
|
||||
|
||||
static bool disk_zone_is_full(struct gendisk *disk,
|
||||
unsigned int zno, unsigned int offset_in_zone)
|
||||
{
|
||||
if (zno < disk->nr_zones - 1)
|
||||
return offset_in_zone >= disk->zone_capacity;
|
||||
return offset_in_zone >= disk->last_zone_capacity;
|
||||
}
|
||||
|
||||
static bool disk_zone_wplug_is_full(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
|
||||
}
|
||||
|
||||
static bool disk_insert_zone_wplug(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
@ -543,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
|
||||
return false;
|
||||
|
||||
/* We can remove zone write plugs for zones that are empty or full. */
|
||||
return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
|
||||
return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
|
||||
}
|
||||
|
||||
static void disk_remove_zone_wplug(struct gendisk *disk,
|
||||
@ -664,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
|
||||
static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
unsigned int zone_capacity = disk->zone_capacity;
|
||||
unsigned int wp_offset = zwplug->wp_offset;
|
||||
struct bio_list bl = BIO_EMPTY_LIST;
|
||||
struct bio *bio;
|
||||
|
||||
while ((bio = bio_list_pop(&zwplug->bio_list))) {
|
||||
if (wp_offset >= zone_capacity ||
|
||||
if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
|
||||
(bio_op(bio) != REQ_OP_ZONE_APPEND &&
|
||||
bio_offset_from_zone_start(bio) != wp_offset)) {
|
||||
blk_zone_wplug_bio_io_error(zwplug, bio);
|
||||
@ -909,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req)
|
||||
sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
|
||||
struct request_queue *q = req->q;
|
||||
struct gendisk *disk = q->disk;
|
||||
unsigned int zone_capacity = disk->zone_capacity;
|
||||
struct blk_zone_wplug *zwplug =
|
||||
disk_get_zone_wplug(disk, blk_rq_pos(req));
|
||||
unsigned long flags;
|
||||
@ -933,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req)
|
||||
* into the back of the request.
|
||||
*/
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
while (zwplug->wp_offset < zone_capacity) {
|
||||
while (!disk_zone_wplug_is_full(disk, zwplug)) {
|
||||
bio = bio_list_peek(&zwplug->bio_list);
|
||||
if (!bio)
|
||||
break;
|
||||
@ -979,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
|
||||
* We know such BIO will fail, and that would potentially overflow our
|
||||
* write pointer offset beyond the end of the zone.
|
||||
*/
|
||||
if (zwplug->wp_offset >= disk->zone_capacity)
|
||||
if (disk_zone_wplug_is_full(disk, zwplug))
|
||||
goto err;
|
||||
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
@ -1556,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk)
|
||||
kfree(disk->conv_zones_bitmap);
|
||||
disk->conv_zones_bitmap = NULL;
|
||||
disk->zone_capacity = 0;
|
||||
disk->last_zone_capacity = 0;
|
||||
disk->nr_zones = 0;
|
||||
}
|
||||
|
||||
@ -1600,6 +1618,7 @@ struct blk_revalidate_zone_args {
|
||||
unsigned long *conv_zones_bitmap;
|
||||
unsigned int nr_zones;
|
||||
unsigned int zone_capacity;
|
||||
unsigned int last_zone_capacity;
|
||||
sector_t sector;
|
||||
};
|
||||
|
||||
@ -1617,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||
|
||||
disk->nr_zones = args->nr_zones;
|
||||
disk->zone_capacity = args->zone_capacity;
|
||||
disk->last_zone_capacity = args->last_zone_capacity;
|
||||
swap(disk->conv_zones_bitmap, args->conv_zones_bitmap);
|
||||
if (disk->conv_zones_bitmap)
|
||||
nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap,
|
||||
@ -1668,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (disk_zone_is_last(disk, zone))
|
||||
args->last_zone_capacity = zone->capacity;
|
||||
|
||||
if (!disk_need_zone_resources(disk))
|
||||
return 0;
|
||||
|
||||
@ -1693,11 +1716,14 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
|
||||
|
||||
/*
|
||||
* Remember the capacity of the first sequential zone and check
|
||||
* if it is constant for all zones.
|
||||
* if it is constant for all zones, ignoring the last zone as it can be
|
||||
* smaller.
|
||||
*/
|
||||
if (!args->zone_capacity)
|
||||
args->zone_capacity = zone->capacity;
|
||||
if (zone->capacity != args->zone_capacity) {
|
||||
if (disk_zone_is_last(disk, zone)) {
|
||||
args->last_zone_capacity = zone->capacity;
|
||||
} else if (zone->capacity != args->zone_capacity) {
|
||||
pr_warn("%s: Invalid variable zone capacity\n",
|
||||
disk->disk_name);
|
||||
return -ENODEV;
|
||||
@ -1732,7 +1758,6 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||
{
|
||||
struct blk_revalidate_zone_args *args = data;
|
||||
struct gendisk *disk = args->disk;
|
||||
sector_t capacity = get_capacity(disk);
|
||||
sector_t zone_sectors = disk->queue->limits.chunk_sectors;
|
||||
int ret;
|
||||
|
||||
@ -1743,7 +1768,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (zone->start >= capacity || !zone->len) {
|
||||
if (zone->start >= get_capacity(disk) || !zone->len) {
|
||||
pr_warn("%s: Invalid zone start %llu, length %llu\n",
|
||||
disk->disk_name, zone->start, zone->len);
|
||||
return -ENODEV;
|
||||
@ -1753,7 +1778,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||
* All zones must have the same size, with the exception on an eventual
|
||||
* smaller last zone.
|
||||
*/
|
||||
if (zone->start + zone->len < capacity) {
|
||||
if (!disk_zone_is_last(disk, zone)) {
|
||||
if (zone->len != zone_sectors) {
|
||||
pr_warn("%s: Invalid zoned device with non constant zone size\n",
|
||||
disk->disk_name);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user