mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-18 02:46:06 +00:00
Merge branch 'linus' into objtool/core, to pick up Xen dependencies
Pick up dependencies - freshly merged upstream via xen-next - before applying dependent objtool changes. Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
585a78c1f7
6
.mailmap
6
.mailmap
@ -25,6 +25,8 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com>
|
||||
Alexander Lobakin <alobakin@pm.me> <alobakin@dlink.ru>
|
||||
Alexander Lobakin <alobakin@pm.me> <alobakin@marvell.com>
|
||||
Alexander Lobakin <alobakin@pm.me> <bloodyreaper@yandex.ru>
|
||||
Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
|
||||
Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
|
||||
Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
|
||||
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
|
||||
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
|
||||
@ -130,6 +132,7 @@ Domen Puncer <domen@coderock.org>
|
||||
Douglas Gilbert <dougg@torque.net>
|
||||
Ed L. Cashin <ecashin@coraid.com>
|
||||
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
|
||||
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
|
||||
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
|
||||
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
|
||||
Felipe W Damasio <felipewd@terra.com.br>
|
||||
@ -214,6 +217,7 @@ Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
|
||||
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
|
||||
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
||||
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
||||
John Crispin <john@phrozen.org> <blogic@openwrt.org>
|
||||
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
|
||||
John Stultz <johnstul@us.ibm.com>
|
||||
Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
|
||||
@ -371,6 +375,7 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
|
||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
|
||||
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
|
||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
|
||||
Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
|
||||
@ -422,6 +427,7 @@ Tony Luck <tony.luck@intel.com>
|
||||
TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
|
||||
TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
|
||||
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
|
||||
Tudor Ambarus <tudor.ambarus@linaro.org> <tudor.ambarus@microchip.com>
|
||||
Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
|
||||
Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com>
|
||||
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
|
||||
|
15
CREDITS
15
CREDITS
@ -1173,6 +1173,10 @@ D: Future Domain TMC-16x0 SCSI driver (author)
|
||||
D: APM driver (early port)
|
||||
D: DRM drivers (author of several)
|
||||
|
||||
N: Veaceslav Falico
|
||||
E: vfalico@gmail.com
|
||||
D: Co-maintainer and co-author of the network bonding driver.
|
||||
|
||||
N: János Farkas
|
||||
E: chexum@shadow.banki.hu
|
||||
D: romfs, various (mostly networking) fixes
|
||||
@ -2489,6 +2493,13 @@ D: XF86_Mach8
|
||||
D: XF86_8514
|
||||
D: cfdisk (curses based disk partitioning program)
|
||||
|
||||
N: Mat Martineau
|
||||
E: mat@martineau.name
|
||||
D: MPTCP subsystem co-maintainer 2020-2023
|
||||
D: Keyctl restricted keyring and Diffie-Hellman UAPI
|
||||
D: Bluetooth L2CAP ERTM mode and AMP
|
||||
S: USA
|
||||
|
||||
N: John S. Marvin
|
||||
E: jsm@fc.hp.com
|
||||
D: PA-RISC port
|
||||
@ -4172,6 +4183,10 @@ S: B-1206 Jingmao Guojigongyu
|
||||
S: 16 Baliqiao Nanjie, Beijing 101100
|
||||
S: People's Repulic of China
|
||||
|
||||
N: Vlad Yasevich
|
||||
E: vyasevich@gmail.com
|
||||
D: SCTP protocol maintainer.
|
||||
|
||||
N: Aviad Yehezkel
|
||||
E: aviadye@nvidia.com
|
||||
D: Kernel TLS implementation and offload support.
|
||||
|
@ -432,7 +432,8 @@ Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RW] This is the maximum number of kilobytes that the block
|
||||
layer will allow for a filesystem request. Must be smaller than
|
||||
or equal to the maximum size allowed by the hardware.
|
||||
or equal to the maximum size allowed by the hardware. Write 0
|
||||
to use default kernel settings.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/max_segment_size
|
||||
|
@ -522,7 +522,6 @@ Description: These files allow to each of ASICs by writing 1.
|
||||
|
||||
The files are write only.
|
||||
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/comm_chnl_ready
|
||||
Date: July 2022
|
||||
KernelVersion: 5.20
|
||||
@ -542,3 +541,124 @@ Description: The file indicates COME module hardware configuration.
|
||||
The purpose is to expose some minor BOM changes for the same system SKU.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_pwr_converter_fail
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file shows the system reset cause due to power converter
|
||||
devices failure.
|
||||
Value 1 in file means this is reset cause, 0 - otherwise.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_ap_reset
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_ap_reset
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files aim to monitor the status of the External Root of Trust (EROT)
|
||||
processor's RESET output to the Application Processor (AP).
|
||||
By reading this file, could be determined if the EROT has invalidated or
|
||||
revoked AP Firmware, at which point it will hold the AP in RESET until a
|
||||
valid firmware is loaded. This protects the AP from running an
|
||||
unauthorized firmware. In the normal flow, the AP reset should be released
|
||||
after the EROT validates the integrity of the FW, and it should be done so
|
||||
as quickly as possible so that the AP boots before the CPU starts to
|
||||
communicate to each ASIC.
|
||||
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_recovery
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_recovery
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_reset
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_reset
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files aim to perform External Root of Trust (EROT) recovery
|
||||
sequence after EROT device failure.
|
||||
These EROT devices protect ASICs from unauthorized access and in normal
|
||||
flow their reset should be released with system power – earliest power
|
||||
up stage, so that EROTs can begin boot and authentication process before
|
||||
CPU starts to communicate to ASICs.
|
||||
Issuing a reset to the EROT while asserting the recovery signal will cause
|
||||
the EROT Application Processor to enter recovery mode so that the EROT FW
|
||||
can be updated/recovered.
|
||||
For reset/recovery the related file should be toggled by 1/0.
|
||||
|
||||
The files are read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_wp
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_wp
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: These files allow access to External Root of Trust (EROT) for reset
|
||||
and recovery sequence after EROT device failure.
|
||||
Default is 0 (programming disabled).
|
||||
If the system is in locked-down mode writing this file will not be allowed.
|
||||
|
||||
The files are read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/spi_chnl_select
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file allows SPI chip selection for External Root of Trust (EROT)
|
||||
device Out-of-Band recovery.
|
||||
File can be written with 0 or with 1. It selects which EROT can be accessed
|
||||
through SPI device.
|
||||
|
||||
The file is read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_pg_fail
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak vadimp@nvidia.com
|
||||
Description: This file shows ASIC Power Good status.
|
||||
Value 1 in file means ASIC Power Good failed, 0 - otherwise.
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd1_boot_fail
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd2_boot_fail
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd_fail
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak vadimp@nvidia.com
|
||||
Description: These files are related to clock boards status in system.
|
||||
- clk_brd1_boot_fail: warning about 1-st clock board failed to boot from CI.
|
||||
- clk_brd2_boot_fail: warning about 2-nd clock board failed to boot from CI.
|
||||
- clk_brd_fail: error about common clock board boot failure.
|
||||
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd_prog_en
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file enables programming of clock boards.
|
||||
Default is 0 (programming disabled).
|
||||
If the system is in locked-down mode writing this file will not be allowed.
|
||||
|
||||
The file is read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pwr_converter_prog_en
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file enables programming of power converters.
|
||||
Default is 0 (programming disabled).
|
||||
If the system is in locked-down mode writing this file will not be allowed.
|
||||
|
||||
The file is read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_ac_ok_fail
|
||||
Date: February 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||
Description: This file shows the system reset cause due to AC power failure.
|
||||
Value 1 in file means this is reset cause, 0 - otherwise.
|
||||
|
||||
The file is read only.
|
||||
|
@ -120,3 +120,16 @@ Contact: xen-devel@lists.xenproject.org
|
||||
Description: If running under Xen:
|
||||
The Xen version is in the format <major>.<minor><extra>
|
||||
This is the <minor> part of it.
|
||||
|
||||
What: /sys/hypervisor/start_flags/*
|
||||
Date: March 2023
|
||||
KernelVersion: 6.3.0
|
||||
Contact: xen-devel@lists.xenproject.org
|
||||
Description: If running under Xen:
|
||||
All bits in Xen's start-flags are represented as
|
||||
boolean files, returning '1' if set, '0' otherwise.
|
||||
This takes the place of the defunct /proc/xen/capabilities,
|
||||
which would contain "control_d" on dom0, and be empty
|
||||
otherwise. This flag is now exposed as "initdomain" in
|
||||
addition to the "privileged" flag; all other possible flags
|
||||
are accessible as "unknownXX".
|
||||
|
18
Documentation/ABI/testing/debugfs-pktcdvd
Normal file
18
Documentation/ABI/testing/debugfs-pktcdvd
Normal file
@ -0,0 +1,18 @@
|
||||
What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
|
||||
The pktcdvd module (packet writing driver) creates
|
||||
these files in debugfs:
|
||||
|
||||
/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
|
||||
|
||||
==== ====== ====================================
|
||||
info 0444 Lots of driver statistics and infos.
|
||||
==== ====== ====================================
|
||||
|
||||
Example::
|
||||
|
||||
cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
|
@ -1,22 +1,19 @@
|
||||
What: /sys/bus/css/devices/.../type
|
||||
Date: March 2008
|
||||
Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
|
||||
linux-s390@vger.kernel.org
|
||||
Contact: linux-s390@vger.kernel.org
|
||||
Description: Contains the subchannel type, as reported by the hardware.
|
||||
This attribute is present for all subchannel types.
|
||||
|
||||
What: /sys/bus/css/devices/.../modalias
|
||||
Date: March 2008
|
||||
Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
|
||||
linux-s390@vger.kernel.org
|
||||
Contact: linux-s390@vger.kernel.org
|
||||
Description: Contains the module alias as reported with uevents.
|
||||
It is of the format css:t<type> and present for all
|
||||
subchannel types.
|
||||
|
||||
What: /sys/bus/css/drivers/io_subchannel/.../chpids
|
||||
Date: December 2002
|
||||
Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
|
||||
linux-s390@vger.kernel.org
|
||||
Contact: linux-s390@vger.kernel.org
|
||||
Description: Contains the ids of the channel paths used by this
|
||||
subchannel, as reported by the channel subsystem
|
||||
during subchannel recognition.
|
||||
@ -26,8 +23,7 @@ Users: s390-tools, HAL
|
||||
|
||||
What: /sys/bus/css/drivers/io_subchannel/.../pimpampom
|
||||
Date: December 2002
|
||||
Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
|
||||
linux-s390@vger.kernel.org
|
||||
Contact: linux-s390@vger.kernel.org
|
||||
Description: Contains the PIM/PAM/POM values, as reported by the
|
||||
channel subsystem when last queried by the common I/O
|
||||
layer (this implies that this attribute is not necessarily
|
||||
@ -38,8 +34,7 @@ Users: s390-tools, HAL
|
||||
|
||||
What: /sys/bus/css/devices/.../driver_override
|
||||
Date: June 2019
|
||||
Contact: Cornelia Huck <cohuck@redhat.com>
|
||||
linux-s390@vger.kernel.org
|
||||
Contact: linux-s390@vger.kernel.org
|
||||
Description: This file allows the driver for a device to be specified. When
|
||||
specified, only a driver with a name matching the value written
|
||||
to driver_override will have an opportunity to bind to the
|
||||
|
97
Documentation/ABI/testing/sysfs-class-pktcdvd
Normal file
97
Documentation/ABI/testing/sysfs-class-pktcdvd
Normal file
@ -0,0 +1,97 @@
|
||||
sysfs interface
|
||||
---------------
|
||||
The pktcdvd module (packet writing driver) creates the following files in the
|
||||
sysfs: (<devid> is in the format major:minor)
|
||||
|
||||
What: /sys/class/pktcdvd/add
|
||||
What: /sys/class/pktcdvd/remove
|
||||
What: /sys/class/pktcdvd/device_map
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
|
||||
========== ==============================================
|
||||
add (WO) Write a block device id (major:minor) to
|
||||
create a new pktcdvd device and map it to the
|
||||
block device.
|
||||
|
||||
remove (WO) Write the pktcdvd device id (major:minor)
|
||||
to remove the pktcdvd device.
|
||||
|
||||
device_map (RO) Shows the device mapping in format:
|
||||
pktcdvd[0-7] <pktdevid> <blkdevid>
|
||||
========== ==============================================
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/dev
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
dev: (RO) Device id
|
||||
|
||||
uevent: (WO) To send a uevent
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
packets_started: (RO) Number of started packets.
|
||||
|
||||
packets_finished: (RO) Number of finished packets.
|
||||
|
||||
kb_written: (RO) kBytes written.
|
||||
|
||||
kb_read: (RO) kBytes read.
|
||||
|
||||
kb_read_gather: (RO) kBytes read to fill write packets.
|
||||
|
||||
reset: (WO) Write any value to it to reset
|
||||
pktcdvd device statistic values, like
|
||||
bytes read/written.
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
============== ================================================
|
||||
size (RO) Contains the size of the bio write queue.
|
||||
|
||||
congestion_off (RW) If bio write queue size is below this mark,
|
||||
accept new bio requests from the block layer.
|
||||
|
||||
congestion_on (RW) If bio write queue size is higher as this
|
||||
mark, do no longer accept bio write requests
|
||||
from the block layer and wait till the pktcdvd
|
||||
device has processed enough bio's so that bio
|
||||
write queue size is below congestion off mark.
|
||||
A value of <= 0 disables congestion control.
|
||||
============== ================================================
|
||||
|
||||
|
||||
Example:
|
||||
--------
|
||||
To use the pktcdvd sysfs interface directly, you can do::
|
||||
|
||||
# create a new pktcdvd device mapped to /dev/hdc
|
||||
echo "22:0" >/sys/class/pktcdvd/add
|
||||
cat /sys/class/pktcdvd/device_map
|
||||
# assuming device pktcdvd0 was created, look at stat's
|
||||
cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
|
||||
# print the device id of the mapped block device
|
||||
fgrep pktcdvd0 /sys/class/pktcdvd/device_map
|
||||
# remove device, using pktcdvd0 device id 253:0
|
||||
echo "253:0" >/sys/class/pktcdvd/remove
|
@ -4,7 +4,8 @@ Contact: "Huang Jianan" <huangjianan@oppo.com>
|
||||
Description: Shows all enabled kernel features.
|
||||
Supported features:
|
||||
zero_padding, compr_cfgs, big_pcluster, chunked_file,
|
||||
device_table, compr_head2, sb_chksum.
|
||||
device_table, compr_head2, sb_chksum, ztailpacking,
|
||||
dedupe, fragments.
|
||||
|
||||
What: /sys/fs/erofs/<disk>/sync_decompress
|
||||
Date: November 2021
|
||||
|
@ -8,7 +8,7 @@ Although RCU is usually used to protect read-mostly data structures,
|
||||
it is possible to use RCU to provide dynamic non-maskable interrupt
|
||||
handlers, as well as dynamic irq handlers. This document describes
|
||||
how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
|
||||
work in "arch/x86/kernel/traps.c".
|
||||
work in an old version of "arch/x86/kernel/traps.c".
|
||||
|
||||
The relevant pieces of code are listed below, each followed by a
|
||||
brief explanation::
|
||||
@ -116,7 +116,7 @@ Answer to Quick Quiz:
|
||||
|
||||
This same sad story can happen on other CPUs when using
|
||||
a compiler with aggressive pointer-value speculation
|
||||
optimizations.
|
||||
optimizations. (But please don't!)
|
||||
|
||||
More important, the rcu_dereference_sched() makes it
|
||||
clear to someone reading the code that the pointer is
|
||||
|
@ -38,7 +38,7 @@ by having call_rcu() directly invoke its arguments only if it was called
|
||||
from process context. However, this can fail in a similar manner.
|
||||
|
||||
Suppose that an RCU-based algorithm again scans a linked list containing
|
||||
elements A, B, and C in process contexts, but that it invokes a function
|
||||
elements A, B, and C in process context, but that it invokes a function
|
||||
on each element as it is scanned. Suppose further that this function
|
||||
deletes element B from the list, then passes it to call_rcu() for deferred
|
||||
freeing. This may be a bit unconventional, but it is perfectly legal
|
||||
@ -59,7 +59,8 @@ Example 3: Death by Deadlock
|
||||
Suppose that call_rcu() is invoked while holding a lock, and that the
|
||||
callback function must acquire this same lock. In this case, if
|
||||
call_rcu() were to directly invoke the callback, the result would
|
||||
be self-deadlock.
|
||||
be self-deadlock *even if* this invocation occurred from a later
|
||||
call_rcu() invocation a full grace period later.
|
||||
|
||||
In some cases, it would possible to restructure to code so that
|
||||
the call_rcu() is delayed until after the lock is released. However,
|
||||
@ -85,6 +86,14 @@ Quick Quiz #2:
|
||||
|
||||
:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
|
||||
|
||||
It is important to note that userspace RCU implementations *do*
|
||||
permit call_rcu() to directly invoke callbacks, but only if a full
|
||||
grace period has elapsed since those callbacks were queued. This is
|
||||
the case because some userspace environments are extremely constrained.
|
||||
Nevertheless, people writing userspace RCU implementations are strongly
|
||||
encouraged to avoid invoking callbacks from call_rcu(), thus obtaining
|
||||
the deadlock-avoidance benefits called out above.
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
|
@ -69,9 +69,8 @@ checking of rcu_dereference() primitives:
|
||||
value of the pointer itself, for example, against NULL.
|
||||
|
||||
The rcu_dereference_check() check expression can be any boolean
|
||||
expression, but would normally include a lockdep expression. However,
|
||||
any boolean expression can be used. For a moderately ornate example,
|
||||
consider the following::
|
||||
expression, but would normally include a lockdep expression. For a
|
||||
moderately ornate example, consider the following::
|
||||
|
||||
file = rcu_dereference_check(fdt->fd[fd],
|
||||
lockdep_is_held(&files->file_lock) ||
|
||||
@ -97,10 +96,10 @@ code, it could instead be written as follows::
|
||||
atomic_read(&files->count) == 1);
|
||||
|
||||
This would verify cases #2 and #3 above, and furthermore lockdep would
|
||||
complain if this was used in an RCU read-side critical section unless one
|
||||
of these two cases held. Because rcu_dereference_protected() omits all
|
||||
barriers and compiler constraints, it generates better code than do the
|
||||
other flavors of rcu_dereference(). On the other hand, it is illegal
|
||||
complain even if this was used in an RCU read-side critical section unless
|
||||
one of these two cases held. Because rcu_dereference_protected() omits
|
||||
all barriers and compiler constraints, it generates better code than do
|
||||
the other flavors of rcu_dereference(). On the other hand, it is illegal
|
||||
to use rcu_dereference_protected() if either the RCU-protected pointer
|
||||
or the RCU-protected data that it points to can change concurrently.
|
||||
|
||||
|
@ -77,15 +77,17 @@ Frequently Asked Questions
|
||||
search for the string "Patent" in Documentation/RCU/RTFP.txt to find them.
|
||||
Of these, one was allowed to lapse by the assignee, and the
|
||||
others have been contributed to the Linux kernel under GPL.
|
||||
Many (but not all) have long since expired.
|
||||
There are now also LGPL implementations of user-level RCU
|
||||
available (https://liburcu.org/).
|
||||
|
||||
- I hear that RCU needs work in order to support realtime kernels?
|
||||
|
||||
Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
|
||||
Realtime-friendly RCU are enabled via the CONFIG_PREEMPTION
|
||||
kernel configuration parameter.
|
||||
|
||||
- Where can I find more information on RCU?
|
||||
|
||||
See the Documentation/RCU/RTFP.txt file.
|
||||
Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/).
|
||||
Or point your browser at (https://docs.google.com/document/d/1X0lThx8OK0ZgLMqVoXiR4ZrGURHrXK6NyLRbeXe3Xac/edit)
|
||||
or (https://docs.google.com/document/d/1GCdQC8SDbb54W1shjEXqGZ0Rq8a6kIeYutdSIajfpLA/edit?usp=sharing).
|
||||
|
@ -19,8 +19,9 @@ Follow these rules to keep your RCU code working properly:
|
||||
can reload the value, and won't your code have fun with two
|
||||
different values for a single pointer! Without rcu_dereference(),
|
||||
DEC Alpha can load a pointer, dereference that pointer, and
|
||||
return data preceding initialization that preceded the store of
|
||||
the pointer.
|
||||
return data preceding initialization that preceded the store
|
||||
of the pointer. (As noted later, in recent kernels READ_ONCE()
|
||||
also prevents DEC Alpha from playing these tricks.)
|
||||
|
||||
In addition, the volatile cast in rcu_dereference() prevents the
|
||||
compiler from deducing the resulting pointer value. Please see
|
||||
@ -34,7 +35,7 @@ Follow these rules to keep your RCU code working properly:
|
||||
takes on the role of the lockless_dereference() primitive that
|
||||
was removed in v4.15.
|
||||
|
||||
- You are only permitted to use rcu_dereference on pointer values.
|
||||
- You are only permitted to use rcu_dereference() on pointer values.
|
||||
The compiler simply knows too much about integral values to
|
||||
trust it to carry dependencies through integer operations.
|
||||
There are a very few exceptions, namely that you can temporarily
|
||||
@ -240,6 +241,7 @@ precautions. To see this, consider the following code fragment::
|
||||
struct foo *q;
|
||||
int r1, r2;
|
||||
|
||||
rcu_read_lock();
|
||||
p = rcu_dereference(gp2);
|
||||
if (p == NULL)
|
||||
return;
|
||||
@ -248,7 +250,10 @@ precautions. To see this, consider the following code fragment::
|
||||
if (p == q) {
|
||||
/* The compiler decides that q->c is same as p->c. */
|
||||
r2 = p->c; /* Could get 44 on weakly order system. */
|
||||
} else {
|
||||
r2 = p->c - r1; /* Unconditional access to p->c. */
|
||||
}
|
||||
rcu_read_unlock();
|
||||
do_something_with(r1, r2);
|
||||
}
|
||||
|
||||
@ -297,6 +302,7 @@ Then one approach is to use locking, for example, as follows::
|
||||
struct foo *q;
|
||||
int r1, r2;
|
||||
|
||||
rcu_read_lock();
|
||||
p = rcu_dereference(gp2);
|
||||
if (p == NULL)
|
||||
return;
|
||||
@ -306,7 +312,12 @@ Then one approach is to use locking, for example, as follows::
|
||||
if (p == q) {
|
||||
/* The compiler decides that q->c is same as p->c. */
|
||||
r2 = p->c; /* Locking guarantees r2 == 144. */
|
||||
} else {
|
||||
spin_lock(&q->lock);
|
||||
r2 = q->c - r1;
|
||||
spin_unlock(&q->lock);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
spin_unlock(&p->lock);
|
||||
do_something_with(r1, r2);
|
||||
}
|
||||
@ -364,7 +375,7 @@ the exact value of "p" even in the not-equals case. This allows the
|
||||
compiler to make the return values independent of the load from "gp",
|
||||
in turn destroying the ordering between this load and the loads of the
|
||||
return values. This can result in "p->b" returning pre-initialization
|
||||
garbage values.
|
||||
garbage values on weakly ordered systems.
|
||||
|
||||
In short, rcu_dereference() is *not* optional when you are going to
|
||||
dereference the resulting pointer.
|
||||
@ -430,7 +441,7 @@ member of the rcu_dereference() to use in various situations:
|
||||
SPARSE CHECKING OF RCU-PROTECTED POINTERS
|
||||
-----------------------------------------
|
||||
|
||||
The sparse static-analysis tool checks for direct access to RCU-protected
|
||||
The sparse static-analysis tool checks for non-RCU access to RCU-protected
|
||||
pointers, which can result in "interesting" bugs due to compiler
|
||||
optimizations involving invented loads and perhaps also load tearing.
|
||||
For example, suppose someone mistakenly does something like this::
|
||||
|
@ -5,37 +5,12 @@ RCU and Unloadable Modules
|
||||
|
||||
[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
|
||||
|
||||
RCU (read-copy update) is a synchronization mechanism that can be thought
|
||||
of as a replacement for read-writer locking (among other things), but with
|
||||
very low-overhead readers that are immune to deadlock, priority inversion,
|
||||
and unbounded latency. RCU read-side critical sections are delimited
|
||||
by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPTION
|
||||
kernels, generate no code whatsoever.
|
||||
|
||||
This means that RCU writers are unaware of the presence of concurrent
|
||||
readers, so that RCU updates to shared data must be undertaken quite
|
||||
carefully, leaving an old version of the data structure in place until all
|
||||
pre-existing readers have finished. These old versions are needed because
|
||||
such readers might hold a reference to them. RCU updates can therefore be
|
||||
rather expensive, and RCU is thus best suited for read-mostly situations.
|
||||
|
||||
How can an RCU writer possibly determine when all readers are finished,
|
||||
given that readers might well leave absolutely no trace of their
|
||||
presence? There is a synchronize_rcu() primitive that blocks until all
|
||||
pre-existing readers have completed. An updater wishing to delete an
|
||||
element p from a linked list might do the following, while holding an
|
||||
appropriate lock, of course::
|
||||
|
||||
list_del_rcu(p);
|
||||
synchronize_rcu();
|
||||
kfree(p);
|
||||
|
||||
But the above code cannot be used in IRQ context -- the call_rcu()
|
||||
primitive must be used instead. This primitive takes a pointer to an
|
||||
rcu_head struct placed within the RCU-protected data structure and
|
||||
another pointer to a function that may be invoked later to free that
|
||||
structure. Code to delete an element p from the linked list from IRQ
|
||||
context might then be as follows::
|
||||
RCU updaters sometimes use call_rcu() to initiate an asynchronous wait for
|
||||
a grace period to elapse. This primitive takes a pointer to an rcu_head
|
||||
struct placed within the RCU-protected data structure and another pointer
|
||||
to a function that may be invoked later to free that structure. Code to
|
||||
delete an element p from the linked list from IRQ context might then be
|
||||
as follows::
|
||||
|
||||
list_del_rcu(p);
|
||||
call_rcu(&p->rcu, p_callback);
|
||||
@ -54,7 +29,7 @@ IRQ context. The function p_callback() might be defined as follows::
|
||||
Unloading Modules That Use call_rcu()
|
||||
-------------------------------------
|
||||
|
||||
But what if p_callback is defined in an unloadable module?
|
||||
But what if the p_callback() function is defined in an unloadable module?
|
||||
|
||||
If we unload the module while some RCU callbacks are pending,
|
||||
the CPUs executing these callbacks are going to be severely
|
||||
@ -67,20 +42,21 @@ grace period to elapse, it does not wait for the callbacks to complete.
|
||||
|
||||
One might be tempted to try several back-to-back synchronize_rcu()
|
||||
calls, but this is still not guaranteed to work. If there is a very
|
||||
heavy RCU-callback load, then some of the callbacks might be deferred
|
||||
in order to allow other processing to proceed. Such deferral is required
|
||||
in realtime kernels in order to avoid excessive scheduling latencies.
|
||||
heavy RCU-callback load, then some of the callbacks might be deferred in
|
||||
order to allow other processing to proceed. For but one example, such
|
||||
deferral is required in realtime kernels in order to avoid excessive
|
||||
scheduling latencies.
|
||||
|
||||
|
||||
rcu_barrier()
|
||||
-------------
|
||||
|
||||
We instead need the rcu_barrier() primitive. Rather than waiting for
|
||||
a grace period to elapse, rcu_barrier() waits for all outstanding RCU
|
||||
callbacks to complete. Please note that rcu_barrier() does **not** imply
|
||||
synchronize_rcu(), in particular, if there are no RCU callbacks queued
|
||||
anywhere, rcu_barrier() is within its rights to return immediately,
|
||||
without waiting for a grace period to elapse.
|
||||
This situation can be handled by the rcu_barrier() primitive. Rather
|
||||
than waiting for a grace period to elapse, rcu_barrier() waits for all
|
||||
outstanding RCU callbacks to complete. Please note that rcu_barrier()
|
||||
does **not** imply synchronize_rcu(), in particular, if there are no RCU
|
||||
callbacks queued anywhere, rcu_barrier() is within its rights to return
|
||||
immediately, without waiting for anything, let alone a grace period.
|
||||
|
||||
Pseudo-code using rcu_barrier() is as follows:
|
||||
|
||||
@ -89,83 +65,86 @@ Pseudo-code using rcu_barrier() is as follows:
|
||||
3. Allow the module to be unloaded.
|
||||
|
||||
There is also an srcu_barrier() function for SRCU, and you of course
|
||||
must match the flavor of rcu_barrier() with that of call_rcu(). If your
|
||||
module uses multiple flavors of call_rcu(), then it must also use multiple
|
||||
flavors of rcu_barrier() when unloading that module. For example, if
|
||||
it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
|
||||
srcu_struct_2, then the following three lines of code will be required
|
||||
when unloading::
|
||||
must match the flavor of srcu_barrier() with that of call_srcu().
|
||||
If your module uses multiple srcu_struct structures, then it must also
|
||||
use multiple invocations of srcu_barrier() when unloading that module.
|
||||
For example, if it uses call_rcu(), call_srcu() on srcu_struct_1, and
|
||||
call_srcu() on srcu_struct_2, then the following three lines of code
|
||||
will be required when unloading::
|
||||
|
||||
1 rcu_barrier();
|
||||
2 srcu_barrier(&srcu_struct_1);
|
||||
3 srcu_barrier(&srcu_struct_2);
|
||||
1 rcu_barrier();
|
||||
2 srcu_barrier(&srcu_struct_1);
|
||||
3 srcu_barrier(&srcu_struct_2);
|
||||
|
||||
The rcutorture module makes use of rcu_barrier() in its exit function
|
||||
as follows::
|
||||
If latency is of the essence, workqueues could be used to run these
|
||||
three functions concurrently.
|
||||
|
||||
1 static void
|
||||
2 rcu_torture_cleanup(void)
|
||||
3 {
|
||||
4 int i;
|
||||
5
|
||||
6 fullstop = 1;
|
||||
7 if (shuffler_task != NULL) {
|
||||
8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
|
||||
9 kthread_stop(shuffler_task);
|
||||
10 }
|
||||
11 shuffler_task = NULL;
|
||||
An ancient version of the rcutorture module makes use of rcu_barrier()
|
||||
in its exit function as follows::
|
||||
|
||||
1 static void
|
||||
2 rcu_torture_cleanup(void)
|
||||
3 {
|
||||
4 int i;
|
||||
5
|
||||
6 fullstop = 1;
|
||||
7 if (shuffler_task != NULL) {
|
||||
8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
|
||||
9 kthread_stop(shuffler_task);
|
||||
10 }
|
||||
11 shuffler_task = NULL;
|
||||
12
|
||||
13 if (writer_task != NULL) {
|
||||
14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
|
||||
15 kthread_stop(writer_task);
|
||||
16 }
|
||||
17 writer_task = NULL;
|
||||
13 if (writer_task != NULL) {
|
||||
14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
|
||||
15 kthread_stop(writer_task);
|
||||
16 }
|
||||
17 writer_task = NULL;
|
||||
18
|
||||
19 if (reader_tasks != NULL) {
|
||||
20 for (i = 0; i < nrealreaders; i++) {
|
||||
21 if (reader_tasks[i] != NULL) {
|
||||
22 VERBOSE_PRINTK_STRING(
|
||||
23 "Stopping rcu_torture_reader task");
|
||||
24 kthread_stop(reader_tasks[i]);
|
||||
25 }
|
||||
26 reader_tasks[i] = NULL;
|
||||
27 }
|
||||
28 kfree(reader_tasks);
|
||||
29 reader_tasks = NULL;
|
||||
30 }
|
||||
31 rcu_torture_current = NULL;
|
||||
19 if (reader_tasks != NULL) {
|
||||
20 for (i = 0; i < nrealreaders; i++) {
|
||||
21 if (reader_tasks[i] != NULL) {
|
||||
22 VERBOSE_PRINTK_STRING(
|
||||
23 "Stopping rcu_torture_reader task");
|
||||
24 kthread_stop(reader_tasks[i]);
|
||||
25 }
|
||||
26 reader_tasks[i] = NULL;
|
||||
27 }
|
||||
28 kfree(reader_tasks);
|
||||
29 reader_tasks = NULL;
|
||||
30 }
|
||||
31 rcu_torture_current = NULL;
|
||||
32
|
||||
33 if (fakewriter_tasks != NULL) {
|
||||
34 for (i = 0; i < nfakewriters; i++) {
|
||||
35 if (fakewriter_tasks[i] != NULL) {
|
||||
36 VERBOSE_PRINTK_STRING(
|
||||
37 "Stopping rcu_torture_fakewriter task");
|
||||
38 kthread_stop(fakewriter_tasks[i]);
|
||||
39 }
|
||||
40 fakewriter_tasks[i] = NULL;
|
||||
41 }
|
||||
42 kfree(fakewriter_tasks);
|
||||
43 fakewriter_tasks = NULL;
|
||||
44 }
|
||||
33 if (fakewriter_tasks != NULL) {
|
||||
34 for (i = 0; i < nfakewriters; i++) {
|
||||
35 if (fakewriter_tasks[i] != NULL) {
|
||||
36 VERBOSE_PRINTK_STRING(
|
||||
37 "Stopping rcu_torture_fakewriter task");
|
||||
38 kthread_stop(fakewriter_tasks[i]);
|
||||
39 }
|
||||
40 fakewriter_tasks[i] = NULL;
|
||||
41 }
|
||||
42 kfree(fakewriter_tasks);
|
||||
43 fakewriter_tasks = NULL;
|
||||
44 }
|
||||
45
|
||||
46 if (stats_task != NULL) {
|
||||
47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
|
||||
48 kthread_stop(stats_task);
|
||||
49 }
|
||||
50 stats_task = NULL;
|
||||
46 if (stats_task != NULL) {
|
||||
47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
|
||||
48 kthread_stop(stats_task);
|
||||
49 }
|
||||
50 stats_task = NULL;
|
||||
51
|
||||
52 /* Wait for all RCU callbacks to fire. */
|
||||
53 rcu_barrier();
|
||||
52 /* Wait for all RCU callbacks to fire. */
|
||||
53 rcu_barrier();
|
||||
54
|
||||
55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
|
||||
55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
|
||||
56
|
||||
57 if (cur_ops->cleanup != NULL)
|
||||
58 cur_ops->cleanup();
|
||||
59 if (atomic_read(&n_rcu_torture_error))
|
||||
60 rcu_torture_print_module_parms("End of test: FAILURE");
|
||||
61 else
|
||||
62 rcu_torture_print_module_parms("End of test: SUCCESS");
|
||||
63 }
|
||||
57 if (cur_ops->cleanup != NULL)
|
||||
58 cur_ops->cleanup();
|
||||
59 if (atomic_read(&n_rcu_torture_error))
|
||||
60 rcu_torture_print_module_parms("End of test: FAILURE");
|
||||
61 else
|
||||
62 rcu_torture_print_module_parms("End of test: SUCCESS");
|
||||
63 }
|
||||
|
||||
Line 6 sets a global variable that prevents any RCU callbacks from
|
||||
re-posting themselves. This will not be necessary in most cases, since
|
||||
@ -190,16 +169,17 @@ Quick Quiz #1:
|
||||
:ref:`Answer to Quick Quiz #1 <answer_rcubarrier_quiz_1>`
|
||||
|
||||
Your module might have additional complications. For example, if your
|
||||
module invokes call_rcu() from timers, you will need to first cancel all
|
||||
the timers, and only then invoke rcu_barrier() to wait for any remaining
|
||||
module invokes call_rcu() from timers, you will need to first refrain
|
||||
from posting new timers, cancel (or wait for) all the already-posted
|
||||
timers, and only then invoke rcu_barrier() to wait for any remaining
|
||||
RCU callbacks to complete.
|
||||
|
||||
Of course, if you module uses call_rcu(), you will need to invoke
|
||||
Of course, if your module uses call_rcu(), you will need to invoke
|
||||
rcu_barrier() before unloading. Similarly, if your module uses
|
||||
call_srcu(), you will need to invoke srcu_barrier() before unloading,
|
||||
and on the same srcu_struct structure. If your module uses call_rcu()
|
||||
**and** call_srcu(), then you will need to invoke rcu_barrier() **and**
|
||||
srcu_barrier().
|
||||
**and** call_srcu(), then (as noted above) you will need to invoke
|
||||
rcu_barrier() **and** srcu_barrier().
|
||||
|
||||
|
||||
Implementing rcu_barrier()
|
||||
@ -211,27 +191,40 @@ queues. His implementation queues an RCU callback on each of the per-CPU
|
||||
callback queues, and then waits until they have all started executing, at
|
||||
which point, all earlier RCU callbacks are guaranteed to have completed.
|
||||
|
||||
The original code for rcu_barrier() was as follows::
|
||||
The original code for rcu_barrier() was roughly as follows::
|
||||
|
||||
1 void rcu_barrier(void)
|
||||
2 {
|
||||
3 BUG_ON(in_interrupt());
|
||||
4 /* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
5 mutex_lock(&rcu_barrier_mutex);
|
||||
6 init_completion(&rcu_barrier_completion);
|
||||
7 atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
9 wait_for_completion(&rcu_barrier_completion);
|
||||
10 mutex_unlock(&rcu_barrier_mutex);
|
||||
11 }
|
||||
1 void rcu_barrier(void)
|
||||
2 {
|
||||
3 BUG_ON(in_interrupt());
|
||||
4 /* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
5 mutex_lock(&rcu_barrier_mutex);
|
||||
6 init_completion(&rcu_barrier_completion);
|
||||
7 atomic_set(&rcu_barrier_cpu_count, 1);
|
||||
8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
9 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
10 complete(&rcu_barrier_completion);
|
||||
11 wait_for_completion(&rcu_barrier_completion);
|
||||
12 mutex_unlock(&rcu_barrier_mutex);
|
||||
13 }
|
||||
|
||||
Line 3 verifies that the caller is in process context, and lines 5 and 10
|
||||
Line 3 verifies that the caller is in process context, and lines 5 and 12
|
||||
use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
|
||||
global completion and counters at a time, which are initialized on lines
|
||||
6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
|
||||
shown below. Note that the final "1" in on_each_cpu()'s argument list
|
||||
ensures that all the calls to rcu_barrier_func() will have completed
|
||||
before on_each_cpu() returns. Line 9 then waits for the completion.
|
||||
before on_each_cpu() returns. Line 9 removes the initial count from
|
||||
rcu_barrier_cpu_count, and if this count is now zero, line 10 finalizes
|
||||
the completion, which prevents line 11 from blocking. Either way,
|
||||
line 11 then waits (if needed) for the completion.
|
||||
|
||||
.. _rcubarrier_quiz_2:
|
||||
|
||||
Quick Quiz #2:
|
||||
Why doesn't line 8 initialize rcu_barrier_cpu_count to zero,
|
||||
thereby avoiding the need for lines 9 and 10?
|
||||
|
||||
:ref:`Answer to Quick Quiz #2 <answer_rcubarrier_quiz_2>`
|
||||
|
||||
This code was rewritten in 2008 and several times thereafter, but this
|
||||
still gives the general idea.
|
||||
@ -239,21 +232,21 @@ still gives the general idea.
|
||||
The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
|
||||
to post an RCU callback, as follows::
|
||||
|
||||
1 static void rcu_barrier_func(void *notused)
|
||||
2 {
|
||||
3 int cpu = smp_processor_id();
|
||||
4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
5 struct rcu_head *head;
|
||||
6
|
||||
7 head = &rdp->barrier;
|
||||
8 atomic_inc(&rcu_barrier_cpu_count);
|
||||
9 call_rcu(head, rcu_barrier_callback);
|
||||
10 }
|
||||
1 static void rcu_barrier_func(void *notused)
|
||||
2 {
|
||||
3 int cpu = smp_processor_id();
|
||||
4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
5 struct rcu_head *head;
|
||||
6
|
||||
7 head = &rdp->barrier;
|
||||
8 atomic_inc(&rcu_barrier_cpu_count);
|
||||
9 call_rcu(head, rcu_barrier_callback);
|
||||
10 }
|
||||
|
||||
Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
|
||||
which contains the struct rcu_head that needed for the later call to
|
||||
call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
|
||||
8 increments a global counter. This counter will later be decremented
|
||||
8 increments the global counter. This counter will later be decremented
|
||||
by the callback. Line 9 then registers the rcu_barrier_callback() on
|
||||
the current CPU's queue.
|
||||
|
||||
@ -261,33 +254,34 @@ The rcu_barrier_callback() function simply atomically decrements the
|
||||
rcu_barrier_cpu_count variable and finalizes the completion when it
|
||||
reaches zero, as follows::
|
||||
|
||||
1 static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
2 {
|
||||
3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
4 complete(&rcu_barrier_completion);
|
||||
5 }
|
||||
1 static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
2 {
|
||||
3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
4 complete(&rcu_barrier_completion);
|
||||
5 }
|
||||
|
||||
.. _rcubarrier_quiz_2:
|
||||
.. _rcubarrier_quiz_3:
|
||||
|
||||
Quick Quiz #2:
|
||||
Quick Quiz #3:
|
||||
What happens if CPU 0's rcu_barrier_func() executes
|
||||
immediately (thus incrementing rcu_barrier_cpu_count to the
|
||||
value one), but the other CPU's rcu_barrier_func() invocations
|
||||
are delayed for a full grace period? Couldn't this result in
|
||||
rcu_barrier() returning prematurely?
|
||||
|
||||
:ref:`Answer to Quick Quiz #2 <answer_rcubarrier_quiz_2>`
|
||||
:ref:`Answer to Quick Quiz #3 <answer_rcubarrier_quiz_3>`
|
||||
|
||||
The current rcu_barrier() implementation is more complex, due to the need
|
||||
to avoid disturbing idle CPUs (especially on battery-powered systems)
|
||||
and the need to minimally disturb non-idle CPUs in real-time systems.
|
||||
However, the code above illustrates the concepts.
|
||||
In addition, a great many optimizations have been applied. However,
|
||||
the code above illustrates the concepts.
|
||||
|
||||
|
||||
rcu_barrier() Summary
|
||||
---------------------
|
||||
|
||||
The rcu_barrier() primitive has seen relatively little use, since most
|
||||
The rcu_barrier() primitive is used relatively infrequently, since most
|
||||
code using RCU is in the core kernel rather than in modules. However, if
|
||||
you are using RCU from an unloadable module, you need to use rcu_barrier()
|
||||
so that your module may be safely unloaded.
|
||||
@ -302,7 +296,8 @@ Quick Quiz #1:
|
||||
Is there any other situation where rcu_barrier() might
|
||||
be required?
|
||||
|
||||
Answer: Interestingly enough, rcu_barrier() was not originally
|
||||
Answer:
|
||||
Interestingly enough, rcu_barrier() was not originally
|
||||
implemented for module unloading. Nikita Danilov was using
|
||||
RCU in a filesystem, which resulted in a similar situation at
|
||||
filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
|
||||
@ -318,13 +313,48 @@ Answer: Interestingly enough, rcu_barrier() was not originally
|
||||
.. _answer_rcubarrier_quiz_2:
|
||||
|
||||
Quick Quiz #2:
|
||||
Why doesn't line 8 initialize rcu_barrier_cpu_count to zero,
|
||||
thereby avoiding the need for lines 9 and 10?
|
||||
|
||||
Answer:
|
||||
Suppose that the on_each_cpu() function shown on line 8 was
|
||||
delayed, so that CPU 0's rcu_barrier_func() executed and
|
||||
the corresponding grace period elapsed, all before CPU 1's
|
||||
rcu_barrier_func() started executing. This would result in
|
||||
rcu_barrier_cpu_count being decremented to zero, so that line
|
||||
11's wait_for_completion() would return immediately, failing to
|
||||
wait for CPU 1's callbacks to be invoked.
|
||||
|
||||
Note that this was not a problem when the rcu_barrier() code
|
||||
was first added back in 2005. This is because on_each_cpu()
|
||||
disables preemption, which acted as an RCU read-side critical
|
||||
section, thus preventing CPU 0's grace period from completing
|
||||
until on_each_cpu() had dealt with all of the CPUs. However,
|
||||
with the advent of preemptible RCU, rcu_barrier() no longer
|
||||
waited on nonpreemptible regions of code in preemptible kernels,
|
||||
that being the job of the new rcu_barrier_sched() function.
|
||||
|
||||
However, with the RCU flavor consolidation around v4.20, this
|
||||
possibility was once again ruled out, because the consolidated
|
||||
RCU once again waits on nonpreemptible regions of code.
|
||||
|
||||
Nevertheless, that extra count might still be a good idea.
|
||||
Relying on these sort of accidents of implementation can result
|
||||
in later surprise bugs when the implementation changes.
|
||||
|
||||
:ref:`Back to Quick Quiz #2 <rcubarrier_quiz_2>`
|
||||
|
||||
.. _answer_rcubarrier_quiz_3:
|
||||
|
||||
Quick Quiz #3:
|
||||
What happens if CPU 0's rcu_barrier_func() executes
|
||||
immediately (thus incrementing rcu_barrier_cpu_count to the
|
||||
value one), but the other CPU's rcu_barrier_func() invocations
|
||||
are delayed for a full grace period? Couldn't this result in
|
||||
rcu_barrier() returning prematurely?
|
||||
|
||||
Answer: This cannot happen. The reason is that on_each_cpu() has its last
|
||||
Answer:
|
||||
This cannot happen. The reason is that on_each_cpu() has its last
|
||||
argument, the wait flag, set to "1". This flag is passed through
|
||||
to smp_call_function() and further to smp_call_function_on_cpu(),
|
||||
causing this latter to spin until the cross-CPU invocation of
|
||||
@ -336,18 +366,15 @@ Answer: This cannot happen. The reason is that on_each_cpu() has its last
|
||||
|
||||
Therefore, on_each_cpu() disables preemption across its call
|
||||
to smp_call_function() and also across the local call to
|
||||
rcu_barrier_func(). This prevents the local CPU from context
|
||||
switching, again preventing grace periods from completing. This
|
||||
rcu_barrier_func(). Because recent RCU implementations treat
|
||||
preemption-disabled regions of code as RCU read-side critical
|
||||
sections, this prevents grace periods from completing. This
|
||||
means that all CPUs have executed rcu_barrier_func() before
|
||||
the first rcu_barrier_callback() can possibly execute, in turn
|
||||
preventing rcu_barrier_cpu_count from prematurely reaching zero.
|
||||
|
||||
Currently, -rt implementations of RCU keep but a single global
|
||||
queue for RCU callbacks, and thus do not suffer from this
|
||||
problem. However, when the -rt RCU eventually does have per-CPU
|
||||
callback queues, things will have to change. One simple change
|
||||
is to add an rcu_read_lock() before line 8 of rcu_barrier()
|
||||
and an rcu_read_unlock() after line 8 of this same function. If
|
||||
you can think of a better change, please let me know!
|
||||
But if on_each_cpu() ever decides to forgo disabling preemption,
|
||||
as might well happen due to real-time latency considerations,
|
||||
initializing rcu_barrier_cpu_count to one will save the day.
|
||||
|
||||
:ref:`Back to Quick Quiz #2 <rcubarrier_quiz_2>`
|
||||
:ref:`Back to Quick Quiz #3 <rcubarrier_quiz_3>`
|
||||
|
@ -14,19 +14,19 @@ Using 'nulls'
|
||||
=============
|
||||
|
||||
Using special makers (called 'nulls') is a convenient way
|
||||
to solve following problem :
|
||||
to solve following problem.
|
||||
|
||||
A typical RCU linked list managing objects which are
|
||||
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
|
||||
use following algos :
|
||||
Without 'nulls', a typical RCU linked list managing objects which are
|
||||
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
|
||||
algorithms:
|
||||
|
||||
1) Lookup algo
|
||||
--------------
|
||||
1) Lookup algorithm
|
||||
-------------------
|
||||
|
||||
::
|
||||
|
||||
rcu_read_lock()
|
||||
begin:
|
||||
rcu_read_lock()
|
||||
obj = lockless_lookup(key);
|
||||
if (obj) {
|
||||
if (!try_get_ref(obj)) // might fail for free objects
|
||||
@ -38,6 +38,7 @@ use following algos :
|
||||
*/
|
||||
if (obj->key != key) { // not the object we expected
|
||||
put_ref(obj);
|
||||
rcu_read_unlock();
|
||||
goto begin;
|
||||
}
|
||||
}
|
||||
@ -52,9 +53,9 @@ but a version with an additional memory barrier (smp_rmb())
|
||||
{
|
||||
struct hlist_node *node, *next;
|
||||
for (pos = rcu_dereference((head)->first);
|
||||
pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
|
||||
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
|
||||
pos = rcu_dereference(next))
|
||||
pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
|
||||
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
|
||||
pos = rcu_dereference(next))
|
||||
if (obj->key == key)
|
||||
return obj;
|
||||
return NULL;
|
||||
@ -64,9 +65,9 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
|
||||
|
||||
struct hlist_node *node;
|
||||
for (pos = rcu_dereference((head)->first);
|
||||
pos && ({ prefetch(pos->next); 1; }) &&
|
||||
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
|
||||
pos = rcu_dereference(pos->next))
|
||||
pos && ({ prefetch(pos->next); 1; }) &&
|
||||
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
|
||||
pos = rcu_dereference(pos->next))
|
||||
if (obj->key == key)
|
||||
return obj;
|
||||
return NULL;
|
||||
@ -82,36 +83,32 @@ Quoting Corey Minyard::
|
||||
solved by pre-fetching the "next" field (with proper barriers) before
|
||||
checking the key."
|
||||
|
||||
2) Insert algo
|
||||
--------------
|
||||
2) Insertion algorithm
|
||||
----------------------
|
||||
|
||||
We need to make sure a reader cannot read the new 'obj->obj_next' value
|
||||
and previous value of 'obj->key'. Or else, an item could be deleted
|
||||
and previous value of 'obj->key'. Otherwise, an item could be deleted
|
||||
from a chain, and inserted into another chain. If new chain was empty
|
||||
before the move, 'next' pointer is NULL, and lockless reader can
|
||||
not detect it missed following items in original chain.
|
||||
before the move, 'next' pointer is NULL, and lockless reader can not
|
||||
detect the fact that it missed following items in original chain.
|
||||
|
||||
::
|
||||
|
||||
/*
|
||||
* Please note that new inserts are done at the head of list,
|
||||
* not in the middle or end.
|
||||
*/
|
||||
* Please note that new inserts are done at the head of list,
|
||||
* not in the middle or end.
|
||||
*/
|
||||
obj = kmem_cache_alloc(...);
|
||||
lock_chain(); // typically a spin_lock()
|
||||
obj->key = key;
|
||||
/*
|
||||
* we need to make sure obj->key is updated before obj->next
|
||||
* or obj->refcnt
|
||||
*/
|
||||
smp_wmb();
|
||||
atomic_set(&obj->refcnt, 1);
|
||||
atomic_set_release(&obj->refcnt, 1); // key before refcnt
|
||||
hlist_add_head_rcu(&obj->obj_node, list);
|
||||
unlock_chain(); // typically a spin_unlock()
|
||||
|
||||
|
||||
3) Remove algo
|
||||
--------------
|
||||
3) Removal algorithm
|
||||
--------------------
|
||||
|
||||
Nothing special here, we can use a standard RCU hlist deletion.
|
||||
But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
|
||||
very very fast (before the end of RCU grace period)
|
||||
@ -133,7 +130,7 @@ Avoiding extra smp_rmb()
|
||||
========================
|
||||
|
||||
With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
|
||||
and extra smp_wmb() in insert function.
|
||||
and extra _release() in insert function.
|
||||
|
||||
For example, if we choose to store the slot number as the 'nulls'
|
||||
end-of-list marker for each slot of the hash table, we can detect
|
||||
@ -142,59 +139,61 @@ to another chain) checking the final 'nulls' value if
|
||||
the lookup met the end of chain. If final 'nulls' value
|
||||
is not the slot number, then we must restart the lookup at
|
||||
the beginning. If the object was moved to the same chain,
|
||||
then the reader doesn't care : It might eventually
|
||||
then the reader doesn't care: It might occasionally
|
||||
scan the list again without harm.
|
||||
|
||||
|
||||
1) lookup algo
|
||||
--------------
|
||||
1) lookup algorithm
|
||||
-------------------
|
||||
|
||||
::
|
||||
|
||||
head = &table[slot];
|
||||
rcu_read_lock();
|
||||
begin:
|
||||
rcu_read_lock();
|
||||
hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
|
||||
if (obj->key == key) {
|
||||
if (!try_get_ref(obj)) // might fail for free objects
|
||||
goto begin;
|
||||
if (obj->key != key) { // not the object we expected
|
||||
put_ref(obj);
|
||||
if (!try_get_ref(obj)) { // might fail for free objects
|
||||
rcu_read_unlock();
|
||||
goto begin;
|
||||
}
|
||||
goto out;
|
||||
if (obj->key != key) { // not the object we expected
|
||||
put_ref(obj);
|
||||
rcu_read_unlock();
|
||||
goto begin;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
// If the nulls value we got at the end of this lookup is
|
||||
// not the expected one, we must restart lookup.
|
||||
// We probably met an item that was moved to another chain.
|
||||
if (get_nulls_value(node) != slot) {
|
||||
put_ref(obj);
|
||||
rcu_read_unlock();
|
||||
goto begin;
|
||||
}
|
||||
/*
|
||||
* if the nulls value we got at the end of this lookup is
|
||||
* not the expected one, we must restart lookup.
|
||||
* We probably met an item that was moved to another chain.
|
||||
*/
|
||||
if (get_nulls_value(node) != slot)
|
||||
goto begin;
|
||||
obj = NULL;
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
2) Insert function
|
||||
------------------
|
||||
2) Insert algorithm
|
||||
-------------------
|
||||
|
||||
::
|
||||
|
||||
/*
|
||||
* Please note that new inserts are done at the head of list,
|
||||
* not in the middle or end.
|
||||
*/
|
||||
* Please note that new inserts are done at the head of list,
|
||||
* not in the middle or end.
|
||||
*/
|
||||
obj = kmem_cache_alloc(cachep);
|
||||
lock_chain(); // typically a spin_lock()
|
||||
obj->key = key;
|
||||
atomic_set_release(&obj->refcnt, 1); // key before refcnt
|
||||
/*
|
||||
* changes to obj->key must be visible before refcnt one
|
||||
*/
|
||||
smp_wmb();
|
||||
atomic_set(&obj->refcnt, 1);
|
||||
/*
|
||||
* insert obj in RCU way (readers might be traversing chain)
|
||||
*/
|
||||
* insert obj in RCU way (readers might be traversing chain)
|
||||
*/
|
||||
hlist_nulls_add_head_rcu(&obj->obj_node, list);
|
||||
unlock_chain(); // typically a spin_unlock()
|
||||
|
@ -25,10 +25,10 @@ warnings:
|
||||
|
||||
- A CPU looping with bottom halves disabled.
|
||||
|
||||
- For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the kernel
|
||||
without invoking schedule(). If the looping in the kernel is
|
||||
really expected and desirable behavior, you might need to add
|
||||
some calls to cond_resched().
|
||||
- For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the
|
||||
kernel without potentially invoking schedule(). If the looping
|
||||
in the kernel is really expected and desirable behavior, you
|
||||
might need to add some calls to cond_resched().
|
||||
|
||||
- Booting Linux using a console connection that is too slow to
|
||||
keep up with the boot-time console-message rate. For example,
|
||||
@ -108,16 +108,17 @@ warnings:
|
||||
|
||||
- A bug in the RCU implementation.
|
||||
|
||||
- A hardware failure. This is quite unlikely, but has occurred
|
||||
at least once in real life. A CPU failed in a running system,
|
||||
becoming unresponsive, but not causing an immediate crash.
|
||||
This resulted in a series of RCU CPU stall warnings, eventually
|
||||
leading the realization that the CPU had failed.
|
||||
- A hardware failure. This is quite unlikely, but is not at all
|
||||
uncommon in large datacenter. In one memorable case some decades
|
||||
back, a CPU failed in a running system, becoming unresponsive,
|
||||
but not causing an immediate crash. This resulted in a series
|
||||
of RCU CPU stall warnings, eventually leading the realization
|
||||
that the CPU had failed.
|
||||
|
||||
The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warning.
|
||||
Note that SRCU does *not* have CPU stall warnings. Please note that
|
||||
RCU only detects CPU stalls when there is a grace period in progress.
|
||||
No grace period, no CPU stall warnings.
|
||||
The RCU, RCU-sched, RCU-tasks, and RCU-tasks-trace implementations have
|
||||
CPU stall warning. Note that SRCU does *not* have CPU stall warnings.
|
||||
Please note that RCU only detects CPU stalls when there is a grace period
|
||||
in progress. No grace period, no CPU stall warnings.
|
||||
|
||||
To diagnose the cause of the stall, inspect the stack traces.
|
||||
The offending function will usually be near the top of the stack.
|
||||
@ -205,16 +206,21 @@ RCU_STALL_RAT_DELAY
|
||||
rcupdate.rcu_task_stall_timeout
|
||||
-------------------------------
|
||||
|
||||
This boot/sysfs parameter controls the RCU-tasks stall warning
|
||||
interval. A value of zero or less suppresses RCU-tasks stall
|
||||
warnings. A positive value sets the stall-warning interval
|
||||
in seconds. An RCU-tasks stall warning starts with the line:
|
||||
This boot/sysfs parameter controls the RCU-tasks and
|
||||
RCU-tasks-trace stall warning intervals. A value of zero or less
|
||||
suppresses RCU-tasks stall warnings. A positive value sets the
|
||||
stall-warning interval in seconds. An RCU-tasks stall warning
|
||||
starts with the line:
|
||||
|
||||
INFO: rcu_tasks detected stalls on tasks:
|
||||
|
||||
And continues with the output of sched_show_task() for each
|
||||
task stalling the current RCU-tasks grace period.
|
||||
|
||||
An RCU-tasks-trace stall warning starts (and continues) similarly:
|
||||
|
||||
INFO: rcu_tasks_trace detected stalls on tasks
|
||||
|
||||
|
||||
Interpreting RCU's CPU Stall-Detector "Splats"
|
||||
==============================================
|
||||
@ -248,7 +254,8 @@ dynticks counter, which will have an even-numbered value if the CPU
|
||||
is in dyntick-idle mode and an odd-numbered value otherwise. The hex
|
||||
number between the two "/"s is the value of the nesting, which will be
|
||||
a small non-negative number if in the idle loop (as shown above) and a
|
||||
very large positive number otherwise.
|
||||
very large positive number otherwise. The number following the final
|
||||
"/" is the NMI nesting, which will be a small non-negative number.
|
||||
|
||||
The "softirq=" portion of the message tracks the number of RCU softirq
|
||||
handlers that the stalled CPU has executed. The number before the "/"
|
||||
@ -383,3 +390,95 @@ for example, "P3421".
|
||||
|
||||
It is entirely possible to see stall warnings from normal and from
|
||||
expedited grace periods at about the same time during the same run.
|
||||
|
||||
RCU_CPU_STALL_CPUTIME
|
||||
=====================
|
||||
|
||||
In kernels built with CONFIG_RCU_CPU_STALL_CPUTIME=y or booted with
|
||||
rcupdate.rcu_cpu_stall_cputime=1, the following additional information
|
||||
is supplied with each RCU CPU stall warning::
|
||||
|
||||
rcu: hardirqs softirqs csw/system
|
||||
rcu: number: 624 45 0
|
||||
rcu: cputime: 69 1 2425 ==> 2500(ms)
|
||||
|
||||
These statistics are collected during the sampling period. The values
|
||||
in row "number:" are the number of hard interrupts, number of soft
|
||||
interrupts, and number of context switches on the stalled CPU. The
|
||||
first three values in row "cputime:" indicate the CPU time in
|
||||
milliseconds consumed by hard interrupts, soft interrupts, and tasks
|
||||
on the stalled CPU. The last number is the measurement interval, again
|
||||
in milliseconds. Because user-mode tasks normally do not cause RCU CPU
|
||||
stalls, these tasks are typically kernel tasks, which is why only the
|
||||
system CPU time are considered.
|
||||
|
||||
The sampling period is shown as follows::
|
||||
|
||||
|<------------first timeout---------->|<-----second timeout----->|
|
||||
|<--half timeout-->|<--half timeout-->| |
|
||||
| |<--first period-->| |
|
||||
| |<-----------second sampling period---------->|
|
||||
| | | |
|
||||
snapshot time point 1st-stall 2nd-stall
|
||||
|
||||
The following describes four typical scenarios:
|
||||
|
||||
1. A CPU looping with interrupts disabled.
|
||||
|
||||
::
|
||||
|
||||
rcu: hardirqs softirqs csw/system
|
||||
rcu: number: 0 0 0
|
||||
rcu: cputime: 0 0 0 ==> 2500(ms)
|
||||
|
||||
Because interrupts have been disabled throughout the measurement
|
||||
interval, there are no interrupts and no context switches.
|
||||
Furthermore, because CPU time consumption was measured using interrupt
|
||||
handlers, the system CPU consumption is misleadingly measured as zero.
|
||||
This scenario will normally also have "(0 ticks this GP)" printed on
|
||||
this CPU's summary line.
|
||||
|
||||
2. A CPU looping with bottom halves disabled.
|
||||
|
||||
This is similar to the previous example, but with non-zero number of
|
||||
and CPU time consumed by hard interrupts, along with non-zero CPU
|
||||
time consumed by in-kernel execution::
|
||||
|
||||
rcu: hardirqs softirqs csw/system
|
||||
rcu: number: 624 0 0
|
||||
rcu: cputime: 49 0 2446 ==> 2500(ms)
|
||||
|
||||
The fact that there are zero softirqs gives a hint that these were
|
||||
disabled, perhaps via local_bh_disable(). It is of course possible
|
||||
that there were no softirqs, perhaps because all events that would
|
||||
result in softirq execution are confined to other CPUs. In this case,
|
||||
the diagnosis should continue as shown in the next example.
|
||||
|
||||
3. A CPU looping with preemption disabled.
|
||||
|
||||
Here, only the number of context switches is zero::
|
||||
|
||||
rcu: hardirqs softirqs csw/system
|
||||
rcu: number: 624 45 0
|
||||
rcu: cputime: 69 1 2425 ==> 2500(ms)
|
||||
|
||||
This situation hints that the stalled CPU was looping with preemption
|
||||
disabled.
|
||||
|
||||
4. No looping, but massive hard and soft interrupts.
|
||||
|
||||
::
|
||||
|
||||
rcu: hardirqs softirqs csw/system
|
||||
rcu: number: xx xx 0
|
||||
rcu: cputime: xx xx 0 ==> 2500(ms)
|
||||
|
||||
Here, the number and CPU time of hard interrupts are all non-zero,
|
||||
but the number of context switches and the in-kernel CPU time consumed
|
||||
are zero. The number and cputime of soft interrupts will usually be
|
||||
non-zero, but could be zero, for example, if the CPU was spinning
|
||||
within a single hard interrupt handler.
|
||||
|
||||
If this type of RCU CPU stall warning can be reproduced, you can
|
||||
narrow it down by looking at /proc/interrupts or by writing code to
|
||||
trace each interrupt, for example, by referring to show_interrupts().
|
||||
|
@ -206,7 +206,11 @@ values for memory may require disabling the callback-flooding tests
|
||||
using the --bootargs parameter discussed below.
|
||||
|
||||
Sometimes additional debugging is useful, and in such cases the --kconfig
|
||||
parameter to kvm.sh may be used, for example, ``--kconfig 'CONFIG_KASAN=y'``.
|
||||
parameter to kvm.sh may be used, for example, ``--kconfig 'CONFIG_RCU_EQS_DEBUG=y'``.
|
||||
In addition, there are the --gdb, --kasan, and --kcsan parameters.
|
||||
Note that --gdb limits you to one scenario per kvm.sh run and requires
|
||||
that you have another window open from which to run ``gdb`` as instructed
|
||||
by the script.
|
||||
|
||||
Kernel boot arguments can also be supplied, for example, to control
|
||||
rcutorture's module parameters. For example, to test a change to RCU's
|
||||
@ -219,10 +223,17 @@ require disabling rcutorture's callback-flooding tests::
|
||||
--bootargs 'rcutorture.fwd_progress=0'
|
||||
|
||||
Sometimes all that is needed is a full set of kernel builds. This is
|
||||
what the --buildonly argument does.
|
||||
what the --buildonly parameter does.
|
||||
|
||||
Finally, the --trust-make argument allows each kernel build to reuse what
|
||||
it can from the previous kernel build.
|
||||
The --duration parameter can override the default run time of 30 minutes.
|
||||
For example, ``--duration 2d`` would run for two days, ``--duration 3h``
|
||||
would run for three hours, ``--duration 5m`` would run for five minutes,
|
||||
and ``--duration 45s`` would run for 45 seconds. This last can be useful
|
||||
for tracking down rare boot-time failures.
|
||||
|
||||
Finally, the --trust-make parameter allows each kernel build to reuse what
|
||||
it can from the previous kernel build. Please note that without the
|
||||
--trust-make parameter, your tags files may be demolished.
|
||||
|
||||
There are additional more arcane arguments that are documented in the
|
||||
source code of the kvm.sh script.
|
||||
@ -291,3 +302,73 @@ the following summary at the end of the run on a 12-CPU system::
|
||||
TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
|
||||
CPU count limited from 16 to 12
|
||||
TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
|
||||
|
||||
|
||||
Repeated Runs
|
||||
=============
|
||||
|
||||
Suppose that you are chasing down a rare boot-time failure. Although you
|
||||
could use kvm.sh, doing so will rebuild the kernel on each run. If you
|
||||
need (say) 1,000 runs to have confidence that you have fixed the bug,
|
||||
these pointless rebuilds can become extremely annoying.
|
||||
|
||||
This is why kvm-again.sh exists.
|
||||
|
||||
Suppose that a previous kvm.sh run left its output in this directory::
|
||||
|
||||
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
|
||||
|
||||
Then this run can be re-run without rebuilding as follow:
|
||||
|
||||
kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
|
||||
|
||||
A few of the original run's kvm.sh parameters may be overridden, perhaps
|
||||
most notably --duration and --bootargs. For example::
|
||||
|
||||
kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28 \
|
||||
--duration 45s
|
||||
|
||||
would re-run the previous test, but for only 45 seconds, thus facilitating
|
||||
tracking down the aforementioned rare boot-time failure.
|
||||
|
||||
|
||||
Distributed Runs
|
||||
================
|
||||
|
||||
Although kvm.sh is quite useful, its testing is confined to a single
|
||||
system. It is not all that hard to use your favorite framework to cause
|
||||
(say) 5 instances of kvm.sh to run on your 5 systems, but this will very
|
||||
likely unnecessarily rebuild kernels. In addition, manually distributing
|
||||
the desired rcutorture scenarios across the available systems can be
|
||||
painstaking and error-prone.
|
||||
|
||||
And this is why the kvm-remote.sh script exists.
|
||||
|
||||
If you the following command works::
|
||||
|
||||
ssh system0 date
|
||||
|
||||
and if it also works for system1, system2, system3, system4, and system5,
|
||||
and all of these systems have 64 CPUs, you can type::
|
||||
|
||||
kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
|
||||
--cpus 64 --duration 8h --configs "5*CFLIST"
|
||||
|
||||
This will build each default scenario's kernel on the local system, then
|
||||
spread each of five instances of each scenario over the systems listed,
|
||||
running each scenario for eight hours. At the end of the runs, the
|
||||
results will be gathered, recorded, and printed. Most of the parameters
|
||||
that kvm.sh will accept can be passed to kvm-remote.sh, but the list of
|
||||
systems must come first.
|
||||
|
||||
The kvm.sh ``--dryrun scenarios`` argument is useful for working out
|
||||
how many scenarios may be run in one batch across a group of systems.
|
||||
|
||||
You can also re-run a previous remote run in a manner similar to kvm.sh:
|
||||
|
||||
kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
|
||||
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
|
||||
--duration 24h
|
||||
|
||||
In this case, most of the kvm-again.sh parmeters may be supplied following
|
||||
the pathname of the old run-results directory.
|
||||
|
@ -16,18 +16,23 @@ to start learning about RCU:
|
||||
| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
|
||||
| 2019 Big API Table https://lwn.net/Articles/777165/
|
||||
|
||||
For those preferring video:
|
||||
|
||||
| 1. Unraveling RCU Mysteries: Fundamentals https://www.linuxfoundation.org/webinars/unraveling-rcu-usage-mysteries
|
||||
| 2. Unraveling RCU Mysteries: Additional Use Cases https://www.linuxfoundation.org/webinars/unraveling-rcu-usage-mysteries-additional-use-cases
|
||||
|
||||
|
||||
What is RCU?
|
||||
|
||||
RCU is a synchronization mechanism that was added to the Linux kernel
|
||||
during the 2.5 development effort that is optimized for read-mostly
|
||||
situations. Although RCU is actually quite simple once you understand it,
|
||||
getting there can sometimes be a challenge. Part of the problem is that
|
||||
most of the past descriptions of RCU have been written with the mistaken
|
||||
assumption that there is "one true way" to describe RCU. Instead,
|
||||
the experience has been that different people must take different paths
|
||||
to arrive at an understanding of RCU. This document provides several
|
||||
different paths, as follows:
|
||||
situations. Although RCU is actually quite simple, making effective use
|
||||
of it requires you to think differently about your code. Another part
|
||||
of the problem is the mistaken assumption that there is "one true way" to
|
||||
describe and to use RCU. Instead, the experience has been that different
|
||||
people must take different paths to arrive at an understanding of RCU,
|
||||
depending on their experiences and use cases. This document provides
|
||||
several different paths, as follows:
|
||||
|
||||
:ref:`1. RCU OVERVIEW <1_whatisRCU>`
|
||||
|
||||
@ -157,34 +162,36 @@ rcu_read_lock()
|
||||
^^^^^^^^^^^^^^^
|
||||
void rcu_read_lock(void);
|
||||
|
||||
Used by a reader to inform the reclaimer that the reader is
|
||||
entering an RCU read-side critical section. It is illegal
|
||||
to block while in an RCU read-side critical section, though
|
||||
kernels built with CONFIG_PREEMPT_RCU can preempt RCU
|
||||
read-side critical sections. Any RCU-protected data structure
|
||||
accessed during an RCU read-side critical section is guaranteed to
|
||||
remain unreclaimed for the full duration of that critical section.
|
||||
Reference counts may be used in conjunction with RCU to maintain
|
||||
longer-term references to data structures.
|
||||
This temporal primitive is used by a reader to inform the
|
||||
reclaimer that the reader is entering an RCU read-side critical
|
||||
section. It is illegal to block while in an RCU read-side
|
||||
critical section, though kernels built with CONFIG_PREEMPT_RCU
|
||||
can preempt RCU read-side critical sections. Any RCU-protected
|
||||
data structure accessed during an RCU read-side critical section
|
||||
is guaranteed to remain unreclaimed for the full duration of that
|
||||
critical section. Reference counts may be used in conjunction
|
||||
with RCU to maintain longer-term references to data structures.
|
||||
|
||||
rcu_read_unlock()
|
||||
^^^^^^^^^^^^^^^^^
|
||||
void rcu_read_unlock(void);
|
||||
|
||||
Used by a reader to inform the reclaimer that the reader is
|
||||
exiting an RCU read-side critical section. Note that RCU
|
||||
read-side critical sections may be nested and/or overlapping.
|
||||
This temporal primitives is used by a reader to inform the
|
||||
reclaimer that the reader is exiting an RCU read-side critical
|
||||
section. Note that RCU read-side critical sections may be nested
|
||||
and/or overlapping.
|
||||
|
||||
synchronize_rcu()
|
||||
^^^^^^^^^^^^^^^^^
|
||||
void synchronize_rcu(void);
|
||||
|
||||
Marks the end of updater code and the beginning of reclaimer
|
||||
code. It does this by blocking until all pre-existing RCU
|
||||
read-side critical sections on all CPUs have completed.
|
||||
Note that synchronize_rcu() will **not** necessarily wait for
|
||||
any subsequent RCU read-side critical sections to complete.
|
||||
For example, consider the following sequence of events::
|
||||
This temporal primitive marks the end of updater code and the
|
||||
beginning of reclaimer code. It does this by blocking until
|
||||
all pre-existing RCU read-side critical sections on all CPUs
|
||||
have completed. Note that synchronize_rcu() will **not**
|
||||
necessarily wait for any subsequent RCU read-side critical
|
||||
sections to complete. For example, consider the following
|
||||
sequence of events::
|
||||
|
||||
CPU 0 CPU 1 CPU 2
|
||||
----------------- ------------------------- ---------------
|
||||
@ -211,13 +218,13 @@ synchronize_rcu()
|
||||
to be useful in all but the most read-intensive situations,
|
||||
synchronize_rcu()'s overhead must also be quite small.
|
||||
|
||||
The call_rcu() API is a callback form of synchronize_rcu(),
|
||||
and is described in more detail in a later section. Instead of
|
||||
blocking, it registers a function and argument which are invoked
|
||||
after all ongoing RCU read-side critical sections have completed.
|
||||
This callback variant is particularly useful in situations where
|
||||
it is illegal to block or where update-side performance is
|
||||
critically important.
|
||||
The call_rcu() API is an asynchronous callback form of
|
||||
synchronize_rcu(), and is described in more detail in a later
|
||||
section. Instead of blocking, it registers a function and
|
||||
argument which are invoked after all ongoing RCU read-side
|
||||
critical sections have completed. This callback variant is
|
||||
particularly useful in situations where it is illegal to block
|
||||
or where update-side performance is critically important.
|
||||
|
||||
However, the call_rcu() API should not be used lightly, as use
|
||||
of the synchronize_rcu() API generally results in simpler code.
|
||||
@ -236,11 +243,13 @@ rcu_assign_pointer()
|
||||
would be cool to be able to declare a function in this manner.
|
||||
(Compiler experts will no doubt disagree.)
|
||||
|
||||
The updater uses this function to assign a new value to an
|
||||
The updater uses this spatial macro to assign a new value to an
|
||||
RCU-protected pointer, in order to safely communicate the change
|
||||
in value from the updater to the reader. This macro does not
|
||||
evaluate to an rvalue, but it does execute any memory-barrier
|
||||
instructions required for a given CPU architecture.
|
||||
in value from the updater to the reader. This is a spatial (as
|
||||
opposed to temporal) macro. It does not evaluate to an rvalue,
|
||||
but it does execute any memory-barrier instructions required
|
||||
for a given CPU architecture. Its ordering properties are that
|
||||
of a store-release operation.
|
||||
|
||||
Perhaps just as important, it serves to document (1) which
|
||||
pointers are protected by RCU and (2) the point at which a
|
||||
@ -255,14 +264,15 @@ rcu_dereference()
|
||||
Like rcu_assign_pointer(), rcu_dereference() must be implemented
|
||||
as a macro.
|
||||
|
||||
The reader uses rcu_dereference() to fetch an RCU-protected
|
||||
pointer, which returns a value that may then be safely
|
||||
dereferenced. Note that rcu_dereference() does not actually
|
||||
dereference the pointer, instead, it protects the pointer for
|
||||
later dereferencing. It also executes any needed memory-barrier
|
||||
instructions for a given CPU architecture. Currently, only Alpha
|
||||
needs memory barriers within rcu_dereference() -- on other CPUs,
|
||||
it compiles to nothing, not even a compiler directive.
|
||||
The reader uses the spatial rcu_dereference() macro to fetch
|
||||
an RCU-protected pointer, which returns a value that may
|
||||
then be safely dereferenced. Note that rcu_dereference()
|
||||
does not actually dereference the pointer, instead, it
|
||||
protects the pointer for later dereferencing. It also
|
||||
executes any needed memory-barrier instructions for a given
|
||||
CPU architecture. Currently, only Alpha needs memory barriers
|
||||
within rcu_dereference() -- on other CPUs, it compiles to a
|
||||
volatile load.
|
||||
|
||||
Common coding practice uses rcu_dereference() to copy an
|
||||
RCU-protected pointer to a local variable, then dereferences
|
||||
@ -355,12 +365,15 @@ reader, updater, and reclaimer.
|
||||
synchronize_rcu() & call_rcu()
|
||||
|
||||
|
||||
The RCU infrastructure observes the time sequence of rcu_read_lock(),
|
||||
The RCU infrastructure observes the temporal sequence of rcu_read_lock(),
|
||||
rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
|
||||
order to determine when (1) synchronize_rcu() invocations may return
|
||||
to their callers and (2) call_rcu() callbacks may be invoked. Efficient
|
||||
implementations of the RCU infrastructure make heavy use of batching in
|
||||
order to amortize their overhead over many uses of the corresponding APIs.
|
||||
The rcu_assign_pointer() and rcu_dereference() invocations communicate
|
||||
spatial changes via stores to and loads from the RCU-protected pointer in
|
||||
question.
|
||||
|
||||
There are at least three flavors of RCU usage in the Linux kernel. The diagram
|
||||
above shows the most common one. On the updater side, the rcu_assign_pointer(),
|
||||
@ -392,7 +405,9 @@ b. RCU applied to networking data structures that may be subjected
|
||||
c. RCU applied to scheduler and interrupt/NMI-handler tasks.
|
||||
|
||||
Again, most uses will be of (a). The (b) and (c) cases are important
|
||||
for specialized uses, but are relatively uncommon.
|
||||
for specialized uses, but are relatively uncommon. The SRCU, RCU-Tasks,
|
||||
RCU-Tasks-Rude, and RCU-Tasks-Trace have similar relationships among
|
||||
their assorted primitives.
|
||||
|
||||
.. _3_whatisRCU:
|
||||
|
||||
@ -468,7 +483,7 @@ So, to sum up:
|
||||
- Within an RCU read-side critical section, use rcu_dereference()
|
||||
to dereference RCU-protected pointers.
|
||||
|
||||
- Use some solid scheme (such as locks or semaphores) to
|
||||
- Use some solid design (such as locks or semaphores) to
|
||||
keep concurrent updates from interfering with each other.
|
||||
|
||||
- Use rcu_assign_pointer() to update an RCU-protected pointer.
|
||||
@ -579,6 +594,14 @@ to avoid having to write your own callback::
|
||||
|
||||
kfree_rcu(old_fp, rcu);
|
||||
|
||||
If the occasional sleep is permitted, the single-argument form may
|
||||
be used, omitting the rcu_head structure from struct foo.
|
||||
|
||||
kfree_rcu(old_fp);
|
||||
|
||||
This variant of kfree_rcu() almost never blocks, but might do so by
|
||||
invoking synchronize_rcu() in response to memory-allocation failure.
|
||||
|
||||
Again, see checklist.rst for additional rules governing the use of RCU.
|
||||
|
||||
.. _5_whatisRCU:
|
||||
@ -596,7 +619,7 @@ lacking both functionality and performance. However, they are useful
|
||||
in getting a feel for how RCU works. See kernel/rcu/update.c for a
|
||||
production-quality implementation, and see:
|
||||
|
||||
http://www.rdrop.com/users/paulmck/RCU
|
||||
https://docs.google.com/document/d/1X0lThx8OK0ZgLMqVoXiR4ZrGURHrXK6NyLRbeXe3Xac/edit
|
||||
|
||||
for papers describing the Linux kernel RCU implementation. The OLS'01
|
||||
and OLS'02 papers are a good introduction, and the dissertation provides
|
||||
@ -929,6 +952,8 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be
|
||||
initialized after each and every call to kmem_cache_alloc(), which renders
|
||||
reference-free spinlock acquisition completely unsafe. Therefore, when
|
||||
using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter.
|
||||
(Those willing to use a kmem_cache constructor may also use locking,
|
||||
including cache-friendly sequence locking.)
|
||||
|
||||
With traditional reference counting -- such as that implemented by the
|
||||
kref library in Linux -- there is typically code that runs when the last
|
||||
@ -1047,6 +1072,30 @@ sched::
|
||||
rcu_read_lock_sched_held
|
||||
|
||||
|
||||
RCU-Tasks::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
|
||||
N/A call_rcu_tasks rcu_barrier_tasks
|
||||
synchronize_rcu_tasks
|
||||
|
||||
|
||||
RCU-Tasks-Rude::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
|
||||
N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
|
||||
synchronize_rcu_tasks_rude
|
||||
|
||||
|
||||
RCU-Tasks-Trace::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
|
||||
rcu_read_lock_trace call_rcu_tasks_trace rcu_barrier_tasks_trace
|
||||
rcu_read_unlock_trace synchronize_rcu_tasks_trace
|
||||
|
||||
|
||||
SRCU::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
@ -1087,35 +1136,43 @@ list can be helpful:
|
||||
|
||||
a. Will readers need to block? If so, you need SRCU.
|
||||
|
||||
b. What about the -rt patchset? If readers would need to block
|
||||
in an non-rt kernel, you need SRCU. If readers would block
|
||||
in a -rt kernel, but not in a non-rt kernel, SRCU is not
|
||||
necessary. (The -rt patchset turns spinlocks into sleeplocks,
|
||||
hence this distinction.)
|
||||
b. Will readers need to block and are you doing tracing, for
|
||||
example, ftrace or BPF? If so, you need RCU-tasks,
|
||||
RCU-tasks-rude, and/or RCU-tasks-trace.
|
||||
|
||||
c. Do you need to treat NMI handlers, hardirq handlers,
|
||||
c. What about the -rt patchset? If readers would need to block in
|
||||
an non-rt kernel, you need SRCU. If readers would block when
|
||||
acquiring spinlocks in a -rt kernel, but not in a non-rt kernel,
|
||||
SRCU is not necessary. (The -rt patchset turns spinlocks into
|
||||
sleeplocks, hence this distinction.)
|
||||
|
||||
d. Do you need to treat NMI handlers, hardirq handlers,
|
||||
and code segments with preemption disabled (whether
|
||||
via preempt_disable(), local_irq_save(), local_bh_disable(),
|
||||
or some other mechanism) as if they were explicit RCU readers?
|
||||
If so, RCU-sched is the only choice that will work for you.
|
||||
If so, RCU-sched readers are the only choice that will work
|
||||
for you, but since about v4.20 you use can use the vanilla RCU
|
||||
update primitives.
|
||||
|
||||
d. Do you need RCU grace periods to complete even in the face
|
||||
of softirq monopolization of one or more of the CPUs? For
|
||||
example, is your code subject to network-based denial-of-service
|
||||
attacks? If so, you should disable softirq across your readers,
|
||||
for example, by using rcu_read_lock_bh().
|
||||
e. Do you need RCU grace periods to complete even in the face of
|
||||
softirq monopolization of one or more of the CPUs? For example,
|
||||
is your code subject to network-based denial-of-service attacks?
|
||||
If so, you should disable softirq across your readers, for
|
||||
example, by using rcu_read_lock_bh(). Since about v4.20 you
|
||||
use can use the vanilla RCU update primitives.
|
||||
|
||||
e. Is your workload too update-intensive for normal use of
|
||||
f. Is your workload too update-intensive for normal use of
|
||||
RCU, but inappropriate for other synchronization mechanisms?
|
||||
If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
|
||||
named SLAB_DESTROY_BY_RCU). But please be careful!
|
||||
|
||||
f. Do you need read-side critical sections that are respected
|
||||
even though they are in the middle of the idle loop, during
|
||||
user-mode execution, or on an offlined CPU? If so, SRCU is the
|
||||
only choice that will work for you.
|
||||
g. Do you need read-side critical sections that are respected even
|
||||
on CPUs that are deep in the idle loop, during entry to or exit
|
||||
from user-mode execution, or on an offlined CPU? If so, SRCU
|
||||
and RCU Tasks Trace are the only choices that will work for you,
|
||||
with SRCU being strongly preferred in almost all cases.
|
||||
|
||||
g. Otherwise, use RCU.
|
||||
h. Otherwise, use RCU.
|
||||
|
||||
Of course, this all assumes that you have determined that RCU is in fact
|
||||
the right tool for your job.
|
||||
|
@ -80,6 +80,8 @@ access. For example, cpusets (see Documentation/admin-guide/cgroup-v1/cpusets.rs
|
||||
you to associate a set of CPUs and a set of memory nodes with the
|
||||
tasks in each cgroup.
|
||||
|
||||
.. _cgroups-why-needed:
|
||||
|
||||
1.2 Why are cgroups needed ?
|
||||
----------------------------
|
||||
|
||||
|
@ -2,18 +2,18 @@
|
||||
Memory Resource Controller
|
||||
==========================
|
||||
|
||||
NOTE:
|
||||
.. caution::
|
||||
This document is hopelessly outdated and it asks for a complete
|
||||
rewrite. It still contains a useful information so we are keeping it
|
||||
here but make sure to check the current code if you need a deeper
|
||||
understanding.
|
||||
|
||||
NOTE:
|
||||
.. note::
|
||||
The Memory Resource Controller has generically been referred to as the
|
||||
memory controller in this document. Do not confuse memory controller
|
||||
used here with the memory controller that is used in hardware.
|
||||
|
||||
(For editors) In this document:
|
||||
.. hint::
|
||||
When we mention a cgroup (cgroupfs's directory) with memory controller,
|
||||
we call it "memory cgroup". When you see git-log and source code, you'll
|
||||
see patch's title and function names tend to use "memcg".
|
||||
@ -23,7 +23,7 @@ Benefits and Purpose of the memory controller
|
||||
=============================================
|
||||
|
||||
The memory controller isolates the memory behaviour of a group of tasks
|
||||
from the rest of the system. The article on LWN [12] mentions some probable
|
||||
from the rest of the system. The article on LWN [12]_ mentions some probable
|
||||
uses of the memory controller. The memory controller can be used to
|
||||
|
||||
a. Isolate an application or a group of applications
|
||||
@ -55,7 +55,8 @@ Features:
|
||||
- Root cgroup has no limit controls.
|
||||
|
||||
Kernel memory support is a work in progress, and the current version provides
|
||||
basically functionality. (See Section 2.7)
|
||||
basically functionality. (See :ref:`section 2.7
|
||||
<cgroup-v1-memory-kernel-extension>`)
|
||||
|
||||
Brief summary of control files.
|
||||
|
||||
@ -107,16 +108,16 @@ Brief summary of control files.
|
||||
==========
|
||||
|
||||
The memory controller has a long history. A request for comments for the memory
|
||||
controller was posted by Balbir Singh [1]. At the time the RFC was posted
|
||||
controller was posted by Balbir Singh [1]_. At the time the RFC was posted
|
||||
there were several implementations for memory control. The goal of the
|
||||
RFC was to build consensus and agreement for the minimal features required
|
||||
for memory control. The first RSS controller was posted by Balbir Singh[2]
|
||||
in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
|
||||
RSS controller. At OLS, at the resource management BoF, everyone suggested
|
||||
that we handle both page cache and RSS together. Another request was raised
|
||||
to allow user space handling of OOM. The current memory controller is
|
||||
for memory control. The first RSS controller was posted by Balbir Singh [2]_
|
||||
in Feb 2007. Pavel Emelianov [3]_ [4]_ [5]_ has since posted three versions
|
||||
of the RSS controller. At OLS, at the resource management BoF, everyone
|
||||
suggested that we handle both page cache and RSS together. Another request was
|
||||
raised to allow user space handling of OOM. The current memory controller is
|
||||
at version 6; it combines both mapped (RSS) and unmapped Page
|
||||
Cache Control [11].
|
||||
Cache Control [11]_.
|
||||
|
||||
2. Memory Control
|
||||
=================
|
||||
@ -147,7 +148,8 @@ specific data structure (mem_cgroup) associated with it.
|
||||
2.2. Accounting
|
||||
---------------
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
:caption: Figure 1: Hierarchy of Accounting
|
||||
|
||||
+--------------------+
|
||||
| mem_cgroup |
|
||||
@ -167,7 +169,6 @@ specific data structure (mem_cgroup) associated with it.
|
||||
| | | |
|
||||
+---------------+ +---------------+
|
||||
|
||||
(Figure 1: Hierarchy of Accounting)
|
||||
|
||||
|
||||
Figure 1 shows the important aspects of the controller
|
||||
@ -221,8 +222,9 @@ behind this approach is that a cgroup that aggressively uses a shared
|
||||
page will eventually get charged for it (once it is uncharged from
|
||||
the cgroup that brought it in -- this will happen on memory pressure).
|
||||
|
||||
But see section 8.2: when moving a task to another cgroup, its pages may
|
||||
be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
|
||||
But see :ref:`section 8.2 <cgroup-v1-memory-movable-charges>` when moving a
|
||||
task to another cgroup, its pages may be recharged to the new cgroup, if
|
||||
move_charge_at_immigrate has been chosen.
|
||||
|
||||
2.4 Swap Extension
|
||||
--------------------------------------
|
||||
@ -244,7 +246,8 @@ In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
|
||||
By using the memsw limit, you can avoid system OOM which can be caused by swap
|
||||
shortage.
|
||||
|
||||
**why 'memory+swap' rather than swap**
|
||||
2.4.1 why 'memory+swap' rather than swap
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
|
||||
to move account from memory to swap...there is no change in usage of
|
||||
@ -252,7 +255,8 @@ memory+swap. In other words, when we want to limit the usage of swap without
|
||||
affecting global LRU, memory+swap limit is better than just limiting swap from
|
||||
an OS point of view.
|
||||
|
||||
**What happens when a cgroup hits memory.memsw.limit_in_bytes**
|
||||
2.4.2. What happens when a cgroup hits memory.memsw.limit_in_bytes
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
|
||||
in this cgroup. Then, swap-out will not be done by cgroup routine and file
|
||||
@ -268,26 +272,26 @@ global VM. When a cgroup goes over its limit, we first try
|
||||
to reclaim memory from the cgroup so as to make space for the new
|
||||
pages that the cgroup has touched. If the reclaim is unsuccessful,
|
||||
an OOM routine is invoked to select and kill the bulkiest task in the
|
||||
cgroup. (See 10. OOM Control below.)
|
||||
cgroup. (See :ref:`10. OOM Control <cgroup-v1-memory-oom-control>` below.)
|
||||
|
||||
The reclaim algorithm has not been modified for cgroups, except that
|
||||
pages that are selected for reclaiming come from the per-cgroup LRU
|
||||
list.
|
||||
|
||||
NOTE:
|
||||
Reclaim does not work for the root cgroup, since we cannot set any
|
||||
limits on the root cgroup.
|
||||
.. note::
|
||||
Reclaim does not work for the root cgroup, since we cannot set any
|
||||
limits on the root cgroup.
|
||||
|
||||
Note2:
|
||||
When panic_on_oom is set to "2", the whole system will panic.
|
||||
.. note::
|
||||
When panic_on_oom is set to "2", the whole system will panic.
|
||||
|
||||
When oom event notifier is registered, event will be delivered.
|
||||
(See oom_control section)
|
||||
(See :ref:`oom_control <cgroup-v1-memory-oom-control>` section)
|
||||
|
||||
2.6 Locking
|
||||
-----------
|
||||
|
||||
Lock order is as follows:
|
||||
Lock order is as follows::
|
||||
|
||||
Page lock (PG_locked bit of page->flags)
|
||||
mm->page_table_lock or split pte_lock
|
||||
@ -299,6 +303,8 @@ Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
|
||||
lruvec->lru_lock; PG_lru bit of page->flags is cleared before
|
||||
isolating a page from its LRU under lruvec->lru_lock.
|
||||
|
||||
.. _cgroup-v1-memory-kernel-extension:
|
||||
|
||||
2.7 Kernel Memory Extension
|
||||
-----------------------------------------------
|
||||
|
||||
@ -367,10 +373,10 @@ U != 0, K < U:
|
||||
never greater than the total memory, and freely set U at the cost of his
|
||||
QoS.
|
||||
|
||||
WARNING:
|
||||
In the current implementation, memory reclaim will NOT be
|
||||
triggered for a cgroup when it hits K while staying below U, which makes
|
||||
this setup impractical.
|
||||
.. warning::
|
||||
In the current implementation, memory reclaim will NOT be triggered for
|
||||
a cgroup when it hits K while staying below U, which makes this setup
|
||||
impractical.
|
||||
|
||||
U != 0, K >= U:
|
||||
Since kmem charges will also be fed to the user counter and reclaim will be
|
||||
@ -381,45 +387,41 @@ U != 0, K >= U:
|
||||
3. User Interface
|
||||
=================
|
||||
|
||||
3.0. Configuration
|
||||
------------------
|
||||
To use the user interface:
|
||||
|
||||
a. Enable CONFIG_CGROUPS
|
||||
b. Enable CONFIG_MEMCG
|
||||
|
||||
3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
|
||||
-------------------------------------------------------------------
|
||||
|
||||
::
|
||||
1. Enable CONFIG_CGROUPS and CONFIG_MEMCG options
|
||||
2. Prepare the cgroups (see :ref:`Why are cgroups needed?
|
||||
<cgroups-why-needed>` for the background information)::
|
||||
|
||||
# mount -t tmpfs none /sys/fs/cgroup
|
||||
# mkdir /sys/fs/cgroup/memory
|
||||
# mount -t cgroup none /sys/fs/cgroup/memory -o memory
|
||||
|
||||
3.2. Make the new group and move bash into it::
|
||||
3. Make the new group and move bash into it::
|
||||
|
||||
# mkdir /sys/fs/cgroup/memory/0
|
||||
# echo $$ > /sys/fs/cgroup/memory/0/tasks
|
||||
|
||||
Since now we're in the 0 cgroup, we can alter the memory limit::
|
||||
4. Since now we're in the 0 cgroup, we can alter the memory limit::
|
||||
|
||||
# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
|
||||
|
||||
NOTE:
|
||||
We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
|
||||
mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
|
||||
Gibibytes.)
|
||||
The limit can now be queried::
|
||||
|
||||
NOTE:
|
||||
We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
|
||||
# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
|
||||
4194304
|
||||
|
||||
NOTE:
|
||||
We cannot set limits on the root cgroup any more.
|
||||
.. note::
|
||||
We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
|
||||
mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
|
||||
Gibibytes.)
|
||||
|
||||
::
|
||||
.. note::
|
||||
We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
|
||||
|
||||
.. note::
|
||||
We cannot set limits on the root cgroup any more.
|
||||
|
||||
# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
|
||||
4194304
|
||||
|
||||
We can check the usage::
|
||||
|
||||
@ -458,6 +460,8 @@ test because it has noise of shared objects/status.
|
||||
But the above two are testing extreme situations.
|
||||
Trying usual test under memory controller is always helpful.
|
||||
|
||||
.. _cgroup-v1-memory-test-troubleshoot:
|
||||
|
||||
4.1 Troubleshooting
|
||||
-------------------
|
||||
|
||||
@ -470,8 +474,11 @@ terminated by the OOM killer. There are several causes for this:
|
||||
A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
|
||||
some of the pages cached in the cgroup (page cache pages).
|
||||
|
||||
To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
|
||||
seeing what happens will be helpful.
|
||||
To know what happens, disabling OOM_Kill as per :ref:`"10. OOM Control"
|
||||
<cgroup-v1-memory-oom-control>` (below) and seeing what happens will be
|
||||
helpful.
|
||||
|
||||
.. _cgroup-v1-memory-test-task-migration:
|
||||
|
||||
4.2 Task migration
|
||||
------------------
|
||||
@ -482,15 +489,16 @@ remain charged to it, the charge is dropped when the page is freed or
|
||||
reclaimed.
|
||||
|
||||
You can move charges of a task along with task migration.
|
||||
See 8. "Move charges at task migration"
|
||||
See :ref:`8. "Move charges at task migration" <cgroup-v1-memory-move-charges>`
|
||||
|
||||
4.3 Removing a cgroup
|
||||
---------------------
|
||||
|
||||
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
|
||||
cgroup might have some charge associated with it, even though all
|
||||
tasks have migrated away from it. (because we charge against pages, not
|
||||
against tasks.)
|
||||
A cgroup can be removed by rmdir, but as discussed in :ref:`sections 4.1
|
||||
<cgroup-v1-memory-test-troubleshoot>` and :ref:`4.2
|
||||
<cgroup-v1-memory-test-task-migration>`, a cgroup might have some charge
|
||||
associated with it, even though all tasks have migrated away from it. (because
|
||||
we charge against pages, not against tasks.)
|
||||
|
||||
We move the stats to parent, and no change on the charge except uncharging
|
||||
from the child.
|
||||
@ -519,67 +527,66 @@ will be charged as a new owner of it.
|
||||
5.2 stat file
|
||||
-------------
|
||||
|
||||
memory.stat file includes following statistics
|
||||
memory.stat file includes following statistics:
|
||||
|
||||
per-memory cgroup local status
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* per-memory cgroup local status
|
||||
|
||||
=============== ===============================================================
|
||||
cache # of bytes of page cache memory.
|
||||
rss # of bytes of anonymous and swap cache memory (includes
|
||||
transparent hugepages).
|
||||
rss_huge # of bytes of anonymous transparent hugepages.
|
||||
mapped_file # of bytes of mapped file (includes tmpfs/shmem)
|
||||
pgpgin # of charging events to the memory cgroup. The charging
|
||||
event happens each time a page is accounted as either mapped
|
||||
anon page(RSS) or cache page(Page Cache) to the cgroup.
|
||||
pgpgout # of uncharging events to the memory cgroup. The uncharging
|
||||
event happens each time a page is unaccounted from the cgroup.
|
||||
swap # of bytes of swap usage
|
||||
dirty # of bytes that are waiting to get written back to the disk.
|
||||
writeback # of bytes of file/anon cache that are queued for syncing to
|
||||
disk.
|
||||
inactive_anon # of bytes of anonymous and swap cache memory on inactive
|
||||
LRU list.
|
||||
active_anon # of bytes of anonymous and swap cache memory on active
|
||||
LRU list.
|
||||
inactive_file # of bytes of file-backed memory and MADV_FREE anonymous memory(
|
||||
LazyFree pages) on inactive LRU list.
|
||||
active_file # of bytes of file-backed memory on active LRU list.
|
||||
unevictable # of bytes of memory that cannot be reclaimed (mlocked etc).
|
||||
=============== ===============================================================
|
||||
=============== ===============================================================
|
||||
cache # of bytes of page cache memory.
|
||||
rss # of bytes of anonymous and swap cache memory (includes
|
||||
transparent hugepages).
|
||||
rss_huge # of bytes of anonymous transparent hugepages.
|
||||
mapped_file # of bytes of mapped file (includes tmpfs/shmem)
|
||||
pgpgin # of charging events to the memory cgroup. The charging
|
||||
event happens each time a page is accounted as either mapped
|
||||
anon page(RSS) or cache page(Page Cache) to the cgroup.
|
||||
pgpgout # of uncharging events to the memory cgroup. The uncharging
|
||||
event happens each time a page is unaccounted from the
|
||||
cgroup.
|
||||
swap # of bytes of swap usage
|
||||
dirty # of bytes that are waiting to get written back to the disk.
|
||||
writeback # of bytes of file/anon cache that are queued for syncing to
|
||||
disk.
|
||||
inactive_anon # of bytes of anonymous and swap cache memory on inactive
|
||||
LRU list.
|
||||
active_anon # of bytes of anonymous and swap cache memory on active
|
||||
LRU list.
|
||||
inactive_file # of bytes of file-backed memory and MADV_FREE anonymous
|
||||
memory (LazyFree pages) on inactive LRU list.
|
||||
active_file # of bytes of file-backed memory on active LRU list.
|
||||
unevictable # of bytes of memory that cannot be reclaimed (mlocked etc).
|
||||
=============== ===============================================================
|
||||
|
||||
status considering hierarchy (see memory.use_hierarchy settings)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* status considering hierarchy (see memory.use_hierarchy settings):
|
||||
|
||||
========================= ===================================================
|
||||
hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy
|
||||
under which the memory cgroup is
|
||||
hierarchical_memsw_limit # of bytes of memory+swap limit with regard to
|
||||
hierarchy under which memory cgroup is.
|
||||
========================= ===================================================
|
||||
hierarchical_memory_limit # of bytes of memory limit with regard to
|
||||
hierarchy
|
||||
under which the memory cgroup is
|
||||
hierarchical_memsw_limit # of bytes of memory+swap limit with regard to
|
||||
hierarchy under which memory cgroup is.
|
||||
|
||||
total_<counter> # hierarchical version of <counter>, which in
|
||||
addition to the cgroup's own value includes the
|
||||
sum of all hierarchical children's values of
|
||||
<counter>, i.e. total_cache
|
||||
========================= ===================================================
|
||||
total_<counter> # hierarchical version of <counter>, which in
|
||||
addition to the cgroup's own value includes the
|
||||
sum of all hierarchical children's values of
|
||||
<counter>, i.e. total_cache
|
||||
========================= ===================================================
|
||||
|
||||
The following additional stats are dependent on CONFIG_DEBUG_VM
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* additional vm parameters (depends on CONFIG_DEBUG_VM):
|
||||
|
||||
========================= ========================================
|
||||
recent_rotated_anon VM internal parameter. (see mm/vmscan.c)
|
||||
recent_rotated_file VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_anon VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_file VM internal parameter. (see mm/vmscan.c)
|
||||
========================= ========================================
|
||||
========================= ========================================
|
||||
recent_rotated_anon VM internal parameter. (see mm/vmscan.c)
|
||||
recent_rotated_file VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_anon VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_file VM internal parameter. (see mm/vmscan.c)
|
||||
========================= ========================================
|
||||
|
||||
Memo:
|
||||
.. hint::
|
||||
recent_rotated means recent frequency of LRU rotation.
|
||||
recent_scanned means recent # of scans to LRU.
|
||||
showing for better debug please see the code for meanings.
|
||||
|
||||
Note:
|
||||
.. note::
|
||||
Only anonymous and swap cache memory is listed as part of 'rss' stat.
|
||||
This should not be confused with the true 'resident set size' or the
|
||||
amount of physical memory used by the cgroup.
|
||||
@ -710,13 +717,16 @@ If we want to change this to 1G, we can at any time use::
|
||||
|
||||
# echo 1G > memory.soft_limit_in_bytes
|
||||
|
||||
NOTE1:
|
||||
.. note::
|
||||
Soft limits take effect over a long period of time, since they involve
|
||||
reclaiming memory for balancing between memory cgroups
|
||||
NOTE2:
|
||||
|
||||
.. note::
|
||||
It is recommended to set the soft limit always below the hard limit,
|
||||
otherwise the hard limit will take precedence.
|
||||
|
||||
.. _cgroup-v1-memory-move-charges:
|
||||
|
||||
8. Move charges at task migration
|
||||
=================================
|
||||
|
||||
@ -735,23 +745,29 @@ If you want to enable it::
|
||||
|
||||
# echo (some positive value) > memory.move_charge_at_immigrate
|
||||
|
||||
Note:
|
||||
.. note::
|
||||
Each bits of move_charge_at_immigrate has its own meaning about what type
|
||||
of charges should be moved. See 8.2 for details.
|
||||
Note:
|
||||
of charges should be moved. See :ref:`section 8.2
|
||||
<cgroup-v1-memory-movable-charges>` for details.
|
||||
|
||||
.. note::
|
||||
Charges are moved only when you move mm->owner, in other words,
|
||||
a leader of a thread group.
|
||||
Note:
|
||||
|
||||
.. note::
|
||||
If we cannot find enough space for the task in the destination cgroup, we
|
||||
try to make space by reclaiming memory. Task migration may fail if we
|
||||
cannot make enough space.
|
||||
Note:
|
||||
|
||||
.. note::
|
||||
It can take several seconds if you move charges much.
|
||||
|
||||
And if you want disable it again::
|
||||
|
||||
# echo 0 > memory.move_charge_at_immigrate
|
||||
|
||||
.. _cgroup-v1-memory-movable-charges:
|
||||
|
||||
8.2 Type of charges which can be moved
|
||||
--------------------------------------
|
||||
|
||||
@ -801,6 +817,8 @@ threshold in any direction.
|
||||
|
||||
It's applicable for root and non-root cgroup.
|
||||
|
||||
.. _cgroup-v1-memory-oom-control:
|
||||
|
||||
10. OOM Control
|
||||
===============
|
||||
|
||||
@ -956,15 +974,16 @@ commented and discussed quite extensively in the community.
|
||||
References
|
||||
==========
|
||||
|
||||
1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
|
||||
2. Singh, Balbir. Memory Controller (RSS Control),
|
||||
.. [1] Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
|
||||
.. [2] Singh, Balbir. Memory Controller (RSS Control),
|
||||
http://lwn.net/Articles/222762/
|
||||
3. Emelianov, Pavel. Resource controllers based on process cgroups
|
||||
.. [3] Emelianov, Pavel. Resource controllers based on process cgroups
|
||||
https://lore.kernel.org/r/45ED7DEC.7010403@sw.ru
|
||||
4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
|
||||
.. [4] Emelianov, Pavel. RSS controller based on process cgroups (v2)
|
||||
https://lore.kernel.org/r/461A3010.90403@sw.ru
|
||||
5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
|
||||
.. [5] Emelianov, Pavel. RSS controller based on process cgroups (v3)
|
||||
https://lore.kernel.org/r/465D9739.8070209@openvz.org
|
||||
|
||||
6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
|
||||
7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
|
||||
subsystem (v3), http://lwn.net/Articles/235534/
|
||||
@ -974,7 +993,8 @@ References
|
||||
https://lore.kernel.org/r/464D267A.50107@linux.vnet.ibm.com
|
||||
10. Singh, Balbir. Memory controller v6 test results,
|
||||
https://lore.kernel.org/r/20070819094658.654.84837.sendpatchset@balbir-laptop
|
||||
11. Singh, Balbir. Memory controller introduction (v6),
|
||||
https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop
|
||||
12. Corbet, Jonathan, Controlling memory use in cgroups,
|
||||
http://lwn.net/Articles/243795/
|
||||
|
||||
.. [11] Singh, Balbir. Memory controller introduction (v6),
|
||||
https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop
|
||||
.. [12] Corbet, Jonathan, Controlling memory use in cgroups,
|
||||
http://lwn.net/Articles/243795/
|
||||
|
@ -619,6 +619,8 @@ process migrations.
|
||||
and is an example of this type.
|
||||
|
||||
|
||||
.. _cgroupv2-limits-distributor:
|
||||
|
||||
Limits
|
||||
------
|
||||
|
||||
@ -635,6 +637,7 @@ process migrations.
|
||||
"io.max" limits the maximum BPS and/or IOPS that a cgroup can consume
|
||||
on an IO device and is an example of this type.
|
||||
|
||||
.. _cgroupv2-protections-distributor:
|
||||
|
||||
Protections
|
||||
-----------
|
||||
@ -1245,13 +1248,17 @@ PAGE_SIZE multiple when read back.
|
||||
This is a simple interface to trigger memory reclaim in the
|
||||
target cgroup.
|
||||
|
||||
This file accepts a string which contains the number of bytes to
|
||||
reclaim.
|
||||
This file accepts a single key, the number of bytes to reclaim.
|
||||
No nested keys are currently supported.
|
||||
|
||||
Example::
|
||||
|
||||
echo "1G" > memory.reclaim
|
||||
|
||||
The interface can be later extended with nested keys to
|
||||
configure the reclaim behavior. For example, specify the
|
||||
type of memory to reclaim from (anon, file, ..).
|
||||
|
||||
Please note that the kernel can over or under reclaim from
|
||||
the target cgroup. If less bytes are reclaimed than the
|
||||
specified amount, -EAGAIN is returned.
|
||||
@ -1263,13 +1270,6 @@ PAGE_SIZE multiple when read back.
|
||||
This means that the networking layer will not adapt based on
|
||||
reclaim induced by memory.reclaim.
|
||||
|
||||
This file also allows the user to specify the nodes to reclaim from,
|
||||
via the 'nodes=' key, for example::
|
||||
|
||||
echo "1G nodes=0,1" > memory.reclaim
|
||||
|
||||
The above instructs the kernel to reclaim memory from nodes 0,1.
|
||||
|
||||
memory.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
|
91
Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
Normal file
91
Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
Normal file
@ -0,0 +1,91 @@
|
||||
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Cross-Thread Return Address Predictions
|
||||
=======================================
|
||||
|
||||
Certain AMD and Hygon processors are subject to a cross-thread return address
|
||||
predictions vulnerability. When running in SMT mode and one sibling thread
|
||||
transitions out of C0 state, the other sibling thread could use return target
|
||||
predictions from the sibling thread that transitioned out of C0.
|
||||
|
||||
The Spectre v2 mitigations protect the Linux kernel, as it fills the return
|
||||
address prediction entries with safe targets when context switching to the idle
|
||||
thread. However, KVM does allow a VMM to prevent exiting guest mode when
|
||||
transitioning out of C0. This could result in a guest-controlled return target
|
||||
being consumed by the sibling thread.
|
||||
|
||||
Affected processors
|
||||
-------------------
|
||||
|
||||
The following CPUs are vulnerable:
|
||||
|
||||
- AMD Family 17h processors
|
||||
- Hygon Family 18h processors
|
||||
|
||||
Related CVEs
|
||||
------------
|
||||
|
||||
The following CVE entry is related to this issue:
|
||||
|
||||
============== =======================================
|
||||
CVE-2022-27672 Cross-Thread Return Address Predictions
|
||||
============== =======================================
|
||||
|
||||
Problem
|
||||
-------
|
||||
|
||||
Affected SMT-capable processors support 1T and 2T modes of execution when SMT
|
||||
is enabled. In 2T mode, both threads in a core are executing code. For the
|
||||
processor core to enter 1T mode, it is required that one of the threads
|
||||
requests to transition out of the C0 state. This can be communicated with the
|
||||
HLT instruction or with an MWAIT instruction that requests non-C0.
|
||||
When the thread re-enters the C0 state, the processor transitions back
|
||||
to 2T mode, assuming the other thread is also still in C0 state.
|
||||
|
||||
In affected processors, the return address predictor (RAP) is partitioned
|
||||
depending on the SMT mode. For instance, in 2T mode each thread uses a private
|
||||
16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
|
||||
transition between 1T/2T mode, the RAP contents are not modified but the RAP
|
||||
pointers (which control the next return target to use for predictions) may
|
||||
change. This behavior may result in return targets from one SMT thread being
|
||||
used by RET predictions in the sibling thread following a 1T/2T switch. In
|
||||
particular, a RET instruction executed immediately after a transition to 1T may
|
||||
use a return target from the thread that just became idle. In theory, this
|
||||
could lead to information disclosure if the return targets used do not come
|
||||
from trustworthy code.
|
||||
|
||||
Attack scenarios
|
||||
----------------
|
||||
|
||||
An attack can be mounted on affected processors by performing a series of CALL
|
||||
instructions with targeted return locations and then transitioning out of C0
|
||||
state.
|
||||
|
||||
Mitigation mechanism
|
||||
--------------------
|
||||
|
||||
Before entering idle state, the kernel context switches to the idle thread. The
|
||||
context switch fills the RAP entries (referred to as the RSB in Linux) with safe
|
||||
targets by performing a sequence of CALL instructions.
|
||||
|
||||
Prevent a guest VM from directly putting the processor into an idle state by
|
||||
intercepting HLT and MWAIT instructions.
|
||||
|
||||
Both mitigations are required to fully address this issue.
|
||||
|
||||
Mitigation control on the kernel command line
|
||||
---------------------------------------------
|
||||
|
||||
Use existing Spectre v2 mitigations that will fill the RSB on context switch.
|
||||
|
||||
Mitigation control for KVM - module parameter
|
||||
---------------------------------------------
|
||||
|
||||
By default, the KVM hypervisor mitigates this issue by intercepting guest
|
||||
attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
|
||||
capability to override those interceptions, but since this is not common, the
|
||||
mitigation that covers this path is not enabled by default.
|
||||
|
||||
The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
|
||||
using the boolean module parameter mitigate_smt_rsb, e.g. ``kvm.mitigate_smt_rsb=1``.
|
@ -18,3 +18,4 @@ are configurable at compile, boot or run time.
|
||||
core-scheduling.rst
|
||||
l1d_flush.rst
|
||||
processor_mmio_stale_data.rst
|
||||
cross-thread-rsb.rst
|
||||
|
@ -610,9 +610,9 @@ kernel command line.
|
||||
retpoline,generic Retpolines
|
||||
retpoline,lfence LFENCE; indirect branch
|
||||
retpoline,amd alias for retpoline,lfence
|
||||
eibrs enhanced IBRS
|
||||
eibrs,retpoline enhanced IBRS + Retpolines
|
||||
eibrs,lfence enhanced IBRS + LFENCE
|
||||
eibrs Enhanced/Auto IBRS
|
||||
eibrs,retpoline Enhanced/Auto IBRS + Retpolines
|
||||
eibrs,lfence Enhanced/Auto IBRS + LFENCE
|
||||
ibrs use IBRS to protect kernel
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
|
@ -116,6 +116,7 @@ configure specific aspects of kernel behavior to your liking.
|
||||
svga
|
||||
syscall-user-dispatch
|
||||
sysrq
|
||||
thermal/index
|
||||
thunderbolt
|
||||
ufs
|
||||
unicode
|
||||
|
@ -5113,6 +5113,17 @@
|
||||
rcupdate.rcu_cpu_stall_timeout to be used (after
|
||||
conversion from seconds to milliseconds).
|
||||
|
||||
rcupdate.rcu_cpu_stall_cputime= [KNL]
|
||||
Provide statistics on the cputime and count of
|
||||
interrupts and tasks during the sampling period. For
|
||||
multiple continuous RCU stalls, all sampling periods
|
||||
begin at half of the first RCU stall timeout.
|
||||
|
||||
rcupdate.rcu_exp_stall_task_details= [KNL]
|
||||
Print stack dumps of any tasks blocking the
|
||||
current expedited RCU grace period during an
|
||||
expedited RCU CPU stall warning.
|
||||
|
||||
rcupdate.rcu_expedited= [KNL]
|
||||
Use expedited grace-period primitives, for
|
||||
example, synchronize_rcu_expedited() instead
|
||||
@ -5221,7 +5232,7 @@
|
||||
rdt= [HW,X86,RDT]
|
||||
Turn on/off individual RDT features. List is:
|
||||
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
|
||||
mba.
|
||||
mba, smba, bmec.
|
||||
E.g. to turn on cmt and turn off mba use:
|
||||
rdt=cmt,!mba
|
||||
|
||||
@ -5729,9 +5740,9 @@
|
||||
retpoline,generic - Retpolines
|
||||
retpoline,lfence - LFENCE; indirect branch
|
||||
retpoline,amd - alias for retpoline,lfence
|
||||
eibrs - enhanced IBRS
|
||||
eibrs,retpoline - enhanced IBRS + Retpolines
|
||||
eibrs,lfence - enhanced IBRS + LFENCE
|
||||
eibrs - Enhanced/Auto IBRS
|
||||
eibrs,retpoline - Enhanced/Auto IBRS + Retpolines
|
||||
eibrs,lfence - Enhanced/Auto IBRS + LFENCE
|
||||
ibrs - use IBRS to protect kernel
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
@ -6369,6 +6380,16 @@
|
||||
in situations with strict latency requirements (where
|
||||
interruptions from clocksource watchdog are not
|
||||
acceptable).
|
||||
[x86] recalibrate: force recalibration against a HW timer
|
||||
(HPET or PM timer) on systems whose TSC frequency was
|
||||
obtained from HW or FW using either an MSR or CPUID(0x15).
|
||||
Warn if the difference is more than 500 ppm.
|
||||
[x86] watchdog: Use TSC as the watchdog clocksource with
|
||||
which to check other HW timers (HPET or PM timer), but
|
||||
only on systems where TSC has been deemed trustworthy.
|
||||
This will be suppressed by an earlier tsc=nowatchdog and
|
||||
can be overridden by a later tsc=nowatchdog. A console
|
||||
message will flag any such suppression or overriding.
|
||||
|
||||
tsc_early_khz= [X86] Skip early TSC calibration and use the given
|
||||
value instead. Useful when the early TSC frequency discovery
|
||||
@ -7020,3 +7041,10 @@
|
||||
management firmware translates the requests into actual
|
||||
hardware states (core frequency, data fabric and memory
|
||||
clocks etc.)
|
||||
active
|
||||
Use amd_pstate_epp driver instance as the scaling driver,
|
||||
driver provides a hint to the hardware if software wants
|
||||
to bias toward performance (0x0) or energy efficiency (0xff)
|
||||
to the CPPC firmware. then CPPC power algorithm will
|
||||
calculate the runtime workload and adjust the realtime cores
|
||||
frequency.
|
||||
|
@ -1,65 +0,0 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
The VPBE V4L2 driver design
|
||||
===========================
|
||||
|
||||
Functional partitioning
|
||||
-----------------------
|
||||
|
||||
Consists of the following:
|
||||
|
||||
1. V4L2 display driver
|
||||
|
||||
Implements creation of video2 and video3 device nodes and
|
||||
provides v4l2 device interface to manage VID0 and VID1 layers.
|
||||
|
||||
2. Display controller
|
||||
|
||||
Loads up VENC, OSD and external encoders such as ths8200. It provides
|
||||
a set of API calls to V4L2 drivers to set the output/standards
|
||||
in the VENC or external sub devices. It also provides
|
||||
a device object to access the services from OSD subdevice
|
||||
using sub device ops. The connection of external encoders to VENC LCD
|
||||
controller port is done at init time based on default output and standard
|
||||
selection or at run time when application change the output through
|
||||
V4L2 IOCTLs.
|
||||
|
||||
When connected to an external encoder, vpbe controller is also responsible
|
||||
for setting up the interface between VENC and external encoders based on
|
||||
board specific settings (specified in board-xxx-evm.c). This allows
|
||||
interfacing external encoders such as ths8200. The setup_if_config()
|
||||
is implemented for this as well as configure_venc() (part of the next patch)
|
||||
API to set timings in VENC for a specific display resolution. As of this
|
||||
patch series, the interconnection and enabling and setting of the external
|
||||
encoders is not present, and would be a part of the next patch series.
|
||||
|
||||
3. VENC subdevice module
|
||||
|
||||
Responsible for setting outputs provided through internal DACs and also
|
||||
setting timings at LCD controller port when external encoders are connected
|
||||
at the port or LCD panel timings required. When external encoder/LCD panel
|
||||
is connected, the timings for a specific standard/preset is retrieved from
|
||||
the board specific table and the values are used to set the timings in
|
||||
venc using non-standard timing mode.
|
||||
|
||||
Support LCD Panel displays using the VENC. For example to support a Logic
|
||||
PD display, it requires setting up the LCD controller port with a set of
|
||||
timings for the resolution supported and setting the dot clock. So we could
|
||||
add the available outputs as a board specific entry (i.e add the "LogicPD"
|
||||
output name to board-xxx-evm.c). A table of timings for various LCDs
|
||||
supported can be maintained in the board specific setup file to support
|
||||
various LCD displays.As of this patch a basic driver is present, and this
|
||||
support for external encoders and displays forms a part of the next
|
||||
patch series.
|
||||
|
||||
4. OSD module
|
||||
|
||||
OSD module implements all OSD layer management and hardware specific
|
||||
features. The VPBE module interacts with the OSD for enabling and
|
||||
disabling appropriate features of the OSD.
|
||||
|
||||
Current status
|
||||
--------------
|
||||
|
||||
A fully functional working version of the V4L2 driver is available. This
|
||||
driver has been tested with NTSC and PAL standards and buffer streaming.
|
@ -73,7 +73,6 @@ via-camera VIAFB camera controller
|
||||
video-mux Video Multiplexer
|
||||
vpif_display TI DaVinci VPIF V4L2-Display
|
||||
vpif_capture TI DaVinci VPIF video capture
|
||||
vpss TI DaVinci VPBE V4L2-Display
|
||||
vsp1 Renesas VSP1 Video Processing Engine
|
||||
xilinx-tpg Xilinx Video Test Pattern Generator
|
||||
xilinx-video Xilinx Video IP (EXPERIMENTAL)
|
||||
|
@ -13,7 +13,6 @@ Video4Linux (V4L) driver-specific documentation
|
||||
cafe_ccic
|
||||
cpia2
|
||||
cx88
|
||||
davinci-vpbe
|
||||
fimc
|
||||
imx
|
||||
imx7
|
||||
|
@ -70,9 +70,7 @@ e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs
|
||||
The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
|
||||
means the compression ratio will always be 2:1 or worse (because of half-full
|
||||
zbud pages). The zsmalloc type zpool has a more complex compressed page
|
||||
storage method, and it can achieve greater storage densities. However,
|
||||
zsmalloc does not implement compressed page eviction, so once zswap fills it
|
||||
cannot evict the oldest page, it can only reject new pages.
|
||||
storage method, and it can achieve greater storage densities.
|
||||
|
||||
When a swap page is passed from frontswap to zswap, zswap maintains a mapping
|
||||
of the swap entry, a combination of the swap type and swap offset, to the zpool
|
||||
|
@ -230,8 +230,8 @@ with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond
|
||||
to the request from AMD P-States.
|
||||
|
||||
|
||||
User Space Interface in ``sysfs``
|
||||
==================================
|
||||
User Space Interface in ``sysfs`` - Per-policy control
|
||||
======================================================
|
||||
|
||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They are located in the
|
||||
@ -262,6 +262,25 @@ lowest non-linear performance in `AMD CPPC Performance Capability
|
||||
<perf_cap_>`_.)
|
||||
This attribute is read-only.
|
||||
|
||||
``energy_performance_available_preferences``
|
||||
|
||||
A list of all the supported EPP preferences that could be used for
|
||||
``energy_performance_preference`` on this system.
|
||||
These profiles represent different hints that are provided
|
||||
to the low-level firmware about the user's desired energy vs efficiency
|
||||
tradeoff. ``default`` represents the epp value is set by platform
|
||||
firmware. This attribute is read-only.
|
||||
|
||||
``energy_performance_preference``
|
||||
|
||||
The current energy performance preference can be read from this attribute.
|
||||
and user can change current preference according to energy or performance needs
|
||||
Please get all support profiles list from
|
||||
``energy_performance_available_preferences`` attribute, all the profiles are
|
||||
integer values defined between 0 to 255 when EPP feature is enabled by platform
|
||||
firmware, if EPP feature is disabled, driver will ignore the written value
|
||||
This attribute is read-write.
|
||||
|
||||
Other performance and frequency values can be read back from
|
||||
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
||||
|
||||
@ -280,8 +299,30 @@ module which supports the new AMD P-States mechanism on most of the future AMD
|
||||
platforms. The AMD P-States mechanism is the more performance and energy
|
||||
efficiency frequency management method on AMD processors.
|
||||
|
||||
Kernel Module Options for ``amd-pstate``
|
||||
=========================================
|
||||
|
||||
AMD Pstate Driver Operation Modes
|
||||
=================================
|
||||
|
||||
``amd_pstate`` CPPC has two operation modes: CPPC Autonomous(active) mode and
|
||||
CPPC non-autonomous(passive) mode.
|
||||
active mode and passive mode can be chosen by different kernel parameters.
|
||||
When in Autonomous mode, CPPC ignores requests done in the Desired Performance
|
||||
Target register and takes into account only the values set to the Minimum requested
|
||||
performance, Maximum requested performance, and Energy Performance Preference
|
||||
registers. When Autonomous is disabled, it only considers the Desired Performance Target.
|
||||
|
||||
Active Mode
|
||||
------------
|
||||
|
||||
``amd_pstate=active``
|
||||
|
||||
This is the low-level firmware control mode which is implemented by ``amd_pstate_epp``
|
||||
driver with ``amd_pstate=active`` passed to the kernel in the command line.
|
||||
In this mode, ``amd_pstate_epp`` driver provides a hint to the hardware if software
|
||||
wants to bias toward performance (0x0) or energy efficiency (0xff) to the CPPC firmware.
|
||||
then CPPC power algorithm will calculate the runtime workload and adjust the realtime
|
||||
cores frequency according to the power supply and thermal, core voltage and some other
|
||||
hardware conditions.
|
||||
|
||||
Passive Mode
|
||||
------------
|
||||
@ -298,6 +339,35 @@ processor must provide at least nominal performance requested and go higher if c
|
||||
operating conditions allow.
|
||||
|
||||
|
||||
User Space Interface in ``sysfs`` - General
|
||||
===========================================
|
||||
|
||||
Global Attributes
|
||||
-----------------
|
||||
|
||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They are located in the
|
||||
``/sys/devices/system/cpu/amd-pstate/`` directory and affect all CPUs.
|
||||
|
||||
``status``
|
||||
Operation mode of the driver: "active", "passive" or "disable".
|
||||
|
||||
"active"
|
||||
The driver is functional and in the ``active mode``
|
||||
|
||||
"passive"
|
||||
The driver is functional and in the ``passive mode``
|
||||
|
||||
"disable"
|
||||
The driver is unregistered and not functional now.
|
||||
|
||||
This attribute can be written to in order to change the driver's
|
||||
operation mode or to unregister it. The string written to it must be
|
||||
one of the possible values of it and, if successful, writing one of
|
||||
these values to the sysfs file will cause the driver to switch over
|
||||
to the operation mode represented by that string - or to be
|
||||
unregistered in the "disable" case.
|
||||
|
||||
``cpupower`` tool support for ``amd-pstate``
|
||||
===============================================
|
||||
|
||||
|
8
Documentation/admin-guide/thermal/index.rst
Normal file
8
Documentation/admin-guide/thermal/index.rst
Normal file
@ -0,0 +1,8 @@
|
||||
=================
|
||||
Thermal Subsystem
|
||||
=================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
intel_powerclamp
|
@ -26,6 +26,8 @@ By:
|
||||
- Generic Thermal Layer (sysfs)
|
||||
- Kernel APIs (TBD)
|
||||
|
||||
(*) Module Parameters
|
||||
|
||||
INTRODUCTION
|
||||
============
|
||||
|
||||
@ -153,13 +155,15 @@ b) determine the amount of compensation needed at each target ratio
|
||||
Compensation to each target ratio consists of two parts:
|
||||
|
||||
a) steady state error compensation
|
||||
This is to offset the error occurring when the system can
|
||||
enter idle without extra wakeups (such as external interrupts).
|
||||
|
||||
This is to offset the error occurring when the system can
|
||||
enter idle without extra wakeups (such as external interrupts).
|
||||
|
||||
b) dynamic error compensation
|
||||
When an excessive amount of wakeups occurs during idle, an
|
||||
additional idle ratio can be added to quiet interrupts, by
|
||||
slowing down CPU activities.
|
||||
|
||||
When an excessive amount of wakeups occurs during idle, an
|
||||
additional idle ratio can be added to quiet interrupts, by
|
||||
slowing down CPU activities.
|
||||
|
||||
A debugfs file is provided for the user to examine compensation
|
||||
progress and results, such as on a Westmere system::
|
||||
@ -281,6 +285,7 @@ cur_state returns value -1 instead of 0 which is to avoid confusing
|
||||
100% busy state with the disabled state.
|
||||
|
||||
Example usage:
|
||||
|
||||
- To inject 25% idle time::
|
||||
|
||||
$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
|
||||
@ -318,3 +323,23 @@ device, a PID based userspace thermal controller can manage to
|
||||
control CPU temperature effectively, when no other thermal influence
|
||||
is added. For example, a UltraBook user can compile the kernel under
|
||||
certain temperature (below most active trip points).
|
||||
|
||||
Module Parameters
|
||||
=================
|
||||
|
||||
``cpumask`` (RW)
|
||||
A bit mask of CPUs to inject idle. The format of the bitmask is same as
|
||||
used in other subsystems like in /proc/irq/\*/smp_affinity. The mask is
|
||||
comma separated 32 bit groups. Each CPU is one bit. For example for a 256
|
||||
CPU system the full mask is:
|
||||
ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff
|
||||
|
||||
The rightmost mask is for CPU 0-32.
|
||||
|
||||
``max_idle`` (RW)
|
||||
Maximum injected idle time to the total CPU time ratio in percent range
|
||||
from 1 to 100. Even if the cooling device max_state is always 100 (100%),
|
||||
this parameter allows to add a max idle percent limit. The default is 50,
|
||||
to match the current implementation of powerclamp driver. Also doesn't
|
||||
allow value more than 75, if the cpumask includes every CPU present in
|
||||
the system.
|
@ -64,7 +64,6 @@ SoC-specific documents
|
||||
sunxi
|
||||
|
||||
samsung/index
|
||||
samsung-s3c24xx/index
|
||||
|
||||
sunxi/clocks
|
||||
|
||||
|
@ -1,77 +0,0 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
=======================
|
||||
S3C24XX CPUfreq support
|
||||
=======================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The S3C24XX series support a number of power saving systems, such as
|
||||
the ability to change the core, memory and peripheral operating
|
||||
frequencies. The core control is exported via the CPUFreq driver
|
||||
which has a number of different manual or automatic controls over the
|
||||
rate the core is running at.
|
||||
|
||||
There are two forms of the driver depending on the specific CPU and
|
||||
how the clocks are arranged. The first implementation used as single
|
||||
PLL to feed the ARM, memory and peripherals via a series of dividers
|
||||
and muxes and this is the implementation that is documented here. A
|
||||
newer version where there is a separate PLL and clock divider for the
|
||||
ARM core is available as a separate driver.
|
||||
|
||||
|
||||
Layout
|
||||
------
|
||||
|
||||
The code core manages the CPU specific drivers, any data that they
|
||||
need to register and the interface to the generic drivers/cpufreq
|
||||
system. Each CPU registers a driver to control the PLL, clock dividers
|
||||
and anything else associated with it. Any board that wants to use this
|
||||
framework needs to supply at least basic details of what is required.
|
||||
|
||||
The core registers with drivers/cpufreq at init time if all the data
|
||||
necessary has been supplied.
|
||||
|
||||
|
||||
CPU support
|
||||
-----------
|
||||
|
||||
The support for each CPU depends on the facilities provided by the
|
||||
SoC and the driver as each device has different PLL and clock chains
|
||||
associated with it.
|
||||
|
||||
|
||||
Slow Mode
|
||||
---------
|
||||
|
||||
The SLOW mode where the PLL is turned off altogether and the
|
||||
system is fed by the external crystal input is currently not
|
||||
supported.
|
||||
|
||||
|
||||
sysfs
|
||||
-----
|
||||
|
||||
The core code exports extra information via sysfs in the directory
|
||||
devices/system/cpu/cpu0/arch-freq.
|
||||
|
||||
|
||||
Board Support
|
||||
-------------
|
||||
|
||||
Each board that wants to use the cpufreq code must register some basic
|
||||
information with the core driver to provide information about what the
|
||||
board requires and any restrictions being placed on it.
|
||||
|
||||
The board needs to supply information about whether it needs the IO bank
|
||||
timings changing, any maximum frequency limits and information about the
|
||||
SDRAM refresh rate.
|
||||
|
||||
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2009 Simtec Electronics
|
@ -1,59 +0,0 @@
|
||||
===================================
|
||||
Simtec Electronics EB2410ITX (BAST)
|
||||
===================================
|
||||
|
||||
http://www.simtec.co.uk/products/EB2410ITX/
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The EB2410ITX is a S3C2410 based development board with a variety of
|
||||
peripherals and expansion connectors. This board is also known by
|
||||
the shortened name of Bast.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
To set the default configuration, use `make bast_defconfig` which
|
||||
supports the commonly used features of this board.
|
||||
|
||||
|
||||
Support
|
||||
-------
|
||||
|
||||
Official support information can be found on the Simtec Electronics
|
||||
website, at the product page http://www.simtec.co.uk/products/EB2410ITX/
|
||||
|
||||
Useful links:
|
||||
|
||||
- Resources Page http://www.simtec.co.uk/products/EB2410ITX/resources.html
|
||||
|
||||
- Board FAQ at http://www.simtec.co.uk/products/EB2410ITX/faq.html
|
||||
|
||||
- Bootloader info http://www.simtec.co.uk/products/SWABLE/resources.html
|
||||
and FAQ http://www.simtec.co.uk/products/SWABLE/faq.html
|
||||
|
||||
|
||||
MTD
|
||||
---
|
||||
|
||||
The NAND and NOR support has been merged from the linux-mtd project.
|
||||
Any problems, see http://www.linux-mtd.infradead.org/ for more
|
||||
information or up-to-date versions of linux-mtd.
|
||||
|
||||
|
||||
IDE
|
||||
---
|
||||
|
||||
Both onboard IDE ports are supported, however there is no support for
|
||||
changing speed of devices, PIO Mode 4 capable drives should be used.
|
||||
|
||||
|
||||
Maintainers
|
||||
-----------
|
||||
|
||||
This board is maintained by Simtec Electronics.
|
||||
|
||||
|
||||
Copyright 2004 Ben Dooks, Simtec Electronics
|
@ -1,172 +0,0 @@
|
||||
====================
|
||||
S3C24XX GPIO Control
|
||||
====================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The s3c2410 kernel provides an interface to configure and
|
||||
manipulate the state of the GPIO pins, and find out other
|
||||
information about them.
|
||||
|
||||
There are a number of conditions attached to the configuration
|
||||
of the s3c2410 GPIO system, please read the Samsung provided
|
||||
data-sheet/users manual to find out the complete list.
|
||||
|
||||
See Documentation/arm/samsung/gpio.rst for the core implementation.
|
||||
|
||||
|
||||
GPIOLIB
|
||||
-------
|
||||
|
||||
With the event of the GPIOLIB in drivers/gpio, support for some
|
||||
of the GPIO functions such as reading and writing a pin will
|
||||
be removed in favour of this common access method.
|
||||
|
||||
Once all the extant drivers have been converted, the functions
|
||||
listed below will be removed (they may be marked as __deprecated
|
||||
in the near future).
|
||||
|
||||
The following functions now either have a `s3c_` specific variant
|
||||
or are merged into gpiolib. See the definitions in
|
||||
arch/arm/mach-s3c/gpio-cfg.h:
|
||||
|
||||
- s3c2410_gpio_setpin() gpio_set_value() or gpio_direction_output()
|
||||
- s3c2410_gpio_getpin() gpio_get_value() or gpio_direction_input()
|
||||
- s3c2410_gpio_getirq() gpio_to_irq()
|
||||
- s3c2410_gpio_cfgpin() s3c_gpio_cfgpin()
|
||||
- s3c2410_gpio_getcfg() s3c_gpio_getcfg()
|
||||
- s3c2410_gpio_pullup() s3c_gpio_setpull()
|
||||
|
||||
|
||||
GPIOLIB conversion
|
||||
------------------
|
||||
|
||||
If you need to convert your board or driver to use gpiolib from the phased
|
||||
out s3c2410 API, then here are some notes on the process.
|
||||
|
||||
1) If your board is exclusively using an GPIO, say to control peripheral
|
||||
power, then it will require to claim the gpio with gpio_request() before
|
||||
it can use it.
|
||||
|
||||
It is recommended to check the return value, with at least WARN_ON()
|
||||
during initialisation.
|
||||
|
||||
2) The s3c2410_gpio_cfgpin() can be directly replaced with s3c_gpio_cfgpin()
|
||||
as they have the same arguments, and can either take the pin specific
|
||||
values, or the more generic special-function-number arguments.
|
||||
|
||||
3) s3c2410_gpio_pullup() changes have the problem that while the
|
||||
s3c2410_gpio_pullup(x, 1) can be easily translated to the
|
||||
s3c_gpio_setpull(x, S3C_GPIO_PULL_NONE), the s3c2410_gpio_pullup(x, 0)
|
||||
are not so easy.
|
||||
|
||||
The s3c2410_gpio_pullup(x, 0) case enables the pull-up (or in the case
|
||||
of some of the devices, a pull-down) and as such the new API distinguishes
|
||||
between the UP and DOWN case. There is currently no 'just turn on' setting
|
||||
which may be required if this becomes a problem.
|
||||
|
||||
4) s3c2410_gpio_setpin() can be replaced by gpio_set_value(), the old call
|
||||
does not implicitly configure the relevant gpio to output. The gpio
|
||||
direction should be changed before using gpio_set_value().
|
||||
|
||||
5) s3c2410_gpio_getpin() is replaceable by gpio_get_value() if the pin
|
||||
has been set to input. It is currently unknown what the behaviour is
|
||||
when using gpio_get_value() on an output pin (s3c2410_gpio_getpin
|
||||
would return the value the pin is supposed to be outputting).
|
||||
|
||||
6) s3c2410_gpio_getirq() should be directly replaceable with the
|
||||
gpio_to_irq() call.
|
||||
|
||||
The s3c2410_gpio and `gpio_` calls have always operated on the same gpio
|
||||
numberspace, so there is no problem with converting the gpio numbering
|
||||
between the calls.
|
||||
|
||||
|
||||
Headers
|
||||
-------
|
||||
|
||||
See arch/arm/mach-s3c/regs-gpio-s3c24xx.h for the list
|
||||
of GPIO pins, and the configuration values for them. This
|
||||
is included by using #include <mach/regs-gpio.h>
|
||||
|
||||
|
||||
PIN Numbers
|
||||
-----------
|
||||
|
||||
Each pin has an unique number associated with it in regs-gpio.h,
|
||||
e.g. S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
|
||||
the GPIO functions which pin is to be used.
|
||||
|
||||
With the conversion to gpiolib, there is no longer a direct conversion
|
||||
from gpio pin number to register base address as in earlier kernels. This
|
||||
is due to the number space required for newer SoCs where the later
|
||||
GPIOs are not contiguous.
|
||||
|
||||
|
||||
Configuring a pin
|
||||
-----------------
|
||||
|
||||
The following function allows the configuration of a given pin to
|
||||
be changed.
|
||||
|
||||
void s3c_gpio_cfgpin(unsigned int pin, unsigned int function);
|
||||
|
||||
e.g.:
|
||||
|
||||
s3c_gpio_cfgpin(S3C2410_GPA(0), S3C_GPIO_SFN(1));
|
||||
s3c_gpio_cfgpin(S3C2410_GPE(8), S3C_GPIO_SFN(2));
|
||||
|
||||
which would turn GPA(0) into the lowest Address line A0, and set
|
||||
GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
|
||||
|
||||
|
||||
Reading the current configuration
|
||||
---------------------------------
|
||||
|
||||
The current configuration of a pin can be read by using standard
|
||||
gpiolib function:
|
||||
|
||||
s3c_gpio_getcfg(unsigned int pin);
|
||||
|
||||
The return value will be from the same set of values which can be
|
||||
passed to s3c_gpio_cfgpin().
|
||||
|
||||
|
||||
Configuring a pull-up resistor
|
||||
------------------------------
|
||||
|
||||
A large proportion of the GPIO pins on the S3C2410 can have weak
|
||||
pull-up resistors enabled. This can be configured by the following
|
||||
function:
|
||||
|
||||
void s3c_gpio_setpull(unsigned int pin, unsigned int to);
|
||||
|
||||
Where the to value is S3C_GPIO_PULL_NONE to set the pull-up off,
|
||||
and S3C_GPIO_PULL_UP to enable the specified pull-up. Any other
|
||||
values are currently undefined.
|
||||
|
||||
|
||||
Getting and setting the state of a PIN
|
||||
--------------------------------------
|
||||
|
||||
These calls are now implemented by the relevant gpiolib calls, convert
|
||||
your board or driver to use gpiolib.
|
||||
|
||||
|
||||
Getting the IRQ number associated with a PIN
|
||||
--------------------------------------------
|
||||
|
||||
A standard gpiolib function can map the given pin number to an IRQ
|
||||
number to pass to the IRQ system.
|
||||
|
||||
int gpio_to_irq(unsigned int pin);
|
||||
|
||||
Note, not all pins have an IRQ.
|
||||
|
||||
|
||||
Author
|
||||
-------
|
||||
|
||||
Ben Dooks, 03 October 2004
|
||||
Copyright 2004 Ben Dooks, Simtec Electronics
|
@ -1,41 +0,0 @@
|
||||
=============
|
||||
HP IPAQ H1940
|
||||
=============
|
||||
|
||||
http://www.handhelds.org/projects/h1940.html
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The HP H1940 is a S3C2410 based handheld device, with
|
||||
bluetooth connectivity.
|
||||
|
||||
|
||||
Support
|
||||
-------
|
||||
|
||||
A variety of information is available
|
||||
|
||||
handhelds.org project page:
|
||||
|
||||
http://www.handhelds.org/projects/h1940.html
|
||||
|
||||
handhelds.org wiki page:
|
||||
|
||||
http://handhelds.org/moin/moin.cgi/HpIpaqH1940
|
||||
|
||||
Herbert Pötzl pages:
|
||||
|
||||
http://vserver.13thfloor.at/H1940/
|
||||
|
||||
|
||||
Maintainers
|
||||
-----------
|
||||
|
||||
This project is being maintained and developed by a variety
|
||||
of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl.
|
||||
|
||||
Thanks to the many others who have also provided support.
|
||||
|
||||
|
||||
(c) 2005 Ben Dooks
|
@ -1,20 +0,0 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========================
|
||||
Samsung S3C24XX SoC Family
|
||||
==========================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
h1940
|
||||
gpio
|
||||
cpufreq
|
||||
suspend
|
||||
usb-host
|
||||
s3c2412
|
||||
eb2410itx
|
||||
nand
|
||||
smdk2440
|
||||
s3c2413
|
||||
overview
|
@ -1,30 +0,0 @@
|
||||
====================
|
||||
S3C24XX NAND Support
|
||||
====================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
Small Page NAND
|
||||
---------------
|
||||
|
||||
The driver uses a 512 byte (1 page) ECC code for this setup. The
|
||||
ECC code is not directly compatible with the default kernel ECC
|
||||
code, so the driver enforces its own OOB layout and ECC parameters
|
||||
|
||||
Large Page NAND
|
||||
---------------
|
||||
|
||||
The driver is capable of handling NAND flash with a 2KiB page
|
||||
size, with support for hardware ECC generation and correction.
|
||||
|
||||
Unlike the 512byte page mode, the driver generates ECC data for
|
||||
each 256 byte block in an 2KiB page. This means that more than
|
||||
one error in a page can be rectified. It also means that the
|
||||
OOB layout remains the default kernel layout for these flashes.
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2007 Simtec Electronics
|
@ -1,311 +0,0 @@
|
||||
==========================
|
||||
S3C24XX ARM Linux Overview
|
||||
==========================
|
||||
|
||||
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
|
||||
by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
|
||||
S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
|
||||
are supported.
|
||||
|
||||
Support for the S3C2400 and S3C24A0 series was never completed and the
|
||||
corresponding code has been removed after a while. If someone wishes to
|
||||
revive this effort, partial support can be retrieved from earlier Linux
|
||||
versions.
|
||||
|
||||
The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
|
||||
included under the arch/arm/mach-s3c directory. Note, while core
|
||||
support for these SoCs is in, work on some of the extra peripherals
|
||||
and extra interrupts is still ongoing.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
A generic S3C2410 configuration is provided, and can be used as the
|
||||
default by `make s3c2410_defconfig`. This configuration has support
|
||||
for all the machines, and the commonly used features on them.
|
||||
|
||||
Certain machines may have their own default configurations as well,
|
||||
please check the machine specific documentation.
|
||||
|
||||
|
||||
Layout
|
||||
------
|
||||
|
||||
The core support files, register, kernel and paltform data are located in the
|
||||
platform code contained in arch/arm/mach-s3c with headers in
|
||||
arch/arm/mach-s3c/include
|
||||
|
||||
arch/arm/mach-s3c:
|
||||
|
||||
Files in here are either common to all the s3c24xx family,
|
||||
or are common to only some of them with names to indicate this
|
||||
status. The files that are not common to all are generally named
|
||||
with the initial cpu they support in the series to ensure a short
|
||||
name without any possibility of confusion with newer devices.
|
||||
|
||||
As an example, initially s3c244x would cover s3c2440 and s3c2442, but
|
||||
with the s3c2443 which does not share many of the same drivers in
|
||||
this directory, the name becomes invalid. We stick to s3c2440-<x>
|
||||
to indicate a driver that is s3c2440 and s3c2442 compatible.
|
||||
|
||||
This does mean that to find the status of any given SoC, a number
|
||||
of directories may need to be searched.
|
||||
|
||||
|
||||
Machines
|
||||
--------
|
||||
|
||||
The currently supported machines are as follows:
|
||||
|
||||
Simtec Electronics EB2410ITX (BAST)
|
||||
|
||||
A general purpose development board, see EB2410ITX.txt for further
|
||||
details
|
||||
|
||||
Simtec Electronics IM2440D20 (Osiris)
|
||||
|
||||
CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
|
||||
and a PCMCIA controller.
|
||||
|
||||
Samsung SMDK2410
|
||||
|
||||
Samsung's own development board, geared for PDA work.
|
||||
|
||||
Samsung/Aiji SMDK2412
|
||||
|
||||
The S3C2412 version of the SMDK2440.
|
||||
|
||||
Samsung/Aiji SMDK2413
|
||||
|
||||
The S3C2412 version of the SMDK2440.
|
||||
|
||||
Samsung/Meritech SMDK2440
|
||||
|
||||
The S3C2440 compatible version of the SMDK2440, which has the
|
||||
option of an S3C2440 or S3C2442 CPU module.
|
||||
|
||||
Thorcom VR1000
|
||||
|
||||
Custom embedded board
|
||||
|
||||
HP IPAQ 1940
|
||||
|
||||
Handheld (IPAQ), available in several varieties
|
||||
|
||||
HP iPAQ rx3715
|
||||
|
||||
S3C2440 based IPAQ, with a number of variations depending on
|
||||
features shipped.
|
||||
|
||||
Acer N30
|
||||
|
||||
A S3C2410 based PDA from Acer. There is a Wiki page at
|
||||
http://handhelds.org/moin/moin.cgi/AcerN30Documentation .
|
||||
|
||||
AML M5900
|
||||
|
||||
American Microsystems' M5900
|
||||
|
||||
Nex Vision Nexcoder
|
||||
Nex Vision Otom
|
||||
|
||||
Two machines by Nex Vision
|
||||
|
||||
|
||||
Adding New Machines
|
||||
-------------------
|
||||
|
||||
The architecture has been designed to support as many machines as can
|
||||
be configured for it in one kernel build, and any future additions
|
||||
should keep this in mind before altering items outside of their own
|
||||
machine files.
|
||||
|
||||
Machine definitions should be kept in arch/arm/mach-s3c,
|
||||
and there are a number of examples that can be looked at.
|
||||
|
||||
Read the kernel patch submission policies as well as the
|
||||
Documentation/arm directory before submitting patches. The
|
||||
ARM kernel series is managed by Russell King, and has a patch system
|
||||
located at http://www.arm.linux.org.uk/developer/patches/
|
||||
as well as mailing lists that can be found from the same site.
|
||||
|
||||
As a courtesy, please notify <ben-linux@fluff.org> of any new
|
||||
machines or other modifications.
|
||||
|
||||
Any large scale modifications, or new drivers should be discussed
|
||||
on the ARM kernel mailing list (linux-arm-kernel) before being
|
||||
attempted. See http://www.arm.linux.org.uk/mailinglists/ for the
|
||||
mailing list information.
|
||||
|
||||
|
||||
I2C
|
||||
---
|
||||
|
||||
The hardware I2C core in the CPU is supported in single master
|
||||
mode, and can be configured via platform data.
|
||||
|
||||
|
||||
RTC
|
||||
---
|
||||
|
||||
Support for the onboard RTC unit, including alarm function.
|
||||
|
||||
This has recently been upgraded to use the new RTC core,
|
||||
and the module has been renamed to rtc-s3c to fit in with
|
||||
the new rtc naming scheme.
|
||||
|
||||
|
||||
Watchdog
|
||||
--------
|
||||
|
||||
The onchip watchdog is available via the standard watchdog
|
||||
interface.
|
||||
|
||||
|
||||
NAND
|
||||
----
|
||||
|
||||
The current kernels now have support for the s3c2410 NAND
|
||||
controller. If there are any problems the latest linux-mtd
|
||||
code can be found from http://www.linux-mtd.infradead.org/
|
||||
|
||||
For more information see Documentation/arm/samsung-s3c24xx/nand.rst
|
||||
|
||||
|
||||
SD/MMC
|
||||
------
|
||||
|
||||
The SD/MMC hardware pre S3C2443 is supported in the current
|
||||
kernel, the driver is drivers/mmc/host/s3cmci.c and supports
|
||||
1 and 4 bit SD or MMC cards.
|
||||
|
||||
The SDIO behaviour of this driver has not been fully tested. There is no
|
||||
current support for hardware SDIO interrupts.
|
||||
|
||||
|
||||
Serial
|
||||
------
|
||||
|
||||
The s3c2410 serial driver provides support for the internal
|
||||
serial ports. These devices appear as /dev/ttySAC0 through 3.
|
||||
|
||||
To create device nodes for these, use the following commands
|
||||
|
||||
mknod ttySAC0 c 204 64
|
||||
mknod ttySAC1 c 204 65
|
||||
mknod ttySAC2 c 204 66
|
||||
|
||||
|
||||
GPIO
|
||||
----
|
||||
|
||||
The core contains support for manipulating the GPIO, see the
|
||||
documentation in GPIO.txt in the same directory as this file.
|
||||
|
||||
Newer kernels carry GPIOLIB, and support is being moved towards
|
||||
this with some of the older support in line to be removed.
|
||||
|
||||
As of v2.6.34, the move towards using gpiolib support is almost
|
||||
complete, and very little of the old calls are left.
|
||||
|
||||
See Documentation/arm/samsung-s3c24xx/gpio.rst for the S3C24XX specific
|
||||
support and Documentation/arm/samsung/gpio.rst for the core Samsung
|
||||
implementation.
|
||||
|
||||
|
||||
Clock Management
|
||||
----------------
|
||||
|
||||
The core provides the interface defined in the header file
|
||||
include/asm-arm/hardware/clock.h, to allow control over the
|
||||
various clock units
|
||||
|
||||
|
||||
Suspend to RAM
|
||||
--------------
|
||||
|
||||
For boards that provide support for suspend to RAM, the
|
||||
system can be placed into low power suspend.
|
||||
|
||||
See Suspend.txt for more information.
|
||||
|
||||
|
||||
SPI
|
||||
---
|
||||
|
||||
SPI drivers are available for both the in-built hardware
|
||||
(although there is no DMA support yet) and a generic
|
||||
GPIO based solution.
|
||||
|
||||
|
||||
LEDs
|
||||
----
|
||||
|
||||
There is support for GPIO based LEDs via a platform driver
|
||||
in the LED subsystem.
|
||||
|
||||
|
||||
Platform Data
|
||||
-------------
|
||||
|
||||
Whenever a device has platform specific data that is specified
|
||||
on a per-machine basis, care should be taken to ensure the
|
||||
following:
|
||||
|
||||
1) that default data is not left in the device to confuse the
|
||||
driver if a machine does not set it at startup
|
||||
|
||||
2) the data should (if possible) be marked as __initdata,
|
||||
to ensure that the data is thrown away if the machine is
|
||||
not the one currently in use.
|
||||
|
||||
The best way of doing this is to make a function that
|
||||
kmalloc()s an area of memory, and copies the __initdata
|
||||
and then sets the relevant device's platform data. Making
|
||||
the function `__init` takes care of ensuring it is discarded
|
||||
with the rest of the initialisation code::
|
||||
|
||||
static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
|
||||
{
|
||||
struct s3c2410_xxx_mach_info *npd;
|
||||
|
||||
npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
|
||||
if (npd) {
|
||||
memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
|
||||
s3c_device_xxx.dev.platform_data = npd;
|
||||
} else {
|
||||
printk(KERN_ERR "no memory for xxx platform data\n");
|
||||
}
|
||||
}
|
||||
|
||||
Note, since the code is marked as __init, it should not be
|
||||
exported outside arch/arm/mach-s3c/, or exported to
|
||||
modules via EXPORT_SYMBOL() and related functions.
|
||||
|
||||
|
||||
Port Contributors
|
||||
-----------------
|
||||
|
||||
Ben Dooks (BJD)
|
||||
Vincent Sanders
|
||||
Herbert Potzl
|
||||
Arnaud Patard (RTP)
|
||||
Roc Wu
|
||||
Klaus Fetscher
|
||||
Dimitry Andric
|
||||
Shannon Holland
|
||||
Guillaume Gourat (NexVision)
|
||||
Christer Weinigel (wingel) (Acer N30)
|
||||
Lucas Correia Villa Real (S3C2400 port)
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2004-2006 Simtec Electronics
|
@ -1,121 +0,0 @@
|
||||
==========================
|
||||
S3C2412 ARM Linux Overview
|
||||
==========================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
|
||||
from Samsung. This part has an ARM926-EJS core, capable of running up
|
||||
to 266MHz (see data-sheet for more information)
|
||||
|
||||
|
||||
Clock
|
||||
-----
|
||||
|
||||
The core clock code provides a set of clocks to the drivers, and allows
|
||||
for source selection and a number of other features.
|
||||
|
||||
|
||||
Power
|
||||
-----
|
||||
|
||||
No support for suspend/resume to RAM in the current system.
|
||||
|
||||
|
||||
DMA
|
||||
---
|
||||
|
||||
No current support for DMA.
|
||||
|
||||
|
||||
GPIO
|
||||
----
|
||||
|
||||
There is support for setting the GPIO to input/output/special function
|
||||
and reading or writing to them.
|
||||
|
||||
|
||||
UART
|
||||
----
|
||||
|
||||
The UART hardware is similar to the S3C2440, and is supported by the
|
||||
s3c2410 driver in the drivers/serial directory.
|
||||
|
||||
|
||||
NAND
|
||||
----
|
||||
|
||||
The NAND hardware is similar to the S3C2440, and is supported by the
|
||||
s3c2410 driver in the drivers/mtd/nand/raw directory.
|
||||
|
||||
|
||||
USB Host
|
||||
--------
|
||||
|
||||
The USB hardware is similar to the S3C2410, with extended clock source
|
||||
control. The OHCI portion is supported by the ohci-s3c2410 driver, and
|
||||
the clock control selection is supported by the core clock code.
|
||||
|
||||
|
||||
USB Device
|
||||
----------
|
||||
|
||||
No current support in the kernel
|
||||
|
||||
|
||||
IRQs
|
||||
----
|
||||
|
||||
All the standard, and external interrupt sources are supported. The
|
||||
extra sub-sources are not yet supported.
|
||||
|
||||
|
||||
RTC
|
||||
---
|
||||
|
||||
The RTC hardware is similar to the S3C2410, and is supported by the
|
||||
s3c2410-rtc driver.
|
||||
|
||||
|
||||
Watchdog
|
||||
--------
|
||||
|
||||
The watchdog hardware is the same as the S3C2410, and is supported by
|
||||
the s3c2410_wdt driver.
|
||||
|
||||
|
||||
MMC/SD/SDIO
|
||||
-----------
|
||||
|
||||
No current support for the MMC/SD/SDIO block.
|
||||
|
||||
IIC
|
||||
---
|
||||
|
||||
The IIC hardware is the same as the S3C2410, and is supported by the
|
||||
i2c-s3c24xx driver.
|
||||
|
||||
|
||||
IIS
|
||||
---
|
||||
|
||||
No current support for the IIS interface.
|
||||
|
||||
|
||||
SPI
|
||||
---
|
||||
|
||||
No current support for the SPI interfaces.
|
||||
|
||||
|
||||
ATA
|
||||
---
|
||||
|
||||
No current support for the on-board ATA block.
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2006 Simtec Electronics
|
@ -1,22 +0,0 @@
|
||||
==========================
|
||||
S3C2413 ARM Linux Overview
|
||||
==========================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The S3C2413 is an extended version of the S3C2412, with an camera
|
||||
interface and mobile DDR memory support. See the S3C2412 support
|
||||
documentation for more information.
|
||||
|
||||
|
||||
Camera Interface
|
||||
----------------
|
||||
|
||||
This block is currently not supported.
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2006 Simtec Electronics
|
@ -1,57 +0,0 @@
|
||||
=========================
|
||||
Samsung/Meritech SMDK2440
|
||||
=========================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The SMDK2440 is a two part evaluation board for the Samsung S3C2440
|
||||
processor. It includes support for LCD, SmartMedia, Audio, SD and
|
||||
10MBit Ethernet, and expansion headers for various signals, including
|
||||
the camera and unused GPIO.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
To set the default configuration, use `make smdk2440_defconfig` which
|
||||
will configure the common features of this board, or use
|
||||
`make s3c2410_config` to include support for all s3c2410/s3c2440 machines
|
||||
|
||||
|
||||
Support
|
||||
-------
|
||||
|
||||
Ben Dooks' SMDK2440 site at http://www.fluff.org/ben/smdk2440/ which
|
||||
includes linux based USB download tools.
|
||||
|
||||
Some of the h1940 patches that can be found from the H1940 project
|
||||
site at http://www.handhelds.org/projects/h1940.html can also be
|
||||
applied to this board.
|
||||
|
||||
|
||||
Peripherals
|
||||
-----------
|
||||
|
||||
There is no current support for any of the extra peripherals on the
|
||||
base-board itself.
|
||||
|
||||
|
||||
MTD
|
||||
---
|
||||
|
||||
The NAND flash should be supported by the in kernel MTD NAND support,
|
||||
NOR flash will be added later.
|
||||
|
||||
|
||||
Maintainers
|
||||
-----------
|
||||
|
||||
This board is being maintained by Ben Dooks, for more info, see
|
||||
http://www.fluff.org/ben/smdk2440/
|
||||
|
||||
Many thanks to Dimitry Andric of TomTom for the loan of the SMDK2440,
|
||||
and to Simtec Electronics for allowing me time to work on this.
|
||||
|
||||
|
||||
(c) 2004 Ben Dooks
|
@ -1,137 +0,0 @@
|
||||
=======================
|
||||
S3C24XX Suspend Support
|
||||
=======================
|
||||
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The S3C24XX supports a low-power suspend mode, where the SDRAM is kept
|
||||
in Self-Refresh mode, and all but the essential peripheral blocks are
|
||||
powered down. For more information on how this works, please look
|
||||
at the relevant CPU datasheet from Samsung.
|
||||
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
1) A bootloader that can support the necessary resume operation
|
||||
|
||||
2) Support for at least 1 source for resume
|
||||
|
||||
3) CONFIG_PM enabled in the kernel
|
||||
|
||||
4) Any peripherals that are going to be powered down at the same
|
||||
time require suspend/resume support.
|
||||
|
||||
|
||||
Resuming
|
||||
--------
|
||||
|
||||
The S3C2410 user manual defines the process of sending the CPU to
|
||||
sleep and how it resumes. The default behaviour of the Linux code
|
||||
is to set the GSTATUS3 register to the physical address of the
|
||||
code to resume Linux operation.
|
||||
|
||||
GSTATUS4 is currently left alone by the sleep code, and is free to
|
||||
use for any other purposes (for example, the EB2410ITX uses this to
|
||||
save memory configuration in).
|
||||
|
||||
|
||||
Machine Support
|
||||
---------------
|
||||
|
||||
The machine specific functions must call the s3c_pm_init() function
|
||||
to say that its bootloader is capable of resuming. This can be as
|
||||
simple as adding the following to the machine's definition:
|
||||
|
||||
INITMACHINE(s3c_pm_init)
|
||||
|
||||
A board can do its own setup before calling s3c_pm_init, if it
|
||||
needs to setup anything else for power management support.
|
||||
|
||||
There is currently no support for over-riding the default method of
|
||||
saving the resume address, if your board requires it, then contact
|
||||
the maintainer and discuss what is required.
|
||||
|
||||
Note, the original method of adding an late_initcall() is wrong,
|
||||
and will end up initialising all compiled machines' pm init!
|
||||
|
||||
The following is an example of code used for testing wakeup from
|
||||
an falling edge on IRQ_EINT0::
|
||||
|
||||
|
||||
static irqreturn_t button_irq(int irq, void *pw)
|
||||
{
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
statuc void __init machine_init(void)
|
||||
{
|
||||
...
|
||||
|
||||
request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
|
||||
"button-irq-eint0", NULL);
|
||||
|
||||
enable_irq_wake(IRQ_EINT0);
|
||||
|
||||
s3c_pm_init();
|
||||
}
|
||||
|
||||
|
||||
Debugging
|
||||
---------
|
||||
|
||||
There are several important things to remember when using PM suspend:
|
||||
|
||||
1) The uart drivers will disable the clocks to the UART blocks when
|
||||
suspending, which means that use of printascii() or similar direct
|
||||
access to the UARTs will cause the debug to stop.
|
||||
|
||||
2) While the pm code itself will attempt to re-enable the UART clocks,
|
||||
care should be taken that any external clock sources that the UARTs
|
||||
rely on are still enabled at that point.
|
||||
|
||||
3) If any debugging is placed in the resume path, then it must have the
|
||||
relevant clocks and peripherals setup before use (ie, bootloader).
|
||||
|
||||
For example, if you transmit a character from the UART, the baud
|
||||
rate and uart controls must be setup beforehand.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
The S3C2410 specific configuration in `System Type` defines various
|
||||
aspects of how the S3C2410 suspend and resume support is configured
|
||||
|
||||
`S3C2410 PM Suspend debug`
|
||||
|
||||
This option prints messages to the serial console before and after
|
||||
the actual suspend, giving detailed information on what is
|
||||
happening
|
||||
|
||||
|
||||
`S3C2410 PM Suspend Memory CRC`
|
||||
|
||||
Allows the entire memory to be checksummed before and after the
|
||||
suspend to see if there has been any corruption of the contents.
|
||||
|
||||
Note, the time to calculate the CRC is dependent on the CPU speed
|
||||
and the size of memory. For an 64Mbyte RAM area on an 200MHz
|
||||
S3C2410, this can take approximately 4 seconds to complete.
|
||||
|
||||
This support requires the CRC32 function to be enabled.
|
||||
|
||||
|
||||
`S3C2410 PM Suspend CRC Chunksize (KiB)`
|
||||
|
||||
Defines the size of memory each CRC chunk covers. A smaller value
|
||||
will mean that the CRC data block will take more memory, but will
|
||||
identify any faults with better precision
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2004 Simtec Electronics
|
@ -1,91 +0,0 @@
|
||||
========================
|
||||
S3C24XX USB Host support
|
||||
========================
|
||||
|
||||
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
This document details the S3C2410/S3C2440 in-built OHCI USB host support.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
Enable at least the following kernel options:
|
||||
|
||||
menuconfig::
|
||||
|
||||
Device Drivers --->
|
||||
USB support --->
|
||||
<*> Support for Host-side USB
|
||||
<*> OHCI HCD support
|
||||
|
||||
|
||||
.config:
|
||||
|
||||
- CONFIG_USB
|
||||
- CONFIG_USB_OHCI_HCD
|
||||
|
||||
|
||||
Once these options are configured, the standard set of USB device
|
||||
drivers can be configured and used.
|
||||
|
||||
|
||||
Board Support
|
||||
-------------
|
||||
|
||||
The driver attaches to a platform device, which will need to be
|
||||
added by the board specific support file in arch/arm/mach-s3c,
|
||||
such as mach-bast.c or mach-smdk2410.c
|
||||
|
||||
The platform device's platform_data field is only needed if the
|
||||
board implements extra power control or over-current monitoring.
|
||||
|
||||
The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
|
||||
register, so if both ports are to be used for the host, then it is
|
||||
the board support file's responsibility to ensure that the second
|
||||
port is configured to be connected to the OHCI core.
|
||||
|
||||
|
||||
Platform Data
|
||||
-------------
|
||||
|
||||
See include/linux/platform_data/usb-ohci-s3c2410.h for the
|
||||
descriptions of the platform device data. An implementation
|
||||
can be found in arch/arm/mach-s3c/simtec-usb.c .
|
||||
|
||||
The `struct s3c2410_hcd_info` contains a pair of functions
|
||||
that get called to enable over-current detection, and to
|
||||
control the port power status.
|
||||
|
||||
The ports are numbered 0 and 1.
|
||||
|
||||
power_control:
|
||||
Called to enable or disable the power on the port.
|
||||
|
||||
enable_oc:
|
||||
Called to enable or disable the over-current monitoring.
|
||||
This should claim or release the resources being used to
|
||||
check the power condition on the port, such as an IRQ.
|
||||
|
||||
report_oc:
|
||||
The OHCI driver fills this field in for the over-current code
|
||||
to call when there is a change to the over-current state on
|
||||
an port. The ports argument is a bitmask of 1 bit per port,
|
||||
with bit X being 1 for an over-current on port X.
|
||||
|
||||
The function s3c2410_usb_report_oc() has been provided to
|
||||
ensure this is called correctly.
|
||||
|
||||
port[x]:
|
||||
This is struct describes each port, 0 or 1. The platform driver
|
||||
should set the flags field of each port to S3C_HCDFLG_USED if
|
||||
the port is enabled.
|
||||
|
||||
|
||||
|
||||
Document Author
|
||||
---------------
|
||||
|
||||
Ben Dooks, Copyright 2005 Simtec Electronics
|
@ -9,14 +9,6 @@ This outlines the Samsung GPIO implementation and the architecture
|
||||
specific calls provided alongside the drivers/gpio core.
|
||||
|
||||
|
||||
S3C24XX (Legacy)
|
||||
----------------
|
||||
|
||||
See Documentation/arm/samsung-s3c24xx/gpio.rst for more information
|
||||
about these devices. Their implementation has been brought into line
|
||||
with the core samsung implementation described in this document.
|
||||
|
||||
|
||||
GPIOLIB integration
|
||||
-------------------
|
||||
|
||||
|
@ -12,21 +12,10 @@ Introduction
|
||||
|
||||
The currently supported SoCs are:
|
||||
|
||||
- S3C24XX: See Documentation/arm/samsung-s3c24xx/overview.rst for full list
|
||||
- S3C64XX: S3C6400 and S3C6410
|
||||
- S5PC110 / S5PV210
|
||||
|
||||
|
||||
S3C24XX Systems
|
||||
---------------
|
||||
|
||||
There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
|
||||
deals with the architecture and drivers specific to these devices.
|
||||
|
||||
See Documentation/arm/samsung-s3c24xx/overview.rst for more information
|
||||
on the implementation details and specific support.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
@ -51,8 +40,6 @@ Layout
|
||||
specific information. It contains the base clock, GPIO and device definitions
|
||||
to get the system running.
|
||||
|
||||
plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
|
||||
|
||||
plat-s5p is for s5p specific builds, and contains common support for the
|
||||
S5P specific systems. Not all S5Ps use all the features in this directory
|
||||
due to differences in the hardware.
|
||||
|
@ -223,7 +223,7 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
For systems with a GICv3 interrupt controller to be used in v3 mode:
|
||||
- If EL3 is present:
|
||||
|
||||
- ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
|
||||
- ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
|
||||
- ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
|
||||
- ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
|
||||
all CPUs the kernel is executing on, and must stay constant
|
||||
@ -369,6 +369,16 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- HCR_EL2.ATA (bit 56) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Scalable Matrix Extension version 2 (FEAT_SME2):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- SMCR_EL3.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
@ -14,7 +14,7 @@ Some hardware or software features are only available on some CPU
|
||||
implementations, and/or with certain kernel configurations, but have no
|
||||
architected discovery mechanism available to userspace code at EL0. The
|
||||
kernel exposes the presence of these features to userspace through a set
|
||||
of flags called hwcaps, exposed in the auxilliary vector.
|
||||
of flags called hwcaps, exposed in the auxiliary vector.
|
||||
|
||||
Userspace software can test for features by acquiring the AT_HWCAP or
|
||||
AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
|
||||
@ -284,6 +284,24 @@ HWCAP2_RPRFM
|
||||
HWCAP2_SVE2P1
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
|
||||
HWCAP2_SME2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
|
||||
|
||||
HWCAP2_SME2P1
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0010.
|
||||
|
||||
HWCAP2_SMEI16I32
|
||||
Functionality implied by ID_AA64SMFR0_EL1.I16I32 == 0b0101
|
||||
|
||||
HWCAP2_SMEBI32I32
|
||||
Functionality implied by ID_AA64SMFR0_EL1.BI32I32 == 0b1
|
||||
|
||||
HWCAP2_SMEB16B16
|
||||
Functionality implied by ID_AA64SMFR0_EL1.B16B16 == 0b1
|
||||
|
||||
HWCAP2_SMEF16F16
|
||||
Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
@ -120,6 +120,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
|
||||
|
@ -18,14 +18,19 @@ model features for SME is included in Appendix A.
|
||||
1. General
|
||||
-----------
|
||||
|
||||
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
|
||||
register state and TPIDR2_EL0 are tracked per thread.
|
||||
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA and (when
|
||||
present) ZTn register state and TPIDR2_EL0 are tracked per thread.
|
||||
|
||||
* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
|
||||
AT_HWCAP2 entry. Presence of this flag implies the presence of the SME
|
||||
instructions and registers, and the Linux-specific system interfaces
|
||||
described in this document. SME is reported in /proc/cpuinfo as "sme".
|
||||
|
||||
* The presence of SME2 is reported to userspace via HWCAP2_SME2 in the
|
||||
aux vector AT_HWCAP2 entry. Presence of this flag implies the presence of
|
||||
the SME2 instructions and ZT0, and the Linux-specific system interfaces
|
||||
described in this document. SME2 is reported in /proc/cpuinfo as "sme2".
|
||||
|
||||
* Support for the execution of SME instructions in userspace can also be
|
||||
detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
|
||||
instruction, and checking that the value of the SME field is nonzero. [3]
|
||||
@ -44,6 +49,7 @@ model features for SME is included in Appendix A.
|
||||
HWCAP2_SME_B16F32
|
||||
HWCAP2_SME_F32F32
|
||||
HWCAP2_SME_FA64
|
||||
HWCAP2_SME2
|
||||
|
||||
This list may be extended over time as the SME architecture evolves.
|
||||
|
||||
@ -52,8 +58,8 @@ model features for SME is included in Appendix A.
|
||||
cpu-feature-registers.txt for details.
|
||||
|
||||
* Debuggers should restrict themselves to interacting with the target via the
|
||||
NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets. The recommended way
|
||||
of detecting support for these regsets is to connect to a target process
|
||||
NT_ARM_SVE, NT_ARM_SSVE, NT_ARM_ZA and NT_ARM_ZT regsets. The recommended
|
||||
way of detecting support for these regsets is to connect to a target process
|
||||
first and then attempt a
|
||||
|
||||
ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
|
||||
@ -89,13 +95,13 @@ be zeroed.
|
||||
-------------------------
|
||||
|
||||
* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
|
||||
ZA matrix are preserved.
|
||||
ZA matrix and ZTn (if present) are preserved.
|
||||
|
||||
* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
|
||||
as per the standard SVE ABI.
|
||||
|
||||
* Neither the SVE registers nor ZA are used to pass arguments to or receive
|
||||
results from any syscall.
|
||||
* None of the SVE registers, ZA or ZTn are used to pass arguments to
|
||||
or receive results from any syscall.
|
||||
|
||||
* On process creation (eg, clone()) the newly created process will have
|
||||
PSTATE.SM cleared.
|
||||
@ -111,6 +117,9 @@ be zeroed.
|
||||
|
||||
* Signal handlers are invoked with streaming mode and ZA disabled.
|
||||
|
||||
* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
|
||||
tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.
|
||||
|
||||
* A new signal frame record za_context encodes the ZA register contents on
|
||||
signal delivery. [1]
|
||||
|
||||
@ -134,6 +143,14 @@ be zeroed.
|
||||
__reserved[] referencing this space. za_context is then written in the
|
||||
extra space. Refer to [1] for further details about this mechanism.
|
||||
|
||||
* If ZTn is supported and PSTATE.ZA==1 then a signal frame record for ZTn will
|
||||
be generated.
|
||||
|
||||
* The signal record for ZTn has magic ZT_MAGIC (0x5a544e01) and consists of a
|
||||
standard signal frame header followed by a struct zt_context specifying
|
||||
the number of ZTn registers supported by the system, then zt_context.nregs
|
||||
blocks of 64 bytes of data per register.
|
||||
|
||||
|
||||
5. Signal return
|
||||
-----------------
|
||||
@ -151,6 +168,9 @@ When returning from a signal handler:
|
||||
the signal frame does not match the current vector length, the signal return
|
||||
attempt is treated as illegal, resulting in a forced SIGSEGV.
|
||||
|
||||
* If ZTn is not supported or PSTATE.ZA==0 then it is illegal to have a
|
||||
signal frame record for ZTn, resulting in a forced SIGSEGV.
|
||||
|
||||
|
||||
6. prctl extensions
|
||||
--------------------
|
||||
@ -214,8 +234,8 @@ prctl(PR_SME_SET_VL, unsigned long arg)
|
||||
vector length that will be applied at the next execve() by the calling
|
||||
thread.
|
||||
|
||||
* Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
|
||||
Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
|
||||
* Changing the vector length causes all of ZA, ZTn, P0..P15, FFR and all
|
||||
bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
|
||||
unspecified, including both streaming and non-streaming SVE state.
|
||||
Calling PR_SME_SET_VL with vl equal to the thread's current vector
|
||||
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
|
||||
@ -317,6 +337,15 @@ The regset data starts with struct user_za_header, containing:
|
||||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
* A new regset NT_ARM_ZT is defined for access to ZTn state via
|
||||
PTRACE_GETREGSET and PTRACE_SETREGSET.
|
||||
|
||||
* The NT_ARM_ZT regset consists of a single 512 bit register.
|
||||
|
||||
* When PSTATE.ZA==0 reads of NT_ARM_ZT will report all bits of ZTn as 0.
|
||||
|
||||
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
@ -331,6 +360,11 @@ The regset data starts with struct user_za_header, containing:
|
||||
been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
* A NT_ARM_ZT note will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would have
|
||||
been read if a PTRACE_GETREGSET of NT_ARM_ZT were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
* The NT_ARM_TLS note will be extended to two registers, the second register
|
||||
will contain TPIDR2_EL0 on systems that support SME and will be read as
|
||||
zero with writes ignored otherwise.
|
||||
@ -406,6 +440,9 @@ In A64 state, SME adds the following:
|
||||
For best system performance it is strongly encouraged for software to enable
|
||||
ZA only when it is actively being used.
|
||||
|
||||
* A new ZT0 register is introduced when SME2 is present. This is a 512 bit
|
||||
register which is accessible when PSTATE.ZA is set, as ZA itself is.
|
||||
|
||||
* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
|
||||
SMSTOP instructions or by access to the SVCR system register:
|
||||
|
||||
|
@ -175,7 +175,7 @@ the SVE instruction set architecture.
|
||||
When returning from a signal handler:
|
||||
|
||||
* If there is no sve_context record in the signal frame, or if the record is
|
||||
present but contains no register data as desribed in the previous section,
|
||||
present but contains no register data as described in the previous section,
|
||||
then the SVE registers/bits become non-live and take unspecified values.
|
||||
|
||||
* If sve_context is present in the signal frame and contains full register
|
||||
@ -223,7 +223,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg)
|
||||
Defer the requested vector length change until the next execve()
|
||||
performed by this thread.
|
||||
|
||||
The effect is equivalent to implicit exceution of the following
|
||||
The effect is equivalent to implicit execution of the following
|
||||
call immediately after the next execve() (if any) by the thread:
|
||||
|
||||
prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
|
||||
|
@ -1,10 +0,0 @@
|
||||
===============================
|
||||
Generic Block Device Capability
|
||||
===============================
|
||||
|
||||
This file documents the sysfs file ``block/<disk>/capability``.
|
||||
|
||||
``capability`` is a bitfield, printed in hexadecimal, indicating which
|
||||
capabilities a specific block device supports:
|
||||
|
||||
.. kernel-doc:: include/linux/blkdev.h
|
@ -10,7 +10,6 @@ Block
|
||||
bfq-iosched
|
||||
biovecs
|
||||
blk-mq
|
||||
capability
|
||||
cmdline-partition
|
||||
data-integrity
|
||||
deadline-iosched
|
||||
|
@ -144,6 +144,43 @@ managing and controlling ublk devices with help of several control commands:
|
||||
For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
|
||||
responsibility to save IO target specific info in userspace.
|
||||
|
||||
- ``UBLK_CMD_GET_DEV_INFO2``
|
||||
Same purpose with ``UBLK_CMD_GET_DEV_INFO``, but ublk server has to
|
||||
provide path of the char device of ``/dev/ublkc*`` for kernel to run
|
||||
permission check, and this command is added for supporting unprivileged
|
||||
ublk device, and introduced with ``UBLK_F_UNPRIVILEGED_DEV`` together.
|
||||
Only the user owning the requested device can retrieve the device info.
|
||||
|
||||
How to deal with userspace/kernel compatibility:
|
||||
|
||||
1) if kernel is capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
|
||||
|
||||
If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
|
||||
|
||||
ublk server should send ``UBLK_CMD_GET_DEV_INFO2``, given anytime
|
||||
unprivileged application needs to query devices the current user owns,
|
||||
when the application has no idea if ``UBLK_F_UNPRIVILEGED_DEV`` is set
|
||||
given the capability info is stateless, and application should always
|
||||
retrieve it via ``UBLK_CMD_GET_DEV_INFO2``
|
||||
|
||||
If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
|
||||
|
||||
``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
|
||||
UBLK_F_UNPRIVILEGED_DEV isn't available for user
|
||||
|
||||
2) if kernel isn't capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
|
||||
|
||||
If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
|
||||
|
||||
``UBLK_CMD_GET_DEV_INFO2`` is tried first, and will be failed, then
|
||||
``UBLK_CMD_GET_DEV_INFO`` needs to be retried given
|
||||
``UBLK_F_UNPRIVILEGED_DEV`` can't be set
|
||||
|
||||
If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
|
||||
|
||||
``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
|
||||
``UBLK_F_UNPRIVILEGED_DEV`` isn't available for user
|
||||
|
||||
- ``UBLK_CMD_START_USER_RECOVERY``
|
||||
|
||||
This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
|
||||
@ -180,6 +217,15 @@ managing and controlling ublk devices with help of several control commands:
|
||||
double-write since the driver may issue the same I/O request twice. It
|
||||
might be useful to a read-only FS or a VM backend.
|
||||
|
||||
Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
|
||||
Once the flag is set, all control commands can be sent by unprivileged
|
||||
user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
|
||||
the specified char device(``/dev/ublkc*``) is done for all other control
|
||||
commands by ublk driver, for doing that, path of the char device has to
|
||||
be provided in these commands' payload from ublk server. With this way,
|
||||
ublk device becomes container-ware, and device created in one container
|
||||
can be controlled/accessed just inside this container.
|
||||
|
||||
Data plane
|
||||
----------
|
||||
|
||||
@ -254,15 +300,6 @@ with specified IO tag in the command data:
|
||||
Future development
|
||||
==================
|
||||
|
||||
Container-aware ublk deivice
|
||||
----------------------------
|
||||
|
||||
ublk driver doesn't handle any IO logic. Its function is well defined
|
||||
for now and very limited userspace interfaces are needed, which is also
|
||||
well defined too. It is possible to make ublk devices container-aware block
|
||||
devices in future as Stefan Hajnoczi suggested [#stefan]_, by removing
|
||||
ADMIN privilege.
|
||||
|
||||
Zero copy
|
||||
---------
|
||||
|
||||
|
@ -31,6 +31,12 @@ def have_command(cmd):
|
||||
# Get Sphinx version
|
||||
major, minor, patch = sphinx.version_info[:3]
|
||||
|
||||
#
|
||||
# Warn about older versions that we don't want to support for much
|
||||
# longer.
|
||||
#
|
||||
if (major < 2) or (major == 2 and minor < 4):
|
||||
print('WARNING: support for Sphinx < 2.4 will be removed soon.')
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
@ -339,7 +345,11 @@ html_use_smartypants = False
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
# Note that the RTD theme ignores this
|
||||
html_sidebars = { '**': ["about.html", 'searchbox.html', 'localtoc.html', 'sourcelink.html']}
|
||||
html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']}
|
||||
|
||||
# about.html is available for alabaster theme. Add it at the front.
|
||||
if html_theme == 'alabaster':
|
||||
html_sidebars['**'].insert(0, 'about.html')
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'TheLinuxKerneldoc'
|
||||
|
5
Documentation/devicetree/bindings/.gitignore
vendored
5
Documentation/devicetree/bindings/.gitignore
vendored
@ -2,3 +2,8 @@
|
||||
*.example.dts
|
||||
/processed-schema*.yaml
|
||||
/processed-schema*.json
|
||||
|
||||
#
|
||||
# We don't want to ignore the following even if they are dot-files
|
||||
#
|
||||
!.yamllint
|
||||
|
@ -31,6 +31,7 @@ properties:
|
||||
- description: Mercury+ AA1 boards
|
||||
items:
|
||||
- enum:
|
||||
- enclustra,mercury-pe1
|
||||
- google,chameleon-v3
|
||||
- const: enclustra,mercury-aa1
|
||||
- const: altr,socfpga-arria10
|
||||
|
@ -154,6 +154,7 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- khadas,vim3
|
||||
- radxa,zero2
|
||||
- const: amlogic,a311d
|
||||
- const: amlogic,g12b
|
||||
|
||||
@ -165,6 +166,7 @@ properties:
|
||||
- azw,gtking-pro
|
||||
- hardkernel,odroid-go-ultra
|
||||
- hardkernel,odroid-n2
|
||||
- hardkernel,odroid-n2l
|
||||
- hardkernel,odroid-n2-plus
|
||||
- khadas,vim3
|
||||
- ugoos,am6
|
||||
@ -176,6 +178,7 @@ properties:
|
||||
- enum:
|
||||
- amediatech,x96-air
|
||||
- amediatech,x96-air-gbit
|
||||
- bananapi,bpi-m2-pro
|
||||
- bananapi,bpi-m5
|
||||
- cyx,a95xf3-air
|
||||
- cyx,a95xf3-air-gbit
|
||||
|
@ -78,6 +78,7 @@ properties:
|
||||
- facebook,cloudripper-bmc
|
||||
- facebook,elbert-bmc
|
||||
- facebook,fuji-bmc
|
||||
- facebook,greatlakes-bmc
|
||||
- ibm,everest-bmc
|
||||
- ibm,rainier-bmc
|
||||
- ibm,tacoma-bmc
|
||||
@ -85,6 +86,7 @@ properties:
|
||||
- jabil,rbp-bmc
|
||||
- qcom,dc-scm-v1-bmc
|
||||
- quanta,s6q-bmc
|
||||
- ufispace,ncplite-bmc
|
||||
- const: aspeed,ast2600
|
||||
|
||||
additionalProperties: true
|
||||
|
@ -91,9 +91,11 @@ properties:
|
||||
- const: atmel,sama5d2
|
||||
- const: atmel,sama5
|
||||
|
||||
- description: SAM9X60-EK board
|
||||
- description: Microchip SAM9X60 Evaluation Boards
|
||||
items:
|
||||
- const: microchip,sam9x60ek
|
||||
- enum:
|
||||
- microchip,sam9x60ek
|
||||
- microchip,sam9x60-curiosity
|
||||
- const: microchip,sam9x60
|
||||
- const: atmel,at91sam9
|
||||
|
||||
|
@ -88,12 +88,56 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- armadeus,imx28-apf28 # APF28 SoM
|
||||
- armadeus,imx28-apf28dev # APF28 SoM on APF28Dev board
|
||||
- bluegiga,apx4devkit # Bluegiga APx4 SoM on dev board
|
||||
- crystalfontz,cfa10036 # Crystalfontz CFA-10036 SoM
|
||||
- eukrea,mbmx28lc
|
||||
- fsl,imx28-evk
|
||||
- i2se,duckbill
|
||||
- i2se,duckbill-2
|
||||
- karo,tx28 # Ka-Ro electronics TX28 module
|
||||
- lwn,imx28-xea
|
||||
- msr,m28cu3 # M28 SoM with custom base board
|
||||
- schulercontrol,imx28-sps1
|
||||
- technologic,imx28-ts4600
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Aries M28 SoM Board
|
||||
items:
|
||||
- const: aries,m28
|
||||
- const: denx,m28
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Aries M28EVK Board
|
||||
items:
|
||||
- const: aries,m28evk
|
||||
- const: denx,m28evk
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Armadeus Systems APF28Dev Board
|
||||
items:
|
||||
- const: armadeus,imx28-apf28dev
|
||||
- const: armadeus,imx28-apf28
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Crystalfontz CFA-10036 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- crystalfontz,cfa10037
|
||||
- crystalfontz,cfa10049
|
||||
- crystalfontz,cfa10057
|
||||
- crystalfontz,cfa10058
|
||||
- const: crystalfontz,cfa10036
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Crystalfontz CFA-10037 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- crystalfontz,cfa10055
|
||||
- crystalfontz,cfa10056
|
||||
- const: crystalfontz,cfa10037
|
||||
- const: crystalfontz,cfa10036
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Duckbill 2 based Boards
|
||||
items:
|
||||
- enum:
|
||||
@ -103,6 +147,19 @@ properties:
|
||||
- const: i2se,duckbill-2
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Eukrea Electromatique MBMX283LC Board
|
||||
items:
|
||||
- const: eukrea,mbmx283lc
|
||||
- const: eukrea,mbmx28lc
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX28 Eukrea Electromatique MBMX287LC Board
|
||||
items:
|
||||
- const: eukrea,mbmx287lc
|
||||
- const: eukrea,mbmx283lc
|
||||
- const: eukrea,mbmx28lc
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX31 based Boards
|
||||
items:
|
||||
- enum:
|
||||
@ -173,6 +230,7 @@ properties:
|
||||
- kiebackpeter,imx53-ddc # K+P imx53 DDC
|
||||
- kiebackpeter,imx53-hsc # K+P imx53 HSC
|
||||
- menlo,m53menlo # i.MX53 Menlo board
|
||||
- starterkit,sk-imx53
|
||||
- voipac,imx53-dmm-668 # Voipac i.MX53 X53-DMM-668
|
||||
- const: fsl,imx53
|
||||
|
||||
@ -644,6 +702,16 @@ properties:
|
||||
- const: armadeus,imx6ull-opos6ul # OPOS6UL (i.MX6ULL) SoM
|
||||
- const: fsl,imx6ull
|
||||
|
||||
- description: i.MX6ULL DHCOM SoM based Boards
|
||||
items:
|
||||
- enum:
|
||||
- dh,imx6ull-dhcom-drc02
|
||||
- dh,imx6ull-dhcom-pdk2
|
||||
- dh,imx6ull-dhcom-picoitx
|
||||
- const: dh,imx6ull-dhcom-som # The DHCOR is soldered on the DHCOM
|
||||
- const: dh,imx6ull-dhcor-som
|
||||
- const: fsl,imx6ull
|
||||
|
||||
- description: i.MX6ULL PHYTEC phyBOARD-Segin
|
||||
items:
|
||||
- enum:
|
||||
@ -815,7 +883,6 @@ properties:
|
||||
- enum:
|
||||
- beacon,imx8mm-beacon-kit # i.MX8MM Beacon Development Kit
|
||||
- boundary,imx8mm-nitrogen8mm # i.MX8MM Nitrogen Board
|
||||
- cloos,imx8mm-phg # i.MX8MM Cloos PHG Board
|
||||
- dmo,imx8mm-data-modul-edm-sbc # i.MX8MM eDM SBC
|
||||
- emtrion,emcon-mx8mm-avari # emCON-MX8MM SoM on Avari Base
|
||||
- fsl,imx8mm-ddr4-evk # i.MX8MM DDR4 EVK Board
|
||||
@ -830,7 +897,6 @@ properties:
|
||||
- innocomm,wb15-evk # i.MX8MM Innocomm EVK board with WB15 SoM
|
||||
- kontron,imx8mm-sl # i.MX8MM Kontron SL (N801X) SOM
|
||||
- kontron,imx8mm-osm-s # i.MX8MM Kontron OSM-S (N802X) SOM
|
||||
- menlo,mx8menlo # i.MX8MM Menlo board with Verdin SoM
|
||||
- toradex,verdin-imx8mm # Verdin iMX8M Mini Modules
|
||||
- toradex,verdin-imx8mm-nonwifi # Verdin iMX8M Mini Modules without Wi-Fi / BT
|
||||
- toradex,verdin-imx8mm-wifi # Verdin iMX8M Mini Wi-Fi / BT Modules
|
||||
@ -861,8 +927,10 @@ properties:
|
||||
- description: Toradex Boards with Verdin iMX8M Mini Modules
|
||||
items:
|
||||
- enum:
|
||||
- menlo,mx8menlo # Verdin iMX8M Mini Module on i.MX8MM Menlo board
|
||||
- toradex,verdin-imx8mm-nonwifi-dahlia # Verdin iMX8M Mini Module on Dahlia
|
||||
- toradex,verdin-imx8mm-nonwifi-dev # Verdin iMX8M Mini Module on Verdin Development Board
|
||||
- toradex,verdin-imx8mm-nonwifi-yavia # Verdin iMX8M Mini Module on Yavia
|
||||
- const: toradex,verdin-imx8mm-nonwifi # Verdin iMX8M Mini Module without Wi-Fi / BT
|
||||
- const: toradex,verdin-imx8mm # Verdin iMX8M Mini Module
|
||||
- const: fsl,imx8mm
|
||||
@ -872,6 +940,7 @@ properties:
|
||||
- enum:
|
||||
- toradex,verdin-imx8mm-wifi-dahlia # Verdin iMX8M Mini Wi-Fi / BT Module on Dahlia
|
||||
- toradex,verdin-imx8mm-wifi-dev # Verdin iMX8M Mini Wi-Fi / BT M. on Verdin Development B.
|
||||
- toradex,verdin-imx8mm-wifi-yavia # Verdin iMX8M Mini Wi-Fi / BT Module on Yavia
|
||||
- const: toradex,verdin-imx8mm-wifi # Verdin iMX8M Mini Wi-Fi / BT Module
|
||||
- const: toradex,verdin-imx8mm # Verdin iMX8M Mini Module
|
||||
- const: fsl,imx8mm
|
||||
@ -895,6 +964,7 @@ properties:
|
||||
one compatible is needed.
|
||||
items:
|
||||
- enum:
|
||||
- cloos,imx8mm-phg # i.MX8MM Cloos PHG Board
|
||||
- tq,imx8mm-tqma8mqml-mba8mx # TQ-Systems GmbH i.MX8MM TQMa8MQML SOM on MBa8Mx
|
||||
- const: tq,imx8mm-tqma8mqml # TQ-Systems GmbH i.MX8MM TQMa8MQML SOM
|
||||
- const: fsl,imx8mm
|
||||
@ -931,10 +1001,11 @@ properties:
|
||||
- description: i.MX8MP based Boards
|
||||
items:
|
||||
- enum:
|
||||
- dh,imx8mp-dhcom-som # i.MX8MP DHCOM SoM
|
||||
- dh,imx8mp-dhcom-pdk2 # i.MX8MP DHCOM SoM on PDK2 board
|
||||
- beacon,imx8mp-beacon-kit # i.MX8MP Beacon Development Kit
|
||||
- fsl,imx8mp-evk # i.MX8MP EVK Board
|
||||
- gateworks,imx8mp-gw74xx # i.MX8MP Gateworks Board
|
||||
- polyhex,imx8mp-debix # Polyhex Debix boards
|
||||
- polyhex,imx8mp-debix-model-a # Polyhex Debix Model A Board
|
||||
- toradex,verdin-imx8mp # Verdin iMX8M Plus Modules
|
||||
- toradex,verdin-imx8mp-nonwifi # Verdin iMX8M Plus Modules without Wi-Fi / BT
|
||||
- toradex,verdin-imx8mp-wifi # Verdin iMX8M Plus Wi-Fi / BT Modules
|
||||
@ -947,6 +1018,12 @@ properties:
|
||||
- const: avnet,sm2s-imx8mp # SM2S-IMX8PLUS SoM
|
||||
- const: fsl,imx8mp
|
||||
|
||||
- description: i.MX8MP DHCOM based Boards
|
||||
items:
|
||||
- const: dh,imx8mp-dhcom-pdk2 # i.MX8MP DHCOM SoM on PDK2 board
|
||||
- const: dh,imx8mp-dhcom-som # i.MX8MP DHCOM SoM
|
||||
- const: fsl,imx8mp
|
||||
|
||||
- description: Engicam i.Core MX8M Plus SoM based boards
|
||||
items:
|
||||
- enum:
|
||||
@ -965,6 +1042,7 @@ properties:
|
||||
- enum:
|
||||
- toradex,verdin-imx8mp-nonwifi-dahlia # Verdin iMX8M Plus Module on Dahlia
|
||||
- toradex,verdin-imx8mp-nonwifi-dev # Verdin iMX8M Plus Module on Verdin Development Board
|
||||
- toradex,verdin-imx8mp-nonwifi-yavia # Verdin iMX8M Plus Module on Yavia
|
||||
- const: toradex,verdin-imx8mp-nonwifi # Verdin iMX8M Plus Module without Wi-Fi / BT
|
||||
- const: toradex,verdin-imx8mp # Verdin iMX8M Plus Module
|
||||
- const: fsl,imx8mp
|
||||
@ -974,6 +1052,7 @@ properties:
|
||||
- enum:
|
||||
- toradex,verdin-imx8mp-wifi-dahlia # Verdin iMX8M Plus Wi-Fi / BT Module on Dahlia
|
||||
- toradex,verdin-imx8mp-wifi-dev # Verdin iMX8M Plus Wi-Fi / BT M. on Verdin Development B.
|
||||
- toradex,verdin-imx8mp-wifi-yavia # Verdin iMX8M Plus Wi-Fi / BT Module on Yavia
|
||||
- const: toradex,verdin-imx8mp-wifi # Verdin iMX8M Plus Wi-Fi / BT Module
|
||||
- const: toradex,verdin-imx8mp # Verdin iMX8M Plus Module
|
||||
- const: fsl,imx8mp
|
||||
@ -999,12 +1078,17 @@ properties:
|
||||
- fsl,imx8mq-evk # i.MX8MQ EVK Board
|
||||
- google,imx8mq-phanbell # Google Coral Edge TPU
|
||||
- kontron,pitx-imx8m # Kontron pITX-imx8m Board
|
||||
- mntre,reform2 # MNT Reform2 Laptop
|
||||
- purism,librem5-devkit # Purism Librem5 devkit
|
||||
- solidrun,hummingboard-pulse # SolidRun Hummingboard Pulse
|
||||
- technexion,pico-pi-imx8m # TechNexion PICO-PI-8M evk
|
||||
- const: fsl,imx8mq
|
||||
|
||||
- description: i.MX8MQ NITROGEN SoM based Boards
|
||||
items:
|
||||
- const: mntre,reform2 # MNT Reform2 Laptop
|
||||
- const: boundary,imx8mq-nitrogen8m-som # i.MX8MQ NITROGEN SoM
|
||||
- const: fsl,imx8mq
|
||||
|
||||
- description: Purism Librem5 phones
|
||||
items:
|
||||
- enum:
|
||||
|
@ -244,6 +244,10 @@ properties:
|
||||
- enum:
|
||||
- mediatek,mt8183-pumpkin
|
||||
- const: mediatek,mt8183
|
||||
- items:
|
||||
- enum:
|
||||
- mediatek,mt8365-evk
|
||||
- const: mediatek,mt8365
|
||||
- items:
|
||||
- enum:
|
||||
- mediatek,mt8516-pumpkin
|
||||
|
@ -35,6 +35,8 @@ description: |
|
||||
mdm9615
|
||||
msm8226
|
||||
msm8916
|
||||
msm8939
|
||||
msm8953
|
||||
msm8956
|
||||
msm8974
|
||||
msm8976
|
||||
@ -47,11 +49,13 @@ description: |
|
||||
qru1000
|
||||
sa8155p
|
||||
sa8540p
|
||||
sa8775p
|
||||
sc7180
|
||||
sc7280
|
||||
sc8180x
|
||||
sc8280xp
|
||||
sda660
|
||||
sdm450
|
||||
sdm630
|
||||
sdm632
|
||||
sdm636
|
||||
@ -62,6 +66,7 @@ description: |
|
||||
sdx65
|
||||
sm4250
|
||||
sm6115
|
||||
sm6115p
|
||||
sm6125
|
||||
sm6350
|
||||
sm6375
|
||||
@ -70,6 +75,7 @@ description: |
|
||||
sm8250
|
||||
sm8350
|
||||
sm8450
|
||||
sm8550
|
||||
|
||||
The 'board' element must be one of the following strings:
|
||||
|
||||
@ -84,6 +90,7 @@ description: |
|
||||
liquid
|
||||
mtp
|
||||
qrd
|
||||
ride
|
||||
sbc
|
||||
x100
|
||||
|
||||
@ -160,6 +167,12 @@ properties:
|
||||
- samsung,s3ve3g
|
||||
- const: qcom,msm8226
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- sony,kanuti-tulip
|
||||
- square,apq8039-t2
|
||||
- const: qcom,msm8939
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- sony,kugo-row
|
||||
@ -194,8 +207,10 @@ properties:
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- acer,a1-724
|
||||
- alcatel,idol347
|
||||
- asus,z00l
|
||||
- gplus,fl8005a
|
||||
- huawei,g7
|
||||
- longcheer,l8910
|
||||
- samsung,a3u-eur
|
||||
@ -203,8 +218,13 @@ properties:
|
||||
- samsung,e5
|
||||
- samsung,e7
|
||||
- samsung,grandmax
|
||||
- samsung,gt510
|
||||
- samsung,gt58
|
||||
- samsung,j5
|
||||
- samsung,j5x
|
||||
- samsung,serranove
|
||||
- thwc,uf896
|
||||
- thwc,ufi001c
|
||||
- wingtech,wt88047
|
||||
- const: qcom,msm8916
|
||||
|
||||
@ -213,6 +233,15 @@ properties:
|
||||
- const: qcom,msm8916-v1-qrd/9-v1
|
||||
- const: qcom,msm8916
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- motorola,potter
|
||||
- xiaomi,daisy
|
||||
- xiaomi,mido
|
||||
- xiaomi,tissot
|
||||
- xiaomi,vince
|
||||
- const: qcom,msm8953
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- lg,bullhead
|
||||
@ -627,6 +656,12 @@ properties:
|
||||
- const: google,hoglin
|
||||
- const: qcom,sc7280
|
||||
|
||||
- description: Qualcomm Technologies, Inc. sc7280 CRD Pro platform (newest rev)
|
||||
items:
|
||||
- const: google,zoglin-sku1536
|
||||
- const: google,hoglin-sku1536
|
||||
- const: qcom,sc7280
|
||||
|
||||
- description: Qualcomm Technologies, Inc. sc7280 IDP SKU1 platform
|
||||
items:
|
||||
- const: qcom,sc7280-idp
|
||||
@ -679,6 +714,18 @@ properties:
|
||||
- const: google,zombie-sku512
|
||||
- const: qcom,sc7280
|
||||
|
||||
- description: Google Zombie with NVMe (newest rev)
|
||||
items:
|
||||
- const: google,zombie-sku2
|
||||
- const: google,zombie-sku3
|
||||
- const: google,zombie-sku515
|
||||
- const: qcom,sc7280
|
||||
|
||||
- description: Google Zombie with LTE and NVMe (newest rev)
|
||||
items:
|
||||
- const: google,zombie-sku514
|
||||
- const: qcom,sc7280
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- lenovo,flex-5g
|
||||
@ -693,6 +740,11 @@ properties:
|
||||
- qcom,sc8280xp-qrd
|
||||
- const: qcom,sc8280xp
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- motorola,ali
|
||||
- const: qcom,sdm450
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- sony,discovery-row
|
||||
@ -709,6 +761,7 @@ properties:
|
||||
- items:
|
||||
- enum:
|
||||
- fairphone,fp3
|
||||
- motorola,ocean
|
||||
- const: qcom,sdm632
|
||||
|
||||
- items:
|
||||
@ -762,6 +815,11 @@ properties:
|
||||
- qcom,sa8540p-ride
|
||||
- const: qcom,sa8540p
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,sa8775p-ride
|
||||
- const: qcom,sa8775p
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- google,cheza
|
||||
@ -790,6 +848,12 @@ properties:
|
||||
- oneplus,billie2
|
||||
- const: qcom,sm4250
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- lenovo,j606f
|
||||
- const: qcom,sm6115p
|
||||
- const: qcom,sm6115
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- sony,pdx201
|
||||
@ -826,6 +890,7 @@ properties:
|
||||
- qcom,sm8250-mtp
|
||||
- sony,pdx203-generic
|
||||
- sony,pdx206-generic
|
||||
- xiaomi,elish
|
||||
- const: qcom,sm8250
|
||||
|
||||
- items:
|
||||
@ -845,6 +910,11 @@ properties:
|
||||
- sony,pdx224
|
||||
- const: qcom,sm8450
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,sm8550-mtp
|
||||
- const: qcom,sm8550
|
||||
|
||||
# Board compatibles go above
|
||||
|
||||
qcom,msm-id:
|
||||
@ -922,15 +992,22 @@ allOf:
|
||||
- qcom,apq8026
|
||||
- qcom,apq8094
|
||||
- qcom,apq8096
|
||||
- qcom,msm8939
|
||||
- qcom,msm8953
|
||||
- qcom,msm8956
|
||||
- qcom,msm8992
|
||||
- qcom,msm8994
|
||||
- qcom,msm8996
|
||||
- qcom,msm8998
|
||||
- qcom,sdm450
|
||||
- qcom,sdm630
|
||||
- qcom,sdm632
|
||||
- qcom,sdm636
|
||||
- qcom,sdm845
|
||||
- qcom,sdx55
|
||||
- qcom,sdx65
|
||||
- qcom,sm4250
|
||||
- qcom,sm6115
|
||||
- qcom,sm6125
|
||||
- qcom,sm6350
|
||||
- qcom,sm7225
|
||||
@ -954,6 +1031,8 @@ allOf:
|
||||
- oneplus,dumpling
|
||||
- oneplus,enchilada
|
||||
- oneplus,fajita
|
||||
- oneplus,oneplus3
|
||||
- oneplus,oneplus3t
|
||||
then:
|
||||
properties:
|
||||
qcom,board-id:
|
||||
|
@ -90,11 +90,33 @@ properties:
|
||||
- const: chipspark,rayeager-px2
|
||||
- const: rockchip,rk3066a
|
||||
|
||||
- description: Edgeble Neural Compute Module 2(Neu2) SoM based boards
|
||||
items:
|
||||
- const: edgeble,neural-compute-module-2-io # Edgeble Neural Compute Module 2 IO Board
|
||||
- const: edgeble,neural-compute-module-2 # Edgeble Neural Compute Module 2 SoM
|
||||
- const: rockchip,rv1126
|
||||
|
||||
- description: Edgeble Neural Compute Module 6(Neu6) Model A SoM based boards
|
||||
items:
|
||||
- const: edgeble,neural-compute-module-6a-io # Edgeble Neural Compute Module 6A IO Board
|
||||
- const: edgeble,neural-compute-module-6a # Edgeble Neural Compute Module 6A SoM
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Elgin RV1108 R1
|
||||
items:
|
||||
- const: elgin,rv1108-r1
|
||||
- const: rockchip,rv1108
|
||||
|
||||
- description: EmbedFire LubanCat 1
|
||||
items:
|
||||
- const: embedfire,lubancat-1
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: EmbedFire LubanCat 2
|
||||
items:
|
||||
- const: embedfire,lubancat-2
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Engicam PX30.Core C.TOUCH 2.0
|
||||
items:
|
||||
- const: engicam,px30-core-ctouch2
|
||||
@ -599,6 +621,20 @@ properties:
|
||||
- const: pine64,soquartz
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: Radxa Compute Module 3(CM3)
|
||||
items:
|
||||
- enum:
|
||||
- radxa,cm3-io
|
||||
- const: radxa,cm3
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: Radxa CM3 Industrial
|
||||
items:
|
||||
- enum:
|
||||
- radxa,e25
|
||||
- const: radxa,cm3i
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Radxa Rock
|
||||
items:
|
||||
- const: radxa,rock
|
||||
@ -652,6 +688,16 @@ properties:
|
||||
- const: radxa,rock3a
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Radxa ROCK 5 Model A
|
||||
items:
|
||||
- const: radxa,rock-5a
|
||||
- const: rockchip,rk3588s
|
||||
|
||||
- description: Radxa ROCK 5 Model B
|
||||
items:
|
||||
- const: radxa,rock-5b
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Rikomagic MK808 v1
|
||||
items:
|
||||
- const: rikomagic,mk808
|
||||
@ -689,6 +735,11 @@ properties:
|
||||
- const: rockchip,rk3036-evb
|
||||
- const: rockchip,rk3036
|
||||
|
||||
- description: Rockchip RK3128 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3128-evb
|
||||
- const: rockchip,rk3128
|
||||
|
||||
- description: Rockchip RK3228 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3228-evb
|
||||
@ -736,6 +787,11 @@ properties:
|
||||
- const: rockchip,rk3399-sapphire-excavator
|
||||
- const: rockchip,rk3399
|
||||
|
||||
- description: Rockchip RK3588 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3588-evb1-v10
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Rockchip RV1108 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rv1108-evb
|
||||
@ -761,6 +817,11 @@ properties:
|
||||
- const: tronsmart,orion-r68-meta
|
||||
- const: rockchip,rk3368
|
||||
|
||||
- description: Xunlong Orange Pi R1 Plus
|
||||
items:
|
||||
- const: xunlong,orangepi-r1-plus
|
||||
- const: rockchip,rk3328
|
||||
|
||||
- description: Zkmagic A95X Z2
|
||||
items:
|
||||
- const: zkmagic,a95x-z2
|
||||
|
@ -27,6 +27,7 @@ select:
|
||||
- rockchip,rk3399-pmu
|
||||
- rockchip,rk3568-pmu
|
||||
- rockchip,rk3588-pmu
|
||||
- rockchip,rv1126-pmu
|
||||
|
||||
required:
|
||||
- compatible
|
||||
@ -43,6 +44,7 @@ properties:
|
||||
- rockchip,rk3399-pmu
|
||||
- rockchip,rk3568-pmu
|
||||
- rockchip,rk3588-pmu
|
||||
- rockchip,rv1126-pmu
|
||||
- const: syscon
|
||||
- const: simple-mfd
|
||||
|
||||
|
@ -171,6 +171,7 @@ properties:
|
||||
- hardkernel,odroid-xu3-lite # Hardkernel Odroid XU3 Lite
|
||||
- hardkernel,odroid-xu4 # Hardkernel Odroid XU4
|
||||
- hardkernel,odroid-hc1 # Hardkernel Odroid HC1
|
||||
- samsung,k3g # Samsung Galaxy S5 (SM-G900H)
|
||||
- const: samsung,exynos5800
|
||||
- const: samsung,exynos5
|
||||
|
||||
|
@ -38,10 +38,17 @@ properties:
|
||||
- ti,am642-sk
|
||||
- const: ti,am642
|
||||
|
||||
- description: K3 AM642 SoC PHYTEC phyBOARD-Electra
|
||||
items:
|
||||
- const: phytec,am642-phyboard-electra-rdk
|
||||
- const: phytec,am64-phycore-som
|
||||
- const: ti,am642
|
||||
|
||||
- description: K3 AM654 SoC
|
||||
items:
|
||||
- enum:
|
||||
- siemens,iot2050-advanced
|
||||
- siemens,iot2050-advanced-m2
|
||||
- siemens,iot2050-advanced-pg2
|
||||
- siemens,iot2050-basic
|
||||
- siemens,iot2050-basic-pg2
|
||||
@ -69,9 +76,17 @@ properties:
|
||||
- description: K3 J721s2 SoC
|
||||
items:
|
||||
- enum:
|
||||
- ti,am68-sk
|
||||
- ti,j721s2-evm
|
||||
- const: ti,j721s2
|
||||
|
||||
- description: K3 J784s4 SoC
|
||||
items:
|
||||
- enum:
|
||||
- ti,am69-sk
|
||||
- ti,j784s4-evm
|
||||
- const: ti,j784s4
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
||||
|
@ -40,10 +40,10 @@ properties:
|
||||
maxItems: 1
|
||||
|
||||
"#address-cells":
|
||||
const: 1
|
||||
enum: [ 1, 2 ]
|
||||
|
||||
"#size-cells":
|
||||
const: 1
|
||||
enum: [ 1, 2 ]
|
||||
|
||||
ranges: true
|
||||
|
||||
|
@ -0,0 +1,51 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/clock/qcom,qdu1000-gcc.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm Global Clock & Reset Controller for QDU1000 and QRU1000
|
||||
|
||||
maintainers:
|
||||
- Melody Olvera <quic_molvera@quicinc.com>
|
||||
|
||||
description: |
|
||||
Qualcomm global clock control module which supports the clocks, resets and
|
||||
power domains on QDU1000 and QRU1000
|
||||
|
||||
See also:: include/dt-bindings/clock/qcom,qdu1000-gcc.h
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,qdu1000-gcc
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Board XO source
|
||||
- description: Sleep clock source
|
||||
- description: PCIE 0 Pipe clock source
|
||||
- description: PCIE 0 Phy Auxiliary clock source
|
||||
- description: USB3 Phy wrapper pipe clock source
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- clocks
|
||||
|
||||
allOf:
|
||||
- $ref: qcom,gcc.yaml#
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,rpmh.h>
|
||||
clock-controller@100000 {
|
||||
compatible = "qcom,qdu1000-gcc";
|
||||
reg = <0x00100000 0x001f4200>;
|
||||
clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>,
|
||||
<&pcie_0_pipe_clk>, <&pcie_0_phy_aux_clk>,
|
||||
<&usb3_phy_wrapper_pipe_clk>;
|
||||
#clock-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
#power-domain-cells = <1>;
|
||||
};
|
@ -0,0 +1,49 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/clock/qcom,sm6350-camcc.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm Camera Clock & Reset Controller on SM6350
|
||||
|
||||
maintainers:
|
||||
- Konrad Dybcio <konrad.dybcio@linaro.org>
|
||||
|
||||
description: |
|
||||
Qualcomm camera clock control module provides the clocks, resets and power
|
||||
domains on SM6350.
|
||||
|
||||
See also:: include/dt-bindings/clock/qcom,sm6350-camcc.h
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6350-camcc
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Board XO source
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- clocks
|
||||
|
||||
allOf:
|
||||
- $ref: qcom,gcc.yaml#
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,rpmh.h>
|
||||
clock-controller@ad00000 {
|
||||
compatible = "qcom,sm6350-camcc";
|
||||
reg = <0x0ad00000 0x16000>;
|
||||
clocks = <&rpmhcc RPMH_CXO_CLK>;
|
||||
#clock-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
#power-domain-cells = <1>;
|
||||
};
|
||||
...
|
105
Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
Normal file
105
Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
Normal file
@ -0,0 +1,105 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/clock/qcom,sm8550-dispcc.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm Display Clock & Reset Controller for SM8550
|
||||
|
||||
maintainers:
|
||||
- Bjorn Andersson <andersson@kernel.org>
|
||||
- Neil Armstrong <neil.armstrong@linaro.org>
|
||||
|
||||
description: |
|
||||
Qualcomm display clock control module provides the clocks, resets and power
|
||||
domains on SM8550.
|
||||
|
||||
See also:: include/dt-bindings/clock/qcom,sm8550-dispcc.h
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- qcom,sm8550-dispcc
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Board XO source
|
||||
- description: Board Always On XO source
|
||||
- description: Display's AHB clock
|
||||
- description: sleep clock
|
||||
- description: Byte clock from DSI PHY0
|
||||
- description: Pixel clock from DSI PHY0
|
||||
- description: Byte clock from DSI PHY1
|
||||
- description: Pixel clock from DSI PHY1
|
||||
- description: Link clock from DP PHY0
|
||||
- description: VCO DIV clock from DP PHY0
|
||||
- description: Link clock from DP PHY1
|
||||
- description: VCO DIV clock from DP PHY1
|
||||
- description: Link clock from DP PHY2
|
||||
- description: VCO DIV clock from DP PHY2
|
||||
- description: Link clock from DP PHY3
|
||||
- description: VCO DIV clock from DP PHY3
|
||||
|
||||
'#clock-cells':
|
||||
const: 1
|
||||
|
||||
'#reset-cells':
|
||||
const: 1
|
||||
|
||||
'#power-domain-cells':
|
||||
const: 1
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
power-domains:
|
||||
description:
|
||||
A phandle and PM domain specifier for the MMCX power domain.
|
||||
maxItems: 1
|
||||
|
||||
required-opps:
|
||||
description:
|
||||
A phandle to an OPP node describing required MMCX performance point.
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- clocks
|
||||
- '#clock-cells'
|
||||
- '#reset-cells'
|
||||
- '#power-domain-cells'
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,sm8550-gcc.h>
|
||||
#include <dt-bindings/clock/qcom,rpmh.h>
|
||||
#include <dt-bindings/power/qcom-rpmpd.h>
|
||||
clock-controller@af00000 {
|
||||
compatible = "qcom,sm8550-dispcc";
|
||||
reg = <0x0af00000 0x10000>;
|
||||
clocks = <&rpmhcc RPMH_CXO_CLK>,
|
||||
<&rpmhcc RPMH_CXO_CLK_A>,
|
||||
<&gcc GCC_DISP_AHB_CLK>,
|
||||
<&sleep_clk>,
|
||||
<&dsi0_phy 0>,
|
||||
<&dsi0_phy 1>,
|
||||
<&dsi1_phy 0>,
|
||||
<&dsi1_phy 1>,
|
||||
<&dp0_phy 0>,
|
||||
<&dp0_phy 1>,
|
||||
<&dp1_phy 0>,
|
||||
<&dp1_phy 1>,
|
||||
<&dp2_phy 0>,
|
||||
<&dp2_phy 1>,
|
||||
<&dp3_phy 0>,
|
||||
<&dp3_phy 1>;
|
||||
#clock-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
#power-domain-cells = <1>;
|
||||
power-domains = <&rpmhpd SM8550_MMCX>;
|
||||
required-opps = <&rpmhpd_opp_low_svs>;
|
||||
};
|
||||
...
|
@ -0,0 +1,55 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/clock/qcom,sm8550-tcsr.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm TCSR Clock Controller on SM8550
|
||||
|
||||
maintainers:
|
||||
- Bjorn Andersson <andersson@kernel.org>
|
||||
|
||||
description: |
|
||||
Qualcomm TCSR clock control module provides the clocks, resets and
|
||||
power domains on SM8550
|
||||
|
||||
See also:: include/dt-bindings/clock/qcom,sm8550-tcsr.h
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: qcom,sm8550-tcsr
|
||||
- const: syscon
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: TCXO pad clock
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
'#clock-cells':
|
||||
const: 1
|
||||
|
||||
'#reset-cells':
|
||||
const: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- clocks
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,rpmh.h>
|
||||
|
||||
clock-controller@1fc0000 {
|
||||
compatible = "qcom,sm8550-tcsr", "syscon";
|
||||
reg = <0x1fc0000 0x30000>;
|
||||
clocks = <&rpmhcc RPMH_CXO_CLK>;
|
||||
#clock-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
};
|
||||
|
||||
...
|
@ -1,49 +0,0 @@
|
||||
* Samsung S3C2410 Clock Controller
|
||||
|
||||
The S3C2410 clock controller generates and supplies clock to various controllers
|
||||
within the SoC. The clock binding described here is applicable to the s3c2410,
|
||||
s3c2440 and s3c2442 SoCs in the s3c24x family.
|
||||
|
||||
Required Properties:
|
||||
|
||||
- compatible: should be one of the following.
|
||||
- "samsung,s3c2410-clock" - controller compatible with S3C2410 SoC.
|
||||
- "samsung,s3c2440-clock" - controller compatible with S3C2440 SoC.
|
||||
- "samsung,s3c2442-clock" - controller compatible with S3C2442 SoC.
|
||||
- reg: physical base address of the controller and length of memory mapped
|
||||
region.
|
||||
- #clock-cells: should be 1.
|
||||
|
||||
Each clock is assigned an identifier and client nodes can use this identifier
|
||||
to specify the clock which they consume. Some of the clocks are available only
|
||||
on a particular SoC.
|
||||
|
||||
All available clocks are defined as preprocessor macros in
|
||||
dt-bindings/clock/s3c2410.h header and can be used in device
|
||||
tree sources.
|
||||
|
||||
External clocks:
|
||||
|
||||
The xti clock used as input for the plls is generated outside the SoC. It is
|
||||
expected that is are defined using standard clock bindings with a
|
||||
clock-output-names value of "xti".
|
||||
|
||||
Example: Clock controller node:
|
||||
|
||||
clocks: clock-controller@4c000000 {
|
||||
compatible = "samsung,s3c2410-clock";
|
||||
reg = <0x4c000000 0x20>;
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
Example: UART controller node that consumes the clock generated by the clock
|
||||
controller (refer to the standard clock bindings for information about
|
||||
"clocks" and "clock-names" properties):
|
||||
|
||||
serial@50004000 {
|
||||
compatible = "samsung,s3c2440-uart";
|
||||
reg = <0x50004000 0x4000>;
|
||||
interrupts = <1 23 3 4>, <1 23 4 4>;
|
||||
clock-names = "uart", "clk_uart_baud2";
|
||||
clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>;
|
||||
};
|
@ -1,49 +0,0 @@
|
||||
* Samsung S3C2412 Clock Controller
|
||||
|
||||
The S3C2412 clock controller generates and supplies clock to various controllers
|
||||
within the SoC. The clock binding described here is applicable to the s3c2412
|
||||
and s3c2413 SoCs in the s3c24x family.
|
||||
|
||||
Required Properties:
|
||||
|
||||
- compatible: should be "samsung,s3c2412-clock"
|
||||
- reg: physical base address of the controller and length of memory mapped
|
||||
region.
|
||||
- #clock-cells: should be 1.
|
||||
|
||||
Each clock is assigned an identifier and client nodes can use this identifier
|
||||
to specify the clock which they consume. Some of the clocks are available only
|
||||
on a particular SoC.
|
||||
|
||||
All available clocks are defined as preprocessor macros in
|
||||
dt-bindings/clock/s3c2412.h header and can be used in device
|
||||
tree sources.
|
||||
|
||||
External clocks:
|
||||
|
||||
There are several clocks that are generated outside the SoC. It is expected
|
||||
that they are defined using standard clock bindings with following
|
||||
clock-output-names:
|
||||
- "xti" - crystal input - required,
|
||||
- "ext" - external clock source - optional,
|
||||
|
||||
Example: Clock controller node:
|
||||
|
||||
clocks: clock-controller@4c000000 {
|
||||
compatible = "samsung,s3c2412-clock";
|
||||
reg = <0x4c000000 0x20>;
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
Example: UART controller node that consumes the clock generated by the clock
|
||||
controller (refer to the standard clock bindings for information about
|
||||
"clocks" and "clock-names" properties):
|
||||
|
||||
serial@50004000 {
|
||||
compatible = "samsung,s3c2412-uart";
|
||||
reg = <0x50004000 0x4000>;
|
||||
interrupts = <1 23 3 4>, <1 23 4 4>;
|
||||
clock-names = "uart", "clk_uart_baud2", "clk_uart_baud3";
|
||||
clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
|
||||
<&clocks SCLK_UART>;
|
||||
};
|
@ -1,55 +0,0 @@
|
||||
* Samsung S3C2443 Clock Controller
|
||||
|
||||
The S3C2443 clock controller generates and supplies clock to various controllers
|
||||
within the SoC. The clock binding described here is applicable to all SoCs in
|
||||
the s3c24x family starting with the s3c2443.
|
||||
|
||||
Required Properties:
|
||||
|
||||
- compatible: should be one of the following.
|
||||
- "samsung,s3c2416-clock" - controller compatible with S3C2416 SoC.
|
||||
- "samsung,s3c2443-clock" - controller compatible with S3C2443 SoC.
|
||||
- "samsung,s3c2450-clock" - controller compatible with S3C2450 SoC.
|
||||
- reg: physical base address of the controller and length of memory mapped
|
||||
region.
|
||||
- #clock-cells: should be 1.
|
||||
|
||||
Each clock is assigned an identifier and client nodes can use this identifier
|
||||
to specify the clock which they consume. Some of the clocks are available only
|
||||
on a particular SoC.
|
||||
|
||||
All available clocks are defined as preprocessor macros in
|
||||
dt-bindings/clock/s3c2443.h header and can be used in device
|
||||
tree sources.
|
||||
|
||||
External clocks:
|
||||
|
||||
There are several clocks that are generated outside the SoC. It is expected
|
||||
that they are defined using standard clock bindings with following
|
||||
clock-output-names:
|
||||
- "xti" - crystal input - required,
|
||||
- "ext" - external clock source - optional,
|
||||
- "ext_i2s" - external I2S clock - optional,
|
||||
- "ext_uart" - external uart clock - optional,
|
||||
|
||||
Example: Clock controller node:
|
||||
|
||||
clocks: clock-controller@4c000000 {
|
||||
compatible = "samsung,s3c2416-clock";
|
||||
reg = <0x4c000000 0x40>;
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
Example: UART controller node that consumes the clock generated by the clock
|
||||
controller (refer to the standard clock bindings for information about
|
||||
"clocks" and "clock-names" properties):
|
||||
|
||||
serial@50004000 {
|
||||
compatible = "samsung,s3c2440-uart";
|
||||
reg = <0x50004000 0x4000>;
|
||||
interrupts = <1 23 3 4>, <1 23 4 4>;
|
||||
clock-names = "uart", "clk_uart_baud2",
|
||||
"clk_uart_baud3";
|
||||
clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
|
||||
<&clocks SCLK_UART>;
|
||||
};
|
@ -26,8 +26,13 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- qcom,qdu1000-cpufreq-epss
|
||||
- qcom,sc7280-cpufreq-epss
|
||||
- qcom,sc8280xp-cpufreq-epss
|
||||
- qcom,sm6375-cpufreq-epss
|
||||
- qcom,sm8250-cpufreq-epss
|
||||
- qcom,sm8350-cpufreq-epss
|
||||
- qcom,sm8450-cpufreq-epss
|
||||
- qcom,sm8550-cpufreq-epss
|
||||
- const: qcom,cpufreq-epss
|
||||
|
||||
reg:
|
||||
@ -54,6 +59,17 @@ properties:
|
||||
- const: xo
|
||||
- const: alternate
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
|
||||
interrupt-names:
|
||||
minItems: 1
|
||||
items:
|
||||
- const: dcvsh-irq-0
|
||||
- const: dcvsh-irq-1
|
||||
- const: dcvsh-irq-2
|
||||
|
||||
'#freq-domain-cells':
|
||||
const: 1
|
||||
|
||||
|
@ -17,6 +17,9 @@ description: |
|
||||
on the CPU OPP in use. The CPUFreq driver sets the CPR power domain level
|
||||
according to the required OPPs defined in the CPU OPP tables.
|
||||
|
||||
For old implementation efuses are parsed to select the correct opp table and
|
||||
voltage and CPR is not supported/used.
|
||||
|
||||
select:
|
||||
properties:
|
||||
compatible:
|
||||
@ -33,37 +36,65 @@ select:
|
||||
required:
|
||||
- compatible
|
||||
|
||||
properties:
|
||||
cpus:
|
||||
type: object
|
||||
|
||||
patternProperties:
|
||||
'^cpu@[0-9a-f]+$':
|
||||
type: object
|
||||
|
||||
properties:
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
power-domain-names:
|
||||
items:
|
||||
- const: cpr
|
||||
|
||||
required:
|
||||
- power-domains
|
||||
- power-domain-names
|
||||
|
||||
patternProperties:
|
||||
'^opp-table(-[a-z0-9]+)?$':
|
||||
if:
|
||||
allOf:
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
const: operating-points-v2-kryo-cpu
|
||||
then:
|
||||
$ref: /schemas/opp/opp-v2-kryo-cpu.yaml#
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
const: operating-points-v2-qcom-level
|
||||
then:
|
||||
$ref: /schemas/opp/opp-v2-qcom-level.yaml#
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
allOf:
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
const: operating-points-v2-kryo-cpu
|
||||
contains:
|
||||
enum:
|
||||
- qcom,qcs404
|
||||
|
||||
then:
|
||||
properties:
|
||||
cpus:
|
||||
type: object
|
||||
|
||||
patternProperties:
|
||||
'^cpu@[0-9a-f]+$':
|
||||
type: object
|
||||
|
||||
properties:
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
power-domain-names:
|
||||
items:
|
||||
- const: cpr
|
||||
|
||||
required:
|
||||
- power-domains
|
||||
- power-domain-names
|
||||
|
||||
patternProperties:
|
||||
'^opp-?[0-9]+$':
|
||||
required:
|
||||
- required-opps
|
||||
'^opp-table(-[a-z0-9]+)?$':
|
||||
if:
|
||||
properties:
|
||||
compatible:
|
||||
const: operating-points-v2-kryo-cpu
|
||||
then:
|
||||
patternProperties:
|
||||
'^opp-?[0-9]+$':
|
||||
required:
|
||||
- required-opps
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
|
@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator
|
||||
|
||||
maintainers:
|
||||
- Tudor Ambarus <tudor.ambarus@microchip.com>
|
||||
- Tudor Ambarus <tudor.ambarus@linaro.org>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
|
@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator
|
||||
|
||||
maintainers:
|
||||
- Tudor Ambarus <tudor.ambarus@microchip.com>
|
||||
- Tudor Ambarus <tudor.ambarus@linaro.org>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
|
@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator
|
||||
|
||||
maintainers:
|
||||
- Tudor Ambarus <tudor.ambarus@microchip.com>
|
||||
- Tudor Ambarus <tudor.ambarus@linaro.org>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
|
@ -32,7 +32,7 @@ properties:
|
||||
- description: Display byte clock
|
||||
- description: Display byte interface clock
|
||||
- description: Display pixel clock
|
||||
- description: Display escape clock
|
||||
- description: Display core clock
|
||||
- description: Display AHB clock
|
||||
- description: Display AXI clock
|
||||
|
||||
@ -137,8 +137,6 @@ required:
|
||||
- phys
|
||||
- assigned-clocks
|
||||
- assigned-clock-parents
|
||||
- power-domains
|
||||
- operating-points-v2
|
||||
- ports
|
||||
|
||||
additionalProperties: false
|
||||
|
@ -69,7 +69,6 @@ required:
|
||||
- compatible
|
||||
- reg
|
||||
- reg-names
|
||||
- vdds-supply
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
|
@ -39,7 +39,6 @@ required:
|
||||
- compatible
|
||||
- reg
|
||||
- reg-names
|
||||
- vcca-supply
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
|
@ -34,6 +34,10 @@ properties:
|
||||
vddio-supply:
|
||||
description: Phandle to vdd-io regulator device node.
|
||||
|
||||
qcom,dsi-phy-regulator-ldo-mode:
|
||||
type: boolean
|
||||
description: Indicates if the LDO mode PHY regulator is wanted.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
@ -72,7 +72,7 @@ examples:
|
||||
#include <dt-bindings/interconnect/qcom,qcm2290.h>
|
||||
#include <dt-bindings/power/qcom-rpmpd.h>
|
||||
|
||||
mdss@5e00000 {
|
||||
display-subsystem@5e00000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "qcom,qcm2290-mdss";
|
||||
|
@ -62,7 +62,7 @@ examples:
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/qcom-rpmpd.h>
|
||||
|
||||
mdss@5e00000 {
|
||||
display-subsystem@5e00000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "qcom,sm6115-mdss";
|
||||
|
@ -20,10 +20,10 @@ properties:
|
||||
- nvidia,tegra194-display
|
||||
|
||||
'#address-cells':
|
||||
const: 1
|
||||
enum: [ 1, 2 ]
|
||||
|
||||
'#size-cells':
|
||||
const: 1
|
||||
enum: [ 1, 2 ]
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
@ -16,7 +16,7 @@ properties:
|
||||
compatible:
|
||||
items:
|
||||
- enum:
|
||||
- renesas,i2c-r9a09g011 # RZ/V2M
|
||||
- renesas,r9a09g011-i2c # RZ/V2M
|
||||
- const: renesas,rzv2m-i2c
|
||||
|
||||
reg:
|
||||
@ -66,7 +66,7 @@ examples:
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
|
||||
i2c0: i2c@a4030000 {
|
||||
compatible = "renesas,i2c-r9a09g011", "renesas,rzv2m-i2c";
|
||||
compatible = "renesas,r9a09g011-i2c", "renesas,rzv2m-i2c";
|
||||
reg = <0xa4030000 0x80>;
|
||||
interrupts = <GIC_SPI 232 IRQ_TYPE_EDGE_RISING>,
|
||||
<GIC_SPI 236 IRQ_TYPE_EDGE_RISING>;
|
||||
|
@ -0,0 +1,70 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/interconnect/qcom,qdu1000-rpmh.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm RPMh Network-On-Chip Interconnect on QDU1000
|
||||
|
||||
maintainers:
|
||||
- Georgi Djakov <djakov@kernel.org>
|
||||
- Odelu Kukatla <quic_okukatla@quicinc.com>
|
||||
|
||||
description: |
|
||||
RPMh interconnect providers support system bandwidth requirements through
|
||||
RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is
|
||||
able to communicate with the BCM through the Resource State Coordinator (RSC)
|
||||
associated with each execution environment. Provider nodes must point to at
|
||||
least one RPMh device child node pertaining to their RSC and each provider
|
||||
can map to multiple RPMh resources.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- qcom,qdu1000-clk-virt
|
||||
- qcom,qdu1000-gem-noc
|
||||
- qcom,qdu1000-mc-virt
|
||||
- qcom,qdu1000-system-noc
|
||||
|
||||
'#interconnect-cells': true
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
allOf:
|
||||
- $ref: qcom,rpmh-common.yaml#
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,qdu1000-clk-virt
|
||||
- qcom,qdu1000-mc-virt
|
||||
then:
|
||||
properties:
|
||||
reg: false
|
||||
else:
|
||||
required:
|
||||
- reg
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interconnect/qcom,qdu1000-rpmh.h>
|
||||
|
||||
system_noc: interconnect@1640000 {
|
||||
compatible = "qcom,qdu1000-system-noc";
|
||||
reg = <0x1640000 0x45080>;
|
||||
#interconnect-cells = <2>;
|
||||
qcom,bcm-voters = <&apps_bcm_voter>;
|
||||
};
|
||||
|
||||
clk_virt: interconnect-0 {
|
||||
compatible = "qcom,qdu1000-clk-virt";
|
||||
#interconnect-cells = <2>;
|
||||
qcom,bcm-voters = <&apps_bcm_voter>;
|
||||
};
|
@ -84,7 +84,6 @@ allOf:
|
||||
- qcom,msm8939-pcnoc
|
||||
- qcom,msm8939-snoc
|
||||
- qcom,msm8996-a1noc
|
||||
- qcom,msm8996-a2noc
|
||||
- qcom,msm8996-bimc
|
||||
- qcom,msm8996-cnoc
|
||||
- qcom,msm8996-pnoc
|
||||
@ -186,6 +185,29 @@ allOf:
|
||||
required:
|
||||
- power-domains
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,msm8996-a2noc
|
||||
|
||||
then:
|
||||
properties:
|
||||
clock-names:
|
||||
items:
|
||||
- const: bus
|
||||
- const: bus_a
|
||||
- const: aggre2_ufs_axi
|
||||
- const: ufs_axi
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Bus Clock
|
||||
- description: Bus A Clock
|
||||
- description: Aggregate2 NoC UFS AXI Clock
|
||||
- description: UFS AXI Clock
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
|
@ -0,0 +1,139 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/interconnect/qcom,sm8550-rpmh.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm RPMh Network-On-Chip Interconnect on SM8550
|
||||
|
||||
maintainers:
|
||||
- Abel Vesa <abel.vesa@linaro.org>
|
||||
- Neil Armstrong <neil.armstrong@linaro.org>
|
||||
|
||||
description: |
|
||||
RPMh interconnect providers support system bandwidth requirements through
|
||||
RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is
|
||||
able to communicate with the BCM through the Resource State Coordinator (RSC)
|
||||
associated with each execution environment. Provider nodes must point to at
|
||||
least one RPMh device child node pertaining to their RSC and each provider
|
||||
can map to multiple RPMh resources.
|
||||
|
||||
See also:: include/dt-bindings/interconnect/qcom,sm8550-rpmh.h
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- qcom,sm8550-aggre1-noc
|
||||
- qcom,sm8550-aggre2-noc
|
||||
- qcom,sm8550-clk-virt
|
||||
- qcom,sm8550-cnoc-main
|
||||
- qcom,sm8550-config-noc
|
||||
- qcom,sm8550-gem-noc
|
||||
- qcom,sm8550-lpass-ag-noc
|
||||
- qcom,sm8550-lpass-lpiaon-noc
|
||||
- qcom,sm8550-lpass-lpicx-noc
|
||||
- qcom,sm8550-mc-virt
|
||||
- qcom,sm8550-mmss-noc
|
||||
- qcom,sm8550-nsp-noc
|
||||
- qcom,sm8550-pcie-anoc
|
||||
- qcom,sm8550-system-noc
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
minItems: 1
|
||||
maxItems: 2
|
||||
|
||||
allOf:
|
||||
- $ref: qcom,rpmh-common.yaml#
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,sm8550-clk-virt
|
||||
- qcom,sm8550-mc-virt
|
||||
then:
|
||||
properties:
|
||||
reg: false
|
||||
else:
|
||||
required:
|
||||
- reg
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,sm8550-pcie-anoc
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
items:
|
||||
- description: aggre-NOC PCIe AXI clock
|
||||
- description: cfg-NOC PCIe a-NOC AHB clock
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,sm8550-aggre1-noc
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
items:
|
||||
- description: aggre UFS PHY AXI clock
|
||||
- description: aggre USB3 PRIM AXI clock
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,sm8550-aggre2-noc
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
items:
|
||||
- description: RPMH CC IPA clock
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,sm8550-aggre1-noc
|
||||
- qcom,sm8550-aggre2-noc
|
||||
- qcom,sm8550-pcie-anoc
|
||||
then:
|
||||
required:
|
||||
- clocks
|
||||
else:
|
||||
properties:
|
||||
clocks: false
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,sm8550-gcc.h>
|
||||
|
||||
clk_virt: interconnect-0 {
|
||||
compatible = "qcom,sm8550-clk-virt";
|
||||
#interconnect-cells = <2>;
|
||||
qcom,bcm-voters = <&apps_bcm_voter>;
|
||||
};
|
||||
|
||||
aggre1_noc: interconnect@16e0000 {
|
||||
compatible = "qcom,sm8550-aggre1-noc";
|
||||
reg = <0x016e0000 0x14400>;
|
||||
#interconnect-cells = <2>;
|
||||
clocks = <&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>,
|
||||
<&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>;
|
||||
qcom,bcm-voters = <&apps_bcm_voter>;
|
||||
};
|
@ -108,7 +108,7 @@ properties:
|
||||
|
||||
msi-controller:
|
||||
description:
|
||||
Only present if the Message Based Interrupt functionnality is
|
||||
Only present if the Message Based Interrupt functionality is
|
||||
being exposed by the HW, and the mbi-ranges property present.
|
||||
|
||||
mbi-ranges:
|
||||
|
@ -1,53 +0,0 @@
|
||||
Samsung S3C24XX Interrupt Controllers
|
||||
|
||||
The S3C24XX SoCs contain a custom set of interrupt controllers providing a
|
||||
varying number of interrupt sources. The set consists of a main- and sub-
|
||||
controller and on newer SoCs even a second main controller.
|
||||
|
||||
Required properties:
|
||||
- compatible: Compatible property value should be "samsung,s3c2410-irq"
|
||||
for machines before s3c2416 and "samsung,s3c2416-irq" for s3c2416 and later.
|
||||
|
||||
- reg: Physical base address of the controller and length of memory mapped
|
||||
region.
|
||||
|
||||
- interrupt-controller : Identifies the node as an interrupt controller
|
||||
|
||||
- #interrupt-cells : Specifies the number of cells needed to encode an
|
||||
interrupt source. The value shall be 4 and interrupt descriptor shall
|
||||
have the following format:
|
||||
<ctrl_num parent_irq ctrl_irq type>
|
||||
|
||||
ctrl_num contains the controller to use:
|
||||
- 0 ... main controller
|
||||
- 1 ... sub controller
|
||||
- 2 ... second main controller on s3c2416 and s3c2450
|
||||
parent_irq contains the parent bit in the main controller and will be
|
||||
ignored in main controllers
|
||||
ctrl_irq contains the interrupt bit of the controller
|
||||
type contains the trigger type to use
|
||||
|
||||
Example:
|
||||
|
||||
interrupt-controller@4a000000 {
|
||||
compatible = "samsung,s3c2410-irq";
|
||||
reg = <0x4a000000 0x100>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells=<4>;
|
||||
};
|
||||
|
||||
[...]
|
||||
|
||||
serial@50000000 {
|
||||
compatible = "samsung,s3c2410-uart";
|
||||
reg = <0x50000000 0x4000>;
|
||||
interrupt-parent = <&subintc>;
|
||||
interrupts = <1 28 0 4>, <1 28 1 4>;
|
||||
};
|
||||
|
||||
rtc@57000000 {
|
||||
compatible = "samsung,s3c2410-rtc";
|
||||
reg = <0x57000000 0x100>;
|
||||
interrupt-parent = <&intc>;
|
||||
interrupts = <0 30 0 3>, <0 8 0 3>;
|
||||
};
|
@ -0,0 +1,45 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/memory-controllers/xlnx,zynqmp-ocmc-1.0.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Xilinx Zynqmp OCM(On-Chip Memory) Controller
|
||||
|
||||
maintainers:
|
||||
- Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
|
||||
- Sai Krishna Potthuri <sai.krishna.potthuri@amd.com>
|
||||
|
||||
description: |
|
||||
The OCM supports 64-bit wide ECC functionality to detect multi-bit errors
|
||||
and recover from a single-bit memory fault.On a write, if all bytes are
|
||||
being written, the ECC is generated and written into the ECC RAM along with
|
||||
the write-data that is written into the data RAM. If one or more bytes are
|
||||
not written, then the read operation results in an correctable error or
|
||||
uncorrectable error.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: xlnx,zynqmp-ocmc-1.0
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
memory-controller@ff960000 {
|
||||
compatible = "xlnx,zynqmp-ocmc-1.0";
|
||||
reg = <0xff960000 0x1000>;
|
||||
interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
@ -64,12 +64,6 @@ properties:
|
||||
- rockchip,rk3568-qos
|
||||
- rockchip,rk3588-qos
|
||||
- rockchip,rv1126-qos
|
||||
- samsung,exynos3-sysreg
|
||||
- samsung,exynos4-sysreg
|
||||
- samsung,exynos5-sysreg
|
||||
- samsung,exynos5433-sysreg
|
||||
- samsung,exynos850-sysreg
|
||||
- samsung,exynosautov9-sysreg
|
||||
|
||||
- const: syscon
|
||||
|
||||
|
@ -1,42 +0,0 @@
|
||||
* Samsung's S3C24XX MMC/SD/SDIO controller device tree bindings
|
||||
|
||||
Samsung's S3C24XX MMC/SD/SDIO controller is used as a connectivity interface
|
||||
with external MMC, SD and SDIO storage mediums.
|
||||
|
||||
This file documents differences between the core mmc properties described by
|
||||
mmc.txt and the properties used by the Samsung S3C24XX MMC/SD/SDIO controller
|
||||
implementation.
|
||||
|
||||
Required SoC Specific Properties:
|
||||
- compatible: should be one of the following
|
||||
- "samsung,s3c2410-sdi": for controllers compatible with s3c2410
|
||||
- "samsung,s3c2412-sdi": for controllers compatible with s3c2412
|
||||
- "samsung,s3c2440-sdi": for controllers compatible with s3c2440
|
||||
- reg: register location and length
|
||||
- interrupts: mmc controller interrupt
|
||||
- clocks: Should reference the controller clock
|
||||
- clock-names: Should contain "sdi"
|
||||
|
||||
Required Board Specific Properties:
|
||||
- pinctrl-0: Should specify pin control groups used for this controller.
|
||||
- pinctrl-names: Should contain only one value - "default".
|
||||
|
||||
Optional Properties:
|
||||
- bus-width: number of data lines (see mmc.txt)
|
||||
- cd-gpios: gpio for card detection (see mmc.txt)
|
||||
- wp-gpios: gpio for write protection (see mmc.txt)
|
||||
|
||||
Example:
|
||||
|
||||
mmc0: mmc@5a000000 {
|
||||
compatible = "samsung,s3c2440-sdi";
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&sdi_pins>;
|
||||
reg = <0x5a000000 0x100000>;
|
||||
interrupts = <0 0 21 3>;
|
||||
clocks = <&clocks PCLK_SDI>;
|
||||
clock-names = "sdi";
|
||||
bus-width = <4>;
|
||||
cd-gpios = <&gpg 8 GPIO_ACTIVE_LOW>;
|
||||
wp-gpios = <&gph 8 GPIO_ACTIVE_LOW>;
|
||||
};
|
@ -40,6 +40,9 @@ properties:
|
||||
clock-names:
|
||||
const: stmmaceth
|
||||
|
||||
phy-supply:
|
||||
description: PHY regulator
|
||||
|
||||
syscon:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user