Linux 5.1-rc1
-----BEGIN PGP SIGNATURE----- iQFSBAABCAA8FiEEq68RxlopcLEwq+PEeb4+QwBBGIYFAlyOup0eHHRvcnZhbGRz QGxpbnV4LWZvdW5kYXRpb24ub3JnAAoJEHm+PkMAQRiGHKoIAIKVuBSyD+m65TaM pjoAFa56weEc67Mmai2A84EOm0MVy9C6L7EOcOgVsJiLxDCYyWQ7xYwV2kceKJpW H5xauhb3+TxpxYeaeKdPPPHmBdejRwOPYvGAfnDMCqCCWQTad52sQUPCLI+yhF1t wgnuMi+SwNBWP9aYCXdFPK4fVhh27AcEAOEsRVCh4tIBH/wkf4GwrDr3IX1MFeMX jE/R43la4hu1swcWBsjkErWUasVPCgJSSQTfKDo9PQTVnoh0PHFp4fkOInVKLymQ 7AGo+Knc+1he+sFsB2IbZwea0xqtJtjtr1oC+at8gNx66qVG+o7UZNi5LR1uPW4Z 4+dwGBk= =pyXR -----END PGP SIGNATURE----- gpgsig -----BEGIN PGP SIGNATURE----- iQFHBAABCgAxFiEEreZoqmdXGLWf4p/qJNaLcl1Uh9AFAlyPfa8THGJyb29uaWVA a2VybmVsLm9yZwAKCRAk1otyXVSH0LsKB/9Qd7sfWgGVo4tk6uYwxOVOh8oEW6AU SB4iMlC6MJjrJkHfOXqE98Mf9+RHDty3a/F+t0A/eaNfoQVXFexpeOgPv098gCU5 gtq1N3x2Cr0JJxBpO4Sc3Pz0UH24sboNfPsJO4NTBGfdNwQvhId35lbYTSCKE3PC YzsmS31avg/NhXPJ6jhxxx0qOmoLemZ2bfj2j2JvLeKEewUkEVl0Hj2PKbSahFzB RssauFsLxeFpdbdzScEu9xvnFX3OJvcu/7/Cb/ji5+zH8brE9L+5yywtgyWxi/6i zNHFwqDTr3o7R3tV7aXUo8iGvMTsWBvbqvjY18zrXzpiZKr4ZMcW0BYk =NCbp -----END PGP SIGNATURE----- Merge tag 'v5.1-rc1' into asoc-5.1 Linux 5.1-rc1
@ -240,6 +240,7 @@ ForEachMacros:
|
||||
- 'for_each_set_bit'
|
||||
- 'for_each_set_bit_from'
|
||||
- 'for_each_sg'
|
||||
- 'for_each_sg_dma_page'
|
||||
- 'for_each_sg_page'
|
||||
- 'for_each_sibling_event'
|
||||
- '__for_each_thread'
|
||||
@ -289,7 +290,6 @@ ForEachMacros:
|
||||
- 'idr_for_each_entry_ul'
|
||||
- 'inet_bind_bucket_for_each'
|
||||
- 'inet_lhash2_for_each_icsk_rcu'
|
||||
- 'iov_for_each'
|
||||
- 'key_for_each'
|
||||
- 'key_for_each_safe'
|
||||
- 'klp_for_each_func'
|
||||
@ -360,6 +360,7 @@ ForEachMacros:
|
||||
- 'radix_tree_for_each_slot'
|
||||
- 'radix_tree_for_each_tagged'
|
||||
- 'rbtree_postorder_for_each_entry_safe'
|
||||
- 'rdma_for_each_port'
|
||||
- 'resource_list_for_each_entry'
|
||||
- 'resource_list_for_each_entry_safe'
|
||||
- 'rhl_for_each_entry_rcu'
|
||||
|
1
.mailmap
@ -123,6 +123,7 @@ Mark Brown <broonie@sirena.org.uk>
|
||||
Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
|
||||
Mathieu Othacehe <m.othacehe@gmail.com>
|
||||
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
|
||||
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
|
||||
Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>
|
||||
|
22
CREDITS
@ -842,10 +842,9 @@ D: ax25-utils maintainer.
|
||||
|
||||
N: Helge Deller
|
||||
E: deller@gmx.de
|
||||
E: hdeller@redhat.de
|
||||
D: PA-RISC Linux hacker, LASI-, ASP-, WAX-, LCD/LED-driver
|
||||
S: Schimmelsrain 1
|
||||
S: D-69231 Rauenberg
|
||||
W: http://www.parisc-linux.org/
|
||||
D: PA-RISC Linux architecture maintainer
|
||||
D: LASI-, ASP-, WAX-, LCD/LED-driver
|
||||
S: Germany
|
||||
|
||||
N: Jean Delvare
|
||||
@ -1222,7 +1221,7 @@ S: Brazil
|
||||
|
||||
N: Oded Gabbay
|
||||
E: oded.gabbay@gmail.com
|
||||
D: AMD KFD maintainer
|
||||
D: HabanaLabs and AMD KFD maintainer
|
||||
S: 12 Shraga Raphaeli
|
||||
S: Petah-Tikva, 4906418
|
||||
S: Israel
|
||||
@ -1361,7 +1360,7 @@ S: Stellenbosch, Western Cape
|
||||
S: South Africa
|
||||
|
||||
N: Grant Grundler
|
||||
E: grundler@parisc-linux.org
|
||||
E: grantgrundler@gmail.com
|
||||
W: http://obmouse.sourceforge.net/
|
||||
W: http://www.parisc-linux.org/
|
||||
D: obmouse - rewrote Olivier Florent's Omnibook 600 "pop-up" mouse driver
|
||||
@ -2492,7 +2491,7 @@ S: Syracuse, New York 13206
|
||||
S: USA
|
||||
|
||||
N: Kyle McMartin
|
||||
E: kyle@parisc-linux.org
|
||||
E: kyle@mcmartin.ca
|
||||
D: Linux/PARISC hacker
|
||||
D: AD1889 sound driver
|
||||
S: Ottawa, Canada
|
||||
@ -3780,14 +3779,13 @@ S: 21513 Conradia Ct
|
||||
S: Cupertino, CA 95014
|
||||
S: USA
|
||||
|
||||
N: Thibaut Varene
|
||||
E: T-Bone@parisc-linux.org
|
||||
W: http://www.parisc-linux.org/~varenet/
|
||||
P: 1024D/B7D2F063 E67C 0D43 A75E 12A5 BB1C FA2F 1E32 C3DA B7D2 F063
|
||||
N: Thibaut Varène
|
||||
E: hacks+kernel@slashdirt.org
|
||||
W: http://hacks.slashdirt.org/
|
||||
D: PA-RISC port minion, PDC and GSCPS2 drivers, debuglocks and other bits
|
||||
D: Some ARM at91rm9200 bits, S1D13XXX FB driver, random patches here and there
|
||||
D: AD1889 sound driver
|
||||
S: Paris, France
|
||||
S: France
|
||||
|
||||
N: Heikki Vatiainen
|
||||
E: hessu@cs.tut.fi
|
||||
|
22
Documentation/ABI/obsolete/sysfs-class-dax
Normal file
@ -0,0 +1,22 @@
|
||||
What: /sys/class/dax/
|
||||
Date: May, 2016
|
||||
KernelVersion: v4.7
|
||||
Contact: linux-nvdimm@lists.01.org
|
||||
Description: Device DAX is the device-centric analogue of Filesystem
|
||||
DAX (CONFIG_FS_DAX). It allows memory ranges to be
|
||||
allocated and mapped without need of an intervening file
|
||||
system. Device DAX is strict, precise and predictable.
|
||||
Specifically this interface:
|
||||
|
||||
1/ Guarantees fault granularity with respect to a given
|
||||
page size (pte, pmd, or pud) set at configuration time.
|
||||
|
||||
2/ Enforces deterministic behavior by being strict about
|
||||
what fault scenarios are supported.
|
||||
|
||||
The /sys/class/dax/ interface enumerates all the
|
||||
device-dax instances in the system. The ABI is
|
||||
deprecated and will be removed after 2020. It is
|
||||
replaced with the DAX bus interface /sys/bus/dax/ where
|
||||
device-dax instances can be found under
|
||||
/sys/bus/dax/devices/
|
@ -146,3 +146,36 @@ KernelVersion: 4.16
|
||||
Contact: Stephen Hemminger <sthemmin@microsoft.com>
|
||||
Description: Binary file created by uio_hv_generic for ring buffer
|
||||
Users: Userspace drivers
|
||||
|
||||
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_in_full
|
||||
Date: February 2019
|
||||
KernelVersion: 5.0
|
||||
Contact: Michael Kelley <mikelley@microsoft.com>
|
||||
Description: Number of guest to host interrupts caused by the inbound ring
|
||||
buffer transitioning from full to not full while a packet is
|
||||
waiting for buffer space to become available
|
||||
Users: Debugging tools
|
||||
|
||||
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_out_empty
|
||||
Date: February 2019
|
||||
KernelVersion: 5.0
|
||||
Contact: Michael Kelley <mikelley@microsoft.com>
|
||||
Description: Number of guest to host interrupts caused by the outbound ring
|
||||
buffer transitioning from empty to not empty
|
||||
Users: Debugging tools
|
||||
|
||||
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_first
|
||||
Date: February 2019
|
||||
KernelVersion: 5.0
|
||||
Contact: Michael Kelley <mikelley@microsoft.com>
|
||||
Description: Number of write operations that were the first to encounter an
|
||||
outbound ring buffer full condition
|
||||
Users: Debugging tools
|
||||
|
||||
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_total
|
||||
Date: February 2019
|
||||
KernelVersion: 5.0
|
||||
Contact: Michael Kelley <mikelley@microsoft.com>
|
||||
Description: Total number of write operations that encountered an outbound
|
||||
ring buffer full condition
|
||||
Users: Debugging tools
|
||||
|
@ -21,10 +21,22 @@ Description: These files show with which CPLD versions have been burned
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
|
||||
cpld3_version
|
||||
fan_dir
|
||||
|
||||
Date: December 2018
|
||||
KernelVersion: 5.0
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: This file shows the system fans direction:
|
||||
forward direction - relevant bit is set to 0;
|
||||
reversed direction - relevant bit is set to 1.
|
||||
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
|
||||
jtag_enable
|
||||
|
||||
Date: November 2018
|
||||
KernelVersion: 4.21
|
||||
KernelVersion: 5.0
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: These files show with which CPLD versions have been burned
|
||||
on LED board.
|
||||
@ -35,7 +47,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
|
||||
jtag_enable
|
||||
|
||||
Date: November 2018
|
||||
KernelVersion: 4.21
|
||||
KernelVersion: 5.0
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: These files enable and disable the access to the JTAG domain.
|
||||
By default access to the JTAG domain is disabled.
|
||||
@ -105,7 +117,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
|
||||
reset_voltmon_upgrade_fail
|
||||
|
||||
Date: November 2018
|
||||
KernelVersion: 4.21
|
||||
KernelVersion: 5.0
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: These files show the system reset cause, as follows: ComEx
|
||||
power fail, reset from ComEx, system platform reset, reset
|
||||
|
126
Documentation/ABI/testing/debugfs-driver-habanalabs
Normal file
@ -0,0 +1,126 @@
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets the device address to be used for read or write through
|
||||
PCI bar. The acceptable value is a string that starts with "0x"
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays a list with information about the currently allocated
|
||||
command buffers
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays a list with information about the currently active
|
||||
command submissions
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays a list with detailed information about each JOB (CB) of
|
||||
each active command submission
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/data32
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the root user to read or write directly through the
|
||||
device's PCI bar. Writing to this file generates a write
|
||||
transaction while reading from the file generates a read
|
||||
transaction. This custom interface is needed (instead of using
|
||||
the generic Linux user-space PCI mapping) because the DDR bar
|
||||
is very small compared to the DDR memory and only the driver can
|
||||
move the bar before and after the transaction
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/device
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Enables the root user to set the device to specific state.
|
||||
Valid values are "disable", "enable", "suspend", "resume".
|
||||
User can read this property to see the valid values
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets I2C device address for I2C transaction that is generated
|
||||
by the device's CPU
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets I2C bus address for I2C transaction that is generated by
|
||||
the device's CPU
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Triggers an I2C transaction that is generated by the device's
|
||||
CPU. Writing to this file generates a write transaction while
|
||||
reading from the file generates a read transaction
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets I2C register id for I2C transaction that is generated by
|
||||
the device's CPU
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led0
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets the state of the first S/W led on the device
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led1
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets the state of the second S/W led on the device
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led2
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets the state of the third S/W led on the device
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the hop values and physical address for a given ASID
|
||||
and virtual address. The user should write the ASID and VA into
|
||||
the file and then read the file to get the result.
|
||||
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
|
||||
echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/set_power_state
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
|
||||
for D3Hot
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays a list with information about the current user
|
||||
pointers (user virtual addresses) that are pinned and mapped
|
||||
to DMA addresses
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/vm
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays a list with information about all the active virtual
|
||||
address mappings per ASID
|
23
Documentation/ABI/testing/debugfs-wilco-ec
Normal file
@ -0,0 +1,23 @@
|
||||
What: /sys/kernel/debug/wilco_ec/raw
|
||||
Date: January 2019
|
||||
KernelVersion: 5.1
|
||||
Description:
|
||||
Write and read raw mailbox commands to the EC.
|
||||
|
||||
For writing:
|
||||
Bytes 0-1 indicate the message type:
|
||||
00 F0 = Execute Legacy Command
|
||||
00 F2 = Read/Write NVRAM Property
|
||||
Byte 2 provides the command code
|
||||
Bytes 3+ consist of the data passed in the request
|
||||
|
||||
At least three bytes are required, for the msg type and command,
|
||||
with additional bytes optional for additional data.
|
||||
|
||||
Example:
|
||||
// Request EC info type 3 (EC firmware build date)
|
||||
$ echo 00 f0 38 00 03 00 > raw
|
||||
// View the result. The decoded ASCII result "12/21/18" is
|
||||
// included after the raw hex.
|
||||
$ cat raw
|
||||
00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00 .12/21/18.8...
|
@ -1554,6 +1554,10 @@ What: /sys/bus/iio/devices/iio:deviceX/in_concentration_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
|
||||
KernelVersion: 4.3
|
||||
@ -1684,4 +1688,19 @@ KernelVersion: 4.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Raw (unscaled) phase difference reading from channel Y
|
||||
that can be processed to radians.
|
||||
that can be processed to radians.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm1_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm1_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm2p5_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm2p5_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm4_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm4_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm10_input
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm10_input
|
||||
KernelVersion: 4.22
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Mass concentration reading of particulate matter in ug / m3.
|
||||
pmX consists of particles with aerodynamic diameter less or
|
||||
equal to X micrometers.
|
||||
|
28
Documentation/ABI/testing/sysfs-bus-iio-sps30
Normal file
@ -0,0 +1,28 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/start_cleaning
|
||||
Date: December 2018
|
||||
KernelVersion: 4.22
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Writing 1 starts sensor self cleaning. Internal fan accelerates
|
||||
to its maximum speed and keeps spinning for about 10 seconds in
|
||||
order to blow out accumulated dust.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/cleaning_period
|
||||
Date: January 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sensor is capable of triggering self cleaning periodically.
|
||||
Period can be changed by writing a new value here. Upon reading
|
||||
the current one is returned. Units are seconds.
|
||||
|
||||
Writing 0 disables periodical self cleaning entirely.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/cleaning_period_available
|
||||
Date: January 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The range of available values in seconds represented as the
|
||||
minimum value, the step and the maximum value, all enclosed in
|
||||
square brackets.
|
@ -3,11 +3,13 @@ Date: June 2015
|
||||
KernelVersion: 4.3
|
||||
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
|
||||
Description: (RW) Writes of 1 or 0 enable or disable trace output to this
|
||||
output device. Reads return current status.
|
||||
output device. Reads return current status. Requires that the
|
||||
corresponding output port driver be loaded.
|
||||
|
||||
What: /sys/bus/intel_th/devices/<intel_th_id>-msc<msc-id>/port
|
||||
Date: June 2015
|
||||
KernelVersion: 4.3
|
||||
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
|
||||
Description: (RO) Port number, corresponding to this output device on the
|
||||
switch (GTH).
|
||||
switch (GTH) or "unassigned" if the corresponding output
|
||||
port driver is not loaded.
|
||||
|
@ -186,7 +186,7 @@ Contact: Lan Tianyu <tianyu.lan@intel.com>
|
||||
Description:
|
||||
Some platforms provide usb port connect types through ACPI.
|
||||
This attribute is to expose this information to user space.
|
||||
The file will read "hotplug", "wired" and "not used" if the
|
||||
The file will read "hotplug", "hardwired" and "not used" if the
|
||||
information is available, and "unknown" otherwise.
|
||||
|
||||
What: /sys/bus/usb/devices/.../(hub interface)/portX/location
|
||||
|
32
Documentation/ABI/testing/sysfs-class-chromeos
Normal file
@ -0,0 +1,32 @@
|
||||
What: /sys/class/chromeos/<ec-device-name>/flashinfo
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
Show the EC flash information.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/kb_wake_angle
|
||||
Date: March 2018
|
||||
KernelVersion: 4.17
|
||||
Description:
|
||||
Control the keyboard wake lid angle. Values are between
|
||||
0 and 360. This file will also show the keyboard wake lid
|
||||
angle by querying the hardware.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/reboot
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
Tell the EC to reboot in various ways. Options are:
|
||||
"cancel": Cancel a pending reboot.
|
||||
"ro": Jump to RO without rebooting.
|
||||
"rw": Jump to RW without rebooting.
|
||||
"cold": Cold reboot.
|
||||
"disable-jump": Disable jump until next reboot.
|
||||
"hibernate": Hibernate the EC.
|
||||
"at-shutdown": Reboot after an AP shutdown.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/version
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
Show the information about the EC software and hardware.
|
@ -0,0 +1,74 @@
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/brightness
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
Writing to this file adjusts the overall brightness of
|
||||
the lightbar, separate from any color intensity. The
|
||||
valid range is 0 (off) to 255 (maximum brightness).
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/interval_msec
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
The lightbar is controlled by an embedded controller (EC),
|
||||
which also manages the keyboard, battery charging, fans,
|
||||
and other system hardware. To prevent unprivileged users
|
||||
from interfering with the other EC functions, the rate at
|
||||
which the lightbar control files can be read or written is
|
||||
limited.
|
||||
|
||||
Reading this file will return the number of milliseconds
|
||||
that must elapse between accessing any of the lightbar
|
||||
functions through this interface. Going faster will simply
|
||||
block until the necessary interval has lapsed. The interval
|
||||
applies uniformly to all accesses of any kind by any user.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/led_rgb
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
This allows you to control each LED segment. If the
|
||||
lightbar is already running one of the automatic
|
||||
sequences, you probably won’t see anything change because
|
||||
your color setting will be almost immediately replaced.
|
||||
To get useful results, you should stop the lightbar
|
||||
sequence first.
|
||||
|
||||
The values written to this file are sets of four integers,
|
||||
indicating LED, RED, GREEN, BLUE. The LED number is 0 to 3
|
||||
to select a single segment, or 4 to set all four segments
|
||||
to the same value at once. The RED, GREEN, and BLUE
|
||||
numbers should be in the range 0 (off) to 255 (maximum).
|
||||
You can update more than one segment at a time by writing
|
||||
more than one set of four integers.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/program
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
This allows you to upload and run custom lightbar sequences.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/sequence
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
The Pixel lightbar has a number of built-in sequences
|
||||
that it displays under various conditions, such as at
|
||||
power on, shut down, or while running. Reading from this
|
||||
file displays the current sequence that the lightbar is
|
||||
displaying. Writing to this file allows you to change the
|
||||
sequence.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/userspace_control
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
This allows you to take the control of the lightbar. This
|
||||
prevents the kernel from going through its normal
|
||||
sequences.
|
||||
|
||||
What: /sys/class/chromeos/<ec-device-name>/lightbar/version
|
||||
Date: August 2015
|
||||
KernelVersion: 4.2
|
||||
Description:
|
||||
Show the information about the lightbar version.
|
@ -0,0 +1,6 @@
|
||||
What: /sys/class/chromeos/<ec-device-name>/vbc/vboot_context
|
||||
Date: October 2015
|
||||
KernelVersion: 4.4
|
||||
Description:
|
||||
Read/write the verified boot context data included on a
|
||||
small nvram space on some EC implementations.
|
@ -7,55 +7,10 @@ Description:
|
||||
timer. It can do gradual dimming and step change of brightness.
|
||||
|
||||
The pattern is given by a series of tuples, of brightness and
|
||||
duration (ms). The LED is expected to traverse the series and
|
||||
each brightness value for the specified duration. Duration of
|
||||
0 means brightness should immediately change to new value, and
|
||||
writing malformed pattern deactivates any active one.
|
||||
duration (ms).
|
||||
|
||||
1. For gradual dimming, the dimming interval now is set as 50
|
||||
milliseconds. So the tuple with duration less than dimming
|
||||
interval (50ms) is treated as a step change of brightness,
|
||||
i.e. the subsequent brightness will be applied without adding
|
||||
intervening dimming intervals.
|
||||
|
||||
The gradual dimming format of the software pattern values should be:
|
||||
"brightness_1 duration_1 brightness_2 duration_2 brightness_3
|
||||
duration_3 ...". For example:
|
||||
|
||||
echo 0 1000 255 2000 > pattern
|
||||
|
||||
It will make the LED go gradually from zero-intensity to max (255)
|
||||
intensity in 1000 milliseconds, then back to zero intensity in 2000
|
||||
milliseconds:
|
||||
|
||||
LED brightness
|
||||
^
|
||||
255-| / \ / \ /
|
||||
| / \ / \ /
|
||||
| / \ / \ /
|
||||
| / \ / \ /
|
||||
0-| / \/ \/
|
||||
+---0----1----2----3----4----5----6------------> time (s)
|
||||
|
||||
2. To make the LED go instantly from one brightness value to another,
|
||||
we should use zero-time lengths (the brightness must be same as
|
||||
the previous tuple's). So the format should be:
|
||||
"brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
|
||||
brightness_2 0 ...". For example:
|
||||
|
||||
echo 0 1000 0 0 255 2000 255 0 > pattern
|
||||
|
||||
It will make the LED stay off for one second, then stay at max brightness
|
||||
for two seconds:
|
||||
|
||||
LED brightness
|
||||
^
|
||||
255-| +---------+ +---------+
|
||||
| | | | |
|
||||
| | | | |
|
||||
| | | | |
|
||||
0-| -----+ +----+ +----
|
||||
+---0----1----2----3----4----5----6------------> time (s)
|
||||
The exact format is described in:
|
||||
Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
|
||||
|
||||
What: /sys/class/leds/<led>/hw_pattern
|
||||
Date: September 2018
|
||||
|
@ -49,3 +49,26 @@ Contact: Wim Van Sebroeck <wim@iguana.be>
|
||||
Description:
|
||||
It is a read only file. It is read to know about current
|
||||
value of timeout programmed.
|
||||
|
||||
What: /sys/class/watchdog/watchdogn/pretimeout
|
||||
Date: December 2016
|
||||
Contact: Wim Van Sebroeck <wim@iguana.be>
|
||||
Description:
|
||||
It is a read only file. It specifies the time in seconds before
|
||||
timeout when the pretimeout interrupt is delivered. Pretimeout
|
||||
is an optional feature.
|
||||
|
||||
What: /sys/class/watchdog/watchdogn/pretimeout_available_governors
|
||||
Date: February 2017
|
||||
Contact: Wim Van Sebroeck <wim@iguana.be>
|
||||
Description:
|
||||
It is a read only file. It shows the pretimeout governors
|
||||
available for this watchdog.
|
||||
|
||||
What: /sys/class/watchdog/watchdogn/pretimeout_governor
|
||||
Date: February 2017
|
||||
Contact: Wim Van Sebroeck <wim@iguana.be>
|
||||
Description:
|
||||
It is a read/write file. When read, the currently assigned
|
||||
pretimeout governor is returned. When written, it sets
|
||||
the pretimeout governor.
|
||||
|
190
Documentation/ABI/testing/sysfs-driver-habanalabs
Normal file
@ -0,0 +1,190 @@
|
||||
What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Linux kernel running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/armcp_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the application running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpld_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Device's CPLD F/W
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/device_type
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the code name of the device according to its type.
|
||||
The supported values are: "GOYA"
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/eeprom
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: A binary file attribute that contains the contents of the
|
||||
on-board EEPROM
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/fuse_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the device's version from the eFuse
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Interface to trigger a hard-reset operation for the device.
|
||||
Hard-reset will reset ALL internal components of the device
|
||||
except for the PCI interface and the internal PLLs
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset_cnt
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays how many times the device has undergone a hard-reset
|
||||
operation since the driver was loaded
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/high_pll
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the user to set the maximum clock frequency for MME, TPC
|
||||
and IC when the power management profile is set to "automatic".
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/ic_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the user to set the maximum clock frequency of the
|
||||
Interconnect fabric. Writes to this parameter affect the device
|
||||
only when the power management profile is set to "manual" mode.
|
||||
The device IC clock might be set to a lower value than the
|
||||
maximum. The user should read the ic_clk_curr to see the actual
|
||||
frequency value of the IC
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/ic_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the current clock frequency of the Interconnect fabric
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/infineon_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Device's power supply F/W code
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/max_power
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the user to set the maximum power consumption of the
|
||||
device in milliwatts.
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/mme_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the user to set the maximum clock frequency of the
|
||||
MME compute engine. Writes to this parameter affect the device
|
||||
only when the power management profile is set to "manual" mode.
|
||||
The device MME clock might be set to a lower value than the
|
||||
maximum. The user should read the mme_clk_curr to see the actual
|
||||
frequency value of the MME
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/mme_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the current clock frequency of the MME compute engine
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/pci_addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the PCI address of the device. This is needed so the
|
||||
user would be able to open a device based on its PCI address
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/pm_mng_profile
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Power management profile. Values are "auto", "manual". In "auto"
|
||||
mode, the driver will set the maximum clock frequency to a high
|
||||
value when a user-space process opens the device's file (unless
|
||||
it was already opened by another process). The driver will set
|
||||
the max clock frequency to a low value when there are no user
|
||||
processes that are opened on the device's file. In "manual"
|
||||
mode, the user sets the maximum clock frequency by writing to
|
||||
ic_clk, mme_clk and tpc_clk
|
||||
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the device's preboot F/W code
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/soft_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Interface to trigger a soft-reset operation for the device.
|
||||
Soft-reset will reset only the compute and DMA engines of the
|
||||
device
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/soft_reset_cnt
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays how many times the device has undergone a soft-reset
|
||||
operation since the driver was loaded
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/status
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Status of the card: "Operational", "Malfunction", "In reset".
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/thermal_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Device's thermal daemon
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/tpc_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Allows the user to set the maximum clock frequency of the
|
||||
TPC compute engines. Writes to this parameter affect the device
|
||||
only when the power management profile is set to "manual" mode.
|
||||
The device TPC clock might be set to a lower value than the
|
||||
maximum. The user should read the tpc_clk_curr to see the actual
|
||||
frequency value of the TPC
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the current clock frequency of the TPC compute engines
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/uboot_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the u-boot running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/write_open_cnt
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Displays the total number of user processes that are currently
|
||||
opened on the device's file
|
@ -109,3 +109,10 @@ Description:
|
||||
write operation (since a 4k random write might turn
|
||||
into a much larger write due to the zeroout
|
||||
operation).
|
||||
|
||||
What: /sys/fs/ext4/<disk>/journal_task
|
||||
Date: February 2019
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
This file is read-only and shows the pid of the journal thread in the
|
||||
current pid-namespace or 0 if task is unreachable.
|
||||
|
@ -86,6 +86,13 @@ Description:
|
||||
The unit size is one block, now only support configuring in range
|
||||
of [1, 512].
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/umount_discard_timeout
|
||||
Date: January 2019
|
||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||
Description:
|
||||
Set timeout to issue discard commands during umount.
|
||||
Default: 5 secs
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_victim_search
|
||||
Date: January 2014
|
||||
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
|
||||
|
@ -33,18 +33,6 @@ Description:
|
||||
An attribute which indicates whether the patch is currently in
|
||||
transition.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/signal
|
||||
Date: Nov 2017
|
||||
KernelVersion: 4.15.0
|
||||
Contact: live-patching@vger.kernel.org
|
||||
Description:
|
||||
A writable attribute that allows administrator to affect the
|
||||
course of an existing transition. Writing 1 sends a fake
|
||||
signal to all remaining blocking tasks. The fake signal
|
||||
means that no proper signal is delivered (there is no data in
|
||||
signal pending structures). Tasks are interrupted or woken up,
|
||||
and forced to change their patched state.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/force
|
||||
Date: Nov 2017
|
||||
KernelVersion: 4.15.0
|
||||
|
@ -146,114 +146,75 @@ What about block I/O and networking buffers? The block I/O and
|
||||
networking subsystems make sure that the buffers they use are valid
|
||||
for you to DMA from/to.
|
||||
|
||||
DMA addressing limitations
|
||||
DMA addressing capabilities
|
||||
==========================
|
||||
|
||||
Does your device have any DMA addressing limitations? For example, is
|
||||
your device only capable of driving the low order 24-bits of address?
|
||||
If so, you need to inform the kernel of this fact.
|
||||
By default, the kernel assumes that your device can address 32-bits of DMA
|
||||
addressing. For a 64-bit capable device, this needs to be increased, and for
|
||||
a device with limitations, it needs to be decreased.
|
||||
|
||||
By default, the kernel assumes that your device can address the full
|
||||
32-bits. For a 64-bit capable device, this needs to be increased.
|
||||
And for a device with limitations, as discussed in the previous
|
||||
paragraph, it needs to be decreased.
|
||||
Special note about PCI: PCI-X specification requires PCI-X devices to support
|
||||
64-bit addressing (DAC) for all transactions. And at least one platform (SGI
|
||||
SN2) requires 64-bit consistent allocations to operate correctly when the IO
|
||||
bus is in PCI-X mode.
|
||||
|
||||
Special note about PCI: PCI-X specification requires PCI-X devices to
|
||||
support 64-bit addressing (DAC) for all transactions. And at least
|
||||
one platform (SGI SN2) requires 64-bit consistent allocations to
|
||||
operate correctly when the IO bus is in PCI-X mode.
|
||||
For correct operation, you must set the DMA mask to inform the kernel about
|
||||
your device's DMA addressing capabilities.
|
||||
|
||||
For correct operation, you must interrogate the kernel in your device
|
||||
probe routine to see if the DMA controller on the machine can properly
|
||||
support the DMA addressing limitation your device has. It is good
|
||||
style to do this even if your device holds the default setting,
|
||||
because this shows that you did think about these issues wrt. your
|
||||
device.
|
||||
|
||||
The query is performed via a call to dma_set_mask_and_coherent()::
|
||||
This is performed via a call to dma_set_mask_and_coherent()::
|
||||
|
||||
int dma_set_mask_and_coherent(struct device *dev, u64 mask);
|
||||
|
||||
which will query the mask for both streaming and coherent APIs together.
|
||||
If you have some special requirements, then the following two separate
|
||||
queries can be used instead:
|
||||
which will set the mask for both streaming and coherent APIs together. If you
|
||||
have some special requirements, then the following two separate calls can be
|
||||
used instead:
|
||||
|
||||
The query for streaming mappings is performed via a call to
|
||||
The setup for streaming mappings is performed via a call to
|
||||
dma_set_mask()::
|
||||
|
||||
int dma_set_mask(struct device *dev, u64 mask);
|
||||
|
||||
The query for consistent allocations is performed via a call
|
||||
The setup for consistent allocations is performed via a call
|
||||
to dma_set_coherent_mask()::
|
||||
|
||||
int dma_set_coherent_mask(struct device *dev, u64 mask);
|
||||
|
||||
Here, dev is a pointer to the device struct of your device, and mask
|
||||
is a bit mask describing which bits of an address your device
|
||||
supports. It returns zero if your card can perform DMA properly on
|
||||
the machine given the address mask you provided. In general, the
|
||||
device struct of your device is embedded in the bus-specific device
|
||||
struct of your device. For example, &pdev->dev is a pointer to the
|
||||
device struct of a PCI device (pdev is a pointer to the PCI device
|
||||
struct of your device).
|
||||
Here, dev is a pointer to the device struct of your device, and mask is a bit
|
||||
mask describing which bits of an address your device supports. Often the
|
||||
device struct of your device is embedded in the bus-specific device struct of
|
||||
your device. For example, &pdev->dev is a pointer to the device struct of a
|
||||
PCI device (pdev is a pointer to the PCI device struct of your device).
|
||||
|
||||
If it returns non-zero, your device cannot perform DMA properly on
|
||||
this platform, and attempting to do so will result in undefined
|
||||
behavior. You must either use a different mask, or not use DMA.
|
||||
These calls usually return zero to indicate your device can perform DMA
|
||||
properly on the machine given the address mask you provided, but they might
|
||||
return an error if the mask is too small to be supportable on the given
|
||||
system. If it returns non-zero, your device cannot perform DMA properly on
|
||||
this platform, and attempting to do so will result in undefined behavior.
|
||||
You must not use DMA on this device unless the dma_set_mask family of
|
||||
functions has returned success.
|
||||
|
||||
This means that in the failure case, you have three options:
|
||||
This means that in the failure case, you have two options:
|
||||
|
||||
1) Use another DMA mask, if possible (see below).
|
||||
2) Use some non-DMA mode for data transfer, if possible.
|
||||
3) Ignore this device and do not initialize it.
|
||||
1) Use some non-DMA mode for data transfer, if possible.
|
||||
2) Ignore this device and do not initialize it.
|
||||
|
||||
It is recommended that your driver print a kernel KERN_WARNING message
|
||||
when you end up performing either #2 or #3. In this manner, if a user
|
||||
of your driver reports that performance is bad or that the device is not
|
||||
even detected, you can ask them for the kernel messages to find out
|
||||
exactly why.
|
||||
It is recommended that your driver print a kernel KERN_WARNING message when
|
||||
setting the DMA mask fails. In this manner, if a user of your driver reports
|
||||
that performance is bad or that the device is not even detected, you can ask
|
||||
them for the kernel messages to find out exactly why.
|
||||
|
||||
The standard 32-bit addressing device would do something like this::
|
||||
The standard 64-bit addressing device would do something like this::
|
||||
|
||||
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
|
||||
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
|
||||
dev_warn(dev, "mydev: No suitable DMA available\n");
|
||||
goto ignore_this_device;
|
||||
}
|
||||
|
||||
Another common scenario is a 64-bit capable device. The approach here
|
||||
is to try for 64-bit addressing, but back down to a 32-bit mask that
|
||||
should not fail. The kernel may fail the 64-bit mask not because the
|
||||
platform is not capable of 64-bit addressing. Rather, it may fail in
|
||||
this case simply because 32-bit addressing is done more efficiently
|
||||
than 64-bit addressing. For example, Sparc64 PCI SAC addressing is
|
||||
more efficient than DAC addressing.
|
||||
If the device only supports 32-bit addressing for descriptors in the
|
||||
coherent allocations, but supports full 64-bits for streaming mappings
|
||||
it would look like this::
|
||||
|
||||
Here is how you would handle a 64-bit capable device which can drive
|
||||
all 64-bits when accessing streaming DMA::
|
||||
|
||||
int using_dac;
|
||||
|
||||
if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
|
||||
using_dac = 1;
|
||||
} else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
|
||||
using_dac = 0;
|
||||
} else {
|
||||
dev_warn(dev, "mydev: No suitable DMA available\n");
|
||||
goto ignore_this_device;
|
||||
}
|
||||
|
||||
If a card is capable of using 64-bit consistent allocations as well,
|
||||
the case would look like this::
|
||||
|
||||
int using_dac, consistent_using_dac;
|
||||
|
||||
if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
|
||||
using_dac = 1;
|
||||
consistent_using_dac = 1;
|
||||
} else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
|
||||
using_dac = 0;
|
||||
consistent_using_dac = 0;
|
||||
} else {
|
||||
if (dma_set_mask(dev, DMA_BIT_MASK(64))) {
|
||||
dev_warn(dev, "mydev: No suitable DMA available\n");
|
||||
goto ignore_this_device;
|
||||
}
|
||||
|
@ -195,6 +195,14 @@ Requesting the required mask does not alter the current mask. If you
|
||||
wish to take advantage of it, you should issue a dma_set_mask()
|
||||
call to set the mask to the value returned.
|
||||
|
||||
::
|
||||
|
||||
size_t
|
||||
dma_direct_max_mapping_size(struct device *dev);
|
||||
|
||||
Returns the maximum size of a mapping for the device. The size parameter
|
||||
of the mapping functions like dma_map_single(), dma_map_page() and
|
||||
others should not be larger than the returned value.
|
||||
|
||||
Part Id - Streaming DMA mappings
|
||||
--------------------------------
|
||||
@ -530,8 +538,8 @@ that simply cannot make consistent memory.
|
||||
dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
|
||||
dma_addr_t dma_handle, unsigned long attrs)
|
||||
|
||||
Free memory allocated by the dma_alloc_attrs(). All parameters common
|
||||
parameters must identical to those otherwise passed to dma_fre_coherent,
|
||||
Free memory allocated by the dma_alloc_attrs(). All common
|
||||
parameters must be identical to those otherwise passed to dma_free_coherent,
|
||||
and the attrs argument must be identical to the attrs passed to
|
||||
dma_alloc_attrs().
|
||||
|
||||
@ -566,8 +574,7 @@ boundaries when doing this.
|
||||
|
||||
int
|
||||
dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
|
||||
dma_addr_t device_addr, size_t size, int
|
||||
flags)
|
||||
dma_addr_t device_addr, size_t size);
|
||||
|
||||
Declare region of memory to be handed out by dma_alloc_coherent() when
|
||||
it's asked for coherent memory for this device.
|
||||
@ -581,12 +588,6 @@ dma_addr_t in dma_alloc_coherent()).
|
||||
|
||||
size is the size of the area (must be multiples of PAGE_SIZE).
|
||||
|
||||
flags can be ORed together and are:
|
||||
|
||||
- DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions.
|
||||
Do not allow dma_alloc_coherent() to fall back to system memory when
|
||||
it's out of memory in the declared region.
|
||||
|
||||
As a simplification for the platforms, only *one* such region of
|
||||
memory may be declared per device.
|
||||
|
||||
@ -605,23 +606,6 @@ unconditionally having removed all the required structures. It is the
|
||||
driver's job to ensure that no parts of this memory region are
|
||||
currently in use.
|
||||
|
||||
::
|
||||
|
||||
void *
|
||||
dma_mark_declared_memory_occupied(struct device *dev,
|
||||
dma_addr_t device_addr, size_t size)
|
||||
|
||||
This is used to occupy specific regions of the declared space
|
||||
(dma_alloc_coherent() will hand out the first free region it finds).
|
||||
|
||||
device_addr is the *device* address of the region requested.
|
||||
|
||||
size is the size (and should be a page-sized multiple).
|
||||
|
||||
The return value will be either a pointer to the processor virtual
|
||||
address of the memory, or an error (via PTR_ERR()) if any part of the
|
||||
region is occupied.
|
||||
|
||||
Part III - Debug drivers use of the DMA-API
|
||||
-------------------------------------------
|
||||
|
||||
@ -696,6 +680,9 @@ dma-api/disabled This read-only file contains the character 'Y'
|
||||
happen when it runs out of memory or if it was
|
||||
disabled at boot time
|
||||
|
||||
dma-api/dump This read-only file contains current DMA
|
||||
mappings.
|
||||
|
||||
dma-api/error_count This file is read-only and shows the total
|
||||
numbers of errors found.
|
||||
|
||||
@ -717,7 +704,7 @@ dma-api/num_free_entries The current number of free dma_debug_entries
|
||||
dma-api/nr_total_entries The total number of dma_debug_entries in the
|
||||
allocator, both free and used.
|
||||
|
||||
dma-api/driver-filter You can write a name of a driver into this file
|
||||
dma-api/driver_filter You can write a name of a driver into this file
|
||||
to limit the debug output to requests from that
|
||||
particular driver. Write an empty string to
|
||||
that file to disable the filter and see
|
||||
|
@ -52,8 +52,8 @@ Address translation
|
||||
-------------------
|
||||
|
||||
To translate the virtual address to a bus address, use the normal DMA
|
||||
API. Do _not_ use isa_virt_to_phys() even though it does the same
|
||||
thing. The reason for this is that the function isa_virt_to_phys()
|
||||
API. Do _not_ use isa_virt_to_bus() even though it does the same
|
||||
thing. The reason for this is that the function isa_virt_to_bus()
|
||||
will require a Kconfig dependency to ISA, not just ISA_DMA_API which
|
||||
is really all you need. Remember that even though the DMA controller
|
||||
has its origins in ISA it is used elsewhere.
|
||||
|
@ -328,13 +328,13 @@
|
||||
inkscape:window-height="1148"
|
||||
id="namedview90"
|
||||
showgrid="true"
|
||||
inkscape:zoom="0.80021373"
|
||||
inkscape:cx="462.49289"
|
||||
inkscape:cy="473.6718"
|
||||
inkscape:zoom="0.69092787"
|
||||
inkscape:cx="476.34085"
|
||||
inkscape:cy="712.80957"
|
||||
inkscape:window-x="770"
|
||||
inkscape:window-y="24"
|
||||
inkscape:window-maximized="0"
|
||||
inkscape:current-layer="g4114-9-3-9"
|
||||
inkscape:current-layer="g4"
|
||||
inkscape:snap-grids="false"
|
||||
fit-margin-top="5"
|
||||
fit-margin-right="5"
|
||||
@ -813,14 +813,18 @@
|
||||
<text
|
||||
sodipodi:linespacing="125%"
|
||||
id="text4110-5-7-6-2-4-0"
|
||||
y="841.88086"
|
||||
y="670.74316"
|
||||
x="1460.1007"
|
||||
style="font-size:267.24359131px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
|
||||
xml:space="preserve"><tspan
|
||||
y="841.88086"
|
||||
y="670.74316"
|
||||
x="1460.1007"
|
||||
sodipodi:role="line"
|
||||
id="tspan4925-1-2-4-5">reched_cpu()</tspan></text>
|
||||
id="tspan4925-1-2-4-5">Request</tspan><tspan
|
||||
y="1004.7976"
|
||||
x="1460.1007"
|
||||
sodipodi:role="line"
|
||||
id="tspan3100">context switch</tspan></text>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@ -72,10 +72,10 @@ will ignore it because idle and offline CPUs are already residing
|
||||
in quiescent states.
|
||||
Otherwise, the expedited grace period will use
|
||||
<tt>smp_call_function_single()</tt> to send the CPU an IPI, which
|
||||
is handled by <tt>sync_rcu_exp_handler()</tt>.
|
||||
is handled by <tt>rcu_exp_handler()</tt>.
|
||||
|
||||
<p>
|
||||
However, because this is preemptible RCU, <tt>sync_rcu_exp_handler()</tt>
|
||||
However, because this is preemptible RCU, <tt>rcu_exp_handler()</tt>
|
||||
can check to see if the CPU is currently running in an RCU read-side
|
||||
critical section.
|
||||
If not, the handler can immediately report a quiescent state.
|
||||
@ -145,19 +145,18 @@ expedited grace period is shown in the following diagram:
|
||||
<p><img src="ExpSchedFlow.svg" alt="ExpSchedFlow.svg" width="55%">
|
||||
|
||||
<p>
|
||||
As with RCU-preempt's <tt>synchronize_rcu_expedited()</tt>,
|
||||
As with RCU-preempt, RCU-sched's
|
||||
<tt>synchronize_sched_expedited()</tt> ignores offline and
|
||||
idle CPUs, again because they are in remotely detectable
|
||||
quiescent states.
|
||||
However, the <tt>synchronize_rcu_expedited()</tt> handler
|
||||
is <tt>sync_sched_exp_handler()</tt>, and because the
|
||||
However, because the
|
||||
<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
|
||||
leave no trace of their invocation, in general it is not possible to tell
|
||||
whether or not the current CPU is in an RCU read-side critical section.
|
||||
The best that <tt>sync_sched_exp_handler()</tt> can do is to check
|
||||
The best that RCU-sched's <tt>rcu_exp_handler()</tt> can do is to check
|
||||
for idle, on the off-chance that the CPU went idle while the IPI
|
||||
was in flight.
|
||||
If the CPU is idle, then <tt>sync_sched_exp_handler()</tt> reports
|
||||
If the CPU is idle, then <tt>rcu_exp_handler()</tt> reports
|
||||
the quiescent state.
|
||||
|
||||
<p> Otherwise, the handler forces a future context switch by setting the
|
||||
@ -298,19 +297,18 @@ Instead, the task pushing the grace period forward will include the
|
||||
idle CPUs in the mask passed to <tt>rcu_report_exp_cpu_mult()</tt>.
|
||||
|
||||
<p>
|
||||
For RCU-sched, there is an additional check for idle in the IPI
|
||||
handler, <tt>sync_sched_exp_handler()</tt>.
|
||||
For RCU-sched, there is an additional check:
|
||||
If the IPI has interrupted the idle loop, then
|
||||
<tt>sync_sched_exp_handler()</tt> invokes <tt>rcu_report_exp_rdp()</tt>
|
||||
<tt>rcu_exp_handler()</tt> invokes <tt>rcu_report_exp_rdp()</tt>
|
||||
to report the corresponding quiescent state.
|
||||
|
||||
<p>
|
||||
For RCU-preempt, there is no specific check for idle in the
|
||||
IPI handler (<tt>sync_rcu_exp_handler()</tt>), but because
|
||||
IPI handler (<tt>rcu_exp_handler()</tt>), but because
|
||||
RCU read-side critical sections are not permitted within the
|
||||
idle loop, if <tt>sync_rcu_exp_handler()</tt> sees that the CPU is within
|
||||
idle loop, if <tt>rcu_exp_handler()</tt> sees that the CPU is within
|
||||
RCU read-side critical section, the CPU cannot possibly be idle.
|
||||
Otherwise, <tt>sync_rcu_exp_handler()</tt> invokes
|
||||
Otherwise, <tt>rcu_exp_handler()</tt> invokes
|
||||
<tt>rcu_report_exp_rdp()</tt> to report the corresponding quiescent
|
||||
state, regardless of whether or not that quiescent state was due to
|
||||
the CPU being idle.
|
||||
@ -625,6 +623,8 @@ checks, but only during the mid-boot dead zone.
|
||||
<p>
|
||||
With this refinement, synchronous grace periods can now be used from
|
||||
task context pretty much any time during the life of the kernel.
|
||||
That is, aside from some points in the suspend, hibernate, or shutdown
|
||||
code path.
|
||||
|
||||
<h3><a name="Summary">
|
||||
Summary</a></h3>
|
||||
|
@ -485,13 +485,13 @@ section that the grace period must wait on.
|
||||
noted by <tt>rcu_node_context_switch()</tt> on the left.
|
||||
On the other hand, if the CPU takes a scheduler-clock interrupt
|
||||
while executing in usermode, a quiescent state will be noted by
|
||||
<tt>rcu_check_callbacks()</tt> on the right.
|
||||
<tt>rcu_sched_clock_irq()</tt> on the right.
|
||||
Either way, the passage through a quiescent state will be noted
|
||||
in a per-CPU variable.
|
||||
|
||||
<p>The next time an <tt>RCU_SOFTIRQ</tt> handler executes on
|
||||
this CPU (for example, after the next scheduler-clock
|
||||
interrupt), <tt>__rcu_process_callbacks()</tt> will invoke
|
||||
interrupt), <tt>rcu_core()</tt> will invoke
|
||||
<tt>rcu_check_quiescent_state()</tt>, which will notice the
|
||||
recorded quiescent state, and invoke
|
||||
<tt>rcu_report_qs_rdp()</tt>.
|
||||
@ -651,7 +651,7 @@ to end.
|
||||
These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
|
||||
which is usually invoked by <tt>__note_gp_changes()</tt>.
|
||||
As shown in the diagram below, this invocation can be triggered by
|
||||
the scheduling-clock interrupt (<tt>rcu_check_callbacks()</tt> on
|
||||
the scheduling-clock interrupt (<tt>rcu_sched_clock_irq()</tt> on
|
||||
the left) or by idle entry (<tt>rcu_cleanup_after_idle()</tt> on
|
||||
the right, but only for kernels built with
|
||||
<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>).
|
||||
|
@ -349,7 +349,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
|
||||
<rect
|
||||
x="7069.6187"
|
||||
y="5087.4678"
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@ -3902,7 +3902,7 @@
|
||||
font-style="normal"
|
||||
y="-4418.6582"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_check_callbacks()</text>
|
||||
xml:space="preserve">rcu_sched_clock_irq()</text>
|
||||
</g>
|
||||
<g
|
||||
transform="translate(-850.30204,55463.106)"
|
||||
@ -3924,7 +3924,7 @@
|
||||
font-style="normal"
|
||||
y="-4418.6582"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_process_callbacks()</text>
|
||||
xml:space="preserve">rcu_core()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-5-3-27-0"
|
||||
@ -3933,7 +3933,7 @@
|
||||
font-style="normal"
|
||||
y="-4165.7954"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_check_quiescent_state())</text>
|
||||
xml:space="preserve">rcu_check_quiescent_state()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-5-3-27-0-9"
|
||||
@ -4968,7 +4968,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-19"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
|
||||
<rect
|
||||
x="5314.2671"
|
||||
y="82817.688"
|
||||
|
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB |
@ -775,7 +775,7 @@
|
||||
font-style="normal"
|
||||
y="-4418.6582"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_check_callbacks()</text>
|
||||
xml:space="preserve">rcu_sched_clock_irq()</text>
|
||||
</g>
|
||||
<g
|
||||
transform="translate(399.7744,828.86448)"
|
||||
@ -797,7 +797,7 @@
|
||||
font-style="normal"
|
||||
y="-4418.6582"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_process_callbacks()</text>
|
||||
xml:space="preserve">rcu_core()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-5-3-27-0"
|
||||
@ -806,7 +806,7 @@
|
||||
font-style="normal"
|
||||
y="-4165.7954"
|
||||
x="3745.7725"
|
||||
xml:space="preserve">rcu_check_quiescent_state())</text>
|
||||
xml:space="preserve">rcu_check_quiescent_state()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-5-3-27-0-9"
|
||||
|
Before Width: | Height: | Size: 43 KiB After Width: | Height: | Size: 43 KiB |
@ -3099,7 +3099,7 @@ If you block forever in one of a given domain's SRCU read-side critical
|
||||
sections, then that domain's grace periods will also be blocked forever.
|
||||
Of course, one good way to block forever is to deadlock, which can
|
||||
happen if any operation in a given domain's SRCU read-side critical
|
||||
section can block waiting, either directly or indirectly, for that domain's
|
||||
section can wait, either directly or indirectly, for that domain's
|
||||
grace period to elapse.
|
||||
For example, this results in a self-deadlock:
|
||||
|
||||
@ -3139,12 +3139,18 @@ API, which, in combination with <tt>srcu_read_unlock()</tt>,
|
||||
guarantees a full memory barrier.
|
||||
|
||||
<p>
|
||||
Also unlike other RCU flavors, SRCU's callbacks-wait function
|
||||
<tt>srcu_barrier()</tt> may be invoked from CPU-hotplug notifiers,
|
||||
though this is not necessarily a good idea.
|
||||
The reason that this is possible is that SRCU is insensitive
|
||||
to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt>
|
||||
need not exclude CPU-hotplug operations.
|
||||
Also unlike other RCU flavors, <tt>synchronize_srcu()</tt> may <b>not</b>
|
||||
be invoked from CPU-hotplug notifiers, due to the fact that SRCU grace
|
||||
periods make use of timers and the possibility of timers being temporarily
|
||||
“stranded” on the outgoing CPU.
|
||||
This stranding of timers means that timers posted to the outgoing CPU
|
||||
will not fire until late in the CPU-hotplug process.
|
||||
The problem is that if a notifier is waiting on an SRCU grace period,
|
||||
that grace period is waiting on a timer, and that timer is stranded on the
|
||||
outgoing CPU, then the notifier will never be awakened, in other words,
|
||||
deadlock has occurred.
|
||||
This same situation of course also prohibits <tt>srcu_barrier()</tt>
|
||||
from being invoked from CPU-hotplug notifiers.
|
||||
|
||||
<p>
|
||||
SRCU also differs from other RCU flavors in that SRCU's expedited and
|
||||
|
@ -14,9 +14,9 @@ being the real world and all that.
|
||||
So let's look at an example RCU lockdep splat from 3.0-rc5, one that
|
||||
has long since been fixed:
|
||||
|
||||
===============================
|
||||
[ INFO: suspicious RCU usage. ]
|
||||
-------------------------------
|
||||
=============================
|
||||
WARNING: suspicious RCU usage
|
||||
-----------------------------
|
||||
block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
|
||||
|
||||
other info that might help us debug this:
|
||||
@ -24,11 +24,11 @@ other info that might help us debug this:
|
||||
|
||||
rcu_scheduler_active = 1, debug_locks = 0
|
||||
3 locks held by scsi_scan_6/1552:
|
||||
#0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
|
||||
#0: (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
|
||||
scsi_scan_host_selected+0x5a/0x150
|
||||
#1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
|
||||
#1: (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
|
||||
elevator_exit+0x22/0x60
|
||||
#2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
|
||||
#2: (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
|
||||
cfq_exit_queue+0x43/0x190
|
||||
|
||||
stack backtrace:
|
||||
|
@ -219,17 +219,18 @@ an estimate of the total number of RCU callbacks queued across all CPUs
|
||||
In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
|
||||
for each CPU:
|
||||
|
||||
0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D
|
||||
0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 Nonlazy posted: ..D
|
||||
|
||||
The "last_accelerate:" prints the low-order 16 bits (in hex) of the
|
||||
jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
|
||||
from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
|
||||
rcu_prepare_for_idle(). The "nonlazy_posted:" prints the number
|
||||
of non-lazy callbacks posted since the last call to rcu_needs_cpu().
|
||||
Finally, an "L" indicates that there are currently no non-lazy callbacks
|
||||
("." is printed otherwise, as shown above) and "D" indicates that
|
||||
dyntick-idle processing is enabled ("." is printed otherwise, for example,
|
||||
if disabled via the "nohz=" kernel boot parameter).
|
||||
rcu_prepare_for_idle(). The "Nonlazy posted:" indicates lazy-callback
|
||||
status, so that an "l" indicates that all callbacks were lazy at the start
|
||||
of the last idle period and an "L" indicates that there are currently
|
||||
no non-lazy callbacks (in both cases, "." is printed otherwise, as
|
||||
shown above) and "D" indicates that dyntick-idle processing is enabled
|
||||
("." is printed otherwise, for example, if disabled via the "nohz="
|
||||
kernel boot parameter).
|
||||
|
||||
If the grace period ends just as the stall warning starts printing,
|
||||
there will be a spurious stall-warning message, which will include
|
||||
|
@ -10,173 +10,8 @@ status messages via printk(), which can be examined via the dmesg
|
||||
command (perhaps grepping for "torture"). The test is started
|
||||
when the module is loaded, and stops when the module is unloaded.
|
||||
|
||||
|
||||
MODULE PARAMETERS
|
||||
|
||||
This module has the following parameters:
|
||||
|
||||
fqs_duration Duration (in microseconds) of artificially induced bursts
|
||||
of force_quiescent_state() invocations. In RCU
|
||||
implementations having force_quiescent_state(), these
|
||||
bursts help force races between forcing a given grace
|
||||
period and that grace period ending on its own.
|
||||
|
||||
fqs_holdoff Holdoff time (in microseconds) between consecutive calls
|
||||
to force_quiescent_state() within a burst.
|
||||
|
||||
fqs_stutter Wait time (in seconds) between consecutive bursts
|
||||
of calls to force_quiescent_state().
|
||||
|
||||
gp_normal Make the fake writers use normal synchronous grace-period
|
||||
primitives.
|
||||
|
||||
gp_exp Make the fake writers use expedited synchronous grace-period
|
||||
primitives. If both gp_normal and gp_exp are set, or
|
||||
if neither gp_normal nor gp_exp are set, then randomly
|
||||
choose the primitive so that about 50% are normal and
|
||||
50% expedited. By default, neither are set, which
|
||||
gives best overall test coverage.
|
||||
|
||||
irqreader Says to invoke RCU readers from irq level. This is currently
|
||||
done via timers. Defaults to "1" for variants of RCU that
|
||||
permit this. (Or, more accurately, variants of RCU that do
|
||||
-not- permit this know to ignore this variable.)
|
||||
|
||||
n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted,
|
||||
in which case n_barrier_cbs specifies the number of
|
||||
RCU callbacks (and corresponding kthreads) to use for
|
||||
this testing. The value cannot be negative. If you
|
||||
specify this to be non-zero when torture_type indicates a
|
||||
synchronous RCU implementation (one for which a member of
|
||||
the synchronize_rcu() rather than the call_rcu() family is
|
||||
used -- see the documentation for torture_type below), an
|
||||
error will be reported and no testing will be carried out.
|
||||
|
||||
nfakewriters This is the number of RCU fake writer threads to run. Fake
|
||||
writer threads repeatedly use the synchronous "wait for
|
||||
current readers" function of the interface selected by
|
||||
torture_type, with a delay between calls to allow for various
|
||||
different numbers of writers running in parallel.
|
||||
nfakewriters defaults to 4, which provides enough parallelism
|
||||
to trigger special cases caused by multiple writers, such as
|
||||
the synchronize_srcu() early return optimization.
|
||||
|
||||
nreaders This is the number of RCU reading threads supported.
|
||||
The default is twice the number of CPUs. Why twice?
|
||||
To properly exercise RCU implementations with preemptible
|
||||
read-side critical sections.
|
||||
|
||||
onoff_interval
|
||||
The number of seconds between each attempt to execute a
|
||||
randomly selected CPU-hotplug operation. Defaults to
|
||||
zero, which disables CPU hotplugging. In HOTPLUG_CPU=n
|
||||
kernels, rcutorture will silently refuse to do any
|
||||
CPU-hotplug operations regardless of what value is
|
||||
specified for onoff_interval.
|
||||
|
||||
onoff_holdoff The number of seconds to wait until starting CPU-hotplug
|
||||
operations. This would normally only be used when
|
||||
rcutorture was built into the kernel and started
|
||||
automatically at boot time, in which case it is useful
|
||||
in order to avoid confusing boot-time code with CPUs
|
||||
coming and going.
|
||||
|
||||
shuffle_interval
|
||||
The number of seconds to keep the test threads affinitied
|
||||
to a particular subset of the CPUs, defaults to 3 seconds.
|
||||
Used in conjunction with test_no_idle_hz.
|
||||
|
||||
shutdown_secs The number of seconds to run the test before terminating
|
||||
the test and powering off the system. The default is
|
||||
zero, which disables test termination and system shutdown.
|
||||
This capability is useful for automated testing.
|
||||
|
||||
stall_cpu The number of seconds that a CPU should be stalled while
|
||||
within both an rcu_read_lock() and a preempt_disable().
|
||||
This stall happens only once per rcutorture run.
|
||||
If you need multiple stalls, use modprobe and rmmod to
|
||||
repeatedly run rcutorture. The default for stall_cpu
|
||||
is zero, which prevents rcutorture from stalling a CPU.
|
||||
|
||||
Note that attempts to rmmod rcutorture while the stall
|
||||
is ongoing will hang, so be careful what value you
|
||||
choose for this module parameter! In addition, too-large
|
||||
values for stall_cpu might well induce failures and
|
||||
warnings in other parts of the kernel. You have been
|
||||
warned!
|
||||
|
||||
stall_cpu_holdoff
|
||||
The number of seconds to wait after rcutorture starts
|
||||
before stalling a CPU. Defaults to 10 seconds.
|
||||
|
||||
stat_interval The number of seconds between output of torture
|
||||
statistics (via printk()). Regardless of the interval,
|
||||
statistics are printed when the module is unloaded.
|
||||
Setting the interval to zero causes the statistics to
|
||||
be printed -only- when the module is unloaded, and this
|
||||
is the default.
|
||||
|
||||
stutter The length of time to run the test before pausing for this
|
||||
same period of time. Defaults to "stutter=5", so as
|
||||
to run and pause for (roughly) five-second intervals.
|
||||
Specifying "stutter=0" causes the test to run continuously
|
||||
without pausing, which is the old default behavior.
|
||||
|
||||
test_boost Whether or not to test the ability of RCU to do priority
|
||||
boosting. Defaults to "test_boost=1", which performs
|
||||
RCU priority-inversion testing only if the selected
|
||||
RCU implementation supports priority boosting. Specifying
|
||||
"test_boost=0" never performs RCU priority-inversion
|
||||
testing. Specifying "test_boost=2" performs RCU
|
||||
priority-inversion testing even if the selected RCU
|
||||
implementation does not support RCU priority boosting,
|
||||
which can be used to test rcutorture's ability to
|
||||
carry out RCU priority-inversion testing.
|
||||
|
||||
test_boost_interval
|
||||
The number of seconds in an RCU priority-inversion test
|
||||
cycle. Defaults to "test_boost_interval=7". It is
|
||||
usually wise for this value to be relatively prime to
|
||||
the value selected for "stutter".
|
||||
|
||||
test_boost_duration
|
||||
The number of seconds to do RCU priority-inversion testing
|
||||
within any given "test_boost_interval". Defaults to
|
||||
"test_boost_duration=4".
|
||||
|
||||
test_no_idle_hz Whether or not to test the ability of RCU to operate in
|
||||
a kernel that disables the scheduling-clock interrupt to
|
||||
idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
|
||||
Defaults to omitting this test.
|
||||
|
||||
torture_type The type of RCU to test, with string values as follows:
|
||||
|
||||
"rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(),
|
||||
along with expedited, synchronous, and polling
|
||||
variants.
|
||||
|
||||
"rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
|
||||
call_rcu_bh(), along with expedited and synchronous
|
||||
variants.
|
||||
|
||||
"rcu_busted": This tests an intentionally incorrect version
|
||||
of RCU in order to help test rcutorture itself.
|
||||
|
||||
"srcu": srcu_read_lock(), srcu_read_unlock() and
|
||||
call_srcu(), along with expedited and
|
||||
synchronous variants.
|
||||
|
||||
"sched": preempt_disable(), preempt_enable(), and
|
||||
call_rcu_sched(), along with expedited,
|
||||
synchronous, and polling variants.
|
||||
|
||||
"tasks": voluntary context switch and call_rcu_tasks(),
|
||||
along with expedited and synchronous variants.
|
||||
|
||||
Defaults to "rcu".
|
||||
|
||||
verbose Enable debug printk()s. Default is disabled.
|
||||
|
||||
Module parameters are prefixed by "rcutorture." in
|
||||
Documentation/admin-guide/kernel-parameters.txt.
|
||||
|
||||
OUTPUT
|
||||
|
||||
|
@ -302,7 +302,7 @@ rcu_dereference()
|
||||
must prohibit. The rcu_dereference_protected() variant takes
|
||||
a lockdep expression to indicate which locks must be acquired
|
||||
by the caller. If the indicated protection is not provided,
|
||||
a lockdep splat is emitted. See RCU/Design/Requirements.html
|
||||
a lockdep splat is emitted. See RCU/Design/Requirements/Requirements.html
|
||||
and the API's code comments for more details and example usage.
|
||||
|
||||
The following diagram shows how each API communicates among the
|
||||
@ -560,7 +560,7 @@ presents two such "toy" implementations of RCU, one that is implemented
|
||||
in terms of familiar locking primitives, and another that more closely
|
||||
resembles "classic" RCU. Both are way too simple for real-world use,
|
||||
lacking both functionality and performance. However, they are useful
|
||||
in getting a feel for how RCU works. See kernel/rcupdate.c for a
|
||||
in getting a feel for how RCU works. See kernel/rcu/update.c for a
|
||||
production-quality implementation, and see:
|
||||
|
||||
http://www.rdrop.com/users/paulmck/RCU
|
||||
|
@ -23,7 +23,7 @@ kernel.
|
||||
|
||||
The resultant userspace tool binary is then located at:
|
||||
|
||||
tools/acpi/power/acpi/acpidbg/acpidbg
|
||||
tools/power/acpi/acpidbg
|
||||
|
||||
It can be installed to system directories by running "make install" (as a
|
||||
sufficiently privileged user).
|
||||
@ -35,7 +35,7 @@ kernel.
|
||||
|
||||
# mount -t debugfs none /sys/kernel/debug
|
||||
# modprobe acpi_dbg
|
||||
# tools/acpi/power/acpi/acpidbg/acpidbg
|
||||
# tools/power/acpi/acpidbg
|
||||
|
||||
That spawns the interactive AML debugger environment where you can execute
|
||||
debugger commands.
|
||||
|
@ -14,6 +14,10 @@ upgrade the ACPI execution environment that is defined by the ACPI tables
|
||||
via upgrading the ACPI tables provided by the BIOS with an instrumented,
|
||||
modified, more recent version, or installing brand new ACPI tables.
|
||||
|
||||
When building initrd with kernel in a single image, option
|
||||
ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this
|
||||
feature to work.
|
||||
|
||||
For a full list of ACPI tables that can be upgraded/installed, take a look
|
||||
at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
|
||||
drivers/acpi/tables.c.
|
||||
|
107
Documentation/admin-guide/LSM/SafeSetID.rst
Normal file
@ -0,0 +1,107 @@
|
||||
=========
|
||||
SafeSetID
|
||||
=========
|
||||
SafeSetID is an LSM module that gates the setid family of syscalls to restrict
|
||||
UID/GID transitions from a given UID/GID to only those approved by a
|
||||
system-wide whitelist. These restrictions also prohibit the given UIDs/GIDs
|
||||
from obtaining auxiliary privileges associated with CAP_SET{U/G}ID, such as
|
||||
allowing a user to set up user namespace UID mappings.
|
||||
|
||||
|
||||
Background
|
||||
==========
|
||||
In absence of file capabilities, processes spawned on a Linux system that need
|
||||
to switch to a different user must be spawned with CAP_SETUID privileges.
|
||||
CAP_SETUID is granted to programs running as root or those running as a non-root
|
||||
user that have been explicitly given the CAP_SETUID runtime capability. It is
|
||||
often preferable to use Linux runtime capabilities rather than file
|
||||
capabilities, since using file capabilities to run a program with elevated
|
||||
privileges opens up possible security holes since any user with access to the
|
||||
file can exec() that program to gain the elevated privileges.
|
||||
|
||||
While it is possible to implement a tree of processes by giving full
|
||||
CAP_SET{U/G}ID capabilities, this is often at odds with the goals of running a
|
||||
tree of processes under non-root user(s) in the first place. Specifically,
|
||||
since CAP_SETUID allows changing to any user on the system, including the root
|
||||
user, it is an overpowered capability for what is needed in this scenario,
|
||||
especially since programs often only call setuid() to drop privileges to a
|
||||
lesser-privileged user -- not elevate privileges. Unfortunately, there is no
|
||||
generally feasible way in Linux to restrict the potential UIDs that a user can
|
||||
switch to through setuid() beyond allowing a switch to any user on the system.
|
||||
This SafeSetID LSM seeks to provide a solution for restricting setid
|
||||
capabilities in such a way.
|
||||
|
||||
The main use case for this LSM is to allow a non-root program to transition to
|
||||
other untrusted uids without full blown CAP_SETUID capabilities. The non-root
|
||||
program would still need CAP_SETUID to do any kind of transition, but the
|
||||
additional restrictions imposed by this LSM would mean it is a "safer" version
|
||||
of CAP_SETUID since the non-root program cannot take advantage of CAP_SETUID to
|
||||
do any unapproved actions (e.g. setuid to uid 0 or create/enter new user
|
||||
namespace). The higher level goal is to allow for uid-based sandboxing of system
|
||||
services without having to give out CAP_SETUID all over the place just so that
|
||||
non-root programs can drop to even-lesser-privileged uids. This is especially
|
||||
relevant when one non-root daemon on the system should be allowed to spawn other
|
||||
processes as different uids, but it's undesirable to give the daemon a
|
||||
basically-root-equivalent CAP_SETUID.
|
||||
|
||||
|
||||
Other Approaches Considered
|
||||
===========================
|
||||
|
||||
Solve this problem in userspace
|
||||
-------------------------------
|
||||
For candidate applications that would like to have restricted setid capabilities
|
||||
as implemented in this LSM, an alternative option would be to simply take away
|
||||
setid capabilities from the application completely and refactor the process
|
||||
spawning semantics in the application (e.g. by using a privileged helper program
|
||||
to do process spawning and UID/GID transitions). Unfortunately, there are a
|
||||
number of semantics around process spawning that would be affected by this, such
|
||||
as fork() calls where the program doesn't immediately call exec() after the
|
||||
fork(), parent processes specifying custom environment variables or command line
|
||||
args for spawned child processes, or inheritance of file handles across a
|
||||
fork()/exec(). Because of this, a solution that uses a privileged helper in
|
||||
userspace would likely be less appealing to incorporate into existing projects
|
||||
that rely on certain process-spawning semantics in Linux.
|
||||
|
||||
Use user namespaces
|
||||
-------------------
|
||||
Another possible approach would be to run a given process tree in its own user
|
||||
namespace and give programs in the tree setid capabilities. In this way,
|
||||
programs in the tree could change to any desired UID/GID in the context of their
|
||||
own user namespace, and only approved UIDs/GIDs could be mapped back to the
|
||||
initial system user namespace, effectively preventing privilege escalation.
|
||||
Unfortunately, it is not generally feasible to use user namespaces in isolation,
|
||||
without pairing them with other namespace types, which is not always an option.
|
||||
Linux checks for capabilities based off of the user namespace that "owns" some
|
||||
entity. For example, Linux has the notion that network namespaces are owned by
|
||||
the user namespace in which they were created. A consequence of this is that
|
||||
capability checks for access to a given network namespace are done by checking
|
||||
whether a task has the given capability in the context of the user namespace
|
||||
that owns the network namespace -- not necessarily the user namespace under
|
||||
which the given task runs. Therefore spawning a process in a new user namespace
|
||||
effectively prevents it from accessing the network namespace owned by the
|
||||
initial namespace. This is a deal-breaker for any application that expects to
|
||||
retain the CAP_NET_ADMIN capability for the purpose of adjusting network
|
||||
configurations. Using user namespaces in isolation causes problems regarding
|
||||
other system interactions, including use of pid namespaces and device creation.
|
||||
|
||||
Use an existing LSM
|
||||
-------------------
|
||||
None of the other in-tree LSMs have the capability to gate setid transitions, or
|
||||
even employ the security_task_fix_setuid hook at all. SELinux says of that hook:
|
||||
"Since setuid only affects the current process, and since the SELinux controls
|
||||
are not based on the Linux identity attributes, SELinux does not need to control
|
||||
this operation."
|
||||
|
||||
|
||||
Directions for use
|
||||
==================
|
||||
This LSM hooks the setid syscalls to make sure transitions are allowed if an
|
||||
applicable restriction policy is in place. Policies are configured through
|
||||
securityfs by writing to the safesetid/add_whitelist_policy and
|
||||
safesetid/flush_whitelist_policies files at the location where securityfs is
|
||||
mounted. The format for adding a policy is '<UID>:<UID>', using literal
|
||||
numbers, such as '123:456'. To flush the policies, any write to the file is
|
||||
sufficient. Again, configuring a policy for a UID will prevent that UID from
|
||||
obtaining auxiliary setid privileges, such as allowing a user to set up user
|
||||
namespace UID mappings.
|
@ -17,9 +17,8 @@ MAC extensions, other extensions can be built using the LSM to provide
|
||||
specific changes to system operation when these tweaks are not available
|
||||
in the core functionality of Linux itself.
|
||||
|
||||
Without a specific LSM built into the kernel, the default LSM will be the
|
||||
Linux capabilities system. Most LSMs choose to extend the capabilities
|
||||
system, building their checks on top of the defined capability hooks.
|
||||
The Linux capabilities modules will always be included. This may be
|
||||
followed by any number of "minor" modules and at most one "major" module.
|
||||
For more details on capabilities, see ``capabilities(7)`` in the Linux
|
||||
man-pages project.
|
||||
|
||||
@ -30,6 +29,14 @@ order in which checks are made. The capability module will always
|
||||
be first, followed by any "minor" modules (e.g. Yama) and then
|
||||
the one "major" module (e.g. SELinux) if there is one configured.
|
||||
|
||||
Process attributes associated with "major" security modules should
|
||||
be accessed and maintained using the special files in ``/proc/.../attr``.
|
||||
A security module may maintain a module specific subdirectory there,
|
||||
named after the module. ``/proc/.../attr/smack`` is provided by the Smack
|
||||
security module and contains all its special files. The files directly
|
||||
in ``/proc/.../attr`` remain as legacy interfaces for modules that provide
|
||||
subdirectories.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
@ -39,3 +46,4 @@ the one "major" module (e.g. SELinux) if there is one configured.
|
||||
Smack
|
||||
tomoyo
|
||||
Yama
|
||||
SafeSetID
|
||||
|
@ -1,9 +1,9 @@
|
||||
.. _readme:
|
||||
|
||||
Linux kernel release 4.x <http://kernel.org/>
|
||||
Linux kernel release 5.x <http://kernel.org/>
|
||||
=============================================
|
||||
|
||||
These are the release notes for Linux version 4. Read them carefully,
|
||||
These are the release notes for Linux version 5. Read them carefully,
|
||||
as they tell you what this is all about, explain how to install the
|
||||
kernel, and what to do if something goes wrong.
|
||||
|
||||
@ -63,7 +63,7 @@ Installing the kernel source
|
||||
directory where you have permissions (e.g. your home directory) and
|
||||
unpack it::
|
||||
|
||||
xz -cd linux-4.X.tar.xz | tar xvf -
|
||||
xz -cd linux-5.x.tar.xz | tar xvf -
|
||||
|
||||
Replace "x" with the version number of the latest kernel.
|
||||
|
||||
@ -72,26 +72,26 @@ Installing the kernel source
|
||||
files. They should match the library, and not get messed up by
|
||||
whatever the kernel-du-jour happens to be.
|
||||
|
||||
- You can also upgrade between 4.x releases by patching. Patches are
|
||||
- You can also upgrade between 5.x releases by patching. Patches are
|
||||
distributed in the xz format. To install by patching, get all the
|
||||
newer patch files, enter the top level directory of the kernel source
|
||||
(linux-4.X) and execute::
|
||||
(linux-5.x) and execute::
|
||||
|
||||
xz -cd ../patch-4.x.xz | patch -p1
|
||||
xz -cd ../patch-5.x.xz | patch -p1
|
||||
|
||||
Replace "x" for all versions bigger than the version "X" of your current
|
||||
Replace "x" for all versions bigger than the version "x" of your current
|
||||
source tree, **in_order**, and you should be ok. You may want to remove
|
||||
the backup files (some-file-name~ or some-file-name.orig), and make sure
|
||||
that there are no failed patches (some-file-name# or some-file-name.rej).
|
||||
If there are, either you or I have made a mistake.
|
||||
|
||||
Unlike patches for the 4.x kernels, patches for the 4.x.y kernels
|
||||
Unlike patches for the 5.x kernels, patches for the 5.x.y kernels
|
||||
(also known as the -stable kernels) are not incremental but instead apply
|
||||
directly to the base 4.x kernel. For example, if your base kernel is 4.0
|
||||
and you want to apply the 4.0.3 patch, you must not first apply the 4.0.1
|
||||
and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and
|
||||
want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is,
|
||||
patch -R) **before** applying the 4.0.3 patch. You can read more on this in
|
||||
directly to the base 5.x kernel. For example, if your base kernel is 5.0
|
||||
and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1
|
||||
and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and
|
||||
want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is,
|
||||
patch -R) **before** applying the 5.0.3 patch. You can read more on this in
|
||||
:ref:`Documentation/process/applying-patches.rst <applying_patches>`.
|
||||
|
||||
Alternatively, the script patch-kernel can be used to automate this
|
||||
@ -114,7 +114,7 @@ Installing the kernel source
|
||||
Software requirements
|
||||
---------------------
|
||||
|
||||
Compiling and running the 4.x kernels requires up-to-date
|
||||
Compiling and running the 5.x kernels requires up-to-date
|
||||
versions of various software packages. Consult
|
||||
:ref:`Documentation/process/changes.rst <changes>` for the minimum version numbers
|
||||
required and how to get updates for these packages. Beware that using
|
||||
@ -132,12 +132,12 @@ Build directory for the kernel
|
||||
place for the output files (including .config).
|
||||
Example::
|
||||
|
||||
kernel source code: /usr/src/linux-4.X
|
||||
kernel source code: /usr/src/linux-5.x
|
||||
build directory: /home/name/build/kernel
|
||||
|
||||
To configure and build the kernel, use::
|
||||
|
||||
cd /usr/src/linux-4.X
|
||||
cd /usr/src/linux-5.x
|
||||
make O=/home/name/build/kernel menuconfig
|
||||
make O=/home/name/build/kernel
|
||||
sudo make O=/home/name/build/kernel modules_install install
|
||||
@ -251,7 +251,7 @@ Configuring the kernel
|
||||
Compiling the kernel
|
||||
--------------------
|
||||
|
||||
- Make sure you have at least gcc 3.2 available.
|
||||
- Make sure you have at least gcc 4.6 available.
|
||||
For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
|
||||
|
||||
Please note that you can still run a.out user programs with this kernel.
|
||||
|
@ -1189,6 +1189,10 @@ PAGE_SIZE multiple when read back.
|
||||
Amount of cached filesystem data that was modified and
|
||||
is currently being written back to disk
|
||||
|
||||
anon_thp
|
||||
Amount of memory used in anonymous mappings backed by
|
||||
transparent hugepages
|
||||
|
||||
inactive_anon, active_anon, inactive_file, active_file, unevictable
|
||||
Amount of memory, swap-backed and filesystem-backed,
|
||||
on the internal memory management lists used by the
|
||||
@ -1248,6 +1252,18 @@ PAGE_SIZE multiple when read back.
|
||||
|
||||
Amount of reclaimed lazyfree pages
|
||||
|
||||
thp_fault_alloc
|
||||
|
||||
Number of transparent hugepages which were allocated to satisfy
|
||||
a page fault, including COW faults. This counter is not present
|
||||
when CONFIG_TRANSPARENT_HUGEPAGE is not set.
|
||||
|
||||
thp_collapse_alloc
|
||||
|
||||
Number of transparent hugepages which were allocated to allow
|
||||
collapsing an existing range of pages. This counter is not
|
||||
present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
|
||||
|
||||
memory.swap.current
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
@ -1503,7 +1519,7 @@ protected workload.
|
||||
|
||||
The limits are only applied at the peer level in the hierarchy. This means that
|
||||
in the diagram below, only groups A, B, and C will influence each other, and
|
||||
groups D and F will influence each other. Group G will influence nobody.
|
||||
groups D and F will influence each other. Group G will influence nobody::
|
||||
|
||||
[root]
|
||||
/ | \
|
||||
|
@ -461,6 +461,11 @@
|
||||
possible to determine what the correct size should be.
|
||||
This option provides an override for these situations.
|
||||
|
||||
carrier_timeout=
|
||||
[NET] Specifies amount of time (in seconds) that
|
||||
the kernel should wait for a network carrier. By default
|
||||
it waits 120 seconds.
|
||||
|
||||
ca_keys= [KEYS] This parameter identifies a specific key(s) on
|
||||
the system trusted keyring to be used for certificate
|
||||
trust validation.
|
||||
@ -910,6 +915,10 @@
|
||||
The filter can be disabled or changed to another
|
||||
driver later using sysfs.
|
||||
|
||||
driver_async_probe= [KNL]
|
||||
List of driver names to be probed asynchronously.
|
||||
Format: <driver_name1>,<driver_name2>...
|
||||
|
||||
drm.edid_firmware=[<connector>:]<file>[,[<connector>:]<file>]
|
||||
Broken monitors, graphic adapters, KVMs and EDIDless
|
||||
panels may send no or incorrect EDID data sets.
|
||||
@ -1073,9 +1082,15 @@
|
||||
specified address. The serial port must already be
|
||||
setup and configured. Options are not yet supported.
|
||||
|
||||
efifb,[options]
|
||||
Start an early, unaccelerated console on the EFI
|
||||
memory mapped framebuffer (if available). On cache
|
||||
coherent non-x86 systems that use system memory for
|
||||
the framebuffer, pass the 'ram' option so that it is
|
||||
mapped with the correct attributes.
|
||||
|
||||
earlyprintk= [X86,SH,ARM,M68k,S390]
|
||||
earlyprintk=vga
|
||||
earlyprintk=efi
|
||||
earlyprintk=sclp
|
||||
earlyprintk=xen
|
||||
earlyprintk=serial[,ttySn[,baudrate]]
|
||||
@ -1182,9 +1197,10 @@
|
||||
arch/x86/kernel/cpu/cpufreq/elanfreq.c.
|
||||
|
||||
elevator= [IOSCHED]
|
||||
Format: {"cfq" | "deadline" | "noop"}
|
||||
See Documentation/block/cfq-iosched.txt and
|
||||
Documentation/block/deadline-iosched.txt for details.
|
||||
Format: { "mq-deadline" | "kyber" | "bfq" }
|
||||
See Documentation/block/deadline-iosched.txt,
|
||||
Documentation/block/kyber-iosched.txt and
|
||||
Documentation/block/bfq-iosched.txt for details.
|
||||
|
||||
elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
|
||||
Specifies physical address of start of kernel core
|
||||
@ -1696,12 +1712,11 @@
|
||||
By default, super page will be supported if Intel IOMMU
|
||||
has the capability. With this option, super page will
|
||||
not be supported.
|
||||
sm_off [Default Off]
|
||||
By default, scalable mode will be supported if the
|
||||
sm_on [Default Off]
|
||||
By default, scalable mode will be disabled even if the
|
||||
hardware advertises that it has support for the scalable
|
||||
mode translation. With this option set, scalable mode
|
||||
will not be used even on hardware which claims to support
|
||||
it.
|
||||
will be used on hardware which claims to support it.
|
||||
tboot_noforce [Default Off]
|
||||
Do not force the Intel IOMMU enabled under tboot.
|
||||
By default, tboot will force Intel IOMMU on, which
|
||||
@ -1831,6 +1846,11 @@
|
||||
to let secondary kernels in charge of setting up
|
||||
LPIs.
|
||||
|
||||
irqchip.gicv3_pseudo_nmi= [ARM64]
|
||||
Enables support for pseudo-NMIs in the kernel. This
|
||||
requires the kernel to be built with
|
||||
CONFIG_ARM64_PSEUDO_NMI.
|
||||
|
||||
irqfixup [HW]
|
||||
When an interrupt is not handled search all handlers
|
||||
for it. Intended to get systems with badly broken
|
||||
@ -1982,6 +2002,12 @@
|
||||
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
|
||||
the default is off.
|
||||
|
||||
kpti= [ARM64] Control page table isolation of user
|
||||
and kernel address spaces.
|
||||
Default: enabled on cores which need mitigation.
|
||||
0: force disabled
|
||||
1: force enabled
|
||||
|
||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||
Default is 0 (don't ignore, but inject #GP)
|
||||
|
||||
@ -2319,6 +2345,10 @@
|
||||
|
||||
lsm.debug [SECURITY] Enable LSM initialization debugging output.
|
||||
|
||||
lsm=lsm1,...,lsmN
|
||||
[SECURITY] Choose order of LSM initialization. This
|
||||
overrides CONFIG_LSM, and the "security=" parameter.
|
||||
|
||||
machvec= [IA-64] Force the use of a particular machine-vector
|
||||
(machvec) in a generic kernel.
|
||||
Example: machvec=hpzx1_swiotlb
|
||||
@ -3654,19 +3684,6 @@
|
||||
latencies, which will choose a value aligned
|
||||
with the appropriate hardware boundaries.
|
||||
|
||||
rcutree.jiffies_till_sched_qs= [KNL]
|
||||
Set required age in jiffies for a
|
||||
given grace period before RCU starts
|
||||
soliciting quiescent-state help from
|
||||
rcu_note_context_switch(). If not specified, the
|
||||
kernel will calculate a value based on the most
|
||||
recent settings of rcutree.jiffies_till_first_fqs
|
||||
and rcutree.jiffies_till_next_fqs.
|
||||
This calculated value may be viewed in
|
||||
rcutree.jiffies_to_sched_qs. Any attempt to
|
||||
set rcutree.jiffies_to_sched_qs will be
|
||||
cheerfully overwritten.
|
||||
|
||||
rcutree.jiffies_till_first_fqs= [KNL]
|
||||
Set delay from grace-period initialization to
|
||||
first attempt to force quiescent states.
|
||||
@ -3678,6 +3695,20 @@
|
||||
quiescent states. Units are jiffies, minimum
|
||||
value is one, and maximum value is HZ.
|
||||
|
||||
rcutree.jiffies_till_sched_qs= [KNL]
|
||||
Set required age in jiffies for a
|
||||
given grace period before RCU starts
|
||||
soliciting quiescent-state help from
|
||||
rcu_note_context_switch() and cond_resched().
|
||||
If not specified, the kernel will calculate
|
||||
a value based on the most recent settings
|
||||
of rcutree.jiffies_till_first_fqs
|
||||
and rcutree.jiffies_till_next_fqs.
|
||||
This calculated value may be viewed in
|
||||
rcutree.jiffies_to_sched_qs. Any attempt to set
|
||||
rcutree.jiffies_to_sched_qs will be cheerfully
|
||||
overwritten.
|
||||
|
||||
rcutree.kthread_prio= [KNL,BOOT]
|
||||
Set the SCHED_FIFO priority of the RCU per-CPU
|
||||
kthreads (rcuc/N). This value is also used for
|
||||
@ -3721,6 +3752,11 @@
|
||||
This wake_up() will be accompanied by a
|
||||
WARN_ONCE() splat and an ftrace_dump().
|
||||
|
||||
rcutree.sysrq_rcu= [KNL]
|
||||
Commandeer a sysrq key to dump out Tree RCU's
|
||||
rcu_node tree with an eye towards determining
|
||||
why a new grace period has not yet started.
|
||||
|
||||
rcuperf.gp_async= [KNL]
|
||||
Measure performance of asynchronous
|
||||
grace-period primitives such as call_rcu().
|
||||
@ -4090,11 +4126,9 @@
|
||||
Note: increases power consumption, thus should only be
|
||||
enabled if running jitter sensitive (HPC/RT) workloads.
|
||||
|
||||
security= [SECURITY] Choose a security module to enable at boot.
|
||||
If this boot parameter is not specified, only the first
|
||||
security module asking for security registration will be
|
||||
loaded. An invalid security module name will be treated
|
||||
as if no module has been chosen.
|
||||
security= [SECURITY] Choose a legacy "major" security module to
|
||||
enable at boot. This has been deprecated by the
|
||||
"lsm=" parameter.
|
||||
|
||||
selinux= [SELINUX] Disable or enable SELinux at boot time.
|
||||
Format: { "0" | "1" }
|
||||
@ -4698,7 +4732,8 @@
|
||||
usbcore.authorized_default=
|
||||
[USB] Default USB device authorization:
|
||||
(default -1 = authorized except for wireless USB,
|
||||
0 = not authorized, 1 = authorized)
|
||||
0 = not authorized, 1 = authorized, 2 = authorized
|
||||
if device connected to internal port)
|
||||
|
||||
usbcore.autosuspend=
|
||||
[USB] The autosuspend time delay (in seconds) used
|
||||
@ -5043,6 +5078,14 @@
|
||||
or other driver-specific files in the
|
||||
Documentation/watchdog/ directory.
|
||||
|
||||
watchdog_thresh=
|
||||
[KNL]
|
||||
Set the hard lockup detector stall duration
|
||||
threshold in seconds. The soft lockup detector
|
||||
threshold is set to twice the value. A value of 0
|
||||
disables both lockup detectors. Default is 10
|
||||
seconds.
|
||||
|
||||
workqueue.watchdog_thresh=
|
||||
If CONFIG_WQ_WATCHDOG is configured, workqueue can
|
||||
warn about stall conditions and dump internal state to
|
||||
|
@ -756,3 +756,6 @@ These currently include:
|
||||
The cache mode for raid5. raid5 could include an extra disk for
|
||||
caching. The mode can be "write-through" and "write-back". The
|
||||
default is "write-through".
|
||||
|
||||
ppl_write_hint
|
||||
NVMe stream ID to be set for each PPL write request.
|
||||
|
@ -75,9 +75,10 @@ number of times a page is mapped.
|
||||
20. NOPAGE
|
||||
21. KSM
|
||||
22. THP
|
||||
23. BALLOON
|
||||
23. OFFLINE
|
||||
24. ZERO_PAGE
|
||||
25. IDLE
|
||||
26. PGTABLE
|
||||
|
||||
* ``/proc/kpagecgroup``. This file contains a 64-bit inode number of the
|
||||
memory cgroup each page is charged to, indexed by PFN. Only available when
|
||||
@ -118,8 +119,8 @@ Short descriptions to the page flags
|
||||
identical memory pages dynamically shared between one or more processes
|
||||
22 - THP
|
||||
contiguous pages which construct transparent hugepages
|
||||
23 - BALLOON
|
||||
balloon compaction page
|
||||
23 - OFFLINE
|
||||
page is logically offline
|
||||
24 - ZERO_PAGE
|
||||
zero page for pfn_zero or huge_zero page
|
||||
25 - IDLE
|
||||
@ -128,6 +129,8 @@ Short descriptions to the page flags
|
||||
Note that this flag may be stale in case the page was accessed via
|
||||
a PTE. To make sure the flag is up-to-date one has to read
|
||||
``/sys/kernel/mm/page_idle/bitmap`` first.
|
||||
26 - PGTABLE
|
||||
page is in use as a page table
|
||||
|
||||
IO related page flags
|
||||
---------------------
|
||||
|
@ -6,83 +6,211 @@ Perf Events and tool security
|
||||
Overview
|
||||
--------
|
||||
|
||||
Usage of Performance Counters for Linux (perf_events) [1]_ , [2]_ , [3]_ can
|
||||
impose a considerable risk of leaking sensitive data accessed by monitored
|
||||
processes. The data leakage is possible both in scenarios of direct usage of
|
||||
perf_events system call API [2]_ and over data files generated by Perf tool user
|
||||
mode utility (Perf) [3]_ , [4]_ . The risk depends on the nature of data that
|
||||
perf_events performance monitoring units (PMU) [2]_ collect and expose for
|
||||
performance analysis. Having that said perf_events/Perf performance monitoring
|
||||
is the subject for security access control management [5]_ .
|
||||
Usage of Performance Counters for Linux (perf_events) [1]_ , [2]_ , [3]_
|
||||
can impose a considerable risk of leaking sensitive data accessed by
|
||||
monitored processes. The data leakage is possible both in scenarios of
|
||||
direct usage of perf_events system call API [2]_ and over data files
|
||||
generated by Perf tool user mode utility (Perf) [3]_ , [4]_ . The risk
|
||||
depends on the nature of data that perf_events performance monitoring
|
||||
units (PMU) [2]_ and Perf collect and expose for performance analysis.
|
||||
Collected system and performance data may be split into several
|
||||
categories:
|
||||
|
||||
1. System hardware and software configuration data, for example: a CPU
|
||||
model and its cache configuration, an amount of available memory and
|
||||
its topology, used kernel and Perf versions, performance monitoring
|
||||
setup including experiment time, events configuration, Perf command
|
||||
line parameters, etc.
|
||||
|
||||
2. User and kernel module paths and their load addresses with sizes,
|
||||
process and thread names with their PIDs and TIDs, timestamps for
|
||||
captured hardware and software events.
|
||||
|
||||
3. Content of kernel software counters (e.g., for context switches, page
|
||||
faults, CPU migrations), architectural hardware performance counters
|
||||
(PMC) [8]_ and machine specific registers (MSR) [9]_ that provide
|
||||
execution metrics for various monitored parts of the system (e.g.,
|
||||
memory controller (IMC), interconnect (QPI/UPI) or peripheral (PCIe)
|
||||
uncore counters) without direct attribution to any execution context
|
||||
state.
|
||||
|
||||
4. Content of architectural execution context registers (e.g., RIP, RSP,
|
||||
RBP on x86_64), process user and kernel space memory addresses and
|
||||
data, content of various architectural MSRs that capture data from
|
||||
this category.
|
||||
|
||||
Data that belong to the fourth category can potentially contain
|
||||
sensitive process data. If PMUs in some monitoring modes capture values
|
||||
of execution context registers or data from process memory then access
|
||||
to such monitoring capabilities needs to be ordered and secured
|
||||
properly. So, perf_events/Perf performance monitoring is the subject for
|
||||
security access control management [5]_ .
|
||||
|
||||
perf_events/Perf access control
|
||||
-------------------------------
|
||||
|
||||
To perform security checks, the Linux implementation splits processes into two
|
||||
categories [6]_ : a) privileged processes (whose effective user ID is 0, referred
|
||||
to as superuser or root), and b) unprivileged processes (whose effective UID is
|
||||
nonzero). Privileged processes bypass all kernel security permission checks so
|
||||
perf_events performance monitoring is fully available to privileged processes
|
||||
without access, scope and resource restrictions.
|
||||
To perform security checks, the Linux implementation splits processes
|
||||
into two categories [6]_ : a) privileged processes (whose effective user
|
||||
ID is 0, referred to as superuser or root), and b) unprivileged
|
||||
processes (whose effective UID is nonzero). Privileged processes bypass
|
||||
all kernel security permission checks so perf_events performance
|
||||
monitoring is fully available to privileged processes without access,
|
||||
scope and resource restrictions.
|
||||
|
||||
Unprivileged processes are subject to a full security permission check based on
|
||||
the process's credentials [5]_ (usually: effective UID, effective GID, and
|
||||
supplementary group list).
|
||||
Unprivileged processes are subject to a full security permission check
|
||||
based on the process's credentials [5]_ (usually: effective UID,
|
||||
effective GID, and supplementary group list).
|
||||
|
||||
Linux divides the privileges traditionally associated with superuser into
|
||||
distinct units, known as capabilities [6]_ , which can be independently enabled
|
||||
and disabled on per-thread basis for processes and files of unprivileged users.
|
||||
Linux divides the privileges traditionally associated with superuser
|
||||
into distinct units, known as capabilities [6]_ , which can be
|
||||
independently enabled and disabled on per-thread basis for processes and
|
||||
files of unprivileged users.
|
||||
|
||||
Unprivileged processes with enabled CAP_SYS_ADMIN capability are treated as
|
||||
privileged processes with respect to perf_events performance monitoring and
|
||||
bypass *scope* permissions checks in the kernel.
|
||||
Unprivileged processes with enabled CAP_SYS_ADMIN capability are treated
|
||||
as privileged processes with respect to perf_events performance
|
||||
monitoring and bypass *scope* permissions checks in the kernel.
|
||||
|
||||
Unprivileged processes using perf_events system call API is also subject for
|
||||
PTRACE_MODE_READ_REALCREDS ptrace access mode check [7]_ , whose outcome
|
||||
determines whether monitoring is permitted. So unprivileged processes provided
|
||||
with CAP_SYS_PTRACE capability are effectively permitted to pass the check.
|
||||
Unprivileged processes using perf_events system call API are also subject
|
||||
to PTRACE_MODE_READ_REALCREDS ptrace access mode check [7]_ , whose
|
||||
outcome determines whether monitoring is permitted. So unprivileged
|
||||
processes provided with CAP_SYS_PTRACE capability are effectively
|
||||
permitted to pass the check.
|
||||
|
||||
Other capabilities being granted to unprivileged processes can effectively
|
||||
enable capturing of additional data required for later performance analysis of
|
||||
monitored processes or a system. For example, CAP_SYSLOG capability permits
|
||||
reading kernel space memory addresses from /proc/kallsyms file.
|
||||
Other capabilities being granted to unprivileged processes can
|
||||
effectively enable capturing of additional data required for later
|
||||
performance analysis of monitored processes or a system. For example,
|
||||
CAP_SYSLOG capability permits reading kernel space memory addresses from
|
||||
/proc/kallsyms file.
|
||||
|
||||
perf_events/Perf privileged users
|
||||
---------------------------------
|
||||
|
||||
Mechanisms of capabilities, privileged capability-dumb files [6]_ and
|
||||
file system ACLs [10]_ can be used to create a dedicated group of
|
||||
perf_events/Perf privileged users who are permitted to execute
|
||||
performance monitoring without scope limits. The following steps can be
|
||||
taken to create such a group of privileged Perf users.
|
||||
|
||||
1. Create perf_users group of privileged Perf users, assign perf_users
|
||||
group to Perf tool executable and limit access to the executable for
|
||||
other users in the system who are not in the perf_users group:
|
||||
|
||||
::
|
||||
|
||||
# groupadd perf_users
|
||||
# ls -alhF
|
||||
-rwxr-xr-x 2 root root 11M Oct 19 15:12 perf
|
||||
# chgrp perf_users perf
|
||||
# ls -alhF
|
||||
-rwxr-xr-x 2 root perf_users 11M Oct 19 15:12 perf
|
||||
# chmod o-rwx perf
|
||||
# ls -alhF
|
||||
-rwxr-x--- 2 root perf_users 11M Oct 19 15:12 perf
|
||||
|
||||
2. Assign the required capabilities to the Perf tool executable file and
|
||||
enable members of perf_users group with performance monitoring
|
||||
privileges [6]_ :
|
||||
|
||||
::
|
||||
|
||||
# setcap "cap_sys_admin,cap_sys_ptrace,cap_syslog=ep" perf
|
||||
# setcap -v "cap_sys_admin,cap_sys_ptrace,cap_syslog=ep" perf
|
||||
perf: OK
|
||||
# getcap perf
|
||||
perf = cap_sys_ptrace,cap_sys_admin,cap_syslog+ep
|
||||
|
||||
As a result, members of perf_users group are capable of conducting
|
||||
performance monitoring by using functionality of the configured Perf
|
||||
tool executable that, when executed, passes perf_events subsystem scope
|
||||
checks.
|
||||
|
||||
This specific access control management is only available to superuser
|
||||
or root running processes with CAP_SETPCAP, CAP_SETFCAP [6]_
|
||||
capabilities.
|
||||
|
||||
perf_events/Perf unprivileged users
|
||||
-----------------------------------
|
||||
|
||||
perf_events/Perf *scope* and *access* control for unprivileged processes is
|
||||
governed by perf_event_paranoid [2]_ setting:
|
||||
perf_events/Perf *scope* and *access* control for unprivileged processes
|
||||
is governed by perf_event_paranoid [2]_ setting:
|
||||
|
||||
-1:
|
||||
Impose no *scope* and *access* restrictions on using perf_events performance
|
||||
monitoring. Per-user per-cpu perf_event_mlock_kb [2]_ locking limit is
|
||||
ignored when allocating memory buffers for storing performance data.
|
||||
This is the least secure mode since allowed monitored *scope* is
|
||||
maximized and no perf_events specific limits are imposed on *resources*
|
||||
allocated for performance monitoring.
|
||||
Impose no *scope* and *access* restrictions on using perf_events
|
||||
performance monitoring. Per-user per-cpu perf_event_mlock_kb [2]_
|
||||
locking limit is ignored when allocating memory buffers for storing
|
||||
performance data. This is the least secure mode since allowed
|
||||
monitored *scope* is maximized and no perf_events specific limits
|
||||
are imposed on *resources* allocated for performance monitoring.
|
||||
|
||||
>=0:
|
||||
*scope* includes per-process and system wide performance monitoring
|
||||
but excludes raw tracepoints and ftrace function tracepoints monitoring.
|
||||
CPU and system events happened when executing either in user or
|
||||
in kernel space can be monitored and captured for later analysis.
|
||||
Per-user per-cpu perf_event_mlock_kb locking limit is imposed but
|
||||
ignored for unprivileged processes with CAP_IPC_LOCK [6]_ capability.
|
||||
but excludes raw tracepoints and ftrace function tracepoints
|
||||
monitoring. CPU and system events happened when executing either in
|
||||
user or in kernel space can be monitored and captured for later
|
||||
analysis. Per-user per-cpu perf_event_mlock_kb locking limit is
|
||||
imposed but ignored for unprivileged processes with CAP_IPC_LOCK
|
||||
[6]_ capability.
|
||||
|
||||
>=1:
|
||||
*scope* includes per-process performance monitoring only and excludes
|
||||
system wide performance monitoring. CPU and system events happened when
|
||||
executing either in user or in kernel space can be monitored and
|
||||
captured for later analysis. Per-user per-cpu perf_event_mlock_kb
|
||||
locking limit is imposed but ignored for unprivileged processes with
|
||||
CAP_IPC_LOCK capability.
|
||||
*scope* includes per-process performance monitoring only and
|
||||
excludes system wide performance monitoring. CPU and system events
|
||||
happened when executing either in user or in kernel space can be
|
||||
monitored and captured for later analysis. Per-user per-cpu
|
||||
perf_event_mlock_kb locking limit is imposed but ignored for
|
||||
unprivileged processes with CAP_IPC_LOCK capability.
|
||||
|
||||
>=2:
|
||||
*scope* includes per-process performance monitoring only. CPU and system
|
||||
events happened when executing in user space only can be monitored and
|
||||
captured for later analysis. Per-user per-cpu perf_event_mlock_kb
|
||||
locking limit is imposed but ignored for unprivileged processes with
|
||||
CAP_IPC_LOCK capability.
|
||||
*scope* includes per-process performance monitoring only. CPU and
|
||||
system events happened when executing in user space only can be
|
||||
monitored and captured for later analysis. Per-user per-cpu
|
||||
perf_event_mlock_kb locking limit is imposed but ignored for
|
||||
unprivileged processes with CAP_IPC_LOCK capability.
|
||||
|
||||
perf_events/Perf resource control
|
||||
---------------------------------
|
||||
|
||||
Open file descriptors
|
||||
+++++++++++++++++++++
|
||||
|
||||
The perf_events system call API [2]_ allocates file descriptors for
|
||||
every configured PMU event. Open file descriptors are a per-process
|
||||
accountable resource governed by the RLIMIT_NOFILE [11]_ limit
|
||||
(ulimit -n), which is usually derived from the login shell process. When
|
||||
configuring Perf collection for a long list of events on a large server
|
||||
system, this limit can be easily hit preventing required monitoring
|
||||
configuration. RLIMIT_NOFILE limit can be increased on per-user basis
|
||||
modifying content of the limits.conf file [12]_ . Ordinarily, a Perf
|
||||
sampling session (perf record) requires an amount of open perf_event
|
||||
file descriptors that is not less than the number of monitored events
|
||||
multiplied by the number of monitored CPUs.
|
||||
|
||||
Memory allocation
|
||||
+++++++++++++++++
|
||||
|
||||
The amount of memory available to user processes for capturing
|
||||
performance monitoring data is governed by the perf_event_mlock_kb [2]_
|
||||
setting. This perf_event specific resource setting defines overall
|
||||
per-cpu limits of memory allowed for mapping by the user processes to
|
||||
execute performance monitoring. The setting essentially extends the
|
||||
RLIMIT_MEMLOCK [11]_ limit, but only for memory regions mapped
|
||||
specifically for capturing monitored performance events and related data.
|
||||
|
||||
For example, if a machine has eight cores and perf_event_mlock_kb limit
|
||||
is set to 516 KiB, then a user process is provided with 516 KiB * 8 =
|
||||
4128 KiB of memory above the RLIMIT_MEMLOCK limit (ulimit -l) for
|
||||
perf_event mmap buffers. In particular, this means that, if the user
|
||||
wants to start two or more performance monitoring processes, the user is
|
||||
required to manually distribute the available 4128 KiB between the
|
||||
monitoring processes, for example, using the --mmap-pages Perf record
|
||||
mode option. Otherwise, the first started performance monitoring process
|
||||
allocates all available 4128 KiB and the other processes will fail to
|
||||
proceed due to the lack of memory.
|
||||
|
||||
RLIMIT_MEMLOCK and perf_event_mlock_kb resource constraints are ignored
|
||||
for processes with the CAP_IPC_LOCK capability. Thus, perf_events/Perf
|
||||
privileged users can be provided with memory above the constraints for
|
||||
perf_events/Perf performance monitoring purpose by providing the Perf
|
||||
executable with CAP_IPC_LOCK capability.
|
||||
|
||||
Bibliography
|
||||
------------
|
||||
@ -94,4 +222,9 @@ Bibliography
|
||||
.. [5] `<https://www.kernel.org/doc/html/latest/security/credentials.html>`_
|
||||
.. [6] `<http://man7.org/linux/man-pages/man7/capabilities.7.html>`_
|
||||
.. [7] `<http://man7.org/linux/man-pages/man2/ptrace.2.html>`_
|
||||
.. [8] `<https://en.wikipedia.org/wiki/Hardware_performance_counter>`_
|
||||
.. [9] `<https://en.wikipedia.org/wiki/Model-specific_register>`_
|
||||
.. [10] `<http://man7.org/linux/man-pages/man5/acl.5.html>`_
|
||||
.. [11] `<http://man7.org/linux/man-pages/man2/getrlimit.2.html>`_
|
||||
.. [12] `<http://man7.org/linux/man-pages/man5/limits.conf.5.html>`_
|
||||
|
||||
|
@ -155,14 +155,14 @@ governor uses that information depends on what algorithm is implemented by it
|
||||
and that is the primary reason for having more than one governor in the
|
||||
``CPUIdle`` subsystem.
|
||||
|
||||
There are two ``CPUIdle`` governors available, ``menu`` and ``ladder``. Which
|
||||
of them is used depends on the configuration of the kernel and in particular on
|
||||
whether or not the scheduler tick can be `stopped by the idle
|
||||
loop <idle-cpus-and-tick_>`_. It is possible to change the governor at run time
|
||||
if the ``cpuidle_sysfs_switch`` command line parameter has been passed to the
|
||||
kernel, but that is not safe in general, so it should not be done on production
|
||||
systems (that may change in the future, though). The name of the ``CPUIdle``
|
||||
governor currently used by the kernel can be read from the
|
||||
There are three ``CPUIdle`` governors available, ``menu``, `TEO <teo-gov_>`_
|
||||
and ``ladder``. Which of them is used by default depends on the configuration
|
||||
of the kernel and in particular on whether or not the scheduler tick can be
|
||||
`stopped by the idle loop <idle-cpus-and-tick_>`_. It is possible to change the
|
||||
governor at run time if the ``cpuidle_sysfs_switch`` command line parameter has
|
||||
been passed to the kernel, but that is not safe in general, so it should not be
|
||||
done on production systems (that may change in the future, though). The name of
|
||||
the ``CPUIdle`` governor currently used by the kernel can be read from the
|
||||
:file:`current_governor_ro` (or :file:`current_governor` if
|
||||
``cpuidle_sysfs_switch`` is present in the kernel command line) file under
|
||||
:file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
|
||||
@ -256,6 +256,8 @@ the ``menu`` governor by default and if it is not tickless, the default
|
||||
``CPUIdle`` governor on it will be ``ladder``.
|
||||
|
||||
|
||||
.. _menu-gov:
|
||||
|
||||
The ``menu`` Governor
|
||||
=====================
|
||||
|
||||
@ -333,6 +335,92 @@ that time, the governor may need to select a shallower state with a suitable
|
||||
target residency.
|
||||
|
||||
|
||||
.. _teo-gov:
|
||||
|
||||
The Timer Events Oriented (TEO) Governor
|
||||
========================================
|
||||
|
||||
The timer events oriented (TEO) governor is an alternative ``CPUIdle`` governor
|
||||
for tickless systems. It follows the same basic strategy as the ``menu`` `one
|
||||
<menu-gov_>`_: it always tries to find the deepest idle state suitable for the
|
||||
given conditions. However, it applies a different approach to that problem.
|
||||
|
||||
First, it does not use sleep length correction factors, but instead it attempts
|
||||
to correlate the observed idle duration values with the available idle states
|
||||
and use that information to pick up the idle state that is most likely to
|
||||
"match" the upcoming CPU idle interval. Second, it does not take the tasks
|
||||
that were running on the given CPU in the past and are waiting on some I/O
|
||||
operations to complete now into account at all (there is no guarantee that they will run on
|
||||
the same CPU when they become runnable again) and the pattern detection code in
|
||||
it avoids taking timer wakeups into account. It also only uses idle duration
|
||||
values less than the current time till the closest timer (with the scheduler
|
||||
tick excluded) for that purpose.
|
||||
|
||||
Like in the ``menu`` governor `case <menu-gov_>`_, the first step is to obtain
|
||||
the *sleep length*, which is the time until the closest timer event with the
|
||||
assumption that the scheduler tick will be stopped (that also is the upper bound
|
||||
on the time until the next CPU wakeup). That value is then used to preselect an
|
||||
idle state on the basis of three metrics maintained for each idle state provided
|
||||
by the ``CPUIdle`` driver: ``hits``, ``misses`` and ``early_hits``.
|
||||
|
||||
The ``hits`` and ``misses`` metrics measure the likelihood that a given idle
|
||||
state will "match" the observed (post-wakeup) idle duration if it "matches" the
|
||||
sleep length. They both are subject to decay (after a CPU wakeup) every time
|
||||
the target residency of the idle state corresponding to them is less than or
|
||||
equal to the sleep length and the target residency of the next idle state is
|
||||
greater than the sleep length (that is, when the idle state corresponding to
|
||||
them "matches" the sleep length). The ``hits`` metric is increased if the
|
||||
former condition is satisfied and the target residency of the given idle state
|
||||
is less than or equal to the observed idle duration and the target residency of
|
||||
the next idle state is greater than the observed idle duration at the same time
|
||||
(that is, it is increased when the given idle state "matches" both the sleep
|
||||
length and the observed idle duration). In turn, the ``misses`` metric is
|
||||
increased when the given idle state "matches" the sleep length only and the
|
||||
observed idle duration is too short for its target residency.
|
||||
|
||||
The ``early_hits`` metric measures the likelihood that a given idle state will
|
||||
"match" the observed (post-wakeup) idle duration if it does not "match" the
|
||||
sleep length. It is subject to decay on every CPU wakeup and it is increased
|
||||
when the idle state corresponding to it "matches" the observed (post-wakeup)
|
||||
idle duration and the target residency of the next idle state is less than or
|
||||
equal to the sleep length (i.e. the idle state "matching" the sleep length is
|
||||
deeper than the given one).
|
||||
|
||||
The governor walks the list of idle states provided by the ``CPUIdle`` driver
|
||||
and finds the last (deepest) one with the target residency less than or equal
|
||||
to the sleep length. Then, the ``hits`` and ``misses`` metrics of that idle
|
||||
state are compared with each other and it is preselected if the ``hits`` one is
|
||||
greater (which means that that idle state is likely to "match" the observed idle
|
||||
duration after CPU wakeup). If the ``misses`` one is greater, the governor
|
||||
preselects the shallower idle state with the maximum ``early_hits`` metric
|
||||
(or if there are multiple shallower idle states with equal ``early_hits``
|
||||
metric which also is the maximum, the shallowest of them will be preselected).
|
||||
[If there is a wakeup latency constraint coming from the `PM QoS framework
|
||||
<cpu-pm-qos_>`_ which is hit before reaching the deepest idle state with the
|
||||
target residency within the sleep length, the deepest idle state with the exit
|
||||
latency within the constraint is preselected without consulting the ``hits``,
|
||||
``misses`` and ``early_hits`` metrics.]
|
||||
|
||||
Next, the governor takes several idle duration values observed most recently
|
||||
into consideration and if at least a half of them are greater than or equal to
|
||||
the target residency of the preselected idle state, that idle state becomes the
|
||||
final candidate to ask for. Otherwise, the average of the most recent idle
|
||||
duration values below the target residency of the preselected idle state is
|
||||
computed and the governor walks the idle states shallower than the preselected
|
||||
one and finds the deepest of them with the target residency within that average.
|
||||
That idle state is then taken as the final candidate to ask for.
|
||||
|
||||
Still, at this point the governor may need to refine the idle state selection if
|
||||
it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_. That
|
||||
generally happens if the target residency of the idle state selected so far is
|
||||
less than the tick period and the tick has not been stopped already (in a
|
||||
previous iteration of the idle loop). Then, like in the ``menu`` governor
|
||||
`case <menu-gov_>`_, the sleep length used in the previous computations may not
|
||||
reflect the real time until the closest timer event and if it really is greater
|
||||
than that time, a shallower state with a suitable target residency may need to
|
||||
be selected.
|
||||
|
||||
|
||||
.. _idle-states-representation:
|
||||
|
||||
Representation of Idle States
|
||||
|
@ -1,59 +1,164 @@
|
||||
Tainted kernels
|
||||
---------------
|
||||
|
||||
Some oops reports contain the string **'Tainted: '** after the program
|
||||
counter. This indicates that the kernel has been tainted by some
|
||||
mechanism. The string is followed by a series of position-sensitive
|
||||
characters, each representing a particular tainted value.
|
||||
The kernel will mark itself as 'tainted' when something occurs that might be
|
||||
relevant later when investigating problems. Don't worry too much about this,
|
||||
most of the time it's not a problem to run a tainted kernel; the information is
|
||||
mainly of interest once someone wants to investigate some problem, as its real
|
||||
cause might be the event that got the kernel tainted. That's why bug reports
|
||||
from tainted kernels will often be ignored by developers, hence try to reproduce
|
||||
problems with an untainted kernel.
|
||||
|
||||
1) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if
|
||||
Note the kernel will remain tainted even after you undo what caused the taint
|
||||
(i.e. unload a proprietary kernel module), to indicate the kernel remains not
|
||||
trustworthy. That's also why the kernel will print the tainted state when it
|
||||
notices an internal problem (a 'kernel bug'), a recoverable error
|
||||
('kernel oops') or a non-recoverable error ('kernel panic') and writes debug
|
||||
information about this to the logs ``dmesg`` outputs. It's also possible to
|
||||
check the tainted state at runtime through a file in ``/proc/``.
|
||||
|
||||
|
||||
Tainted flag in bugs, oops or panics messages
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You find the tainted state near the top in a line starting with 'CPU:'; if or
|
||||
why the kernel was tainted is shown after the Process ID ('PID:') and a shortened
|
||||
name of the command ('Comm:') that triggered the event::
|
||||
|
||||
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
|
||||
Oops: 0002 [#1] SMP PTI
|
||||
CPU: 0 PID: 4424 Comm: insmod Tainted: P W O 4.20.0-0.rc6.fc30 #1
|
||||
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
|
||||
RIP: 0010:my_oops_init+0x13/0x1000 [kpanic]
|
||||
[...]
|
||||
|
||||
You'll find a 'Not tainted: ' there if the kernel was not tainted at the
|
||||
time of the event; if it was, then it will print 'Tainted: ' and characters
|
||||
either letters or blanks. In the above example it looks like this::
|
||||
|
||||
Tainted: P W O
|
||||
|
||||
The meaning of those characters is explained in the table below. In this case
|
||||
the kernel got tainted earlier because a proprietary Module (``P``) was loaded,
|
||||
a warning occurred (``W``), and an externally-built module was loaded (``O``).
|
||||
To decode other letters use the table below.
|
||||
|
||||
|
||||
Decoding tainted state at runtime
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
At runtime, you can query the tainted state by running
|
||||
``cat /proc/sys/kernel/tainted``. If that returns ``0``, the kernel is not
|
||||
tainted; any other number indicates the reasons why it is. The easiest way to
|
||||
decode that number is the script ``tools/debugging/kernel-chktaint``, which your
|
||||
distribution might ship as part of a package called ``linux-tools`` or
|
||||
``kernel-tools``; if it doesn't you can download the script from
|
||||
`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_
|
||||
and execute it with ``sh kernel-chktaint``, which would print something like
|
||||
this on the machine that had the statements in the logs that were quoted earlier::
|
||||
|
||||
Kernel is Tainted for following reasons:
|
||||
* Proprietary module was loaded (#0)
|
||||
* Kernel issued warning (#9)
|
||||
* Externally-built ('out-of-tree') module was loaded (#12)
|
||||
See Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel or
|
||||
https://www.kernel.org/doc/html/latest/admin-guide/tainted-kernels.html for
|
||||
a more details explanation of the various taint flags.
|
||||
Raw taint value as int/string: 4609/'P W O '
|
||||
|
||||
You can try to decode the number yourself. That's easy if there was only one
|
||||
reason that got your kernel tainted, as in this case you can find the number
|
||||
with the table below. If there were multiple reasons you need to decode the
|
||||
number, as it is a bitfield, where each bit indicates the absence or presence of
|
||||
a particular type of taint. It's best to leave that to the aforementioned
|
||||
script, but if you need something quick you can use this shell command to check
|
||||
which bits are set::
|
||||
|
||||
$ for i in $(seq 18); do echo $(($i-1)) $(($(cat /proc/sys/kernel/tainted)>>($i-1)&1));done
|
||||
|
||||
Table for decoding tainted state
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
=== === ====== ========================================================
|
||||
Bit Log Number Reason that got the kernel tainted
|
||||
=== === ====== ========================================================
|
||||
0 G/P 1 proprietary module was loaded
|
||||
1 _/F 2 module was force loaded
|
||||
2 _/S 4 SMP kernel oops on an officially SMP incapable processor
|
||||
3 _/R 8 module was force unloaded
|
||||
4 _/M 16 processor reported a Machine Check Exception (MCE)
|
||||
5 _/B 32 bad page referenced or some unexpected page flags
|
||||
6 _/U 64 taint requested by userspace application
|
||||
7 _/D 128 kernel died recently, i.e. there was an OOPS or BUG
|
||||
8 _/A 256 ACPI table overridden by user
|
||||
9 _/W 512 kernel issued warning
|
||||
10 _/C 1024 staging driver was loaded
|
||||
11 _/I 2048 workaround for bug in platform firmware applied
|
||||
12 _/O 4096 externally-built ("out-of-tree") module was loaded
|
||||
13 _/E 8192 unsigned module was loaded
|
||||
14 _/L 16384 soft lockup occurred
|
||||
15 _/K 32768 kernel has been live patched
|
||||
16 _/X 65536 auxiliary taint, defined for and used by distros
|
||||
17 _/T 131072 kernel was built with the struct randomization plugin
|
||||
=== === ====== ========================================================
|
||||
|
||||
Note: The character ``_`` is representing a blank in this table to make reading
|
||||
easier.
|
||||
|
||||
More detailed explanation for tainting
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
0) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if
|
||||
any proprietary module has been loaded. Modules without a
|
||||
MODULE_LICENSE or with a MODULE_LICENSE that is not recognised by
|
||||
insmod as GPL compatible are assumed to be proprietary.
|
||||
|
||||
2) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all
|
||||
1) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all
|
||||
modules were loaded normally.
|
||||
|
||||
3) ``S`` if the oops occurred on an SMP kernel running on hardware that
|
||||
2) ``S`` if the oops occurred on an SMP kernel running on hardware that
|
||||
hasn't been certified as safe to run multiprocessor.
|
||||
Currently this occurs only on various Athlons that are not
|
||||
SMP capable.
|
||||
|
||||
4) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all
|
||||
3) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all
|
||||
modules were unloaded normally.
|
||||
|
||||
5) ``M`` if any processor has reported a Machine Check Exception,
|
||||
4) ``M`` if any processor has reported a Machine Check Exception,
|
||||
``' '`` if no Machine Check Exceptions have occurred.
|
||||
|
||||
6) ``B`` if a page-release function has found a bad page reference or
|
||||
some unexpected page flags.
|
||||
5) ``B`` If a page-release function has found a bad page reference or some
|
||||
unexpected page flags. This indicates a hardware problem or a kernel bug;
|
||||
there should be other information in the log indicating why this tainting
|
||||
occurred.
|
||||
|
||||
7) ``U`` if a user or user application specifically requested that the
|
||||
6) ``U`` if a user or user application specifically requested that the
|
||||
Tainted flag be set, ``' '`` otherwise.
|
||||
|
||||
8) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG.
|
||||
7) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG.
|
||||
|
||||
9) ``A`` if the ACPI table has been overridden.
|
||||
8) ``A`` if an ACPI table has been overridden.
|
||||
|
||||
10) ``W`` if a warning has previously been issued by the kernel.
|
||||
9) ``W`` if a warning has previously been issued by the kernel.
|
||||
(Though some warnings may set more specific taint flags.)
|
||||
|
||||
11) ``C`` if a staging driver has been loaded.
|
||||
10) ``C`` if a staging driver has been loaded.
|
||||
|
||||
12) ``I`` if the kernel is working around a severe bug in the platform
|
||||
11) ``I`` if the kernel is working around a severe bug in the platform
|
||||
firmware (BIOS or similar).
|
||||
|
||||
13) ``O`` if an externally-built ("out-of-tree") module has been loaded.
|
||||
12) ``O`` if an externally-built ("out-of-tree") module has been loaded.
|
||||
|
||||
14) ``E`` if an unsigned module has been loaded in a kernel supporting
|
||||
13) ``E`` if an unsigned module has been loaded in a kernel supporting
|
||||
module signature.
|
||||
|
||||
15) ``L`` if a soft lockup has previously occurred on the system.
|
||||
14) ``L`` if a soft lockup has previously occurred on the system.
|
||||
|
||||
16) ``K`` if the kernel has been live patched.
|
||||
15) ``K`` if the kernel has been live patched.
|
||||
|
||||
The primary reason for the **'Tainted: '** string is to tell kernel
|
||||
debuggers if this is a clean kernel or if anything unusual has
|
||||
occurred. Tainting is permanent: even if an offending module is
|
||||
unloaded, the tainted value remains to indicate that the kernel is not
|
||||
trustworthy.
|
||||
16) ``X`` Auxiliary taint, defined for and used by Linux distributors.
|
||||
|
||||
17) ``T`` Kernel was built with the randstruct plugin, which can intentionally
|
||||
produce extremely unusual kernel structure layouts (even performance
|
||||
pathological ones), which is important to know when debugging. Set at
|
||||
build time.
|
||||
|
@ -6,7 +6,7 @@ TL;DR summary
|
||||
* Use only NEON instructions, or VFP instructions that don't rely on support
|
||||
code
|
||||
* Isolate your NEON code in a separate compilation unit, and compile it with
|
||||
'-mfpu=neon -mfloat-abi=softfp'
|
||||
'-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
|
||||
* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
|
||||
NEON code
|
||||
* Don't sleep in your NEON code, and be aware that it will be executed with
|
||||
@ -87,7 +87,7 @@ instructions appearing in unexpected places if no special care is taken.
|
||||
Therefore, the recommended and only supported way of using NEON/VFP in the
|
||||
kernel is by adhering to the following rules:
|
||||
* isolate the NEON code in a separate compilation unit and compile it with
|
||||
'-mfpu=neon -mfloat-abi=softfp';
|
||||
'-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
|
||||
* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
|
||||
into the unit containing the NEON code from a compilation unit which is *not*
|
||||
built with the GCC flag '-mfpu=neon' set.
|
||||
|
@ -188,6 +188,11 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
the kernel image will be entered must be initialised by software at a
|
||||
higher exception level to prevent execution in an UNKNOWN state.
|
||||
|
||||
- SCR_EL3.FIQ must have the same value across all CPUs the kernel is
|
||||
executing on.
|
||||
- The value of SCR_EL3.FIQ must be the same as the one present at boot
|
||||
time whenever the kernel is executing.
|
||||
|
||||
For systems with a GICv3 interrupt controller to be used in v3 mode:
|
||||
- If EL3 is present:
|
||||
ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
|
||||
|
@ -78,6 +78,11 @@ bits can vary between the two. Note that the masks apply to TTBR0
|
||||
addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel
|
||||
pointers).
|
||||
|
||||
Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel
|
||||
will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct
|
||||
user_pac_address_keys and struct user_pac_generic_keys). These can be
|
||||
used to get and set the keys for a thread.
|
||||
|
||||
|
||||
Virtualization
|
||||
--------------
|
||||
|
@ -44,6 +44,8 @@ stable kernels.
|
||||
|
||||
| Implementor | Component | Erratum ID | Kconfig |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
||||
| | | | |
|
||||
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
|
||||
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
|
||||
| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 |
|
||||
@ -80,3 +82,4 @@ stable kernels.
|
||||
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
|
||||
| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
|
||||
| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
|
||||
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
|
||||
|
@ -117,3 +117,28 @@ Other implications:
|
||||
size limitations and the limitations of the underlying devices. Thus
|
||||
there's no need to define ->merge_bvec_fn() callbacks for individual block
|
||||
drivers.
|
||||
|
||||
Usage of helpers:
|
||||
=================
|
||||
|
||||
* The following helpers whose names have the suffix of "_all" can only be used
|
||||
on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
|
||||
shouldn't use them because the bio may have been split before it reached the
|
||||
driver.
|
||||
|
||||
bio_for_each_segment_all()
|
||||
bio_first_bvec_all()
|
||||
bio_first_page_all()
|
||||
bio_last_bvec_all()
|
||||
|
||||
* The following helpers iterate over single-page segment. The passed 'struct
|
||||
bio_vec' will contain a single-page IO vector during the iteration
|
||||
|
||||
bio_for_each_segment()
|
||||
bio_for_each_segment_all()
|
||||
|
||||
* The following helpers iterate over multi-page bvec. The passed 'struct
|
||||
bio_vec' will contain a multi-page IO vector during the iteration
|
||||
|
||||
bio_for_each_bvec()
|
||||
rq_for_each_bvec()
|
||||
|
@ -36,27 +36,27 @@ consideration important quirks of other architectures) and
|
||||
defines calling convention that is compatible with C calling
|
||||
convention of the linux kernel on those architectures.
|
||||
|
||||
Q: can multiple return values be supported in the future?
|
||||
Q: Can multiple return values be supported in the future?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
A: NO. BPF allows only register R0 to be used as return value.
|
||||
|
||||
Q: can more than 5 function arguments be supported in the future?
|
||||
Q: Can more than 5 function arguments be supported in the future?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
A: NO. BPF calling convention only allows registers R1-R5 to be used
|
||||
as arguments. BPF is not a standalone instruction set.
|
||||
(unlike x64 ISA that allows msft, cdecl and other conventions)
|
||||
|
||||
Q: can BPF programs access instruction pointer or return address?
|
||||
Q: Can BPF programs access instruction pointer or return address?
|
||||
-----------------------------------------------------------------
|
||||
A: NO.
|
||||
|
||||
Q: can BPF programs access stack pointer ?
|
||||
Q: Can BPF programs access stack pointer ?
|
||||
------------------------------------------
|
||||
A: NO.
|
||||
|
||||
Only frame pointer (register R10) is accessible.
|
||||
From compiler point of view it's necessary to have stack pointer.
|
||||
For example LLVM defines register R11 as stack pointer in its
|
||||
For example, LLVM defines register R11 as stack pointer in its
|
||||
BPF backend, but it makes sure that generated code never uses it.
|
||||
|
||||
Q: Does C-calling convention diminish possible use cases?
|
||||
@ -66,8 +66,8 @@ A: YES.
|
||||
BPF design forces addition of major functionality in the form
|
||||
of kernel helper functions and kernel objects like BPF maps with
|
||||
seamless interoperability between them. It lets kernel call into
|
||||
BPF programs and programs call kernel helpers with zero overhead.
|
||||
As all of them were native C code. That is particularly the case
|
||||
BPF programs and programs call kernel helpers with zero overhead,
|
||||
as all of them were native C code. That is particularly the case
|
||||
for JITed BPF programs that are indistinguishable from
|
||||
native kernel C code.
|
||||
|
||||
@ -75,9 +75,9 @@ Q: Does it mean that 'innovative' extensions to BPF code are disallowed?
|
||||
------------------------------------------------------------------------
|
||||
A: Soft yes.
|
||||
|
||||
At least for now until BPF core has support for
|
||||
At least for now, until BPF core has support for
|
||||
bpf-to-bpf calls, indirect calls, loops, global variables,
|
||||
jump tables, read only sections and all other normal constructs
|
||||
jump tables, read-only sections, and all other normal constructs
|
||||
that C code can produce.
|
||||
|
||||
Q: Can loops be supported in a safe way?
|
||||
@ -109,16 +109,16 @@ For example why BPF_JNE and other compare and jumps are not cpu-like?
|
||||
A: This was necessary to avoid introducing flags into ISA which are
|
||||
impossible to make generic and efficient across CPU architectures.
|
||||
|
||||
Q: why BPF_DIV instruction doesn't map to x64 div?
|
||||
Q: Why BPF_DIV instruction doesn't map to x64 div?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
A: Because if we picked one-to-one relationship to x64 it would have made
|
||||
it more complicated to support on arm64 and other archs. Also it
|
||||
needs div-by-zero runtime check.
|
||||
|
||||
Q: why there is no BPF_SDIV for signed divide operation?
|
||||
Q: Why there is no BPF_SDIV for signed divide operation?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
A: Because it would be rarely used. llvm errors in such case and
|
||||
prints a suggestion to use unsigned divide instead
|
||||
prints a suggestion to use unsigned divide instead.
|
||||
|
||||
Q: Why BPF has implicit prologue and epilogue?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
848
Documentation/bpf/btf.rst
Normal file
@ -0,0 +1,848 @@
|
||||
=====================
|
||||
BPF Type Format (BTF)
|
||||
=====================
|
||||
|
||||
1. Introduction
|
||||
***************
|
||||
|
||||
BTF (BPF Type Format) is the metadata format which encodes the debug info
|
||||
related to BPF program/map. The name BTF was used initially to describe data
|
||||
types. The BTF was later extended to include function info for defined
|
||||
subroutines, and line info for source/line information.
|
||||
|
||||
The debug info is used for map pretty print, function signature, etc. The
|
||||
function signature enables better bpf program/function kernel symbol. The line
|
||||
info helps generate source annotated translated byte code, jited code and
|
||||
verifier log.
|
||||
|
||||
The BTF specification contains two parts,
|
||||
* BTF kernel API
|
||||
* BTF ELF file format
|
||||
|
||||
The kernel API is the contract between user space and kernel. The kernel
|
||||
verifies the BTF info before using it. The ELF file format is a user space
|
||||
contract between ELF file and libbpf loader.
|
||||
|
||||
The type and string sections are part of the BTF kernel API, describing the
|
||||
debug info (mostly types related) referenced by the bpf program. These two
|
||||
sections are discussed in detail in :ref:`BTF_Type_String`.
|
||||
|
||||
.. _BTF_Type_String:
|
||||
|
||||
2. BTF Type and String Encoding
|
||||
*******************************
|
||||
|
||||
The file ``include/uapi/linux/btf.h`` provides high-level definition of how
|
||||
types/strings are encoded.
|
||||
|
||||
The beginning of data blob must be::
|
||||
|
||||
struct btf_header {
|
||||
__u16 magic;
|
||||
__u8 version;
|
||||
__u8 flags;
|
||||
__u32 hdr_len;
|
||||
|
||||
/* All offsets are in bytes relative to the end of this header */
|
||||
__u32 type_off; /* offset of type section */
|
||||
__u32 type_len; /* length of type section */
|
||||
__u32 str_off; /* offset of string section */
|
||||
__u32 str_len; /* length of string section */
|
||||
};
|
||||
|
||||
The magic is ``0xeB9F``, which has different encoding for big and little
|
||||
endian systems, and can be used to test whether BTF is generated for big- or
|
||||
little-endian target. The ``btf_header`` is designed to be extensible with
|
||||
``hdr_len`` equal to ``sizeof(struct btf_header)`` when a data blob is
|
||||
generated.
|
||||
|
||||
2.1 String Encoding
|
||||
===================
|
||||
|
||||
The first string in the string section must be a null string. The rest of
|
||||
string table is a concatenation of other null-terminated strings.
|
||||
|
||||
2.2 Type Encoding
|
||||
=================
|
||||
|
||||
The type id ``0`` is reserved for ``void`` type. The type section is parsed
|
||||
sequentially and type id is assigned to each recognized type starting from id
|
||||
``1``. Currently, the following types are supported::
|
||||
|
||||
#define BTF_KIND_INT 1 /* Integer */
|
||||
#define BTF_KIND_PTR 2 /* Pointer */
|
||||
#define BTF_KIND_ARRAY 3 /* Array */
|
||||
#define BTF_KIND_STRUCT 4 /* Struct */
|
||||
#define BTF_KIND_UNION 5 /* Union */
|
||||
#define BTF_KIND_ENUM 6 /* Enumeration */
|
||||
#define BTF_KIND_FWD 7 /* Forward */
|
||||
#define BTF_KIND_TYPEDEF 8 /* Typedef */
|
||||
#define BTF_KIND_VOLATILE 9 /* Volatile */
|
||||
#define BTF_KIND_CONST 10 /* Const */
|
||||
#define BTF_KIND_RESTRICT 11 /* Restrict */
|
||||
#define BTF_KIND_FUNC 12 /* Function */
|
||||
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
|
||||
|
||||
Note that the type section encodes debug info, not just pure types.
|
||||
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
|
||||
|
||||
Each type contains the following common data::
|
||||
|
||||
struct btf_type {
|
||||
__u32 name_off;
|
||||
/* "info" bits arrangement
|
||||
* bits 0-15: vlen (e.g. # of struct's members)
|
||||
* bits 16-23: unused
|
||||
* bits 24-27: kind (e.g. int, ptr, array...etc)
|
||||
* bits 28-30: unused
|
||||
* bit 31: kind_flag, currently used by
|
||||
* struct, union and fwd
|
||||
*/
|
||||
__u32 info;
|
||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
||||
* "size" tells the size of the type it is describing.
|
||||
*
|
||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
||||
* FUNC and FUNC_PROTO.
|
||||
* "type" is a type_id referring to another type.
|
||||
*/
|
||||
union {
|
||||
__u32 size;
|
||||
__u32 type;
|
||||
};
|
||||
};
|
||||
|
||||
For certain kinds, the common data are followed by kind-specific data. The
|
||||
``name_off`` in ``struct btf_type`` specifies the offset in the string table.
|
||||
The following sections detail encoding of each kind.
|
||||
|
||||
2.2.1 BTF_KIND_INT
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: any valid offset
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_INT
|
||||
* ``info.vlen``: 0
|
||||
* ``size``: the size of the int type in bytes.
|
||||
|
||||
``btf_type`` is followed by a ``u32`` with the following bits arrangement::
|
||||
|
||||
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
|
||||
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
|
||||
#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
|
||||
|
||||
The ``BTF_INT_ENCODING`` has the following attributes::
|
||||
|
||||
#define BTF_INT_SIGNED (1 << 0)
|
||||
#define BTF_INT_CHAR (1 << 1)
|
||||
#define BTF_INT_BOOL (1 << 2)
|
||||
|
||||
The ``BTF_INT_ENCODING()`` provides extra information: signedness, char, or
|
||||
bool, for the int type. The char and bool encoding are mostly useful for
|
||||
pretty print. At most one encoding can be specified for the int type.
|
||||
|
||||
The ``BTF_INT_BITS()`` specifies the number of actual bits held by this int
|
||||
type. For example, a 4-bit bitfield encodes ``BTF_INT_BITS()`` equal to 4.
|
||||
The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
|
||||
for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
|
||||
|
||||
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values
|
||||
for this int. For example, a bitfield struct member has:
|
||||
|
||||
* btf member bit offset 100 from the start of the structure,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
|
||||
|
||||
Then in the struct memory layout, this member will occupy ``4`` bits starting
|
||||
from bits ``100 + 2 = 102``.
|
||||
|
||||
Alternatively, the bitfield struct member can be the following to access the
|
||||
same bits as the above:
|
||||
|
||||
* btf member bit offset 102,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
|
||||
|
||||
The original intention of ``BTF_INT_OFFSET()`` is to provide flexibility of
|
||||
bitfield encoding. Currently, both llvm and pahole generate
|
||||
``BTF_INT_OFFSET() = 0`` for all int types.
|
||||
|
||||
2.2.2 BTF_KIND_PTR
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_PTR
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the pointee type of the pointer
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.3 BTF_KIND_ARRAY
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_ARRAY
|
||||
* ``info.vlen``: 0
|
||||
* ``size/type``: 0, not used
|
||||
|
||||
``btf_type`` is followed by one ``struct btf_array``::
|
||||
|
||||
struct btf_array {
|
||||
__u32 type;
|
||||
__u32 index_type;
|
||||
__u32 nelems;
|
||||
};
|
||||
|
||||
The ``struct btf_array`` encoding:
|
||||
* ``type``: the element type
|
||||
* ``index_type``: the index type
|
||||
* ``nelems``: the number of elements for this array (``0`` is also allowed).
|
||||
|
||||
The ``index_type`` can be any regular int type (``u8``, ``u16``, ``u32``,
|
||||
``u64``, ``unsigned __int128``). The original design of including
|
||||
``index_type`` follows DWARF, which has an ``index_type`` for its array type.
|
||||
Currently in BTF, beyond type verification, the ``index_type`` is not used.
|
||||
|
||||
The ``struct btf_array`` allows chaining through element type to represent
|
||||
multidimensional arrays. For example, for ``int a[5][6]``, the following type
|
||||
information illustrates the chaining:
|
||||
|
||||
* [1]: int
|
||||
* [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6``
|
||||
* [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5``
|
||||
|
||||
Currently, both pahole and llvm collapse multidimensional array into
|
||||
one-dimensional array, e.g., for ``a[5][6]``, the ``btf_array.nelems`` is
|
||||
equal to ``30``. This is because the original use case is map pretty print
|
||||
where the whole array is dumped out so one-dimensional array is enough. As
|
||||
more BTF usage is explored, pahole and llvm can be changed to generate proper
|
||||
chained representation for multidimensional arrays.
|
||||
|
||||
2.2.4 BTF_KIND_STRUCT
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
2.2.5 BTF_KIND_UNION
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0 or offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0 or 1
|
||||
* ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION
|
||||
* ``info.vlen``: the number of struct/union members
|
||||
* ``size``: the size of the struct/union in bytes
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``::
|
||||
|
||||
struct btf_member {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
__u32 offset;
|
||||
};
|
||||
|
||||
``struct btf_member`` encoding:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``type``: the member type
|
||||
* ``offset``: <see below>
|
||||
|
||||
If the type info ``kind_flag`` is not set, the offset contains only bit offset
|
||||
of the member. Note that the base type of the bitfield can only be int or enum
|
||||
type. If the bitfield size is 32, the base type can be either int or enum
|
||||
type. If the bitfield size is not 32, the base type must be int, and int type
|
||||
``BTF_INT_BITS()`` encodes the bitfield size.
|
||||
|
||||
If the ``kind_flag`` is set, the ``btf_member.offset`` contains both member
|
||||
bitfield size and bit offset. The bitfield size and bit offset are calculated
|
||||
as below::
|
||||
|
||||
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
|
||||
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
|
||||
|
||||
In this case, if the base type is an int type, it must be a regular int type:
|
||||
|
||||
* ``BTF_INT_OFFSET()`` must be 0.
|
||||
* ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``.
|
||||
|
||||
The following kernel patch introduced ``kind_flag`` and explained why both
|
||||
modes exist:
|
||||
|
||||
https://github.com/torvalds/linux/commit/9d5f9f701b1891466fb3dbb1806ad97716f95cc3#diff-fa650a64fdd3968396883d2fe8215ff3
|
||||
|
||||
2.2.6 BTF_KIND_ENUM
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0 or offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_ENUM
|
||||
* ``info.vlen``: number of enum values
|
||||
* ``size``: 4
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``::
|
||||
|
||||
struct btf_enum {
|
||||
__u32 name_off;
|
||||
__s32 val;
|
||||
};
|
||||
|
||||
The ``btf_enum`` encoding:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``val``: any value
|
||||
|
||||
2.2.7 BTF_KIND_FWD
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0 for struct, 1 for union
|
||||
* ``info.kind``: BTF_KIND_FWD
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: 0
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.8 BTF_KIND_TYPEDEF
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_TYPEDEF
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type which can be referred to by name at ``name_off``
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.9 BTF_KIND_VOLATILE
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_VOLATILE
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``volatile`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.10 BTF_KIND_CONST
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_CONST
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``const`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.11 BTF_KIND_RESTRICT
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_RESTRICT
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``restrict`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.12 BTF_KIND_FUNC
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: a BTF_KIND_FUNC_PROTO type
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
A BTF_KIND_FUNC defines not a type, but a subprogram (function) whose
|
||||
signature is defined by ``type``. The subprogram is thus an instance of that
|
||||
type. The BTF_KIND_FUNC may in turn be referenced by a func_info in the
|
||||
:ref:`BTF_Ext_Section` (ELF) or in the arguments to :ref:`BPF_Prog_Load`
|
||||
(ABI).
|
||||
|
||||
2.2.13 BTF_KIND_FUNC_PROTO
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC_PROTO
|
||||
* ``info.vlen``: # of parameters
|
||||
* ``type``: the return type
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``::
|
||||
|
||||
struct btf_param {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
};
|
||||
|
||||
If a BTF_KIND_FUNC_PROTO type is referred to by a BTF_KIND_FUNC type, then
|
||||
``btf_param.name_off`` must point to a valid C identifier except for the
|
||||
possible last argument representing the variable argument. The btf_param.type
|
||||
refers to the parameter type.
|
||||
|
||||
If the function has variable arguments, the last parameter is encoded with
|
||||
``name_off = 0`` and ``type = 0``.
|
||||
|
||||
3. BTF Kernel API
|
||||
*****************
|
||||
|
||||
The following bpf syscall commands involve BTF:
|
||||
* BPF_BTF_LOAD: load a blob of BTF data into kernel
|
||||
* BPF_MAP_CREATE: map creation with btf key and value type info.
|
||||
* BPF_PROG_LOAD: prog load with btf function and line info.
|
||||
* BPF_BTF_GET_FD_BY_ID: get a btf fd
|
||||
* BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info
|
||||
and other btf related info are returned.
|
||||
|
||||
The workflow typically looks like:
|
||||
::
|
||||
|
||||
Application:
|
||||
BPF_BTF_LOAD
|
||||
|
|
||||
v
|
||||
BPF_MAP_CREATE and BPF_PROG_LOAD
|
||||
|
|
||||
V
|
||||
......
|
||||
|
||||
Introspection tool:
|
||||
......
|
||||
BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's)
|
||||
|
|
||||
V
|
||||
BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd)
|
||||
|
|
||||
V
|
||||
BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id)
|
||||
| |
|
||||
V |
|
||||
BPF_BTF_GET_FD_BY_ID (get btf_fd) |
|
||||
| |
|
||||
V |
|
||||
BPF_OBJ_GET_INFO_BY_FD (get btf) |
|
||||
| |
|
||||
V V
|
||||
pretty print types, dump func signatures and line info, etc.
|
||||
|
||||
|
||||
3.1 BPF_BTF_LOAD
|
||||
================
|
||||
|
||||
Load a blob of BTF data into kernel. A blob of data, described in
|
||||
:ref:`BTF_Type_String`, can be directly loaded into the kernel. A ``btf_fd``
|
||||
is returned to userspace.
|
||||
|
||||
3.2 BPF_MAP_CREATE
|
||||
==================
|
||||
|
||||
A map can be created with ``btf_fd`` and specified key/value type id.::
|
||||
|
||||
__u32 btf_fd; /* fd pointing to a BTF type data */
|
||||
__u32 btf_key_type_id; /* BTF type_id of the key */
|
||||
__u32 btf_value_type_id; /* BTF type_id of the value */
|
||||
|
||||
In libbpf, the map can be defined with extra annotation like below:
|
||||
::
|
||||
|
||||
struct bpf_map_def SEC("maps") btf_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(struct ipv_counts),
|
||||
.max_entries = 4,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
|
||||
|
||||
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
|
||||
value types for the map. During ELF parsing, libbpf is able to extract
|
||||
key/value type_id's and assign them to BPF_MAP_CREATE attributes
|
||||
automatically.
|
||||
|
||||
.. _BPF_Prog_Load:
|
||||
|
||||
3.3 BPF_PROG_LOAD
|
||||
=================
|
||||
|
||||
During prog_load, func_info and line_info can be passed to kernel with proper
|
||||
values for the following attributes:
|
||||
::
|
||||
|
||||
__u32 insn_cnt;
|
||||
__aligned_u64 insns;
|
||||
......
|
||||
__u32 prog_btf_fd; /* fd pointing to BTF type data */
|
||||
__u32 func_info_rec_size; /* userspace bpf_func_info size */
|
||||
__aligned_u64 func_info; /* func info */
|
||||
__u32 func_info_cnt; /* number of bpf_func_info records */
|
||||
__u32 line_info_rec_size; /* userspace bpf_line_info size */
|
||||
__aligned_u64 line_info; /* line info */
|
||||
__u32 line_info_cnt; /* number of bpf_line_info records */
|
||||
|
||||
The func_info and line_info are an array of below, respectively.::
|
||||
|
||||
struct bpf_func_info {
|
||||
__u32 insn_off; /* [0, insn_cnt - 1] */
|
||||
__u32 type_id; /* pointing to a BTF_KIND_FUNC type */
|
||||
};
|
||||
struct bpf_line_info {
|
||||
__u32 insn_off; /* [0, insn_cnt - 1] */
|
||||
__u32 file_name_off; /* offset to string table for the filename */
|
||||
__u32 line_off; /* offset to string table for the source line */
|
||||
__u32 line_col; /* line number and column number */
|
||||
};
|
||||
|
||||
func_info_rec_size is the size of each func_info record, and
|
||||
line_info_rec_size is the size of each line_info record. Passing the record
|
||||
size to the kernel makes it possible to extend the record itself in the future.
|
||||
|
||||
Below are requirements for func_info:
|
||||
* func_info[0].insn_off must be 0.
|
||||
* the func_info insn_off is in strictly increasing order and matches
|
||||
bpf func boundaries.
|
||||
|
||||
Below are requirements for line_info:
|
||||
* the first insn in each func must have a line_info record pointing to it.
|
||||
* the line_info insn_off is in strictly increasing order.
|
||||
|
||||
For line_info, the line number and column number are defined as below:
|
||||
::
|
||||
|
||||
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
|
||||
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
|
||||
|
||||
3.4 BPF_{PROG,MAP}_GET_NEXT_ID
|
||||
==============================
|
||||
|
||||
In kernel, every loaded program, map or btf has a unique id. The id won't
|
||||
change during the lifetime of a program, map, or btf.
|
||||
|
||||
The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID returns all id's, one for
|
||||
each command, to user space, for bpf programs or maps, respectively, so an
|
||||
inspection tool can inspect all programs and maps.
|
||||
|
||||
3.5 BPF_{PROG,MAP}_GET_FD_BY_ID
|
||||
===============================
|
||||
|
||||
An introspection tool cannot use an id to get details about a program or map.
|
||||
A file descriptor needs to be obtained first for reference-counting purposes.
|
||||
|
||||
3.6 BPF_OBJ_GET_INFO_BY_FD
|
||||
==========================
|
||||
|
||||
Once a program/map fd is acquired, an introspection tool can get the detailed
|
||||
information from kernel about this fd, some of which are BTF-related. For
|
||||
example, ``bpf_map_info`` returns ``btf_id`` and key/value type ids.
|
||||
``bpf_prog_info`` returns ``btf_id``, func_info, and line info for translated
|
||||
bpf byte codes, and jited_line_info.
|
||||
|
||||
3.7 BPF_BTF_GET_FD_BY_ID
|
||||
========================
|
||||
|
||||
With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``, bpf
|
||||
syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd. Then, with
|
||||
command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally loaded into the
|
||||
kernel with BPF_BTF_LOAD, can be retrieved.
|
||||
|
||||
With the btf blob, ``bpf_map_info``, and ``bpf_prog_info``, an introspection
|
||||
tool has full btf knowledge and is able to pretty print map key/values, dump
|
||||
func signatures and line info, along with byte/jit codes.
|
||||
|
||||
4. ELF File Format Interface
|
||||
****************************
|
||||
|
||||
4.1 .BTF section
|
||||
================
|
||||
|
||||
The .BTF section contains type and string data. The format of this section is
|
||||
the same as the one described in :ref:`BTF_Type_String`.
|
||||
|
||||
.. _BTF_Ext_Section:
|
||||
|
||||
4.2 .BTF.ext section
|
||||
====================
|
||||
|
||||
The .BTF.ext section encodes func_info and line_info, which need loader
|
||||
manipulation before loading into the kernel.
|
||||
|
||||
The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
|
||||
and ``tools/lib/bpf/btf.c``.
|
||||
|
||||
The current header of .BTF.ext section::
|
||||
|
||||
struct btf_ext_header {
|
||||
__u16 magic;
|
||||
__u8 version;
|
||||
__u8 flags;
|
||||
__u32 hdr_len;
|
||||
|
||||
/* All offsets are in bytes relative to the end of this header */
|
||||
__u32 func_info_off;
|
||||
__u32 func_info_len;
|
||||
__u32 line_info_off;
|
||||
__u32 line_info_len;
|
||||
};
|
||||
|
||||
It is very similar to the .BTF section. Instead of type/string section, it
|
||||
contains func_info and line_info section. See :ref:`BPF_Prog_Load` for details
|
||||
about func_info and line_info record format.
|
||||
|
||||
The func_info is organized as below.::
|
||||
|
||||
func_info_rec_size
|
||||
btf_ext_info_sec for section #1 /* func_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* func_info for section #2 */
|
||||
...
|
||||
|
||||
``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure when
|
||||
.BTF.ext is generated. ``btf_ext_info_sec``, defined below, is a collection of
|
||||
func_info for each specific ELF section.::
|
||||
|
||||
struct btf_ext_info_sec {
|
||||
__u32 sec_name_off; /* offset to section name */
|
||||
__u32 num_info;
|
||||
/* Followed by num_info * record_size number of bytes */
|
||||
__u8 data[0];
|
||||
};
|
||||
|
||||
Here, num_info must be greater than 0.
|
||||
|
||||
The line_info is organized as below.::
|
||||
|
||||
line_info_rec_size
|
||||
btf_ext_info_sec for section #1 /* line_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* line_info for section #2 */
|
||||
...
|
||||
|
||||
``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure when
|
||||
.BTF.ext is generated.
|
||||
|
||||
The interpretation of ``bpf_func_info->insn_off`` and
|
||||
``bpf_line_info->insn_off`` is different between kernel API and ELF API. For
|
||||
kernel API, the ``insn_off`` is the instruction offset in the unit of ``struct
|
||||
bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
|
||||
beginning of section (``btf_ext_info_sec->sec_name_off``).
|
||||
|
||||
5. Using BTF
|
||||
************
|
||||
|
||||
5.1 bpftool map pretty print
|
||||
============================
|
||||
|
||||
With BTF, the map key/value can be printed based on fields rather than simply
|
||||
raw bytes. This is especially valuable for large structures or if your data
|
||||
structure has bitfields. For example, for the following map,::
|
||||
|
||||
enum A { A1, A2, A3, A4, A5 };
|
||||
typedef enum A ___A;
|
||||
struct tmp_t {
|
||||
char a1:4;
|
||||
int a2:4;
|
||||
int :4;
|
||||
__u32 a3:4;
|
||||
int b;
|
||||
___A b1:4;
|
||||
enum A b2:4;
|
||||
};
|
||||
struct bpf_map_def SEC("maps") tmpmap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct tmp_t),
|
||||
.max_entries = 1,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
|
||||
|
||||
bpftool is able to pretty print like below:
|
||||
::
|
||||
|
||||
[{
|
||||
"key": 0,
|
||||
"value": {
|
||||
"a1": 0x2,
|
||||
"a2": 0x4,
|
||||
"a3": 0x6,
|
||||
"b": 7,
|
||||
"b1": 0x8,
|
||||
"b2": 0xa
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
5.2 bpftool prog dump
|
||||
=====================
|
||||
|
||||
The following is an example showing how func_info and line_info can help prog
|
||||
dump with better kernel symbol names, function prototypes and line
|
||||
information.::
|
||||
|
||||
$ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv
|
||||
[...]
|
||||
int test_long_fname_2(struct dummy_tracepoint_args * arg):
|
||||
bpf_prog_44a040bf25481309_test_long_fname_2:
|
||||
; static int test_long_fname_2(struct dummy_tracepoint_args *arg)
|
||||
0: push %rbp
|
||||
1: mov %rsp,%rbp
|
||||
4: sub $0x30,%rsp
|
||||
b: sub $0x28,%rbp
|
||||
f: mov %rbx,0x0(%rbp)
|
||||
13: mov %r13,0x8(%rbp)
|
||||
17: mov %r14,0x10(%rbp)
|
||||
1b: mov %r15,0x18(%rbp)
|
||||
1f: xor %eax,%eax
|
||||
21: mov %rax,0x20(%rbp)
|
||||
25: xor %esi,%esi
|
||||
; int key = 0;
|
||||
27: mov %esi,-0x4(%rbp)
|
||||
; if (!arg->sock)
|
||||
2a: mov 0x8(%rdi),%rdi
|
||||
; if (!arg->sock)
|
||||
2e: cmp $0x0,%rdi
|
||||
32: je 0x0000000000000070
|
||||
34: mov %rbp,%rsi
|
||||
; counts = bpf_map_lookup_elem(&btf_map, &key);
|
||||
[...]
|
||||
|
||||
5.3 Verifier Log
|
||||
================
|
||||
|
||||
The following is an example of how line_info can help debug verification
|
||||
failure.::
|
||||
|
||||
/* The code at tools/testing/selftests/bpf/test_xdp_noinline.c
|
||||
* is modified as below.
|
||||
*/
|
||||
data = (void *)(long)xdp->data;
|
||||
data_end = (void *)(long)xdp->data_end;
|
||||
/*
|
||||
if (data + 4 > data_end)
|
||||
return XDP_DROP;
|
||||
*/
|
||||
*(u32 *)data = dst->dst;
|
||||
|
||||
$ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp
|
||||
; data = (void *)(long)xdp->data;
|
||||
224: (79) r2 = *(u64 *)(r10 -112)
|
||||
225: (61) r2 = *(u32 *)(r2 +0)
|
||||
; *(u32 *)data = dst->dst;
|
||||
226: (63) *(u32 *)(r2 +0) = r1
|
||||
invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0)
|
||||
R2 offset is outside of the packet
|
||||
|
||||
6. BTF Generation
|
||||
*****************
|
||||
|
||||
You need the latest pahole
|
||||
|
||||
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/
|
||||
|
||||
or llvm (8.0 or later). The pahole acts as a dwarf2btf converter. It doesn't
|
||||
support .BTF.ext and btf BTF_KIND_FUNC type yet. For example,::
|
||||
|
||||
-bash-4.4$ cat t.c
|
||||
struct t {
|
||||
int a:2;
|
||||
int b:3;
|
||||
int c:2;
|
||||
} g;
|
||||
-bash-4.4$ gcc -c -O2 -g t.c
|
||||
-bash-4.4$ pahole -JV t.o
|
||||
File t.o:
|
||||
[1] STRUCT t kind_flag=1 size=4 vlen=3
|
||||
a type_id=2 bitfield_size=2 bits_offset=0
|
||||
b type_id=2 bitfield_size=3 bits_offset=2
|
||||
c type_id=2 bitfield_size=2 bits_offset=5
|
||||
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
|
||||
|
||||
The llvm is able to generate .BTF and .BTF.ext directly with -g for bpf target
|
||||
only. The assembly code (-S) is able to show the BTF encoding in assembly
|
||||
format.::
|
||||
|
||||
-bash-4.4$ cat t2.c
|
||||
typedef int __int32;
|
||||
struct t2 {
|
||||
int a2;
|
||||
int (*f2)(char q1, __int32 q2, ...);
|
||||
int (*f3)();
|
||||
} g2;
|
||||
int main() { return 0; }
|
||||
int test() { return 0; }
|
||||
-bash-4.4$ clang -c -g -O2 -target bpf t2.c
|
||||
-bash-4.4$ readelf -S t2.o
|
||||
......
|
||||
[ 8] .BTF PROGBITS 0000000000000000 00000247
|
||||
000000000000016e 0000000000000000 0 0 1
|
||||
[ 9] .BTF.ext PROGBITS 0000000000000000 000003b5
|
||||
0000000000000060 0000000000000000 0 0 1
|
||||
[10] .rel.BTF.ext REL 0000000000000000 000007e0
|
||||
0000000000000040 0000000000000010 16 9 8
|
||||
......
|
||||
-bash-4.4$ clang -S -g -O2 -target bpf t2.c
|
||||
-bash-4.4$ cat t2.s
|
||||
......
|
||||
.section .BTF,"",@progbits
|
||||
.short 60319 # 0xeb9f
|
||||
.byte 1
|
||||
.byte 0
|
||||
.long 24
|
||||
.long 0
|
||||
.long 220
|
||||
.long 220
|
||||
.long 122
|
||||
.long 0 # BTF_KIND_FUNC_PROTO(id = 1)
|
||||
.long 218103808 # 0xd000000
|
||||
.long 2
|
||||
.long 83 # BTF_KIND_INT(id = 2)
|
||||
.long 16777216 # 0x1000000
|
||||
.long 4
|
||||
.long 16777248 # 0x1000020
|
||||
......
|
||||
.byte 0 # string offset=0
|
||||
.ascii ".text" # string offset=1
|
||||
.byte 0
|
||||
.ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7
|
||||
.byte 0
|
||||
.ascii "int main() { return 0; }" # string offset=33
|
||||
.byte 0
|
||||
.ascii "int test() { return 0; }" # string offset=58
|
||||
.byte 0
|
||||
.ascii "int" # string offset=83
|
||||
......
|
||||
.section .BTF.ext,"",@progbits
|
||||
.short 60319 # 0xeb9f
|
||||
.byte 1
|
||||
.byte 0
|
||||
.long 24
|
||||
.long 0
|
||||
.long 28
|
||||
.long 28
|
||||
.long 44
|
||||
.long 8 # FuncInfo
|
||||
.long 1 # FuncInfo section string offset=1
|
||||
.long 2
|
||||
.long .Lfunc_begin0
|
||||
.long 3
|
||||
.long .Lfunc_begin1
|
||||
.long 5
|
||||
.long 16 # LineInfo
|
||||
.long 1 # LineInfo section string offset=1
|
||||
.long 2
|
||||
.long .Ltmp0
|
||||
.long 7
|
||||
.long 33
|
||||
.long 7182 # Line 7 Col 14
|
||||
.long .Ltmp3
|
||||
.long 7
|
||||
.long 58
|
||||
.long 8206 # Line 8 Col 14
|
||||
|
||||
7. Testing
|
||||
**********
|
||||
|
||||
Kernel bpf selftest `test_btf.c` provides an extensive set of BTF-related tests.
|
@ -15,6 +15,13 @@ that goes into great technical depth about the BPF Architecture.
|
||||
The primary info for the bpf syscall is available in the `man-pages`_
|
||||
for `bpf(2)`_.
|
||||
|
||||
BPF Type Format (BTF)
|
||||
=====================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
btf
|
||||
|
||||
|
||||
Frequently asked questions (FAQ)
|
||||
|
@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
|
||||
|
||||
8. LRU
|
||||
Each memcg has its own private LRU. Now, its handling is under global
|
||||
VM's control (means that it's handled under global zone_lru_lock).
|
||||
VM's control (means that it's handled under global pgdat->lru_lock).
|
||||
Almost all routines around memcg's LRU are called by global LRU's
|
||||
list management functions under zone_lru_lock().
|
||||
list management functions under pgdat->lru_lock.
|
||||
|
||||
A special function is mem_cgroup_isolate_pages(). This scans
|
||||
memcg's private LRU and calls __isolate_lru_page() to extract a page
|
||||
|
@ -70,7 +70,7 @@ Brief summary of control files.
|
||||
memory.soft_limit_in_bytes # set/show soft limit of memory usage
|
||||
memory.stat # show various statistics
|
||||
memory.use_hierarchy # set/show hierarchical account enabled
|
||||
memory.force_empty # trigger forced move charge to parent
|
||||
memory.force_empty # trigger forced page reclaim
|
||||
memory.pressure_level # set memory pressure notifications
|
||||
memory.swappiness # set/show swappiness parameter of vmscan
|
||||
(See sysctl's vm.swappiness)
|
||||
@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
|
||||
Other lock order is as follows:
|
||||
PG_locked.
|
||||
mm->page_table_lock
|
||||
zone_lru_lock
|
||||
pgdat->lru_lock
|
||||
lock_page_cgroup.
|
||||
In many cases, just lock_page_cgroup() is called.
|
||||
per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
|
||||
zone_lru_lock, it has no lock of its own.
|
||||
pgdat->lru_lock, it has no lock of its own.
|
||||
|
||||
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
|
||||
|
||||
@ -459,8 +459,9 @@ About use_hierarchy, see Section 6.
|
||||
the cgroup will be reclaimed and as many pages reclaimed as possible.
|
||||
|
||||
The typical use case for this interface is before calling rmdir().
|
||||
Because rmdir() moves all pages to parent, some out-of-use page caches can be
|
||||
moved to the parent. If you want to avoid that, force_empty will be useful.
|
||||
Though rmdir() offlines the memcg, the memcg may still stay there due to
|
||||
charged file caches. Some out-of-use page caches may keep charged until
|
||||
memory pressure happens. If you want to avoid that, force_empty will be useful.
|
||||
|
||||
Also, note that when memory.kmem.limit_in_bytes is set the charges due to
|
||||
kernel pages will still be seen. This is not considered a failure and the
|
||||
|
@ -33,6 +33,9 @@ limit in the hierarchy is followed).
|
||||
pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
|
||||
superset of parent/child/pids.current.
|
||||
|
||||
The pids.events file contains event counters:
|
||||
- max: Number of times fork failed because limit was hit.
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
|
@ -1,130 +0,0 @@
|
||||
|
||||
===================================
|
||||
Using flexible arrays in the kernel
|
||||
===================================
|
||||
|
||||
Large contiguous memory allocations can be unreliable in the Linux kernel.
|
||||
Kernel programmers will sometimes respond to this problem by allocating
|
||||
pages with :c:func:`vmalloc()`. This solution is not ideal, though. On 32-bit
|
||||
systems, memory from vmalloc() must be mapped into a relatively small address
|
||||
space; it's easy to run out. On SMP systems, the page table changes required
|
||||
by vmalloc() allocations can require expensive cross-processor interrupts on
|
||||
all CPUs. And, on all systems, use of space in the vmalloc() range increases
|
||||
pressure on the translation lookaside buffer (TLB), reducing the performance
|
||||
of the system.
|
||||
|
||||
In many cases, the need for memory from vmalloc() can be eliminated by piecing
|
||||
together an array from smaller parts; the flexible array library exists to make
|
||||
this task easier.
|
||||
|
||||
A flexible array holds an arbitrary (within limits) number of fixed-sized
|
||||
objects, accessed via an integer index. Sparse arrays are handled
|
||||
reasonably well. Only single-page allocations are made, so memory
|
||||
allocation failures should be relatively rare. The down sides are that the
|
||||
arrays cannot be indexed directly, individual object size cannot exceed the
|
||||
system page size, and putting data into a flexible array requires a copy
|
||||
operation. It's also worth noting that flexible arrays do no internal
|
||||
locking at all; if concurrent access to an array is possible, then the
|
||||
caller must arrange for appropriate mutual exclusion.
|
||||
|
||||
The creation of a flexible array is done with :c:func:`flex_array_alloc()`::
|
||||
|
||||
#include <linux/flex_array.h>
|
||||
|
||||
struct flex_array *flex_array_alloc(int element_size,
|
||||
unsigned int total,
|
||||
gfp_t flags);
|
||||
|
||||
The individual object size is provided by ``element_size``, while total is the
|
||||
maximum number of objects which can be stored in the array. The flags
|
||||
argument is passed directly to the internal memory allocation calls. With
|
||||
the current code, using flags to ask for high memory is likely to lead to
|
||||
notably unpleasant side effects.
|
||||
|
||||
It is also possible to define flexible arrays at compile time with::
|
||||
|
||||
DEFINE_FLEX_ARRAY(name, element_size, total);
|
||||
|
||||
This macro will result in a definition of an array with the given name; the
|
||||
element size and total will be checked for validity at compile time.
|
||||
|
||||
Storing data into a flexible array is accomplished with a call to
|
||||
:c:func:`flex_array_put()`::
|
||||
|
||||
int flex_array_put(struct flex_array *array, unsigned int element_nr,
|
||||
void *src, gfp_t flags);
|
||||
|
||||
This call will copy the data from src into the array, in the position
|
||||
indicated by ``element_nr`` (which must be less than the maximum specified when
|
||||
the array was created). If any memory allocations must be performed, flags
|
||||
will be used. The return value is zero on success, a negative error code
|
||||
otherwise.
|
||||
|
||||
There might possibly be a need to store data into a flexible array while
|
||||
running in some sort of atomic context; in this situation, sleeping in the
|
||||
memory allocator would be a bad thing. That can be avoided by using
|
||||
``GFP_ATOMIC`` for the flags value, but, often, there is a better way. The
|
||||
trick is to ensure that any needed memory allocations are done before
|
||||
entering atomic context, using :c:func:`flex_array_prealloc()`::
|
||||
|
||||
int flex_array_prealloc(struct flex_array *array, unsigned int start,
|
||||
unsigned int nr_elements, gfp_t flags);
|
||||
|
||||
This function will ensure that memory for the elements indexed in the range
|
||||
defined by ``start`` and ``nr_elements`` has been allocated. Thereafter, a
|
||||
``flex_array_put()`` call on an element in that range is guaranteed not to
|
||||
block.
|
||||
|
||||
Getting data back out of the array is done with :c:func:`flex_array_get()`::
|
||||
|
||||
void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
|
||||
|
||||
The return value is a pointer to the data element, or NULL if that
|
||||
particular element has never been allocated.
|
||||
|
||||
Note that it is possible to get back a valid pointer for an element which
|
||||
has never been stored in the array. Memory for array elements is allocated
|
||||
one page at a time; a single allocation could provide memory for several
|
||||
adjacent elements. Flexible array elements are normally initialized to the
|
||||
value ``FLEX_ARRAY_FREE`` (defined as 0x6c in <linux/poison.h>), so errors
|
||||
involving that number probably result from use of unstored array entries.
|
||||
Note that, if array elements are allocated with ``__GFP_ZERO``, they will be
|
||||
initialized to zero and this poisoning will not happen.
|
||||
|
||||
Individual elements in the array can be cleared with
|
||||
:c:func:`flex_array_clear()`::
|
||||
|
||||
int flex_array_clear(struct flex_array *array, unsigned int element_nr);
|
||||
|
||||
This function will set the given element to ``FLEX_ARRAY_FREE`` and return
|
||||
zero. If storage for the indicated element is not allocated for the array,
|
||||
``flex_array_clear()`` will return ``-EINVAL`` instead. Note that clearing an
|
||||
element does not release the storage associated with it; to reduce the
|
||||
allocated size of an array, call :c:func:`flex_array_shrink()`::
|
||||
|
||||
int flex_array_shrink(struct flex_array *array);
|
||||
|
||||
The return value will be the number of pages of memory actually freed.
|
||||
This function works by scanning the array for pages containing nothing but
|
||||
``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work
|
||||
if the array's pages are allocated with ``__GFP_ZERO``.
|
||||
|
||||
It is possible to remove all elements of an array with a call to
|
||||
:c:func:`flex_array_free_parts()`::
|
||||
|
||||
void flex_array_free_parts(struct flex_array *array);
|
||||
|
||||
This call frees all elements, but leaves the array itself in place.
|
||||
Freeing the entire array is done with :c:func:`flex_array_free()`::
|
||||
|
||||
void flex_array_free(struct flex_array *array);
|
||||
|
||||
As of this writing, there are no users of flexible arrays in the mainline
|
||||
kernel. The functions described here are also not exported to modules;
|
||||
that will probably be fixed when somebody comes up with a need for it.
|
||||
|
||||
|
||||
Flexible array functions
|
||||
------------------------
|
||||
|
||||
.. kernel-doc:: include/linux/flex_array.h
|
12
Documentation/core-api/generic-radix-tree.rst
Normal file
@ -0,0 +1,12 @@
|
||||
=================================
|
||||
Generic radix trees/sparse arrays
|
||||
=================================
|
||||
|
||||
.. kernel-doc:: include/linux/generic-radix-tree.h
|
||||
:doc: Generic radix trees/sparse arrays
|
||||
|
||||
generic radix tree functions
|
||||
----------------------------
|
||||
|
||||
.. kernel-doc:: include/linux/generic-radix-tree.h
|
||||
:functions:
|
@ -28,6 +28,7 @@ Core utilities
|
||||
errseq
|
||||
printk-formats
|
||||
circular-buffers
|
||||
generic-radix-tree
|
||||
memory-allocation
|
||||
mm-api
|
||||
gfp_mask-from-fs-io
|
||||
|
@ -356,10 +356,6 @@ Read-Copy Update (RCU)
|
||||
|
||||
.. kernel-doc:: include/linux/rcupdate.h
|
||||
|
||||
.. kernel-doc:: include/linux/rcupdate_wait.h
|
||||
|
||||
.. kernel-doc:: include/linux/rcutree.h
|
||||
|
||||
.. kernel-doc:: kernel/rcu/tree.c
|
||||
|
||||
.. kernel-doc:: kernel/rcu/tree_plugin.h
|
||||
|
@ -1,4 +1,4 @@
|
||||
.. _memory-allocation:
|
||||
.. _memory_allocation:
|
||||
|
||||
=======================
|
||||
Memory Allocation Guide
|
||||
@ -113,9 +113,11 @@ see :c:func:`kvmalloc_node` reference documentation. Note that
|
||||
|
||||
If you need to allocate many identical objects you can use the slab
|
||||
cache allocator. The cache should be set up with
|
||||
:c:func:`kmem_cache_create` before it can be used. Afterwards
|
||||
:c:func:`kmem_cache_alloc` and its convenience wrappers can allocate
|
||||
memory from that cache.
|
||||
:c:func:`kmem_cache_create` or :c:func:`kmem_cache_create_usercopy`
|
||||
before it can be used. The second function should be used if a part of
|
||||
the cache might be copied to the userspace. After the cache is
|
||||
created :c:func:`kmem_cache_alloc` and its convenience wrappers can
|
||||
allocate memory from that cache.
|
||||
|
||||
When the allocated memory is no longer needed it must be freed. You
|
||||
can use :c:func:`kvfree` for the memory allocated with `kmalloc`,
|
||||
|
@ -35,7 +35,7 @@ users will want to use a plain ``GFP_KERNEL``.
|
||||
:doc: Reclaim modifiers
|
||||
|
||||
.. kernel-doc:: include/linux/gfp.h
|
||||
:doc: Common combinations
|
||||
:doc: Useful GFP flag combinations
|
||||
|
||||
The Slab Cache
|
||||
==============
|
||||
|
@ -13,6 +13,10 @@ Integer types
|
||||
|
||||
If variable is of Type, use printk format specifier:
|
||||
------------------------------------------------------------
|
||||
char %hhd or %hhx
|
||||
unsigned char %hhu or %hhx
|
||||
short int %hd or %hx
|
||||
unsigned short int %hu or %hx
|
||||
int %d or %x
|
||||
unsigned int %u or %x
|
||||
long %ld or %lx
|
||||
@ -21,6 +25,10 @@ Integer types
|
||||
unsigned long long %llu or %llx
|
||||
size_t %zu or %zx
|
||||
ssize_t %zd or %zx
|
||||
s8 %hhd or %hhx
|
||||
u8 %hhu or %hhx
|
||||
s16 %hd or %hx
|
||||
u16 %hu or %hx
|
||||
s32 %d or %x
|
||||
u32 %u or %x
|
||||
s64 %lld or %llx
|
||||
|
@ -54,6 +54,13 @@ must propagate to all other CPUs before the release operation
|
||||
(A-cumulative property). This is implemented using
|
||||
:c:func:`smp_store_release`.
|
||||
|
||||
An ACQUIRE memory ordering guarantees that all post loads and
|
||||
stores (all po-later instructions) on the same CPU are
|
||||
completed after the acquire operation. It also guarantees that all
|
||||
po-later stores on the same CPU must propagate to all other CPUs
|
||||
after the acquire operation executes. This is implemented using
|
||||
:c:func:`smp_acquire__after_ctrl_dep`.
|
||||
|
||||
A control dependency (on success) for refcounters guarantees that
|
||||
if a reference for an object was successfully obtained (reference
|
||||
counter increment or addition happened, function returned true),
|
||||
@ -119,13 +126,24 @@ Memory ordering guarantees changes:
|
||||
result of obtaining pointer to the object!
|
||||
|
||||
|
||||
case 5) - decrement-based RMW ops that return a value
|
||||
-----------------------------------------------------
|
||||
case 5) - generic dec/sub decrement-based RMW ops that return a value
|
||||
---------------------------------------------------------------------
|
||||
|
||||
Function changes:
|
||||
|
||||
* :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
|
||||
* :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
|
||||
|
||||
Memory ordering guarantees changes:
|
||||
|
||||
* fully ordered --> RELEASE ordering + ACQUIRE ordering on success
|
||||
|
||||
|
||||
case 6) other decrement-based RMW ops that return a value
|
||||
---------------------------------------------------------
|
||||
|
||||
Function changes:
|
||||
|
||||
* no atomic counterpart --> :c:func:`refcount_dec_if_one`
|
||||
* ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
|
||||
|
||||
@ -136,7 +154,7 @@ Memory ordering guarantees changes:
|
||||
.. note:: :c:func:`atomic_add_unless` only provides full order on success.
|
||||
|
||||
|
||||
case 6) - lock-based RMW
|
||||
case 7) - lock-based RMW
|
||||
------------------------
|
||||
|
||||
Function changes:
|
||||
|
@ -85,7 +85,7 @@ which was at that index; if it returns the same entry which was passed as
|
||||
|
||||
If you want to only store a new entry to an index if the current entry
|
||||
at that index is ``NULL``, you can use :c:func:`xa_insert` which
|
||||
returns ``-EEXIST`` if the entry is not empty.
|
||||
returns ``-EBUSY`` if the entry is not empty.
|
||||
|
||||
You can enquire whether a mark is set on an entry by using
|
||||
:c:func:`xa_get_mark`. If the entry is not ``NULL``, you can set a mark
|
||||
@ -131,17 +131,23 @@ If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or
|
||||
initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
|
||||
the XArray changes to track whether entries are in use or not.
|
||||
|
||||
You can call :c:func:`xa_alloc` to store the entry at any unused index
|
||||
You can call :c:func:`xa_alloc` to store the entry at an unused index
|
||||
in the XArray. If you need to modify the array from interrupt context,
|
||||
you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
|
||||
interrupts while allocating the ID.
|
||||
|
||||
Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert`
|
||||
will mark the entry as being allocated. Unlike a normal XArray, storing
|
||||
Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` will
|
||||
also mark the entry as being allocated. Unlike a normal XArray, storing
|
||||
``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`.
|
||||
To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if
|
||||
you only want to free the entry if it's ``NULL``).
|
||||
|
||||
By default, the lowest free entry is allocated starting from 0. If you
|
||||
want to allocate entries starting at 1, it is more efficient to use
|
||||
:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. If you want to
|
||||
allocate IDs up to a maximum, then wrap back around to the lowest free
|
||||
ID, you can use :c:func:`xa_alloc_cyclic`.
|
||||
|
||||
You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
|
||||
is used to track whether an entry is free or not. The other marks are
|
||||
available for your use.
|
||||
@ -209,7 +215,6 @@ Assumes xa_lock held on entry:
|
||||
* :c:func:`__xa_erase`
|
||||
* :c:func:`__xa_cmpxchg`
|
||||
* :c:func:`__xa_alloc`
|
||||
* :c:func:`__xa_reserve`
|
||||
* :c:func:`__xa_set_mark`
|
||||
* :c:func:`__xa_clear_mark`
|
||||
|
||||
|
@ -1,37 +0,0 @@
|
||||
|
||||
|
||||
Supporting multiple CPU idle levels in kernel
|
||||
|
||||
cpuidle drivers
|
||||
|
||||
|
||||
|
||||
|
||||
cpuidle driver hooks into the cpuidle infrastructure and handles the
|
||||
architecture/platform dependent part of CPU idle states. Driver
|
||||
provides the platform idle state detection capability and also
|
||||
has mechanisms in place to support actual entry-exit into CPU idle states.
|
||||
|
||||
cpuidle driver initializes the cpuidle_device structure for each CPU device
|
||||
and registers with cpuidle using cpuidle_register_device.
|
||||
|
||||
If all the idle states are the same, the wrapper function cpuidle_register
|
||||
could be used instead.
|
||||
|
||||
It can also support the dynamic changes (like battery <-> AC), by using
|
||||
cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device,
|
||||
cpuidle_resume_and_unlock.
|
||||
|
||||
Interfaces:
|
||||
extern int cpuidle_register(struct cpuidle_driver *drv,
|
||||
const struct cpumask *const coupled_cpus);
|
||||
extern int cpuidle_unregister(struct cpuidle_driver *drv);
|
||||
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
|
||||
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
|
||||
extern int cpuidle_register_device(struct cpuidle_device *dev);
|
||||
extern void cpuidle_unregister_device(struct cpuidle_device *dev);
|
||||
|
||||
extern void cpuidle_pause_and_lock(void);
|
||||
extern void cpuidle_resume_and_unlock(void);
|
||||
extern int cpuidle_enable_device(struct cpuidle_device *dev);
|
||||
extern void cpuidle_disable_device(struct cpuidle_device *dev);
|
@ -1,28 +0,0 @@
|
||||
|
||||
|
||||
|
||||
Supporting multiple CPU idle levels in kernel
|
||||
|
||||
cpuidle governors
|
||||
|
||||
|
||||
|
||||
|
||||
cpuidle governor is a policy routine that decides what idle state to enter at
|
||||
any given time. cpuidle core uses different callbacks to the governor.
|
||||
|
||||
* enable() to enable governor for a particular device
|
||||
* disable() to disable governor for a particular device
|
||||
* select() to select an idle state to enter
|
||||
* reflect() called after returning from the idle state, which can be used
|
||||
by the governor for some record keeping.
|
||||
|
||||
More than one governor can be registered at the same time and
|
||||
users can switch between governors using /sysfs interface (when enabled).
|
||||
More than one governor part is supported for developers to easily experiment
|
||||
with different governors. By default, most optimal governor based on your
|
||||
kernel configuration and platform will be selected by cpuidle.
|
||||
|
||||
Interfaces:
|
||||
extern int cpuidle_register_governor(struct cpuidle_governor *gov);
|
||||
struct cpuidle_governor
|
@ -22,7 +22,7 @@ Configure the kernel with::
|
||||
|
||||
CONFIG_KCOV=y
|
||||
|
||||
CONFIG_KCOV requires gcc built on revision 231296 or later.
|
||||
CONFIG_KCOV requires gcc 6.1.0 or later.
|
||||
|
||||
If the comparison operands need to be collected, set::
|
||||
|
||||
|
@ -206,6 +206,9 @@ Optional feature arguments are:
|
||||
in a separate btree, which improves speed of shutting
|
||||
down the cache.
|
||||
|
||||
no_discard_passdown : disable passing down discards from the cache
|
||||
to the origin's data device.
|
||||
|
||||
A policy called 'default' is always registered. This is an alias for
|
||||
the policy we currently think is giving best all round performance.
|
||||
|
||||
|
114
Documentation/device-mapper/dm-init.txt
Normal file
@ -0,0 +1,114 @@
|
||||
Early creation of mapped devices
|
||||
====================================
|
||||
|
||||
It is possible to configure a device-mapper device to act as the root device for
|
||||
your system in two ways.
|
||||
|
||||
The first is to build an initial ramdisk which boots to a minimal userspace
|
||||
which configures the device, then pivot_root(8) into it.
|
||||
|
||||
The second is to create one or more device-mappers using the module parameter
|
||||
"dm-mod.create=" through the kernel boot command line argument.
|
||||
|
||||
The format is specified as a string of data separated by commas and optionally
|
||||
semi-colons, where:
|
||||
- a comma is used to separate fields like name, uuid, flags and table
|
||||
(specifies one device)
|
||||
- a semi-colon is used to separate devices.
|
||||
|
||||
So the format will look like this:
|
||||
|
||||
dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
|
||||
|
||||
Where,
|
||||
<name> ::= The device name.
|
||||
<uuid> ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
|
||||
<minor> ::= The device minor number | ""
|
||||
<flags> ::= "ro" | "rw"
|
||||
<table> ::= <start_sector> <num_sectors> <target_type> <target_args>
|
||||
<target_type> ::= "verity" | "linear" | ... (see list below)
|
||||
|
||||
The dm line should be equivalent to the one used by the dmsetup tool with the
|
||||
--concise argument.
|
||||
|
||||
Target types
|
||||
============
|
||||
|
||||
Not all target types are available as there are serious risks in allowing
|
||||
activation of certain DM targets without first using userspace tools to check
|
||||
the validity of associated metadata.
|
||||
|
||||
"cache": constrained, userspace should verify cache device
|
||||
"crypt": allowed
|
||||
"delay": allowed
|
||||
"era": constrained, userspace should verify metadata device
|
||||
"flakey": constrained, meant for test
|
||||
"linear": allowed
|
||||
"log-writes": constrained, userspace should verify metadata device
|
||||
"mirror": constrained, userspace should verify main/mirror device
|
||||
"raid": constrained, userspace should verify metadata device
|
||||
"snapshot": constrained, userspace should verify src/dst device
|
||||
"snapshot-origin": allowed
|
||||
"snapshot-merge": constrained, userspace should verify src/dst device
|
||||
"striped": allowed
|
||||
"switch": constrained, userspace should verify dev path
|
||||
"thin": constrained, requires dm target message from userspace
|
||||
"thin-pool": constrained, requires dm target message from userspace
|
||||
"verity": allowed
|
||||
"writecache": constrained, userspace should verify cache device
|
||||
"zero": constrained, not meant for rootfs
|
||||
|
||||
If the target is not listed above, it is constrained by default (not tested).
|
||||
|
||||
Examples
|
||||
========
|
||||
An example of booting to a linear array made up of user-mode linux block
|
||||
devices:
|
||||
|
||||
dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
|
||||
|
||||
This will boot to a rw dm-linear target of 8192 sectors split across two block
|
||||
devices identified by their major:minor numbers. After boot, udev will rename
|
||||
this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
|
||||
|
||||
An example of multiple device-mappers, with the dm-mod.create="..." contents is shown here
|
||||
split on multiple lines for readability:
|
||||
|
||||
vroot,,,ro,
|
||||
0 1740800 verity 254:0 254:0 1740800 sha1
|
||||
76e9be054b15884a9fa85973e9cb274c93afadb6
|
||||
5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe;
|
||||
vram,,,rw,
|
||||
0 32768 linear 1:0 0,
|
||||
32768 32768 linear 1:1 0
|
||||
|
||||
Other examples (per target):
|
||||
|
||||
"crypt":
|
||||
dm-crypt,,8,ro,
|
||||
0 1048576 crypt aes-xts-plain64
|
||||
babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
|
||||
/dev/sda 0 1 allow_discards
|
||||
|
||||
"delay":
|
||||
dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
|
||||
|
||||
"linear":
|
||||
dm-linear,,,rw,
|
||||
0 32768 linear /dev/sda1 0,
|
||||
32768 1024000 linear /dev/sda2 0,
|
||||
1056768 204800 linear /dev/sda3 0,
|
||||
1261568 512000 linear /dev/sda4 0
|
||||
|
||||
"snapshot-origin":
|
||||
dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
|
||||
|
||||
"striped":
|
||||
dm-striped,,4,ro,0 1638400 striped 4 4096
|
||||
/dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
|
||||
|
||||
"verity":
|
||||
dm-verity,,4,ro,
|
||||
0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
|
||||
fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
|
||||
51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
|
@ -15,9 +15,13 @@ DT_TMP_SCHEMA := processed-schema.yaml
|
||||
extra-y += $(DT_TMP_SCHEMA)
|
||||
|
||||
quiet_cmd_mk_schema = SCHEMA $@
|
||||
cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(filter-out FORCE, $^)
|
||||
cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(real-prereqs)
|
||||
|
||||
DT_DOCS = $(shell \
|
||||
cd $(srctree)/$(src) && \
|
||||
find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \
|
||||
)
|
||||
|
||||
DT_DOCS = $(shell cd $(srctree)/$(src) && find * -name '*.yaml')
|
||||
DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
|
||||
|
||||
extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
|
||||
|
@ -109,6 +109,7 @@ Board compatible values (alphabetically, grouped by SoC):
|
||||
- "amlogic,s400" (Meson axg a113d)
|
||||
|
||||
- "amlogic,u200" (Meson g12a s905d2)
|
||||
- "amediatech,x96-max" (Meson g12a s905x2)
|
||||
|
||||
Amlogic Meson Firmware registers Interface
|
||||
------------------------------------------
|
||||
|
@ -1,6 +0,0 @@
|
||||
Armadeus i.MX Platforms Device Tree Bindings
|
||||
-----------------------------------------------
|
||||
|
||||
APF51: i.MX51 based module.
|
||||
Required root node properties:
|
||||
- compatible = "armadeus,imx51-apf51", "fsl,imx51";
|
@ -21,7 +21,8 @@ Its subnodes can be:
|
||||
|
||||
RSTC Reset Controller required properties:
|
||||
- compatible: Should be "atmel,<chip>-rstc".
|
||||
<chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
|
||||
<chip> can be "at91sam9260", "at91sam9g45", "sama5d3" or "samx7"
|
||||
it also can be "microchip,sam9x60-rstc"
|
||||
- reg: Should contain registers location and length
|
||||
- clocks: phandle to input clock.
|
||||
|
||||
@ -147,6 +148,7 @@ required properties:
|
||||
- compatible: Should be "atmel,<chip>-sfr", "syscon" or
|
||||
"atmel,<chip>-sfrbu", "syscon"
|
||||
<chip> can be "sama5d3", "sama5d4" or "sama5d2".
|
||||
It also can be "microchip,sam9x60-sfr", "syscon".
|
||||
- reg: Should contain registers location and length
|
||||
|
||||
sfr@f0038000 {
|
||||
|
@ -30,6 +30,10 @@ Raspberry Pi 2 Model B
|
||||
Required root node properties:
|
||||
compatible = "raspberrypi,2-model-b", "brcm,bcm2836";
|
||||
|
||||
Raspberry Pi 3 Model A+
|
||||
Required root node properties:
|
||||
compatible = "raspberrypi,3-model-a-plus", "brcm,bcm2837";
|
||||
|
||||
Raspberry Pi 3 Model B
|
||||
Required root node properties:
|
||||
compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
|
||||
|
@ -1,6 +0,0 @@
|
||||
Beckhoff Automation Platforms Device Tree Bindings
|
||||
--------------------------------------------------
|
||||
|
||||
CX9020 Embedded PC
|
||||
Required root node properties:
|
||||
- compatible = "bhf,cx9020", "fsl,imx53";
|
18
Documentation/devicetree/bindings/arm/bitmain.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/bitmain.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Bitmain platform device tree bindings
|
||||
|
||||
maintainers:
|
||||
- Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- enum:
|
||||
- bitmain,sophon-edge
|
||||
- const: bitmain,bm1880
|
||||
...
|
@ -1,25 +0,0 @@
|
||||
CompuLab SB-SOM is a multi-module baseboard capable of carrying:
|
||||
- CM-T43
|
||||
- CM-T54
|
||||
- CM-QS600
|
||||
- CL-SOM-AM57x
|
||||
- CL-SOM-iMX7
|
||||
modules with minor modifications to the SB-SOM assembly.
|
||||
|
||||
Required root node properties:
|
||||
- compatible = should be "compulab,sb-som"
|
||||
|
||||
Compulab CL-SOM-iMX7 is a miniature System-on-Module (SoM) based on
|
||||
Freescale i.MX7 ARM Cortex-A7 System-on-Chip.
|
||||
|
||||
Required root node properties:
|
||||
- compatible = "compulab,cl-som-imx7", "fsl,imx7d";
|
||||
|
||||
Compulab SBC-iMX7 is a single board computer based on the
|
||||
Freescale i.MX7 system-on-chip. SBC-iMX7 is implemented with
|
||||
the CL-SOM-iMX7 System-on-Module providing most of the functions,
|
||||
and SB-SOM-iMX7 carrier board providing additional peripheral
|
||||
functions and connectors.
|
||||
|
||||
Required root node properties:
|
||||
- compatible = "compulab,sbc-imx7", "compulab,cl-som-imx7", "fsl,imx7d";
|
@ -228,6 +228,7 @@ patternProperties:
|
||||
- renesas,r9a06g032-smp
|
||||
- rockchip,rk3036-smp
|
||||
- rockchip,rk3066-smp
|
||||
- socionext,milbeaut-m10v-smp
|
||||
- ste,dbx500-smp
|
||||
|
||||
cpu-release-addr:
|
||||
|
@ -0,0 +1,16 @@
|
||||
Freescale i.MX7ULP System Integration Module
|
||||
----------------------------------------------
|
||||
The system integration module (SIM) provides system control and chip configuration
|
||||
registers. In this module, chip revision information is located in JTAG ID register,
|
||||
and a set of registers have been made available in DGO domain for SW use, with the
|
||||
objective to maintain its value between system resets.
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "fsl,imx7ulp-sim".
|
||||
- reg: Specifies base physical address and size of the register sets.
|
||||
|
||||
Example:
|
||||
sim: sim@410a3000 {
|
||||
compatible = "fsl,imx7ulp-sim", "syscon";
|
||||
reg = <0x410a3000 0x1000>;
|
||||
};
|
@ -58,7 +58,11 @@ This binding for the SCU power domain providers uses the generic power
|
||||
domain binding[2].
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "fsl,imx8qxp-scu-pd".
|
||||
- compatible: Should be one of:
|
||||
"fsl,imx8qm-scu-pd",
|
||||
"fsl,imx8qxp-scu-pd"
|
||||
followed by "fsl,scu-pd"
|
||||
|
||||
- #power-domain-cells: Must be 1. Contains the Resource ID used by
|
||||
SCU commands.
|
||||
See detailed Resource ID list from:
|
||||
@ -70,7 +74,10 @@ Clock bindings based on SCU Message Protocol
|
||||
This binding uses the common clock binding[1].
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "fsl,imx8qxp-clock".
|
||||
- compatible: Should be one of:
|
||||
"fsl,imx8qm-clk"
|
||||
"fsl,imx8qxp-clk"
|
||||
followed by "fsl,scu-clk"
|
||||
- #clock-cells: Should be 1. Contains the Clock ID value.
|
||||
- clocks: List of clock specifiers, must contain an entry for
|
||||
each required entry in clock-names
|
||||
@ -137,7 +144,7 @@ firmware {
|
||||
&lsio_mu1 1 3>;
|
||||
|
||||
clk: clk {
|
||||
compatible = "fsl,imx8qxp-clk";
|
||||
compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
@ -154,7 +161,7 @@ firmware {
|
||||
};
|
||||
|
||||
pd: imx8qx-pd {
|
||||
compatible = "fsl,imx8qxp-scu-pd";
|
||||
compatible = "fsl,imx8qxp-scu-pd", "fsl,scu-pd";
|
||||
#power-domain-cells = <1>;
|
||||
};
|
||||
|
||||
|
@ -1,237 +0,0 @@
|
||||
Freescale i.MX Platforms Device Tree Bindings
|
||||
-----------------------------------------------
|
||||
|
||||
i.MX23 Evaluation Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx23-evk", "fsl,imx23";
|
||||
|
||||
i.MX25 Product Development Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx25-pdk", "fsl,imx25";
|
||||
|
||||
i.MX27 Product Development Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx27-pdk", "fsl,imx27";
|
||||
|
||||
i.MX28 Evaluation Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx28-evk", "fsl,imx28";
|
||||
|
||||
i.MX51 Babbage Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx51-babbage", "fsl,imx51";
|
||||
|
||||
i.MX53 Automotive Reference Design Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx53-ard", "fsl,imx53";
|
||||
|
||||
i.MX53 Evaluation Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx53-evk", "fsl,imx53";
|
||||
|
||||
i.MX53 Quick Start Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx53-qsb", "fsl,imx53";
|
||||
|
||||
i.MX53 Smart Mobile Reference Design Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx53-smd", "fsl,imx53";
|
||||
|
||||
i.MX6 Quad Armadillo2 Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6q-arm2", "fsl,imx6q";
|
||||
|
||||
i.MX6 Quad SABRE Lite Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6q-sabrelite", "fsl,imx6q";
|
||||
|
||||
i.MX6 Quad SABRE Smart Device Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6q-sabresd", "fsl,imx6q";
|
||||
|
||||
i.MX6 Quad SABRE Automotive Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6q-sabreauto", "fsl,imx6q";
|
||||
|
||||
i.MX6SLL EVK board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6sll-evk", "fsl,imx6sll";
|
||||
|
||||
i.MX6 Quad Plus SABRE Smart Device Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6qp-sabresd", "fsl,imx6qp";
|
||||
|
||||
i.MX6 Quad Plus SABRE Automotive Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6qp-sabreauto", "fsl,imx6qp";
|
||||
|
||||
i.MX6 DualLite SABRE Smart Device Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6dl-sabresd", "fsl,imx6dl";
|
||||
|
||||
i.MX6 DualLite/Solo SABRE Automotive Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6dl-sabreauto", "fsl,imx6dl";
|
||||
|
||||
i.MX6 SoloLite EVK Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6sl-evk", "fsl,imx6sl";
|
||||
|
||||
i.MX6 UltraLite 14x14 EVK Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6ul-14x14-evk", "fsl,imx6ul";
|
||||
|
||||
i.MX6 UltraLiteLite 14x14 EVK Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6ull-14x14-evk", "fsl,imx6ull";
|
||||
|
||||
i.MX6 ULZ 14x14 EVK Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6ulz-14x14-evk", "fsl,imx6ull", "fsl,imx6ulz";
|
||||
|
||||
i.MX6 SoloX SDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6sx-sdb", "fsl,imx6sx";
|
||||
|
||||
i.MX6 SoloX Sabre Auto Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6sx-sabreauto", "fsl,imx6sx";
|
||||
|
||||
i.MX7 SabreSD Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx7d-sdb", "fsl,imx7d";
|
||||
|
||||
i.MX7ULP Evaluation Kit
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx7ulp-evk", "fsl,imx7ulp";
|
||||
|
||||
Generic i.MX boards
|
||||
-------------------
|
||||
|
||||
No iomux setup is done for these boards, so this must have been configured
|
||||
by the bootloader for boards to work with the generic bindings.
|
||||
|
||||
i.MX27 generic board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx27";
|
||||
|
||||
i.MX51 generic board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx51";
|
||||
|
||||
i.MX53 generic board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx53";
|
||||
|
||||
i.MX6q generic board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx6q";
|
||||
|
||||
i.MX7ULP generic board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,imx7ulp";
|
||||
|
||||
Freescale Vybrid Platform Device Tree Bindings
|
||||
----------------------------------------------
|
||||
|
||||
For the Vybrid SoC family all variants with DDR controller are supported,
|
||||
which is the VF5xx and VF6xx series. For historical reasons, in most
|
||||
places the kernel uses vf610 to refer to the whole family.
|
||||
The compatible string "fsl,vf610m4" is used for the secondary Cortex-M4
|
||||
core support.
|
||||
|
||||
Required root node compatible property (one of them):
|
||||
- compatible = "fsl,vf500";
|
||||
- compatible = "fsl,vf510";
|
||||
- compatible = "fsl,vf600";
|
||||
- compatible = "fsl,vf610";
|
||||
- compatible = "fsl,vf610m4";
|
||||
|
||||
Freescale LS1021A Platform Device Tree Bindings
|
||||
------------------------------------------------
|
||||
|
||||
Required root node compatible properties:
|
||||
- compatible = "fsl,ls1021a";
|
||||
|
||||
Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
|
||||
----------------------------------------------------------------
|
||||
|
||||
LS1012A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1012a";
|
||||
|
||||
LS1012A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1012a-rdb", "fsl,ls1012a";
|
||||
|
||||
LS1012A ARMv8 based FRDM Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1012a-frdm", "fsl,ls1012a";
|
||||
|
||||
LS1012A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1012a-qds", "fsl,ls1012a";
|
||||
|
||||
LS1043A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1043a";
|
||||
|
||||
LS1043A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1043a-rdb", "fsl,ls1043a";
|
||||
|
||||
LS1043A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1043a-qds", "fsl,ls1043a";
|
||||
|
||||
LS1046A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1046a";
|
||||
|
||||
LS1046A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1046a-qds", "fsl,ls1046a";
|
||||
|
||||
LS1046A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1046a-rdb", "fsl,ls1046a";
|
||||
|
||||
LS1088A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1088a";
|
||||
|
||||
LS1088A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1088a-qds", "fsl,ls1088a";
|
||||
|
||||
LS1088A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls1088a-rdb", "fsl,ls1088a";
|
||||
|
||||
LS2080A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2080a";
|
||||
|
||||
LS2080A ARMv8 based Simulator model
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2080a-simu", "fsl,ls2080a";
|
||||
|
||||
LS2080A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2080a-qds", "fsl,ls2080a";
|
||||
|
||||
LS2080A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2080a-rdb", "fsl,ls2080a";
|
||||
|
||||
LS2088A SoC
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2088a";
|
||||
|
||||
LS2088A ARMv8 based QDS Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2088a-qds", "fsl,ls2088a";
|
||||
|
||||
LS2088A ARMv8 based RDB Board
|
||||
Required root node properties:
|
||||
- compatible = "fsl,ls2088a-rdb", "fsl,ls2088a";
|
232
Documentation/devicetree/bindings/arm/fsl.yaml
Normal file
@ -0,0 +1,232 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/fsl.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Freescale i.MX Platforms Device Tree Bindings
|
||||
|
||||
maintainers:
|
||||
- Shawn Guo <shawnguo@kernel.org>
|
||||
- Li Yang <leoyang.li@nxp.com>
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: '/'
|
||||
compatible:
|
||||
oneOf:
|
||||
- description: i.MX23 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx23-evk
|
||||
- olimex,imx23-olinuxino
|
||||
- const: fsl,imx23
|
||||
|
||||
- description: i.MX25 Product Development Kit
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx25-pdk
|
||||
- const: fsl,imx25
|
||||
|
||||
- description: i.MX27 Product Development Kit
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx27-pdk
|
||||
- const: fsl,imx27
|
||||
|
||||
- description: i.MX28 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx28-evk
|
||||
- i2se,duckbill
|
||||
- i2se,duckbill-2
|
||||
- technologic,imx28-ts4600
|
||||
- const: fsl,imx28
|
||||
- description: i.MX28 Duckbill 2 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- i2se,duckbill-2-485
|
||||
- i2se,duckbill-2-enocean
|
||||
- i2se,duckbill-2-spi
|
||||
- const: i2se,duckbill-2
|
||||
- const: fsl,imx28
|
||||
|
||||
- description: i.MX51 Babbage Board
|
||||
items:
|
||||
- enum:
|
||||
- armadeus,imx51-apf51
|
||||
- fsl,imx51-babbage
|
||||
- technologic,imx51-ts4800
|
||||
- const: fsl,imx51
|
||||
|
||||
- description: i.MX53 based Boards
|
||||
items:
|
||||
- enum:
|
||||
- bhf,cx9020
|
||||
- fsl,imx53-ard
|
||||
- fsl,imx53-evk
|
||||
- fsl,imx53-qsb
|
||||
- fsl,imx53-smd
|
||||
- const: fsl,imx53
|
||||
|
||||
- description: i.MX6Q based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6q-arm2
|
||||
- fsl,imx6q-sabreauto
|
||||
- fsl,imx6q-sabrelite
|
||||
- fsl,imx6q-sabresd
|
||||
- technologic,imx6q-ts4900
|
||||
- technologic,imx6q-ts7970
|
||||
- const: fsl,imx6q
|
||||
|
||||
- description: i.MX6QP based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6qp-sabreauto # i.MX6 Quad Plus SABRE Automotive Board
|
||||
- fsl,imx6qp-sabresd # i.MX6 Quad Plus SABRE Smart Device Board
|
||||
- const: fsl,imx6qp
|
||||
|
||||
- description: i.MX6DL based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6dl-sabreauto # i.MX6 DualLite/Solo SABRE Automotive Board
|
||||
- fsl,imx6dl-sabresd # i.MX6 DualLite SABRE Smart Device Board
|
||||
- technologic,imx6dl-ts4900
|
||||
- technologic,imx6dl-ts7970
|
||||
- ysoft,imx6dl-yapp4-draco # i.MX6 DualLite Y Soft IOTA Draco board
|
||||
- ysoft,imx6dl-yapp4-hydra # i.MX6 DualLite Y Soft IOTA Hydra board
|
||||
- ysoft,imx6dl-yapp4-ursa # i.MX6 Solo Y Soft IOTA Ursa board
|
||||
- const: fsl,imx6dl
|
||||
|
||||
- description: i.MX6SL based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6sl-evk # i.MX6 SoloLite EVK Board
|
||||
- const: fsl,imx6sl
|
||||
|
||||
- description: i.MX6SLL based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6sll-evk
|
||||
- const: fsl,imx6sll
|
||||
|
||||
- description: i.MX6SX based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6sx-sabreauto # i.MX6 SoloX Sabre Auto Board
|
||||
- fsl,imx6sx-sdb # i.MX6 SoloX SDB Board
|
||||
- const: fsl,imx6sx
|
||||
|
||||
- description: i.MX6UL based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6ul-14x14-evk # i.MX6 UltraLite 14x14 EVK Board
|
||||
- const: fsl,imx6ul
|
||||
|
||||
- description: i.MX6ULL based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6ull-14x14-evk # i.MX6 UltraLiteLite 14x14 EVK Board
|
||||
- const: fsl,imx6ull
|
||||
|
||||
- description: i.MX6ULZ based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx6ulz-14x14-evk # i.MX6 ULZ 14x14 EVK Board
|
||||
- const: fsl,imx6ull # This seems odd. Should be last?
|
||||
- const: fsl,imx6ulz
|
||||
|
||||
- description: i.MX7D based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx7d-sdb # i.MX7 SabreSD Board
|
||||
- const: fsl,imx7d
|
||||
|
||||
- description:
|
||||
Compulab SBC-iMX7 is a single board computer based on the
|
||||
Freescale i.MX7 system-on-chip. SBC-iMX7 is implemented with
|
||||
the CL-SOM-iMX7 System-on-Module providing most of the functions,
|
||||
and SB-SOM-iMX7 carrier board providing additional peripheral
|
||||
functions and connectors.
|
||||
items:
|
||||
- const: compulab,sbc-imx7
|
||||
- const: compulab,cl-som-imx7
|
||||
- const: fsl,imx7d
|
||||
|
||||
- description: i.MX8QXP based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx8qxp-mek # i.MX8QXP MEK Board
|
||||
- const: fsl,imx8qxp
|
||||
|
||||
- description:
|
||||
Freescale Vybrid Platform Device Tree Bindings
|
||||
|
||||
For the Vybrid SoC family all variants with DDR controller are supported,
|
||||
which is the VF5xx and VF6xx series. For historical reasons, in most
|
||||
places the kernel uses vf610 to refer to the whole family.
|
||||
The compatible string "fsl,vf610m4" is used for the secondary Cortex-M4
|
||||
core support.
|
||||
items:
|
||||
- enum:
|
||||
- fsl,vf500
|
||||
- fsl,vf510
|
||||
- fsl,vf600
|
||||
- fsl,vf610
|
||||
- fsl,vf610m4
|
||||
|
||||
- description: LS1012A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- ebs-systart,oxalis
|
||||
- fsl,ls1012a-rdb
|
||||
- fsl,ls1012a-frdm
|
||||
- fsl,ls1012a-qds
|
||||
- const: fsl,ls1012a
|
||||
|
||||
- description: LS1021A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls1021a-moxa-uc-8410a
|
||||
- fsl,ls1021a-qds
|
||||
- fsl,ls1021a-twr
|
||||
- const: fsl,ls1021a
|
||||
|
||||
- description: LS1043A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls1043a-rdb
|
||||
- fsl,ls1043a-qds
|
||||
- const: fsl,ls1043a
|
||||
|
||||
- description: LS1046A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls1046a-qds
|
||||
- fsl,ls1046a-rdb
|
||||
- const: fsl,ls1046a
|
||||
|
||||
- description: LS1088A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls1088a-qds
|
||||
- fsl,ls1088a-rdb
|
||||
- const: fsl,ls1088a
|
||||
|
||||
- description: LS2080A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls2080a-simu
|
||||
- fsl,ls2080a-qds
|
||||
- fsl,ls2080a-rdb
|
||||
- const: fsl,ls2080a
|
||||
|
||||
- description: LS2088A based Boards
|
||||
items:
|
||||
- enum:
|
||||
- fsl,ls2088a-qds
|
||||
- fsl,ls2088a-rdb
|
||||
- const: fsl,ls2088a
|
||||
|
||||
...
|
@ -1,22 +0,0 @@
|
||||
I2SE Device Tree Bindings
|
||||
-------------------------
|
||||
|
||||
Duckbill Board
|
||||
Required root node properties:
|
||||
- compatible = "i2se,duckbill", "fsl,imx28";
|
||||
|
||||
Duckbill 2 Board
|
||||
Required root node properties:
|
||||
- compatible = "i2se,duckbill-2", "fsl,imx28";
|
||||
|
||||
Duckbill 2 485 Board
|
||||
Required root node properties:
|
||||
- compatible = "i2se,duckbill-2-485", "i2se,duckbill-2", "fsl,imx28";
|
||||
|
||||
Duckbill 2 EnOcean Board
|
||||
Required root node properties:
|
||||
- compatible = "i2se,duckbill-2-enocean", "i2se,duckbill-2", "fsl,imx28";
|
||||
|
||||
Duckbill 2 SPI Board
|
||||
Required root node properties:
|
||||
- compatible = "i2se,duckbill-2-spi", "i2se,duckbill-2", "fsl,imx28";
|
@ -1,114 +0,0 @@
|
||||
* ARM L2 Cache Controller
|
||||
|
||||
ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/PL220/
|
||||
PL310 and variants) based level 2 cache controller. All these various implementations
|
||||
of the L2 cache controller have compatible programming models (Note 1).
|
||||
Some of the properties that are just prefixed "cache-*" are taken from section
|
||||
3.7.3 of the Devicetree Specification which can be found at:
|
||||
https://www.devicetree.org/specifications/
|
||||
|
||||
The ARM L2 cache representation in the device tree should be done as follows:
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be one of:
|
||||
"arm,pl310-cache"
|
||||
"arm,l220-cache"
|
||||
"arm,l210-cache"
|
||||
"bcm,bcm11351-a2-pl310-cache": DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
|
||||
"brcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
|
||||
offset needs to be added to the address before passing down to the L2
|
||||
cache controller
|
||||
"marvell,aurora-system-cache": Marvell Controller designed to be
|
||||
compatible with the ARM one, with system cache mode (meaning
|
||||
maintenance operations on L1 are broadcasted to the L2 and L2
|
||||
performs the same operation).
|
||||
"marvell,aurora-outer-cache": Marvell Controller designed to be
|
||||
compatible with the ARM one with outer cache mode.
|
||||
"marvell,tauros3-cache": Marvell Tauros3 cache controller, compatible
|
||||
with arm,pl310-cache controller.
|
||||
- cache-unified : Specifies the cache is a unified cache.
|
||||
- cache-level : Should be set to 2 for a level 2 cache.
|
||||
- reg : Physical base address and size of cache controller's memory mapped
|
||||
registers.
|
||||
|
||||
Optional properties:
|
||||
|
||||
- arm,data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of
|
||||
read, write and setup latencies. Minimum valid values are 1. Controllers
|
||||
without setup latency control should use a value of 0.
|
||||
- arm,tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
|
||||
read, write and setup latencies. Controllers without setup latency control
|
||||
should use 0. Controllers without separate read and write Tag RAM latency
|
||||
values should only use the first cell.
|
||||
- arm,dirty-latency : Cycles of latency for Dirty RAMs. This is a single cell.
|
||||
- arm,filter-ranges : <start length> Starting address and length of window to
|
||||
filter. Addresses in the filter window are directed to the M1 port. Other
|
||||
addresses will go to the M0 port.
|
||||
- arm,io-coherent : indicates that the system is operating in an hardware
|
||||
I/O coherent mode. Valid only when the arm,pl310-cache compatible
|
||||
string is used.
|
||||
- interrupts : 1 combined interrupt.
|
||||
- cache-size : specifies the size in bytes of the cache
|
||||
- cache-sets : specifies the number of associativity sets of the cache
|
||||
- cache-block-size : specifies the size in bytes of a cache block
|
||||
- cache-line-size : specifies the size in bytes of a line in the cache,
|
||||
if this is not specified, the line size is assumed to be equal to the
|
||||
cache block size
|
||||
- cache-id-part: cache id part number to be used if it is not present
|
||||
on hardware
|
||||
- wt-override: If present then L2 is forced to Write through mode
|
||||
- arm,double-linefill : Override double linefill enable setting. Enable if
|
||||
non-zero, disable if zero.
|
||||
- arm,double-linefill-incr : Override double linefill on INCR read. Enable
|
||||
if non-zero, disable if zero.
|
||||
- arm,double-linefill-wrap : Override double linefill on WRAP read. Enable
|
||||
if non-zero, disable if zero.
|
||||
- arm,prefetch-drop : Override prefetch drop enable setting. Enable if non-zero,
|
||||
disable if zero.
|
||||
- arm,prefetch-offset : Override prefetch offset value. Valid values are
|
||||
0-7, 15, 23, and 31.
|
||||
- arm,shared-override : The default behavior of the L220 or PL310 cache
|
||||
controllers with respect to the shareable attribute is to transform "normal
|
||||
memory non-cacheable transactions" into "cacheable no allocate" (for reads)
|
||||
or "write through no write allocate" (for writes).
|
||||
On systems where this may cause DMA buffer corruption, this property must be
|
||||
specified to indicate that such transforms are precluded.
|
||||
- arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310).
|
||||
- arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310).
|
||||
- arm,outer-sync-disable : disable the outer sync operation on the L2 cache.
|
||||
Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
|
||||
will randomly hang unless outer sync operations are disabled.
|
||||
- prefetch-data : Data prefetch. Value: <0> (forcibly disable), <1>
|
||||
(forcibly enable), property absent (retain settings set by firmware)
|
||||
- prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable),
|
||||
<1> (forcibly enable), property absent (retain settings set by
|
||||
firmware)
|
||||
- arm,dynamic-clock-gating : L2 dynamic clock gating. Value: <0> (forcibly
|
||||
disable), <1> (forcibly enable), property absent (OS specific behavior,
|
||||
preferably retain firmware settings)
|
||||
- arm,standby-mode: L2 standby mode enable. Value <0> (forcibly disable),
|
||||
<1> (forcibly enable), property absent (OS specific behavior,
|
||||
preferably retain firmware settings)
|
||||
- arm,early-bresp-disable : Disable the CA9 optimization Early BRESP (PL310)
|
||||
- arm,full-line-zero-disable : Disable the CA9 optimization Full line of zero
|
||||
write (PL310)
|
||||
|
||||
Example:
|
||||
|
||||
L2: cache-controller {
|
||||
compatible = "arm,pl310-cache";
|
||||
reg = <0xfff12000 0x1000>;
|
||||
arm,data-latency = <1 1 1>;
|
||||
arm,tag-latency = <2 2 2>;
|
||||
arm,filter-ranges = <0x80000000 0x8000000>;
|
||||
cache-unified;
|
||||
cache-level = <2>;
|
||||
interrupts = <45>;
|
||||
};
|
||||
|
||||
Note 1: The description in this document doesn't apply to integrated L2
|
||||
cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
|
||||
integrated L2 controllers are assumed to be all preconfigured by
|
||||
early secure boot code. Thus no need to deal with their configuration
|
||||
in the kernel at all.
|
248
Documentation/devicetree/bindings/arm/l2c2x0.yaml
Normal file
@ -0,0 +1,248 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/l2c2x0.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ARM L2 Cache Controller
|
||||
|
||||
maintainers:
|
||||
- Rob Herring <robh@kernel.org>
|
||||
|
||||
description: |+
|
||||
ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/
|
||||
PL220/PL310 and variants) based level 2 cache controller. All these various
|
||||
implementations of the L2 cache controller have compatible programming
|
||||
models (Note 1). Some of the properties that are just prefixed "cache-*" are
|
||||
taken from section 3.7.3 of the Devicetree Specification which can be found
|
||||
at:
|
||||
https://www.devicetree.org/specifications/
|
||||
|
||||
Note 1: The description in this document doesn't apply to integrated L2
|
||||
cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
|
||||
integrated L2 controllers are assumed to be all preconfigured by
|
||||
early secure boot code. Thus there is no need to deal with their configuration
|
||||
in the kernel at all.
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/cache-controller.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- arm,pl310-cache
|
||||
- arm,l220-cache
|
||||
- arm,l210-cache
|
||||
# DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
|
||||
- bcm,bcm11351-a2-pl310-cache
|
||||
# For Broadcom bcm11351 chipset where an
|
||||
# offset needs to be added to the address before passing down to the L2
|
||||
# cache controller
|
||||
- brcm,bcm11351-a2-pl310-cache
|
||||
# Marvell Controller designed to be
|
||||
# compatible with the ARM one, with system cache mode (meaning
|
||||
# maintenance operations on L1 are broadcast to the L2 and L2
|
||||
# performs the same operation).
|
||||
- marvell,aurora-system-cache
|
||||
# Marvell Controller designed to be
|
||||
# compatible with the ARM one with outer cache mode.
|
||||
- marvell,aurora-outer-cache
|
||||
# Marvell Tauros3 cache controller, compatible
|
||||
# with arm,pl310-cache controller.
|
||||
- marvell,tauros3-cache
|
||||
|
||||
cache-level:
|
||||
const: 2
|
||||
|
||||
cache-unified: true
|
||||
cache-size: true
|
||||
cache-sets: true
|
||||
cache-block-size: true
|
||||
cache-line-size: true
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
arm,data-latency:
|
||||
description: Cycles of latency for Data RAM accesses. Specifies 3 cells of
|
||||
read, write and setup latencies. Minimum valid values are 1. Controllers
|
||||
without setup latency control should use a value of 0.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
- minItems: 2
|
||||
maxItems: 3
|
||||
items:
|
||||
minimum: 0
|
||||
maximum: 8
|
||||
|
||||
arm,tag-latency:
|
||||
description: Cycles of latency for Tag RAM accesses. Specifies 3 cells of
|
||||
read, write and setup latencies. Controllers without setup latency control
|
||||
should use 0. Controllers without separate read and write Tag RAM latency
|
||||
values should only use the first cell.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
- minItems: 1
|
||||
maxItems: 3
|
||||
items:
|
||||
minimum: 0
|
||||
maximum: 8
|
||||
|
||||
arm,dirty-latency:
|
||||
description: Cycles of latency for Dirty RAMs. This is a single cell.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- minimum: 1
|
||||
maximum: 8
|
||||
|
||||
arm,filter-ranges:
|
||||
description: <start length> Starting address and length of window to
|
||||
filter. Addresses in the filter window are directed to the M1 port. Other
|
||||
addresses will go to the M0 port.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
- items:
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
arm,io-coherent:
|
||||
description: indicates that the system is operating in a hardware
|
||||
I/O coherent mode. Valid only when the arm,pl310-cache compatible
|
||||
string is used.
|
||||
type: boolean
|
||||
|
||||
interrupts:
|
||||
# Either a single combined interrupt or up to 9 individual interrupts
|
||||
minItems: 1
|
||||
maxItems: 9
|
||||
|
||||
cache-id-part:
|
||||
description: cache id part number to be used if it is not present
|
||||
on hardware
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
|
||||
wt-override:
|
||||
description: If present then L2 is forced to Write through mode
|
||||
type: boolean
|
||||
|
||||
arm,double-linefill:
|
||||
description: Override double linefill enable setting. Enable if
|
||||
non-zero, disable if zero.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,double-linefill-incr:
|
||||
description: Override double linefill on INCR read. Enable
|
||||
if non-zero, disable if zero.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,double-linefill-wrap:
|
||||
description: Override double linefill on WRAP read. Enable
|
||||
if non-zero, disable if zero.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,prefetch-drop:
|
||||
description: Override prefetch drop enable setting. Enable if non-zero,
|
||||
disable if zero.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,prefetch-offset:
|
||||
description: Override prefetch offset value.
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1, 2, 3, 4, 5, 6, 7, 15, 23, 31 ]
|
||||
|
||||
arm,shared-override:
|
||||
description: The default behavior of the L220 or PL310 cache
|
||||
controllers with respect to the shareable attribute is to transform "normal
|
||||
memory non-cacheable transactions" into "cacheable no allocate" (for reads)
|
||||
or "write through no write allocate" (for writes).
|
||||
On systems where this may cause DMA buffer corruption, this property must
|
||||
be specified to indicate that such transforms are precluded.
|
||||
type: boolean
|
||||
|
||||
arm,parity-enable:
|
||||
description: enable parity checking on the L2 cache (L220 or PL310).
|
||||
type: boolean
|
||||
|
||||
arm,parity-disable:
|
||||
description: disable parity checking on the L2 cache (L220 or PL310).
|
||||
type: boolean
|
||||
|
||||
arm,outer-sync-disable:
|
||||
description: disable the outer sync operation on the L2 cache.
|
||||
Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
|
||||
will randomly hang unless outer sync operations are disabled.
|
||||
type: boolean
|
||||
|
||||
prefetch-data:
|
||||
description: |
|
||||
Data prefetch. Value: <0> (forcibly disable), <1>
|
||||
(forcibly enable), property absent (retain settings set by firmware)
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
prefetch-instr:
|
||||
description: |
|
||||
Instruction prefetch. Value: <0> (forcibly disable),
|
||||
<1> (forcibly enable), property absent (retain settings set by
|
||||
firmware)
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,dynamic-clock-gating:
|
||||
description: |
|
||||
L2 dynamic clock gating. Value: <0> (forcibly
|
||||
disable), <1> (forcibly enable), property absent (OS specific behavior,
|
||||
preferably retain firmware settings)
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,standby-mode:
|
||||
description: L2 standby mode enable. Value <0> (forcibly disable),
|
||||
<1> (forcibly enable), property absent (OS specific behavior,
|
||||
preferably retain firmware settings)
|
||||
allOf:
|
||||
- $ref: /schemas/types.yaml#/definitions/uint32
|
||||
- enum: [ 0, 1 ]
|
||||
|
||||
arm,early-bresp-disable:
|
||||
description: Disable the CA9 optimization Early BRESP (PL310)
|
||||
type: boolean
|
||||
|
||||
arm,full-line-zero-disable:
|
||||
description: Disable the CA9 optimization Full line of zero
|
||||
write (PL310)
|
||||
type: boolean
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- cache-unified
|
||||
- reg
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
cache-controller@fff12000 {
|
||||
compatible = "arm,pl310-cache";
|
||||
reg = <0xfff12000 0x1000>;
|
||||
arm,data-latency = <1 1 1>;
|
||||
arm,tag-latency = <2 2 2>;
|
||||
arm,filter-ranges = <0x80000000 0x8000000>;
|
||||
cache-unified;
|
||||
cache-level = <2>;
|
||||
interrupts = <45>;
|
||||
};
|
||||
|
||||
...
|
@ -15,11 +15,12 @@ compatible: Must contain one of
|
||||
"mediatek,mt6795"
|
||||
"mediatek,mt6797"
|
||||
"mediatek,mt7622"
|
||||
"mediatek,mt7623" which is referred to MT7623N SoC
|
||||
"mediatek,mt7623a"
|
||||
"mediatek,mt7623"
|
||||
"mediatek,mt7629"
|
||||
"mediatek,mt8127"
|
||||
"mediatek,mt8135"
|
||||
"mediatek,mt8173"
|
||||
"mediatek,mt8183"
|
||||
|
||||
|
||||
Supported boards:
|
||||
@ -57,6 +58,9 @@ Supported boards:
|
||||
- Reference board variant 1 for MT7622:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622";
|
||||
- Bananapi BPI-R64 for MT7622:
|
||||
Required root node properties:
|
||||
- compatible = "bananapi,bpi-r64", "mediatek,mt7622";
|
||||
- Reference board for MT7623a with eMMC:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt7623a-rfb-emmc", "mediatek,mt7623";
|
||||
@ -68,6 +72,9 @@ Supported boards:
|
||||
- compatible = "mediatek,mt7623n-rfb-emmc", "mediatek,mt7623";
|
||||
- Bananapi BPI-R2 board:
|
||||
- compatible = "bananapi,bpi-r2", "mediatek,mt7623";
|
||||
- Reference board for MT7629:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt7629-rfb", "mediatek,mt7629";
|
||||
- MTK mt8127 tablet moose EVB:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt8127-moose", "mediatek,mt8127";
|
||||
@ -77,3 +84,6 @@ Supported boards:
|
||||
- MTK mt8173 tablet EVB:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
|
||||
- Evaluation board for MT8183:
|
||||
Required root node properties:
|
||||
- compatible = "mediatek,mt8183-evb", "mediatek,mt8183";
|
||||
|
@ -1,10 +0,0 @@
|
||||
Olimex Device Tree Bindings
|
||||
---------------------------
|
||||
|
||||
SAM9-L9260 Board
|
||||
Required root node properties:
|
||||
- compatible = "olimex,sam9-l9260", "atmel,at91sam9260";
|
||||
|
||||
i.MX23 Olinuxino Low Cost Board
|
||||
Required root node properties:
|
||||
- compatible = "olimex,imx23-olinuxino", "fsl,imx23";
|
@ -1,70 +0,0 @@
|
||||
* ARM Performance Monitor Units
|
||||
|
||||
ARM cores often have a PMU for counting cpu and cache events like cache misses
|
||||
and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
|
||||
representation in the device tree should be done as under:-
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be one of
|
||||
"apm,potenza-pmu"
|
||||
"arm,armv8-pmuv3"
|
||||
"arm,cortex-a73-pmu"
|
||||
"arm,cortex-a72-pmu"
|
||||
"arm,cortex-a57-pmu"
|
||||
"arm,cortex-a53-pmu"
|
||||
"arm,cortex-a35-pmu"
|
||||
"arm,cortex-a17-pmu"
|
||||
"arm,cortex-a15-pmu"
|
||||
"arm,cortex-a12-pmu"
|
||||
"arm,cortex-a9-pmu"
|
||||
"arm,cortex-a8-pmu"
|
||||
"arm,cortex-a7-pmu"
|
||||
"arm,cortex-a5-pmu"
|
||||
"arm,arm11mpcore-pmu"
|
||||
"arm,arm1176-pmu"
|
||||
"arm,arm1136-pmu"
|
||||
"brcm,vulcan-pmu"
|
||||
"cavium,thunder-pmu"
|
||||
"qcom,scorpion-pmu"
|
||||
"qcom,scorpion-mp-pmu"
|
||||
"qcom,krait-pmu"
|
||||
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
|
||||
interrupt (PPI) then 1 interrupt should be specified.
|
||||
|
||||
Optional properties:
|
||||
|
||||
- interrupt-affinity : When using SPIs, specifies a list of phandles to CPU
|
||||
nodes corresponding directly to the affinity of
|
||||
the SPIs listed in the interrupts property.
|
||||
|
||||
When using a PPI, specifies a list of phandles to CPU
|
||||
nodes corresponding to the set of CPUs which have
|
||||
a PMU of this type signalling the PPI listed in the
|
||||
interrupts property, unless this is already specified
|
||||
by the PPI interrupt specifier itself (in which case
|
||||
the interrupt-affinity property shouldn't be present).
|
||||
|
||||
This property should be present when there is more than
|
||||
a single SPI.
|
||||
|
||||
|
||||
- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
|
||||
events.
|
||||
|
||||
- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register
|
||||
(SDER) is accessible. This will cause the driver to do
|
||||
any setup required that is only possible in ARMv7 secure
|
||||
state. If not present the ARMv7 SDER will not be touched,
|
||||
which means the PMU may fail to operate unless external
|
||||
code (bootloader or security monitor) has performed the
|
||||
appropriate initialisation. Note that this property is
|
||||
not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
|
||||
in Non-secure state.
|
||||
|
||||
Example:
|
||||
|
||||
pmu {
|
||||
compatible = "arm,cortex-a9-pmu";
|
||||
interrupts = <100 101>;
|
||||
};
|
87
Documentation/devicetree/bindings/arm/pmu.yaml
Normal file
@ -0,0 +1,87 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/pmu.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ARM Performance Monitor Units
|
||||
|
||||
maintainers:
|
||||
- Mark Rutland <mark.rutland@arm.com>
|
||||
- Will Deacon <will.deacon@arm.com>
|
||||
|
||||
description: |+
|
||||
ARM cores often have a PMU for counting cpu and cache events like cache misses
|
||||
and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
|
||||
representation in the device tree should be done as follows:
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- enum:
|
||||
- apm,potenza-pmu
|
||||
- arm,armv8-pmuv3
|
||||
- arm,cortex-a73-pmu
|
||||
- arm,cortex-a72-pmu
|
||||
- arm,cortex-a57-pmu
|
||||
- arm,cortex-a53-pmu
|
||||
- arm,cortex-a35-pmu
|
||||
- arm,cortex-a17-pmu
|
||||
- arm,cortex-a15-pmu
|
||||
- arm,cortex-a12-pmu
|
||||
- arm,cortex-a9-pmu
|
||||
- arm,cortex-a8-pmu
|
||||
- arm,cortex-a7-pmu
|
||||
- arm,cortex-a5-pmu
|
||||
- arm,arm11mpcore-pmu
|
||||
- arm,arm1176-pmu
|
||||
- arm,arm1136-pmu
|
||||
- brcm,vulcan-pmu
|
||||
- cavium,thunder-pmu
|
||||
- qcom,scorpion-pmu
|
||||
- qcom,scorpion-mp-pmu
|
||||
- qcom,krait-pmu
|
||||
|
||||
interrupts:
|
||||
# Don't know how many CPUs, so no constraints to specify
|
||||
description: 1 per-cpu interrupt (PPI) or 1 interrupt per core.
|
||||
|
||||
interrupt-affinity:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
description:
|
||||
When using SPIs, specifies a list of phandles to CPU
|
||||
nodes corresponding directly to the affinity of
|
||||
the SPIs listed in the interrupts property.
|
||||
|
||||
When using a PPI, specifies a list of phandles to CPU
|
||||
nodes corresponding to the set of CPUs which have
|
||||
a PMU of this type signalling the PPI listed in the
|
||||
interrupts property, unless this is already specified
|
||||
by the PPI interrupt specifier itself (in which case
|
||||
the interrupt-affinity property shouldn't be present).
|
||||
|
||||
This property should be present when there is more than
|
||||
a single SPI.
|
||||
|
||||
qcom,no-pc-write:
|
||||
type: boolean
|
||||
description:
|
||||
Indicates that this PMU doesn't support the 0xc and 0xd events.
|
||||
|
||||
secure-reg-access:
|
||||
type: boolean
|
||||
description:
|
||||
Indicates that the ARMv7 Secure Debug Enable Register
|
||||
(SDER) is accessible. This will cause the driver to do
|
||||
any setup required that is only possible in ARMv7 secure
|
||||
state. If not present the ARMv7 SDER will not be touched,
|
||||
which means the PMU may fail to operate unless external
|
||||
code (bootloader or security monitor) has performed the
|
||||
appropriate initialisation. Note that this property is
|
||||
not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
|
||||
in Non-secure state.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
...
|
238
Documentation/devicetree/bindings/arm/renesas.yaml
Normal file
@ -0,0 +1,238 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/renesas.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Renesas SH-Mobile, R-Mobile, and R-Car Platform Device Tree Bindings
|
||||
|
||||
maintainers:
|
||||
- Geert Uytterhoeven <geert+renesas@glider.be>
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: '/'
|
||||
compatible:
|
||||
oneOf:
|
||||
- description: Emma Mobile EV2
|
||||
items:
|
||||
- enum:
|
||||
- renesas,kzm9d # Kyoto Microcomputer Co. KZM-A9-Dual
|
||||
- const: renesas,emev2
|
||||
|
||||
- description: RZ/A1H (R7S72100)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,genmai # Genmai (RTK772100BC00000BR)
|
||||
- renesas,gr-peach # GR-Peach (X28A-M01-E/F)
|
||||
- renesas,rskrza1 # RSKRZA1 (YR0K77210C000BE)
|
||||
- const: renesas,r7s72100
|
||||
|
||||
- description: RZ/A2 (R7S9210)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,rza2mevb # RZ/A2M Eval Board (RTK7921053S00000BE)
|
||||
- const: renesas,r7s9210
|
||||
|
||||
- description: SH-Mobile AG5 (R8A73A00/SH73A0)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,kzm9g # Kyoto Microcomputer Co. KZM-A9-GT
|
||||
- const: renesas,sh73a0
|
||||
|
||||
- description: R-Mobile APE6 (R8A73A40)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,ape6evm
|
||||
- const: renesas,r8a73a4
|
||||
|
||||
- description: R-Mobile A1 (R8A77400)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,armadillo800eva # Atmark Techno Armadillo-800 EVA
|
||||
- const: renesas,r8a7740
|
||||
|
||||
- description: RZ/G1H (R8A77420)
|
||||
items:
|
||||
- const: renesas,r8a7742
|
||||
|
||||
- description: RZ/G1M (R8A77430)
|
||||
items:
|
||||
- enum:
|
||||
# iWave Systems RZ/G1M Qseven Development Platform (iW-RainboW-G20D-Qseven)
|
||||
- iwave,g20d
|
||||
- const: iwave,g20m
|
||||
- const: renesas,r8a7743
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
# iWave Systems RZ/G1M Qseven System On Module (iW-RainboW-G20M-Qseven)
|
||||
- iwave,g20m
|
||||
- renesas,sk-rzg1m # SK-RZG1M (YR8A77430S000BE)
|
||||
- const: renesas,r8a7743
|
||||
|
||||
- description: RZ/G1N (R8A77440)
|
||||
items:
|
||||
- enum:
|
||||
# iWave Systems RZ/G1N Qseven Development Platform (iW-RainboW-G20D-Qseven)
|
||||
- iwave,g20d
|
||||
- const: iwave,g20m
|
||||
- const: renesas,r8a7744
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
# iWave Systems RZ/G1N Qseven System On Module (iW-RainboW-G20M-Qseven)
|
||||
- iwave,g20m
|
||||
- const: renesas,r8a7744
|
||||
|
||||
- description: RZ/G1E (R8A77450)
|
||||
items:
|
||||
- enum:
|
||||
- iwave,g22m # iWave Systems RZ/G1E SODIMM System On Module (iW-RainboW-G22M-SM)
|
||||
- renesas,sk-rzg1e # SK-RZG1E (YR8A77450S000BE)
|
||||
- const: renesas,r8a7745
|
||||
|
||||
- description: iWave Systems RZ/G1E SODIMM SOM Development Platform (iW-RainboW-G22D)
|
||||
items:
|
||||
- const: iwave,g22d
|
||||
- const: iwave,g22m
|
||||
- const: renesas,r8a7745
|
||||
|
||||
- description: RZ/G1C (R8A77470)
|
||||
items:
|
||||
- enum:
|
||||
- iwave,g23s # iWave Systems RZ/G1C Single Board Computer (iW-RainboW-G23S)
|
||||
- const: renesas,r8a77470
|
||||
|
||||
- description: RZ/G2M (R8A774A1)
|
||||
items:
|
||||
- const: renesas,r8a774a1
|
||||
|
||||
- description: RZ/G2E (R8A774C0)
|
||||
items:
|
||||
- enum:
|
||||
- si-linux,cat874 # Silicon Linux RZ/G2E 96board platform (CAT874)
|
||||
- const: renesas,r8a774c0
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- si-linux,cat875 # Silicon Linux sub board for CAT874 (CAT875)
|
||||
- const: si-linux,cat874
|
||||
- const: renesas,r8a774c0
|
||||
|
||||
- description: R-Car M1A (R8A77781)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,bockw
|
||||
- const: renesas,r8a7778
|
||||
|
||||
- description: R-Car H1 (R8A77790)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,marzen # Marzen (R0P7779A00010S)
|
||||
- const: renesas,r8a7779
|
||||
|
||||
- description: R-Car H2 (R8A77900)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,lager # Lager (RTP0RC7790SEB00010S)
|
||||
- renesas,stout # Stout (ADAS Starterkit, Y-R-CAR-ADAS-SKH2-BOARD)
|
||||
- const: renesas,r8a7790
|
||||
|
||||
- description: R-Car M2-W (R8A77910)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,henninger
|
||||
- renesas,koelsch # Koelsch (RTP0RC7791SEB00010S)
|
||||
- renesas,porter # Porter (M2-LCDP)
|
||||
- const: renesas,r8a7791
|
||||
|
||||
- description: R-Car V2H (R8A77920)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,blanche # Blanche (RTP0RC7792SEB00010S)
|
||||
- renesas,wheat # Wheat (RTP0RC7792ASKB0000JE)
|
||||
- const: renesas,r8a7792
|
||||
|
||||
- description: R-Car M2-N (R8A77930)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,gose # Gose (RTP0RC7793SEB00010S)
|
||||
- const: renesas,r8a7793
|
||||
|
||||
- description: R-Car E2 (R8A77940)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,alt # Alt (RTP0RC7794SEB00010S)
|
||||
- renesas,silk # SILK (RTP0RC7794LCB00011S)
|
||||
- const: renesas,r8a7794
|
||||
|
||||
- description: R-Car H3 (R8A77950)
|
||||
items:
|
||||
- enum:
|
||||
# H3ULCB (R-Car Starter Kit Premier, RTP0RC7795SKBX0010SA00 (H3 ES1.1))
|
||||
# H3ULCB (R-Car Starter Kit Premier, RTP0RC77951SKBX010SA00 (H3 ES2.0))
|
||||
- renesas,h3ulcb
|
||||
- renesas,salvator-x # Salvator-X (RTP0RC7795SIPB0010S)
|
||||
- renesas,salvator-xs # Salvator-XS (Salvator-X 2nd version, RTP0RC7795SIPB0012S)
|
||||
- const: renesas,r8a7795
|
||||
|
||||
- description: R-Car M3-W (R8A77960)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,m3ulcb # M3ULCB (R-Car Starter Kit Pro, RTP0RC7796SKBX0010SA09 (M3 ES1.0))
|
||||
- renesas,salvator-x # Salvator-X (RTP0RC7796SIPB0011S)
|
||||
- renesas,salvator-xs # Salvator-XS (Salvator-X 2nd version, RTP0RC7796SIPB0012S)
|
||||
- const: renesas,r8a7796
|
||||
|
||||
- description: Kingfisher (SBEV-RCAR-KF-M03)
|
||||
items:
|
||||
- const: shimafuji,kingfisher
|
||||
- enum:
|
||||
- renesas,h3ulcb
|
||||
- renesas,m3ulcb
|
||||
- enum:
|
||||
- renesas,r8a7795
|
||||
- renesas,r8a7796
|
||||
|
||||
- description: R-Car M3-N (R8A77965)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,m3nulcb # M3NULCB (R-Car Starter Kit Pro, RTP0RC77965SKBX010SA00 (M3-N ES1.1))
|
||||
- renesas,salvator-x # Salvator-X (RTP0RC7796SIPB0011S (M3-N))
|
||||
- renesas,salvator-xs # Salvator-XS (Salvator-X 2nd version, RTP0RC77965SIPB012S)
|
||||
- const: renesas,r8a77965
|
||||
|
||||
- description: R-Car V3M (R8A77970)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,eagle # Eagle (RTP0RC77970SEB0010S)
|
||||
- renesas,v3msk # V3MSK (Y-ASK-RCAR-V3M-WS10)
|
||||
- const: renesas,r8a77970
|
||||
|
||||
- description: R-Car V3H (R8A77980)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,condor # Condor (RTP0RC77980SEB0010SS/RTP0RC77980SEB0010SA01)
|
||||
- renesas,v3hsk # V3HSK (Y-ASK-RCAR-V3H-WS10)
|
||||
- const: renesas,r8a77980
|
||||
|
||||
- description: R-Car E3 (R8A77990)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,ebisu # Ebisu (RTP0RC77990SEB0010S)
|
||||
- const: renesas,r8a77990
|
||||
|
||||
- description: R-Car D3 (R8A77995)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,draak # Draak (RTP0RC77995SEB0010S)
|
||||
- const: renesas,r8a77995
|
||||
|
||||
- description: RZ/N1D (R9A06G032)
|
||||
items:
|
||||
- enum:
|
||||
- renesas,rzn1d400-db # RZN1D-DB (RZ/N1D Demo Board for the RZ/N1D 400 pins package)
|
||||
- const: renesas,r9a06g032
|
||||
|
||||
...
|
@ -60,6 +60,11 @@ properties:
|
||||
- const: chipspark,rayeager-px2
|
||||
- const: rockchip,rk3066a
|
||||
|
||||
- description: Elgin RV1108 R1
|
||||
items:
|
||||
- const: elgin,rv1108-r1
|
||||
- const: rockchip,rv1108
|
||||
|
||||
- description: Firefly Firefly-RK3288
|
||||
items:
|
||||
- enum:
|
||||
@ -87,6 +92,13 @@ properties:
|
||||
- const: firefly,roc-rk3399-pc
|
||||
- const: rockchip,rk3399
|
||||
|
||||
- description: FriendlyElec NanoPi4 series boards
|
||||
items:
|
||||
- enum:
|
||||
- friendlyarm,nanopc-t4
|
||||
- friendlyarm,nanopi-m4
|
||||
- const: rockchip,rk3399
|
||||
|
||||
- description: GeekBuying GeekBox
|
||||
items:
|
||||
- const: geekbuying,geekbox
|
||||
@ -317,6 +329,11 @@ properties:
|
||||
- const: radxa,rock
|
||||
- const: rockchip,rk3188
|
||||
|
||||
- description: Radxa ROCK Pi 4
|
||||
items:
|
||||
- const: radxa,rockpi4
|
||||
- const: rockchip,rk3399
|
||||
|
||||
- description: Radxa Rock2 Square
|
||||
items:
|
||||
- const: radxa,rock2-square
|
||||
|
@ -1,155 +0,0 @@
|
||||
Renesas SH-Mobile, R-Mobile, and R-Car Platform Device Tree Bindings
|
||||
--------------------------------------------------------------------
|
||||
|
||||
SoCs:
|
||||
|
||||
- Emma Mobile EV2
|
||||
compatible = "renesas,emev2"
|
||||
- RZ/A1H (R7S72100)
|
||||
compatible = "renesas,r7s72100"
|
||||
- RZ/A2 (R7S9210)
|
||||
compatible = "renesas,r7s9210"
|
||||
- SH-Mobile AG5 (R8A73A00/SH73A0)
|
||||
compatible = "renesas,sh73a0"
|
||||
- R-Mobile APE6 (R8A73A40)
|
||||
compatible = "renesas,r8a73a4"
|
||||
- R-Mobile A1 (R8A77400)
|
||||
compatible = "renesas,r8a7740"
|
||||
- RZ/G1H (R8A77420)
|
||||
compatible = "renesas,r8a7742"
|
||||
- RZ/G1M (R8A77430)
|
||||
compatible = "renesas,r8a7743"
|
||||
- RZ/G1N (R8A77440)
|
||||
compatible = "renesas,r8a7744"
|
||||
- RZ/G1E (R8A77450)
|
||||
compatible = "renesas,r8a7745"
|
||||
- RZ/G1C (R8A77470)
|
||||
compatible = "renesas,r8a77470"
|
||||
- RZ/G2M (R8A774A1)
|
||||
compatible = "renesas,r8a774a1"
|
||||
- RZ/G2E (R8A774C0)
|
||||
compatible = "renesas,r8a774c0"
|
||||
- R-Car M1A (R8A77781)
|
||||
compatible = "renesas,r8a7778"
|
||||
- R-Car H1 (R8A77790)
|
||||
compatible = "renesas,r8a7779"
|
||||
- R-Car H2 (R8A77900)
|
||||
compatible = "renesas,r8a7790"
|
||||
- R-Car M2-W (R8A77910)
|
||||
compatible = "renesas,r8a7791"
|
||||
- R-Car V2H (R8A77920)
|
||||
compatible = "renesas,r8a7792"
|
||||
- R-Car M2-N (R8A77930)
|
||||
compatible = "renesas,r8a7793"
|
||||
- R-Car E2 (R8A77940)
|
||||
compatible = "renesas,r8a7794"
|
||||
- R-Car H3 (R8A77950)
|
||||
compatible = "renesas,r8a7795"
|
||||
- R-Car M3-W (R8A77960)
|
||||
compatible = "renesas,r8a7796"
|
||||
- R-Car M3-N (R8A77965)
|
||||
compatible = "renesas,r8a77965"
|
||||
- R-Car V3M (R8A77970)
|
||||
compatible = "renesas,r8a77970"
|
||||
- R-Car V3H (R8A77980)
|
||||
compatible = "renesas,r8a77980"
|
||||
- R-Car E3 (R8A77990)
|
||||
compatible = "renesas,r8a77990"
|
||||
- R-Car D3 (R8A77995)
|
||||
compatible = "renesas,r8a77995"
|
||||
- RZ/N1D (R9A06G032)
|
||||
compatible = "renesas,r9a06g032"
|
||||
|
||||
Boards:
|
||||
|
||||
- Alt (RTP0RC7794SEB00010S)
|
||||
compatible = "renesas,alt", "renesas,r8a7794"
|
||||
- APE6-EVM
|
||||
compatible = "renesas,ape6evm", "renesas,r8a73a4"
|
||||
- Atmark Techno Armadillo-800 EVA
|
||||
compatible = "renesas,armadillo800eva", "renesas,r8a7740"
|
||||
- Blanche (RTP0RC7792SEB00010S)
|
||||
compatible = "renesas,blanche", "renesas,r8a7792"
|
||||
- BOCK-W
|
||||
compatible = "renesas,bockw", "renesas,r8a7778"
|
||||
- Condor (RTP0RC77980SEB0010SS/RTP0RC77980SEB0010SA01)
|
||||
compatible = "renesas,condor", "renesas,r8a77980"
|
||||
- Draak (RTP0RC77995SEB0010S)
|
||||
compatible = "renesas,draak", "renesas,r8a77995"
|
||||
- Eagle (RTP0RC77970SEB0010S)
|
||||
compatible = "renesas,eagle", "renesas,r8a77970"
|
||||
- Ebisu (RTP0RC77990SEB0010S)
|
||||
compatible = "renesas,ebisu", "renesas,r8a77990"
|
||||
- Genmai (RTK772100BC00000BR)
|
||||
compatible = "renesas,genmai", "renesas,r7s72100"
|
||||
- GR-Peach (X28A-M01-E/F)
|
||||
compatible = "renesas,gr-peach", "renesas,r7s72100"
|
||||
- Gose (RTP0RC7793SEB00010S)
|
||||
compatible = "renesas,gose", "renesas,r8a7793"
|
||||
- H3ULCB (R-Car Starter Kit Premier, RTP0RC7795SKBX0010SA00 (H3 ES1.1))
|
||||
H3ULCB (R-Car Starter Kit Premier, RTP0RC77951SKBX010SA00 (H3 ES2.0))
|
||||
compatible = "renesas,h3ulcb", "renesas,r8a7795"
|
||||
- Henninger
|
||||
compatible = "renesas,henninger", "renesas,r8a7791"
|
||||
- iWave Systems RZ/G1C Single Board Computer (iW-RainboW-G23S)
|
||||
compatible = "iwave,g23s", "renesas,r8a77470"
|
||||
- iWave Systems RZ/G1E SODIMM SOM Development Platform (iW-RainboW-G22D)
|
||||
compatible = "iwave,g22d", "iwave,g22m", "renesas,r8a7745"
|
||||
- iWave Systems RZ/G1E SODIMM System On Module (iW-RainboW-G22M-SM)
|
||||
compatible = "iwave,g22m", "renesas,r8a7745"
|
||||
- iWave Systems RZ/G1M Qseven Development Platform (iW-RainboW-G20D-Qseven)
|
||||
compatible = "iwave,g20d", "iwave,g20m", "renesas,r8a7743"
|
||||
- iWave Systems RZ/G1M Qseven System On Module (iW-RainboW-G20M-Qseven)
|
||||
compatible = "iwave,g20m", "renesas,r8a7743"
|
||||
- iWave Systems RZ/G1N Qseven Development Platform (iW-RainboW-G20D-Qseven)
|
||||
compatible = "iwave,g20d", "iwave,g20m", "renesas,r8a7744"
|
||||
- iWave Systems RZ/G1N Qseven System On Module (iW-RainboW-G20M-Qseven)
|
||||
compatible = "iwave,g20m", "renesas,r8a7744"
|
||||
- Kingfisher (SBEV-RCAR-KF-M03)
|
||||
compatible = "shimafuji,kingfisher"
|
||||
- Koelsch (RTP0RC7791SEB00010S)
|
||||
compatible = "renesas,koelsch", "renesas,r8a7791"
|
||||
- Kyoto Microcomputer Co. KZM-A9-Dual
|
||||
compatible = "renesas,kzm9d", "renesas,emev2"
|
||||
- Kyoto Microcomputer Co. KZM-A9-GT
|
||||
compatible = "renesas,kzm9g", "renesas,sh73a0"
|
||||
- Lager (RTP0RC7790SEB00010S)
|
||||
compatible = "renesas,lager", "renesas,r8a7790"
|
||||
- M3ULCB (R-Car Starter Kit Pro, RTP0RC7796SKBX0010SA09 (M3 ES1.0))
|
||||
compatible = "renesas,m3ulcb", "renesas,r8a7796"
|
||||
- M3NULCB (R-Car Starter Kit Pro, RTP0RC77965SKBX010SA00 (M3-N ES1.1))
|
||||
compatible = "renesas,m3nulcb", "renesas,r8a77965"
|
||||
- Marzen (R0P7779A00010S)
|
||||
compatible = "renesas,marzen", "renesas,r8a7779"
|
||||
- Porter (M2-LCDP)
|
||||
compatible = "renesas,porter", "renesas,r8a7791"
|
||||
- RSKRZA1 (YR0K77210C000BE)
|
||||
compatible = "renesas,rskrza1", "renesas,r7s72100"
|
||||
- RZN1D-DB (RZ/N1D Demo Board for the RZ/N1D 400 pins package)
|
||||
compatible = "renesas,rzn1d400-db", "renesas,r9a06g032"
|
||||
- Salvator-X (RTP0RC7795SIPB0010S)
|
||||
compatible = "renesas,salvator-x", "renesas,r8a7795"
|
||||
- Salvator-X (RTP0RC7796SIPB0011S)
|
||||
compatible = "renesas,salvator-x", "renesas,r8a7796"
|
||||
- Salvator-X (RTP0RC7796SIPB0011S (M3-N))
|
||||
compatible = "renesas,salvator-x", "renesas,r8a77965"
|
||||
- Salvator-XS (Salvator-X 2nd version, RTP0RC7795SIPB0012S)
|
||||
compatible = "renesas,salvator-xs", "renesas,r8a7795"
|
||||
- Salvator-XS (Salvator-X 2nd version, RTP0RC7796SIPB0012S)
|
||||
compatible = "renesas,salvator-xs", "renesas,r8a7796"
|
||||
- Salvator-XS (Salvator-X 2nd version, RTP0RC77965SIPB012S)
|
||||
compatible = "renesas,salvator-xs", "renesas,r8a77965"
|
||||
- SILK (RTP0RC7794LCB00011S)
|
||||
compatible = "renesas,silk", "renesas,r8a7794"
|
||||
- SK-RZG1E (YR8A77450S000BE)
|
||||
compatible = "renesas,sk-rzg1e", "renesas,r8a7745"
|
||||
- SK-RZG1M (YR8A77430S000BE)
|
||||
compatible = "renesas,sk-rzg1m", "renesas,r8a7743"
|
||||
- Stout (ADAS Starterkit, Y-R-CAR-ADAS-SKH2-BOARD)
|
||||
compatible = "renesas,stout", "renesas,r8a7790"
|
||||
- V3HSK (Y-ASK-RCAR-V3H-WS10)
|
||||
compatible = "renesas,v3hsk", "renesas,r8a77980"
|
||||
- V3MSK (Y-ASK-RCAR-V3M-WS10)
|
||||
compatible = "renesas,v3msk", "renesas,r8a77970"
|
||||
- Wheat (RTP0RC7792ASKB0000JE)
|
||||
compatible = "renesas,wheat", "renesas,r8a7792"
|
@ -0,0 +1,22 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/milbeaut.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Milbeaut platforms device tree bindings
|
||||
|
||||
maintainers:
|
||||
- Taichi Sugaya <sugaya.taichi@socionext.com>
|
||||
- Takao Orito <orito.takao@socionext.com>
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: '/'
|
||||
compatible:
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- socionext,milbeaut-m10v-evb
|
||||
- const: socionext,sc2000a
|
||||
...
|
@ -1,23 +0,0 @@
|
||||
Technologic Systems Platforms Device Tree Bindings
|
||||
--------------------------------------------------
|
||||
|
||||
TS-4600 is a System-on-Module based on the Freescale i.MX28 System-on-Chip.
|
||||
It can be mounted on a carrier board providing additional peripheral connectors.
|
||||
Required root node properties:
|
||||
- compatible = "technologic,imx28-ts4600", "fsl,imx28"
|
||||
|
||||
TS-4800 board
|
||||
Required root node properties:
|
||||
- compatible = "technologic,imx51-ts4800", "fsl,imx51"
|
||||
|
||||
TS-4900 is a System-on-Module based on the Freescale i.MX6 System-on-Chip.
|
||||
It can be mounted on a carrier board providing additional peripheral connectors.
|
||||
Required root node properties:
|
||||
- compatible = "technologic,imx6dl-ts4900", "fsl,imx6dl"
|
||||
- compatible = "technologic,imx6q-ts4900", "fsl,imx6q"
|
||||
|
||||
TS-7970 is a System-on-Module based on the Freescale i.MX6 System-on-Chip.
|
||||
It can be mounted on a carrier board providing additional peripheral connectors.
|
||||
Required root node properties:
|
||||
- compatible = "technologic,imx6dl-ts7970", "fsl,imx6dl"
|
||||
- compatible = "technologic,imx6q-ts7970", "fsl,imx6q"
|
@ -87,9 +87,11 @@ properties:
|
||||
- const: nvidia,tegra124
|
||||
- items:
|
||||
- enum:
|
||||
- nvidia,darcy
|
||||
- nvidia,p2371-0000
|
||||
- nvidia,p2371-2180
|
||||
- nvidia,p2571
|
||||
- nvidia,p2894-0050-a08
|
||||
- const: nvidia,tegra210
|
||||
- items:
|
||||
- enum:
|
||||
|
@ -47,9 +47,9 @@ Optional properties:
|
||||
Timing property for child nodes. It is mandatory, not optional.
|
||||
|
||||
- fsl,weim-cs-timing: The timing array, contains timing values for the
|
||||
child node. We can get the CS index from the child
|
||||
node's "reg" property. The number of registers depends
|
||||
on the selected chip.
|
||||
child node. We get the CS indexes from the address
|
||||
ranges in the child node's "reg" property.
|
||||
The number of registers depends on the selected chip:
|
||||
For i.MX1, i.MX21 ("fsl,imx1-weim") there are two
|
||||
registers: CSxU, CSxL.
|
||||
For i.MX25, i.MX27, i.MX31 and i.MX35 ("fsl,imx27-weim")
|
||||
@ -80,3 +80,29 @@ Example for an imx6q-sabreauto board, the NOR flash connected to the WEIM:
|
||||
0x0000c000 0x1404a38e 0x00000000>;
|
||||
};
|
||||
};
|
||||
|
||||
Example for an imx6q-based board, a multi-chipselect device connected to WEIM:
|
||||
|
||||
In this case, both chip select 0 and 1 will be configured with the same timing
|
||||
array values.
|
||||
|
||||
weim: weim@21b8000 {
|
||||
compatible = "fsl,imx6q-weim";
|
||||
reg = <0x021b8000 0x4000>;
|
||||
clocks = <&clks 196>;
|
||||
#address-cells = <2>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0 0 0x08000000 0x02000000
|
||||
1 0 0x0a000000 0x02000000
|
||||
2 0 0x0c000000 0x02000000
|
||||
3 0 0x0e000000 0x02000000>;
|
||||
fsl,weim-cs-gpr = <&gpr>;
|
||||
|
||||
acme@0 {
|
||||
compatible = "acme,whatever";
|
||||
reg = <0 0 0x100>, <0 0x400000 0x800>,
|
||||
<1 0x400000 0x800>;
|
||||
fsl,weim-cs-timing = <0x024400b1 0x00001010 0x20081100
|
||||
0x00000000 0xa0000240 0x00000000>;
|
||||
};
|
||||
};
|
||||
|
@ -2,13 +2,14 @@
|
||||
|
||||
The Actions Semi Owl Clock Management Unit generates and supplies clocks
|
||||
to various controllers within the SoC. The clock binding described here is
|
||||
applicable to S900 and S700 SoCs.
|
||||
applicable to S900, S700 and S500 SoCs.
|
||||
|
||||
Required Properties:
|
||||
|
||||
- compatible: should be one of the following,
|
||||
"actions,s900-cmu"
|
||||
"actions,s700-cmu"
|
||||
"actions,s500-cmu"
|
||||
- reg: physical base address of the controller and length of memory mapped
|
||||
region.
|
||||
- clocks: Reference to the parent clocks ("hosc", "losc")
|
||||
@ -19,8 +20,8 @@ Each clock is assigned an identifier, and client nodes can use this identifier
|
||||
to specify the clock which they consume.
|
||||
|
||||
All available clocks are defined as preprocessor macros in corresponding
|
||||
dt-bindings/clock/actions,s900-cmu.h or actions,s700-cmu.h header and can be
|
||||
used in device tree sources.
|
||||
dt-bindings/clock/actions,s900-cmu.h, actions,s700-cmu.h or
|
||||
actions,s500-cmu.h header and can be used in device tree sources.
|
||||
|
||||
External clocks:
|
||||
|
||||
|