Merge branch 'for-6.14' into for-next

This commit is contained in:
Petr Mladek 2024-12-12 12:29:15 +01:00
commit 44722ff761
12275 changed files with 427047 additions and 197176 deletions

View File

@ -141,11 +141,13 @@ ForEachMacros:
- 'damon_for_each_target_safe'
- 'damos_for_each_filter'
- 'damos_for_each_filter_safe'
- 'damos_for_each_quota_goal'
- 'damos_for_each_quota_goal_safe'
- 'data__for_each_file'
- 'data__for_each_file_new'
- 'data__for_each_file_start'
- 'device_for_each_child_node'
- 'displayid_iter_for_each'
- 'device_for_each_child_node_scoped'
- 'dma_fence_array_for_each'
- 'dma_fence_chain_for_each'
- 'dma_fence_unwrap_for_each'
@ -172,11 +174,14 @@ ForEachMacros:
- 'drm_for_each_plane'
- 'drm_for_each_plane_mask'
- 'drm_for_each_privobj'
- 'drm_gem_for_each_gpuva'
- 'drm_gem_for_each_gpuva_safe'
- 'drm_gem_for_each_gpuvm_bo'
- 'drm_gem_for_each_gpuvm_bo_safe'
- 'drm_gpuva_for_each_op'
- 'drm_gpuva_for_each_op_from_reverse'
- 'drm_gpuva_for_each_op_reverse'
- 'drm_gpuva_for_each_op_safe'
- 'drm_gpuvm_bo_for_each_va'
- 'drm_gpuvm_bo_for_each_va_safe'
- 'drm_gpuvm_for_each_va'
- 'drm_gpuvm_for_each_va_range'
- 'drm_gpuvm_for_each_va_range_safe'
@ -192,11 +197,11 @@ ForEachMacros:
- 'dsa_switch_for_each_port_continue_reverse'
- 'dsa_switch_for_each_port_safe'
- 'dsa_switch_for_each_user_port'
- 'dsa_switch_for_each_user_port_continue_reverse'
- 'dsa_tree_for_each_cpu_port'
- 'dsa_tree_for_each_user_port'
- 'dsa_tree_for_each_user_port_continue_reverse'
- 'dso__for_each_symbol'
- 'dsos__for_each_with_build_id'
- 'elf_hash_for_each_possible'
- 'elf_symtab__for_each_symbol'
- 'evlist__for_each_cpu'
@ -216,6 +221,7 @@ ForEachMacros:
- 'for_each_and_bit'
- 'for_each_andnot_bit'
- 'for_each_available_child_of_node'
- 'for_each_available_child_of_node_scoped'
- 'for_each_bench'
- 'for_each_bio'
- 'for_each_board_func_rsrc'
@ -234,6 +240,7 @@ ForEachMacros:
- 'for_each_card_widgets_safe'
- 'for_each_cgroup_storage_type'
- 'for_each_child_of_node'
- 'for_each_child_of_node_scoped'
- 'for_each_clear_bit'
- 'for_each_clear_bit_from'
- 'for_each_clear_bitrange'
@ -251,6 +258,7 @@ ForEachMacros:
- 'for_each_cpu'
- 'for_each_cpu_and'
- 'for_each_cpu_andnot'
- 'for_each_cpu_from'
- 'for_each_cpu_or'
- 'for_each_cpu_wrap'
- 'for_each_dapm_widgets'
@ -269,13 +277,14 @@ ForEachMacros:
- 'for_each_element'
- 'for_each_element_extid'
- 'for_each_element_id'
- 'for_each_enabled_cpu'
- 'for_each_endpoint_of_node'
- 'for_each_event'
- 'for_each_event_tps'
- 'for_each_evictable_lru'
- 'for_each_fib6_node_rt_rcu'
- 'for_each_fib6_walker_rt'
- 'for_each_free_mem_pfn_range_in_zone'
- 'for_each_file_lock'
- 'for_each_free_mem_pfn_range_in_zone_from'
- 'for_each_free_mem_range'
- 'for_each_free_mem_range_reverse'
@ -286,15 +295,18 @@ ForEachMacros:
- 'for_each_group_member'
- 'for_each_group_member_head'
- 'for_each_hstate'
- 'for_each_hwgpio'
- 'for_each_if'
- 'for_each_inject_fn'
- 'for_each_insn'
- 'for_each_insn_op_loc'
- 'for_each_insn_prefix'
- 'for_each_intid'
- 'for_each_iommu'
- 'for_each_ip_tunnel_rcu'
- 'for_each_irq_nr'
- 'for_each_lang'
- 'for_each_link_ch_maps'
- 'for_each_link_codecs'
- 'for_each_link_cpus'
- 'for_each_link_platforms'
@ -332,6 +344,9 @@ ForEachMacros:
- 'for_each_new_plane_in_state_reverse'
- 'for_each_new_private_obj_in_state'
- 'for_each_new_reg'
- 'for_each_nhlt_endpoint'
- 'for_each_nhlt_endpoint_fmtcfg'
- 'for_each_nhlt_fmtcfg'
- 'for_each_node'
- 'for_each_node_by_name'
- 'for_each_node_by_type'
@ -387,12 +402,15 @@ ForEachMacros:
- 'for_each_reloc_from'
- 'for_each_requested_gpio'
- 'for_each_requested_gpio_in_range'
- 'for_each_reserved_child_of_node'
- 'for_each_reserved_mem_range'
- 'for_each_reserved_mem_region'
- 'for_each_rtd_ch_maps'
- 'for_each_rtd_codec_dais'
- 'for_each_rtd_components'
- 'for_each_rtd_cpu_dais'
- 'for_each_rtd_dais'
- 'for_each_rtd_dais_reverse'
- 'for_each_sband_iftype_data'
- 'for_each_script'
- 'for_each_sec'
@ -533,8 +551,6 @@ ForEachMacros:
- 'lwq_for_each_safe'
- 'map__for_each_symbol'
- 'map__for_each_symbol_by_name'
- 'maps__for_each_entry'
- 'maps__for_each_entry_safe'
- 'mas_for_each'
- 'mci_for_each_dimm'
- 'media_device_for_each_entity'
@ -560,7 +576,9 @@ ForEachMacros:
- 'netdev_hw_addr_list_for_each'
- 'nft_rule_for_each_expr'
- 'nla_for_each_attr'
- 'nla_for_each_attr_type'
- 'nla_for_each_nested'
- 'nla_for_each_nested_type'
- 'nlmsg_for_each_attr'
- 'nlmsg_for_each_msg'
- 'nr_neigh_for_each'
@ -579,6 +597,7 @@ ForEachMacros:
- 'perf_config_sections__for_each_entry'
- 'perf_config_set__for_each_entry'
- 'perf_cpu_map__for_each_cpu'
- 'perf_cpu_map__for_each_cpu_skip_any'
- 'perf_cpu_map__for_each_idx'
- 'perf_evlist__for_each_entry'
- 'perf_evlist__for_each_entry_reverse'
@ -639,7 +658,6 @@ ForEachMacros:
- 'shost_for_each_device'
- 'sk_for_each'
- 'sk_for_each_bound'
- 'sk_for_each_bound_bhash2'
- 'sk_for_each_entry_offset_rcu'
- 'sk_for_each_from'
- 'sk_for_each_rcu'
@ -653,6 +671,7 @@ ForEachMacros:
- 'snd_soc_dapm_widget_for_each_path_safe'
- 'snd_soc_dapm_widget_for_each_sink_path'
- 'snd_soc_dapm_widget_for_each_source_path'
- 'sparsebit_for_each_set_range'
- 'strlist__for_each_entry'
- 'strlist__for_each_entry_safe'
- 'sym_for_each_insn'
@ -662,7 +681,6 @@ ForEachMacros:
- 'tcf_act_for_each_action'
- 'tcf_exts_for_each_action'
- 'ttm_resource_manager_for_each_res'
- 'twsk_for_each_bound_bhash2'
- 'udp_portaddr_for_each_entry'
- 'udp_portaddr_for_each_entry_rcu'
- 'usb_hub_for_each_child'
@ -686,6 +704,9 @@ ForEachMacros:
- 'xbc_node_for_each_child'
- 'xbc_node_for_each_key_value'
- 'xbc_node_for_each_subkey'
- 'ynl_attr_for_each'
- 'ynl_attr_for_each_nested'
- 'ynl_attr_for_each_payload'
- 'zorro_for_each_dev'
IncludeBlocks: Preserve

View File

@ -3,3 +3,4 @@ Alan Cox <root@hraefn.swansea.linux.org.uk>
Christoph Hellwig <hch@lst.de>
Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Marc Gonzalez <marc.w.gonzalez@free.fr>
Ralf Baechle <ralf@linux-mips.org>

3
.gitignore vendored
View File

@ -47,7 +47,6 @@
*.so.dbg
*.su
*.symtypes
*.symversions
*.tab.[ch]
*.tar
*.xz
@ -71,6 +70,7 @@ modules.order
/Module.markers
/modules.builtin
/modules.builtin.modinfo
/modules.builtin.ranges
/modules.nsdeps
#
@ -143,7 +143,6 @@ GTAGS
# id-utils files
ID
*.orig
*~
\#*#

View File

@ -73,6 +73,8 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
Andy Adamson <andros@citi.umich.edu>
Andy Chiu <andybnac@gmail.com> <andy.chiu@sifive.com>
Andy Chiu <andybnac@gmail.com> <taochiu@synology.com>
Andy Shevchenko <andy@kernel.org> <andy@smile.org.ua>
Andy Shevchenko <andy@kernel.org> <ext-andriy.shevchenko@nokia.com>
Anilkumar Kolli <quic_akolli@quicinc.com> <akolli@codeaurora.org>
@ -154,6 +156,9 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de>
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
Chuck Lever <chuck.lever@oracle.com> <cel@citi.umich.edu>
Claudiu Beznea <claudiu.beznea@tuxon.dev> <claudiu.beznea@microchip.com>
Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com>
Corey Minyard <minyard@acm.org>
@ -194,18 +199,23 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@intel.com>
Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@collabora.com>
Fangrui Song <i@maskray.me> <maskray@google.com>
Felipe W Damasio <felipewd@terra.com.br>
Felix Kuhling <fxkuehl@gmx.de>
Felix Moeller <felix@derklecks.de>
Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
Filipe Lautert <filipe@icewall.org>
Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
Fiona Behrens <me@kloenk.dev>
Fiona Behrens <me@kloenk.dev> <me@kloenk.de>
Fiona Behrens <me@kloenk.dev> <fin@nyantec.com>
Franck Bui-Huu <vagabon.xyz@gmail.com>
Frank Rowand <frowand.list@gmail.com> <frank.rowand@am.sony.com>
Frank Rowand <frowand.list@gmail.com> <frank.rowand@sony.com>
@ -273,7 +283,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
@ -297,6 +307,11 @@ Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
Jesper Dangaard Brouer <hawk@kernel.org> <brouer@redhat.com>
Jesper Dangaard Brouer <hawk@kernel.org> <hawk@comx.dk>
Jesper Dangaard Brouer <hawk@kernel.org> <jbrouer@redhat.com>
Jesper Dangaard Brouer <hawk@kernel.org> <jdb@comx.dk>
Jesper Dangaard Brouer <hawk@kernel.org> <netoptimizer@brouer.com>
Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
Jilai Wang <quic_jilaiw@quicinc.com> <jilaiw@codeaurora.org>
Jiri Kosina <jikos@kernel.org> <jikos@jikos.cz>
@ -313,6 +328,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
@ -613,6 +629,10 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@ffwll.ch>
Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@intel.com>
Simona Vetter <simona.vetter@ffwll.ch> <daniel@ffwll.ch>
Simona Vetter <simona.vetter@ffwll.ch> <daniel@biene.ffwll.ch>
Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
Simon Kelley <simon@thekelleys.org.uk>
@ -645,6 +665,7 @@ Tomeu Vizoso <tomeu@tomeuvizoso.net> <tomeu.vizoso@collabora.com>
Thomas Graf <tgraf@suug.ch>
Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
Thomas Pedersen <twp@codeaurora.org>
Thorsten Blum <thorsten.blum@linux.dev> <thorsten.blum@toblux.com>
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
@ -709,6 +730,7 @@ Will Deacon <will@kernel.org> <will.deacon@arm.com>
Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>

73
CREDITS
View File

@ -185,6 +185,11 @@ P: 1024/AF7B30C1 CF 97 C2 CC 6D AE A7 FE C8 BA 9C FC 88 DE 32 C3
D: Linux/MIPS port
D: Linux/68k hacker
D: AX25 maintainer
D: EDAC-CAVIUM OCTEON maintainer
D: IOC3 ETHERNET DRIVER maintainer
D: NETROM NETWORK LAYER maintainer
D: ROSE NETWORK LAYER maintainer
D: TURBOCHANNEL SUBSYSTEM maintainer
S: Hauptstrasse 19
S: 79837 St. Blasien
S: Germany
@ -378,6 +383,9 @@ S: 1549 Hiironen Rd.
S: Brimson, MN 55602
S: USA
N: Arnd Bergmann
D: Maintainer of Cell Broadband Engine Architecture
N: Hennus Bergman
P: 1024/77D50909 76 99 FD 31 91 E1 96 1C 90 BB 22 80 62 F6 BD 63
D: Author and maintainer of the QIC-02 tape driver
@ -1201,6 +1209,10 @@ S: Dreisbachstrasse 24
S: D-57250 Netphen
S: Germany
N: Florian Fainelli
E: f.fainelli@gmail.com
D: DSA
N: Rik Faith
E: faith@acm.org
D: Future Domain TMC-16x0 SCSI driver (author)
@ -1355,10 +1367,6 @@ D: Major kbuild rework during the 2.5 cycle
D: ISDN Maintainer
S: USA
N: Gerrit Renker
E: gerrit@erg.abdn.ac.uk
D: DCCP protocol support.
N: Philip Gladstone
E: philip@gladstonefamily.net
D: Kernel / timekeeping stuff
@ -1674,11 +1682,6 @@ W: http://www.carumba.com/
D: bug toaster (A1 sauce makes all the difference)
D: Random linux hacker
N: James Hogan
E: jhogan@kernel.org
D: Metag architecture maintainer
D: TZ1090 SoC maintainer
N: Tim Hockin
E: thockin@hockin.org
W: http://www.hockin.org/~thockin
@ -1694,6 +1697,11 @@ D: hwmon subsystem maintainer
D: i2c-sis96x and i2c-stub SMBus drivers
S: USA
N: James Hogan
E: jhogan@kernel.org
D: Metag architecture maintainer
D: TZ1090 SoC maintainer
N: Dirk Hohndel
E: hohndel@suse.de
D: The XFree86[tm] Project
@ -1869,6 +1877,13 @@ S: K osmidomkum 723
S: 160 00 Praha 6
S: Czech Republic
N: Seth Jennings
E: sjenning@redhat.com
D: Creation and maintenance of zswap
N: Jeremy Kerr
D: Maintainer of SPU File System
N: Michael Kerrisk
E: mtk.manpages@gmail.com
W: https://man7.org/
@ -2182,19 +2197,6 @@ N: Mike Kravetz
E: mike.kravetz@oracle.com
D: Maintenance and development of the hugetlb subsystem
N: Seth Jennings
E: sjenning@redhat.com
D: Creation and maintenance of zswap
N: Dan Streetman
E: ddstreet@ieee.org
D: Maintenance and development of zswap
D: Creation and maintenance of the zpool API
N: Vitaly Wool
E: vitaly.wool@konsulko.com
D: Maintenance and development of zswap
N: Andreas S. Krebs
E: akrebs@altavista.net
D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards
@ -3185,6 +3187,11 @@ N: Ken Pizzini
E: ken@halcyon.com
D: CDROM driver "sonycd535" (Sony CDU-535/531)
N: Mathieu Poirier
E: mathieu.poirier@linaro.org
D: CoreSight kernel subsystem, Maintainer 2014-2022
D: Perf tool support for CoreSight
N: Stelian Pop
E: stelian@popies.net
P: 1024D/EDBB6147 7B36 0E07 04BC 11DC A7A0 D3F7 7185 9E7A EDBB 6147
@ -3294,6 +3301,10 @@ S: Schlossbergring 9
S: 79098 Freiburg
S: Germany
N: Gerrit Renker
E: gerrit@erg.abdn.ac.uk
D: DCCP protocol support.
N: Thomas Renninger
E: trenn@suse.de
D: cpupowerutils
@ -3570,11 +3581,6 @@ D: several improvements to system programs
S: Oldenburg
S: Germany
N: Mathieu Poirier
E: mathieu.poirier@linaro.org
D: CoreSight kernel subsystem, Maintainer 2014-2022
D: Perf tool support for CoreSight
N: Robert Schwebel
E: robert@schwebel.de
W: https://www.schwebel.de
@ -3765,6 +3771,11 @@ S: Chr. Winthersvej 1 B, st.th.
S: DK-1860 Frederiksberg C
S: Denmark
N: Dan Streetman
E: ddstreet@ieee.org
D: Maintenance and development of zswap
D: Creation and maintenance of the zpool API
N: Drew Sullivan
E: drew@ss.org
W: http://www.ss.org/
@ -3789,6 +3800,10 @@ S: Department of Zoology, University of Washington
S: Seattle, WA 98195-1800
S: USA
N: York Sun
E: york.sun@nxp.com
D: Freescale DDR EDAC
N: Eugene Surovegin
E: ebs@ebshome.net
W: https://kernel.ebshome.net/
@ -4280,6 +4295,10 @@ S: Pipers Way
S: Swindon. SN3 1RJ
S: England
N: Vitaly Wool
E: vitaly.wool@konsulko.com
D: Maintenance and development of zswap
N: Chris Wright
E: chrisw@sous-sol.org
D: hacking on LSM framework and security modules.

View File

@ -0,0 +1,12 @@
What: /sys/fs/selinux/user
Date: April 2005 (predates git)
KernelVersion: 2.6.12-rc2 (predates git)
Contact: selinux@vger.kernel.org
Description:
The selinuxfs "user" node allows userspace to request a list
of security contexts that can be reached for a given SELinux
user from a given starting context. This was used by libselinux
when various login-style programs requested contexts for
users, but libselinux stopped using it in 2020.
Kernel support will be removed no sooner than Dec 2025.

View File

@ -424,6 +424,13 @@ Description:
[RW] This file is used to control (on/off) the iostats
accounting of the disk.
What: /sys/block/<disk>/queue/iostats_passthrough
Date: October 2024
Contact: linux-block@vger.kernel.org
Description:
[RW] This file is used to control (on/off) the iostats
accounting of the disk for passthrough commands.
What: /sys/block/<disk>/queue/logical_block_size
Date: May 2009
@ -594,6 +601,9 @@ Description:
[RW] Maximum number of kilobytes to read-ahead for filesystems
on this block device.
For MADV_HUGEPAGE, the readahead size may exceed this setting
since its granularity is based on the hugepage size.
What: /sys/block/<disk>/queue/rotational
Date: January 2009

View File

@ -11,7 +11,7 @@ Description:
Read returns '0' or '1' for read-write or read-only modes
respectively.
Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
and "off", i.e. what kstrbool() supports.
and "off", i.e. what kstrtobool() supports.
Note: This file is only present if CONFIG_NVMEM_SYSFS
is enabled.

View File

@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called
the vDSO and it often contains useful and highly-optimized alternatives
to real syscalls.
These functions are called just like ordinary C function according to
your platform's ABI. Call them from a sensible context. (For example,
if you set CS on x86 to something strange, the vDSO functions are
These functions are called according to your platform's ABI. On many
platforms they are called just like ordinary C function. On other platforms
(ex: powerpc) they are called with the same convention as system calls which
is different from ordinary C functions. Call them from a sensible context.
(For example, if you set CS on x86 to something strange, the vDSO functions are
within their rights to crash.) In addition, if you pass a bad
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.

View File

@ -6,3 +6,10 @@ Description:
This item contains just one readonly attribute: port_num.
It contains the port number of the /dev/ttyGS<n> device
associated with acm function's instance "name".
What: /config/usb-gadget/gadget/functions/acm.name/protocol
Date: Aug 2024
KernelVersion: 6.13
Description:
Reported bInterfaceProtocol for the ACM device. For legacy
reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).

View File

@ -30,4 +30,12 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
p_it_name playback input terminal name
p_it_ch_name playback channels name
p_ot_name playback output terminal name
p_fu_vol_name playback mute/volume functional unit name
c_it_name capture input terminal name
c_it_ch_name capture channels name
c_ot_name capture output terminal name
c_fu_vol_name capture mute/volume functional unit name
===================== =======================================

View File

@ -35,6 +35,17 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
if_ctrl_name topology control name
clksrc_in_name input clock name
clksrc_out_name output clock name
p_it_name playback input terminal name
p_it_ch_name playback input first channel name
p_ot_name playback output terminal name
p_fu_vol_name playback mute/volume function unit name
c_it_name capture input terminal name
c_it_ch_name capture input first channel name
c_ot_name capture output terminal name
c_fu_vol_name capture mute/volume functional unit name
c_terminal_type code of the capture terminal type
p_terminal_type code of the playback terminal type
===================== =======================================

View File

@ -184,3 +184,10 @@ Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of time out requests.
Available for both PF and VF, and take no other effect on HPRE.
What: /sys/kernel/debug/hisi_hpre/<bdf>/cap_regs
Date: Oct 2024
Contact: linux-crypto@vger.kernel.org
Description: Dump the values of the qm and hpre capability bit registers and
support the query of device specifications to facilitate fault locating.
Available for both PF and VF, and take no other effect on HPRE.

View File

@ -157,3 +157,10 @@ Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of completed but marked error requests
to be received.
Available for both PF and VF, and take no other effect on SEC.
What: /sys/kernel/debug/hisi_sec2/<bdf>/cap_regs
Date: Oct 2024
Contact: linux-crypto@vger.kernel.org
Description: Dump the values of the qm and sec capability bit registers and
support the query of device specifications to facilitate fault locating.
Available for both PF and VF, and take no other effect on SEC.

View File

@ -158,3 +158,10 @@ Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of BD type error requests
to be received.
Available for both PF and VF, and take no other effect on ZIP.
What: /sys/kernel/debug/hisi_zip/<bdf>/cap_regs
Date: Oct 2024
Contact: linux-crypto@vger.kernel.org
Description: Dump the values of the qm and zip capability bit registers and
support the query of device specifications to facilitate fault locating.
Available for both PF and VF, and take no other effect on ZIP.

View File

@ -0,0 +1,39 @@
What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
This dumps the calibration table that was filled during the
digital interface tuning process.
What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
List all the available test tones:
- off
- midscale_short
- pos_fullscale
- neg_fullscale
- checkerboard
- prbs23
- prbs9
- one_zero_toggle
- user
- bit_toggle
- sync
- one_bit_high
- mixed_bit_frequency
- ramp
Note that depending on the actual device being used, some of the
above might not be available (and they won't be listed when
reading the file).
What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Writing to this file will initiate one of available test tone on
channel Y. Reading it, shows which test is running. In cases
where an IIO backend is available and supports the test tone,
additional information about the data correctness is given.

View File

@ -0,0 +1,20 @@
What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Name of Backend Y connected to device X.
What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Directly access the registers of backend Y. Typical usage is:
Reading address 0x50
echo 0x50 > direct_reg_access
cat direct_reg_access
Writing address 0x50
echo 0x50 0x3 > direct_reg_access
//readback address 0x50
cat direct_reg_access

View File

@ -151,3 +151,10 @@ Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
Description:
The recompress file is write-only and triggers re-compression
with secondary compression algorithms.
What: /sys/block/zram<id>/algorithm_params
Date: August 2024
Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
Description:
The algorithm_params file is write-only and is used to setup
compression algorithm parameters.

View File

@ -523,13 +523,27 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -541,6 +555,10 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
KernelVersion: 5.8
Contact: linux-iio@vger.kernel.org
Description:
@ -549,25 +567,34 @@ Description:
- a small discrete set of values like "0 2 4 6 8"
- a range specified as "[min step max]"
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -575,6 +602,20 @@ Description:
production inaccuracies). If shared across all channels,
<type>_calibscale is used.
What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
KernelVersion: 4.8
Contact: linux-iio@vger.kernel.org
Description:
Available values of calibscale. Maybe expressed as either of:
- a small discrete set of values like "1 8 16"
- a range specified as "[min step max]"
If shared across all channels, <type>_calibscale_available is used.
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
@ -708,6 +749,7 @@ Description:
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
42kohm_to_gnd: connected to ground via a 42kOhm resistor,
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
@ -2289,3 +2331,11 @@ KernelVersion: 6.7
Contact: linux-iio@vger.kernel.org
Description:
List of available timeout value for tap gesture confirmation.
What: /sys/.../iio:deviceX/in_shunt_resistor
What: /sys/.../iio:deviceX/in_current_shunt_resistor
What: /sys/.../iio:deviceX/in_power_shunt_resistor
KernelVersion: 6.10
Contact: linux-iio@vger.kernel.org
Description:
The value of current sense resistor in Ohms.

View File

@ -1,17 +0,0 @@
What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
Date: March 2017
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description: The value of the shunt resistor used to compute power drain on
common input voltage pin (RS+). In Ohms.
What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
Date: March 2017
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description: The value of the shunt resistor used to compute current flowing
between RS+ and RS- voltage sense inputs. In Ohms.
These attributes describe a single physical component, exposed as two distinct
attributes as it is used to calculate two different values: power load and
current flowing between RS+ and RS- inputs.

View File

@ -15,17 +15,3 @@ Description:
Set the relative humidity. This value is sent to the sensor for
humidity compensation.
Default value: 50000 (50 % relative humidity)
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
Date: August 2021
KernelVersion: 5.15
Contact: Andreas Klinger <ak@it-klinger.de>
Description:
Set the bias value for the resistance which is used for
calculation of in_concentration_input as follows:
x = (in_resistance_raw - in_resistance_calibbias) * 0.65
in_concentration_input = 500 / (1 + e^x)
Default value: 30000

View File

@ -0,0 +1,61 @@
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
is useful when one wants to change the DAC output codes. For
autonomous toggling, the way it should be done is:
- disable toggle operation;
- change out_currentY_rawN, where N is the integer value of the symbol;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
This attribute has the same meaning as out_currentY_raw. It is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_N (_rawN), where N is the integer value of the symbol.
The same scale and offset as in out_currentY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW switch to a predefined output symbol. This attribute
is specific to toggle enabled channels and allows switching between
multiple predefined symbols. Each symbol corresponds to a different
output, denoted as out_currentY_rawN, where N is the integer value
of the symbol. Writing an integer value N will select out_currentY_rawN.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
is useful when one wants to change the DAC output codes. For
autonomous toggling, the way it should be done is:
- disable toggle operation;
- change out_voltageY_rawN, where N is the integer value of the symbol;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
This attribute has the same meaning as out_currentY_raw. It is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_N (_rawN), where N is the integer value of the symbol.
The same scale and offset as in out_currentY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW switch to a predefined output symbol. This attribute
is specific to toggle enabled channels and allows switching between
multiple predefined symbols. Each symbol corresponds to a different
output, denoted as out_voltageY_rawN, where N is the integer value
of the symbol. Writing an integer value N will select out_voltageY_rawN.

View File

@ -53,34 +53,3 @@ KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Returns the available values for the dither phase.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
useful when one wants to change the DAC output codes. The way it should
be done is:
- disable toggle operation;
- change out_voltageY_raw0 and out_voltageY_raw1;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
It has the same meaning as out_voltageY_raw. This attribute is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
as in out_voltageY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW toggle. This attribute is specific to toggle
enabled channels and allows to toggle between out_voltageY_raw0
and out_voltageY_raw1 through software. Writing 0 will select
out_voltageY_raw0 while 1 selects out_voltageY_raw1.

View File

@ -3,7 +3,7 @@ KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Reading this returns the valid values that can be written to the
on_altvoltage0_mode attribute:
filter_mode attribute:
- auto -> Adjust bandpass filter to track changes in input clock rate.
- manual -> disable/unregister the clock rate notifier / input clock tracking.

View File

@ -13,12 +13,3 @@ Description:
available for reading data. However, samples can be occasionally skipped
or repeated, depending on the beat between the capture and conversion
rates.
What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
Date: December 2015
KernelVersion: 4.4
Contact: linux-iio@vger.kernel.org
Description:
The value of the shunt resistor may be known only at runtime fom an
eeprom content read by a client application. This attribute allows to
set its value in ohms.

View File

@ -500,3 +500,75 @@ Description:
console drivers from the device. Raw users of pci-sysfs
resourceN attributes must be terminated prior to resizing.
Success of the resizing operation is not guaranteed.
What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
What: /sys/class/leds/*:enclosure:*/brightness
Date: August 2024
KernelVersion: 6.12
Description:
LED indications on PCIe storage enclosures which are controlled
through the NPEM interface (Native PCIe Enclosure Management,
PCIe r6.1 sec 6.28) are accessible as led class devices, both
below /sys/class/leds and below NPEM-capable PCI devices.
Although these led class devices could be manipulated manually,
in practice they are typically manipulated automatically by an
application such as ledmon(8).
The name of a led class device is as follows:
<bdf>:enclosure:<indication>
where:
- <bdf> is the domain, bus, device and function number
(e.g. 10000:02:05.0)
- <indication> is a short description of the LED indication
Valid indications per PCIe r6.1 table 6-27 are:
- ok (drive is functioning normally)
- locate (drive is being identified by an admin)
- fail (drive is not functioning properly)
- rebuild (drive is part of an array that is rebuilding)
- pfa (drive is predicted to fail soon)
- hotspare (drive is marked to be used as a replacement)
- ica (drive is part of an array that is degraded)
- ifa (drive is part of an array that is failed)
- idt (drive is not the right type for the connector)
- disabled (drive is disabled, removal is safe)
- specific0 to specific7 (enclosure-specific indications)
Broadly, the indications fall into one of these categories:
- to signify drive state (ok, locate, fail, idt, disabled)
- to signify drive role or state in a software RAID array
(rebuild, pfa, hotspare, ica, ifa)
- to signify any other role or state (specific0 to specific7)
Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
ok, locate, fail, rebuild. All others are optional.
A led class device is only visible if the corresponding
indication is supported by the device.
To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
to the "brightness" file. Note that manipulating an indication
may implicitly manipulate other indications at the vendor's
discretion. E.g. when the user lights up the "ok" indication,
the vendor may choose to automatically turn off the "fail"
indication. The current state of an indication can be
retrieved by reading its "brightness" file.
The PCIe Base Specification allows vendors leeway to choose
different colors or blinking patterns for the indications,
but they typically follow the IBPI standard. E.g. the "locate"
indication is usually presented as one or two LEDs blinking at
4 Hz frequency:
https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
to facilitate shared access by operating system and platform
firmware to a device's NPEM registers. The kernel will use
this DSM interface where available, instead of accessing NPEM
registers directly. The DSM interface does not support the
enclosure-specific indications "specific0" to "specific7",
hence the corresponding led class devices are unavailable if
the DSM interface is used.

View File

@ -377,17 +377,33 @@ What: /sys/class/power_supply/<supply_name>/charge_type
Date: July 2009
Contact: linux-pm@vger.kernel.org
Description:
Represents the type of charging currently being applied to the
battery. "Trickle", "Fast", and "Standard" all mean different
charging speeds. "Adaptive" means that the charger uses some
algorithm to adjust the charge rate dynamically, without
any user configuration required. "Custom" means that the charger
uses the charge_control_* properties as configuration for some
different algorithm. "Long Life" means the charger reduces its
charging rate in order to prolong the battery health. "Bypass"
means the charger bypasses the charging path around the
integrated converter allowing for a "smart" wall adaptor to
perform the power conversion externally.
Select the charging algorithm to use for a battery.
Standard:
Fully charge the battery at a moderate rate.
Fast:
Quickly charge the battery using fast-charge
technology. This is typically harder on the battery
than standard charging and may lower its lifespan.
Trickle:
Users who primarily operate the system while
plugged into an external power source can extend
battery life with this mode. Vendor tooling may
call this "Primarily AC Use".
Adaptive:
Automatically optimize battery charge rate based
on typical usage pattern.
Custom:
Use the charge_control_* properties to determine
when to start and stop charging. Advanced users
can use this to drastically extend battery life.
Long Life:
The charger reduces its charging rate in order to
prolong the battery health.
Bypass:
The charger bypasses the charging path around the
integrated converter allowing for a "smart" wall
adaptor to perform the power conversion externally.
Access: Read, Write
@ -592,7 +608,12 @@ Description:
the supply, for example it can show if USB-PD capable source
is attached.
Access: Read-Only
Access: For power-supplies which consume USB power such
as battery charger chips, this indicates the type of
the connected USB power source and is Read-Only.
For power-supplies which act as a USB power-source such as
e.g. the UCS1002 USB Port Power Controller this is writable.
Valid values:
"Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",

View File

@ -0,0 +1,15 @@
What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
Date: May 2024
KernelVersion: 6.10
Contact: op-tee@lists.trustedfirmware.org
Description:
RPMB frames can be routed to the RPMB device via the
user-space daemon tee-supplicant or the RPMB subsystem
in the kernel. The value "user" means that the driver
will route the RPMB frames via user space. Conversely,
"kernel" means that the frames are routed via the RPMB
subsystem without assistance from tee-supplicant. It
should be assumed that RPMB frames are routed via user
space if the variable is absent. The primary purpose
of this variable is to let systemd know whether
tee-supplicant is needed in the early boot with initramfs.

View File

@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
(RO) indicates whether or not the kernel directly supports
modifying the crash elfcorehdr for memory hot un/plug and/or
on/offline changes.
(RO) indicates whether or not the kernel updates relevant kexec
segments on memory hot un/plug and/or on/offline events, avoiding the
need to reload kdump kernel.

View File

@ -704,9 +704,9 @@ What: /sys/devices/system/cpu/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
(RO) indicates whether or not the kernel directly supports
modifying the crash elfcorehdr for CPU hot un/plug and/or
on/offline changes.
(RO) indicates whether or not the kernel updates relevant kexec
segments on memory hot un/plug and/or on/offline events, avoiding the
need to reload kdump kernel.
What: /sys/devices/system/cpu/enabled
Date: Nov 2022

View File

@ -75,3 +75,11 @@ Description: RO. Energy input of device or gt in microjoules.
for the gt.
Only supported for particular Intel i915 graphics platforms.
What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/fan1_input
Date: November 2024
KernelVersion: 6.12
Contact: intel-gfx@lists.freedesktop.org
Description: RO. Fan speed of device in RPM.
Only supported for particular Intel i915 graphics platforms.

View File

@ -1532,3 +1532,30 @@ Contact: Bean Huo <beanhuo@micron.com>
Description:
rtc_update_ms indicates how often the host should synchronize or update the
UFS RTC. If set to 0, this will disable UFS RTC periodic update.
What: /sys/devices/platform/.../ufshci_capabilities/version
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: UFS version register.
Symbol - VER. This file shows the UFSHCD version.
Example: Version 3.12 would be represented as 0000_0312h.
The file is read only.
What: /sys/devices/platform/.../ufshci_capabilities/product_id
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: product ID register.
Symbol - HCPID. This file shows the UFSHCD product id.
The content of this register is vendor specific.
The file is read only.
What: /sys/devices/platform/.../ufshci_capabilities/man_id
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: manufacturer ID register.
Symbol - HCMID. This file shows the UFSHCD manufacturer id.
The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
The file is read only.

View File

@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young
candidates whose age is not beyond the threshold, by default it was
initialized as 604800 seconds (equals to 7 days).
What: /sys/fs/f2fs/<disk>/atgc_enabled
Date: Feb 2024
Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
Description: It represents whether ATGC is on or off. The value is 1 which
indicates that ATGC is on, and 0 indicates that it is off.
What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
@ -763,3 +769,53 @@ Date: November 2023
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls to enable/disable IO aware feature for background discard.
By default, the value is 1 which indicates IO aware is on.
What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
Date: July 2024
Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
Description: The zone UFS we are currently using consists of two parts:
conventional zones and sequential zones. It can be used to control which part
to prioritize for writes, with a default value of 0.
======================== =========================================
value description
blkzone_alloc_policy = 0 Prioritize writing to sequential zones
blkzone_alloc_policy = 1 Only allow writing to sequential zones
blkzone_alloc_policy = 2 Prioritize writing to conventional zones
======================== =========================================
What: /sys/fs/f2fs/<disk>/migration_window_granularity
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Controls migration window granularity of garbage collection on large
section. it can control the scanning window granularity for GC migration
in a unit of segment, while migration_granularity controls the number
of segments which can be migrated at the same turn.
What: /sys/fs/f2fs/<disk>/reserved_segments
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: In order to fine tune GC behavior, we can control the number of
reserved segments.
What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: If the percentage of free sections over total sections is above this
number, F2FS do not garbage collection for zoned devices through the
background GC thread. the default number is "60".
What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: If the percentage of free sections over total sections is under this
number, F2FS boosts garbage collection for zoned devices through the
background GC thread. the default number is "25".
What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: It controls the valid block ratio threshold not to trigger excessive GC
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
background GC thread from intiating GC for sections having valid blocks
exceeding the ratio.

View File

@ -52,7 +52,7 @@ driver generally needs to perform the following initialization:
- Enable DMA/processing engines
When done using the device, and perhaps the module needs to be unloaded,
the driver needs to take the follow steps:
the driver needs to take the following steps:
- Disable the device from generating IRQs
- Release the IRQ (free_irq())

View File

@ -921,10 +921,10 @@ This portion of the ``rcu_data`` structure is declared as follows:
::
1 int dynticks_snap;
1 int watching_snap;
2 unsigned long dynticks_fqs;
The ``->dynticks_snap`` field is used to take a snapshot of the
The ``->watching_snap`` field is used to take a snapshot of the
corresponding CPU's dyntick-idle state when forcing quiescent states,
and is therefore accessed from other CPUs. Finally, the
``->dynticks_fqs`` field is used to count the number of times this CPU
@ -935,8 +935,8 @@ This portion of the rcu_data structure is declared as follows:
::
1 long dynticks_nesting;
2 long dynticks_nmi_nesting;
1 long nesting;
2 long nmi_nesting;
3 atomic_t dynticks;
4 bool rcu_need_heavy_qs;
5 bool rcu_urgent_qs;
@ -945,14 +945,14 @@ These fields in the rcu_data structure maintain the per-CPU dyntick-idle
state for the corresponding CPU. The fields may be accessed only from
the corresponding CPU (and from tracing) unless otherwise stated.
The ``->dynticks_nesting`` field counts the nesting depth of process
The ``->nesting`` field counts the nesting depth of process
execution, so that in normal circumstances this counter has value zero
or one. NMIs, irqs, and tracers are counted by the
``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
``->nmi_nesting`` field. Because NMIs cannot be masked, changes
to this variable have to be undertaken carefully using an algorithm
provided by Andy Lutomirski. The initial transition from idle adds one,
and nested transitions add two, so that a nesting level of five is
represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
represented by a ``->nmi_nesting`` value of nine. This counter
can therefore be thought of as counting the number of reasons why this
CPU cannot be permitted to enter dyntick-idle mode, aside from
process-level transitions.
@ -960,12 +960,12 @@ process-level transitions.
However, it turns out that when running in non-idle kernel context, the
Linux kernel is fully capable of entering interrupt handlers that never
exit and perhaps also vice versa. Therefore, whenever the
``->dynticks_nesting`` field is incremented up from zero, the
``->dynticks_nmi_nesting`` field is set to a large positive number, and
whenever the ``->dynticks_nesting`` field is decremented down to zero,
the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
``->nesting`` field is incremented up from zero, the
``->nmi_nesting`` field is set to a large positive number, and
whenever the ``->nesting`` field is decremented down to zero,
the ``->nmi_nesting`` field is set to zero. Assuming that
the number of misnested interrupts is not sufficient to overflow the
counter, this approach corrects the ``->dynticks_nmi_nesting`` field
counter, this approach corrects the ``->nmi_nesting`` field
every time the corresponding CPU enters the idle loop from process
context.
@ -992,8 +992,8 @@ code.
+-----------------------------------------------------------------------+
| **Quick Quiz**: |
+-----------------------------------------------------------------------+
| Why not simply combine the ``->dynticks_nesting`` and |
| ``->dynticks_nmi_nesting`` counters into a single counter that just |
| Why not simply combine the ``->nesting`` and |
| ``->nmi_nesting`` counters into a single counter that just |
| counts the number of reasons that the corresponding CPU is non-idle? |
+-----------------------------------------------------------------------+
| **Answer**: |

View File

@ -147,10 +147,10 @@ RCU read-side critical sections preceding and following the current
idle sojourn.
This case is handled by calls to the strongly ordered
``atomic_add_return()`` read-modify-write atomic operation that
is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
is invoked within ``ct_kernel_exit_state()`` at idle-entry
time and within ``ct_kernel_enter_state()`` at idle-exit time.
The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
(both of which rely on acquire semantics) to detect idle CPUs.
+-----------------------------------------------------------------------+

View File

@ -528,7 +528,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@ -537,7 +537,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
</g>
<g
id="g4504"
@ -607,7 +607,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -638,7 +638,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -844,7 +844,7 @@
font-style="normal"
y="1547.8876"
x="4417.6396"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(6501.9719,-10685.904)"
@ -899,7 +899,7 @@
font-style="normal"
y="1858.8729"
x="4414.1836"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="14659.87"
@ -977,7 +977,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -1008,7 +1008,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 50 KiB

View File

@ -2974,7 +2974,7 @@
font-style="normal"
y="38114.047"
x="-334.33856"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(1749.9916,25880.249)"
@ -3029,7 +3029,7 @@
font-style="normal"
y="38425.035"
x="-337.79462"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="9907.8887"
@ -3107,7 +3107,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -3138,7 +3138,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 208 KiB

After

Width:  |  Height:  |  Size: 208 KiB

View File

@ -516,7 +516,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@ -525,7 +525,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
sodipodi:linespacing="125%"
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -2649,8 +2649,7 @@ those that are idle from RCU's perspective) and then Tasks Rude RCU can
be removed from the kernel.
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
and rcu_barrier_tasks_rude().
consisting solely of synchronize_rcu_tasks_rude().
Tasks Trace RCU
~~~~~~~~~~~~~~~

View File

@ -194,14 +194,13 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
the callback function may be invoked from softirq context,
and in any case with bottom halves disabled. In particular,
this callback function cannot block. If you need the callback
to block, run that code in a workqueue handler scheduled from
the callback. The queue_rcu_work() function does this for you
in the case of call_rcu().
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
call_rcu_tasks_trace() is used, the callback function may be
invoked from softirq context, and in any case with bottom halves
disabled. In particular, this callback function cannot block.
If you need the callback to block, run that code in a workqueue
handler scheduled from the callback. The queue_rcu_work()
function does this for you in the case of call_rcu().
6. Since synchronize_rcu() can block, it cannot be called
from any sort of irq context. The same rule applies
@ -254,10 +253,10 @@ over a rather long period of time, but improvements are always welcome!
corresponding readers must use rcu_read_lock_trace()
and rcu_read_unlock_trace().
c. If an updater uses call_rcu_tasks_rude() or
synchronize_rcu_tasks_rude(), then the corresponding
readers must use anything that disables preemption,
for example, preempt_disable() and preempt_enable().
c. If an updater uses synchronize_rcu_tasks_rude(),
then the corresponding readers must use anything that
disables preemption, for example, preempt_disable()
and preempt_enable().
Mixing things up will result in confusion and broken kernels, and
has even resulted in an exploitable security issue. Therefore,
@ -326,11 +325,9 @@ over a rather long period of time, but improvements are always welcome!
d. Periodically invoke rcu_barrier(), permitting a limited
number of updates per grace period.
The same cautions apply to call_srcu(), call_rcu_tasks(),
call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
why there is an srcu_barrier(), rcu_barrier_tasks(),
rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
respectively.
The same cautions apply to call_srcu(), call_rcu_tasks(), and
call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
Note that although these primitives do take action to avoid
memory exhaustion when any given CPU has too many callbacks,
@ -383,17 +380,17 @@ over a rather long period of time, but improvements are always welcome!
must use whatever locking or other synchronization is required
to safely access and/or modify that data structure.
Do not assume that RCU callbacks will be executed on
the same CPU that executed the corresponding call_rcu(),
call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or
call_rcu_tasks_trace(). For example, if a given CPU goes offline
while having an RCU callback pending, then that RCU callback
will execute on some surviving CPU. (If this was not the case,
a self-spawning RCU callback would prevent the victim CPU from
ever going offline.) Furthermore, CPUs designated by rcu_nocbs=
might well *always* have their RCU callbacks executed on some
other CPUs, in fact, for some real-time workloads, this is the
whole point of using the rcu_nocbs= kernel boot parameter.
Do not assume that RCU callbacks will be executed on the same
CPU that executed the corresponding call_rcu(), call_srcu(),
call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
a given CPU goes offline while having an RCU callback pending,
then that RCU callback will execute on some surviving CPU.
(If this was not the case, a self-spawning RCU callback would
prevent the victim CPU from ever going offline.) Furthermore,
CPUs designated by rcu_nocbs= might well *always* have their
RCU callbacks executed on some other CPUs, in fact, for some
real-time workloads, this is the whole point of using the
rcu_nocbs= kernel boot parameter.
In addition, do not assume that callbacks queued in a given order
will be invoked in that order, even if they all are queued on the
@ -507,9 +504,9 @@ over a rather long period of time, but improvements are always welcome!
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
17. If you pass a callback function defined within a module to one of
call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
or call_rcu_tasks_trace(), then it is necessary to wait for all
17. If you pass a callback function defined within a module
to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
call_rcu_tasks_trace(), then it is necessary to wait for all
pending callbacks to be invoked before unloading that module.
Note that it is absolutely *not* sufficient to wait for a grace
period! For example, synchronize_rcu() implementation is *not*
@ -522,7 +519,6 @@ over a rather long period of time, but improvements are always welcome!
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
- call_rcu_tasks() -> rcu_barrier_tasks()
- call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
- call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
However, these barrier functions are absolutely *not* guaranteed
@ -539,7 +535,6 @@ over a rather long period of time, but improvements are always welcome!
- Either synchronize_srcu() or synchronize_srcu_expedited(),
together with and srcu_barrier()
- synchronize_rcu_tasks() and rcu_barrier_tasks()
- synchronize_tasks_rude() and rcu_barrier_tasks_rude()
- synchronize_tasks_trace() and rcu_barrier_tasks_trace()
If necessary, you can use something like workqueues to execute

View File

@ -249,7 +249,7 @@ ticks this GP)" indicates that this CPU has not taken any scheduling-clock
interrupts during the current stalled grace period.
The "idle=" portion of the message prints the dyntick-idle state.
The hex number before the first "/" is the low-order 12 bits of the
The hex number before the first "/" is the low-order 16 bits of the
dynticks counter, which will have an even-numbered value if the CPU
is in dyntick-idle mode and an odd-numbered value otherwise. The hex
number between the two "/"s is the value of the nesting, which will be

View File

@ -1103,7 +1103,7 @@ RCU-Tasks-Rude::
Critical sections Grace period Barrier
N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
N/A N/A
synchronize_rcu_tasks_rude

View File

@ -93,7 +93,7 @@ commands (does not impact QAIC).
uAPI
====
QAIC creates an accel device per phsyical PCIe device. This accel device exists
QAIC creates an accel device per physical PCIe device. This accel device exists
for as long as the PCIe device is known to Linux.
The PCIe device may not be in the state to accept requests from userspace at
@ -147,12 +147,6 @@ DRM_IOCTL_QAIC_PERF_STATS_BO
recent execution of a BO. This allows userspace to construct an end to end
timeline of the BO processing for a performance analysis.
DRM_IOCTL_QAIC_PART_DEV
This IOCTL allows userspace to request a duplicate "shadow device". This extra
accelN device is associated with a specific partition of resources on the
AIC100 device and can be used for limiting a process to some subset of
resources.
DRM_IOCTL_QAIC_DETACH_SLICE_BO
This IOCTL allows userspace to remove the slicing information from a BO that
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This

View File

@ -223,7 +223,10 @@ are signed through the PKCS#7 message format to enforce some level of
authorization of the policies (prohibiting an attacker from gaining
unconstrained root, and deploying an "allow all" policy). These
policies must be signed by a certificate that chains to the
``SYSTEM_TRUSTED_KEYRING``. With openssl, the policy can be signed by::
``SYSTEM_TRUSTED_KEYRING``, or to the secondary and/or platform keyrings if
``CONFIG_IPE_POLICY_SIG_SECONDARY_KEYRING`` and/or
``CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING`` are enabled, respectively.
With openssl, the policy can be signed by::
openssl smime -sign \
-in "$MY_POLICY" \
@ -266,7 +269,7 @@ in the kernel. This file is write-only and accepts a PKCS#7 signed
policy. Two checks will always be performed on this policy: First, the
``policy_names`` must match with the updated version and the existing
version. Second the updated policy must have a policy version greater than
or equal to the currently-running version. This is to prevent rollback attacks.
the currently-running version. This is to prevent rollback attacks.
The ``delete`` file is used to remove a policy that is no longer needed.
This file is write-only and accepts a value of ``1`` to delete the policy.

View File

@ -102,17 +102,41 @@ Examples::
#select lzo compression algorithm
echo lzo > /sys/block/zram0/comp_algorithm
For the time being, the `comp_algorithm` content does not necessarily
show every compression algorithm supported by the kernel. We keep this
list primarily to simplify device configuration and one can configure
a new device with a compression algorithm that is not listed in
`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
and, if some of the algorithms were built as modules, it's impossible
to list all of them using, for instance, /proc/crypto or any other
method. This, however, has an advantage of permitting the usage of
custom crypto compression modules (implementing S/W or H/W compression).
For the time being, the `comp_algorithm` content shows only compression
algorithms that are supported by zram.
4) Set Disksize
4) Set compression algorithm parameters: Optional
=================================================
Compression algorithms may support specific parameters which can be
tweaked for particular dataset. ZRAM has an `algorithm_params` device
attribute which provides a per-algorithm params configuration.
For example, several compression algorithms support `level` parameter.
In addition, certain compression algorithms support pre-trained dictionaries,
which significantly change algorithms' characteristics. In order to configure
compression algorithm to use external pre-trained dictionary, pass full
path to the `dict` along with other parameters::
#pass path to pre-trained zstd dictionary
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
#same, but using algorithm priority
echo "priority=1 dict=/etc/dictioary" > \
/sys/block/zram0/algorithm_params
#pass path to pre-trained zstd dictionary and compression level
echo "algo=zstd level=8 dict=/etc/dictioary" > \
/sys/block/zram0/algorithm_params
Parameters are algorithm specific: not all algorithms support pre-trained
dictionaries, not all algorithms support `level`. Furthermore, for certain
algorithms `level` controls the compression level (the higher the value the
better the compression ratio, it even can take negatives values for some
algorithms), for other algorithms `level` is acceleration level (the higher
the value the lower the compression ratio).
5) Set Disksize
===============
Set disk size by writing the value to sysfs node 'disksize'.
@ -132,7 +156,7 @@ There is little point creating a zram of greater than twice the size of memory
since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
size of the disk when not in use so a huge zram is wasteful.
5) Set memory limit: Optional
6) Set memory limit: Optional
=============================
Set memory limit by writing the value to sysfs node 'mem_limit'.
@ -151,7 +175,7 @@ Examples::
# To disable memory limit
echo 0 > /sys/block/zram0/mem_limit
6) Activate
7) Activate
===========
::
@ -162,7 +186,7 @@ Examples::
mkfs.ext4 /dev/zram1
mount /dev/zram1 /tmp
7) Add/remove zram devices
8) Add/remove zram devices
==========================
zram provides a control interface, which enables dynamic (on-demand) device
@ -182,7 +206,7 @@ execute::
echo X > /sys/class/zram-control/hot_remove
8) Stats
9) Stats
========
Per-device statistics are exported as various nodes under /sys/block/zram<id>/
@ -205,6 +229,7 @@ writeback_limit_enable RW show and set writeback_limit feature
max_comp_streams RW the number of possible concurrent compress
operations
comp_algorithm RW show and change the compression algorithm
algorithm_params WO setup compression algorithm parameters
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
@ -283,15 +308,15 @@ a single line of text and contains the following stats separated by whitespace:
Unit: 4K bytes
============== =============================================================
9) Deactivate
=============
10) Deactivate
==============
::
swapoff /dev/zram0
umount /dev/zram1
10) Reset
11) Reset
=========
Write any positive value to 'reset' sysfs node::
@ -487,11 +512,14 @@ registered compression algorithms, increases our chances of finding the
algorithm that successfully compresses a particular page. Sometimes, however,
it is convenient (and sometimes even necessary) to limit recompression to
only one particular algorithm so that it will not try any other algorithms.
This can be achieved by providing a algo=NAME parameter:::
This can be achieved by providing a `algo` or `priority` parameter:::
#use zstd algorithm only (if registered)
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
#use zstd algorithm only (if zstd was registered under priority 1)
echo "type=huge priority=1" > /sys/block/zramX/recompress
memory tracking
===============

View File

@ -1,76 +1,165 @@
Bisecting a bug
+++++++++++++++
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
.. [see the bottom of this file for redistribution information]
Last updated: 28 October 2016
======================
Bisecting a regression
======================
Introduction
============
This document describes how to use a ``git bisect`` to find the source code
change that broke something -- for example when some functionality stopped
working after upgrading from Linux 6.0 to 6.1.
Always try the latest kernel from kernel.org and build from source. If you are
not confident in doing that please report the bug to your distribution vendor
instead of to a kernel developer.
The text focuses on the gist of the process. If you are new to bisecting the
kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
instead: it depicts everything from start to finish while covering multiple
aspects even kernel developers occasionally forget. This includes detecting
situations early where a bisection would be a waste of time, as nobody would
care about the result -- for example, because the problem happens after the
kernel marked itself as 'tainted', occurs in an abandoned version, was already
fixed, or is caused by a .config change you or your Linux distributor performed.
Finding bugs is not always easy. Have a go though. If you can't find it don't
give up. Report as much as you have found to the relevant maintainer. See
MAINTAINERS for who that is for the subsystem you have worked on.
Finding the change causing a kernel issue using a bisection
===========================================================
Before you submit a bug report read
'Documentation/admin-guide/reporting-issues.rst'.
*Note: the following process assumes you prepared everything for a bisection.
This includes having a Git clone with the appropriate sources, installing the
software required to build and install kernels, as well as a .config file stored
in a safe place (the following example assumes '~/prepared_kernel_.config') to
use as pristine base at each bisection step; ideally, you have also worked out
a fully reliable and straight-forward way to reproduce the regression, too.*
Devices not appearing
=====================
* Preparation: start the bisection and tell Git about the points in the history
you consider to be working and broken, which Git calls 'good' and 'bad'::
Often this is caused by udev/systemd. Check that first before blaming it
on the kernel.
git bisect start
git bisect good v6.0
git bisect bad v6.1
Finding patch that caused a bug
===============================
Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
Using the provided tools with ``git`` makes finding bugs easy provided the bug
is reproducible.
1. Copy your prepared .config into the build directory and adjust it to the
needs of the codebase Git checked out for testing::
Steps to do it:
cp ~/prepared_kernel_.config .config
make olddefconfig
- build the Kernel from its git source
- start bisect with [#f1]_::
2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
for example, when a compile error happens at the current stage of the
bisection a later change resolves. In such cases run ``git bisect skip`` and
go back to step 1.
$ git bisect start
3. Check if the functionality that regressed works in the kernel you just built.
- mark the broken changeset with::
If it works, execute::
$ git bisect bad [commit]
git bisect good
- mark a changeset where the code is known to work with::
If it is broken, run::
$ git bisect good [commit]
git bisect bad
- rebuild the Kernel and test
- interact with git bisect by using either::
Note, getting this wrong just once will send the rest of the bisection
totally off course. To prevent having to start anew later you thus want to
ensure what you tell Git is correct; it is thus often wise to spend a few
minutes more on testing in case your reproducer is unreliable.
$ git bisect good
After issuing one of these two commands, Git will usually check out another
bisection point and print something like 'Bisecting: 675 revisions left to
test after this (roughly 10 steps)'. In that case go back to step 1.
or::
If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
is the first bad commit', then you have finished the bisection. In that case
move to the next point below. Note, right after displaying that line Git will
show some details about the culprit including its patch description; this can
easily fill your terminal, so you might need to scroll up to see the message
mentioning the culprit's commit-id.
$ git bisect bad
In case you missed Git's output, you can always run ``git bisect log`` to
print the status: it will show how many steps remain or mention the result of
the bisection.
depending if the bug happened on the changeset you're testing
- After some interactions, git bisect will give you the changeset that
likely caused the bug.
* Recommended complementary task: put the bisection log and the current .config
file aside for the bug report; furthermore tell Git to reset the sources to
the state before the bisection::
- For example, if you know that the current version is bad, and version
4.8 is good, you could do::
git bisect log > ~/bisection-log
cp .config ~/bisection-config-culprit
git bisect reset
$ git bisect start
$ git bisect bad # Current version is bad
$ git bisect good v4.8
* Recommended optional task: try reverting the culprit on top of the latest
codebase and check if that fixes your bug; if that is the case, it validates
the bisection and enables developers to resolve the regression through a
revert.
To try this, update your clone and check out latest mainline. Then tell Git
to revert the change by specifying its commit-id::
.. [#f1] You can, optionally, provide both good and bad arguments at git
start with ``git bisect start [BAD] [GOOD]``
git revert --no-edit cafec0cacaca0
For further references, please read:
Git might reject this, for example when the bisection landed on a merge
commit. In that case, abandon the attempt. Do the same, if Git fails to revert
the culprit on its own because later changes depend on it -- at least unless
you bisected a stable or longterm kernel series, in which case you want to
check out its latest codebase and try a revert there.
- The man page for ``git-bisect``
- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
If a revert succeeds, build and test another kernel to check if reverting
resolved your regression.
With that the process is complete. Now report the regression as described by
Documentation/admin-guide/reporting-issues.rst.
Bisecting linux-next
--------------------
If you face a problem only happening in linux-next, bisect between the
linux-next branches 'stable' and 'master'. The following commands will start
the process for a linux-next tree you added as a remote called 'next'::
git bisect start
git bisect good next/stable
git bisect bad next/master
The 'stable' branch refers to the state of linux-mainline that the current
linux-next release (found in the 'master' branch) is based on -- the former
thus should be free of any problems that show up in -next, but not in Linus'
tree.
This will bisect across a wide range of changes, some of which you might have
used in earlier linux-next releases without problems. Sadly there is no simple
way to avoid checking them: bisecting from one linux-next release to a later
one (say between 'next-20241020' and 'next-20241021') is impossible, as they
share no common history.
Additional reading material
---------------------------
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
in the Git documentation.
* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
from kernel developer Nathan Chancellor.
* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
..
end-of-content
..
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
you spot a typo or small mistake, feel free to let him know directly and
he'll fix it. You are free to do the same in a mostly informal way if you
want to contribute changes to the text -- but for copyright reasons please CC
linux-doc@vger.kernel.org and 'sign-off' your contribution as
Documentation/process/submitting-patches.rst explains in the section 'Sign
your work - the Developer's Certificate of Origin'.
..
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
of the file. If you want to distribute this text under CC-BY-4.0 only,
please use 'The Linux kernel development community' for author attribution
and link this as source:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
..
Note: Only the content of this RST file as found in the Linux kernel sources
is available under CC-BY-4.0, as versions of this text that were processed
(for example by the kernel's build system) might contain content taken from
files which use a more restrictive license.

View File

@ -244,14 +244,14 @@ Reporting the bug
Once you find where the bug happened, by inspecting its location,
you could either try to fix it yourself or report it upstream.
In order to report it upstream, you should identify the mailing list
used for the development of the affected code. This can be done by using
the ``get_maintainer.pl`` script.
In order to report it upstream, you should identify the bug tracker, if any, or
mailing list used for the development of the affected code. This can be done by
using the ``get_maintainer.pl`` script.
For example, if you find a bug at the gspca's sonixj.c file, you can get
its maintainers with::
$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
$ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
@ -267,11 +267,12 @@ Please notice that it will point to:
- The driver maintainer (Hans Verkuil);
- The subsystem maintainer (Mauro Carvalho Chehab);
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
- The bug reporting URIs for the driver/subsystem (none in the above example).
Usually, the fastest way to have your bug fixed is to report it to mailing
list used for the development of the code (linux-media ML) copying the
driver maintainer (Hans).
If the listing contains bug reporting URIs at the end, please prefer them over
email. Otherwise, please report bugs to the mailing list used for the
development of the code (linux-media ML) copying the driver maintainer (Hans).
If you are totally stumped as to whom to send the report, and
``get_maintainer.pl`` didn't provide you anything useful, send it to

View File

@ -78,18 +78,24 @@ Brief summary of control files.
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
memory.soft_limit_in_bytes set/show soft limit of memory usage
This knob is not available on CONFIG_PREEMPT_RT systems.
This knob is deprecated and shouldn't be
used.
memory.stat show various statistics
memory.use_hierarchy set/show hierarchical account enabled
This knob is deprecated and shouldn't be
used.
memory.force_empty trigger forced page reclaim
memory.pressure_level set memory pressure notifications
This knob is deprecated and shouldn't be
used.
memory.swappiness set/show swappiness parameter of vmscan
(See sysctl's vm.swappiness)
memory.move_charge_at_immigrate set/show controls of moving charges
This knob is deprecated and shouldn't be
used.
memory.oom_control set/show oom controls.
This knob is deprecated and shouldn't be
used.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
@ -105,10 +111,18 @@ Brief summary of control files.
memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
hits limits
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
This knob is deprecated and shouldn't be
used.
==================================== ==========================================
1. History
@ -693,8 +707,10 @@ For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
# echo 1 > memory.use_hierarchy
7. Soft limits
==============
7. Soft limits (DEPRECATED)
===========================
THIS IS DEPRECATED!
Soft limits allow for greater sharing of memory. The idea behind soft limits
is to allow control groups to use as much of the memory as needed, provided
@ -834,8 +850,10 @@ It's applicable for root and non-root cgroup.
.. _cgroup-v1-memory-oom-control:
10. OOM Control
===============
10. OOM Control (DEPRECATED)
============================
THIS IS DEPRECATED!
memory.oom_control file is for OOM notification and other controls.
@ -882,8 +900,10 @@ At reading, current status of OOM is shown.
The number of processes belonging to this cgroup killed by any
kind of OOM killer.
11. Memory Pressure
===================
11. Memory Pressure (DEPRECATED)
================================
THIS IS DEPRECATED!
The pressure level notifications can be used to monitor the memory
allocation cost; based on the pressure, applications can implement

View File

@ -533,10 +533,12 @@ cgroup namespace on namespace creation.
Because the resource control interface files in a given directory
control the distribution of the parent's resources, the delegatee
shouldn't be allowed to write to them. For the first method, this is
achieved by not granting access to these files. For the second, the
kernel rejects writes to all files other than "cgroup.procs" and
"cgroup.subtree_control" on a namespace root from inside the
namespace.
achieved by not granting access to these files. For the second, files
outside the namespace should be hidden from the delegatee by the means
of at least mount namespacing, and the kernel rejects writes to all
files on a namespace root from inside the cgroup namespace, except for
those files listed in "/sys/kernel/cgroup/delegate" (including
"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
The end results are equivalent for both delegation types. Once
delegated, the user can build sub-hierarchy under the directory,
@ -981,6 +983,14 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
nr_subsys_<cgroup_subsys>
Total number of live cgroup subsystems (e.g memory
cgroup) at and beneath the current cgroup.
nr_dying_subsys_<cgroup_subsys>
Total number of dying cgroup subsystems (e.g. memory
cgroup) at and beneath the current cgroup.
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
@ -1333,11 +1343,14 @@ The following nested keys are defined.
all the existing limitations and potential future extensions.
memory.peak
A read-only single value file which exists on non-root
cgroups.
A read-write single value file which exists on non-root cgroups.
The max memory usage recorded for the cgroup and its
descendants since the creation of the cgroup.
The max memory usage recorded for the cgroup and its descendants since
either the creation of the cgroup or the most recent reset for that FD.
A write of any non-empty string to this file resets it to the
current memory usage for subsequent reads through the same
file descriptor.
memory.oom.group
A read-write single value file which exists on non-root
@ -1586,6 +1599,15 @@ The following nested keys are defined.
pglazyfreed (npn)
Amount of reclaimed lazyfree pages
swpin_zero
Number of pages swapped into memory and filled with zero, where I/O
was optimized out because the page content was detected to be zero
during swapout.
swpout_zero
Number of zero-filled pages swapped out with I/O skipped due to the
content being detected as zero.
zswpin
Number of pages moved in to memory from zswap.
@ -1614,6 +1636,25 @@ The following nested keys are defined.
Usually because failed to allocate some continuous swap space
for the huge page.
numa_pages_migrated (npn)
Number of pages migrated by NUMA balancing.
numa_pte_updates (npn)
Number of pages whose page table entries are modified by
NUMA balancing to produce NUMA hinting faults on access.
numa_hint_faults (npn)
Number of NUMA hinting faults.
pgdemote_kswapd
Number of pages demoted by kswapd.
pgdemote_direct
Number of pages demoted directly.
pgdemote_khugepaged
Number of pages demoted by khugepaged.
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
@ -1663,11 +1704,14 @@ The following nested keys are defined.
Healthy workloads are not expected to reach this limit.
memory.swap.peak
A read-only single value file which exists on non-root
cgroups.
A read-write single value file which exists on non-root cgroups.
The max swap usage recorded for the cgroup and its
descendants since the creation of the cgroup.
The max swap usage recorded for the cgroup and its descendants since
the creation of the cgroup or the most recent reset for that FD.
A write of any non-empty string to this file resets it to the
current memory usage for subsequent reads through the same
file descriptor.
memory.swap.max
A read-write single value file which exists on non-root
@ -1731,6 +1775,8 @@ The following nested keys are defined.
Note that this is subtly different from setting memory.swap.max to
0, as it still allows for pages to be written to the zswap pool.
This setting has no effect if zswap is disabled, and swapping
is allowed unless memory.swap.max is set to 0.
memory.pressure
A read-only nested-keyed file.
@ -2908,7 +2954,7 @@ following two functions.
a queue (device) has been associated with the bio and
before submission.
wbc_account_cgroup_owner(@wbc, @page, @bytes)
wbc_account_cgroup_owner(@wbc, @folio, @bytes)
Should be called for each data segment being written out.
While this function doesn't care exactly when it's called
during the writeback session, it's the easiest and most
@ -2940,8 +2986,8 @@ Deprecated v1 Core Features
- "cgroup.clone_children" is removed.
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
at the root instead.
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
"cgroup.stat" files at the root instead.
Issues with v1 and Rationales for v2

View File

@ -3,29 +3,52 @@ dm-delay
========
Device-Mapper's "delay" target delays reads and/or writes
and maps them to different devices.
and/or flushs and optionally maps them to different devices.
Parameters::
Arguments::
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
[<flush_device> <flush_offset> <flush_delay>]]
With separate write parameters, the first set is only used for reads.
Table line has to either have 3, 6 or 9 arguments:
3: apply offset and delay to read, write and flush operations on device
6: apply offset and delay to device, also apply write_offset and write_delay
to write and flush operations on optionally different write_device with
optionally different sector offset
9: same as 6 arguments plus define flush_offset and flush_delay explicitely
on/with optionally different flush_device/flush_offset.
Offsets are specified in sectors.
Delays are specified in milliseconds.
Example scripts
===============
::
#!/bin/sh
# Create device delaying rw operation for 500ms
echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
#
# Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
::
#!/bin/sh
# Create device delaying only write operation for 500ms and
# splitting reads and writes to different devices $1 $2
echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
#
# Create mapped device delaying write and flush operations for 400ms and
# splitting reads to device $1 but writes and flushs to different device $2
# to different offsets of 2048 and 4096 sectors respectively.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
::
#!/bin/sh
#
# Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms
# onto the same backing device at offset 0 sectors.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"

View File

@ -160,15 +160,24 @@ iv_large_sectors
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
if this flag is specified.
integrity_key_size:<bytes>
Use an integrity key of <bytes> size instead of using an integrity key size
of the digest size of the used HMAC algorithm.
Module parameters::
max_read_size
max_write_size
Maximum size of read or write requests. When a request larger than this size
Maximum size of read requests. When a request larger than this size
is received, dm-crypt will split the request. The splitting improves
concurrency (the split requests could be encrypted in parallel by multiple
cores), but it also causes overhead. The user should tune these parameters to
cores), but it also causes overhead. The user should tune this parameters to
fit the actual workload.
max_write_size
Maximum size of write requests. When a request larger than this size
is received, dm-crypt will split the request. The splitting improves
concurrency (the split requests could be encrypted in parallel by multiple
cores), but it also causes overhead. The user should tune this parameters to
fit the actual workload.

View File

@ -251,7 +251,12 @@ The messages are:
by the vdostats userspace program to interpret the output
buffer.
dump:
config:
Outputs useful vdo configuration information. Mostly used
by users who want to recreate a similar VDO volume and
want to know the creation configuration used.
dump:
Dumps many internal structures to the system log. This is
not always safe to run, so it should only be used to debug
a hung vdo. Optional parameters to specify structures to

View File

@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
nouser_xattr
Disables Extended User Attributes. See the attr(5) manual page for
more information about extended attributes.
noacl
This option disables POSIX Access Control List support. If ACL support
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
is enabled by default on mount. See the acl(5) manual page for more
information about acl.
bsddf (*)
Make 'df' act like BSD.

View File

@ -27,6 +27,16 @@ kernel command line (/proc/cmdline) and collects module parameters
when it loads a module, so the kernel command line can be used for
loadable modules too.
This document may not be entirely up to date and comprehensive. The command
"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
module. Loadable modules, after being loaded into the running kernel, also
reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
parameters may be changed at runtime by the command
``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
Special handling
----------------
Hyphens (dashes) and underscores are equivalent in parameter names, so::
log_buf_len=1M print-fatal-signals=1
@ -39,8 +49,8 @@ Double-quotes can be used to protect spaces in values, e.g.::
param="spaces in here"
cpu lists:
----------
cpu lists
~~~~~~~~~
Some kernel parameters take a list of CPUs as a value, e.g. isolcpus,
nohz_full, irqaffinity, rcu_nocbs. The format of this list is:
@ -82,12 +92,17 @@ so that "nohz_full=all" is the equivalent of "nohz_full=0-N".
The semantics of "N" and "all" is supported on a level of bitmaps and holds for
all users of bitmap_parselist().
This document may not be entirely up to date and comprehensive. The command
"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
module. Loadable modules, after being loaded into the running kernel, also
reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
parameters may be changed at runtime by the command
``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
Metric suffixes
~~~~~~~~~~~~~~~
The [KMG] suffix is commonly described after a number of kernel
parameter values. 'K', 'M', 'G', 'T', 'P', and 'E' suffixes are allowed.
These letters represent the _binary_ multipliers 'Kilo', 'Mega', 'Giga',
'Tera', 'Peta', and 'Exa', equaling 2^10, 2^20, 2^30, 2^40, 2^50, and
2^60 bytes respectively. Such letter suffixes can also be entirely omitted.
Kernel Build Options
--------------------
The parameters listed below are only valid if certain kernel build options
were enabled and if respective hardware is present. This list should be kept
@ -211,10 +226,5 @@ a fixed number of characters. This limit depends on the architecture
and is between 256 and 4096 characters. It is defined in the file
./include/uapi/asm-generic/setup.h as COMMAND_LINE_SIZE.
Finally, the [KMG] suffix is commonly described after a number of kernel
parameter values. These 'K', 'M', and 'G' letters represent the _binary_
multipliers 'Kilo', 'Mega', and 'Giga', equaling 2^10, 2^20, and 2^30
bytes respectively. Such letter suffixes can also be entirely omitted:
.. include:: kernel-parameters.txt
:literal:

View File

@ -333,12 +333,17 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
force_enable - Force enable the IOMMU on platforms known
to be buggy with IOMMU enabled. Use this
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
force_enable - Force enable the IOMMU on platforms known
to be buggy with IOMMU enabled. Use this
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
nohugepages - Limit page-sizes used for v1 page-tables
to 4 KiB.
v2_pgsizes_only - Limit page-sizes used for v1 page-tables
to 4KiB/2Mib/1GiB.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@ -441,6 +446,9 @@
arm64.nobti [ARM64] Unconditionally disable Branch Target
Identification support
arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
support
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
Set instructions support
@ -517,6 +525,18 @@
Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c.
bdev_allow_write_mounted=
Format: <bool>
Control the ability to open a mounted block device
for writing, i.e., allow / disallow writes that bypass
the FS. This was implemented as a means to prevent
fuzzers from crashing the kernel by overwriting the
metadata underneath a mounted FS without its awareness.
This also prevents destructive formatting of mounted
filesystems by naive storage tooling that don't use
O_EXCL. Default is Y and can be changed through the
Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
bert_disable [ACPI]
Disable BERT OS support on buggy BIOSes.
@ -901,12 +921,16 @@
the parameter has no effect.
crash_kexec_post_notifiers
Run kdump after running panic-notifiers and dumping
kmsg. This only for the users who doubt kdump always
succeeds in any situation.
Note that this also increases risks of kdump failure,
because some panic notifiers can make the crashed
kernel more unstable.
Only jump to kdump kernel after running the panic
notifiers and dumping kmsg. This option increases
the risks of a kdump failure, since some panic
notifiers can make the crashed kernel more unstable.
In configurations where kdump may not be reliable,
running the panic notifiers could allow collecting
more data on dmesg, like stack traces from other CPUS
or extra data dumped by panic_print. Note that some
configurations enable this option unconditionally,
like Hyper-V, PowerPC (fadump) and AMD SEV-SNP.
crashkernel=size[KMG][@offset[KMG]]
[KNL,EARLY] Using kexec, Linux can switch to a 'crash kernel'
@ -2660,6 +2684,23 @@
Default is Y (on).
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
If enabled, KVM will enable virtualization in hardware
when KVM is loaded, and disable virtualization when KVM
is unloaded (if KVM is built as a module).
If disabled, KVM will dynamically enable and disable
virtualization on-demand when creating and destroying
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
number of VMs.
Enabling virtualization at module lode avoids potential
latency for creation of the 0=>1 VM, as KVM serializes
virtualization enabling across all online CPUs. The
"cost" of enabling virtualization when KVM is loaded,
is that doing so may interfere with using out-of-tree
hypervisors that want to "own" virtualization hardware.
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
Default is false (don't support).
@ -4135,6 +4176,21 @@
Disable NUMA, Only set up a single NUMA node
spanning all memory.
numa=fake=<size>[MG]
[KNL, ARM64, RISCV, X86, EARLY]
If given as a memory unit, fills all system RAM with
nodes of size interleaved over physical nodes.
numa=fake=<N>
[KNL, ARM64, RISCV, X86, EARLY]
If given as an integer, fills all system RAM with N
fake nodes interleaved over physical nodes.
numa=fake=<N>U
[KNL, ARM64, RISCV, X86, EARLY]
If given as an integer followed by 'U', it will
divide each physical node into N emulated nodes.
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
NUMA balancing.
Allowed values are enable and disable
@ -4957,6 +5013,10 @@
Set maximum number of finished RCU callbacks to
process in one batch.
rcutree.csd_lock_suppress_rcu_stall= [KNL]
Do only a one-line RCU CPU stall warning when
there is an ongoing too-long CSD-lock wait.
rcutree.do_rcu_barrier= [KNL]
Request a call to rcu_barrier(). This is
throttled so that userspace tests can safely
@ -5359,11 +5419,6 @@
Set time (jiffies) between CPU-hotplug operations,
or zero to disable CPU-hotplug testing.
rcutorture.read_exit= [KNL]
Set the number of read-then-exit kthreads used
to test the interaction of RCU updaters and
task-exit processing.
rcutorture.read_exit_burst= [KNL]
The number of times in a given read-then-exit
episode that a set of read-then-exit kthreads
@ -5373,6 +5428,14 @@
The delay, in seconds, between successive
read-then-exit testing episodes.
rcutorture.reader_flavor= [KNL]
A bit mask indicating which readers to use.
If there is more than one bit set, the readers
are entered from low-order bit up, and are
exited in the opposite order. For SRCU, the
0x1 bit is normal readers, 0x2 NMI-safe readers,
and 0x4 light-weight readers.
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
allows some CPUs to go into dyntick-idle mode
@ -5404,7 +5467,13 @@
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
Disable interrupts while stalling if set.
Disable interrupts while stalling if set, but only
on the first stall in the set.
rcutorture.stall_cpu_repeat= [KNL]
Number of times to repeat the stall sequence,
so that rcutorture.stall_cpu_repeat=3 will result
in four stall sequences.
rcutorture.stall_gp_kthread= [KNL]
Duration (s) of forced sleep within RCU
@ -5592,14 +5661,6 @@
of zero will disable batching. Batching is
always disabled for synchronize_rcu_tasks().
rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
Set timeout in milliseconds RCU Tasks
Rude asynchronous callback batching for
call_rcu_tasks_rude(). A negative value
will take the default. A value of zero will
disable batching. Batching is always disabled
for synchronize_rcu_tasks_rude().
rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
Set timeout in milliseconds RCU Tasks
Trace asynchronous callback batching for
@ -6636,6 +6697,15 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
thp_anon= [KNL]
Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
state is one of "always", "madvise", "never" or "inherit".
Control the default behavior of the system with respect
to anonymous transparent hugepages.
Can be used multiple times for multiple anon THP sizes.
See Documentation/admin-guide/mm/transhuge.rst for more
details.
threadirqs [KNL,EARLY]
Force threading of all interrupt handlers except those
marked explicitly IRQF_NO_THREAD.
@ -6667,6 +6737,15 @@
torture.verbose_sleep_duration= [KNL]
Duration of each verbose-printk() sleep in jiffies.
tpm.disable_pcr_integrity= [HW,TPM]
Do not protect PCR registers from unintended physical
access, or interposers in the bus by the means of
having an integrity protected session wrapped around
TPM2_PCR_Extend command. Consider this in a situation
where TPM is heavily utilized by IMA, thus protection
causing a major performance hit, and the space where
machines are deployed is by other means guarded.
tpm_suspend_pcr=[HW,TPM]
Format: integer pcr id
Specify that at suspend time, the tpm driver
@ -6765,6 +6844,57 @@
the same thing would happen if it was left off). The irq_handler_entry
event, and all events under the "initcall" system.
Flags can be added to the instance to modify its behavior when it is
created. The flags are separated by '^'.
The available flags are:
traceoff - Have the tracing instance tracing disabled after it is created.
traceprintk - Have trace_printk() write into this trace instance
(note, "printk" and "trace_printk" can also be used)
trace_instance=foo^traceoff^traceprintk,sched,irq
The flags must come before the defined events.
If memory has been reserved (see memmap for x86), the instance
can use that memory:
memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
The above will create a "boot_map" instance that uses the physical
memory at 0x284500000 that is 12Megs. The per CPU buffers of that
instance will be split up accordingly.
Alternatively, the memory can be reserved by the reserve_mem option:
reserve_mem=12M:4096:trace trace_instance=boot_map@trace
This will reserve 12 megabytes at boot up with a 4096 byte alignment
and place the ring buffer in this memory. Note that due to KASLR, the
memory may not be the same location each time, which will not preserve
the buffer content.
Also note that the layout of the ring buffer data may change between
kernel versions where the validator will fail and reset the ring buffer
if the layout is not the same as the previous kernel.
If the ring buffer is used for persistent bootups and has events enabled,
it is recommend to disable tracing so that events from a previous boot do not
mix with events of the current boot (unless you are debugging a random crash
at boot up).
reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
Note, saving the trace buffer across reboots does require that the system
is set up to not wipe memory. For instance, CONFIG_RESET_ATTACK_MITIGATION
can force a memory reset on boot which will clear any trace that was stored.
This is just one of many ways that can clear memory. Make sure your system
keeps the content of memory across reboots before relying on this option.
See also Documentation/trace/debugging.rst
trace_options=[option-list]
[FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options
@ -7374,6 +7504,13 @@
it can be updated at runtime by writing to the
corresponding sysfs file.
workqueue.panic_on_stall=<uint>
Panic when workqueue stall is detected by
CONFIG_WQ_WATCHDOG. It sets the number times of the
stall to trigger panic.
The default is 0, which disables the panic on stall.
workqueue.cpu_intensive_thresh_us=
Per-cpu work items which run for longer than this
threshold are automatically considered CPU intensive

View File

@ -315,7 +315,7 @@ To reduce its OS jitter, do at least one of the following:
to do.
Name:
rcuop/%d and rcuos/%d
rcuop/%d, rcuos/%d, and rcuog/%d
Purpose:
Offload RCU callbacks from the corresponding CPU.

View File

@ -42,10 +42,14 @@ dongles):
``persistent_config``: by default this is off, but when set to 1 the driver
will store the current settings to the device's internal eeprom and restore
it the next time the device is connected to the USB port.
- RainShadow Tech. Note: this driver does not support the persistent_config
module option of the Pulse-Eight driver. The hardware supports it, but I
have no plans to add this feature. But I accept patches :-)
- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
:ref:`extron_da_hd_4k_plus` for more information.
Miscellaneous:
- vivid: emulates a CEC receiver and CEC transmitter.
@ -378,3 +382,86 @@ it later using ``--analyze-pin``.
You can also use this as a full-fledged CEC device by configuring it
using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
.. _extron_da_hd_4k_plus:
Extron DA HD 4K PLUS CEC Adapter driver
=======================================
This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
Amplifiers: https://www.extron.com/product/dahd4kplusseries
The 2, 4 and 6 port models are supported.
Firmware version 1.02.0001 or higher is required.
Note that older Extron hardware revisions have a problem with the CEC voltage,
which may mean that CEC will not work. This is fixed in hardware revisions
E34814 and up.
The CEC support has two modes: the first is a manual mode where userspace has
to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
full control, it is also complicated.
The second mode is an automatic mode, which is selected if the module option
``vendor_id`` is set. In that case the driver controls CEC and CEC messages
received in the input will be distributed to the outputs. It is still possible
to use the /dev/cecX devices to talk to the connected devices directly, but it is
the driver that configures everything and deals with things like Hotplug Detect
changes.
The driver also takes care of the EDIDs: /dev/videoX devices are created to
read the EDIDs and (for the HDMI Input port) to set the EDID.
By default userspace is responsible to set the EDID for the HDMI Input
according to the EDIDs of the connected displays. But if the ``manufacturer_name``
module option is set, then the driver will take care of setting the EDID
of the HDMI Input based on the supported resolutions of the connected displays.
Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
it will fall back to an EDID that just reports 1080p60.
The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
The extron-da-hd-4k-plus driver implements the following module options:
``debug``
---------
If set to 1, then all serial port traffic is shown.
``vendor_id``
-------------
The CEC Vendor ID to report to connected displays.
If set, then the driver will take care of distributing CEC messages received
on the input to the HDMI outputs. This is done for the following CEC messages:
- <Standby>
- <Image View On> and <Text View On>
- <Give Device Power Status>
- <Set System Audio Mode>
- <Request Current Latency>
If not set, then userspace is responsible for this, and it will have to
configure the CEC devices for HDMI Input and the HDMI Outputs manually.
``manufacturer_name``
---------------------
A three character manufacturer name that is used in the EDID for the HDMI
Input. If not set, then userspace is reponsible for configuring an EDID.
If set, then the driver will update the EDID automatically based on the
resolutions supported by the connected displays, and it will not be possible
anymore to manually set the EDID for the HDMI Input.
``hpd_never_low``
-----------------
If set, then the Hotplug Detect pin of the HDMI Input will always be high,
even if nothing is connected to the HDMI Outputs. If not set (the default)
then the Hotplug Detect pin of the HDMI input will go low if all the detected
Hotplug Detect pins of the HDMI Outputs are also low.
This option may be changed dynamically.

View File

@ -227,8 +227,13 @@ Common FPDL3/GMSL output parameters
open.*
**frame_rate** (RW):
Output video frame rate in frames per second. The default frame rate is
60Hz.
Output video signal frame rate limit in frames per second. Due to
the limited output pixel clock steps, the card can not always generate
a frame rate perfectly matching the value required by the connected display.
Using this parameter one can limit the frame rate by "crippling" the signal
so that the lines are not equal (the porches of the last line differ) but
the signal appears like having the exact frame rate to the connected display.
The default frame rate limit is 60Hz.
**hsync_polarity** (RW):
HSYNC signal polarity.
@ -253,33 +258,33 @@ Common FPDL3/GMSL output parameters
and there is a non-linear stepping between two consecutive allowed
frequencies. The driver finds the nearest allowed frequency to the given
value and sets it. When reading this property, you get the exact
frequency set by the driver. The default frequency is 70000kHz.
frequency set by the driver. The default frequency is 61150kHz.
*Note: This parameter can not be changed while the output v4l2 device is
open.*
**hsync_width** (RW):
Width of the HSYNC signal in pixels. The default value is 16.
Width of the HSYNC signal in pixels. The default value is 40.
**vsync_width** (RW):
Width of the VSYNC signal in video lines. The default value is 2.
Width of the VSYNC signal in video lines. The default value is 20.
**hback_porch** (RW):
Number of PCLK pulses between deassertion of the HSYNC signal and the first
valid pixel in the video line (marked by DE=1). The default value is 32.
valid pixel in the video line (marked by DE=1). The default value is 50.
**hfront_porch** (RW):
Number of PCLK pulses between the end of the last valid pixel in the video
line (marked by DE=1) and assertion of the HSYNC signal. The default value
is 32.
is 50.
**vback_porch** (RW):
Number of video lines between deassertion of the VSYNC signal and the video
line with the first valid pixel (marked by DE=1). The default value is 2.
line with the first valid pixel (marked by DE=1). The default value is 31.
**vfront_porch** (RW):
Number of video lines between the end of the last valid pixel line (marked
by DE=1) and assertion of the VSYNC signal. The default value is 2.
by DE=1) and assertion of the VSYNC signal. The default value is 30.
FPDL3 specific input parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -114,11 +114,18 @@ to be applied to the hardware during a video stream, allowing userspace
to dynamically modify values such as black level, cross talk corrections
and others.
The buffer format is defined by struct :c:type:`rkisp1_params_cfg`, and
userspace should set
The ISP driver supports two different parameters configuration methods, the
`fixed parameters format` or the `extensible parameters format`.
When using the `fixed parameters` method the buffer format is defined by struct
:c:type:`rkisp1_params_cfg`, and userspace should set
:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
dataformat.
When using the `extensible parameters` method the buffer format is defined by
struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
the dataformat.
Capturing Video Frames Example
==============================

View File

@ -1343,7 +1343,7 @@ Some Future Improvements
Just as a reminder and in no particular order:
- Add a virtual alsa driver to test audio
- Add virtual sub-devices and media controller support
- Add virtual sub-devices
- Some support for testing compressed video
- Add support to loop raw VBI output to raw VBI input
- Add support to loop teletext sliced VBI output to VBI input
@ -1358,4 +1358,4 @@ Just as a reminder and in no particular order:
- Make a thread for the RDS generation, that would help in particular for the
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
in real-time.
- Changing the EDID should cause hotplug detect emulation to happen.
- Changing the EDID doesn't wait 100 ms before setting the HPD signal.

View File

@ -7,7 +7,7 @@ Getting Started
This document briefly describes how you can use DAMON by demonstrating its
default user space tool. Please note that this document describes only a part
of its features for brevity. Please refer to the usage `doc
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_ of the tool for more
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
details.
@ -26,7 +26,7 @@ User Space Tool
For the demonstration, we will use the default user space tool for DAMON,
called DAMON Operator (DAMO). It is available at
https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
your ``$PATH``. It's not mandatory, though.
Because DAMO is using the sysfs interface (refer to :doc:`usage` for the

View File

@ -7,19 +7,19 @@ Detailed Usages
DAMON provides below interfaces for different users.
- *DAMON user space tool.*
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
`This <https://github.com/damonitor/damo>`_ is for privileged people such as
system administrators who want a just-working human-friendly interface.
Using this, users can use the DAMONs major features in a human-friendly way.
It may not be highly tuned for special cases, though. For more detail,
please refer to its `usage document
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
- *sysfs interface.*
:ref:`This <sysfs_interface>` is for privileged user space programmers who
want more optimized use of DAMON. Using this, users can use DAMONs major
features by reading from and writing to special sysfs files. Therefore,
you can write and use your personalized DAMON sysfs wrapper programs that
reads/writes the sysfs files instead of you. The `DAMON user space tool
<https://github.com/awslabs/damo>`_ is one example of such programs.
<https://github.com/damonitor/damo>`_ is one example of such programs.
- *Kernel Space Programming Interface.*
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
users can utilize every feature of DAMON most flexibly and efficiently by
@ -543,7 +543,7 @@ memory rate becomes larger than 60%, or lower than 30%". ::
# echo 300 > watermarks/low
Please note that it's highly recommended to use user space tools like `damo
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
<https://github.com/damonitor/damo>`_ rather than manually reading and writing
the files as above. Above is only for an example.
.. _tracepoint:

View File

@ -294,8 +294,9 @@ The following files are currently defined:
``crash_hotplug`` read-only: when changes to the system memory map
occur due to hot un/plug of memory, this file contains
'1' if the kernel updates the kdump capture kernel memory
map itself (via elfcorehdr), or '0' if userspace must update
the kdump capture kernel memory map.
map itself (via elfcorehdr and other relevant kexec
segments), or '0' if userspace must update the kdump
capture kernel memory map.
Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
configuration option.

View File

@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
All THPs at fault and collapse time will be added to _deferred_list,
and will therefore be split under memory presure if they are considered
"underused". A THP is underused if the number of zero-filled pages in
the THP is above max_ptes_none (see below). It is possible to disable
this behaviour by writing 0 to shrink_underused, and enable it by writing
1 to it::
echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
khugepaged will be automatically started when PMD-sized THP is enabled
(either of the per-size anon control or the top-level control are set
to "always" or "madvise"), and it'll be automatically shutdown when
@ -284,13 +294,37 @@ that THP is shared. Exceeding the number would block the collapse::
A higher value may increase memory footprint for some workloads.
Boot parameter
==============
Boot parameters
===============
You can change the sysfs boot time defaults of Transparent Hugepage
Support by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
to the kernel command line.
You can change the sysfs boot time default for the top-level "enabled"
control by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
kernel command line.
Alternatively, each supported anonymous THP size can be controlled by
passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
``never`` or ``inherit``.
For example, the following will set 16K, 32K, 64K THP to ``always``,
set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
to ``never``::
thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
``thp_anon=`` may be specified multiple times to configure all THP sizes as
required. If ``thp_anon=`` is specified at least once, any anon THP sizes
not explicitly configured on the command line are implicitly set to
``never``.
``transparent_hugepage`` setting only affects the global toggle. If
``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
However, if a valid ``thp_anon`` setting is provided by the user, the
PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
is not defined within a valid ``thp_anon``, its policy will default to
``never``.
Hugepages in tmpfs/shmem
========================
@ -447,6 +481,12 @@ thp_deferred_split_page
splitting it would free up some memory. Pages on split queue are
going to be split under memory pressure.
thp_underused_split_page
is incremented when a huge page on the split queue was split
because it was underused. A THP is underused if the number of
zero pages in the THP is above a certain threshold
(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
thp_split_pmd
is incremented every time a PMD split into table of PTEs.
This can happen, for instance, when application calls mprotect() or
@ -527,6 +567,18 @@ split_deferred
it would free up some memory. Pages on split queue are going to
be split under memory pressure, if splitting is possible.
nr_anon
the number of anonymous THP we have in the whole system. These THPs
might be currently entirely mapped or have partially unmapped/unused
subpages.
nr_anon_partially_mapped
the number of anonymous THP which are likely partially mapped, possibly
wasting memory, and have been queued for deferred memory reclamation.
Note that in corner some cases (e.g., failed migration), we might detect
an anonymous THP as "partially mapped" and count it here, even though it
is not actually partially mapped anymore.
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help

View File

@ -26,3 +26,4 @@ Performance monitor support
meson-ddr-pmu
cxl
ampere_cspmu
mrvl-pem-pmu

View File

@ -0,0 +1,56 @@
=================================================================
Marvell Odyssey PEM Performance Monitoring Unit (PMU UNCORE)
=================================================================
The PCI Express Interface Units(PEM) are associated with a corresponding
monitoring unit. This includes performance counters to track various
characteristics of the data that is transmitted over the PCIe link.
The counters track inbound and outbound transactions which
includes separate counters for posted/non-posted/completion TLPs.
Also, inbound and outbound memory read requests along with their
latencies can also be monitored. Address Translation Services(ATS)events
such as ATS Translation, ATS Page Request, ATS Invalidation along with
their corresponding latencies are also tracked.
There are separate 64 bit counters to measure posted/non-posted/completion
tlps in inbound and outbound transactions. ATS events are measured by
different counters.
The PMU driver exposes the available events and format options under sysfs,
/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/events/
/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/format/
Examples::
# perf list | grep mrvl_pcie_rc_pmu
mrvl_pcie_rc_pmu_<>/ats_inv/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ats_inv_latency/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ats_pri/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ats_pri_latency/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ats_trans/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ats_trans_latency/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_inflight/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_reads/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ebus/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ncb/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_cpl_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_cpl_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_npr/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_pr/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_npr/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ib_tlp_pr/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_inflight_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_merges_cpl_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_merges_npr_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_merges_pr_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_reads_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_cpl_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_cpl_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_npr_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_pr_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_npr_partid/ [Kernel PMU event]
mrvl_pcie_rc_pmu_<>/ob_tlp_pr_partid/ [Kernel PMU event]
# perf stat -e ib_inflight,ib_reads,ib_req_no_ro_ebus,ib_req_no_ro_ncb <workload>

View File

@ -425,8 +425,8 @@ This governor exposes only one tunable:
``rate_limit_us``
Minimum time (in microseconds) that has to pass between two consecutive
runs of governor computations (default: 1000 times the scaling driver's
transition latency).
runs of governor computations (default: 1.5 times the scaling driver's
transition latency or the maximum 2ms).
The purpose of this tunable is to reduce the scheduler context overhead
of the governor which might be excessive without it.
@ -474,17 +474,17 @@ This governor exposes the following tunables:
This is how often the governor's worker routine should run, in
microseconds.
Typically, it is set to values of the order of 10000 (10 ms). Its
default value is equal to the value of ``cpuinfo_transition_latency``
for each policy this governor is attached to (but since the unit here
is greater by 1000, this means that the time represented by
``sampling_rate`` is 1000 times greater than the transition latency by
default).
Typically, it is set to values of the order of 2000 (2 ms). Its
default value is to add a 50% breathing room
to ``cpuinfo_transition_latency`` on each policy this governor is
attached to. The minimum is typically the length of two scheduler
ticks.
If this tunable is per-policy, the following shell command sets the time
represented by it to be 750 times as high as the transition latency::
represented by it to be 1.5 times as high as the transition latency
(the default)::
# echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
# echo `$(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
``up_threshold``
If the estimated CPU load is above this value (in percent), the governor

View File

@ -113,3 +113,62 @@ to apply at each uncore* level.
Support for "current_freq_khz" is available only at each fabric cluster
level (i.e., in uncore* directory).
Efficiency vs. Latency Tradeoff
-------------------------------
The Efficiency Latency Control (ELC) feature improves performance
per watt. With this feature hardware power management algorithms
optimize trade-off between latency and power consumption. For some
latency sensitive workloads further tuning can be done by SW to
get desired performance.
The hardware monitors the average CPU utilization across all cores
in a power domain at regular intervals and decides an uncore frequency.
While this may result in the best performance per watt, workload may be
expecting higher performance at the expense of power. Consider an
application that intermittently wakes up to perform memory reads on an
otherwise idle system. In such cases, if hardware lowers uncore
frequency, then there may be delay in ramp up of frequency to meet
target performance.
The ELC control defines some parameters which can be changed from SW.
If the average CPU utilization is below a user-defined threshold
(elc_low_threshold_percent attribute below), the user-defined uncore
floor frequency will be used (elc_floor_freq_khz attribute below)
instead of hardware calculated minimum.
Similarly in high load scenario where the CPU utilization goes above
the high threshold value (elc_high_threshold_percent attribute below)
instead of jumping to maximum uncore frequency, frequency is increased
in 100MHz steps. This avoids consuming unnecessarily high power
immediately with CPU utilization spikes.
Attributes for efficiency latency control:
``elc_floor_freq_khz``
This attribute is used to get/set the efficiency latency floor frequency.
If this variable is lower than the 'min_freq_khz', it is ignored by
the firmware.
``elc_low_threshold_percent``
This attribute is used to get/set the efficiency latency control low
threshold. This attribute is in percentages of CPU utilization.
``elc_high_threshold_percent``
This attribute is used to get/set the efficiency latency control high
threshold. This attribute is in percentages of CPU utilization.
``elc_high_threshold_enable``
This attribute is used to enable/disable the efficiency latency control
high threshold. Write '1' to enable, '0' to disable.
Example system configuration below, which does following:
* when CPU utilization is less than 10%: sets uncore frequency to 800MHz
* when CPU utilization is higher than 95%: increases uncore frequency in
100MHz steps, until power limit is reached
elc_floor_freq_khz:800000
elc_high_threshold_percent:95
elc_high_threshold_enable:1
elc_low_threshold_percent:10

View File

@ -129,7 +129,7 @@ Setting the ramoops parameters can be done in several different manners:
takes a size, alignment and name as arguments. The name is used
to map the memory to a label that can be retrieved by ramoops.
reserver_mem=2M:4096:oops ramoops.mem_name=oops
reserve_mem=2M:4096:oops ramoops.mem_name=oops
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()

View File

@ -38,6 +38,11 @@ requests. ``aio-max-nr`` allows you to change the maximum value
``aio-max-nr`` does not result in the
pre-allocation or re-sizing of any kernel data structures.
dentry-negative
----------------------------
Policy for negative dentries. Set to 1 to to always delete the dentry when a
file is removed, and 0 to disable it. By default, this behavior is disabled.
dentry-state
------------

View File

@ -182,3 +182,5 @@ More detailed explanation for tainting
produce extremely unusual kernel structure layouts (even performance
pathological ones), which is important to know when debugging. Set at
build time.
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.

View File

@ -12,7 +12,7 @@ ones.
Of course this is a bad idea to rely on the alignment trap to perform
unaligned memory access in general. If those access are predictable, you
are better to use the macros provided by include/asm/unaligned.h. The
are better to use the macros provided by include/linux/unaligned.h. The
alignment trap can fixup misaligned access for the exception cases, but at
a high performance cost. It better be rare.

View File

@ -359,7 +359,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
descriptor you want a callback to be called at the end of the transfer
(dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
Depending on the direction, set the callback on the descriptor that finishes
the overal transfer:
the overall transfer:
* DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
* DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
@ -371,7 +371,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
STM32 MDMA channel before STM32 DMA channel.
If any, your callback will be called to warn you about the end of the overal
If any, your callback will be called to warn you about the end of the overall
transfer or the period completion.
Don't forget to terminate both channels. STM32 DMA channel is configured in

View File

@ -0,0 +1,69 @@
.. SPDX-License-Identifier: GPL-2.0
=====================================
Arm Confidential Compute Architecture
=====================================
Arm systems that support the Realm Management Extension (RME) contain
hardware to allow a VM guest to be run in a way which protects the code
and data of the guest from the hypervisor. It extends the older "two
world" model (Normal and Secure World) into four worlds: Normal, Secure,
Root and Realm. Linux can then also be run as a guest to a monitor
running in the Realm world.
The monitor running in the Realm world is known as the Realm Management
Monitor (RMM) and implements the Realm Management Monitor
specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
in EL2 and manages the stage 2 page tables etc of the guests running in
Realm world), however much of the control is handled by a hypervisor
running in the Normal World. The Normal World hypervisor uses the Realm
Management Interface (RMI) defined by the RMM specification to request
the RMM to perform operations (e.g. mapping memory or executing a vCPU).
The RMM defines an environment for guests where the address space (IPA)
is split into two. The lower half is protected - any memory that is
mapped in this half cannot be seen by the Normal World and the RMM
restricts what operations the Normal World can perform on this memory
(e.g. the Normal World cannot replace pages in this region without the
guest's cooperation). The upper half is shared, the Normal World is free
to make changes to the pages in this region, and is able to emulate MMIO
devices in this region too.
A guest running in a Realm may also communicate with the RMM using the
Realm Services Interface (RSI) to request changes in its environment or
to perform attestation about its environment. In particular it may
request that areas of the protected address space are transitioned
between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
guest to give up memory to be returned to the Normal World, or to
request new memory from the Normal World. Without an explicit request
from the Realm guest the RMM will otherwise prevent the Normal World
from making these changes.
Linux as a Realm Guest
----------------------
To run Linux as a guest within a Realm, the following must be provided
either by the VMM or by a `boot loader` run in the Realm before Linux:
* All protected RAM described to Linux (by DT or ACPI) must be marked
RIPAS RAM before handing control over to Linux.
* MMIO devices must be either unprotected (e.g. emulated by the Normal
World) or marked RIPAS DEV.
* MMIO devices emulated by the Normal World and used very early in boot
(specifically earlycon) must be specified in the upper half of IPA.
For earlycon this can be done by specifying the address on the
command line, e.g. with an IPA size of 33 bits and the base address
of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
* Linux will use bounce buffers for communicating with unprotected
devices. It will transition some protected memory to RIPAS EMPTY and
expect to be able to access unprotected pages at the same IPA address
but with the highest valid IPA bit set. The expectation is that the
VMM will remove the physical pages from the protected mapping and
provide those pages as unprotected pages.
References
----------
[1] https://developer.arm.com/documentation/den0137/

View File

@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of
the RAM in the machine, or any other method the boot loader designer
sees fit.)
For Arm Confidential Compute Realms this includes ensuring that all
protected RAM has a Realm IPA state (RIPAS) of "RAM".
2. Setup the device tree
-------------------------
@ -385,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
- HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
- If EL3 is present:
@ -411,6 +417,38 @@ Before jumping into the kernel, the following conditions must be met:
- HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
- For CPUs with Guarded Control Stacks (FEAT_GCS):
- GCSCR_EL1 must be initialised to 0.
- GCSCRE0_EL1 must be initialised to 0.
- If EL3 is present:
- SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
- If EL2 is present:
- GCSCR_EL2 must be initialised to 0.
- If the kernel is entered at EL1 and EL2 is present:
- HCRX_EL2.GCSEn must be initialised to 0b1.
- HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
- HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
- HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
- HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
- HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
- HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
- HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
enter the kernel in the same exception level. Where the values documented

View File

@ -26,7 +26,7 @@ There are no systems that support the physical addition (or removal) of CPUs
while the system is running, and ACPI is not able to sufficiently describe
them.
e.g. New CPUs come with new caches, but the platform's cache toplogy is
e.g. New CPUs come with new caches, but the platform's cache topology is
described in a static table, the PPTT. How caches are shared between CPUs is
not discoverable, and must be described by firmware.

View File

@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
kernel exposes the presence of these features to userspace through a set
of flags called hwcaps, exposed in the auxiliary vector.
Userspace software can test for features by acquiring the AT_HWCAP or
AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
flags are set, e.g.::
Userspace software can test for features by acquiring the AT_HWCAP,
AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
whether the relevant flags are set, e.g.::
bool floating_point_is_present(void)
{
@ -170,6 +170,10 @@ HWCAP_PACG
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arch/arm64/pointer-authentication.rst.
HWCAP_GCS
Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
described by Documentation/arch/arm64/gcs.rst.
HWCAP2_DCPODP
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.

View File

@ -0,0 +1,227 @@
===============================================
Guarded Control Stack support for AArch64 Linux
===============================================
This document outlines briefly the interface provided to userspace by Linux in
order to support use of the ARM Guarded Control Stack (GCS) feature.
This is an outline of the most important features and issues only and not
intended to be exhaustive.
1. General
-----------
* GCS is an architecture feature intended to provide greater protection
against return oriented programming (ROP) attacks and to simplify the
implementation of features that need to collect stack traces such as
profiling.
* When GCS is enabled a separate guarded control stack is maintained by the
PE which is writeable only through specific GCS operations. This
stores the call stack only, when a procedure call instruction is
performed the current PC is pushed onto the GCS and on RET the
address in the LR is verified against that on the top of the GCS.
* When active the current GCS pointer is stored in the system register
GCSPR_EL0. This is readable by userspace but can only be updated
via specific GCS instructions.
* The architecture provides instructions for switching between guarded
control stacks with checks to ensure that the new stack is a valid
target for switching.
* The functionality of GCS is similar to that provided by the x86 Shadow
Stack feature, due to sharing of userspace interfaces the ABI refers to
shadow stacks rather than GCS.
* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
AT_HWCAP2 entry.
* GCS is enabled per thread. While there is support for disabling GCS
at runtime this should be done with great care.
* GCS memory access faults are reported as normal memory access faults.
* GCS specific errors (those reported with EC 0x2d) will be reported as
SIGSEGV with a si_code of SEGV_CPERR (control protection error).
* GCS is supported only for AArch64.
* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
regardless of the GCS configuration for the thread.
* The architecture supports enabling GCS without verifying that return values
in LR match those in the GCS, the LR will be ignored. This is not supported
by Linux.
2. Enabling and disabling Guarded Control Stacks
-------------------------------------------------
* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
prctl(), this takes a single flags argument specifying which GCS features
should be used.
* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
and enables GCS for the thread, enabling the functionality controlled by
GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
status of the specified GCS mode bits will be rejected.
* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
userspace to prevent changes to any future features.
* There is no support for a process to remove a lock that has been set for
it.
* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
thread that called them, any other running threads will be unaffected.
* New threads inherit the GCS configuration of the thread that created them.
* GCS is disabled on exec().
* The current GCS configuration for a thread may be read with the
PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
are passed to PR_SET_SHADOW_STACK_STATUS.
* If GCS is disabled for a thread after having previously been enabled then
the stack will remain allocated for the lifetime of the thread. At present
any attempt to reenable GCS for the thread will be rejected, this may be
revisited in future.
* It should be noted that since enabling GCS will result in GCS becoming
active immediately it is not normally possible to return from the function
that invoked the prctl() that enabled GCS. It is expected that the normal
usage will be that GCS is enabled very early in execution of a program.
3. Allocation of Guarded Control Stacks
----------------------------------------
* When GCS is enabled for a thread a new Guarded Control Stack will be
allocated for it of half the standard stack size or 2 gigabytes,
whichever is smaller.
* When a new thread is created by a thread which has GCS enabled then a
new Guarded Control Stack will be allocated for the new thread with
half the size of the standard stack.
* When a stack is allocated by enabling GCS or during thread creation then
the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
be set to point to the address of this 0 value, this can be used to
detect the top of the stack.
* Additional Guarded Control Stacks can be allocated using the
map_shadow_stack() system call.
* Stacks allocated using map_shadow_stack() can optionally have an end of
stack marker and cap placed at the top of the stack. If the flag
SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
bytes of the stack and if it is specified then the cap will be the next
8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
valid since the marker is all bits 0 it has no observable effect.
* Stacks allocated using map_shadow_stack() must have a size which is a
multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
* An address can be specified to map_shadow_stack(), if one is provided then
it must be aligned to a page boundary.
* When a thread is freed the Guarded Control Stack initially allocated for
that thread will be freed. Note carefully that if the stack has been
switched this may not be the stack currently in use by the thread.
4. Signal handling
--------------------
* A new signal frame record gcs_context encodes the current GCS mode and
pointer for the interrupted context on signal delivery. This will always
be present on systems that support GCS.
* The record contains a flag field which reports the current GCS configuration
for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
* The signal handler is run with the same GCS configuration as the interrupted
context.
* When GCS is enabled for the interrupted thread a signal handling specific
GCS cap token will be written to the GCS, this is an architectural GCS cap
with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
signal frame will point to this cap token.
* The signal handler will use the same GCS as the interrupted context.
* When GCS is enabled on signal entry a frame with the address of the signal
return handler will be pushed onto the GCS, allowing return from the signal
handler via RET as normal. This will not be reported in the gcs_context in
the signal frame.
5. Signal return
-----------------
When returning from a signal handler:
* If there is a gcs_context record in the signal frame then the GCS flags
and GCSPR_EL0 will be restored from that context prior to further
validation.
* If there is no gcs_context record in the signal frame then the GCS
configuration will be unchanged.
* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
point to a valid GCS signal cap record, this will be popped from the
GCS prior to signal return.
* If the GCS configuration is locked when returning from a signal then any
attempt to change the GCS configuration will be treated as an error. This
is true even if GCS was not enabled prior to signal entry.
* GCS may be disabled via signal return but any attempt to enable GCS via
signal return will be rejected.
6. ptrace extensions
---------------------
* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
PTRACE_SETREGSET.
* The GCS mode, including enable and disable, may be configured via ptrace.
If GCS is enabled via ptrace no new GCS will be allocated for the thread.
* Configuration via ptrace ignores locking of GCS mode bits.
7. ELF coredump extensions
---------------------------
* NT_ARM_GCS notes will be added to each coredump for each thread of the
dumped process. The contents will be equivalent to the data that would
have been read if a PTRACE_GETREGSET of the corresponding type were
executed for each thread when the coredump was generated.
8. /proc extensions
--------------------
* Guarded Control Stack pages will include "ss" in their VmFlags in
/proc/<pid>/smaps.

View File

@ -10,16 +10,19 @@ ARM64 Architecture
acpi_object_usage
amu
arm-acpi
arm-cca
asymmetric-32bit
booting
cpu-feature-registers
cpu-hotplug
elf_hwcaps
gcs
hugetlbpage
kdump
legacy_instructions
memory
memory-tagging-extension
mops
perf
pointer-authentication
ptdump

View File

@ -0,0 +1,44 @@
.. SPDX-License-Identifier: GPL-2.0
===================================
Memory copy/set instructions (MOPS)
===================================
A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
A main or epilogue instruction can take a MOPS exception for various reasons,
for example when a task is migrated to a CPU with a different MOPS
implementation, or when the instruction's alignment and size requirements are
not met. The software exception handler is then expected to reset the registers
and restart execution from the prologue instruction. Normally this is handled
by the kernel.
For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
.. _arm64_mops_hyp:
Hypervisor requirements
-----------------------
A hypervisor running a Linux guest must handle all MOPS exceptions from the
guest kernel, as Linux may not be able to handle the exception at all times.
For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
to another physical CPU with a different MOPS implementation.
To do this, the hypervisor must:
- Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
- Have an exception handler that implements the algorithm from the Arm ARM
rules CNTMJ and MWFQH.
- Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
potential step of the current instruction.
Note: Clearing PSTATE.SS is needed so that a single step exception is taken
on the next instruction (the prologue instruction). Otherwise prologue
would get silently stepped over and the single step exception taken on the
main instruction. Note that if the guest instruction is not being stepped
then clearing PSTATE.SS has no effect.

View File

@ -146,6 +146,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 |
@ -186,6 +188,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #3324339 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N3 | #3456111 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-V1 | #1619801 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 |
@ -289,3 +293,5 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
+----------------+-----------------+-----------------+-----------------------------+
| Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+

View File

@ -346,6 +346,10 @@ The regset data starts with struct user_za_header, containing:
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
* If any register data is provided along with SME_PT_VL_ONEXEC then the
registers data will be interpreted with the current vector length, not
the vector length configured for use on exec.
8. ELF coredump extensions
---------------------------

View File

@ -402,6 +402,10 @@ The regset data starts with struct user_sve_header, containing:
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
if the target was not in streaming mode.
* If any register data is provided along with SVE_PT_VL_ONEXEC then the
registers data will be interpreted with the current vector length, not
the vector length configured for use on exec.
* The effect of writing a partial, incomplete payload is unspecified.

View File

@ -85,6 +85,38 @@ to CPUINTC directly::
| Devices |
+---------+
Advanced Extended IRQ model
===========================
In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
+-----+ +-----------------------+ +-------+
| IPI | --> | CPUINTC | <-- | Timer |
+-----+ +-----------------------+ +-------+
^ ^ ^
| | |
+---------+ +----------+ +---------+ +-------+
| EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+---------+ +----------+ +---------+ +-------+
^ ^
| |
+---------+ +---------+
| PCH-PIC | | PCH-MSI |
+---------+ +---------+
^ ^ ^
| | |
+---------+ +---------+ +---------+
| Devices | | PCH-LPC | | Devices |
+---------+ +---------+ +---------+
^
|
+---------+
| Devices |
+---------+
ACPI-related definitions
========================

View File

@ -134,7 +134,7 @@ Hardware
* PTCR and partition table entries (partition table is in secure
memory). An attempt to write to PTCR will cause a Hypervisor
Emulation Assitance interrupt.
Emulation Assistance interrupt.
* LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
non-architected registers. An attempt to write to them will cause a

View File

@ -15,7 +15,7 @@ status for the use of Vector in userspace. The intended usage guideline for
these interfaces is to give init systems a way to modify the availability of V
for processes running under its domain. Calling these interfaces is not
recommended in libraries routines because libraries should not override policies
configured from the parant process. Also, users must noted that these interfaces
configured from the parent process. Also, users must note that these interfaces
are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
to use in a portable code. To get the availability of V in an ELF program,
please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the

View File

@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
resulting from plugging it in references a queue device bound to the vfio_ap
device driver.
Driver Features
===============
The vfio_ap driver exposes a sysfs file containing supported features.
This exists so third party tools (like Libvirt and mdevctl) can query the
availability of specific features.
The features list can be found here: /sys/bus/matrix/devices/matrix/features
Entries are space delimited. Each entry consists of a combination of
alphanumeric and underscore characters.
Example:
cat /sys/bus/matrix/devices/matrix/features
guest_matrix dyn ap_config
the following features are advertised:
---------------+---------------------------------------------------------------+
| Flag | Description |
+==============+===============================================================+
| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
| | adapters and domains that are or will be passed through to a |
| | guest when the mdev is attached to it. |
+--------------+---------------------------------------------------------------+
| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
| | domains for a guest to which the mdev is attached. |
+------------+-----------------------------------------------------------------+
| ap_config | ap_config interface for one-shot modifications to mdev config |
+--------------+---------------------------------------------------------------+
Limitations
===========
Live guest migration is not supported for guests using AP devices without

View File

@ -26,7 +26,8 @@ Detection
=========
Intel processors may support either or both of the following hardware
mechanisms to detect split locks and bus locks.
mechanisms to detect split locks and bus locks. Some AMD processors also
support bus lock detect.
#AC exception for split lock detection
--------------------------------------

View File

@ -162,7 +162,7 @@ Mitigation points
3. It would take a large number of these precisely-timed NMIs to mount
an actual attack. There's presumably not enough bandwidth.
4. The NMI in question occurs after a VERW, i.e. when user state is
restored and most interesting data is already scrubbed. Whats left
restored and most interesting data is already scrubbed. What's left
is only the data that NMI touches, and that may or may not be of
any interest.

View File

@ -170,18 +170,6 @@ NUMA
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
numa=fake=<size>[MG]
If given as a memory unit, fills all system RAM with nodes of
size interleaved over physical nodes.
numa=fake=<N>
If given as an integer, fills all system RAM with N fake nodes
interleaved over physical nodes.
numa=fake=<N>U
If given as an integer followed by 'U', it will divide each
physical node into N emulated nodes.
ACPI
====
@ -317,3 +305,8 @@ The available options are:
debug
Enable debug messages.
nosnp
Do not enable SEV-SNP (applies to host/hypervisor only). Setting
'nosnp' avoids the RMP check overhead in memory accesses when
users do not want to run SEV-SNP guests.

View File

@ -125,7 +125,7 @@ FSGSBASE instructions enablement
FSGSBASE instructions compiler support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
instructions. Clang 5 supports them as well.
=================== ===========================
@ -135,7 +135,7 @@ instructions. Clang 5 supports them as well.
_writegsbase_u64() Write the GS base register
=================== ===========================
To utilize these instrinsics <immintrin.h> must be included in the source
To utilize these intrinsics <immintrin.h> must be included in the source
code and the compiler option -mfsgsbase has to be added.
Compiler support for FS/GS based addressing

View File

@ -29,15 +29,27 @@ Complete virtual memory map with 4-level page tables
Start addr | Offset | End addr | Size | VM area description
========================================================================================================================
| | | |
0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
0000000000000000 | 0 | 00007fffffffefff | ~128 TB | user-space virtual memory, different per mm
00007ffffffff000 | ~128 TB | 00007fffffffffff | 4 kB | ... guard hole
__________________|____________|__________________|_________|___________________________________________________________
| | | |
0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -128 TB
0000800000000000 | +128 TB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -8 EB
| | | | starting offset of kernel mappings.
| | | |
| | | | LAM relaxes canonicallity check allowing to create aliases
| | | | for userspace memory here.
__________________|____________|__________________|_________|___________________________________________________________
|
| Kernel-space virtual memory, shared between all processes:
__________________|____________|__________________|_________|___________________________________________________________
| | | |
8000000000000000 | -8 EB | ffff7fffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -128 TB
| | | | starting offset of kernel mappings.
| | | |
| | | | LAM_SUP relaxes canonicallity check allowing to create
| | | | aliases for kernel memory here.
____________________________________________________________|___________________________________________________________
| | | |
ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
@ -88,15 +100,26 @@ Complete virtual memory map with 5-level page tables
Start addr | Offset | End addr | Size | VM area description
========================================================================================================================
| | | |
0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
0000000000000000 | 0 | 00fffffffffff000 | ~64 PB | user-space virtual memory, different per mm
00fffffffffff000 | ~64 PB | 00ffffffffffffff | 4 kB | ... guard hole
__________________|____________|__________________|_________|___________________________________________________________
| | | |
0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -64 PB
0100000000000000 | +64 PB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -8EB TB
| | | | starting offset of kernel mappings.
| | | |
| | | | LAM relaxes canonicallity check allowing to create aliases
| | | | for userspace memory here.
__________________|____________|__________________|_________|___________________________________________________________
|
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
8000000000000000 | -8 EB | feffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -64 PB
| | | | starting offset of kernel mappings.
| | | |
| | | | LAM_SUP relaxes canonicallity check allowing to create
| | | | aliases for kernel memory here.
____________________________________________________________|___________________________________________________________
| | | |
ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor

View File

@ -9,7 +9,7 @@ controllers), BFQ's main features are:
- BFQ guarantees a high system and application responsiveness, and a
low latency for time-sensitive applications, such as audio or video
players;
- BFQ distributes bandwidth, and not just time, among processes or
- BFQ distributes bandwidth, not just time, among processes or
groups (switching back to time distribution when needed to keep
throughput high).
@ -111,7 +111,7 @@ Higher speed for code-development tasks
If some additional workload happens to be executed in parallel, then
BFQ executes the I/O-related components of typical code-development
tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
NOOP or DEADLINE.
High throughput
@ -127,9 +127,9 @@ Strong fairness, bandwidth and delay guarantees
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
BFQ distributes the device throughput, and not just the device time,
among I/O-bound applications in proportion their weights, with any
among I/O-bound applications in proportion to their weights, with any
workload and regardless of the device parameters. From these bandwidth
guarantees, it is possible to compute tight per-I/O-request delay
guarantees, it is possible to compute a tight per-I/O-request delay
guarantees by a simple formula. If not configured for strict service
guarantees, BFQ switches to time-based resource sharing (only) for
applications that would otherwise cause a throughput loss.
@ -199,7 +199,7 @@ plus a lot of code, are borrowed from CFQ.
- On flash-based storage with internal queueing of commands
(typically NCQ), device idling happens to be always detrimental
for throughput. So, with these devices, BFQ performs idling
to throughput. So, with these devices, BFQ performs idling
only when strictly needed for service guarantees, i.e., for
guaranteeing low latency or fairness. In these cases, overall
throughput may be sub-optimal. No solution currently exists to
@ -212,7 +212,7 @@ plus a lot of code, are borrowed from CFQ.
and to reduce their latency. The most important action taken to
achieve this goal is to give to the queues associated with these
applications more than their fair share of the device
throughput. For brevity, we call just "weight-raising" the whole
throughput. For brevity, we call it just "weight-raising" the whole
sets of actions taken by BFQ to privilege these queues. In
particular, BFQ provides a milder form of weight-raising for
interactive applications, and a stronger form for soft real-time
@ -231,7 +231,7 @@ plus a lot of code, are borrowed from CFQ.
responsive in detecting interleaved I/O (cooperating processes),
that it enables BFQ to achieve a high throughput, by queue
merging, even for queues for which CFQ needs a different
mechanism, preemption, to get a high throughput. As such EQM is a
mechanism, preemption, to get a high throughput. As such, EQM is a
unified mechanism to achieve a high throughput with interleaved
I/O.
@ -254,7 +254,7 @@ plus a lot of code, are borrowed from CFQ.
- First, with any proportional-share scheduler, the maximum
deviation with respect to an ideal service is proportional to
the maximum budget (slice) assigned to queues. As a consequence,
BFQ can keep this deviation tight not only because of the
BFQ can keep this deviation tight, not only because of the
accurate service of B-WF2Q+, but also because BFQ *does not*
need to assign a larger budget to a queue to let the queue
receive a higher fraction of the device throughput.
@ -327,7 +327,7 @@ applications. Unset this tunable if you need/want to control weights.
slice_idle
----------
This parameter specifies how long BFQ should idle for next I/O
This parameter specifies how long BFQ should idle for the next I/O
request, when certain sync BFQ queues become empty. By default
slice_idle is a non-zero value. Idling has a double purpose: boosting
throughput and making sure that the desired throughput distribution is
@ -365,7 +365,7 @@ terms of I/O-request dispatches. To guarantee that the actual service
order then corresponds to the dispatch order, the strict_guarantees
tunable must be set too.
There is an important flipside for idling: apart from the above cases
There is an important flip side to idling: apart from the above cases
where it is beneficial also for throughput, idling can severely impact
throughput. One important case is random workload. Because of this
issue, BFQ tends to avoid idling as much as possible, when it is not
@ -475,7 +475,7 @@ max_budget
Maximum amount of service, measured in sectors, that can be provided
to a BFQ queue once it is set in service (of course within the limits
of the above timeout). According to what said in the description of
of the above timeout). According to what was said in the description of
the algorithm, larger values increase the throughput in proportion to
the percentage of sequential I/O requests issued. The price of larger
values is that they coarsen the granularity of short-term bandwidth

View File

@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
create a link to block device partition with the name "PARTNAME".
User space application can access partition by partition name.
ro
read-only. Flag the partition as read-only.
Example:
eMMC disk names are "mmcblk0" and "mmcblk0boot0".
bootargs::
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
dmesg::

View File

@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:
- user recovery feature description
Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
``UBLK_F_USER_RECOVERY_REISSUE``.
Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
enable recovery of ublk devices after the ublk server exits, the ublk server
should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
ublk server may additionally specify at most one of
``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
modify how I/O is handled while the ublk server is dying/dead (this is called
the ``nosrv`` case in the driver code).
With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
With just ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
recovery stage and ublk device ID is kept. It is ublk server's
responsibility to recover the device context by its own knowledge.
Requests which have not been issued to userspace are requeued. Requests
which have been issued to userspace are aborted.
With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after one ubq_daemon
(ublk server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
requests which have been issued to userspace are requeued and will be
re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
double-write since the driver may issue the same I/O request twice. It
might be useful to a read-only FS or a VM backend.
With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
exits, requests which have issued to userspace are failed, as are any
subsequently issued requests. Applications continuously issuing I/O against
devices with this flag set will see a stream of I/O errors until a new ublk
server recovers the device.
Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
Once the flag is set, all control commands can be sent by unprivileged
user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on

View File

@ -368,7 +368,7 @@ No additional type data follow ``btf_type``.
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
or BTF_FUNC_EXTERN)
or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
@ -424,9 +424,8 @@ following data::
__u32 linkage;
};
``struct btf_var`` encoding:
* ``linkage``: currently only static variable 0, or globally allocated
variable in ELF sections 1
``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
see :ref:`BTF_Var_Linkage_Constants`.
Not all type of global variables are supported by LLVM at this point.
The following is currently available:
@ -549,6 +548,38 @@ The ``btf_enum64`` encoding:
If the original enum value is signed and the size is less than 8,
that value will be sign extended into 8 bytes.
2.3 Constant Values
-------------------
.. _BTF_Function_Linkage_Constants:
2.3.1 Function Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Function Linkage Values and Meanings
=================== ===== ===========
kind value description
=================== ===== ===========
``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
=================== ===== ===========
.. _BTF_Var_Linkage_Constants:
2.3.2 Variable Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Variable Linkage Values and Meanings
============================ ===== ===========
kind value description
============================ ===== ===========
``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
============================ ===== ===========
3. BTF Kernel API
=================

View File

@ -121,6 +121,8 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
@ -131,11 +133,23 @@ described in more detail in the footnotes.
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
+ + +----------------------------------+-----------+
| | | ``tc`` | |
| | | ``tc`` [#tc_legacy]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -155,7 +169,9 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
+ + +----------------------------------+-----------+
| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -209,5 +225,11 @@ described in more detail in the footnotes.
``a-zA-Z0-9_.*?``.
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
``tcx/*`` instead.
.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
but ``name`` is ignored and it is recommended to just use plain
``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
that is tagged with ``SEC(".struct_ops[.link]")``.
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.

View File

@ -418,7 +418,7 @@ The rules for correspondence between registers / stack slots are as follows:
linked to the registers and stack slots of the parent state with the same
indices.
* For the outer stack frames, only caller saved registers (r6-r9) and stack
* For the outer stack frames, only callee saved registers (r6-r9) and stack
slots are linked to the registers and stack slots of the parent state with the
same indices.

Some files were not shown because too many files have changed in this diff Show More