Merge branch 'for-6.14' into for-next
@ -141,11 +141,13 @@ ForEachMacros:
|
||||
- 'damon_for_each_target_safe'
|
||||
- 'damos_for_each_filter'
|
||||
- 'damos_for_each_filter_safe'
|
||||
- 'damos_for_each_quota_goal'
|
||||
- 'damos_for_each_quota_goal_safe'
|
||||
- 'data__for_each_file'
|
||||
- 'data__for_each_file_new'
|
||||
- 'data__for_each_file_start'
|
||||
- 'device_for_each_child_node'
|
||||
- 'displayid_iter_for_each'
|
||||
- 'device_for_each_child_node_scoped'
|
||||
- 'dma_fence_array_for_each'
|
||||
- 'dma_fence_chain_for_each'
|
||||
- 'dma_fence_unwrap_for_each'
|
||||
@ -172,11 +174,14 @@ ForEachMacros:
|
||||
- 'drm_for_each_plane'
|
||||
- 'drm_for_each_plane_mask'
|
||||
- 'drm_for_each_privobj'
|
||||
- 'drm_gem_for_each_gpuva'
|
||||
- 'drm_gem_for_each_gpuva_safe'
|
||||
- 'drm_gem_for_each_gpuvm_bo'
|
||||
- 'drm_gem_for_each_gpuvm_bo_safe'
|
||||
- 'drm_gpuva_for_each_op'
|
||||
- 'drm_gpuva_for_each_op_from_reverse'
|
||||
- 'drm_gpuva_for_each_op_reverse'
|
||||
- 'drm_gpuva_for_each_op_safe'
|
||||
- 'drm_gpuvm_bo_for_each_va'
|
||||
- 'drm_gpuvm_bo_for_each_va_safe'
|
||||
- 'drm_gpuvm_for_each_va'
|
||||
- 'drm_gpuvm_for_each_va_range'
|
||||
- 'drm_gpuvm_for_each_va_range_safe'
|
||||
@ -192,11 +197,11 @@ ForEachMacros:
|
||||
- 'dsa_switch_for_each_port_continue_reverse'
|
||||
- 'dsa_switch_for_each_port_safe'
|
||||
- 'dsa_switch_for_each_user_port'
|
||||
- 'dsa_switch_for_each_user_port_continue_reverse'
|
||||
- 'dsa_tree_for_each_cpu_port'
|
||||
- 'dsa_tree_for_each_user_port'
|
||||
- 'dsa_tree_for_each_user_port_continue_reverse'
|
||||
- 'dso__for_each_symbol'
|
||||
- 'dsos__for_each_with_build_id'
|
||||
- 'elf_hash_for_each_possible'
|
||||
- 'elf_symtab__for_each_symbol'
|
||||
- 'evlist__for_each_cpu'
|
||||
@ -216,6 +221,7 @@ ForEachMacros:
|
||||
- 'for_each_and_bit'
|
||||
- 'for_each_andnot_bit'
|
||||
- 'for_each_available_child_of_node'
|
||||
- 'for_each_available_child_of_node_scoped'
|
||||
- 'for_each_bench'
|
||||
- 'for_each_bio'
|
||||
- 'for_each_board_func_rsrc'
|
||||
@ -234,6 +240,7 @@ ForEachMacros:
|
||||
- 'for_each_card_widgets_safe'
|
||||
- 'for_each_cgroup_storage_type'
|
||||
- 'for_each_child_of_node'
|
||||
- 'for_each_child_of_node_scoped'
|
||||
- 'for_each_clear_bit'
|
||||
- 'for_each_clear_bit_from'
|
||||
- 'for_each_clear_bitrange'
|
||||
@ -251,6 +258,7 @@ ForEachMacros:
|
||||
- 'for_each_cpu'
|
||||
- 'for_each_cpu_and'
|
||||
- 'for_each_cpu_andnot'
|
||||
- 'for_each_cpu_from'
|
||||
- 'for_each_cpu_or'
|
||||
- 'for_each_cpu_wrap'
|
||||
- 'for_each_dapm_widgets'
|
||||
@ -269,13 +277,14 @@ ForEachMacros:
|
||||
- 'for_each_element'
|
||||
- 'for_each_element_extid'
|
||||
- 'for_each_element_id'
|
||||
- 'for_each_enabled_cpu'
|
||||
- 'for_each_endpoint_of_node'
|
||||
- 'for_each_event'
|
||||
- 'for_each_event_tps'
|
||||
- 'for_each_evictable_lru'
|
||||
- 'for_each_fib6_node_rt_rcu'
|
||||
- 'for_each_fib6_walker_rt'
|
||||
- 'for_each_free_mem_pfn_range_in_zone'
|
||||
- 'for_each_file_lock'
|
||||
- 'for_each_free_mem_pfn_range_in_zone_from'
|
||||
- 'for_each_free_mem_range'
|
||||
- 'for_each_free_mem_range_reverse'
|
||||
@ -286,15 +295,18 @@ ForEachMacros:
|
||||
- 'for_each_group_member'
|
||||
- 'for_each_group_member_head'
|
||||
- 'for_each_hstate'
|
||||
- 'for_each_hwgpio'
|
||||
- 'for_each_if'
|
||||
- 'for_each_inject_fn'
|
||||
- 'for_each_insn'
|
||||
- 'for_each_insn_op_loc'
|
||||
- 'for_each_insn_prefix'
|
||||
- 'for_each_intid'
|
||||
- 'for_each_iommu'
|
||||
- 'for_each_ip_tunnel_rcu'
|
||||
- 'for_each_irq_nr'
|
||||
- 'for_each_lang'
|
||||
- 'for_each_link_ch_maps'
|
||||
- 'for_each_link_codecs'
|
||||
- 'for_each_link_cpus'
|
||||
- 'for_each_link_platforms'
|
||||
@ -332,6 +344,9 @@ ForEachMacros:
|
||||
- 'for_each_new_plane_in_state_reverse'
|
||||
- 'for_each_new_private_obj_in_state'
|
||||
- 'for_each_new_reg'
|
||||
- 'for_each_nhlt_endpoint'
|
||||
- 'for_each_nhlt_endpoint_fmtcfg'
|
||||
- 'for_each_nhlt_fmtcfg'
|
||||
- 'for_each_node'
|
||||
- 'for_each_node_by_name'
|
||||
- 'for_each_node_by_type'
|
||||
@ -387,12 +402,15 @@ ForEachMacros:
|
||||
- 'for_each_reloc_from'
|
||||
- 'for_each_requested_gpio'
|
||||
- 'for_each_requested_gpio_in_range'
|
||||
- 'for_each_reserved_child_of_node'
|
||||
- 'for_each_reserved_mem_range'
|
||||
- 'for_each_reserved_mem_region'
|
||||
- 'for_each_rtd_ch_maps'
|
||||
- 'for_each_rtd_codec_dais'
|
||||
- 'for_each_rtd_components'
|
||||
- 'for_each_rtd_cpu_dais'
|
||||
- 'for_each_rtd_dais'
|
||||
- 'for_each_rtd_dais_reverse'
|
||||
- 'for_each_sband_iftype_data'
|
||||
- 'for_each_script'
|
||||
- 'for_each_sec'
|
||||
@ -533,8 +551,6 @@ ForEachMacros:
|
||||
- 'lwq_for_each_safe'
|
||||
- 'map__for_each_symbol'
|
||||
- 'map__for_each_symbol_by_name'
|
||||
- 'maps__for_each_entry'
|
||||
- 'maps__for_each_entry_safe'
|
||||
- 'mas_for_each'
|
||||
- 'mci_for_each_dimm'
|
||||
- 'media_device_for_each_entity'
|
||||
@ -560,7 +576,9 @@ ForEachMacros:
|
||||
- 'netdev_hw_addr_list_for_each'
|
||||
- 'nft_rule_for_each_expr'
|
||||
- 'nla_for_each_attr'
|
||||
- 'nla_for_each_attr_type'
|
||||
- 'nla_for_each_nested'
|
||||
- 'nla_for_each_nested_type'
|
||||
- 'nlmsg_for_each_attr'
|
||||
- 'nlmsg_for_each_msg'
|
||||
- 'nr_neigh_for_each'
|
||||
@ -579,6 +597,7 @@ ForEachMacros:
|
||||
- 'perf_config_sections__for_each_entry'
|
||||
- 'perf_config_set__for_each_entry'
|
||||
- 'perf_cpu_map__for_each_cpu'
|
||||
- 'perf_cpu_map__for_each_cpu_skip_any'
|
||||
- 'perf_cpu_map__for_each_idx'
|
||||
- 'perf_evlist__for_each_entry'
|
||||
- 'perf_evlist__for_each_entry_reverse'
|
||||
@ -639,7 +658,6 @@ ForEachMacros:
|
||||
- 'shost_for_each_device'
|
||||
- 'sk_for_each'
|
||||
- 'sk_for_each_bound'
|
||||
- 'sk_for_each_bound_bhash2'
|
||||
- 'sk_for_each_entry_offset_rcu'
|
||||
- 'sk_for_each_from'
|
||||
- 'sk_for_each_rcu'
|
||||
@ -653,6 +671,7 @@ ForEachMacros:
|
||||
- 'snd_soc_dapm_widget_for_each_path_safe'
|
||||
- 'snd_soc_dapm_widget_for_each_sink_path'
|
||||
- 'snd_soc_dapm_widget_for_each_source_path'
|
||||
- 'sparsebit_for_each_set_range'
|
||||
- 'strlist__for_each_entry'
|
||||
- 'strlist__for_each_entry_safe'
|
||||
- 'sym_for_each_insn'
|
||||
@ -662,7 +681,6 @@ ForEachMacros:
|
||||
- 'tcf_act_for_each_action'
|
||||
- 'tcf_exts_for_each_action'
|
||||
- 'ttm_resource_manager_for_each_res'
|
||||
- 'twsk_for_each_bound_bhash2'
|
||||
- 'udp_portaddr_for_each_entry'
|
||||
- 'udp_portaddr_for_each_entry_rcu'
|
||||
- 'usb_hub_for_each_child'
|
||||
@ -686,6 +704,9 @@ ForEachMacros:
|
||||
- 'xbc_node_for_each_child'
|
||||
- 'xbc_node_for_each_key_value'
|
||||
- 'xbc_node_for_each_subkey'
|
||||
- 'ynl_attr_for_each'
|
||||
- 'ynl_attr_for_each_nested'
|
||||
- 'ynl_attr_for_each_payload'
|
||||
- 'zorro_for_each_dev'
|
||||
|
||||
IncludeBlocks: Preserve
|
||||
|
@ -3,3 +3,4 @@ Alan Cox <root@hraefn.swansea.linux.org.uk>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Jeff Kirsher <jeffrey.t.kirsher@intel.com>
|
||||
Marc Gonzalez <marc.w.gonzalez@free.fr>
|
||||
Ralf Baechle <ralf@linux-mips.org>
|
||||
|
3
.gitignore
vendored
@ -47,7 +47,6 @@
|
||||
*.so.dbg
|
||||
*.su
|
||||
*.symtypes
|
||||
*.symversions
|
||||
*.tab.[ch]
|
||||
*.tar
|
||||
*.xz
|
||||
@ -71,6 +70,7 @@ modules.order
|
||||
/Module.markers
|
||||
/modules.builtin
|
||||
/modules.builtin.modinfo
|
||||
/modules.builtin.ranges
|
||||
/modules.nsdeps
|
||||
|
||||
#
|
||||
@ -143,7 +143,6 @@ GTAGS
|
||||
# id-utils files
|
||||
ID
|
||||
|
||||
*.orig
|
||||
*~
|
||||
\#*#
|
||||
|
||||
|
26
.mailmap
@ -73,6 +73,8 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
|
||||
Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
|
||||
André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
|
||||
Andy Adamson <andros@citi.umich.edu>
|
||||
Andy Chiu <andybnac@gmail.com> <andy.chiu@sifive.com>
|
||||
Andy Chiu <andybnac@gmail.com> <taochiu@synology.com>
|
||||
Andy Shevchenko <andy@kernel.org> <andy@smile.org.ua>
|
||||
Andy Shevchenko <andy@kernel.org> <ext-andriy.shevchenko@nokia.com>
|
||||
Anilkumar Kolli <quic_akolli@quicinc.com> <akolli@codeaurora.org>
|
||||
@ -154,6 +156,9 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||
Christian Marangi <ansuelsmth@gmail.com>
|
||||
Christophe Ricard <christophe.ricard@gmail.com>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@citi.umich.edu>
|
||||
Claudiu Beznea <claudiu.beznea@tuxon.dev> <claudiu.beznea@microchip.com>
|
||||
Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com>
|
||||
Corey Minyard <minyard@acm.org>
|
||||
@ -194,18 +199,23 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
|
||||
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
|
||||
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
|
||||
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
|
||||
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
|
||||
Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
|
||||
Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@intel.com>
|
||||
Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@collabora.com>
|
||||
Fangrui Song <i@maskray.me> <maskray@google.com>
|
||||
Felipe W Damasio <felipewd@terra.com.br>
|
||||
Felix Kuhling <fxkuehl@gmx.de>
|
||||
Felix Moeller <felix@derklecks.de>
|
||||
Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
|
||||
Filipe Lautert <filipe@icewall.org>
|
||||
Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
|
||||
Fiona Behrens <me@kloenk.dev>
|
||||
Fiona Behrens <me@kloenk.dev> <me@kloenk.de>
|
||||
Fiona Behrens <me@kloenk.dev> <fin@nyantec.com>
|
||||
Franck Bui-Huu <vagabon.xyz@gmail.com>
|
||||
Frank Rowand <frowand.list@gmail.com> <frank.rowand@am.sony.com>
|
||||
Frank Rowand <frowand.list@gmail.com> <frank.rowand@sony.com>
|
||||
@ -273,7 +283,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
|
||||
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
|
||||
@ -297,6 +307,11 @@ Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
|
||||
Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
|
||||
Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
|
||||
Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
|
||||
Jesper Dangaard Brouer <hawk@kernel.org> <brouer@redhat.com>
|
||||
Jesper Dangaard Brouer <hawk@kernel.org> <hawk@comx.dk>
|
||||
Jesper Dangaard Brouer <hawk@kernel.org> <jbrouer@redhat.com>
|
||||
Jesper Dangaard Brouer <hawk@kernel.org> <jdb@comx.dk>
|
||||
Jesper Dangaard Brouer <hawk@kernel.org> <netoptimizer@brouer.com>
|
||||
Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
|
||||
Jilai Wang <quic_jilaiw@quicinc.com> <jilaiw@codeaurora.org>
|
||||
Jiri Kosina <jikos@kernel.org> <jikos@jikos.cz>
|
||||
@ -313,6 +328,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
|
||||
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
|
||||
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
|
||||
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
|
||||
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
|
||||
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
||||
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
||||
John Crispin <john@phrozen.org> <blogic@openwrt.org>
|
||||
@ -613,6 +629,10 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
|
||||
Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
|
||||
Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
|
||||
Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
|
||||
Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@ffwll.ch>
|
||||
Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@intel.com>
|
||||
Simona Vetter <simona.vetter@ffwll.ch> <daniel@ffwll.ch>
|
||||
Simona Vetter <simona.vetter@ffwll.ch> <daniel@biene.ffwll.ch>
|
||||
Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
|
||||
Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
|
||||
Simon Kelley <simon@thekelleys.org.uk>
|
||||
@ -645,6 +665,7 @@ Tomeu Vizoso <tomeu@tomeuvizoso.net> <tomeu.vizoso@collabora.com>
|
||||
Thomas Graf <tgraf@suug.ch>
|
||||
Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
|
||||
Thomas Pedersen <twp@codeaurora.org>
|
||||
Thorsten Blum <thorsten.blum@linux.dev> <thorsten.blum@toblux.com>
|
||||
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
|
||||
Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
|
||||
Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
|
||||
@ -709,6 +730,7 @@ Will Deacon <will@kernel.org> <will.deacon@arm.com>
|
||||
Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
|
||||
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
|
||||
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
|
||||
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
|
||||
Yusuke Goda <goda.yusuke@renesas.com>
|
||||
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
|
||||
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
|
||||
|
73
CREDITS
@ -185,6 +185,11 @@ P: 1024/AF7B30C1 CF 97 C2 CC 6D AE A7 FE C8 BA 9C FC 88 DE 32 C3
|
||||
D: Linux/MIPS port
|
||||
D: Linux/68k hacker
|
||||
D: AX25 maintainer
|
||||
D: EDAC-CAVIUM OCTEON maintainer
|
||||
D: IOC3 ETHERNET DRIVER maintainer
|
||||
D: NETROM NETWORK LAYER maintainer
|
||||
D: ROSE NETWORK LAYER maintainer
|
||||
D: TURBOCHANNEL SUBSYSTEM maintainer
|
||||
S: Hauptstrasse 19
|
||||
S: 79837 St. Blasien
|
||||
S: Germany
|
||||
@ -378,6 +383,9 @@ S: 1549 Hiironen Rd.
|
||||
S: Brimson, MN 55602
|
||||
S: USA
|
||||
|
||||
N: Arnd Bergmann
|
||||
D: Maintainer of Cell Broadband Engine Architecture
|
||||
|
||||
N: Hennus Bergman
|
||||
P: 1024/77D50909 76 99 FD 31 91 E1 96 1C 90 BB 22 80 62 F6 BD 63
|
||||
D: Author and maintainer of the QIC-02 tape driver
|
||||
@ -1201,6 +1209,10 @@ S: Dreisbachstrasse 24
|
||||
S: D-57250 Netphen
|
||||
S: Germany
|
||||
|
||||
N: Florian Fainelli
|
||||
E: f.fainelli@gmail.com
|
||||
D: DSA
|
||||
|
||||
N: Rik Faith
|
||||
E: faith@acm.org
|
||||
D: Future Domain TMC-16x0 SCSI driver (author)
|
||||
@ -1355,10 +1367,6 @@ D: Major kbuild rework during the 2.5 cycle
|
||||
D: ISDN Maintainer
|
||||
S: USA
|
||||
|
||||
N: Gerrit Renker
|
||||
E: gerrit@erg.abdn.ac.uk
|
||||
D: DCCP protocol support.
|
||||
|
||||
N: Philip Gladstone
|
||||
E: philip@gladstonefamily.net
|
||||
D: Kernel / timekeeping stuff
|
||||
@ -1674,11 +1682,6 @@ W: http://www.carumba.com/
|
||||
D: bug toaster (A1 sauce makes all the difference)
|
||||
D: Random linux hacker
|
||||
|
||||
N: James Hogan
|
||||
E: jhogan@kernel.org
|
||||
D: Metag architecture maintainer
|
||||
D: TZ1090 SoC maintainer
|
||||
|
||||
N: Tim Hockin
|
||||
E: thockin@hockin.org
|
||||
W: http://www.hockin.org/~thockin
|
||||
@ -1694,6 +1697,11 @@ D: hwmon subsystem maintainer
|
||||
D: i2c-sis96x and i2c-stub SMBus drivers
|
||||
S: USA
|
||||
|
||||
N: James Hogan
|
||||
E: jhogan@kernel.org
|
||||
D: Metag architecture maintainer
|
||||
D: TZ1090 SoC maintainer
|
||||
|
||||
N: Dirk Hohndel
|
||||
E: hohndel@suse.de
|
||||
D: The XFree86[tm] Project
|
||||
@ -1869,6 +1877,13 @@ S: K osmidomkum 723
|
||||
S: 160 00 Praha 6
|
||||
S: Czech Republic
|
||||
|
||||
N: Seth Jennings
|
||||
E: sjenning@redhat.com
|
||||
D: Creation and maintenance of zswap
|
||||
|
||||
N: Jeremy Kerr
|
||||
D: Maintainer of SPU File System
|
||||
|
||||
N: Michael Kerrisk
|
||||
E: mtk.manpages@gmail.com
|
||||
W: https://man7.org/
|
||||
@ -2182,19 +2197,6 @@ N: Mike Kravetz
|
||||
E: mike.kravetz@oracle.com
|
||||
D: Maintenance and development of the hugetlb subsystem
|
||||
|
||||
N: Seth Jennings
|
||||
E: sjenning@redhat.com
|
||||
D: Creation and maintenance of zswap
|
||||
|
||||
N: Dan Streetman
|
||||
E: ddstreet@ieee.org
|
||||
D: Maintenance and development of zswap
|
||||
D: Creation and maintenance of the zpool API
|
||||
|
||||
N: Vitaly Wool
|
||||
E: vitaly.wool@konsulko.com
|
||||
D: Maintenance and development of zswap
|
||||
|
||||
N: Andreas S. Krebs
|
||||
E: akrebs@altavista.net
|
||||
D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards
|
||||
@ -3185,6 +3187,11 @@ N: Ken Pizzini
|
||||
E: ken@halcyon.com
|
||||
D: CDROM driver "sonycd535" (Sony CDU-535/531)
|
||||
|
||||
N: Mathieu Poirier
|
||||
E: mathieu.poirier@linaro.org
|
||||
D: CoreSight kernel subsystem, Maintainer 2014-2022
|
||||
D: Perf tool support for CoreSight
|
||||
|
||||
N: Stelian Pop
|
||||
E: stelian@popies.net
|
||||
P: 1024D/EDBB6147 7B36 0E07 04BC 11DC A7A0 D3F7 7185 9E7A EDBB 6147
|
||||
@ -3294,6 +3301,10 @@ S: Schlossbergring 9
|
||||
S: 79098 Freiburg
|
||||
S: Germany
|
||||
|
||||
N: Gerrit Renker
|
||||
E: gerrit@erg.abdn.ac.uk
|
||||
D: DCCP protocol support.
|
||||
|
||||
N: Thomas Renninger
|
||||
E: trenn@suse.de
|
||||
D: cpupowerutils
|
||||
@ -3570,11 +3581,6 @@ D: several improvements to system programs
|
||||
S: Oldenburg
|
||||
S: Germany
|
||||
|
||||
N: Mathieu Poirier
|
||||
E: mathieu.poirier@linaro.org
|
||||
D: CoreSight kernel subsystem, Maintainer 2014-2022
|
||||
D: Perf tool support for CoreSight
|
||||
|
||||
N: Robert Schwebel
|
||||
E: robert@schwebel.de
|
||||
W: https://www.schwebel.de
|
||||
@ -3765,6 +3771,11 @@ S: Chr. Winthersvej 1 B, st.th.
|
||||
S: DK-1860 Frederiksberg C
|
||||
S: Denmark
|
||||
|
||||
N: Dan Streetman
|
||||
E: ddstreet@ieee.org
|
||||
D: Maintenance and development of zswap
|
||||
D: Creation and maintenance of the zpool API
|
||||
|
||||
N: Drew Sullivan
|
||||
E: drew@ss.org
|
||||
W: http://www.ss.org/
|
||||
@ -3789,6 +3800,10 @@ S: Department of Zoology, University of Washington
|
||||
S: Seattle, WA 98195-1800
|
||||
S: USA
|
||||
|
||||
N: York Sun
|
||||
E: york.sun@nxp.com
|
||||
D: Freescale DDR EDAC
|
||||
|
||||
N: Eugene Surovegin
|
||||
E: ebs@ebshome.net
|
||||
W: https://kernel.ebshome.net/
|
||||
@ -4280,6 +4295,10 @@ S: Pipers Way
|
||||
S: Swindon. SN3 1RJ
|
||||
S: England
|
||||
|
||||
N: Vitaly Wool
|
||||
E: vitaly.wool@konsulko.com
|
||||
D: Maintenance and development of zswap
|
||||
|
||||
N: Chris Wright
|
||||
E: chrisw@sous-sol.org
|
||||
D: hacking on LSM framework and security modules.
|
||||
|
12
Documentation/ABI/obsolete/sysfs-selinux-user
Normal file
@ -0,0 +1,12 @@
|
||||
What: /sys/fs/selinux/user
|
||||
Date: April 2005 (predates git)
|
||||
KernelVersion: 2.6.12-rc2 (predates git)
|
||||
Contact: selinux@vger.kernel.org
|
||||
Description:
|
||||
|
||||
The selinuxfs "user" node allows userspace to request a list
|
||||
of security contexts that can be reached for a given SELinux
|
||||
user from a given starting context. This was used by libselinux
|
||||
when various login-style programs requested contexts for
|
||||
users, but libselinux stopped using it in 2020.
|
||||
Kernel support will be removed no sooner than Dec 2025.
|
@ -424,6 +424,13 @@ Description:
|
||||
[RW] This file is used to control (on/off) the iostats
|
||||
accounting of the disk.
|
||||
|
||||
What: /sys/block/<disk>/queue/iostats_passthrough
|
||||
Date: October 2024
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RW] This file is used to control (on/off) the iostats
|
||||
accounting of the disk for passthrough commands.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/logical_block_size
|
||||
Date: May 2009
|
||||
@ -594,6 +601,9 @@ Description:
|
||||
[RW] Maximum number of kilobytes to read-ahead for filesystems
|
||||
on this block device.
|
||||
|
||||
For MADV_HUGEPAGE, the readahead size may exceed this setting
|
||||
since its granularity is based on the hugepage size.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/rotational
|
||||
Date: January 2009
|
||||
|
@ -11,7 +11,7 @@ Description:
|
||||
Read returns '0' or '1' for read-write or read-only modes
|
||||
respectively.
|
||||
Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
|
||||
and "off", i.e. what kstrbool() supports.
|
||||
and "off", i.e. what kstrtobool() supports.
|
||||
Note: This file is only present if CONFIG_NVMEM_SYSFS
|
||||
is enabled.
|
||||
|
||||
|
@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called
|
||||
the vDSO and it often contains useful and highly-optimized alternatives
|
||||
to real syscalls.
|
||||
|
||||
These functions are called just like ordinary C function according to
|
||||
your platform's ABI. Call them from a sensible context. (For example,
|
||||
if you set CS on x86 to something strange, the vDSO functions are
|
||||
These functions are called according to your platform's ABI. On many
|
||||
platforms they are called just like ordinary C function. On other platforms
|
||||
(ex: powerpc) they are called with the same convention as system calls which
|
||||
is different from ordinary C functions. Call them from a sensible context.
|
||||
(For example, if you set CS on x86 to something strange, the vDSO functions are
|
||||
within their rights to crash.) In addition, if you pass a bad
|
||||
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
|
||||
|
||||
|
@ -6,3 +6,10 @@ Description:
|
||||
This item contains just one readonly attribute: port_num.
|
||||
It contains the port number of the /dev/ttyGS<n> device
|
||||
associated with acm function's instance "name".
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/acm.name/protocol
|
||||
Date: Aug 2024
|
||||
KernelVersion: 6.13
|
||||
Description:
|
||||
Reported bInterfaceProtocol for the ACM device. For legacy
|
||||
reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).
|
||||
|
@ -30,4 +30,12 @@ Description:
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
p_it_name playback input terminal name
|
||||
p_it_ch_name playback channels name
|
||||
p_ot_name playback output terminal name
|
||||
p_fu_vol_name playback mute/volume functional unit name
|
||||
c_it_name capture input terminal name
|
||||
c_it_ch_name capture channels name
|
||||
c_ot_name capture output terminal name
|
||||
c_fu_vol_name capture mute/volume functional unit name
|
||||
===================== =======================================
|
||||
|
@ -35,6 +35,17 @@ Description:
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
if_ctrl_name topology control name
|
||||
clksrc_in_name input clock name
|
||||
clksrc_out_name output clock name
|
||||
p_it_name playback input terminal name
|
||||
p_it_ch_name playback input first channel name
|
||||
p_ot_name playback output terminal name
|
||||
p_fu_vol_name playback mute/volume function unit name
|
||||
c_it_name capture input terminal name
|
||||
c_it_ch_name capture input first channel name
|
||||
c_ot_name capture output terminal name
|
||||
c_fu_vol_name capture mute/volume functional unit name
|
||||
c_terminal_type code of the capture terminal type
|
||||
p_terminal_type code of the playback terminal type
|
||||
===================== =======================================
|
||||
|
@ -184,3 +184,10 @@ Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of time out requests.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cap_regs
|
||||
Date: Oct 2024
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the values of the qm and hpre capability bit registers and
|
||||
support the query of device specifications to facilitate fault locating.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
@ -157,3 +157,10 @@ Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of completed but marked error requests
|
||||
to be received.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/cap_regs
|
||||
Date: Oct 2024
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the values of the qm and sec capability bit registers and
|
||||
support the query of device specifications to facilitate fault locating.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
@ -158,3 +158,10 @@ Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of BD type error requests
|
||||
to be received.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/cap_regs
|
||||
Date: Oct 2024
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the values of the qm and zip capability bit registers and
|
||||
support the query of device specifications to facilitate fault locating.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
39
Documentation/ABI/testing/debugfs-iio-ad9467
Normal file
@ -0,0 +1,39 @@
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This dumps the calibration table that was filled during the
|
||||
digital interface tuning process.
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
List all the available test tones:
|
||||
- off
|
||||
- midscale_short
|
||||
- pos_fullscale
|
||||
- neg_fullscale
|
||||
- checkerboard
|
||||
- prbs23
|
||||
- prbs9
|
||||
- one_zero_toggle
|
||||
- user
|
||||
- bit_toggle
|
||||
- sync
|
||||
- one_bit_high
|
||||
- mixed_bit_frequency
|
||||
- ramp
|
||||
|
||||
Note that depending on the actual device being used, some of the
|
||||
above might not be available (and they won't be listed when
|
||||
reading the file).
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Writing to this file will initiate one of available test tone on
|
||||
channel Y. Reading it, shows which test is running. In cases
|
||||
where an IIO backend is available and supports the test tone,
|
||||
additional information about the data correctness is given.
|
20
Documentation/ABI/testing/debugfs-iio-backend
Normal file
@ -0,0 +1,20 @@
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Name of Backend Y connected to device X.
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Directly access the registers of backend Y. Typical usage is:
|
||||
|
||||
Reading address 0x50
|
||||
echo 0x50 > direct_reg_access
|
||||
cat direct_reg_access
|
||||
|
||||
Writing address 0x50
|
||||
echo 0x50 0x3 > direct_reg_access
|
||||
//readback address 0x50
|
||||
cat direct_reg_access
|
@ -151,3 +151,10 @@ Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
|
||||
Description:
|
||||
The recompress file is write-only and triggers re-compression
|
||||
with secondary compression algorithms.
|
||||
|
||||
What: /sys/block/zram<id>/algorithm_params
|
||||
Date: August 2024
|
||||
Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
|
||||
Description:
|
||||
The algorithm_params file is write-only and is used to setup
|
||||
compression algorithm parameters.
|
||||
|
@ -523,13 +523,27 @@ Description:
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -541,6 +555,10 @@ Description:
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
|
||||
KernelVersion: 5.8
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -549,25 +567,34 @@ Description:
|
||||
- a small discrete set of values like "0 2 4 6 8"
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -575,6 +602,20 @@ Description:
|
||||
production inaccuracies). If shared across all channels,
|
||||
<type>_calibscale is used.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
|
||||
KernelVersion: 4.8
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Available values of calibscale. Maybe expressed as either of:
|
||||
|
||||
- a small discrete set of values like "1 8 16"
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
If shared across all channels, <type>_calibscale_available is used.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
|
||||
@ -708,6 +749,7 @@ Description:
|
||||
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
|
||||
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
|
||||
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
|
||||
42kohm_to_gnd: connected to ground via a 42kOhm resistor,
|
||||
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
|
||||
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
|
||||
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
|
||||
@ -2289,3 +2331,11 @@ KernelVersion: 6.7
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
List of available timeout value for tap gesture confirmation.
|
||||
|
||||
What: /sys/.../iio:deviceX/in_shunt_resistor
|
||||
What: /sys/.../iio:deviceX/in_current_shunt_resistor
|
||||
What: /sys/.../iio:deviceX/in_power_shunt_resistor
|
||||
KernelVersion: 6.10
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value of current sense resistor in Ohms.
|
||||
|
@ -1,17 +0,0 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
|
||||
Date: March 2017
|
||||
KernelVersion: 4.12
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description: The value of the shunt resistor used to compute power drain on
|
||||
common input voltage pin (RS+). In Ohms.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
|
||||
Date: March 2017
|
||||
KernelVersion: 4.12
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description: The value of the shunt resistor used to compute current flowing
|
||||
between RS+ and RS- voltage sense inputs. In Ohms.
|
||||
|
||||
These attributes describe a single physical component, exposed as two distinct
|
||||
attributes as it is used to calculate two different values: power load and
|
||||
current flowing between RS+ and RS- inputs.
|
@ -15,17 +15,3 @@ Description:
|
||||
Set the relative humidity. This value is sent to the sensor for
|
||||
humidity compensation.
|
||||
Default value: 50000 (50 % relative humidity)
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
|
||||
Date: August 2021
|
||||
KernelVersion: 5.15
|
||||
Contact: Andreas Klinger <ak@it-klinger.de>
|
||||
Description:
|
||||
Set the bias value for the resistance which is used for
|
||||
calculation of in_concentration_input as follows:
|
||||
|
||||
x = (in_resistance_raw - in_resistance_calibbias) * 0.65
|
||||
|
||||
in_concentration_input = 500 / (1 + e^x)
|
||||
|
||||
Default value: 30000
|
||||
|
61
Documentation/ABI/testing/sysfs-bus-iio-dac
Normal file
@ -0,0 +1,61 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
|
||||
is useful when one wants to change the DAC output codes. For
|
||||
autonomous toggling, the way it should be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_currentY_rawN, where N is the integer value of the symbol;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute has the same meaning as out_currentY_raw. It is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_N (_rawN), where N is the integer value of the symbol.
|
||||
The same scale and offset as in out_currentY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW switch to a predefined output symbol. This attribute
|
||||
is specific to toggle enabled channels and allows switching between
|
||||
multiple predefined symbols. Each symbol corresponds to a different
|
||||
output, denoted as out_currentY_rawN, where N is the integer value
|
||||
of the symbol. Writing an integer value N will select out_currentY_rawN.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
|
||||
is useful when one wants to change the DAC output codes. For
|
||||
autonomous toggling, the way it should be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_rawN, where N is the integer value of the symbol;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute has the same meaning as out_currentY_raw. It is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_N (_rawN), where N is the integer value of the symbol.
|
||||
The same scale and offset as in out_currentY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW switch to a predefined output symbol. This attribute
|
||||
is specific to toggle enabled channels and allows switching between
|
||||
multiple predefined symbols. Each symbol corresponds to a different
|
||||
output, denoted as out_voltageY_rawN, where N is the integer value
|
||||
of the symbol. Writing an integer value N will select out_voltageY_rawN.
|
@ -53,34 +53,3 @@ KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither phase.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
|
||||
useful when one wants to change the DAC output codes. The way it should
|
||||
be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_raw0 and out_voltageY_raw1;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
It has the same meaning as out_voltageY_raw. This attribute is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
|
||||
as in out_voltageY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW toggle. This attribute is specific to toggle
|
||||
enabled channels and allows to toggle between out_voltageY_raw0
|
||||
and out_voltageY_raw1 through software. Writing 0 will select
|
||||
out_voltageY_raw0 while 1 selects out_voltageY_raw1.
|
||||
|
@ -3,7 +3,7 @@ KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Reading this returns the valid values that can be written to the
|
||||
on_altvoltage0_mode attribute:
|
||||
filter_mode attribute:
|
||||
|
||||
- auto -> Adjust bandpass filter to track changes in input clock rate.
|
||||
- manual -> disable/unregister the clock rate notifier / input clock tracking.
|
||||
|
@ -13,12 +13,3 @@ Description:
|
||||
available for reading data. However, samples can be occasionally skipped
|
||||
or repeated, depending on the beat between the capture and conversion
|
||||
rates.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
|
||||
Date: December 2015
|
||||
KernelVersion: 4.4
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value of the shunt resistor may be known only at runtime fom an
|
||||
eeprom content read by a client application. This attribute allows to
|
||||
set its value in ohms.
|
||||
|
@ -500,3 +500,75 @@ Description:
|
||||
console drivers from the device. Raw users of pci-sysfs
|
||||
resourceN attributes must be terminated prior to resizing.
|
||||
Success of the resizing operation is not guaranteed.
|
||||
|
||||
What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
|
||||
What: /sys/class/leds/*:enclosure:*/brightness
|
||||
Date: August 2024
|
||||
KernelVersion: 6.12
|
||||
Description:
|
||||
LED indications on PCIe storage enclosures which are controlled
|
||||
through the NPEM interface (Native PCIe Enclosure Management,
|
||||
PCIe r6.1 sec 6.28) are accessible as led class devices, both
|
||||
below /sys/class/leds and below NPEM-capable PCI devices.
|
||||
|
||||
Although these led class devices could be manipulated manually,
|
||||
in practice they are typically manipulated automatically by an
|
||||
application such as ledmon(8).
|
||||
|
||||
The name of a led class device is as follows:
|
||||
<bdf>:enclosure:<indication>
|
||||
where:
|
||||
|
||||
- <bdf> is the domain, bus, device and function number
|
||||
(e.g. 10000:02:05.0)
|
||||
- <indication> is a short description of the LED indication
|
||||
|
||||
Valid indications per PCIe r6.1 table 6-27 are:
|
||||
|
||||
- ok (drive is functioning normally)
|
||||
- locate (drive is being identified by an admin)
|
||||
- fail (drive is not functioning properly)
|
||||
- rebuild (drive is part of an array that is rebuilding)
|
||||
- pfa (drive is predicted to fail soon)
|
||||
- hotspare (drive is marked to be used as a replacement)
|
||||
- ica (drive is part of an array that is degraded)
|
||||
- ifa (drive is part of an array that is failed)
|
||||
- idt (drive is not the right type for the connector)
|
||||
- disabled (drive is disabled, removal is safe)
|
||||
- specific0 to specific7 (enclosure-specific indications)
|
||||
|
||||
Broadly, the indications fall into one of these categories:
|
||||
|
||||
- to signify drive state (ok, locate, fail, idt, disabled)
|
||||
- to signify drive role or state in a software RAID array
|
||||
(rebuild, pfa, hotspare, ica, ifa)
|
||||
- to signify any other role or state (specific0 to specific7)
|
||||
|
||||
Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
|
||||
ok, locate, fail, rebuild. All others are optional.
|
||||
A led class device is only visible if the corresponding
|
||||
indication is supported by the device.
|
||||
|
||||
To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
|
||||
to the "brightness" file. Note that manipulating an indication
|
||||
may implicitly manipulate other indications at the vendor's
|
||||
discretion. E.g. when the user lights up the "ok" indication,
|
||||
the vendor may choose to automatically turn off the "fail"
|
||||
indication. The current state of an indication can be
|
||||
retrieved by reading its "brightness" file.
|
||||
|
||||
The PCIe Base Specification allows vendors leeway to choose
|
||||
different colors or blinking patterns for the indications,
|
||||
but they typically follow the IBPI standard. E.g. the "locate"
|
||||
indication is usually presented as one or two LEDs blinking at
|
||||
4 Hz frequency:
|
||||
https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
|
||||
|
||||
PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
|
||||
to facilitate shared access by operating system and platform
|
||||
firmware to a device's NPEM registers. The kernel will use
|
||||
this DSM interface where available, instead of accessing NPEM
|
||||
registers directly. The DSM interface does not support the
|
||||
enclosure-specific indications "specific0" to "specific7",
|
||||
hence the corresponding led class devices are unavailable if
|
||||
the DSM interface is used.
|
||||
|
@ -377,17 +377,33 @@ What: /sys/class/power_supply/<supply_name>/charge_type
|
||||
Date: July 2009
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description:
|
||||
Represents the type of charging currently being applied to the
|
||||
battery. "Trickle", "Fast", and "Standard" all mean different
|
||||
charging speeds. "Adaptive" means that the charger uses some
|
||||
algorithm to adjust the charge rate dynamically, without
|
||||
any user configuration required. "Custom" means that the charger
|
||||
uses the charge_control_* properties as configuration for some
|
||||
different algorithm. "Long Life" means the charger reduces its
|
||||
charging rate in order to prolong the battery health. "Bypass"
|
||||
means the charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall adaptor to
|
||||
perform the power conversion externally.
|
||||
Select the charging algorithm to use for a battery.
|
||||
|
||||
Standard:
|
||||
Fully charge the battery at a moderate rate.
|
||||
Fast:
|
||||
Quickly charge the battery using fast-charge
|
||||
technology. This is typically harder on the battery
|
||||
than standard charging and may lower its lifespan.
|
||||
Trickle:
|
||||
Users who primarily operate the system while
|
||||
plugged into an external power source can extend
|
||||
battery life with this mode. Vendor tooling may
|
||||
call this "Primarily AC Use".
|
||||
Adaptive:
|
||||
Automatically optimize battery charge rate based
|
||||
on typical usage pattern.
|
||||
Custom:
|
||||
Use the charge_control_* properties to determine
|
||||
when to start and stop charging. Advanced users
|
||||
can use this to drastically extend battery life.
|
||||
Long Life:
|
||||
The charger reduces its charging rate in order to
|
||||
prolong the battery health.
|
||||
Bypass:
|
||||
The charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall
|
||||
adaptor to perform the power conversion externally.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
@ -592,7 +608,12 @@ Description:
|
||||
the supply, for example it can show if USB-PD capable source
|
||||
is attached.
|
||||
|
||||
Access: Read-Only
|
||||
Access: For power-supplies which consume USB power such
|
||||
as battery charger chips, this indicates the type of
|
||||
the connected USB power source and is Read-Only.
|
||||
|
||||
For power-supplies which act as a USB power-source such as
|
||||
e.g. the UCS1002 USB Port Power Controller this is writable.
|
||||
|
||||
Valid values:
|
||||
"Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
|
||||
|
15
Documentation/ABI/testing/sysfs-class-tee
Normal file
@ -0,0 +1,15 @@
|
||||
What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
|
||||
Date: May 2024
|
||||
KernelVersion: 6.10
|
||||
Contact: op-tee@lists.trustedfirmware.org
|
||||
Description:
|
||||
RPMB frames can be routed to the RPMB device via the
|
||||
user-space daemon tee-supplicant or the RPMB subsystem
|
||||
in the kernel. The value "user" means that the driver
|
||||
will route the RPMB frames via user space. Conversely,
|
||||
"kernel" means that the frames are routed via the RPMB
|
||||
subsystem without assistance from tee-supplicant. It
|
||||
should be assumed that RPMB frames are routed via user
|
||||
space if the variable is absent. The primary purpose
|
||||
of this variable is to let systemd know whether
|
||||
tee-supplicant is needed in the early boot with initramfs.
|
@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug
|
||||
Date: Aug 2023
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
(RO) indicates whether or not the kernel directly supports
|
||||
modifying the crash elfcorehdr for memory hot un/plug and/or
|
||||
on/offline changes.
|
||||
(RO) indicates whether or not the kernel updates relevant kexec
|
||||
segments on memory hot un/plug and/or on/offline events, avoiding the
|
||||
need to reload kdump kernel.
|
||||
|
@ -704,9 +704,9 @@ What: /sys/devices/system/cpu/crash_hotplug
|
||||
Date: Aug 2023
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
(RO) indicates whether or not the kernel directly supports
|
||||
modifying the crash elfcorehdr for CPU hot un/plug and/or
|
||||
on/offline changes.
|
||||
(RO) indicates whether or not the kernel updates relevant kexec
|
||||
segments on memory hot un/plug and/or on/offline events, avoiding the
|
||||
need to reload kdump kernel.
|
||||
|
||||
What: /sys/devices/system/cpu/enabled
|
||||
Date: Nov 2022
|
||||
|
@ -75,3 +75,11 @@ Description: RO. Energy input of device or gt in microjoules.
|
||||
for the gt.
|
||||
|
||||
Only supported for particular Intel i915 graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/fan1_input
|
||||
Date: November 2024
|
||||
KernelVersion: 6.12
|
||||
Contact: intel-gfx@lists.freedesktop.org
|
||||
Description: RO. Fan speed of device in RPM.
|
||||
|
||||
Only supported for particular Intel i915 graphics platforms.
|
||||
|
@ -1532,3 +1532,30 @@ Contact: Bean Huo <beanhuo@micron.com>
|
||||
Description:
|
||||
rtc_update_ms indicates how often the host should synchronize or update the
|
||||
UFS RTC. If set to 0, this will disable UFS RTC periodic update.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/version
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: UFS version register.
|
||||
Symbol - VER. This file shows the UFSHCD version.
|
||||
Example: Version 3.12 would be represented as 0000_0312h.
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/product_id
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: product ID register.
|
||||
Symbol - HCPID. This file shows the UFSHCD product id.
|
||||
The content of this register is vendor specific.
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/man_id
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: manufacturer ID register.
|
||||
Symbol - HCMID. This file shows the UFSHCD manufacturer id.
|
||||
The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
|
||||
The file is read only.
|
||||
|
@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young
|
||||
candidates whose age is not beyond the threshold, by default it was
|
||||
initialized as 604800 seconds (equals to 7 days).
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/atgc_enabled
|
||||
Date: Feb 2024
|
||||
Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
|
||||
Description: It represents whether ATGC is on or off. The value is 1 which
|
||||
indicates that ATGC is on, and 0 indicates that it is off.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
|
||||
Date: July 2021
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
@ -763,3 +769,53 @@ Date: November 2023
|
||||
Contact: "Chao Yu" <chao@kernel.org>
|
||||
Description: It controls to enable/disable IO aware feature for background discard.
|
||||
By default, the value is 1 which indicates IO aware is on.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
|
||||
Date: July 2024
|
||||
Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
|
||||
Description: The zone UFS we are currently using consists of two parts:
|
||||
conventional zones and sequential zones. It can be used to control which part
|
||||
to prioritize for writes, with a default value of 0.
|
||||
|
||||
======================== =========================================
|
||||
value description
|
||||
blkzone_alloc_policy = 0 Prioritize writing to sequential zones
|
||||
blkzone_alloc_policy = 1 Only allow writing to sequential zones
|
||||
blkzone_alloc_policy = 2 Prioritize writing to conventional zones
|
||||
======================== =========================================
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/migration_window_granularity
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Controls migration window granularity of garbage collection on large
|
||||
section. it can control the scanning window granularity for GC migration
|
||||
in a unit of segment, while migration_granularity controls the number
|
||||
of segments which can be migrated at the same turn.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/reserved_segments
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: In order to fine tune GC behavior, we can control the number of
|
||||
reserved segments.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: If the percentage of free sections over total sections is above this
|
||||
number, F2FS do not garbage collection for zoned devices through the
|
||||
background GC thread. the default number is "60".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: If the percentage of free sections over total sections is under this
|
||||
number, F2FS boosts garbage collection for zoned devices through the
|
||||
background GC thread. the default number is "25".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: It controls the valid block ratio threshold not to trigger excessive GC
|
||||
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
|
||||
background GC thread from intiating GC for sections having valid blocks
|
||||
exceeding the ratio.
|
||||
|
@ -52,7 +52,7 @@ driver generally needs to perform the following initialization:
|
||||
- Enable DMA/processing engines
|
||||
|
||||
When done using the device, and perhaps the module needs to be unloaded,
|
||||
the driver needs to take the follow steps:
|
||||
the driver needs to take the following steps:
|
||||
|
||||
- Disable the device from generating IRQs
|
||||
- Release the IRQ (free_irq())
|
||||
|
@ -921,10 +921,10 @@ This portion of the ``rcu_data`` structure is declared as follows:
|
||||
|
||||
::
|
||||
|
||||
1 int dynticks_snap;
|
||||
1 int watching_snap;
|
||||
2 unsigned long dynticks_fqs;
|
||||
|
||||
The ``->dynticks_snap`` field is used to take a snapshot of the
|
||||
The ``->watching_snap`` field is used to take a snapshot of the
|
||||
corresponding CPU's dyntick-idle state when forcing quiescent states,
|
||||
and is therefore accessed from other CPUs. Finally, the
|
||||
``->dynticks_fqs`` field is used to count the number of times this CPU
|
||||
@ -935,8 +935,8 @@ This portion of the rcu_data structure is declared as follows:
|
||||
|
||||
::
|
||||
|
||||
1 long dynticks_nesting;
|
||||
2 long dynticks_nmi_nesting;
|
||||
1 long nesting;
|
||||
2 long nmi_nesting;
|
||||
3 atomic_t dynticks;
|
||||
4 bool rcu_need_heavy_qs;
|
||||
5 bool rcu_urgent_qs;
|
||||
@ -945,14 +945,14 @@ These fields in the rcu_data structure maintain the per-CPU dyntick-idle
|
||||
state for the corresponding CPU. The fields may be accessed only from
|
||||
the corresponding CPU (and from tracing) unless otherwise stated.
|
||||
|
||||
The ``->dynticks_nesting`` field counts the nesting depth of process
|
||||
The ``->nesting`` field counts the nesting depth of process
|
||||
execution, so that in normal circumstances this counter has value zero
|
||||
or one. NMIs, irqs, and tracers are counted by the
|
||||
``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
|
||||
``->nmi_nesting`` field. Because NMIs cannot be masked, changes
|
||||
to this variable have to be undertaken carefully using an algorithm
|
||||
provided by Andy Lutomirski. The initial transition from idle adds one,
|
||||
and nested transitions add two, so that a nesting level of five is
|
||||
represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
|
||||
represented by a ``->nmi_nesting`` value of nine. This counter
|
||||
can therefore be thought of as counting the number of reasons why this
|
||||
CPU cannot be permitted to enter dyntick-idle mode, aside from
|
||||
process-level transitions.
|
||||
@ -960,12 +960,12 @@ process-level transitions.
|
||||
However, it turns out that when running in non-idle kernel context, the
|
||||
Linux kernel is fully capable of entering interrupt handlers that never
|
||||
exit and perhaps also vice versa. Therefore, whenever the
|
||||
``->dynticks_nesting`` field is incremented up from zero, the
|
||||
``->dynticks_nmi_nesting`` field is set to a large positive number, and
|
||||
whenever the ``->dynticks_nesting`` field is decremented down to zero,
|
||||
the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
|
||||
``->nesting`` field is incremented up from zero, the
|
||||
``->nmi_nesting`` field is set to a large positive number, and
|
||||
whenever the ``->nesting`` field is decremented down to zero,
|
||||
the ``->nmi_nesting`` field is set to zero. Assuming that
|
||||
the number of misnested interrupts is not sufficient to overflow the
|
||||
counter, this approach corrects the ``->dynticks_nmi_nesting`` field
|
||||
counter, this approach corrects the ``->nmi_nesting`` field
|
||||
every time the corresponding CPU enters the idle loop from process
|
||||
context.
|
||||
|
||||
@ -992,8 +992,8 @@ code.
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Quick Quiz**: |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Why not simply combine the ``->dynticks_nesting`` and |
|
||||
| ``->dynticks_nmi_nesting`` counters into a single counter that just |
|
||||
| Why not simply combine the ``->nesting`` and |
|
||||
| ``->nmi_nesting`` counters into a single counter that just |
|
||||
| counts the number of reasons that the corresponding CPU is non-idle? |
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Answer**: |
|
||||
|
@ -147,10 +147,10 @@ RCU read-side critical sections preceding and following the current
|
||||
idle sojourn.
|
||||
This case is handled by calls to the strongly ordered
|
||||
``atomic_add_return()`` read-modify-write atomic operation that
|
||||
is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
|
||||
time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
|
||||
The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
|
||||
(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
|
||||
is invoked within ``ct_kernel_exit_state()`` at idle-entry
|
||||
time and within ``ct_kernel_enter_state()`` at idle-exit time.
|
||||
The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
|
||||
(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
|
||||
(both of which rely on acquire semantics) to detect idle CPUs.
|
||||
|
||||
+-----------------------------------------------------------------------+
|
||||
|
@ -528,7 +528,7 @@
|
||||
font-style="normal"
|
||||
y="-8652.5312"
|
||||
x="2466.7822"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-2-7-2-0"
|
||||
@ -537,7 +537,7 @@
|
||||
font-style="normal"
|
||||
y="-8368.1475"
|
||||
x="2463.3262"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
</g>
|
||||
<g
|
||||
id="g4504"
|
||||
@ -607,7 +607,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -638,7 +638,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
@ -844,7 +844,7 @@
|
||||
font-style="normal"
|
||||
y="1547.8876"
|
||||
x="4417.6396"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<g
|
||||
style="fill:none;stroke-width:0.025in"
|
||||
transform="translate(6501.9719,-10685.904)"
|
||||
@ -899,7 +899,7 @@
|
||||
font-style="normal"
|
||||
y="1858.8729"
|
||||
x="4414.1836"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="14659.87"
|
||||
@ -977,7 +977,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -1008,7 +1008,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 50 KiB |
@ -2974,7 +2974,7 @@
|
||||
font-style="normal"
|
||||
y="38114.047"
|
||||
x="-334.33856"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<g
|
||||
style="fill:none;stroke-width:0.025in"
|
||||
transform="translate(1749.9916,25880.249)"
|
||||
@ -3029,7 +3029,7 @@
|
||||
font-style="normal"
|
||||
y="38425.035"
|
||||
x="-337.79462"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="9907.8887"
|
||||
@ -3107,7 +3107,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -3138,7 +3138,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 208 KiB After Width: | Height: | Size: 208 KiB |
@ -516,7 +516,7 @@
|
||||
font-style="normal"
|
||||
y="-8652.5312"
|
||||
x="2466.7822"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-2-7-2-0"
|
||||
@ -525,7 +525,7 @@
|
||||
font-style="normal"
|
||||
y="-8368.1475"
|
||||
x="2463.3262"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
sodipodi:linespacing="125%"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
@ -2649,8 +2649,7 @@ those that are idle from RCU's perspective) and then Tasks Rude RCU can
|
||||
be removed from the kernel.
|
||||
|
||||
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
|
||||
consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
|
||||
and rcu_barrier_tasks_rude().
|
||||
consisting solely of synchronize_rcu_tasks_rude().
|
||||
|
||||
Tasks Trace RCU
|
||||
~~~~~~~~~~~~~~~
|
||||
|
@ -194,14 +194,13 @@ over a rather long period of time, but improvements are always welcome!
|
||||
when publicizing a pointer to a structure that can
|
||||
be traversed by an RCU read-side critical section.
|
||||
|
||||
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
|
||||
call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
|
||||
the callback function may be invoked from softirq context,
|
||||
and in any case with bottom halves disabled. In particular,
|
||||
this callback function cannot block. If you need the callback
|
||||
to block, run that code in a workqueue handler scheduled from
|
||||
the callback. The queue_rcu_work() function does this for you
|
||||
in the case of call_rcu().
|
||||
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
|
||||
call_rcu_tasks_trace() is used, the callback function may be
|
||||
invoked from softirq context, and in any case with bottom halves
|
||||
disabled. In particular, this callback function cannot block.
|
||||
If you need the callback to block, run that code in a workqueue
|
||||
handler scheduled from the callback. The queue_rcu_work()
|
||||
function does this for you in the case of call_rcu().
|
||||
|
||||
6. Since synchronize_rcu() can block, it cannot be called
|
||||
from any sort of irq context. The same rule applies
|
||||
@ -254,10 +253,10 @@ over a rather long period of time, but improvements are always welcome!
|
||||
corresponding readers must use rcu_read_lock_trace()
|
||||
and rcu_read_unlock_trace().
|
||||
|
||||
c. If an updater uses call_rcu_tasks_rude() or
|
||||
synchronize_rcu_tasks_rude(), then the corresponding
|
||||
readers must use anything that disables preemption,
|
||||
for example, preempt_disable() and preempt_enable().
|
||||
c. If an updater uses synchronize_rcu_tasks_rude(),
|
||||
then the corresponding readers must use anything that
|
||||
disables preemption, for example, preempt_disable()
|
||||
and preempt_enable().
|
||||
|
||||
Mixing things up will result in confusion and broken kernels, and
|
||||
has even resulted in an exploitable security issue. Therefore,
|
||||
@ -326,11 +325,9 @@ over a rather long period of time, but improvements are always welcome!
|
||||
d. Periodically invoke rcu_barrier(), permitting a limited
|
||||
number of updates per grace period.
|
||||
|
||||
The same cautions apply to call_srcu(), call_rcu_tasks(),
|
||||
call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
|
||||
why there is an srcu_barrier(), rcu_barrier_tasks(),
|
||||
rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
|
||||
respectively.
|
||||
The same cautions apply to call_srcu(), call_rcu_tasks(), and
|
||||
call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
|
||||
rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
|
||||
|
||||
Note that although these primitives do take action to avoid
|
||||
memory exhaustion when any given CPU has too many callbacks,
|
||||
@ -383,17 +380,17 @@ over a rather long period of time, but improvements are always welcome!
|
||||
must use whatever locking or other synchronization is required
|
||||
to safely access and/or modify that data structure.
|
||||
|
||||
Do not assume that RCU callbacks will be executed on
|
||||
the same CPU that executed the corresponding call_rcu(),
|
||||
call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or
|
||||
call_rcu_tasks_trace(). For example, if a given CPU goes offline
|
||||
while having an RCU callback pending, then that RCU callback
|
||||
will execute on some surviving CPU. (If this was not the case,
|
||||
a self-spawning RCU callback would prevent the victim CPU from
|
||||
ever going offline.) Furthermore, CPUs designated by rcu_nocbs=
|
||||
might well *always* have their RCU callbacks executed on some
|
||||
other CPUs, in fact, for some real-time workloads, this is the
|
||||
whole point of using the rcu_nocbs= kernel boot parameter.
|
||||
Do not assume that RCU callbacks will be executed on the same
|
||||
CPU that executed the corresponding call_rcu(), call_srcu(),
|
||||
call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
|
||||
a given CPU goes offline while having an RCU callback pending,
|
||||
then that RCU callback will execute on some surviving CPU.
|
||||
(If this was not the case, a self-spawning RCU callback would
|
||||
prevent the victim CPU from ever going offline.) Furthermore,
|
||||
CPUs designated by rcu_nocbs= might well *always* have their
|
||||
RCU callbacks executed on some other CPUs, in fact, for some
|
||||
real-time workloads, this is the whole point of using the
|
||||
rcu_nocbs= kernel boot parameter.
|
||||
|
||||
In addition, do not assume that callbacks queued in a given order
|
||||
will be invoked in that order, even if they all are queued on the
|
||||
@ -507,9 +504,9 @@ over a rather long period of time, but improvements are always welcome!
|
||||
These debugging aids can help you find problems that are
|
||||
otherwise extremely difficult to spot.
|
||||
|
||||
17. If you pass a callback function defined within a module to one of
|
||||
call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
|
||||
or call_rcu_tasks_trace(), then it is necessary to wait for all
|
||||
17. If you pass a callback function defined within a module
|
||||
to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
|
||||
call_rcu_tasks_trace(), then it is necessary to wait for all
|
||||
pending callbacks to be invoked before unloading that module.
|
||||
Note that it is absolutely *not* sufficient to wait for a grace
|
||||
period! For example, synchronize_rcu() implementation is *not*
|
||||
@ -522,7 +519,6 @@ over a rather long period of time, but improvements are always welcome!
|
||||
- call_rcu() -> rcu_barrier()
|
||||
- call_srcu() -> srcu_barrier()
|
||||
- call_rcu_tasks() -> rcu_barrier_tasks()
|
||||
- call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
|
||||
- call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
|
||||
|
||||
However, these barrier functions are absolutely *not* guaranteed
|
||||
@ -539,7 +535,6 @@ over a rather long period of time, but improvements are always welcome!
|
||||
- Either synchronize_srcu() or synchronize_srcu_expedited(),
|
||||
together with and srcu_barrier()
|
||||
- synchronize_rcu_tasks() and rcu_barrier_tasks()
|
||||
- synchronize_tasks_rude() and rcu_barrier_tasks_rude()
|
||||
- synchronize_tasks_trace() and rcu_barrier_tasks_trace()
|
||||
|
||||
If necessary, you can use something like workqueues to execute
|
||||
|
@ -249,7 +249,7 @@ ticks this GP)" indicates that this CPU has not taken any scheduling-clock
|
||||
interrupts during the current stalled grace period.
|
||||
|
||||
The "idle=" portion of the message prints the dyntick-idle state.
|
||||
The hex number before the first "/" is the low-order 12 bits of the
|
||||
The hex number before the first "/" is the low-order 16 bits of the
|
||||
dynticks counter, which will have an even-numbered value if the CPU
|
||||
is in dyntick-idle mode and an odd-numbered value otherwise. The hex
|
||||
number between the two "/"s is the value of the nesting, which will be
|
||||
|
@ -1103,7 +1103,7 @@ RCU-Tasks-Rude::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
|
||||
N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
|
||||
N/A N/A
|
||||
synchronize_rcu_tasks_rude
|
||||
|
||||
|
||||
|
@ -93,7 +93,7 @@ commands (does not impact QAIC).
|
||||
uAPI
|
||||
====
|
||||
|
||||
QAIC creates an accel device per phsyical PCIe device. This accel device exists
|
||||
QAIC creates an accel device per physical PCIe device. This accel device exists
|
||||
for as long as the PCIe device is known to Linux.
|
||||
|
||||
The PCIe device may not be in the state to accept requests from userspace at
|
||||
@ -147,12 +147,6 @@ DRM_IOCTL_QAIC_PERF_STATS_BO
|
||||
recent execution of a BO. This allows userspace to construct an end to end
|
||||
timeline of the BO processing for a performance analysis.
|
||||
|
||||
DRM_IOCTL_QAIC_PART_DEV
|
||||
This IOCTL allows userspace to request a duplicate "shadow device". This extra
|
||||
accelN device is associated with a specific partition of resources on the
|
||||
AIC100 device and can be used for limiting a process to some subset of
|
||||
resources.
|
||||
|
||||
DRM_IOCTL_QAIC_DETACH_SLICE_BO
|
||||
This IOCTL allows userspace to remove the slicing information from a BO that
|
||||
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
|
||||
|
@ -223,7 +223,10 @@ are signed through the PKCS#7 message format to enforce some level of
|
||||
authorization of the policies (prohibiting an attacker from gaining
|
||||
unconstrained root, and deploying an "allow all" policy). These
|
||||
policies must be signed by a certificate that chains to the
|
||||
``SYSTEM_TRUSTED_KEYRING``. With openssl, the policy can be signed by::
|
||||
``SYSTEM_TRUSTED_KEYRING``, or to the secondary and/or platform keyrings if
|
||||
``CONFIG_IPE_POLICY_SIG_SECONDARY_KEYRING`` and/or
|
||||
``CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING`` are enabled, respectively.
|
||||
With openssl, the policy can be signed by::
|
||||
|
||||
openssl smime -sign \
|
||||
-in "$MY_POLICY" \
|
||||
@ -266,7 +269,7 @@ in the kernel. This file is write-only and accepts a PKCS#7 signed
|
||||
policy. Two checks will always be performed on this policy: First, the
|
||||
``policy_names`` must match with the updated version and the existing
|
||||
version. Second the updated policy must have a policy version greater than
|
||||
or equal to the currently-running version. This is to prevent rollback attacks.
|
||||
the currently-running version. This is to prevent rollback attacks.
|
||||
|
||||
The ``delete`` file is used to remove a policy that is no longer needed.
|
||||
This file is write-only and accepts a value of ``1`` to delete the policy.
|
||||
|
@ -102,17 +102,41 @@ Examples::
|
||||
#select lzo compression algorithm
|
||||
echo lzo > /sys/block/zram0/comp_algorithm
|
||||
|
||||
For the time being, the `comp_algorithm` content does not necessarily
|
||||
show every compression algorithm supported by the kernel. We keep this
|
||||
list primarily to simplify device configuration and one can configure
|
||||
a new device with a compression algorithm that is not listed in
|
||||
`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
|
||||
and, if some of the algorithms were built as modules, it's impossible
|
||||
to list all of them using, for instance, /proc/crypto or any other
|
||||
method. This, however, has an advantage of permitting the usage of
|
||||
custom crypto compression modules (implementing S/W or H/W compression).
|
||||
For the time being, the `comp_algorithm` content shows only compression
|
||||
algorithms that are supported by zram.
|
||||
|
||||
4) Set Disksize
|
||||
4) Set compression algorithm parameters: Optional
|
||||
=================================================
|
||||
|
||||
Compression algorithms may support specific parameters which can be
|
||||
tweaked for particular dataset. ZRAM has an `algorithm_params` device
|
||||
attribute which provides a per-algorithm params configuration.
|
||||
|
||||
For example, several compression algorithms support `level` parameter.
|
||||
In addition, certain compression algorithms support pre-trained dictionaries,
|
||||
which significantly change algorithms' characteristics. In order to configure
|
||||
compression algorithm to use external pre-trained dictionary, pass full
|
||||
path to the `dict` along with other parameters::
|
||||
|
||||
#pass path to pre-trained zstd dictionary
|
||||
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
|
||||
|
||||
#same, but using algorithm priority
|
||||
echo "priority=1 dict=/etc/dictioary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
#pass path to pre-trained zstd dictionary and compression level
|
||||
echo "algo=zstd level=8 dict=/etc/dictioary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
Parameters are algorithm specific: not all algorithms support pre-trained
|
||||
dictionaries, not all algorithms support `level`. Furthermore, for certain
|
||||
algorithms `level` controls the compression level (the higher the value the
|
||||
better the compression ratio, it even can take negatives values for some
|
||||
algorithms), for other algorithms `level` is acceleration level (the higher
|
||||
the value the lower the compression ratio).
|
||||
|
||||
5) Set Disksize
|
||||
===============
|
||||
|
||||
Set disk size by writing the value to sysfs node 'disksize'.
|
||||
@ -132,7 +156,7 @@ There is little point creating a zram of greater than twice the size of memory
|
||||
since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
|
||||
size of the disk when not in use so a huge zram is wasteful.
|
||||
|
||||
5) Set memory limit: Optional
|
||||
6) Set memory limit: Optional
|
||||
=============================
|
||||
|
||||
Set memory limit by writing the value to sysfs node 'mem_limit'.
|
||||
@ -151,7 +175,7 @@ Examples::
|
||||
# To disable memory limit
|
||||
echo 0 > /sys/block/zram0/mem_limit
|
||||
|
||||
6) Activate
|
||||
7) Activate
|
||||
===========
|
||||
|
||||
::
|
||||
@ -162,7 +186,7 @@ Examples::
|
||||
mkfs.ext4 /dev/zram1
|
||||
mount /dev/zram1 /tmp
|
||||
|
||||
7) Add/remove zram devices
|
||||
8) Add/remove zram devices
|
||||
==========================
|
||||
|
||||
zram provides a control interface, which enables dynamic (on-demand) device
|
||||
@ -182,7 +206,7 @@ execute::
|
||||
|
||||
echo X > /sys/class/zram-control/hot_remove
|
||||
|
||||
8) Stats
|
||||
9) Stats
|
||||
========
|
||||
|
||||
Per-device statistics are exported as various nodes under /sys/block/zram<id>/
|
||||
@ -205,6 +229,7 @@ writeback_limit_enable RW show and set writeback_limit feature
|
||||
max_comp_streams RW the number of possible concurrent compress
|
||||
operations
|
||||
comp_algorithm RW show and change the compression algorithm
|
||||
algorithm_params WO setup compression algorithm parameters
|
||||
compact WO trigger memory compaction
|
||||
debug_stat RO this file is used for zram debugging purposes
|
||||
backing_dev RW set up backend storage for zram to write out
|
||||
@ -283,15 +308,15 @@ a single line of text and contains the following stats separated by whitespace:
|
||||
Unit: 4K bytes
|
||||
============== =============================================================
|
||||
|
||||
9) Deactivate
|
||||
=============
|
||||
10) Deactivate
|
||||
==============
|
||||
|
||||
::
|
||||
|
||||
swapoff /dev/zram0
|
||||
umount /dev/zram1
|
||||
|
||||
10) Reset
|
||||
11) Reset
|
||||
=========
|
||||
|
||||
Write any positive value to 'reset' sysfs node::
|
||||
@ -487,11 +512,14 @@ registered compression algorithms, increases our chances of finding the
|
||||
algorithm that successfully compresses a particular page. Sometimes, however,
|
||||
it is convenient (and sometimes even necessary) to limit recompression to
|
||||
only one particular algorithm so that it will not try any other algorithms.
|
||||
This can be achieved by providing a algo=NAME parameter:::
|
||||
This can be achieved by providing a `algo` or `priority` parameter:::
|
||||
|
||||
#use zstd algorithm only (if registered)
|
||||
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
|
||||
|
||||
#use zstd algorithm only (if zstd was registered under priority 1)
|
||||
echo "type=huge priority=1" > /sys/block/zramX/recompress
|
||||
|
||||
memory tracking
|
||||
===============
|
||||
|
||||
|
@ -1,76 +1,165 @@
|
||||
Bisecting a bug
|
||||
+++++++++++++++
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
.. [see the bottom of this file for redistribution information]
|
||||
|
||||
Last updated: 28 October 2016
|
||||
======================
|
||||
Bisecting a regression
|
||||
======================
|
||||
|
||||
Introduction
|
||||
============
|
||||
This document describes how to use a ``git bisect`` to find the source code
|
||||
change that broke something -- for example when some functionality stopped
|
||||
working after upgrading from Linux 6.0 to 6.1.
|
||||
|
||||
Always try the latest kernel from kernel.org and build from source. If you are
|
||||
not confident in doing that please report the bug to your distribution vendor
|
||||
instead of to a kernel developer.
|
||||
The text focuses on the gist of the process. If you are new to bisecting the
|
||||
kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
|
||||
instead: it depicts everything from start to finish while covering multiple
|
||||
aspects even kernel developers occasionally forget. This includes detecting
|
||||
situations early where a bisection would be a waste of time, as nobody would
|
||||
care about the result -- for example, because the problem happens after the
|
||||
kernel marked itself as 'tainted', occurs in an abandoned version, was already
|
||||
fixed, or is caused by a .config change you or your Linux distributor performed.
|
||||
|
||||
Finding bugs is not always easy. Have a go though. If you can't find it don't
|
||||
give up. Report as much as you have found to the relevant maintainer. See
|
||||
MAINTAINERS for who that is for the subsystem you have worked on.
|
||||
Finding the change causing a kernel issue using a bisection
|
||||
===========================================================
|
||||
|
||||
Before you submit a bug report read
|
||||
'Documentation/admin-guide/reporting-issues.rst'.
|
||||
*Note: the following process assumes you prepared everything for a bisection.
|
||||
This includes having a Git clone with the appropriate sources, installing the
|
||||
software required to build and install kernels, as well as a .config file stored
|
||||
in a safe place (the following example assumes '~/prepared_kernel_.config') to
|
||||
use as pristine base at each bisection step; ideally, you have also worked out
|
||||
a fully reliable and straight-forward way to reproduce the regression, too.*
|
||||
|
||||
Devices not appearing
|
||||
=====================
|
||||
* Preparation: start the bisection and tell Git about the points in the history
|
||||
you consider to be working and broken, which Git calls 'good' and 'bad'::
|
||||
|
||||
Often this is caused by udev/systemd. Check that first before blaming it
|
||||
on the kernel.
|
||||
git bisect start
|
||||
git bisect good v6.0
|
||||
git bisect bad v6.1
|
||||
|
||||
Finding patch that caused a bug
|
||||
===============================
|
||||
Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
|
||||
|
||||
Using the provided tools with ``git`` makes finding bugs easy provided the bug
|
||||
is reproducible.
|
||||
1. Copy your prepared .config into the build directory and adjust it to the
|
||||
needs of the codebase Git checked out for testing::
|
||||
|
||||
Steps to do it:
|
||||
cp ~/prepared_kernel_.config .config
|
||||
make olddefconfig
|
||||
|
||||
- build the Kernel from its git source
|
||||
- start bisect with [#f1]_::
|
||||
2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
|
||||
for example, when a compile error happens at the current stage of the
|
||||
bisection a later change resolves. In such cases run ``git bisect skip`` and
|
||||
go back to step 1.
|
||||
|
||||
$ git bisect start
|
||||
3. Check if the functionality that regressed works in the kernel you just built.
|
||||
|
||||
- mark the broken changeset with::
|
||||
If it works, execute::
|
||||
|
||||
$ git bisect bad [commit]
|
||||
git bisect good
|
||||
|
||||
- mark a changeset where the code is known to work with::
|
||||
If it is broken, run::
|
||||
|
||||
$ git bisect good [commit]
|
||||
git bisect bad
|
||||
|
||||
- rebuild the Kernel and test
|
||||
- interact with git bisect by using either::
|
||||
Note, getting this wrong just once will send the rest of the bisection
|
||||
totally off course. To prevent having to start anew later you thus want to
|
||||
ensure what you tell Git is correct; it is thus often wise to spend a few
|
||||
minutes more on testing in case your reproducer is unreliable.
|
||||
|
||||
$ git bisect good
|
||||
After issuing one of these two commands, Git will usually check out another
|
||||
bisection point and print something like 'Bisecting: 675 revisions left to
|
||||
test after this (roughly 10 steps)'. In that case go back to step 1.
|
||||
|
||||
or::
|
||||
If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
|
||||
is the first bad commit', then you have finished the bisection. In that case
|
||||
move to the next point below. Note, right after displaying that line Git will
|
||||
show some details about the culprit including its patch description; this can
|
||||
easily fill your terminal, so you might need to scroll up to see the message
|
||||
mentioning the culprit's commit-id.
|
||||
|
||||
$ git bisect bad
|
||||
In case you missed Git's output, you can always run ``git bisect log`` to
|
||||
print the status: it will show how many steps remain or mention the result of
|
||||
the bisection.
|
||||
|
||||
depending if the bug happened on the changeset you're testing
|
||||
- After some interactions, git bisect will give you the changeset that
|
||||
likely caused the bug.
|
||||
* Recommended complementary task: put the bisection log and the current .config
|
||||
file aside for the bug report; furthermore tell Git to reset the sources to
|
||||
the state before the bisection::
|
||||
|
||||
- For example, if you know that the current version is bad, and version
|
||||
4.8 is good, you could do::
|
||||
git bisect log > ~/bisection-log
|
||||
cp .config ~/bisection-config-culprit
|
||||
git bisect reset
|
||||
|
||||
$ git bisect start
|
||||
$ git bisect bad # Current version is bad
|
||||
$ git bisect good v4.8
|
||||
* Recommended optional task: try reverting the culprit on top of the latest
|
||||
codebase and check if that fixes your bug; if that is the case, it validates
|
||||
the bisection and enables developers to resolve the regression through a
|
||||
revert.
|
||||
|
||||
To try this, update your clone and check out latest mainline. Then tell Git
|
||||
to revert the change by specifying its commit-id::
|
||||
|
||||
.. [#f1] You can, optionally, provide both good and bad arguments at git
|
||||
start with ``git bisect start [BAD] [GOOD]``
|
||||
git revert --no-edit cafec0cacaca0
|
||||
|
||||
For further references, please read:
|
||||
Git might reject this, for example when the bisection landed on a merge
|
||||
commit. In that case, abandon the attempt. Do the same, if Git fails to revert
|
||||
the culprit on its own because later changes depend on it -- at least unless
|
||||
you bisected a stable or longterm kernel series, in which case you want to
|
||||
check out its latest codebase and try a revert there.
|
||||
|
||||
- The man page for ``git-bisect``
|
||||
- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
|
||||
- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
|
||||
- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
|
||||
If a revert succeeds, build and test another kernel to check if reverting
|
||||
resolved your regression.
|
||||
|
||||
With that the process is complete. Now report the regression as described by
|
||||
Documentation/admin-guide/reporting-issues.rst.
|
||||
|
||||
Bisecting linux-next
|
||||
--------------------
|
||||
|
||||
If you face a problem only happening in linux-next, bisect between the
|
||||
linux-next branches 'stable' and 'master'. The following commands will start
|
||||
the process for a linux-next tree you added as a remote called 'next'::
|
||||
|
||||
git bisect start
|
||||
git bisect good next/stable
|
||||
git bisect bad next/master
|
||||
|
||||
The 'stable' branch refers to the state of linux-mainline that the current
|
||||
linux-next release (found in the 'master' branch) is based on -- the former
|
||||
thus should be free of any problems that show up in -next, but not in Linus'
|
||||
tree.
|
||||
|
||||
This will bisect across a wide range of changes, some of which you might have
|
||||
used in earlier linux-next releases without problems. Sadly there is no simple
|
||||
way to avoid checking them: bisecting from one linux-next release to a later
|
||||
one (say between 'next-20241020' and 'next-20241021') is impossible, as they
|
||||
share no common history.
|
||||
|
||||
Additional reading material
|
||||
---------------------------
|
||||
|
||||
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
|
||||
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
|
||||
in the Git documentation.
|
||||
* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
|
||||
from kernel developer Nathan Chancellor.
|
||||
* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
|
||||
* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
|
||||
|
||||
..
|
||||
end-of-content
|
||||
..
|
||||
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
|
||||
you spot a typo or small mistake, feel free to let him know directly and
|
||||
he'll fix it. You are free to do the same in a mostly informal way if you
|
||||
want to contribute changes to the text -- but for copyright reasons please CC
|
||||
linux-doc@vger.kernel.org and 'sign-off' your contribution as
|
||||
Documentation/process/submitting-patches.rst explains in the section 'Sign
|
||||
your work - the Developer's Certificate of Origin'.
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use 'The Linux kernel development community' for author attribution
|
||||
and link this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
|
||||
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
@ -244,14 +244,14 @@ Reporting the bug
|
||||
Once you find where the bug happened, by inspecting its location,
|
||||
you could either try to fix it yourself or report it upstream.
|
||||
|
||||
In order to report it upstream, you should identify the mailing list
|
||||
used for the development of the affected code. This can be done by using
|
||||
the ``get_maintainer.pl`` script.
|
||||
In order to report it upstream, you should identify the bug tracker, if any, or
|
||||
mailing list used for the development of the affected code. This can be done by
|
||||
using the ``get_maintainer.pl`` script.
|
||||
|
||||
For example, if you find a bug at the gspca's sonixj.c file, you can get
|
||||
its maintainers with::
|
||||
|
||||
$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
|
||||
$ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
|
||||
Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
|
||||
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
|
||||
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
|
||||
@ -267,11 +267,12 @@ Please notice that it will point to:
|
||||
- The driver maintainer (Hans Verkuil);
|
||||
- The subsystem maintainer (Mauro Carvalho Chehab);
|
||||
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
|
||||
- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
|
||||
- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
|
||||
- The bug reporting URIs for the driver/subsystem (none in the above example).
|
||||
|
||||
Usually, the fastest way to have your bug fixed is to report it to mailing
|
||||
list used for the development of the code (linux-media ML) copying the
|
||||
driver maintainer (Hans).
|
||||
If the listing contains bug reporting URIs at the end, please prefer them over
|
||||
email. Otherwise, please report bugs to the mailing list used for the
|
||||
development of the code (linux-media ML) copying the driver maintainer (Hans).
|
||||
|
||||
If you are totally stumped as to whom to send the report, and
|
||||
``get_maintainer.pl`` didn't provide you anything useful, send it to
|
||||
|
@ -78,18 +78,24 @@ Brief summary of control files.
|
||||
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
||||
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.stat show various statistics
|
||||
memory.use_hierarchy set/show hierarchical account enabled
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.force_empty trigger forced page reclaim
|
||||
memory.pressure_level set memory pressure notifications
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.swappiness set/show swappiness parameter of vmscan
|
||||
(See sysctl's vm.swappiness)
|
||||
memory.move_charge_at_immigrate set/show controls of moving charges
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.oom_control set/show oom controls.
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.numa_stat show the number of memory usage per numa
|
||||
node
|
||||
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
|
||||
@ -105,10 +111,18 @@ Brief summary of control files.
|
||||
memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
|
||||
|
||||
memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
|
||||
hits limits
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
==================================== ==========================================
|
||||
|
||||
1. History
|
||||
@ -693,8 +707,10 @@ For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
|
||||
|
||||
# echo 1 > memory.use_hierarchy
|
||||
|
||||
7. Soft limits
|
||||
==============
|
||||
7. Soft limits (DEPRECATED)
|
||||
===========================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
Soft limits allow for greater sharing of memory. The idea behind soft limits
|
||||
is to allow control groups to use as much of the memory as needed, provided
|
||||
@ -834,8 +850,10 @@ It's applicable for root and non-root cgroup.
|
||||
|
||||
.. _cgroup-v1-memory-oom-control:
|
||||
|
||||
10. OOM Control
|
||||
===============
|
||||
10. OOM Control (DEPRECATED)
|
||||
============================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
memory.oom_control file is for OOM notification and other controls.
|
||||
|
||||
@ -882,8 +900,10 @@ At reading, current status of OOM is shown.
|
||||
The number of processes belonging to this cgroup killed by any
|
||||
kind of OOM killer.
|
||||
|
||||
11. Memory Pressure
|
||||
===================
|
||||
11. Memory Pressure (DEPRECATED)
|
||||
================================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
The pressure level notifications can be used to monitor the memory
|
||||
allocation cost; based on the pressure, applications can implement
|
||||
|
@ -533,10 +533,12 @@ cgroup namespace on namespace creation.
|
||||
Because the resource control interface files in a given directory
|
||||
control the distribution of the parent's resources, the delegatee
|
||||
shouldn't be allowed to write to them. For the first method, this is
|
||||
achieved by not granting access to these files. For the second, the
|
||||
kernel rejects writes to all files other than "cgroup.procs" and
|
||||
"cgroup.subtree_control" on a namespace root from inside the
|
||||
namespace.
|
||||
achieved by not granting access to these files. For the second, files
|
||||
outside the namespace should be hidden from the delegatee by the means
|
||||
of at least mount namespacing, and the kernel rejects writes to all
|
||||
files on a namespace root from inside the cgroup namespace, except for
|
||||
those files listed in "/sys/kernel/cgroup/delegate" (including
|
||||
"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
|
||||
|
||||
The end results are equivalent for both delegation types. Once
|
||||
delegated, the user can build sub-hierarchy under the directory,
|
||||
@ -981,6 +983,14 @@ All cgroup core files are prefixed with "cgroup."
|
||||
A dying cgroup can consume system resources not exceeding
|
||||
limits, which were active at the moment of cgroup deletion.
|
||||
|
||||
nr_subsys_<cgroup_subsys>
|
||||
Total number of live cgroup subsystems (e.g memory
|
||||
cgroup) at and beneath the current cgroup.
|
||||
|
||||
nr_dying_subsys_<cgroup_subsys>
|
||||
Total number of dying cgroup subsystems (e.g. memory
|
||||
cgroup) at and beneath the current cgroup.
|
||||
|
||||
cgroup.freeze
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
Allowed values are "0" and "1". The default is "0".
|
||||
@ -1333,11 +1343,14 @@ The following nested keys are defined.
|
||||
all the existing limitations and potential future extensions.
|
||||
|
||||
memory.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
|
||||
The max memory usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
The max memory usage recorded for the cgroup and its descendants since
|
||||
either the creation of the cgroup or the most recent reset for that FD.
|
||||
|
||||
A write of any non-empty string to this file resets it to the
|
||||
current memory usage for subsequent reads through the same
|
||||
file descriptor.
|
||||
|
||||
memory.oom.group
|
||||
A read-write single value file which exists on non-root
|
||||
@ -1586,6 +1599,15 @@ The following nested keys are defined.
|
||||
pglazyfreed (npn)
|
||||
Amount of reclaimed lazyfree pages
|
||||
|
||||
swpin_zero
|
||||
Number of pages swapped into memory and filled with zero, where I/O
|
||||
was optimized out because the page content was detected to be zero
|
||||
during swapout.
|
||||
|
||||
swpout_zero
|
||||
Number of zero-filled pages swapped out with I/O skipped due to the
|
||||
content being detected as zero.
|
||||
|
||||
zswpin
|
||||
Number of pages moved in to memory from zswap.
|
||||
|
||||
@ -1614,6 +1636,25 @@ The following nested keys are defined.
|
||||
Usually because failed to allocate some continuous swap space
|
||||
for the huge page.
|
||||
|
||||
numa_pages_migrated (npn)
|
||||
Number of pages migrated by NUMA balancing.
|
||||
|
||||
numa_pte_updates (npn)
|
||||
Number of pages whose page table entries are modified by
|
||||
NUMA balancing to produce NUMA hinting faults on access.
|
||||
|
||||
numa_hint_faults (npn)
|
||||
Number of NUMA hinting faults.
|
||||
|
||||
pgdemote_kswapd
|
||||
Number of pages demoted by kswapd.
|
||||
|
||||
pgdemote_direct
|
||||
Number of pages demoted directly.
|
||||
|
||||
pgdemote_khugepaged
|
||||
Number of pages demoted by khugepaged.
|
||||
|
||||
memory.numa_stat
|
||||
A read-only nested-keyed file which exists on non-root cgroups.
|
||||
|
||||
@ -1663,11 +1704,14 @@ The following nested keys are defined.
|
||||
Healthy workloads are not expected to reach this limit.
|
||||
|
||||
memory.swap.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
|
||||
The max swap usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
The max swap usage recorded for the cgroup and its descendants since
|
||||
the creation of the cgroup or the most recent reset for that FD.
|
||||
|
||||
A write of any non-empty string to this file resets it to the
|
||||
current memory usage for subsequent reads through the same
|
||||
file descriptor.
|
||||
|
||||
memory.swap.max
|
||||
A read-write single value file which exists on non-root
|
||||
@ -1731,6 +1775,8 @@ The following nested keys are defined.
|
||||
|
||||
Note that this is subtly different from setting memory.swap.max to
|
||||
0, as it still allows for pages to be written to the zswap pool.
|
||||
This setting has no effect if zswap is disabled, and swapping
|
||||
is allowed unless memory.swap.max is set to 0.
|
||||
|
||||
memory.pressure
|
||||
A read-only nested-keyed file.
|
||||
@ -2908,7 +2954,7 @@ following two functions.
|
||||
a queue (device) has been associated with the bio and
|
||||
before submission.
|
||||
|
||||
wbc_account_cgroup_owner(@wbc, @page, @bytes)
|
||||
wbc_account_cgroup_owner(@wbc, @folio, @bytes)
|
||||
Should be called for each data segment being written out.
|
||||
While this function doesn't care exactly when it's called
|
||||
during the writeback session, it's the easiest and most
|
||||
@ -2940,8 +2986,8 @@ Deprecated v1 Core Features
|
||||
|
||||
- "cgroup.clone_children" is removed.
|
||||
|
||||
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
|
||||
at the root instead.
|
||||
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
|
||||
"cgroup.stat" files at the root instead.
|
||||
|
||||
|
||||
Issues with v1 and Rationales for v2
|
||||
|
@ -3,29 +3,52 @@ dm-delay
|
||||
========
|
||||
|
||||
Device-Mapper's "delay" target delays reads and/or writes
|
||||
and maps them to different devices.
|
||||
and/or flushs and optionally maps them to different devices.
|
||||
|
||||
Parameters::
|
||||
Arguments::
|
||||
|
||||
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
|
||||
[<flush_device> <flush_offset> <flush_delay>]]
|
||||
|
||||
With separate write parameters, the first set is only used for reads.
|
||||
Table line has to either have 3, 6 or 9 arguments:
|
||||
|
||||
3: apply offset and delay to read, write and flush operations on device
|
||||
|
||||
6: apply offset and delay to device, also apply write_offset and write_delay
|
||||
to write and flush operations on optionally different write_device with
|
||||
optionally different sector offset
|
||||
|
||||
9: same as 6 arguments plus define flush_offset and flush_delay explicitely
|
||||
on/with optionally different flush_device/flush_offset.
|
||||
|
||||
Offsets are specified in sectors.
|
||||
|
||||
Delays are specified in milliseconds.
|
||||
|
||||
|
||||
Example scripts
|
||||
===============
|
||||
|
||||
::
|
||||
|
||||
#!/bin/sh
|
||||
# Create device delaying rw operation for 500ms
|
||||
echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
|
||||
#
|
||||
# Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
|
||||
|
||||
::
|
||||
|
||||
#!/bin/sh
|
||||
# Create device delaying only write operation for 500ms and
|
||||
# splitting reads and writes to different devices $1 $2
|
||||
echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
|
||||
#
|
||||
# Create mapped device delaying write and flush operations for 400ms and
|
||||
# splitting reads to device $1 but writes and flushs to different device $2
|
||||
# to different offsets of 2048 and 4096 sectors respectively.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
|
||||
|
||||
::
|
||||
#!/bin/sh
|
||||
#
|
||||
# Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms
|
||||
# onto the same backing device at offset 0 sectors.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"
|
||||
|
@ -160,15 +160,24 @@ iv_large_sectors
|
||||
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
|
||||
if this flag is specified.
|
||||
|
||||
integrity_key_size:<bytes>
|
||||
Use an integrity key of <bytes> size instead of using an integrity key size
|
||||
of the digest size of the used HMAC algorithm.
|
||||
|
||||
|
||||
Module parameters::
|
||||
|
||||
max_read_size
|
||||
max_write_size
|
||||
Maximum size of read or write requests. When a request larger than this size
|
||||
Maximum size of read requests. When a request larger than this size
|
||||
is received, dm-crypt will split the request. The splitting improves
|
||||
concurrency (the split requests could be encrypted in parallel by multiple
|
||||
cores), but it also causes overhead. The user should tune these parameters to
|
||||
cores), but it also causes overhead. The user should tune this parameters to
|
||||
fit the actual workload.
|
||||
|
||||
max_write_size
|
||||
Maximum size of write requests. When a request larger than this size
|
||||
is received, dm-crypt will split the request. The splitting improves
|
||||
concurrency (the split requests could be encrypted in parallel by multiple
|
||||
cores), but it also causes overhead. The user should tune this parameters to
|
||||
fit the actual workload.
|
||||
|
||||
|
||||
|
@ -251,7 +251,12 @@ The messages are:
|
||||
by the vdostats userspace program to interpret the output
|
||||
buffer.
|
||||
|
||||
dump:
|
||||
config:
|
||||
Outputs useful vdo configuration information. Mostly used
|
||||
by users who want to recreate a similar VDO volume and
|
||||
want to know the creation configuration used.
|
||||
|
||||
dump:
|
||||
Dumps many internal structures to the system log. This is
|
||||
not always safe to run, so it should only be used to debug
|
||||
a hung vdo. Optional parameters to specify structures to
|
||||
|
@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
|
||||
that ext4's inode table readahead algorithm will pre-read into the
|
||||
buffer cache. The default value is 32 blocks.
|
||||
|
||||
nouser_xattr
|
||||
Disables Extended User Attributes. See the attr(5) manual page for
|
||||
more information about extended attributes.
|
||||
|
||||
noacl
|
||||
This option disables POSIX Access Control List support. If ACL support
|
||||
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
|
||||
is enabled by default on mount. See the acl(5) manual page for more
|
||||
information about acl.
|
||||
|
||||
bsddf (*)
|
||||
Make 'df' act like BSD.
|
||||
|
||||
|
@ -27,6 +27,16 @@ kernel command line (/proc/cmdline) and collects module parameters
|
||||
when it loads a module, so the kernel command line can be used for
|
||||
loadable modules too.
|
||||
|
||||
This document may not be entirely up to date and comprehensive. The command
|
||||
"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
|
||||
module. Loadable modules, after being loaded into the running kernel, also
|
||||
reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
|
||||
parameters may be changed at runtime by the command
|
||||
``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
|
||||
|
||||
Special handling
|
||||
----------------
|
||||
|
||||
Hyphens (dashes) and underscores are equivalent in parameter names, so::
|
||||
|
||||
log_buf_len=1M print-fatal-signals=1
|
||||
@ -39,8 +49,8 @@ Double-quotes can be used to protect spaces in values, e.g.::
|
||||
|
||||
param="spaces in here"
|
||||
|
||||
cpu lists:
|
||||
----------
|
||||
cpu lists
|
||||
~~~~~~~~~
|
||||
|
||||
Some kernel parameters take a list of CPUs as a value, e.g. isolcpus,
|
||||
nohz_full, irqaffinity, rcu_nocbs. The format of this list is:
|
||||
@ -82,12 +92,17 @@ so that "nohz_full=all" is the equivalent of "nohz_full=0-N".
|
||||
The semantics of "N" and "all" is supported on a level of bitmaps and holds for
|
||||
all users of bitmap_parselist().
|
||||
|
||||
This document may not be entirely up to date and comprehensive. The command
|
||||
"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
|
||||
module. Loadable modules, after being loaded into the running kernel, also
|
||||
reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
|
||||
parameters may be changed at runtime by the command
|
||||
``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
|
||||
Metric suffixes
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
The [KMG] suffix is commonly described after a number of kernel
|
||||
parameter values. 'K', 'M', 'G', 'T', 'P', and 'E' suffixes are allowed.
|
||||
These letters represent the _binary_ multipliers 'Kilo', 'Mega', 'Giga',
|
||||
'Tera', 'Peta', and 'Exa', equaling 2^10, 2^20, 2^30, 2^40, 2^50, and
|
||||
2^60 bytes respectively. Such letter suffixes can also be entirely omitted.
|
||||
|
||||
Kernel Build Options
|
||||
--------------------
|
||||
|
||||
The parameters listed below are only valid if certain kernel build options
|
||||
were enabled and if respective hardware is present. This list should be kept
|
||||
@ -211,10 +226,5 @@ a fixed number of characters. This limit depends on the architecture
|
||||
and is between 256 and 4096 characters. It is defined in the file
|
||||
./include/uapi/asm-generic/setup.h as COMMAND_LINE_SIZE.
|
||||
|
||||
Finally, the [KMG] suffix is commonly described after a number of kernel
|
||||
parameter values. These 'K', 'M', and 'G' letters represent the _binary_
|
||||
multipliers 'Kilo', 'Mega', and 'Giga', equaling 2^10, 2^20, and 2^30
|
||||
bytes respectively. Such letter suffixes can also be entirely omitted:
|
||||
|
||||
.. include:: kernel-parameters.txt
|
||||
:literal:
|
||||
|
@ -333,12 +333,17 @@
|
||||
allowed anymore to lift isolation
|
||||
requirements as needed. This option
|
||||
does not override iommu=pt
|
||||
force_enable - Force enable the IOMMU on platforms known
|
||||
to be buggy with IOMMU enabled. Use this
|
||||
option with care.
|
||||
pgtbl_v1 - Use v1 page table for DMA-API (Default).
|
||||
pgtbl_v2 - Use v2 page table for DMA-API.
|
||||
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
|
||||
force_enable - Force enable the IOMMU on platforms known
|
||||
to be buggy with IOMMU enabled. Use this
|
||||
option with care.
|
||||
pgtbl_v1 - Use v1 page table for DMA-API (Default).
|
||||
pgtbl_v2 - Use v2 page table for DMA-API.
|
||||
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
|
||||
nohugepages - Limit page-sizes used for v1 page-tables
|
||||
to 4 KiB.
|
||||
v2_pgsizes_only - Limit page-sizes used for v1 page-tables
|
||||
to 4KiB/2Mib/1GiB.
|
||||
|
||||
|
||||
amd_iommu_dump= [HW,X86-64]
|
||||
Enable AMD IOMMU driver option to dump the ACPI table
|
||||
@ -441,6 +446,9 @@
|
||||
arm64.nobti [ARM64] Unconditionally disable Branch Target
|
||||
Identification support
|
||||
|
||||
arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
|
||||
support
|
||||
|
||||
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
|
||||
Set instructions support
|
||||
|
||||
@ -517,6 +525,18 @@
|
||||
Format: <io>,<irq>,<mode>
|
||||
See header of drivers/net/hamradio/baycom_ser_hdx.c.
|
||||
|
||||
bdev_allow_write_mounted=
|
||||
Format: <bool>
|
||||
Control the ability to open a mounted block device
|
||||
for writing, i.e., allow / disallow writes that bypass
|
||||
the FS. This was implemented as a means to prevent
|
||||
fuzzers from crashing the kernel by overwriting the
|
||||
metadata underneath a mounted FS without its awareness.
|
||||
This also prevents destructive formatting of mounted
|
||||
filesystems by naive storage tooling that don't use
|
||||
O_EXCL. Default is Y and can be changed through the
|
||||
Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
|
||||
|
||||
bert_disable [ACPI]
|
||||
Disable BERT OS support on buggy BIOSes.
|
||||
|
||||
@ -901,12 +921,16 @@
|
||||
the parameter has no effect.
|
||||
|
||||
crash_kexec_post_notifiers
|
||||
Run kdump after running panic-notifiers and dumping
|
||||
kmsg. This only for the users who doubt kdump always
|
||||
succeeds in any situation.
|
||||
Note that this also increases risks of kdump failure,
|
||||
because some panic notifiers can make the crashed
|
||||
kernel more unstable.
|
||||
Only jump to kdump kernel after running the panic
|
||||
notifiers and dumping kmsg. This option increases
|
||||
the risks of a kdump failure, since some panic
|
||||
notifiers can make the crashed kernel more unstable.
|
||||
In configurations where kdump may not be reliable,
|
||||
running the panic notifiers could allow collecting
|
||||
more data on dmesg, like stack traces from other CPUS
|
||||
or extra data dumped by panic_print. Note that some
|
||||
configurations enable this option unconditionally,
|
||||
like Hyper-V, PowerPC (fadump) and AMD SEV-SNP.
|
||||
|
||||
crashkernel=size[KMG][@offset[KMG]]
|
||||
[KNL,EARLY] Using kexec, Linux can switch to a 'crash kernel'
|
||||
@ -2660,6 +2684,23 @@
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
|
||||
If enabled, KVM will enable virtualization in hardware
|
||||
when KVM is loaded, and disable virtualization when KVM
|
||||
is unloaded (if KVM is built as a module).
|
||||
|
||||
If disabled, KVM will dynamically enable and disable
|
||||
virtualization on-demand when creating and destroying
|
||||
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
|
||||
number of VMs.
|
||||
|
||||
Enabling virtualization at module lode avoids potential
|
||||
latency for creation of the 0=>1 VM, as KVM serializes
|
||||
virtualization enabling across all online CPUs. The
|
||||
"cost" of enabling virtualization when KVM is loaded,
|
||||
is that doing so may interfere with using out-of-tree
|
||||
hypervisors that want to "own" virtualization hardware.
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
@ -4135,6 +4176,21 @@
|
||||
Disable NUMA, Only set up a single NUMA node
|
||||
spanning all memory.
|
||||
|
||||
numa=fake=<size>[MG]
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as a memory unit, fills all system RAM with
|
||||
nodes of size interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as an integer, fills all system RAM with N
|
||||
fake nodes interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>U
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as an integer followed by 'U', it will
|
||||
divide each physical node into N emulated nodes.
|
||||
|
||||
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
|
||||
NUMA balancing.
|
||||
Allowed values are enable and disable
|
||||
@ -4957,6 +5013,10 @@
|
||||
Set maximum number of finished RCU callbacks to
|
||||
process in one batch.
|
||||
|
||||
rcutree.csd_lock_suppress_rcu_stall= [KNL]
|
||||
Do only a one-line RCU CPU stall warning when
|
||||
there is an ongoing too-long CSD-lock wait.
|
||||
|
||||
rcutree.do_rcu_barrier= [KNL]
|
||||
Request a call to rcu_barrier(). This is
|
||||
throttled so that userspace tests can safely
|
||||
@ -5359,11 +5419,6 @@
|
||||
Set time (jiffies) between CPU-hotplug operations,
|
||||
or zero to disable CPU-hotplug testing.
|
||||
|
||||
rcutorture.read_exit= [KNL]
|
||||
Set the number of read-then-exit kthreads used
|
||||
to test the interaction of RCU updaters and
|
||||
task-exit processing.
|
||||
|
||||
rcutorture.read_exit_burst= [KNL]
|
||||
The number of times in a given read-then-exit
|
||||
episode that a set of read-then-exit kthreads
|
||||
@ -5373,6 +5428,14 @@
|
||||
The delay, in seconds, between successive
|
||||
read-then-exit testing episodes.
|
||||
|
||||
rcutorture.reader_flavor= [KNL]
|
||||
A bit mask indicating which readers to use.
|
||||
If there is more than one bit set, the readers
|
||||
are entered from low-order bit up, and are
|
||||
exited in the opposite order. For SRCU, the
|
||||
0x1 bit is normal readers, 0x2 NMI-safe readers,
|
||||
and 0x4 light-weight readers.
|
||||
|
||||
rcutorture.shuffle_interval= [KNL]
|
||||
Set task-shuffle interval (s). Shuffling tasks
|
||||
allows some CPUs to go into dyntick-idle mode
|
||||
@ -5404,7 +5467,13 @@
|
||||
Time to wait (s) after boot before inducing stall.
|
||||
|
||||
rcutorture.stall_cpu_irqsoff= [KNL]
|
||||
Disable interrupts while stalling if set.
|
||||
Disable interrupts while stalling if set, but only
|
||||
on the first stall in the set.
|
||||
|
||||
rcutorture.stall_cpu_repeat= [KNL]
|
||||
Number of times to repeat the stall sequence,
|
||||
so that rcutorture.stall_cpu_repeat=3 will result
|
||||
in four stall sequences.
|
||||
|
||||
rcutorture.stall_gp_kthread= [KNL]
|
||||
Duration (s) of forced sleep within RCU
|
||||
@ -5592,14 +5661,6 @@
|
||||
of zero will disable batching. Batching is
|
||||
always disabled for synchronize_rcu_tasks().
|
||||
|
||||
rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
|
||||
Set timeout in milliseconds RCU Tasks
|
||||
Rude asynchronous callback batching for
|
||||
call_rcu_tasks_rude(). A negative value
|
||||
will take the default. A value of zero will
|
||||
disable batching. Batching is always disabled
|
||||
for synchronize_rcu_tasks_rude().
|
||||
|
||||
rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
|
||||
Set timeout in milliseconds RCU Tasks
|
||||
Trace asynchronous callback batching for
|
||||
@ -6636,6 +6697,15 @@
|
||||
<deci-seconds>: poll all this frequency
|
||||
0: no polling (default)
|
||||
|
||||
thp_anon= [KNL]
|
||||
Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
|
||||
state is one of "always", "madvise", "never" or "inherit".
|
||||
Control the default behavior of the system with respect
|
||||
to anonymous transparent hugepages.
|
||||
Can be used multiple times for multiple anon THP sizes.
|
||||
See Documentation/admin-guide/mm/transhuge.rst for more
|
||||
details.
|
||||
|
||||
threadirqs [KNL,EARLY]
|
||||
Force threading of all interrupt handlers except those
|
||||
marked explicitly IRQF_NO_THREAD.
|
||||
@ -6667,6 +6737,15 @@
|
||||
torture.verbose_sleep_duration= [KNL]
|
||||
Duration of each verbose-printk() sleep in jiffies.
|
||||
|
||||
tpm.disable_pcr_integrity= [HW,TPM]
|
||||
Do not protect PCR registers from unintended physical
|
||||
access, or interposers in the bus by the means of
|
||||
having an integrity protected session wrapped around
|
||||
TPM2_PCR_Extend command. Consider this in a situation
|
||||
where TPM is heavily utilized by IMA, thus protection
|
||||
causing a major performance hit, and the space where
|
||||
machines are deployed is by other means guarded.
|
||||
|
||||
tpm_suspend_pcr=[HW,TPM]
|
||||
Format: integer pcr id
|
||||
Specify that at suspend time, the tpm driver
|
||||
@ -6765,6 +6844,57 @@
|
||||
the same thing would happen if it was left off). The irq_handler_entry
|
||||
event, and all events under the "initcall" system.
|
||||
|
||||
Flags can be added to the instance to modify its behavior when it is
|
||||
created. The flags are separated by '^'.
|
||||
|
||||
The available flags are:
|
||||
|
||||
traceoff - Have the tracing instance tracing disabled after it is created.
|
||||
traceprintk - Have trace_printk() write into this trace instance
|
||||
(note, "printk" and "trace_printk" can also be used)
|
||||
|
||||
trace_instance=foo^traceoff^traceprintk,sched,irq
|
||||
|
||||
The flags must come before the defined events.
|
||||
|
||||
If memory has been reserved (see memmap for x86), the instance
|
||||
can use that memory:
|
||||
|
||||
memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
|
||||
|
||||
The above will create a "boot_map" instance that uses the physical
|
||||
memory at 0x284500000 that is 12Megs. The per CPU buffers of that
|
||||
instance will be split up accordingly.
|
||||
|
||||
Alternatively, the memory can be reserved by the reserve_mem option:
|
||||
|
||||
reserve_mem=12M:4096:trace trace_instance=boot_map@trace
|
||||
|
||||
This will reserve 12 megabytes at boot up with a 4096 byte alignment
|
||||
and place the ring buffer in this memory. Note that due to KASLR, the
|
||||
memory may not be the same location each time, which will not preserve
|
||||
the buffer content.
|
||||
|
||||
Also note that the layout of the ring buffer data may change between
|
||||
kernel versions where the validator will fail and reset the ring buffer
|
||||
if the layout is not the same as the previous kernel.
|
||||
|
||||
If the ring buffer is used for persistent bootups and has events enabled,
|
||||
it is recommend to disable tracing so that events from a previous boot do not
|
||||
mix with events of the current boot (unless you are debugging a random crash
|
||||
at boot up).
|
||||
|
||||
reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
|
||||
|
||||
Note, saving the trace buffer across reboots does require that the system
|
||||
is set up to not wipe memory. For instance, CONFIG_RESET_ATTACK_MITIGATION
|
||||
can force a memory reset on boot which will clear any trace that was stored.
|
||||
This is just one of many ways that can clear memory. Make sure your system
|
||||
keeps the content of memory across reboots before relying on this option.
|
||||
|
||||
See also Documentation/trace/debugging.rst
|
||||
|
||||
|
||||
trace_options=[option-list]
|
||||
[FTRACE] Enable or disable tracer options at boot.
|
||||
The option-list is a comma delimited list of options
|
||||
@ -7374,6 +7504,13 @@
|
||||
it can be updated at runtime by writing to the
|
||||
corresponding sysfs file.
|
||||
|
||||
workqueue.panic_on_stall=<uint>
|
||||
Panic when workqueue stall is detected by
|
||||
CONFIG_WQ_WATCHDOG. It sets the number times of the
|
||||
stall to trigger panic.
|
||||
|
||||
The default is 0, which disables the panic on stall.
|
||||
|
||||
workqueue.cpu_intensive_thresh_us=
|
||||
Per-cpu work items which run for longer than this
|
||||
threshold are automatically considered CPU intensive
|
||||
|
@ -315,7 +315,7 @@ To reduce its OS jitter, do at least one of the following:
|
||||
to do.
|
||||
|
||||
Name:
|
||||
rcuop/%d and rcuos/%d
|
||||
rcuop/%d, rcuos/%d, and rcuog/%d
|
||||
|
||||
Purpose:
|
||||
Offload RCU callbacks from the corresponding CPU.
|
||||
|
@ -42,10 +42,14 @@ dongles):
|
||||
``persistent_config``: by default this is off, but when set to 1 the driver
|
||||
will store the current settings to the device's internal eeprom and restore
|
||||
it the next time the device is connected to the USB port.
|
||||
|
||||
- RainShadow Tech. Note: this driver does not support the persistent_config
|
||||
module option of the Pulse-Eight driver. The hardware supports it, but I
|
||||
have no plans to add this feature. But I accept patches :-)
|
||||
|
||||
- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
|
||||
:ref:`extron_da_hd_4k_plus` for more information.
|
||||
|
||||
Miscellaneous:
|
||||
|
||||
- vivid: emulates a CEC receiver and CEC transmitter.
|
||||
@ -378,3 +382,86 @@ it later using ``--analyze-pin``.
|
||||
|
||||
You can also use this as a full-fledged CEC device by configuring it
|
||||
using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
|
||||
|
||||
.. _extron_da_hd_4k_plus:
|
||||
|
||||
Extron DA HD 4K PLUS CEC Adapter driver
|
||||
=======================================
|
||||
|
||||
This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
|
||||
Amplifiers: https://www.extron.com/product/dahd4kplusseries
|
||||
|
||||
The 2, 4 and 6 port models are supported.
|
||||
|
||||
Firmware version 1.02.0001 or higher is required.
|
||||
|
||||
Note that older Extron hardware revisions have a problem with the CEC voltage,
|
||||
which may mean that CEC will not work. This is fixed in hardware revisions
|
||||
E34814 and up.
|
||||
|
||||
The CEC support has two modes: the first is a manual mode where userspace has
|
||||
to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
|
||||
full control, it is also complicated.
|
||||
|
||||
The second mode is an automatic mode, which is selected if the module option
|
||||
``vendor_id`` is set. In that case the driver controls CEC and CEC messages
|
||||
received in the input will be distributed to the outputs. It is still possible
|
||||
to use the /dev/cecX devices to talk to the connected devices directly, but it is
|
||||
the driver that configures everything and deals with things like Hotplug Detect
|
||||
changes.
|
||||
|
||||
The driver also takes care of the EDIDs: /dev/videoX devices are created to
|
||||
read the EDIDs and (for the HDMI Input port) to set the EDID.
|
||||
|
||||
By default userspace is responsible to set the EDID for the HDMI Input
|
||||
according to the EDIDs of the connected displays. But if the ``manufacturer_name``
|
||||
module option is set, then the driver will take care of setting the EDID
|
||||
of the HDMI Input based on the supported resolutions of the connected displays.
|
||||
Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
|
||||
displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
|
||||
it will fall back to an EDID that just reports 1080p60.
|
||||
|
||||
The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
|
||||
|
||||
The extron-da-hd-4k-plus driver implements the following module options:
|
||||
|
||||
``debug``
|
||||
---------
|
||||
|
||||
If set to 1, then all serial port traffic is shown.
|
||||
|
||||
``vendor_id``
|
||||
-------------
|
||||
|
||||
The CEC Vendor ID to report to connected displays.
|
||||
|
||||
If set, then the driver will take care of distributing CEC messages received
|
||||
on the input to the HDMI outputs. This is done for the following CEC messages:
|
||||
|
||||
- <Standby>
|
||||
- <Image View On> and <Text View On>
|
||||
- <Give Device Power Status>
|
||||
- <Set System Audio Mode>
|
||||
- <Request Current Latency>
|
||||
|
||||
If not set, then userspace is responsible for this, and it will have to
|
||||
configure the CEC devices for HDMI Input and the HDMI Outputs manually.
|
||||
|
||||
``manufacturer_name``
|
||||
---------------------
|
||||
|
||||
A three character manufacturer name that is used in the EDID for the HDMI
|
||||
Input. If not set, then userspace is reponsible for configuring an EDID.
|
||||
If set, then the driver will update the EDID automatically based on the
|
||||
resolutions supported by the connected displays, and it will not be possible
|
||||
anymore to manually set the EDID for the HDMI Input.
|
||||
|
||||
``hpd_never_low``
|
||||
-----------------
|
||||
|
||||
If set, then the Hotplug Detect pin of the HDMI Input will always be high,
|
||||
even if nothing is connected to the HDMI Outputs. If not set (the default)
|
||||
then the Hotplug Detect pin of the HDMI input will go low if all the detected
|
||||
Hotplug Detect pins of the HDMI Outputs are also low.
|
||||
|
||||
This option may be changed dynamically.
|
||||
|
@ -227,8 +227,13 @@ Common FPDL3/GMSL output parameters
|
||||
open.*
|
||||
|
||||
**frame_rate** (RW):
|
||||
Output video frame rate in frames per second. The default frame rate is
|
||||
60Hz.
|
||||
Output video signal frame rate limit in frames per second. Due to
|
||||
the limited output pixel clock steps, the card can not always generate
|
||||
a frame rate perfectly matching the value required by the connected display.
|
||||
Using this parameter one can limit the frame rate by "crippling" the signal
|
||||
so that the lines are not equal (the porches of the last line differ) but
|
||||
the signal appears like having the exact frame rate to the connected display.
|
||||
The default frame rate limit is 60Hz.
|
||||
|
||||
**hsync_polarity** (RW):
|
||||
HSYNC signal polarity.
|
||||
@ -253,33 +258,33 @@ Common FPDL3/GMSL output parameters
|
||||
and there is a non-linear stepping between two consecutive allowed
|
||||
frequencies. The driver finds the nearest allowed frequency to the given
|
||||
value and sets it. When reading this property, you get the exact
|
||||
frequency set by the driver. The default frequency is 70000kHz.
|
||||
frequency set by the driver. The default frequency is 61150kHz.
|
||||
|
||||
*Note: This parameter can not be changed while the output v4l2 device is
|
||||
open.*
|
||||
|
||||
**hsync_width** (RW):
|
||||
Width of the HSYNC signal in pixels. The default value is 16.
|
||||
Width of the HSYNC signal in pixels. The default value is 40.
|
||||
|
||||
**vsync_width** (RW):
|
||||
Width of the VSYNC signal in video lines. The default value is 2.
|
||||
Width of the VSYNC signal in video lines. The default value is 20.
|
||||
|
||||
**hback_porch** (RW):
|
||||
Number of PCLK pulses between deassertion of the HSYNC signal and the first
|
||||
valid pixel in the video line (marked by DE=1). The default value is 32.
|
||||
valid pixel in the video line (marked by DE=1). The default value is 50.
|
||||
|
||||
**hfront_porch** (RW):
|
||||
Number of PCLK pulses between the end of the last valid pixel in the video
|
||||
line (marked by DE=1) and assertion of the HSYNC signal. The default value
|
||||
is 32.
|
||||
is 50.
|
||||
|
||||
**vback_porch** (RW):
|
||||
Number of video lines between deassertion of the VSYNC signal and the video
|
||||
line with the first valid pixel (marked by DE=1). The default value is 2.
|
||||
line with the first valid pixel (marked by DE=1). The default value is 31.
|
||||
|
||||
**vfront_porch** (RW):
|
||||
Number of video lines between the end of the last valid pixel line (marked
|
||||
by DE=1) and assertion of the VSYNC signal. The default value is 2.
|
||||
by DE=1) and assertion of the VSYNC signal. The default value is 30.
|
||||
|
||||
FPDL3 specific input parameters
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -114,11 +114,18 @@ to be applied to the hardware during a video stream, allowing userspace
|
||||
to dynamically modify values such as black level, cross talk corrections
|
||||
and others.
|
||||
|
||||
The buffer format is defined by struct :c:type:`rkisp1_params_cfg`, and
|
||||
userspace should set
|
||||
The ISP driver supports two different parameters configuration methods, the
|
||||
`fixed parameters format` or the `extensible parameters format`.
|
||||
|
||||
When using the `fixed parameters` method the buffer format is defined by struct
|
||||
:c:type:`rkisp1_params_cfg`, and userspace should set
|
||||
:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
|
||||
dataformat.
|
||||
|
||||
When using the `extensible parameters` method the buffer format is defined by
|
||||
struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
|
||||
:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
|
||||
the dataformat.
|
||||
|
||||
Capturing Video Frames Example
|
||||
==============================
|
||||
|
@ -1343,7 +1343,7 @@ Some Future Improvements
|
||||
Just as a reminder and in no particular order:
|
||||
|
||||
- Add a virtual alsa driver to test audio
|
||||
- Add virtual sub-devices and media controller support
|
||||
- Add virtual sub-devices
|
||||
- Some support for testing compressed video
|
||||
- Add support to loop raw VBI output to raw VBI input
|
||||
- Add support to loop teletext sliced VBI output to VBI input
|
||||
@ -1358,4 +1358,4 @@ Just as a reminder and in no particular order:
|
||||
- Make a thread for the RDS generation, that would help in particular for the
|
||||
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
|
||||
in real-time.
|
||||
- Changing the EDID should cause hotplug detect emulation to happen.
|
||||
- Changing the EDID doesn't wait 100 ms before setting the HPD signal.
|
||||
|
@ -7,7 +7,7 @@ Getting Started
|
||||
This document briefly describes how you can use DAMON by demonstrating its
|
||||
default user space tool. Please note that this document describes only a part
|
||||
of its features for brevity. Please refer to the usage `doc
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_ of the tool for more
|
||||
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
|
||||
details.
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ User Space Tool
|
||||
|
||||
For the demonstration, we will use the default user space tool for DAMON,
|
||||
called DAMON Operator (DAMO). It is available at
|
||||
https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
|
||||
https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
|
||||
your ``$PATH``. It's not mandatory, though.
|
||||
|
||||
Because DAMO is using the sysfs interface (refer to :doc:`usage` for the
|
||||
|
@ -7,19 +7,19 @@ Detailed Usages
|
||||
DAMON provides below interfaces for different users.
|
||||
|
||||
- *DAMON user space tool.*
|
||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||
`This <https://github.com/damonitor/damo>`_ is for privileged people such as
|
||||
system administrators who want a just-working human-friendly interface.
|
||||
Using this, users can use the DAMON’s major features in a human-friendly way.
|
||||
It may not be highly tuned for special cases, though. For more detail,
|
||||
please refer to its `usage document
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
|
||||
- *sysfs interface.*
|
||||
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||
features by reading from and writing to special sysfs files. Therefore,
|
||||
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs.
|
||||
<https://github.com/damonitor/damo>`_ is one example of such programs.
|
||||
- *Kernel Space Programming Interface.*
|
||||
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
|
||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||
@ -543,7 +543,7 @@ memory rate becomes larger than 60%, or lower than 30%". ::
|
||||
# echo 300 > watermarks/low
|
||||
|
||||
Please note that it's highly recommended to use user space tools like `damo
|
||||
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
|
||||
<https://github.com/damonitor/damo>`_ rather than manually reading and writing
|
||||
the files as above. Above is only for an example.
|
||||
|
||||
.. _tracepoint:
|
||||
|
@ -294,8 +294,9 @@ The following files are currently defined:
|
||||
``crash_hotplug`` read-only: when changes to the system memory map
|
||||
occur due to hot un/plug of memory, this file contains
|
||||
'1' if the kernel updates the kdump capture kernel memory
|
||||
map itself (via elfcorehdr), or '0' if userspace must update
|
||||
the kdump capture kernel memory map.
|
||||
map itself (via elfcorehdr and other relevant kexec
|
||||
segments), or '0' if userspace must update the kdump
|
||||
capture kernel memory map.
|
||||
|
||||
Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
|
||||
configuration option.
|
||||
|
@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
|
||||
|
||||
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
|
||||
|
||||
All THPs at fault and collapse time will be added to _deferred_list,
|
||||
and will therefore be split under memory presure if they are considered
|
||||
"underused". A THP is underused if the number of zero-filled pages in
|
||||
the THP is above max_ptes_none (see below). It is possible to disable
|
||||
this behaviour by writing 0 to shrink_underused, and enable it by writing
|
||||
1 to it::
|
||||
|
||||
echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
|
||||
echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
|
||||
|
||||
khugepaged will be automatically started when PMD-sized THP is enabled
|
||||
(either of the per-size anon control or the top-level control are set
|
||||
to "always" or "madvise"), and it'll be automatically shutdown when
|
||||
@ -284,13 +294,37 @@ that THP is shared. Exceeding the number would block the collapse::
|
||||
|
||||
A higher value may increase memory footprint for some workloads.
|
||||
|
||||
Boot parameter
|
||||
==============
|
||||
Boot parameters
|
||||
===============
|
||||
|
||||
You can change the sysfs boot time defaults of Transparent Hugepage
|
||||
Support by passing the parameter ``transparent_hugepage=always`` or
|
||||
``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
|
||||
to the kernel command line.
|
||||
You can change the sysfs boot time default for the top-level "enabled"
|
||||
control by passing the parameter ``transparent_hugepage=always`` or
|
||||
``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
|
||||
kernel command line.
|
||||
|
||||
Alternatively, each supported anonymous THP size can be controlled by
|
||||
passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
|
||||
where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
|
||||
supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
|
||||
``never`` or ``inherit``.
|
||||
|
||||
For example, the following will set 16K, 32K, 64K THP to ``always``,
|
||||
set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
|
||||
to ``never``::
|
||||
|
||||
thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
|
||||
|
||||
``thp_anon=`` may be specified multiple times to configure all THP sizes as
|
||||
required. If ``thp_anon=`` is specified at least once, any anon THP sizes
|
||||
not explicitly configured on the command line are implicitly set to
|
||||
``never``.
|
||||
|
||||
``transparent_hugepage`` setting only affects the global toggle. If
|
||||
``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
|
||||
However, if a valid ``thp_anon`` setting is provided by the user, the
|
||||
PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
|
||||
is not defined within a valid ``thp_anon``, its policy will default to
|
||||
``never``.
|
||||
|
||||
Hugepages in tmpfs/shmem
|
||||
========================
|
||||
@ -447,6 +481,12 @@ thp_deferred_split_page
|
||||
splitting it would free up some memory. Pages on split queue are
|
||||
going to be split under memory pressure.
|
||||
|
||||
thp_underused_split_page
|
||||
is incremented when a huge page on the split queue was split
|
||||
because it was underused. A THP is underused if the number of
|
||||
zero pages in the THP is above a certain threshold
|
||||
(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
|
||||
|
||||
thp_split_pmd
|
||||
is incremented every time a PMD split into table of PTEs.
|
||||
This can happen, for instance, when application calls mprotect() or
|
||||
@ -527,6 +567,18 @@ split_deferred
|
||||
it would free up some memory. Pages on split queue are going to
|
||||
be split under memory pressure, if splitting is possible.
|
||||
|
||||
nr_anon
|
||||
the number of anonymous THP we have in the whole system. These THPs
|
||||
might be currently entirely mapped or have partially unmapped/unused
|
||||
subpages.
|
||||
|
||||
nr_anon_partially_mapped
|
||||
the number of anonymous THP which are likely partially mapped, possibly
|
||||
wasting memory, and have been queued for deferred memory reclamation.
|
||||
Note that in corner some cases (e.g., failed migration), we might detect
|
||||
an anonymous THP as "partially mapped" and count it here, even though it
|
||||
is not actually partially mapped anymore.
|
||||
|
||||
As the system ages, allocating huge pages may be expensive as the
|
||||
system uses memory compaction to copy data around memory to free a
|
||||
huge page for use. There are some counters in ``/proc/vmstat`` to help
|
||||
|
@ -26,3 +26,4 @@ Performance monitor support
|
||||
meson-ddr-pmu
|
||||
cxl
|
||||
ampere_cspmu
|
||||
mrvl-pem-pmu
|
||||
|
56
Documentation/admin-guide/perf/mrvl-pem-pmu.rst
Normal file
@ -0,0 +1,56 @@
|
||||
=================================================================
|
||||
Marvell Odyssey PEM Performance Monitoring Unit (PMU UNCORE)
|
||||
=================================================================
|
||||
|
||||
The PCI Express Interface Units(PEM) are associated with a corresponding
|
||||
monitoring unit. This includes performance counters to track various
|
||||
characteristics of the data that is transmitted over the PCIe link.
|
||||
|
||||
The counters track inbound and outbound transactions which
|
||||
includes separate counters for posted/non-posted/completion TLPs.
|
||||
Also, inbound and outbound memory read requests along with their
|
||||
latencies can also be monitored. Address Translation Services(ATS)events
|
||||
such as ATS Translation, ATS Page Request, ATS Invalidation along with
|
||||
their corresponding latencies are also tracked.
|
||||
|
||||
There are separate 64 bit counters to measure posted/non-posted/completion
|
||||
tlps in inbound and outbound transactions. ATS events are measured by
|
||||
different counters.
|
||||
|
||||
The PMU driver exposes the available events and format options under sysfs,
|
||||
/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/events/
|
||||
/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/format/
|
||||
|
||||
Examples::
|
||||
|
||||
# perf list | grep mrvl_pcie_rc_pmu
|
||||
mrvl_pcie_rc_pmu_<>/ats_inv/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ats_inv_latency/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ats_pri/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ats_pri_latency/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ats_trans/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ats_trans_latency/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_inflight/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_reads/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ebus/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ncb/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_cpl_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_cpl_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_npr/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_pr/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_npr/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ib_tlp_pr/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_inflight_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_merges_cpl_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_merges_npr_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_merges_pr_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_reads_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_cpl_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_cpl_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_npr_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_pr_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_npr_partid/ [Kernel PMU event]
|
||||
mrvl_pcie_rc_pmu_<>/ob_tlp_pr_partid/ [Kernel PMU event]
|
||||
|
||||
|
||||
# perf stat -e ib_inflight,ib_reads,ib_req_no_ro_ebus,ib_req_no_ro_ncb <workload>
|
@ -425,8 +425,8 @@ This governor exposes only one tunable:
|
||||
|
||||
``rate_limit_us``
|
||||
Minimum time (in microseconds) that has to pass between two consecutive
|
||||
runs of governor computations (default: 1000 times the scaling driver's
|
||||
transition latency).
|
||||
runs of governor computations (default: 1.5 times the scaling driver's
|
||||
transition latency or the maximum 2ms).
|
||||
|
||||
The purpose of this tunable is to reduce the scheduler context overhead
|
||||
of the governor which might be excessive without it.
|
||||
@ -474,17 +474,17 @@ This governor exposes the following tunables:
|
||||
This is how often the governor's worker routine should run, in
|
||||
microseconds.
|
||||
|
||||
Typically, it is set to values of the order of 10000 (10 ms). Its
|
||||
default value is equal to the value of ``cpuinfo_transition_latency``
|
||||
for each policy this governor is attached to (but since the unit here
|
||||
is greater by 1000, this means that the time represented by
|
||||
``sampling_rate`` is 1000 times greater than the transition latency by
|
||||
default).
|
||||
Typically, it is set to values of the order of 2000 (2 ms). Its
|
||||
default value is to add a 50% breathing room
|
||||
to ``cpuinfo_transition_latency`` on each policy this governor is
|
||||
attached to. The minimum is typically the length of two scheduler
|
||||
ticks.
|
||||
|
||||
If this tunable is per-policy, the following shell command sets the time
|
||||
represented by it to be 750 times as high as the transition latency::
|
||||
represented by it to be 1.5 times as high as the transition latency
|
||||
(the default)::
|
||||
|
||||
# echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
|
||||
# echo `$(($(cat cpuinfo_transition_latency) * 3 / 2)) > ondemand/sampling_rate
|
||||
|
||||
``up_threshold``
|
||||
If the estimated CPU load is above this value (in percent), the governor
|
||||
|
@ -113,3 +113,62 @@ to apply at each uncore* level.
|
||||
|
||||
Support for "current_freq_khz" is available only at each fabric cluster
|
||||
level (i.e., in uncore* directory).
|
||||
|
||||
Efficiency vs. Latency Tradeoff
|
||||
-------------------------------
|
||||
|
||||
The Efficiency Latency Control (ELC) feature improves performance
|
||||
per watt. With this feature hardware power management algorithms
|
||||
optimize trade-off between latency and power consumption. For some
|
||||
latency sensitive workloads further tuning can be done by SW to
|
||||
get desired performance.
|
||||
|
||||
The hardware monitors the average CPU utilization across all cores
|
||||
in a power domain at regular intervals and decides an uncore frequency.
|
||||
While this may result in the best performance per watt, workload may be
|
||||
expecting higher performance at the expense of power. Consider an
|
||||
application that intermittently wakes up to perform memory reads on an
|
||||
otherwise idle system. In such cases, if hardware lowers uncore
|
||||
frequency, then there may be delay in ramp up of frequency to meet
|
||||
target performance.
|
||||
|
||||
The ELC control defines some parameters which can be changed from SW.
|
||||
If the average CPU utilization is below a user-defined threshold
|
||||
(elc_low_threshold_percent attribute below), the user-defined uncore
|
||||
floor frequency will be used (elc_floor_freq_khz attribute below)
|
||||
instead of hardware calculated minimum.
|
||||
|
||||
Similarly in high load scenario where the CPU utilization goes above
|
||||
the high threshold value (elc_high_threshold_percent attribute below)
|
||||
instead of jumping to maximum uncore frequency, frequency is increased
|
||||
in 100MHz steps. This avoids consuming unnecessarily high power
|
||||
immediately with CPU utilization spikes.
|
||||
|
||||
Attributes for efficiency latency control:
|
||||
|
||||
``elc_floor_freq_khz``
|
||||
This attribute is used to get/set the efficiency latency floor frequency.
|
||||
If this variable is lower than the 'min_freq_khz', it is ignored by
|
||||
the firmware.
|
||||
|
||||
``elc_low_threshold_percent``
|
||||
This attribute is used to get/set the efficiency latency control low
|
||||
threshold. This attribute is in percentages of CPU utilization.
|
||||
|
||||
``elc_high_threshold_percent``
|
||||
This attribute is used to get/set the efficiency latency control high
|
||||
threshold. This attribute is in percentages of CPU utilization.
|
||||
|
||||
``elc_high_threshold_enable``
|
||||
This attribute is used to enable/disable the efficiency latency control
|
||||
high threshold. Write '1' to enable, '0' to disable.
|
||||
|
||||
Example system configuration below, which does following:
|
||||
* when CPU utilization is less than 10%: sets uncore frequency to 800MHz
|
||||
* when CPU utilization is higher than 95%: increases uncore frequency in
|
||||
100MHz steps, until power limit is reached
|
||||
|
||||
elc_floor_freq_khz:800000
|
||||
elc_high_threshold_percent:95
|
||||
elc_high_threshold_enable:1
|
||||
elc_low_threshold_percent:10
|
||||
|
@ -129,7 +129,7 @@ Setting the ramoops parameters can be done in several different manners:
|
||||
takes a size, alignment and name as arguments. The name is used
|
||||
to map the memory to a label that can be retrieved by ramoops.
|
||||
|
||||
reserver_mem=2M:4096:oops ramoops.mem_name=oops
|
||||
reserve_mem=2M:4096:oops ramoops.mem_name=oops
|
||||
|
||||
You can specify either RAM memory or peripheral devices' memory. However, when
|
||||
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
|
||||
|
@ -38,6 +38,11 @@ requests. ``aio-max-nr`` allows you to change the maximum value
|
||||
``aio-max-nr`` does not result in the
|
||||
pre-allocation or re-sizing of any kernel data structures.
|
||||
|
||||
dentry-negative
|
||||
----------------------------
|
||||
|
||||
Policy for negative dentries. Set to 1 to to always delete the dentry when a
|
||||
file is removed, and 0 to disable it. By default, this behavior is disabled.
|
||||
|
||||
dentry-state
|
||||
------------
|
||||
|
@ -182,3 +182,5 @@ More detailed explanation for tainting
|
||||
produce extremely unusual kernel structure layouts (even performance
|
||||
pathological ones), which is important to know when debugging. Set at
|
||||
build time.
|
||||
|
||||
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
|
||||
|
@ -12,7 +12,7 @@ ones.
|
||||
|
||||
Of course this is a bad idea to rely on the alignment trap to perform
|
||||
unaligned memory access in general. If those access are predictable, you
|
||||
are better to use the macros provided by include/asm/unaligned.h. The
|
||||
are better to use the macros provided by include/linux/unaligned.h. The
|
||||
alignment trap can fixup misaligned access for the exception cases, but at
|
||||
a high performance cost. It better be rare.
|
||||
|
||||
|
@ -359,7 +359,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
|
||||
descriptor you want a callback to be called at the end of the transfer
|
||||
(dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
|
||||
Depending on the direction, set the callback on the descriptor that finishes
|
||||
the overal transfer:
|
||||
the overall transfer:
|
||||
|
||||
* DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
|
||||
* DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
|
||||
@ -371,7 +371,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
|
||||
As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
|
||||
STM32 MDMA channel before STM32 DMA channel.
|
||||
|
||||
If any, your callback will be called to warn you about the end of the overal
|
||||
If any, your callback will be called to warn you about the end of the overall
|
||||
transfer or the period completion.
|
||||
|
||||
Don't forget to terminate both channels. STM32 DMA channel is configured in
|
||||
|
69
Documentation/arch/arm64/arm-cca.rst
Normal file
@ -0,0 +1,69 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================================
|
||||
Arm Confidential Compute Architecture
|
||||
=====================================
|
||||
|
||||
Arm systems that support the Realm Management Extension (RME) contain
|
||||
hardware to allow a VM guest to be run in a way which protects the code
|
||||
and data of the guest from the hypervisor. It extends the older "two
|
||||
world" model (Normal and Secure World) into four worlds: Normal, Secure,
|
||||
Root and Realm. Linux can then also be run as a guest to a monitor
|
||||
running in the Realm world.
|
||||
|
||||
The monitor running in the Realm world is known as the Realm Management
|
||||
Monitor (RMM) and implements the Realm Management Monitor
|
||||
specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
|
||||
in EL2 and manages the stage 2 page tables etc of the guests running in
|
||||
Realm world), however much of the control is handled by a hypervisor
|
||||
running in the Normal World. The Normal World hypervisor uses the Realm
|
||||
Management Interface (RMI) defined by the RMM specification to request
|
||||
the RMM to perform operations (e.g. mapping memory or executing a vCPU).
|
||||
|
||||
The RMM defines an environment for guests where the address space (IPA)
|
||||
is split into two. The lower half is protected - any memory that is
|
||||
mapped in this half cannot be seen by the Normal World and the RMM
|
||||
restricts what operations the Normal World can perform on this memory
|
||||
(e.g. the Normal World cannot replace pages in this region without the
|
||||
guest's cooperation). The upper half is shared, the Normal World is free
|
||||
to make changes to the pages in this region, and is able to emulate MMIO
|
||||
devices in this region too.
|
||||
|
||||
A guest running in a Realm may also communicate with the RMM using the
|
||||
Realm Services Interface (RSI) to request changes in its environment or
|
||||
to perform attestation about its environment. In particular it may
|
||||
request that areas of the protected address space are transitioned
|
||||
between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
|
||||
guest to give up memory to be returned to the Normal World, or to
|
||||
request new memory from the Normal World. Without an explicit request
|
||||
from the Realm guest the RMM will otherwise prevent the Normal World
|
||||
from making these changes.
|
||||
|
||||
Linux as a Realm Guest
|
||||
----------------------
|
||||
|
||||
To run Linux as a guest within a Realm, the following must be provided
|
||||
either by the VMM or by a `boot loader` run in the Realm before Linux:
|
||||
|
||||
* All protected RAM described to Linux (by DT or ACPI) must be marked
|
||||
RIPAS RAM before handing control over to Linux.
|
||||
|
||||
* MMIO devices must be either unprotected (e.g. emulated by the Normal
|
||||
World) or marked RIPAS DEV.
|
||||
|
||||
* MMIO devices emulated by the Normal World and used very early in boot
|
||||
(specifically earlycon) must be specified in the upper half of IPA.
|
||||
For earlycon this can be done by specifying the address on the
|
||||
command line, e.g. with an IPA size of 33 bits and the base address
|
||||
of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
|
||||
|
||||
* Linux will use bounce buffers for communicating with unprotected
|
||||
devices. It will transition some protected memory to RIPAS EMPTY and
|
||||
expect to be able to access unprotected pages at the same IPA address
|
||||
but with the highest valid IPA bit set. The expectation is that the
|
||||
VMM will remove the physical pages from the protected mapping and
|
||||
provide those pages as unprotected pages.
|
||||
|
||||
References
|
||||
----------
|
||||
[1] https://developer.arm.com/documentation/den0137/
|
@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of
|
||||
the RAM in the machine, or any other method the boot loader designer
|
||||
sees fit.)
|
||||
|
||||
For Arm Confidential Compute Realms this includes ensuring that all
|
||||
protected RAM has a Realm IPA state (RIPAS) of "RAM".
|
||||
|
||||
|
||||
2. Setup the device tree
|
||||
-------------------------
|
||||
@ -385,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
|
||||
|
||||
- HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
|
||||
must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
|
||||
|
||||
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
|
||||
|
||||
- If EL3 is present:
|
||||
@ -411,6 +417,38 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
|
||||
|
||||
- For CPUs with Guarded Control Stacks (FEAT_GCS):
|
||||
|
||||
- GCSCR_EL1 must be initialised to 0.
|
||||
|
||||
- GCSCRE0_EL1 must be initialised to 0.
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
|
||||
|
||||
- If EL2 is present:
|
||||
|
||||
- GCSCR_EL2 must be initialised to 0.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- HCRX_EL2.GCSEn must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
|
||||
|
||||
- HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
|
||||
|
||||
- HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
|
||||
|
||||
- HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
|
||||
|
||||
- HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
@ -26,7 +26,7 @@ There are no systems that support the physical addition (or removal) of CPUs
|
||||
while the system is running, and ACPI is not able to sufficiently describe
|
||||
them.
|
||||
|
||||
e.g. New CPUs come with new caches, but the platform's cache toplogy is
|
||||
e.g. New CPUs come with new caches, but the platform's cache topology is
|
||||
described in a static table, the PPTT. How caches are shared between CPUs is
|
||||
not discoverable, and must be described by firmware.
|
||||
|
||||
|
@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
|
||||
kernel exposes the presence of these features to userspace through a set
|
||||
of flags called hwcaps, exposed in the auxiliary vector.
|
||||
|
||||
Userspace software can test for features by acquiring the AT_HWCAP or
|
||||
AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
|
||||
flags are set, e.g.::
|
||||
Userspace software can test for features by acquiring the AT_HWCAP,
|
||||
AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
|
||||
whether the relevant flags are set, e.g.::
|
||||
|
||||
bool floating_point_is_present(void)
|
||||
{
|
||||
@ -170,6 +170,10 @@ HWCAP_PACG
|
||||
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
|
||||
Documentation/arch/arm64/pointer-authentication.rst.
|
||||
|
||||
HWCAP_GCS
|
||||
Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
|
||||
described by Documentation/arch/arm64/gcs.rst.
|
||||
|
||||
HWCAP2_DCPODP
|
||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
||||
|
||||
|
227
Documentation/arch/arm64/gcs.rst
Normal file
@ -0,0 +1,227 @@
|
||||
===============================================
|
||||
Guarded Control Stack support for AArch64 Linux
|
||||
===============================================
|
||||
|
||||
This document outlines briefly the interface provided to userspace by Linux in
|
||||
order to support use of the ARM Guarded Control Stack (GCS) feature.
|
||||
|
||||
This is an outline of the most important features and issues only and not
|
||||
intended to be exhaustive.
|
||||
|
||||
|
||||
|
||||
1. General
|
||||
-----------
|
||||
|
||||
* GCS is an architecture feature intended to provide greater protection
|
||||
against return oriented programming (ROP) attacks and to simplify the
|
||||
implementation of features that need to collect stack traces such as
|
||||
profiling.
|
||||
|
||||
* When GCS is enabled a separate guarded control stack is maintained by the
|
||||
PE which is writeable only through specific GCS operations. This
|
||||
stores the call stack only, when a procedure call instruction is
|
||||
performed the current PC is pushed onto the GCS and on RET the
|
||||
address in the LR is verified against that on the top of the GCS.
|
||||
|
||||
* When active the current GCS pointer is stored in the system register
|
||||
GCSPR_EL0. This is readable by userspace but can only be updated
|
||||
via specific GCS instructions.
|
||||
|
||||
* The architecture provides instructions for switching between guarded
|
||||
control stacks with checks to ensure that the new stack is a valid
|
||||
target for switching.
|
||||
|
||||
* The functionality of GCS is similar to that provided by the x86 Shadow
|
||||
Stack feature, due to sharing of userspace interfaces the ABI refers to
|
||||
shadow stacks rather than GCS.
|
||||
|
||||
* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
|
||||
AT_HWCAP2 entry.
|
||||
|
||||
* GCS is enabled per thread. While there is support for disabling GCS
|
||||
at runtime this should be done with great care.
|
||||
|
||||
* GCS memory access faults are reported as normal memory access faults.
|
||||
|
||||
* GCS specific errors (those reported with EC 0x2d) will be reported as
|
||||
SIGSEGV with a si_code of SEGV_CPERR (control protection error).
|
||||
|
||||
* GCS is supported only for AArch64.
|
||||
|
||||
* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
|
||||
regardless of the GCS configuration for the thread.
|
||||
|
||||
* The architecture supports enabling GCS without verifying that return values
|
||||
in LR match those in the GCS, the LR will be ignored. This is not supported
|
||||
by Linux.
|
||||
|
||||
|
||||
|
||||
2. Enabling and disabling Guarded Control Stacks
|
||||
-------------------------------------------------
|
||||
|
||||
* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
|
||||
prctl(), this takes a single flags argument specifying which GCS features
|
||||
should be used.
|
||||
|
||||
* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
|
||||
and enables GCS for the thread, enabling the functionality controlled by
|
||||
GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
|
||||
|
||||
* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
|
||||
by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
|
||||
|
||||
* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
|
||||
by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
|
||||
|
||||
* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
|
||||
|
||||
* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
|
||||
values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
|
||||
status of the specified GCS mode bits will be rejected.
|
||||
|
||||
* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
|
||||
userspace to prevent changes to any future features.
|
||||
|
||||
* There is no support for a process to remove a lock that has been set for
|
||||
it.
|
||||
|
||||
* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
|
||||
thread that called them, any other running threads will be unaffected.
|
||||
|
||||
* New threads inherit the GCS configuration of the thread that created them.
|
||||
|
||||
* GCS is disabled on exec().
|
||||
|
||||
* The current GCS configuration for a thread may be read with the
|
||||
PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
|
||||
are passed to PR_SET_SHADOW_STACK_STATUS.
|
||||
|
||||
* If GCS is disabled for a thread after having previously been enabled then
|
||||
the stack will remain allocated for the lifetime of the thread. At present
|
||||
any attempt to reenable GCS for the thread will be rejected, this may be
|
||||
revisited in future.
|
||||
|
||||
* It should be noted that since enabling GCS will result in GCS becoming
|
||||
active immediately it is not normally possible to return from the function
|
||||
that invoked the prctl() that enabled GCS. It is expected that the normal
|
||||
usage will be that GCS is enabled very early in execution of a program.
|
||||
|
||||
|
||||
|
||||
3. Allocation of Guarded Control Stacks
|
||||
----------------------------------------
|
||||
|
||||
* When GCS is enabled for a thread a new Guarded Control Stack will be
|
||||
allocated for it of half the standard stack size or 2 gigabytes,
|
||||
whichever is smaller.
|
||||
|
||||
* When a new thread is created by a thread which has GCS enabled then a
|
||||
new Guarded Control Stack will be allocated for the new thread with
|
||||
half the size of the standard stack.
|
||||
|
||||
* When a stack is allocated by enabling GCS or during thread creation then
|
||||
the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
|
||||
be set to point to the address of this 0 value, this can be used to
|
||||
detect the top of the stack.
|
||||
|
||||
* Additional Guarded Control Stacks can be allocated using the
|
||||
map_shadow_stack() system call.
|
||||
|
||||
* Stacks allocated using map_shadow_stack() can optionally have an end of
|
||||
stack marker and cap placed at the top of the stack. If the flag
|
||||
SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
|
||||
if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
|
||||
bytes of the stack and if it is specified then the cap will be the next
|
||||
8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
|
||||
valid since the marker is all bits 0 it has no observable effect.
|
||||
|
||||
* Stacks allocated using map_shadow_stack() must have a size which is a
|
||||
multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
|
||||
|
||||
* An address can be specified to map_shadow_stack(), if one is provided then
|
||||
it must be aligned to a page boundary.
|
||||
|
||||
* When a thread is freed the Guarded Control Stack initially allocated for
|
||||
that thread will be freed. Note carefully that if the stack has been
|
||||
switched this may not be the stack currently in use by the thread.
|
||||
|
||||
|
||||
4. Signal handling
|
||||
--------------------
|
||||
|
||||
* A new signal frame record gcs_context encodes the current GCS mode and
|
||||
pointer for the interrupted context on signal delivery. This will always
|
||||
be present on systems that support GCS.
|
||||
|
||||
* The record contains a flag field which reports the current GCS configuration
|
||||
for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
|
||||
|
||||
* The signal handler is run with the same GCS configuration as the interrupted
|
||||
context.
|
||||
|
||||
* When GCS is enabled for the interrupted thread a signal handling specific
|
||||
GCS cap token will be written to the GCS, this is an architectural GCS cap
|
||||
with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
|
||||
signal frame will point to this cap token.
|
||||
|
||||
* The signal handler will use the same GCS as the interrupted context.
|
||||
|
||||
* When GCS is enabled on signal entry a frame with the address of the signal
|
||||
return handler will be pushed onto the GCS, allowing return from the signal
|
||||
handler via RET as normal. This will not be reported in the gcs_context in
|
||||
the signal frame.
|
||||
|
||||
|
||||
5. Signal return
|
||||
-----------------
|
||||
|
||||
When returning from a signal handler:
|
||||
|
||||
* If there is a gcs_context record in the signal frame then the GCS flags
|
||||
and GCSPR_EL0 will be restored from that context prior to further
|
||||
validation.
|
||||
|
||||
* If there is no gcs_context record in the signal frame then the GCS
|
||||
configuration will be unchanged.
|
||||
|
||||
* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
|
||||
point to a valid GCS signal cap record, this will be popped from the
|
||||
GCS prior to signal return.
|
||||
|
||||
* If the GCS configuration is locked when returning from a signal then any
|
||||
attempt to change the GCS configuration will be treated as an error. This
|
||||
is true even if GCS was not enabled prior to signal entry.
|
||||
|
||||
* GCS may be disabled via signal return but any attempt to enable GCS via
|
||||
signal return will be rejected.
|
||||
|
||||
|
||||
6. ptrace extensions
|
||||
---------------------
|
||||
|
||||
* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
|
||||
PTRACE_SETREGSET.
|
||||
|
||||
* The GCS mode, including enable and disable, may be configured via ptrace.
|
||||
If GCS is enabled via ptrace no new GCS will be allocated for the thread.
|
||||
|
||||
* Configuration via ptrace ignores locking of GCS mode bits.
|
||||
|
||||
|
||||
7. ELF coredump extensions
|
||||
---------------------------
|
||||
|
||||
* NT_ARM_GCS notes will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would
|
||||
have been read if a PTRACE_GETREGSET of the corresponding type were
|
||||
executed for each thread when the coredump was generated.
|
||||
|
||||
|
||||
|
||||
8. /proc extensions
|
||||
--------------------
|
||||
|
||||
* Guarded Control Stack pages will include "ss" in their VmFlags in
|
||||
/proc/<pid>/smaps.
|
@ -10,16 +10,19 @@ ARM64 Architecture
|
||||
acpi_object_usage
|
||||
amu
|
||||
arm-acpi
|
||||
arm-cca
|
||||
asymmetric-32bit
|
||||
booting
|
||||
cpu-feature-registers
|
||||
cpu-hotplug
|
||||
elf_hwcaps
|
||||
gcs
|
||||
hugetlbpage
|
||||
kdump
|
||||
legacy_instructions
|
||||
memory
|
||||
memory-tagging-extension
|
||||
mops
|
||||
perf
|
||||
pointer-authentication
|
||||
ptdump
|
||||
|
44
Documentation/arch/arm64/mops.rst
Normal file
@ -0,0 +1,44 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Memory copy/set instructions (MOPS)
|
||||
===================================
|
||||
|
||||
A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
|
||||
instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
|
||||
|
||||
A main or epilogue instruction can take a MOPS exception for various reasons,
|
||||
for example when a task is migrated to a CPU with a different MOPS
|
||||
implementation, or when the instruction's alignment and size requirements are
|
||||
not met. The software exception handler is then expected to reset the registers
|
||||
and restart execution from the prologue instruction. Normally this is handled
|
||||
by the kernel.
|
||||
|
||||
For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
|
||||
the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
|
||||
|
||||
.. _arm64_mops_hyp:
|
||||
|
||||
Hypervisor requirements
|
||||
-----------------------
|
||||
|
||||
A hypervisor running a Linux guest must handle all MOPS exceptions from the
|
||||
guest kernel, as Linux may not be able to handle the exception at all times.
|
||||
For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
|
||||
to another physical CPU with a different MOPS implementation.
|
||||
|
||||
To do this, the hypervisor must:
|
||||
|
||||
- Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
|
||||
|
||||
- Have an exception handler that implements the algorithm from the Arm ARM
|
||||
rules CNTMJ and MWFQH.
|
||||
|
||||
- Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
|
||||
potential step of the current instruction.
|
||||
|
||||
Note: Clearing PSTATE.SS is needed so that a single step exception is taken
|
||||
on the next instruction (the prologue instruction). Otherwise prologue
|
||||
would get silently stepped over and the single step exception taken on the
|
||||
main instruction. Note that if the guest instruction is not being stepped
|
||||
then clearing PSTATE.SS has no effect.
|
@ -146,6 +146,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 |
|
||||
@ -186,6 +188,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N2 | #3324339 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N3 | #3456111 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-V1 | #1619801 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 |
|
||||
@ -289,3 +293,5 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
@ -346,6 +346,10 @@ The regset data starts with struct user_za_header, containing:
|
||||
|
||||
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
|
||||
|
||||
* If any register data is provided along with SME_PT_VL_ONEXEC then the
|
||||
registers data will be interpreted with the current vector length, not
|
||||
the vector length configured for use on exec.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
|
@ -402,6 +402,10 @@ The regset data starts with struct user_sve_header, containing:
|
||||
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
|
||||
if the target was not in streaming mode.
|
||||
|
||||
* If any register data is provided along with SVE_PT_VL_ONEXEC then the
|
||||
registers data will be interpreted with the current vector length, not
|
||||
the vector length configured for use on exec.
|
||||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
|
||||
|
@ -85,6 +85,38 @@ to CPUINTC directly::
|
||||
| Devices |
|
||||
+---------+
|
||||
|
||||
Advanced Extended IRQ model
|
||||
===========================
|
||||
|
||||
In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
|
||||
to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
|
||||
to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
|
||||
go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
|
||||
|
||||
+-----+ +-----------------------+ +-------+
|
||||
| IPI | --> | CPUINTC | <-- | Timer |
|
||||
+-----+ +-----------------------+ +-------+
|
||||
^ ^ ^
|
||||
| | |
|
||||
+---------+ +----------+ +---------+ +-------+
|
||||
| EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
|
||||
+---------+ +----------+ +---------+ +-------+
|
||||
^ ^
|
||||
| |
|
||||
+---------+ +---------+
|
||||
| PCH-PIC | | PCH-MSI |
|
||||
+---------+ +---------+
|
||||
^ ^ ^
|
||||
| | |
|
||||
+---------+ +---------+ +---------+
|
||||
| Devices | | PCH-LPC | | Devices |
|
||||
+---------+ +---------+ +---------+
|
||||
^
|
||||
|
|
||||
+---------+
|
||||
| Devices |
|
||||
+---------+
|
||||
|
||||
ACPI-related definitions
|
||||
========================
|
||||
|
||||
|
@ -134,7 +134,7 @@ Hardware
|
||||
|
||||
* PTCR and partition table entries (partition table is in secure
|
||||
memory). An attempt to write to PTCR will cause a Hypervisor
|
||||
Emulation Assitance interrupt.
|
||||
Emulation Assistance interrupt.
|
||||
|
||||
* LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
|
||||
non-architected registers. An attempt to write to them will cause a
|
||||
|
@ -15,7 +15,7 @@ status for the use of Vector in userspace. The intended usage guideline for
|
||||
these interfaces is to give init systems a way to modify the availability of V
|
||||
for processes running under its domain. Calling these interfaces is not
|
||||
recommended in libraries routines because libraries should not override policies
|
||||
configured from the parant process. Also, users must noted that these interfaces
|
||||
configured from the parent process. Also, users must note that these interfaces
|
||||
are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
|
||||
to use in a portable code. To get the availability of V in an ELF program,
|
||||
please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the
|
||||
|
@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
|
||||
resulting from plugging it in references a queue device bound to the vfio_ap
|
||||
device driver.
|
||||
|
||||
Driver Features
|
||||
===============
|
||||
The vfio_ap driver exposes a sysfs file containing supported features.
|
||||
This exists so third party tools (like Libvirt and mdevctl) can query the
|
||||
availability of specific features.
|
||||
|
||||
The features list can be found here: /sys/bus/matrix/devices/matrix/features
|
||||
|
||||
Entries are space delimited. Each entry consists of a combination of
|
||||
alphanumeric and underscore characters.
|
||||
|
||||
Example:
|
||||
cat /sys/bus/matrix/devices/matrix/features
|
||||
guest_matrix dyn ap_config
|
||||
|
||||
the following features are advertised:
|
||||
|
||||
---------------+---------------------------------------------------------------+
|
||||
| Flag | Description |
|
||||
+==============+===============================================================+
|
||||
| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
|
||||
| | adapters and domains that are or will be passed through to a |
|
||||
| | guest when the mdev is attached to it. |
|
||||
+--------------+---------------------------------------------------------------+
|
||||
| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
|
||||
| | domains for a guest to which the mdev is attached. |
|
||||
+------------+-----------------------------------------------------------------+
|
||||
| ap_config | ap_config interface for one-shot modifications to mdev config |
|
||||
+--------------+---------------------------------------------------------------+
|
||||
|
||||
Limitations
|
||||
===========
|
||||
Live guest migration is not supported for guests using AP devices without
|
||||
|
@ -26,7 +26,8 @@ Detection
|
||||
=========
|
||||
|
||||
Intel processors may support either or both of the following hardware
|
||||
mechanisms to detect split locks and bus locks.
|
||||
mechanisms to detect split locks and bus locks. Some AMD processors also
|
||||
support bus lock detect.
|
||||
|
||||
#AC exception for split lock detection
|
||||
--------------------------------------
|
||||
|
@ -162,7 +162,7 @@ Mitigation points
|
||||
3. It would take a large number of these precisely-timed NMIs to mount
|
||||
an actual attack. There's presumably not enough bandwidth.
|
||||
4. The NMI in question occurs after a VERW, i.e. when user state is
|
||||
restored and most interesting data is already scrubbed. Whats left
|
||||
restored and most interesting data is already scrubbed. What's left
|
||||
is only the data that NMI touches, and that may or may not be of
|
||||
any interest.
|
||||
|
||||
|
@ -170,18 +170,6 @@ NUMA
|
||||
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
|
||||
partitioning.
|
||||
|
||||
numa=fake=<size>[MG]
|
||||
If given as a memory unit, fills all system RAM with nodes of
|
||||
size interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>
|
||||
If given as an integer, fills all system RAM with N fake nodes
|
||||
interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>U
|
||||
If given as an integer followed by 'U', it will divide each
|
||||
physical node into N emulated nodes.
|
||||
|
||||
ACPI
|
||||
====
|
||||
|
||||
@ -317,3 +305,8 @@ The available options are:
|
||||
|
||||
debug
|
||||
Enable debug messages.
|
||||
|
||||
nosnp
|
||||
Do not enable SEV-SNP (applies to host/hypervisor only). Setting
|
||||
'nosnp' avoids the RMP check overhead in memory accesses when
|
||||
users do not want to run SEV-SNP guests.
|
||||
|
@ -125,7 +125,7 @@ FSGSBASE instructions enablement
|
||||
FSGSBASE instructions compiler support
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
|
||||
GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
|
||||
instructions. Clang 5 supports them as well.
|
||||
|
||||
=================== ===========================
|
||||
@ -135,7 +135,7 @@ instructions. Clang 5 supports them as well.
|
||||
_writegsbase_u64() Write the GS base register
|
||||
=================== ===========================
|
||||
|
||||
To utilize these instrinsics <immintrin.h> must be included in the source
|
||||
To utilize these intrinsics <immintrin.h> must be included in the source
|
||||
code and the compiler option -mfsgsbase has to be added.
|
||||
|
||||
Compiler support for FS/GS based addressing
|
||||
|
@ -29,15 +29,27 @@ Complete virtual memory map with 4-level page tables
|
||||
Start addr | Offset | End addr | Size | VM area description
|
||||
========================================================================================================================
|
||||
| | | |
|
||||
0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
|
||||
0000000000000000 | 0 | 00007fffffffefff | ~128 TB | user-space virtual memory, different per mm
|
||||
00007ffffffff000 | ~128 TB | 00007fffffffffff | 4 kB | ... guard hole
|
||||
__________________|____________|__________________|_________|___________________________________________________________
|
||||
| | | |
|
||||
0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -128 TB
|
||||
0000800000000000 | +128 TB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -8 EB
|
||||
| | | | starting offset of kernel mappings.
|
||||
| | | |
|
||||
| | | | LAM relaxes canonicallity check allowing to create aliases
|
||||
| | | | for userspace memory here.
|
||||
__________________|____________|__________________|_________|___________________________________________________________
|
||||
|
|
||||
| Kernel-space virtual memory, shared between all processes:
|
||||
__________________|____________|__________________|_________|___________________________________________________________
|
||||
| | | |
|
||||
8000000000000000 | -8 EB | ffff7fffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -128 TB
|
||||
| | | | starting offset of kernel mappings.
|
||||
| | | |
|
||||
| | | | LAM_SUP relaxes canonicallity check allowing to create
|
||||
| | | | aliases for kernel memory here.
|
||||
____________________________________________________________|___________________________________________________________
|
||||
| | | |
|
||||
ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
|
||||
@ -88,15 +100,26 @@ Complete virtual memory map with 5-level page tables
|
||||
Start addr | Offset | End addr | Size | VM area description
|
||||
========================================================================================================================
|
||||
| | | |
|
||||
0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
|
||||
0000000000000000 | 0 | 00fffffffffff000 | ~64 PB | user-space virtual memory, different per mm
|
||||
00fffffffffff000 | ~64 PB | 00ffffffffffffff | 4 kB | ... guard hole
|
||||
__________________|____________|__________________|_________|___________________________________________________________
|
||||
| | | |
|
||||
0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -64 PB
|
||||
0100000000000000 | +64 PB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -8EB TB
|
||||
| | | | starting offset of kernel mappings.
|
||||
| | | |
|
||||
| | | | LAM relaxes canonicallity check allowing to create aliases
|
||||
| | | | for userspace memory here.
|
||||
__________________|____________|__________________|_________|___________________________________________________________
|
||||
|
|
||||
| Kernel-space virtual memory, shared between all processes:
|
||||
____________________________________________________________|___________________________________________________________
|
||||
8000000000000000 | -8 EB | feffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
|
||||
| | | | virtual memory addresses up to the -64 PB
|
||||
| | | | starting offset of kernel mappings.
|
||||
| | | |
|
||||
| | | | LAM_SUP relaxes canonicallity check allowing to create
|
||||
| | | | aliases for kernel memory here.
|
||||
____________________________________________________________|___________________________________________________________
|
||||
| | | |
|
||||
ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
|
||||
|
@ -9,7 +9,7 @@ controllers), BFQ's main features are:
|
||||
- BFQ guarantees a high system and application responsiveness, and a
|
||||
low latency for time-sensitive applications, such as audio or video
|
||||
players;
|
||||
- BFQ distributes bandwidth, and not just time, among processes or
|
||||
- BFQ distributes bandwidth, not just time, among processes or
|
||||
groups (switching back to time distribution when needed to keep
|
||||
throughput high).
|
||||
|
||||
@ -111,7 +111,7 @@ Higher speed for code-development tasks
|
||||
|
||||
If some additional workload happens to be executed in parallel, then
|
||||
BFQ executes the I/O-related components of typical code-development
|
||||
tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
|
||||
tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
|
||||
NOOP or DEADLINE.
|
||||
|
||||
High throughput
|
||||
@ -127,9 +127,9 @@ Strong fairness, bandwidth and delay guarantees
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
BFQ distributes the device throughput, and not just the device time,
|
||||
among I/O-bound applications in proportion their weights, with any
|
||||
among I/O-bound applications in proportion to their weights, with any
|
||||
workload and regardless of the device parameters. From these bandwidth
|
||||
guarantees, it is possible to compute tight per-I/O-request delay
|
||||
guarantees, it is possible to compute a tight per-I/O-request delay
|
||||
guarantees by a simple formula. If not configured for strict service
|
||||
guarantees, BFQ switches to time-based resource sharing (only) for
|
||||
applications that would otherwise cause a throughput loss.
|
||||
@ -199,7 +199,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
|
||||
- On flash-based storage with internal queueing of commands
|
||||
(typically NCQ), device idling happens to be always detrimental
|
||||
for throughput. So, with these devices, BFQ performs idling
|
||||
to throughput. So, with these devices, BFQ performs idling
|
||||
only when strictly needed for service guarantees, i.e., for
|
||||
guaranteeing low latency or fairness. In these cases, overall
|
||||
throughput may be sub-optimal. No solution currently exists to
|
||||
@ -212,7 +212,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
and to reduce their latency. The most important action taken to
|
||||
achieve this goal is to give to the queues associated with these
|
||||
applications more than their fair share of the device
|
||||
throughput. For brevity, we call just "weight-raising" the whole
|
||||
throughput. For brevity, we call it just "weight-raising" the whole
|
||||
sets of actions taken by BFQ to privilege these queues. In
|
||||
particular, BFQ provides a milder form of weight-raising for
|
||||
interactive applications, and a stronger form for soft real-time
|
||||
@ -231,7 +231,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
responsive in detecting interleaved I/O (cooperating processes),
|
||||
that it enables BFQ to achieve a high throughput, by queue
|
||||
merging, even for queues for which CFQ needs a different
|
||||
mechanism, preemption, to get a high throughput. As such EQM is a
|
||||
mechanism, preemption, to get a high throughput. As such, EQM is a
|
||||
unified mechanism to achieve a high throughput with interleaved
|
||||
I/O.
|
||||
|
||||
@ -254,7 +254,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
- First, with any proportional-share scheduler, the maximum
|
||||
deviation with respect to an ideal service is proportional to
|
||||
the maximum budget (slice) assigned to queues. As a consequence,
|
||||
BFQ can keep this deviation tight not only because of the
|
||||
BFQ can keep this deviation tight, not only because of the
|
||||
accurate service of B-WF2Q+, but also because BFQ *does not*
|
||||
need to assign a larger budget to a queue to let the queue
|
||||
receive a higher fraction of the device throughput.
|
||||
@ -327,7 +327,7 @@ applications. Unset this tunable if you need/want to control weights.
|
||||
slice_idle
|
||||
----------
|
||||
|
||||
This parameter specifies how long BFQ should idle for next I/O
|
||||
This parameter specifies how long BFQ should idle for the next I/O
|
||||
request, when certain sync BFQ queues become empty. By default
|
||||
slice_idle is a non-zero value. Idling has a double purpose: boosting
|
||||
throughput and making sure that the desired throughput distribution is
|
||||
@ -365,7 +365,7 @@ terms of I/O-request dispatches. To guarantee that the actual service
|
||||
order then corresponds to the dispatch order, the strict_guarantees
|
||||
tunable must be set too.
|
||||
|
||||
There is an important flipside for idling: apart from the above cases
|
||||
There is an important flip side to idling: apart from the above cases
|
||||
where it is beneficial also for throughput, idling can severely impact
|
||||
throughput. One important case is random workload. Because of this
|
||||
issue, BFQ tends to avoid idling as much as possible, when it is not
|
||||
@ -475,7 +475,7 @@ max_budget
|
||||
|
||||
Maximum amount of service, measured in sectors, that can be provided
|
||||
to a BFQ queue once it is set in service (of course within the limits
|
||||
of the above timeout). According to what said in the description of
|
||||
of the above timeout). According to what was said in the description of
|
||||
the algorithm, larger values increase the throughput in proportion to
|
||||
the percentage of sequential I/O requests issued. The price of larger
|
||||
values is that they coarsen the granularity of short-term bandwidth
|
||||
|
@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
|
||||
create a link to block device partition with the name "PARTNAME".
|
||||
User space application can access partition by partition name.
|
||||
|
||||
ro
|
||||
read-only. Flag the partition as read-only.
|
||||
|
||||
Example:
|
||||
|
||||
eMMC disk names are "mmcblk0" and "mmcblk0boot0".
|
||||
|
||||
bootargs::
|
||||
|
||||
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
|
||||
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
|
||||
|
||||
dmesg::
|
||||
|
||||
|
@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:
|
||||
|
||||
- user recovery feature description
|
||||
|
||||
Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
|
||||
``UBLK_F_USER_RECOVERY_REISSUE``.
|
||||
Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
|
||||
``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
|
||||
enable recovery of ublk devices after the ublk server exits, the ublk server
|
||||
should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
|
||||
ublk server may additionally specify at most one of
|
||||
``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
|
||||
modify how I/O is handled while the ublk server is dying/dead (this is called
|
||||
the ``nosrv`` case in the driver code).
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
|
||||
With just ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
|
||||
handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
|
||||
recovery stage and ublk device ID is kept. It is ublk server's
|
||||
responsibility to recover the device context by its own knowledge.
|
||||
Requests which have not been issued to userspace are requeued. Requests
|
||||
which have been issued to userspace are aborted.
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
|
||||
server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
|
||||
With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after one ubq_daemon
|
||||
(ublk server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
|
||||
requests which have been issued to userspace are requeued and will be
|
||||
re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
|
||||
``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
|
||||
double-write since the driver may issue the same I/O request twice. It
|
||||
might be useful to a read-only FS or a VM backend.
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
|
||||
exits, requests which have issued to userspace are failed, as are any
|
||||
subsequently issued requests. Applications continuously issuing I/O against
|
||||
devices with this flag set will see a stream of I/O errors until a new ublk
|
||||
server recovers the device.
|
||||
|
||||
Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
|
||||
Once the flag is set, all control commands can be sent by unprivileged
|
||||
user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
|
||||
|
@ -368,7 +368,7 @@ No additional type data follow ``btf_type``.
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC
|
||||
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
|
||||
or BTF_FUNC_EXTERN)
|
||||
or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
|
||||
* ``type``: a BTF_KIND_FUNC_PROTO type
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
@ -424,9 +424,8 @@ following data::
|
||||
__u32 linkage;
|
||||
};
|
||||
|
||||
``struct btf_var`` encoding:
|
||||
* ``linkage``: currently only static variable 0, or globally allocated
|
||||
variable in ELF sections 1
|
||||
``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
|
||||
see :ref:`BTF_Var_Linkage_Constants`.
|
||||
|
||||
Not all type of global variables are supported by LLVM at this point.
|
||||
The following is currently available:
|
||||
@ -549,6 +548,38 @@ The ``btf_enum64`` encoding:
|
||||
If the original enum value is signed and the size is less than 8,
|
||||
that value will be sign extended into 8 bytes.
|
||||
|
||||
2.3 Constant Values
|
||||
-------------------
|
||||
|
||||
.. _BTF_Function_Linkage_Constants:
|
||||
|
||||
2.3.1 Function Linkage Constant Values
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. table:: Function Linkage Values and Meanings
|
||||
|
||||
=================== ===== ===========
|
||||
kind value description
|
||||
=================== ===== ===========
|
||||
``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
|
||||
``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
|
||||
``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
|
||||
=================== ===== ===========
|
||||
|
||||
|
||||
.. _BTF_Var_Linkage_Constants:
|
||||
|
||||
2.3.2 Variable Linkage Constant Values
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. table:: Variable Linkage Values and Meanings
|
||||
|
||||
============================ ===== ===========
|
||||
kind value description
|
||||
============================ ===== ===========
|
||||
``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
|
||||
``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
|
||||
``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
|
||||
============================ ===== ===========
|
||||
|
||||
3. BTF Kernel API
|
||||
=================
|
||||
|
||||
|
@ -121,6 +121,8 @@ described in more detail in the footnotes.
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
|
||||
@ -131,11 +133,23 @@ described in more detail in the footnotes.
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``raw_tracepoint+`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
|
||||
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
|
||||
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``tc`` | |
|
||||
| | | ``tc`` [#tc_legacy]_ | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
@ -155,7 +169,9 @@ described in more detail in the footnotes.
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
|
||||
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
@ -209,5 +225,11 @@ described in more detail in the footnotes.
|
||||
``a-zA-Z0-9_.*?``.
|
||||
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
|
||||
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
|
||||
.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
|
||||
``tcx/*`` instead.
|
||||
.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
|
||||
but ``name`` is ignored and it is recommended to just use plain
|
||||
``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
|
||||
that is tagged with ``SEC(".struct_ops[.link]")``.
|
||||
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
|
||||
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
|
||||
|
@ -418,7 +418,7 @@ The rules for correspondence between registers / stack slots are as follows:
|
||||
linked to the registers and stack slots of the parent state with the same
|
||||
indices.
|
||||
|
||||
* For the outer stack frames, only caller saved registers (r6-r9) and stack
|
||||
* For the outer stack frames, only callee saved registers (r6-r9) and stack
|
||||
slots are linked to the registers and stack slots of the parent state with the
|
||||
same indices.
|
||||
|
||||
|