Merge drm/drm-next into drm-intel-next
Sync to v6.12-rc1. Signed-off-by: Jani Nikula <jani.nikula@intel.com>
@ -141,11 +141,13 @@ ForEachMacros:
|
||||
- 'damon_for_each_target_safe'
|
||||
- 'damos_for_each_filter'
|
||||
- 'damos_for_each_filter_safe'
|
||||
- 'damos_for_each_quota_goal'
|
||||
- 'damos_for_each_quota_goal_safe'
|
||||
- 'data__for_each_file'
|
||||
- 'data__for_each_file_new'
|
||||
- 'data__for_each_file_start'
|
||||
- 'device_for_each_child_node'
|
||||
- 'displayid_iter_for_each'
|
||||
- 'device_for_each_child_node_scoped'
|
||||
- 'dma_fence_array_for_each'
|
||||
- 'dma_fence_chain_for_each'
|
||||
- 'dma_fence_unwrap_for_each'
|
||||
@ -172,11 +174,14 @@ ForEachMacros:
|
||||
- 'drm_for_each_plane'
|
||||
- 'drm_for_each_plane_mask'
|
||||
- 'drm_for_each_privobj'
|
||||
- 'drm_gem_for_each_gpuva'
|
||||
- 'drm_gem_for_each_gpuva_safe'
|
||||
- 'drm_gem_for_each_gpuvm_bo'
|
||||
- 'drm_gem_for_each_gpuvm_bo_safe'
|
||||
- 'drm_gpuva_for_each_op'
|
||||
- 'drm_gpuva_for_each_op_from_reverse'
|
||||
- 'drm_gpuva_for_each_op_reverse'
|
||||
- 'drm_gpuva_for_each_op_safe'
|
||||
- 'drm_gpuvm_bo_for_each_va'
|
||||
- 'drm_gpuvm_bo_for_each_va_safe'
|
||||
- 'drm_gpuvm_for_each_va'
|
||||
- 'drm_gpuvm_for_each_va_range'
|
||||
- 'drm_gpuvm_for_each_va_range_safe'
|
||||
@ -192,11 +197,11 @@ ForEachMacros:
|
||||
- 'dsa_switch_for_each_port_continue_reverse'
|
||||
- 'dsa_switch_for_each_port_safe'
|
||||
- 'dsa_switch_for_each_user_port'
|
||||
- 'dsa_switch_for_each_user_port_continue_reverse'
|
||||
- 'dsa_tree_for_each_cpu_port'
|
||||
- 'dsa_tree_for_each_user_port'
|
||||
- 'dsa_tree_for_each_user_port_continue_reverse'
|
||||
- 'dso__for_each_symbol'
|
||||
- 'dsos__for_each_with_build_id'
|
||||
- 'elf_hash_for_each_possible'
|
||||
- 'elf_symtab__for_each_symbol'
|
||||
- 'evlist__for_each_cpu'
|
||||
@ -216,6 +221,7 @@ ForEachMacros:
|
||||
- 'for_each_and_bit'
|
||||
- 'for_each_andnot_bit'
|
||||
- 'for_each_available_child_of_node'
|
||||
- 'for_each_available_child_of_node_scoped'
|
||||
- 'for_each_bench'
|
||||
- 'for_each_bio'
|
||||
- 'for_each_board_func_rsrc'
|
||||
@ -234,6 +240,7 @@ ForEachMacros:
|
||||
- 'for_each_card_widgets_safe'
|
||||
- 'for_each_cgroup_storage_type'
|
||||
- 'for_each_child_of_node'
|
||||
- 'for_each_child_of_node_scoped'
|
||||
- 'for_each_clear_bit'
|
||||
- 'for_each_clear_bit_from'
|
||||
- 'for_each_clear_bitrange'
|
||||
@ -251,6 +258,7 @@ ForEachMacros:
|
||||
- 'for_each_cpu'
|
||||
- 'for_each_cpu_and'
|
||||
- 'for_each_cpu_andnot'
|
||||
- 'for_each_cpu_from'
|
||||
- 'for_each_cpu_or'
|
||||
- 'for_each_cpu_wrap'
|
||||
- 'for_each_dapm_widgets'
|
||||
@ -269,13 +277,14 @@ ForEachMacros:
|
||||
- 'for_each_element'
|
||||
- 'for_each_element_extid'
|
||||
- 'for_each_element_id'
|
||||
- 'for_each_enabled_cpu'
|
||||
- 'for_each_endpoint_of_node'
|
||||
- 'for_each_event'
|
||||
- 'for_each_event_tps'
|
||||
- 'for_each_evictable_lru'
|
||||
- 'for_each_fib6_node_rt_rcu'
|
||||
- 'for_each_fib6_walker_rt'
|
||||
- 'for_each_free_mem_pfn_range_in_zone'
|
||||
- 'for_each_file_lock'
|
||||
- 'for_each_free_mem_pfn_range_in_zone_from'
|
||||
- 'for_each_free_mem_range'
|
||||
- 'for_each_free_mem_range_reverse'
|
||||
@ -286,15 +295,18 @@ ForEachMacros:
|
||||
- 'for_each_group_member'
|
||||
- 'for_each_group_member_head'
|
||||
- 'for_each_hstate'
|
||||
- 'for_each_hwgpio'
|
||||
- 'for_each_if'
|
||||
- 'for_each_inject_fn'
|
||||
- 'for_each_insn'
|
||||
- 'for_each_insn_op_loc'
|
||||
- 'for_each_insn_prefix'
|
||||
- 'for_each_intid'
|
||||
- 'for_each_iommu'
|
||||
- 'for_each_ip_tunnel_rcu'
|
||||
- 'for_each_irq_nr'
|
||||
- 'for_each_lang'
|
||||
- 'for_each_link_ch_maps'
|
||||
- 'for_each_link_codecs'
|
||||
- 'for_each_link_cpus'
|
||||
- 'for_each_link_platforms'
|
||||
@ -332,6 +344,9 @@ ForEachMacros:
|
||||
- 'for_each_new_plane_in_state_reverse'
|
||||
- 'for_each_new_private_obj_in_state'
|
||||
- 'for_each_new_reg'
|
||||
- 'for_each_nhlt_endpoint'
|
||||
- 'for_each_nhlt_endpoint_fmtcfg'
|
||||
- 'for_each_nhlt_fmtcfg'
|
||||
- 'for_each_node'
|
||||
- 'for_each_node_by_name'
|
||||
- 'for_each_node_by_type'
|
||||
@ -387,12 +402,15 @@ ForEachMacros:
|
||||
- 'for_each_reloc_from'
|
||||
- 'for_each_requested_gpio'
|
||||
- 'for_each_requested_gpio_in_range'
|
||||
- 'for_each_reserved_child_of_node'
|
||||
- 'for_each_reserved_mem_range'
|
||||
- 'for_each_reserved_mem_region'
|
||||
- 'for_each_rtd_ch_maps'
|
||||
- 'for_each_rtd_codec_dais'
|
||||
- 'for_each_rtd_components'
|
||||
- 'for_each_rtd_cpu_dais'
|
||||
- 'for_each_rtd_dais'
|
||||
- 'for_each_rtd_dais_reverse'
|
||||
- 'for_each_sband_iftype_data'
|
||||
- 'for_each_script'
|
||||
- 'for_each_sec'
|
||||
@ -533,8 +551,6 @@ ForEachMacros:
|
||||
- 'lwq_for_each_safe'
|
||||
- 'map__for_each_symbol'
|
||||
- 'map__for_each_symbol_by_name'
|
||||
- 'maps__for_each_entry'
|
||||
- 'maps__for_each_entry_safe'
|
||||
- 'mas_for_each'
|
||||
- 'mci_for_each_dimm'
|
||||
- 'media_device_for_each_entity'
|
||||
@ -560,7 +576,9 @@ ForEachMacros:
|
||||
- 'netdev_hw_addr_list_for_each'
|
||||
- 'nft_rule_for_each_expr'
|
||||
- 'nla_for_each_attr'
|
||||
- 'nla_for_each_attr_type'
|
||||
- 'nla_for_each_nested'
|
||||
- 'nla_for_each_nested_type'
|
||||
- 'nlmsg_for_each_attr'
|
||||
- 'nlmsg_for_each_msg'
|
||||
- 'nr_neigh_for_each'
|
||||
@ -579,6 +597,7 @@ ForEachMacros:
|
||||
- 'perf_config_sections__for_each_entry'
|
||||
- 'perf_config_set__for_each_entry'
|
||||
- 'perf_cpu_map__for_each_cpu'
|
||||
- 'perf_cpu_map__for_each_cpu_skip_any'
|
||||
- 'perf_cpu_map__for_each_idx'
|
||||
- 'perf_evlist__for_each_entry'
|
||||
- 'perf_evlist__for_each_entry_reverse'
|
||||
@ -639,7 +658,6 @@ ForEachMacros:
|
||||
- 'shost_for_each_device'
|
||||
- 'sk_for_each'
|
||||
- 'sk_for_each_bound'
|
||||
- 'sk_for_each_bound_bhash2'
|
||||
- 'sk_for_each_entry_offset_rcu'
|
||||
- 'sk_for_each_from'
|
||||
- 'sk_for_each_rcu'
|
||||
@ -653,6 +671,7 @@ ForEachMacros:
|
||||
- 'snd_soc_dapm_widget_for_each_path_safe'
|
||||
- 'snd_soc_dapm_widget_for_each_sink_path'
|
||||
- 'snd_soc_dapm_widget_for_each_source_path'
|
||||
- 'sparsebit_for_each_set_range'
|
||||
- 'strlist__for_each_entry'
|
||||
- 'strlist__for_each_entry_safe'
|
||||
- 'sym_for_each_insn'
|
||||
@ -662,7 +681,6 @@ ForEachMacros:
|
||||
- 'tcf_act_for_each_action'
|
||||
- 'tcf_exts_for_each_action'
|
||||
- 'ttm_resource_manager_for_each_res'
|
||||
- 'twsk_for_each_bound_bhash2'
|
||||
- 'udp_portaddr_for_each_entry'
|
||||
- 'udp_portaddr_for_each_entry_rcu'
|
||||
- 'usb_hub_for_each_child'
|
||||
@ -686,6 +704,9 @@ ForEachMacros:
|
||||
- 'xbc_node_for_each_child'
|
||||
- 'xbc_node_for_each_key_value'
|
||||
- 'xbc_node_for_each_subkey'
|
||||
- 'ynl_attr_for_each'
|
||||
- 'ynl_attr_for_each_nested'
|
||||
- 'ynl_attr_for_each_payload'
|
||||
- 'zorro_for_each_dev'
|
||||
|
||||
IncludeBlocks: Preserve
|
||||
|
4
.gitignore
vendored
@ -24,6 +24,7 @@
|
||||
*.dwo
|
||||
*.elf
|
||||
*.gcno
|
||||
*.gcda
|
||||
*.gz
|
||||
*.i
|
||||
*.ko
|
||||
@ -46,7 +47,6 @@
|
||||
*.so.dbg
|
||||
*.su
|
||||
*.symtypes
|
||||
*.symversions
|
||||
*.tab.[ch]
|
||||
*.tar
|
||||
*.xz
|
||||
@ -70,6 +70,7 @@ modules.order
|
||||
/Module.markers
|
||||
/modules.builtin
|
||||
/modules.builtin.modinfo
|
||||
/modules.builtin.ranges
|
||||
/modules.nsdeps
|
||||
|
||||
#
|
||||
@ -142,7 +143,6 @@ GTAGS
|
||||
# id-utils files
|
||||
ID
|
||||
|
||||
*.orig
|
||||
*~
|
||||
\#*#
|
||||
|
||||
|
5
.mailmap
@ -154,6 +154,9 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||
Christian Marangi <ansuelsmth@gmail.com>
|
||||
Christophe Ricard <christophe.ricard@gmail.com>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@citi.umich.edu>
|
||||
Claudiu Beznea <claudiu.beznea@tuxon.dev> <claudiu.beznea@microchip.com>
|
||||
Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com>
|
||||
Corey Minyard <minyard@acm.org>
|
||||
@ -313,6 +316,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
|
||||
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
|
||||
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
|
||||
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
|
||||
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
|
||||
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
||||
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
||||
John Crispin <john@phrozen.org> <blogic@openwrt.org>
|
||||
@ -529,6 +533,7 @@ Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
|
||||
Peter A Jonsson <pj@ludd.ltu.se>
|
||||
Peter Oruba <peter.oruba@amd.com>
|
||||
Peter Oruba <peter@oruba.de>
|
||||
Pierre-Louis Bossart <pierre-louis.bossart@linux.dev> <pierre-louis.bossart@linux.intel.com>
|
||||
Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
|
||||
Praveen BP <praveenbp@ti.com>
|
||||
Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>
|
||||
|
6
CREDITS
@ -378,6 +378,9 @@ S: 1549 Hiironen Rd.
|
||||
S: Brimson, MN 55602
|
||||
S: USA
|
||||
|
||||
N: Arnd Bergmann
|
||||
D: Maintainer of Cell Broadband Engine Architecture
|
||||
|
||||
N: Hennus Bergman
|
||||
P: 1024/77D50909 76 99 FD 31 91 E1 96 1C 90 BB 22 80 62 F6 BD 63
|
||||
D: Author and maintainer of the QIC-02 tape driver
|
||||
@ -1869,6 +1872,9 @@ S: K osmidomkum 723
|
||||
S: 160 00 Praha 6
|
||||
S: Czech Republic
|
||||
|
||||
N: Jeremy Kerr
|
||||
D: Maintainer of SPU File System
|
||||
|
||||
N: Michael Kerrisk
|
||||
E: mtk.manpages@gmail.com
|
||||
W: https://man7.org/
|
||||
|
@ -11,7 +11,7 @@ Description:
|
||||
Read returns '0' or '1' for read-write or read-only modes
|
||||
respectively.
|
||||
Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
|
||||
and "off", i.e. what kstrbool() supports.
|
||||
and "off", i.e. what kstrtobool() supports.
|
||||
Note: This file is only present if CONFIG_NVMEM_SYSFS
|
||||
is enabled.
|
||||
|
||||
|
@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called
|
||||
the vDSO and it often contains useful and highly-optimized alternatives
|
||||
to real syscalls.
|
||||
|
||||
These functions are called just like ordinary C function according to
|
||||
your platform's ABI. Call them from a sensible context. (For example,
|
||||
if you set CS on x86 to something strange, the vDSO functions are
|
||||
These functions are called according to your platform's ABI. On many
|
||||
platforms they are called just like ordinary C function. On other platforms
|
||||
(ex: powerpc) they are called with the same convention as system calls which
|
||||
is different from ordinary C functions. Call them from a sensible context.
|
||||
(For example, if you set CS on x86 to something strange, the vDSO functions are
|
||||
within their rights to crash.) In addition, if you pass a bad
|
||||
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
|
||||
|
||||
|
@ -6,3 +6,10 @@ Description:
|
||||
This item contains just one readonly attribute: port_num.
|
||||
It contains the port number of the /dev/ttyGS<n> device
|
||||
associated with acm function's instance "name".
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/acm.name/protocol
|
||||
Date: Aug 2024
|
||||
KernelVersion: 6.13
|
||||
Description:
|
||||
Reported bInterfaceProtocol for the ACM device. For legacy
|
||||
reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).
|
||||
|
@ -30,4 +30,12 @@ Description:
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
p_it_name playback input terminal name
|
||||
p_it_ch_name playback channels name
|
||||
p_ot_name playback output terminal name
|
||||
p_fu_vol_name playback mute/volume functional unit name
|
||||
c_it_name capture input terminal name
|
||||
c_it_ch_name capture channels name
|
||||
c_ot_name capture output terminal name
|
||||
c_fu_vol_name capture mute/volume functional unit name
|
||||
===================== =======================================
|
||||
|
@ -35,6 +35,17 @@ Description:
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
if_ctrl_name topology control name
|
||||
clksrc_in_name input clock name
|
||||
clksrc_out_name output clock name
|
||||
p_it_name playback input terminal name
|
||||
p_it_ch_name playback input first channel name
|
||||
p_ot_name playback output terminal name
|
||||
p_fu_vol_name playback mute/volume function unit name
|
||||
c_it_name capture input terminal name
|
||||
c_it_ch_name capture input first channel name
|
||||
c_ot_name capture output terminal name
|
||||
c_fu_vol_name capture mute/volume functional unit name
|
||||
c_terminal_type code of the capture terminal type
|
||||
p_terminal_type code of the playback terminal type
|
||||
===================== =======================================
|
||||
|
39
Documentation/ABI/testing/debugfs-iio-ad9467
Normal file
@ -0,0 +1,39 @@
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This dumps the calibration table that was filled during the
|
||||
digital interface tuning process.
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
List all the available test tones:
|
||||
- off
|
||||
- midscale_short
|
||||
- pos_fullscale
|
||||
- neg_fullscale
|
||||
- checkerboard
|
||||
- prbs23
|
||||
- prbs9
|
||||
- one_zero_toggle
|
||||
- user
|
||||
- bit_toggle
|
||||
- sync
|
||||
- one_bit_high
|
||||
- mixed_bit_frequency
|
||||
- ramp
|
||||
|
||||
Note that depending on the actual device being used, some of the
|
||||
above might not be available (and they won't be listed when
|
||||
reading the file).
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Writing to this file will initiate one of available test tone on
|
||||
channel Y. Reading it, shows which test is running. In cases
|
||||
where an IIO backend is available and supports the test tone,
|
||||
additional information about the data correctness is given.
|
20
Documentation/ABI/testing/debugfs-iio-backend
Normal file
@ -0,0 +1,20 @@
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Name of Backend Y connected to device X.
|
||||
|
||||
What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
|
||||
KernelVersion: 6.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Directly access the registers of backend Y. Typical usage is:
|
||||
|
||||
Reading address 0x50
|
||||
echo 0x50 > direct_reg_access
|
||||
cat direct_reg_access
|
||||
|
||||
Writing address 0x50
|
||||
echo 0x50 0x3 > direct_reg_access
|
||||
//readback address 0x50
|
||||
cat direct_reg_access
|
@ -151,3 +151,10 @@ Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
|
||||
Description:
|
||||
The recompress file is write-only and triggers re-compression
|
||||
with secondary compression algorithms.
|
||||
|
||||
What: /sys/block/zram<id>/algorithm_params
|
||||
Date: August 2024
|
||||
Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
|
||||
Description:
|
||||
The algorithm_params file is write-only and is used to setup
|
||||
compression algorithm parameters.
|
||||
|
@ -523,13 +523,27 @@ Description:
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -541,6 +555,10 @@ Description:
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
|
||||
KernelVersion: 5.8
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -549,25 +567,34 @@ Description:
|
||||
- a small discrete set of values like "0 2 4 6 8"
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -575,6 +602,20 @@ Description:
|
||||
production inaccuracies). If shared across all channels,
|
||||
<type>_calibscale is used.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
|
||||
KernelVersion: 4.8
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Available values of calibscale. Maybe expressed as either of:
|
||||
|
||||
- a small discrete set of values like "1 8 16"
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
If shared across all channels, <type>_calibscale_available is used.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
|
||||
@ -708,6 +749,7 @@ Description:
|
||||
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
|
||||
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
|
||||
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
|
||||
42kohm_to_gnd: connected to ground via a 42kOhm resistor,
|
||||
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
|
||||
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
|
||||
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
|
||||
@ -2289,3 +2331,11 @@ KernelVersion: 6.7
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
List of available timeout value for tap gesture confirmation.
|
||||
|
||||
What: /sys/.../iio:deviceX/in_shunt_resistor
|
||||
What: /sys/.../iio:deviceX/in_current_shunt_resistor
|
||||
What: /sys/.../iio:deviceX/in_power_shunt_resistor
|
||||
KernelVersion: 6.10
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value of current sense resistor in Ohms.
|
||||
|
@ -1,17 +0,0 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
|
||||
Date: March 2017
|
||||
KernelVersion: 4.12
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description: The value of the shunt resistor used to compute power drain on
|
||||
common input voltage pin (RS+). In Ohms.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
|
||||
Date: March 2017
|
||||
KernelVersion: 4.12
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description: The value of the shunt resistor used to compute current flowing
|
||||
between RS+ and RS- voltage sense inputs. In Ohms.
|
||||
|
||||
These attributes describe a single physical component, exposed as two distinct
|
||||
attributes as it is used to calculate two different values: power load and
|
||||
current flowing between RS+ and RS- inputs.
|
@ -15,17 +15,3 @@ Description:
|
||||
Set the relative humidity. This value is sent to the sensor for
|
||||
humidity compensation.
|
||||
Default value: 50000 (50 % relative humidity)
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
|
||||
Date: August 2021
|
||||
KernelVersion: 5.15
|
||||
Contact: Andreas Klinger <ak@it-klinger.de>
|
||||
Description:
|
||||
Set the bias value for the resistance which is used for
|
||||
calculation of in_concentration_input as follows:
|
||||
|
||||
x = (in_resistance_raw - in_resistance_calibbias) * 0.65
|
||||
|
||||
in_concentration_input = 500 / (1 + e^x)
|
||||
|
||||
Default value: 30000
|
||||
|
61
Documentation/ABI/testing/sysfs-bus-iio-dac
Normal file
@ -0,0 +1,61 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
|
||||
is useful when one wants to change the DAC output codes. For
|
||||
autonomous toggling, the way it should be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_currentY_rawN, where N is the integer value of the symbol;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute has the same meaning as out_currentY_raw. It is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_N (_rawN), where N is the integer value of the symbol.
|
||||
The same scale and offset as in out_currentY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW switch to a predefined output symbol. This attribute
|
||||
is specific to toggle enabled channels and allows switching between
|
||||
multiple predefined symbols. Each symbol corresponds to a different
|
||||
output, denoted as out_currentY_rawN, where N is the integer value
|
||||
of the symbol. Writing an integer value N will select out_currentY_rawN.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
|
||||
is useful when one wants to change the DAC output codes. For
|
||||
autonomous toggling, the way it should be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_rawN, where N is the integer value of the symbol;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute has the same meaning as out_currentY_raw. It is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_N (_rawN), where N is the integer value of the symbol.
|
||||
The same scale and offset as in out_currentY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW switch to a predefined output symbol. This attribute
|
||||
is specific to toggle enabled channels and allows switching between
|
||||
multiple predefined symbols. Each symbol corresponds to a different
|
||||
output, denoted as out_voltageY_rawN, where N is the integer value
|
||||
of the symbol. Writing an integer value N will select out_voltageY_rawN.
|
@ -53,34 +53,3 @@ KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither phase.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
|
||||
useful when one wants to change the DAC output codes. The way it should
|
||||
be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_raw0 and out_voltageY_raw1;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
It has the same meaning as out_voltageY_raw. This attribute is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
|
||||
as in out_voltageY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW toggle. This attribute is specific to toggle
|
||||
enabled channels and allows to toggle between out_voltageY_raw0
|
||||
and out_voltageY_raw1 through software. Writing 0 will select
|
||||
out_voltageY_raw0 while 1 selects out_voltageY_raw1.
|
||||
|
@ -3,7 +3,7 @@ KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Reading this returns the valid values that can be written to the
|
||||
on_altvoltage0_mode attribute:
|
||||
filter_mode attribute:
|
||||
|
||||
- auto -> Adjust bandpass filter to track changes in input clock rate.
|
||||
- manual -> disable/unregister the clock rate notifier / input clock tracking.
|
||||
|
@ -13,12 +13,3 @@ Description:
|
||||
available for reading data. However, samples can be occasionally skipped
|
||||
or repeated, depending on the beat between the capture and conversion
|
||||
rates.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
|
||||
Date: December 2015
|
||||
KernelVersion: 4.4
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value of the shunt resistor may be known only at runtime fom an
|
||||
eeprom content read by a client application. This attribute allows to
|
||||
set its value in ohms.
|
||||
|
@ -500,3 +500,75 @@ Description:
|
||||
console drivers from the device. Raw users of pci-sysfs
|
||||
resourceN attributes must be terminated prior to resizing.
|
||||
Success of the resizing operation is not guaranteed.
|
||||
|
||||
What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
|
||||
What: /sys/class/leds/*:enclosure:*/brightness
|
||||
Date: August 2024
|
||||
KernelVersion: 6.12
|
||||
Description:
|
||||
LED indications on PCIe storage enclosures which are controlled
|
||||
through the NPEM interface (Native PCIe Enclosure Management,
|
||||
PCIe r6.1 sec 6.28) are accessible as led class devices, both
|
||||
below /sys/class/leds and below NPEM-capable PCI devices.
|
||||
|
||||
Although these led class devices could be manipulated manually,
|
||||
in practice they are typically manipulated automatically by an
|
||||
application such as ledmon(8).
|
||||
|
||||
The name of a led class device is as follows:
|
||||
<bdf>:enclosure:<indication>
|
||||
where:
|
||||
|
||||
- <bdf> is the domain, bus, device and function number
|
||||
(e.g. 10000:02:05.0)
|
||||
- <indication> is a short description of the LED indication
|
||||
|
||||
Valid indications per PCIe r6.1 table 6-27 are:
|
||||
|
||||
- ok (drive is functioning normally)
|
||||
- locate (drive is being identified by an admin)
|
||||
- fail (drive is not functioning properly)
|
||||
- rebuild (drive is part of an array that is rebuilding)
|
||||
- pfa (drive is predicted to fail soon)
|
||||
- hotspare (drive is marked to be used as a replacement)
|
||||
- ica (drive is part of an array that is degraded)
|
||||
- ifa (drive is part of an array that is failed)
|
||||
- idt (drive is not the right type for the connector)
|
||||
- disabled (drive is disabled, removal is safe)
|
||||
- specific0 to specific7 (enclosure-specific indications)
|
||||
|
||||
Broadly, the indications fall into one of these categories:
|
||||
|
||||
- to signify drive state (ok, locate, fail, idt, disabled)
|
||||
- to signify drive role or state in a software RAID array
|
||||
(rebuild, pfa, hotspare, ica, ifa)
|
||||
- to signify any other role or state (specific0 to specific7)
|
||||
|
||||
Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
|
||||
ok, locate, fail, rebuild. All others are optional.
|
||||
A led class device is only visible if the corresponding
|
||||
indication is supported by the device.
|
||||
|
||||
To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
|
||||
to the "brightness" file. Note that manipulating an indication
|
||||
may implicitly manipulate other indications at the vendor's
|
||||
discretion. E.g. when the user lights up the "ok" indication,
|
||||
the vendor may choose to automatically turn off the "fail"
|
||||
indication. The current state of an indication can be
|
||||
retrieved by reading its "brightness" file.
|
||||
|
||||
The PCIe Base Specification allows vendors leeway to choose
|
||||
different colors or blinking patterns for the indications,
|
||||
but they typically follow the IBPI standard. E.g. the "locate"
|
||||
indication is usually presented as one or two LEDs blinking at
|
||||
4 Hz frequency:
|
||||
https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
|
||||
|
||||
PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
|
||||
to facilitate shared access by operating system and platform
|
||||
firmware to a device's NPEM registers. The kernel will use
|
||||
this DSM interface where available, instead of accessing NPEM
|
||||
registers directly. The DSM interface does not support the
|
||||
enclosure-specific indications "specific0" to "specific7",
|
||||
hence the corresponding led class devices are unavailable if
|
||||
the DSM interface is used.
|
||||
|
@ -377,17 +377,33 @@ What: /sys/class/power_supply/<supply_name>/charge_type
|
||||
Date: July 2009
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description:
|
||||
Represents the type of charging currently being applied to the
|
||||
battery. "Trickle", "Fast", and "Standard" all mean different
|
||||
charging speeds. "Adaptive" means that the charger uses some
|
||||
algorithm to adjust the charge rate dynamically, without
|
||||
any user configuration required. "Custom" means that the charger
|
||||
uses the charge_control_* properties as configuration for some
|
||||
different algorithm. "Long Life" means the charger reduces its
|
||||
charging rate in order to prolong the battery health. "Bypass"
|
||||
means the charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall adaptor to
|
||||
perform the power conversion externally.
|
||||
Select the charging algorithm to use for a battery.
|
||||
|
||||
Standard:
|
||||
Fully charge the battery at a moderate rate.
|
||||
Fast:
|
||||
Quickly charge the battery using fast-charge
|
||||
technology. This is typically harder on the battery
|
||||
than standard charging and may lower its lifespan.
|
||||
Trickle:
|
||||
Users who primarily operate the system while
|
||||
plugged into an external power source can extend
|
||||
battery life with this mode. Vendor tooling may
|
||||
call this "Primarily AC Use".
|
||||
Adaptive:
|
||||
Automatically optimize battery charge rate based
|
||||
on typical usage pattern.
|
||||
Custom:
|
||||
Use the charge_control_* properties to determine
|
||||
when to start and stop charging. Advanced users
|
||||
can use this to drastically extend battery life.
|
||||
Long Life:
|
||||
The charger reduces its charging rate in order to
|
||||
prolong the battery health.
|
||||
Bypass:
|
||||
The charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall
|
||||
adaptor to perform the power conversion externally.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
@ -592,7 +608,12 @@ Description:
|
||||
the supply, for example it can show if USB-PD capable source
|
||||
is attached.
|
||||
|
||||
Access: Read-Only
|
||||
Access: For power-supplies which consume USB power such
|
||||
as battery charger chips, this indicates the type of
|
||||
the connected USB power source and is Read-Only.
|
||||
|
||||
For power-supplies which act as a USB power-source such as
|
||||
e.g. the UCS1002 USB Port Power Controller this is writable.
|
||||
|
||||
Valid values:
|
||||
"Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
|
||||
|
15
Documentation/ABI/testing/sysfs-class-tee
Normal file
@ -0,0 +1,15 @@
|
||||
What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
|
||||
Date: May 2024
|
||||
KernelVersion: 6.10
|
||||
Contact: op-tee@lists.trustedfirmware.org
|
||||
Description:
|
||||
RPMB frames can be routed to the RPMB device via the
|
||||
user-space daemon tee-supplicant or the RPMB subsystem
|
||||
in the kernel. The value "user" means that the driver
|
||||
will route the RPMB frames via user space. Conversely,
|
||||
"kernel" means that the frames are routed via the RPMB
|
||||
subsystem without assistance from tee-supplicant. It
|
||||
should be assumed that RPMB frames are routed via user
|
||||
space if the variable is absent. The primary purpose
|
||||
of this variable is to let systemd know whether
|
||||
tee-supplicant is needed in the early boot with initramfs.
|
@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug
|
||||
Date: Aug 2023
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
(RO) indicates whether or not the kernel directly supports
|
||||
modifying the crash elfcorehdr for memory hot un/plug and/or
|
||||
on/offline changes.
|
||||
(RO) indicates whether or not the kernel updates relevant kexec
|
||||
segments on memory hot un/plug and/or on/offline events, avoiding the
|
||||
need to reload kdump kernel.
|
||||
|
@ -704,9 +704,9 @@ What: /sys/devices/system/cpu/crash_hotplug
|
||||
Date: Aug 2023
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description:
|
||||
(RO) indicates whether or not the kernel directly supports
|
||||
modifying the crash elfcorehdr for CPU hot un/plug and/or
|
||||
on/offline changes.
|
||||
(RO) indicates whether or not the kernel updates relevant kexec
|
||||
segments on memory hot un/plug and/or on/offline events, avoiding the
|
||||
need to reload kdump kernel.
|
||||
|
||||
What: /sys/devices/system/cpu/enabled
|
||||
Date: Nov 2022
|
||||
|
@ -1532,3 +1532,30 @@ Contact: Bean Huo <beanhuo@micron.com>
|
||||
Description:
|
||||
rtc_update_ms indicates how often the host should synchronize or update the
|
||||
UFS RTC. If set to 0, this will disable UFS RTC periodic update.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/version
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: UFS version register.
|
||||
Symbol - VER. This file shows the UFSHCD version.
|
||||
Example: Version 3.12 would be represented as 0000_0312h.
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/product_id
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: product ID register.
|
||||
Symbol - HCPID. This file shows the UFSHCD product id.
|
||||
The content of this register is vendor specific.
|
||||
The file is read only.
|
||||
|
||||
What: /sys/devices/platform/.../ufshci_capabilities/man_id
|
||||
Date: August 2024
|
||||
Contact: Avri Altman <avri.altman@wdc.com>
|
||||
Description:
|
||||
Host Capabilities register group: manufacturer ID register.
|
||||
Symbol - HCMID. This file shows the UFSHCD manufacturer id.
|
||||
The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
|
||||
The file is read only.
|
||||
|
@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young
|
||||
candidates whose age is not beyond the threshold, by default it was
|
||||
initialized as 604800 seconds (equals to 7 days).
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/atgc_enabled
|
||||
Date: Feb 2024
|
||||
Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
|
||||
Description: It represents whether ATGC is on or off. The value is 1 which
|
||||
indicates that ATGC is on, and 0 indicates that it is off.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
|
||||
Date: July 2021
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
@ -763,3 +769,53 @@ Date: November 2023
|
||||
Contact: "Chao Yu" <chao@kernel.org>
|
||||
Description: It controls to enable/disable IO aware feature for background discard.
|
||||
By default, the value is 1 which indicates IO aware is on.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
|
||||
Date: July 2024
|
||||
Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
|
||||
Description: The zone UFS we are currently using consists of two parts:
|
||||
conventional zones and sequential zones. It can be used to control which part
|
||||
to prioritize for writes, with a default value of 0.
|
||||
|
||||
======================== =========================================
|
||||
value description
|
||||
blkzone_alloc_policy = 0 Prioritize writing to sequential zones
|
||||
blkzone_alloc_policy = 1 Only allow writing to sequential zones
|
||||
blkzone_alloc_policy = 2 Prioritize writing to conventional zones
|
||||
======================== =========================================
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/migration_window_granularity
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Controls migration window granularity of garbage collection on large
|
||||
section. it can control the scanning window granularity for GC migration
|
||||
in a unit of segment, while migration_granularity controls the number
|
||||
of segments which can be migrated at the same turn.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/reserved_segments
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: In order to fine tune GC behavior, we can control the number of
|
||||
reserved segments.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: If the percentage of free sections over total sections is above this
|
||||
number, F2FS do not garbage collection for zoned devices through the
|
||||
background GC thread. the default number is "60".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: If the percentage of free sections over total sections is under this
|
||||
number, F2FS boosts garbage collection for zoned devices through the
|
||||
background GC thread. the default number is "25".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: It controls the valid block ratio threshold not to trigger excessive GC
|
||||
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
|
||||
background GC thread from intiating GC for sections having valid blocks
|
||||
exceeding the ratio.
|
||||
|
@ -52,7 +52,7 @@ driver generally needs to perform the following initialization:
|
||||
- Enable DMA/processing engines
|
||||
|
||||
When done using the device, and perhaps the module needs to be unloaded,
|
||||
the driver needs to take the follow steps:
|
||||
the driver needs to take the following steps:
|
||||
|
||||
- Disable the device from generating IRQs
|
||||
- Release the IRQ (free_irq())
|
||||
|
@ -921,10 +921,10 @@ This portion of the ``rcu_data`` structure is declared as follows:
|
||||
|
||||
::
|
||||
|
||||
1 int dynticks_snap;
|
||||
1 int watching_snap;
|
||||
2 unsigned long dynticks_fqs;
|
||||
|
||||
The ``->dynticks_snap`` field is used to take a snapshot of the
|
||||
The ``->watching_snap`` field is used to take a snapshot of the
|
||||
corresponding CPU's dyntick-idle state when forcing quiescent states,
|
||||
and is therefore accessed from other CPUs. Finally, the
|
||||
``->dynticks_fqs`` field is used to count the number of times this CPU
|
||||
@ -935,8 +935,8 @@ This portion of the rcu_data structure is declared as follows:
|
||||
|
||||
::
|
||||
|
||||
1 long dynticks_nesting;
|
||||
2 long dynticks_nmi_nesting;
|
||||
1 long nesting;
|
||||
2 long nmi_nesting;
|
||||
3 atomic_t dynticks;
|
||||
4 bool rcu_need_heavy_qs;
|
||||
5 bool rcu_urgent_qs;
|
||||
@ -945,14 +945,14 @@ These fields in the rcu_data structure maintain the per-CPU dyntick-idle
|
||||
state for the corresponding CPU. The fields may be accessed only from
|
||||
the corresponding CPU (and from tracing) unless otherwise stated.
|
||||
|
||||
The ``->dynticks_nesting`` field counts the nesting depth of process
|
||||
The ``->nesting`` field counts the nesting depth of process
|
||||
execution, so that in normal circumstances this counter has value zero
|
||||
or one. NMIs, irqs, and tracers are counted by the
|
||||
``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
|
||||
``->nmi_nesting`` field. Because NMIs cannot be masked, changes
|
||||
to this variable have to be undertaken carefully using an algorithm
|
||||
provided by Andy Lutomirski. The initial transition from idle adds one,
|
||||
and nested transitions add two, so that a nesting level of five is
|
||||
represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
|
||||
represented by a ``->nmi_nesting`` value of nine. This counter
|
||||
can therefore be thought of as counting the number of reasons why this
|
||||
CPU cannot be permitted to enter dyntick-idle mode, aside from
|
||||
process-level transitions.
|
||||
@ -960,12 +960,12 @@ process-level transitions.
|
||||
However, it turns out that when running in non-idle kernel context, the
|
||||
Linux kernel is fully capable of entering interrupt handlers that never
|
||||
exit and perhaps also vice versa. Therefore, whenever the
|
||||
``->dynticks_nesting`` field is incremented up from zero, the
|
||||
``->dynticks_nmi_nesting`` field is set to a large positive number, and
|
||||
whenever the ``->dynticks_nesting`` field is decremented down to zero,
|
||||
the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
|
||||
``->nesting`` field is incremented up from zero, the
|
||||
``->nmi_nesting`` field is set to a large positive number, and
|
||||
whenever the ``->nesting`` field is decremented down to zero,
|
||||
the ``->nmi_nesting`` field is set to zero. Assuming that
|
||||
the number of misnested interrupts is not sufficient to overflow the
|
||||
counter, this approach corrects the ``->dynticks_nmi_nesting`` field
|
||||
counter, this approach corrects the ``->nmi_nesting`` field
|
||||
every time the corresponding CPU enters the idle loop from process
|
||||
context.
|
||||
|
||||
@ -992,8 +992,8 @@ code.
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Quick Quiz**: |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Why not simply combine the ``->dynticks_nesting`` and |
|
||||
| ``->dynticks_nmi_nesting`` counters into a single counter that just |
|
||||
| Why not simply combine the ``->nesting`` and |
|
||||
| ``->nmi_nesting`` counters into a single counter that just |
|
||||
| counts the number of reasons that the corresponding CPU is non-idle? |
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Answer**: |
|
||||
|
@ -147,10 +147,10 @@ RCU read-side critical sections preceding and following the current
|
||||
idle sojourn.
|
||||
This case is handled by calls to the strongly ordered
|
||||
``atomic_add_return()`` read-modify-write atomic operation that
|
||||
is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
|
||||
time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
|
||||
The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
|
||||
(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
|
||||
is invoked within ``ct_kernel_exit_state()`` at idle-entry
|
||||
time and within ``ct_kernel_enter_state()`` at idle-exit time.
|
||||
The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
|
||||
(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
|
||||
(both of which rely on acquire semantics) to detect idle CPUs.
|
||||
|
||||
+-----------------------------------------------------------------------+
|
||||
|
@ -528,7 +528,7 @@
|
||||
font-style="normal"
|
||||
y="-8652.5312"
|
||||
x="2466.7822"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-2-7-2-0"
|
||||
@ -537,7 +537,7 @@
|
||||
font-style="normal"
|
||||
y="-8368.1475"
|
||||
x="2463.3262"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
</g>
|
||||
<g
|
||||
id="g4504"
|
||||
@ -607,7 +607,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -638,7 +638,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
@ -844,7 +844,7 @@
|
||||
font-style="normal"
|
||||
y="1547.8876"
|
||||
x="4417.6396"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<g
|
||||
style="fill:none;stroke-width:0.025in"
|
||||
transform="translate(6501.9719,-10685.904)"
|
||||
@ -899,7 +899,7 @@
|
||||
font-style="normal"
|
||||
y="1858.8729"
|
||||
x="4414.1836"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="14659.87"
|
||||
@ -977,7 +977,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -1008,7 +1008,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 50 KiB |
@ -2974,7 +2974,7 @@
|
||||
font-style="normal"
|
||||
y="38114.047"
|
||||
x="-334.33856"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<g
|
||||
style="fill:none;stroke-width:0.025in"
|
||||
transform="translate(1749.9916,25880.249)"
|
||||
@ -3029,7 +3029,7 @@
|
||||
font-style="normal"
|
||||
y="38425.035"
|
||||
x="-337.79462"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="9907.8887"
|
||||
@ -3107,7 +3107,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
@ -3138,7 +3138,7 @@
|
||||
font-weight="bold"
|
||||
font-size="192"
|
||||
id="text202-7-5-3-27-6-1"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
x="3745.7725"
|
||||
|
Before Width: | Height: | Size: 208 KiB After Width: | Height: | Size: 208 KiB |
@ -516,7 +516,7 @@
|
||||
font-style="normal"
|
||||
y="-8652.5312"
|
||||
x="2466.7822"
|
||||
xml:space="preserve">dyntick_save_progress_counter()</text>
|
||||
xml:space="preserve">rcu_watching_snap_save()</text>
|
||||
<text
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
id="text202-7-2-7-2-0"
|
||||
@ -525,7 +525,7 @@
|
||||
font-style="normal"
|
||||
y="-8368.1475"
|
||||
x="2463.3262"
|
||||
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
|
||||
xml:space="preserve">rcu_watching_snap_recheck()</text>
|
||||
<text
|
||||
sodipodi:linespacing="125%"
|
||||
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
|
||||
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
@ -2649,8 +2649,7 @@ those that are idle from RCU's perspective) and then Tasks Rude RCU can
|
||||
be removed from the kernel.
|
||||
|
||||
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
|
||||
consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
|
||||
and rcu_barrier_tasks_rude().
|
||||
consisting solely of synchronize_rcu_tasks_rude().
|
||||
|
||||
Tasks Trace RCU
|
||||
~~~~~~~~~~~~~~~
|
||||
|
@ -194,14 +194,13 @@ over a rather long period of time, but improvements are always welcome!
|
||||
when publicizing a pointer to a structure that can
|
||||
be traversed by an RCU read-side critical section.
|
||||
|
||||
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
|
||||
call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
|
||||
the callback function may be invoked from softirq context,
|
||||
and in any case with bottom halves disabled. In particular,
|
||||
this callback function cannot block. If you need the callback
|
||||
to block, run that code in a workqueue handler scheduled from
|
||||
the callback. The queue_rcu_work() function does this for you
|
||||
in the case of call_rcu().
|
||||
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
|
||||
call_rcu_tasks_trace() is used, the callback function may be
|
||||
invoked from softirq context, and in any case with bottom halves
|
||||
disabled. In particular, this callback function cannot block.
|
||||
If you need the callback to block, run that code in a workqueue
|
||||
handler scheduled from the callback. The queue_rcu_work()
|
||||
function does this for you in the case of call_rcu().
|
||||
|
||||
6. Since synchronize_rcu() can block, it cannot be called
|
||||
from any sort of irq context. The same rule applies
|
||||
@ -254,10 +253,10 @@ over a rather long period of time, but improvements are always welcome!
|
||||
corresponding readers must use rcu_read_lock_trace()
|
||||
and rcu_read_unlock_trace().
|
||||
|
||||
c. If an updater uses call_rcu_tasks_rude() or
|
||||
synchronize_rcu_tasks_rude(), then the corresponding
|
||||
readers must use anything that disables preemption,
|
||||
for example, preempt_disable() and preempt_enable().
|
||||
c. If an updater uses synchronize_rcu_tasks_rude(),
|
||||
then the corresponding readers must use anything that
|
||||
disables preemption, for example, preempt_disable()
|
||||
and preempt_enable().
|
||||
|
||||
Mixing things up will result in confusion and broken kernels, and
|
||||
has even resulted in an exploitable security issue. Therefore,
|
||||
@ -326,11 +325,9 @@ over a rather long period of time, but improvements are always welcome!
|
||||
d. Periodically invoke rcu_barrier(), permitting a limited
|
||||
number of updates per grace period.
|
||||
|
||||
The same cautions apply to call_srcu(), call_rcu_tasks(),
|
||||
call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
|
||||
why there is an srcu_barrier(), rcu_barrier_tasks(),
|
||||
rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
|
||||
respectively.
|
||||
The same cautions apply to call_srcu(), call_rcu_tasks(), and
|
||||
call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
|
||||
rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
|
||||
|
||||
Note that although these primitives do take action to avoid
|
||||
memory exhaustion when any given CPU has too many callbacks,
|
||||
@ -383,17 +380,17 @@ over a rather long period of time, but improvements are always welcome!
|
||||
must use whatever locking or other synchronization is required
|
||||
to safely access and/or modify that data structure.
|
||||
|
||||
Do not assume that RCU callbacks will be executed on
|
||||
the same CPU that executed the corresponding call_rcu(),
|
||||
call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or
|
||||
call_rcu_tasks_trace(). For example, if a given CPU goes offline
|
||||
while having an RCU callback pending, then that RCU callback
|
||||
will execute on some surviving CPU. (If this was not the case,
|
||||
a self-spawning RCU callback would prevent the victim CPU from
|
||||
ever going offline.) Furthermore, CPUs designated by rcu_nocbs=
|
||||
might well *always* have their RCU callbacks executed on some
|
||||
other CPUs, in fact, for some real-time workloads, this is the
|
||||
whole point of using the rcu_nocbs= kernel boot parameter.
|
||||
Do not assume that RCU callbacks will be executed on the same
|
||||
CPU that executed the corresponding call_rcu(), call_srcu(),
|
||||
call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
|
||||
a given CPU goes offline while having an RCU callback pending,
|
||||
then that RCU callback will execute on some surviving CPU.
|
||||
(If this was not the case, a self-spawning RCU callback would
|
||||
prevent the victim CPU from ever going offline.) Furthermore,
|
||||
CPUs designated by rcu_nocbs= might well *always* have their
|
||||
RCU callbacks executed on some other CPUs, in fact, for some
|
||||
real-time workloads, this is the whole point of using the
|
||||
rcu_nocbs= kernel boot parameter.
|
||||
|
||||
In addition, do not assume that callbacks queued in a given order
|
||||
will be invoked in that order, even if they all are queued on the
|
||||
@ -507,9 +504,9 @@ over a rather long period of time, but improvements are always welcome!
|
||||
These debugging aids can help you find problems that are
|
||||
otherwise extremely difficult to spot.
|
||||
|
||||
17. If you pass a callback function defined within a module to one of
|
||||
call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
|
||||
or call_rcu_tasks_trace(), then it is necessary to wait for all
|
||||
17. If you pass a callback function defined within a module
|
||||
to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
|
||||
call_rcu_tasks_trace(), then it is necessary to wait for all
|
||||
pending callbacks to be invoked before unloading that module.
|
||||
Note that it is absolutely *not* sufficient to wait for a grace
|
||||
period! For example, synchronize_rcu() implementation is *not*
|
||||
@ -522,7 +519,6 @@ over a rather long period of time, but improvements are always welcome!
|
||||
- call_rcu() -> rcu_barrier()
|
||||
- call_srcu() -> srcu_barrier()
|
||||
- call_rcu_tasks() -> rcu_barrier_tasks()
|
||||
- call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
|
||||
- call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
|
||||
|
||||
However, these barrier functions are absolutely *not* guaranteed
|
||||
@ -539,7 +535,6 @@ over a rather long period of time, but improvements are always welcome!
|
||||
- Either synchronize_srcu() or synchronize_srcu_expedited(),
|
||||
together with and srcu_barrier()
|
||||
- synchronize_rcu_tasks() and rcu_barrier_tasks()
|
||||
- synchronize_tasks_rude() and rcu_barrier_tasks_rude()
|
||||
- synchronize_tasks_trace() and rcu_barrier_tasks_trace()
|
||||
|
||||
If necessary, you can use something like workqueues to execute
|
||||
|
@ -1103,7 +1103,7 @@ RCU-Tasks-Rude::
|
||||
|
||||
Critical sections Grace period Barrier
|
||||
|
||||
N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
|
||||
N/A N/A
|
||||
synchronize_rcu_tasks_rude
|
||||
|
||||
|
||||
|
@ -93,7 +93,7 @@ commands (does not impact QAIC).
|
||||
uAPI
|
||||
====
|
||||
|
||||
QAIC creates an accel device per phsyical PCIe device. This accel device exists
|
||||
QAIC creates an accel device per physical PCIe device. This accel device exists
|
||||
for as long as the PCIe device is known to Linux.
|
||||
|
||||
The PCIe device may not be in the state to accept requests from userspace at
|
||||
|
@ -47,3 +47,4 @@ subdirectories.
|
||||
tomoyo
|
||||
Yama
|
||||
SafeSetID
|
||||
ipe
|
||||
|
790
Documentation/admin-guide/LSM/ipe.rst
Normal file
@ -0,0 +1,790 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Integrity Policy Enforcement (IPE)
|
||||
==================================
|
||||
|
||||
.. NOTE::
|
||||
|
||||
This is the documentation for admins, system builders, or individuals
|
||||
attempting to use IPE. If you're looking for more developer-focused
|
||||
documentation about IPE please see :doc:`the design docs </security/ipe>`.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
Integrity Policy Enforcement (IPE) is a Linux Security Module that takes a
|
||||
complementary approach to access control. Unlike traditional access control
|
||||
mechanisms that rely on labels and paths for decision-making, IPE focuses
|
||||
on the immutable security properties inherent to system components. These
|
||||
properties are fundamental attributes or features of a system component
|
||||
that cannot be altered, ensuring a consistent and reliable basis for
|
||||
security decisions.
|
||||
|
||||
To elaborate, in the context of IPE, system components primarily refer to
|
||||
files or the devices these files reside on. However, this is just a
|
||||
starting point. The concept of system components is flexible and can be
|
||||
extended to include new elements as the system evolves. The immutable
|
||||
properties include the origin of a file, which remains constant and
|
||||
unchangeable over time. For example, IPE policies can be crafted to trust
|
||||
files originating from the initramfs. Since initramfs is typically verified
|
||||
by the bootloader, its files are deemed trustworthy; "file is from
|
||||
initramfs" becomes an immutable property under IPE's consideration.
|
||||
|
||||
The immutable property concept extends to the security features enabled on
|
||||
a file's origin, such as dm-verity or fs-verity, which provide a layer of
|
||||
integrity and trust. For example, IPE allows the definition of policies
|
||||
that trust files from a dm-verity protected device. dm-verity ensures the
|
||||
integrity of an entire device by providing a verifiable and immutable state
|
||||
of its contents. Similarly, fs-verity offers filesystem-level integrity
|
||||
checks, allowing IPE to enforce policies that trust files protected by
|
||||
fs-verity. These two features cannot be turned off once established, so
|
||||
they are considered immutable properties. These examples demonstrate how
|
||||
IPE leverages immutable properties, such as a file's origin and its
|
||||
integrity protection mechanisms, to make access control decisions.
|
||||
|
||||
For the IPE policy, specifically, it grants the ability to enforce
|
||||
stringent access controls by assessing security properties against
|
||||
reference values defined within the policy. This assessment can be based on
|
||||
the existence of a security property (e.g., verifying if a file originates
|
||||
from initramfs) or evaluating the internal state of an immutable security
|
||||
property. The latter includes checking the roothash of a dm-verity
|
||||
protected device, determining whether dm-verity possesses a valid
|
||||
signature, assessing the digest of a fs-verity protected file, or
|
||||
determining whether fs-verity possesses a valid built-in signature. This
|
||||
nuanced approach to policy enforcement enables a highly secure and
|
||||
customizable system defense mechanism, tailored to specific security
|
||||
requirements and trust models.
|
||||
|
||||
To enable IPE, ensure that ``CONFIG_SECURITY_IPE`` (under
|
||||
:menuselection:`Security -> Integrity Policy Enforcement (IPE)`) config
|
||||
option is enabled.
|
||||
|
||||
Use Cases
|
||||
---------
|
||||
|
||||
IPE works best in fixed-function devices: devices in which their purpose
|
||||
is clearly defined and not supposed to be changed (e.g. network firewall
|
||||
device in a data center, an IoT device, etcetera), where all software and
|
||||
configuration is built and provisioned by the system owner.
|
||||
|
||||
IPE is a long-way off for use in general-purpose computing: the Linux
|
||||
community as a whole tends to follow a decentralized trust model (known as
|
||||
the web of trust), which IPE has no support for it yet. Instead, IPE
|
||||
supports PKI (public key infrastructure), which generally designates a
|
||||
set of trusted entities that provide a measure of absolute trust.
|
||||
|
||||
Additionally, while most packages are signed today, the files inside
|
||||
the packages (for instance, the executables), tend to be unsigned. This
|
||||
makes it difficult to utilize IPE in systems where a package manager is
|
||||
expected to be functional, without major changes to the package manager
|
||||
and ecosystem behind it.
|
||||
|
||||
The digest_cache LSM [#digest_cache_lsm]_ is a system that when combined with IPE,
|
||||
could be used to enable and support general-purpose computing use cases.
|
||||
|
||||
Known Limitations
|
||||
-----------------
|
||||
|
||||
IPE cannot verify the integrity of anonymous executable memory, such as
|
||||
the trampolines created by gcc closures and libffi (<3.4.2), or JIT'd code.
|
||||
Unfortunately, as this is dynamically generated code, there is no way
|
||||
for IPE to ensure the integrity of this code to form a trust basis.
|
||||
|
||||
IPE cannot verify the integrity of programs written in interpreted
|
||||
languages when these scripts are invoked by passing these program files
|
||||
to the interpreter. This is because the way interpreters execute these
|
||||
files; the scripts themselves are not evaluated as executable code
|
||||
through one of IPE's hooks, but they are merely text files that are read
|
||||
(as opposed to compiled executables) [#interpreters]_.
|
||||
|
||||
Threat Model
|
||||
------------
|
||||
|
||||
IPE specifically targets the risk of tampering with user-space executable
|
||||
code after the kernel has initially booted, including the kernel modules
|
||||
loaded from userspace via ``modprobe`` or ``insmod``.
|
||||
|
||||
To illustrate, consider a scenario where an untrusted binary, possibly
|
||||
malicious, is downloaded along with all necessary dependencies, including a
|
||||
loader and libc. The primary function of IPE in this context is to prevent
|
||||
the execution of such binaries and their dependencies.
|
||||
|
||||
IPE achieves this by verifying the integrity and authenticity of all
|
||||
executable code before allowing them to run. It conducts a thorough
|
||||
check to ensure that the code's integrity is intact and that they match an
|
||||
authorized reference value (digest, signature, etc) as per the defined
|
||||
policy. If a binary does not pass this verification process, either
|
||||
because its integrity has been compromised or it does not meet the
|
||||
authorization criteria, IPE will deny its execution. Additionally, IPE
|
||||
generates audit logs which may be utilized to detect and analyze failures
|
||||
resulting from policy violation.
|
||||
|
||||
Tampering threat scenarios include modification or replacement of
|
||||
executable code by a range of actors including:
|
||||
|
||||
- Actors with physical access to the hardware
|
||||
- Actors with local network access to the system
|
||||
- Actors with access to the deployment system
|
||||
- Compromised internal systems under external control
|
||||
- Malicious end users of the system
|
||||
- Compromised end users of the system
|
||||
- Remote (external) compromise of the system
|
||||
|
||||
IPE does not mitigate threats arising from malicious but authorized
|
||||
developers (with access to a signing certificate), or compromised
|
||||
developer tools used by them (i.e. return-oriented programming attacks).
|
||||
Additionally, IPE draws hard security boundary between userspace and
|
||||
kernelspace. As a result, kernel-level exploits are considered outside
|
||||
the scope of IPE and mitigation is left to other mechanisms.
|
||||
|
||||
Policy
|
||||
------
|
||||
|
||||
IPE policy is a plain-text [#devdoc]_ policy composed of multiple statements
|
||||
over several lines. There is one required line, at the top of the
|
||||
policy, indicating the policy name, and the policy version, for
|
||||
instance::
|
||||
|
||||
policy_name=Ex_Policy policy_version=0.0.0
|
||||
|
||||
The policy name is a unique key identifying this policy in a human
|
||||
readable name. This is used to create nodes under securityfs as well as
|
||||
uniquely identify policies to deploy new policies vs update existing
|
||||
policies.
|
||||
|
||||
The policy version indicates the current version of the policy (NOT the
|
||||
policy syntax version). This is used to prevent rollback of policy to
|
||||
potentially insecure previous versions of the policy.
|
||||
|
||||
The next portion of IPE policy are rules. Rules are formed by key=value
|
||||
pairs, known as properties. IPE rules require two properties: ``action``,
|
||||
which determines what IPE does when it encounters a match against the
|
||||
rule, and ``op``, which determines when the rule should be evaluated.
|
||||
The ordering is significant, a rule must start with ``op``, and end with
|
||||
``action``. Thus, a minimal rule is::
|
||||
|
||||
op=EXECUTE action=ALLOW
|
||||
|
||||
This example will allow any execution. Additional properties are used to
|
||||
assess immutable security properties about the files being evaluated.
|
||||
These properties are intended to be descriptions of systems within the
|
||||
kernel that can provide a measure of integrity verification, such that IPE
|
||||
can determine the trust of the resource based on the value of the property.
|
||||
|
||||
Rules are evaluated top-to-bottom. As a result, any revocation rules,
|
||||
or denies should be placed early in the file to ensure that these rules
|
||||
are evaluated before a rule with ``action=ALLOW``.
|
||||
|
||||
IPE policy supports comments. The character '#' will function as a
|
||||
comment, ignoring all characters to the right of '#' until the newline.
|
||||
|
||||
The default behavior of IPE evaluations can also be expressed in policy,
|
||||
through the ``DEFAULT`` statement. This can be done at a global level,
|
||||
or a per-operation level::
|
||||
|
||||
# Global
|
||||
DEFAULT action=ALLOW
|
||||
|
||||
# Operation Specific
|
||||
DEFAULT op=EXECUTE action=ALLOW
|
||||
|
||||
A default must be set for all known operations in IPE. If you want to
|
||||
preserve older policies being compatible with newer kernels that can introduce
|
||||
new operations, set a global default of ``ALLOW``, then override the
|
||||
defaults on a per-operation basis (as above).
|
||||
|
||||
With configurable policy-based LSMs, there's several issues with
|
||||
enforcing the configurable policies at startup, around reading and
|
||||
parsing the policy:
|
||||
|
||||
1. The kernel *should* not read files from userspace, so directly reading
|
||||
the policy file is prohibited.
|
||||
2. The kernel command line has a character limit, and one kernel module
|
||||
should not reserve the entire character limit for its own
|
||||
configuration.
|
||||
3. There are various boot loaders in the kernel ecosystem, so handing
|
||||
off a memory block would be costly to maintain.
|
||||
|
||||
As a result, IPE has addressed this problem through a concept of a "boot
|
||||
policy". A boot policy is a minimal policy which is compiled into the
|
||||
kernel. This policy is intended to get the system to a state where
|
||||
userspace is set up and ready to receive commands, at which point a more
|
||||
complex policy can be deployed via securityfs. The boot policy can be
|
||||
specified via ``SECURITY_IPE_BOOT_POLICY`` config option, which accepts
|
||||
a path to a plain-text version of the IPE policy to apply. This policy
|
||||
will be compiled into the kernel. If not specified, IPE will be disabled
|
||||
until a policy is deployed and activated through securityfs.
|
||||
|
||||
Deploying Policies
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Policies can be deployed from userspace through securityfs. These policies
|
||||
are signed through the PKCS#7 message format to enforce some level of
|
||||
authorization of the policies (prohibiting an attacker from gaining
|
||||
unconstrained root, and deploying an "allow all" policy). These
|
||||
policies must be signed by a certificate that chains to the
|
||||
``SYSTEM_TRUSTED_KEYRING``. With openssl, the policy can be signed by::
|
||||
|
||||
openssl smime -sign \
|
||||
-in "$MY_POLICY" \
|
||||
-signer "$MY_CERTIFICATE" \
|
||||
-inkey "$MY_PRIVATE_KEY" \
|
||||
-noattr \
|
||||
-nodetach \
|
||||
-nosmimecap \
|
||||
-outform der \
|
||||
-out "$MY_POLICY.p7b"
|
||||
|
||||
Deploying the policies is done through securityfs, through the
|
||||
``new_policy`` node. To deploy a policy, simply cat the file into the
|
||||
securityfs node::
|
||||
|
||||
cat "$MY_POLICY.p7b" > /sys/kernel/security/ipe/new_policy
|
||||
|
||||
Upon success, this will create one subdirectory under
|
||||
``/sys/kernel/security/ipe/policies/``. The subdirectory will be the
|
||||
``policy_name`` field of the policy deployed, so for the example above,
|
||||
the directory will be ``/sys/kernel/security/ipe/policies/Ex_Policy``.
|
||||
Within this directory, there will be seven files: ``pkcs7``, ``policy``,
|
||||
``name``, ``version``, ``active``, ``update``, and ``delete``.
|
||||
|
||||
The ``pkcs7`` file is read-only. Reading it returns the raw PKCS#7 data
|
||||
that was provided to the kernel, representing the policy. If the policy being
|
||||
read is the boot policy, this will return ``ENOENT``, as it is not signed.
|
||||
|
||||
The ``policy`` file is read only. Reading it returns the PKCS#7 inner
|
||||
content of the policy, which will be the plain text policy.
|
||||
|
||||
The ``active`` file is used to set a policy as the currently active policy.
|
||||
This file is rw, and accepts a value of ``"1"`` to set the policy as active.
|
||||
Since only a single policy can be active at one time, all other policies
|
||||
will be marked inactive. The policy being marked active must have a policy
|
||||
version greater or equal to the currently-running version.
|
||||
|
||||
The ``update`` file is used to update a policy that is already present
|
||||
in the kernel. This file is write-only and accepts a PKCS#7 signed
|
||||
policy. Two checks will always be performed on this policy: First, the
|
||||
``policy_names`` must match with the updated version and the existing
|
||||
version. Second the updated policy must have a policy version greater than
|
||||
or equal to the currently-running version. This is to prevent rollback attacks.
|
||||
|
||||
The ``delete`` file is used to remove a policy that is no longer needed.
|
||||
This file is write-only and accepts a value of ``1`` to delete the policy.
|
||||
On deletion, the securityfs node representing the policy will be removed.
|
||||
However, delete the current active policy is not allowed and will return
|
||||
an operation not permitted error.
|
||||
|
||||
Similarly, writing to both ``update`` and ``new_policy`` could result in
|
||||
bad message(policy syntax error) or file exists error. The latter error happens
|
||||
when trying to deploy a policy with a ``policy_name`` while the kernel already
|
||||
has a deployed policy with the same ``policy_name``.
|
||||
|
||||
Deploying a policy will *not* cause IPE to start enforcing the policy. IPE will
|
||||
only enforce the policy marked active. Note that only one policy can be active
|
||||
at a time.
|
||||
|
||||
Once deployment is successful, the policy can be activated, by writing file
|
||||
``/sys/kernel/security/ipe/policies/$policy_name/active``.
|
||||
For example, the ``Ex_Policy`` can be activated by::
|
||||
|
||||
echo 1 > "/sys/kernel/security/ipe/policies/Ex_Policy/active"
|
||||
|
||||
From above point on, ``Ex_Policy`` is now the enforced policy on the
|
||||
system.
|
||||
|
||||
IPE also provides a way to delete policies. This can be done via the
|
||||
``delete`` securityfs node,
|
||||
``/sys/kernel/security/ipe/policies/$policy_name/delete``.
|
||||
Writing ``1`` to that file deletes the policy::
|
||||
|
||||
echo 1 > "/sys/kernel/security/ipe/policies/$policy_name/delete"
|
||||
|
||||
There is only one requirement to delete a policy: the policy being deleted
|
||||
must be inactive.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
If a traditional MAC system is enabled (SELinux, apparmor, smack), all
|
||||
writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
|
||||
|
||||
Modes
|
||||
~~~~~
|
||||
|
||||
IPE supports two modes of operation: permissive (similar to SELinux's
|
||||
permissive mode) and enforced. In permissive mode, all events are
|
||||
checked and policy violations are logged, but the policy is not really
|
||||
enforced. This allows users to test policies before enforcing them.
|
||||
|
||||
The default mode is enforce, and can be changed via the kernel command
|
||||
line parameter ``ipe.enforce=(0|1)``, or the securityfs node
|
||||
``/sys/kernel/security/ipe/enforce``.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
|
||||
all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
|
||||
|
||||
Audit Events
|
||||
~~~~~~~~~~~~
|
||||
|
||||
1420 AUDIT_IPE_ACCESS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
Event Examples::
|
||||
|
||||
type=1420 audit(1653364370.067:61): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2241 comm="ld-linux.so" path="/deny/lib/libc.so.6" dev="sda2" ino=14549020 rule="DEFAULT action=DENY"
|
||||
type=1300 audit(1653364370.067:61): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=7f1105a28000 a1=195000 a2=5 a3=812 items=0 ppid=2219 pid=2241 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="ld-linux.so" exe="/tmp/ipe-test/lib/ld-linux.so" subj=unconfined key=(null)
|
||||
type=1327 audit(1653364370.067:61): 707974686F6E3300746573742F6D61696E2E7079002D6E00
|
||||
|
||||
type=1420 audit(1653364735.161:64): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2472 comm="mmap_test" path=? dev=? ino=? rule="DEFAULT action=DENY"
|
||||
type=1300 audit(1653364735.161:64): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=0 a1=1000 a2=4 a3=21 items=0 ppid=2219 pid=2472 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="mmap_test" exe="/root/overlake_test/upstream_test/vol_fsverity/bin/mmap_test" subj=unconfined key=(null)
|
||||
type=1327 audit(1653364735.161:64): 707974686F6E3300746573742F6D61696E2E7079002D6E00
|
||||
|
||||
This event indicates that IPE made an access control decision; the IPE
|
||||
specific record (1420) is always emitted in conjunction with a
|
||||
``AUDITSYSCALL`` record.
|
||||
|
||||
Determining whether IPE is in permissive or enforced mode can be derived
|
||||
from ``success`` property and exit code of the ``AUDITSYSCALL`` record.
|
||||
|
||||
|
||||
Field descriptions:
|
||||
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+===========+============+===========+=================================================================================+
|
||||
| ipe_op | string | No | The IPE operation name associated with the log |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| ipe_hook | string | No | The name of the LSM hook that triggered the IPE event |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| enforcing | integer | No | The current IPE enforcing state 1 is in enforcing mode, 0 is in permissive mode |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| pid | integer | No | The pid of the process that triggered the IPE event. |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| comm | string | No | The command line program name of the process that triggered the IPE event |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| path | string | Yes | The absolute path to the evaluated file |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| ino | integer | Yes | The inode number of the evaluated file |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| dev | string | Yes | The device name of the evaluated file, e.g. vda |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
| rule | string | No | The matched policy rule |
|
||||
+-----------+------------+-----------+---------------------------------------------------------------------------------+
|
||||
|
||||
1421 AUDIT_IPE_CONFIG_CHANGE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Event Example::
|
||||
|
||||
type=1421 audit(1653425583.136:54): old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
|
||||
type=1300 audit(1653425583.136:54): SYSCALL arch=c000003e syscall=1 success=yes exit=2 a0=3 a1=5596fcae1fb0 a2=2 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
|
||||
type=1327 audit(1653425583.136:54): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2
|
||||
|
||||
This event indicates that IPE switched the active poliy from one to another
|
||||
along with the version and the hash digest of the two policies.
|
||||
Note IPE can only have one policy active at a time, all access decision
|
||||
evaluation is based on the current active policy.
|
||||
The normal procedure to deploy a new policy is loading the policy to deploy
|
||||
into the kernel first, then switch the active policy to it.
|
||||
|
||||
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
|
||||
|
||||
Field descriptions:
|
||||
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+========================+============+===========+===================================================+
|
||||
| old_active_pol_name | string | Yes | The name of previous active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| old_active_pol_version | string | Yes | The version of previous active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| old_policy_digest | string | Yes | The hash of previous active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| new_active_pol_name | string | No | The name of current active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| new_active_pol_version | string | No | The version of current active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| new_policy_digest | string | No | The hash of current active policy |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| auid | integer | No | The login user ID |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| ses | integer | No | The login session ID |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| lsm | string | No | The lsm name associated with the event |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
| res | integer | No | The result of the audited operation(success/fail) |
|
||||
+------------------------+------------+-----------+---------------------------------------------------+
|
||||
|
||||
1422 AUDIT_IPE_POLICY_LOAD
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Event Example::
|
||||
|
||||
type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
|
||||
type=1300 audit(1653425529.927:53): arch=c000003e syscall=1 success=yes exit=2567 a0=3 a1=5596fcae1fb0 a2=a07 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
|
||||
type=1327 audit(1653425529.927:53): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2E
|
||||
|
||||
This record indicates a new policy has been loaded into the kernel with the policy name, policy version and policy hash.
|
||||
|
||||
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
|
||||
|
||||
Field descriptions:
|
||||
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+================+============+===========+===================================================+
|
||||
| policy_name | string | No | The policy_name |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| policy_version | string | No | The policy_version |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| policy_digest | string | No | The policy hash |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| auid | integer | No | The login user ID |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| ses | integer | No | The login session ID |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| lsm | string | No | The lsm name associated with the event |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
| res | integer | No | The result of the audited operation(success/fail) |
|
||||
+----------------+------------+-----------+---------------------------------------------------+
|
||||
|
||||
|
||||
1404 AUDIT_MAC_STATUS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Event Examples::
|
||||
|
||||
type=1404 audit(1653425689.008:55): enforcing=0 old_enforcing=1 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
|
||||
type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
|
||||
type=1327 audit(1653425689.008:55): proctitle="-bash"
|
||||
|
||||
type=1404 audit(1653425689.008:55): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
|
||||
type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
|
||||
type=1327 audit(1653425689.008:55): proctitle="-bash"
|
||||
|
||||
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
|
||||
|
||||
Field descriptions:
|
||||
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| Field | Value Type | Optional? | Description of Value |
|
||||
+===============+============+===========+=================================================================================================+
|
||||
| enforcing | integer | No | The enforcing state IPE is being switched to, 1 is in enforcing mode, 0 is in permissive mode |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| old_enforcing | integer | No | The enforcing state IPE is being switched from, 1 is in enforcing mode, 0 is in permissive mode |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| auid | integer | No | The login user ID |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| ses | integer | No | The login session ID |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| enabled | integer | No | The new TTY audit enabled setting |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| old-enabled | integer | No | The old TTY audit enabled setting |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| lsm | string | No | The lsm name associated with the event |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
| res | integer | No | The result of the audited operation(success/fail) |
|
||||
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
|
||||
|
||||
|
||||
Success Auditing
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
IPE supports success auditing. When enabled, all events that pass IPE
|
||||
policy and are not blocked will emit an audit event. This is disabled by
|
||||
default, and can be enabled via the kernel command line
|
||||
``ipe.success_audit=(0|1)`` or
|
||||
``/sys/kernel/security/ipe/success_audit`` securityfs file.
|
||||
|
||||
This is *very* noisy, as IPE will check every userspace binary on the
|
||||
system, but is useful for debugging policies.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
|
||||
all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
|
||||
|
||||
Properties
|
||||
----------
|
||||
|
||||
As explained above, IPE properties are ``key=value`` pairs expressed in IPE
|
||||
policy. Two properties are built-into the policy parser: 'op' and 'action'.
|
||||
The other properties are used to restrict immutable security properties
|
||||
about the files being evaluated. Currently those properties are:
|
||||
'``boot_verified``', '``dmverity_signature``', '``dmverity_roothash``',
|
||||
'``fsverity_signature``', '``fsverity_digest``'. A description of all
|
||||
properties supported by IPE are listed below:
|
||||
|
||||
op
|
||||
~~
|
||||
|
||||
Indicates the operation for a rule to apply to. Must be in every rule,
|
||||
as the first token. IPE supports the following operations:
|
||||
|
||||
``EXECUTE``
|
||||
|
||||
Pertains to any file attempting to be executed, or loaded as an
|
||||
executable.
|
||||
|
||||
``FIRMWARE``:
|
||||
|
||||
Pertains to firmware being loaded via the firmware_class interface.
|
||||
This covers both the preallocated buffer and the firmware file
|
||||
itself.
|
||||
|
||||
``KMODULE``:
|
||||
|
||||
Pertains to loading kernel modules via ``modprobe`` or ``insmod``.
|
||||
|
||||
``KEXEC_IMAGE``:
|
||||
|
||||
Pertains to kernel images loading via ``kexec``.
|
||||
|
||||
``KEXEC_INITRAMFS``
|
||||
|
||||
Pertains to initrd images loading via ``kexec --initrd``.
|
||||
|
||||
``POLICY``:
|
||||
|
||||
Controls loading policies via reading a kernel-space initiated read.
|
||||
|
||||
An example of such is loading IMA policies by writing the path
|
||||
to the policy file to ``$securityfs/ima/policy``
|
||||
|
||||
``X509_CERT``:
|
||||
|
||||
Controls loading IMA certificates through the Kconfigs,
|
||||
``CONFIG_IMA_X509_PATH`` and ``CONFIG_EVM_X509_PATH``.
|
||||
|
||||
action
|
||||
~~~~~~
|
||||
|
||||
Determines what IPE should do when a rule matches. Must be in every
|
||||
rule, as the final clause. Can be one of:
|
||||
|
||||
``ALLOW``:
|
||||
|
||||
If the rule matches, explicitly allow access to the resource to proceed
|
||||
without executing any more rules.
|
||||
|
||||
``DENY``:
|
||||
|
||||
If the rule matches, explicitly prohibit access to the resource to
|
||||
proceed without executing any more rules.
|
||||
|
||||
boot_verified
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
This property can be utilized for authorization of files from initramfs.
|
||||
The format of this property is::
|
||||
|
||||
boot_verified=(TRUE|FALSE)
|
||||
|
||||
|
||||
.. WARNING::
|
||||
|
||||
This property will trust files from initramfs(rootfs). It should
|
||||
only be used during early booting stage. Before mounting the real
|
||||
rootfs on top of the initramfs, initramfs script will recursively
|
||||
remove all files and directories on the initramfs. This is typically
|
||||
implemented by using switch_root(8) [#switch_root]_. Therefore the
|
||||
initramfs will be empty and not accessible after the real
|
||||
rootfs takes over. It is advised to switch to a different policy
|
||||
that doesn't rely on the property after this point.
|
||||
This ensures that the trust policies remain relevant and effective
|
||||
throughout the system's operation.
|
||||
|
||||
dmverity_roothash
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
This property can be utilized for authorization or revocation of
|
||||
specific dm-verity volumes, identified via their root hashes. It has a
|
||||
dependency on the DM_VERITY module. This property is controlled by
|
||||
the ``IPE_PROP_DM_VERITY`` config option, it will be automatically
|
||||
selected when ``SECURITY_IPE`` and ``DM_VERITY`` are all enabled.
|
||||
The format of this property is::
|
||||
|
||||
dmverity_roothash=DigestName:HexadecimalString
|
||||
|
||||
The supported DigestNames for dmverity_roothash are [#dmveritydigests]_
|
||||
|
||||
+ blake2b-512
|
||||
+ blake2s-256
|
||||
+ sha256
|
||||
+ sha384
|
||||
+ sha512
|
||||
+ sha3-224
|
||||
+ sha3-256
|
||||
+ sha3-384
|
||||
+ sha3-512
|
||||
+ sm3
|
||||
+ rmd160
|
||||
|
||||
dmverity_signature
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This property can be utilized for authorization of all dm-verity
|
||||
volumes that have a signed roothash that validated by a keyring
|
||||
specified by dm-verity's configuration, either the system trusted
|
||||
keyring, or the secondary keyring. It depends on
|
||||
``DM_VERITY_VERIFY_ROOTHASH_SIG`` config option and is controlled by
|
||||
the ``IPE_PROP_DM_VERITY_SIGNATURE`` config option, it will be automatically
|
||||
selected when ``SECURITY_IPE``, ``DM_VERITY`` and
|
||||
``DM_VERITY_VERIFY_ROOTHASH_SIG`` are all enabled.
|
||||
The format of this property is::
|
||||
|
||||
dmverity_signature=(TRUE|FALSE)
|
||||
|
||||
fsverity_digest
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
This property can be utilized for authorization of specific fsverity
|
||||
enabled files, identified via their fsverity digests.
|
||||
It depends on ``FS_VERITY`` config option and is controlled by
|
||||
the ``IPE_PROP_FS_VERITY`` config option, it will be automatically
|
||||
selected when ``SECURITY_IPE`` and ``FS_VERITY`` are all enabled.
|
||||
The format of this property is::
|
||||
|
||||
fsverity_digest=DigestName:HexadecimalString
|
||||
|
||||
The supported DigestNames for fsverity_digest are [#fsveritydigest]_
|
||||
|
||||
+ sha256
|
||||
+ sha512
|
||||
|
||||
fsverity_signature
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This property is used to authorize all fs-verity enabled files that have
|
||||
been verified by fs-verity's built-in signature mechanism. The signature
|
||||
verification relies on a key stored within the ".fs-verity" keyring. It
|
||||
depends on ``FS_VERITY_BUILTIN_SIGNATURES`` config option and
|
||||
it is controlled by the ``IPE_PROP_FS_VERITY`` config option,
|
||||
it will be automatically selected when ``SECURITY_IPE``, ``FS_VERITY``
|
||||
and ``FS_VERITY_BUILTIN_SIGNATURES`` are all enabled.
|
||||
The format of this property is::
|
||||
|
||||
fsverity_signature=(TRUE|FALSE)
|
||||
|
||||
Policy Examples
|
||||
---------------
|
||||
|
||||
Allow all
|
||||
~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Allow_All policy_version=0.0.0
|
||||
DEFAULT action=ALLOW
|
||||
|
||||
Allow only initramfs
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Allow_Initramfs policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE boot_verified=TRUE action=ALLOW
|
||||
|
||||
Allow any signed and validated dm-verity volume and the initramfs
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Allow_Signed_DMV_And_Initramfs policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE boot_verified=TRUE action=ALLOW
|
||||
op=EXECUTE dmverity_signature=TRUE action=ALLOW
|
||||
|
||||
Prohibit execution from a specific dm-verity volume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Deny_DMV_By_Roothash policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE dmverity_roothash=sha256:cd2c5bae7c6c579edaae4353049d58eb5f2e8be0244bf05345bc8e5ed257baff action=DENY
|
||||
|
||||
op=EXECUTE boot_verified=TRUE action=ALLOW
|
||||
op=EXECUTE dmverity_signature=TRUE action=ALLOW
|
||||
|
||||
Allow only a specific dm-verity volume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Allow_DMV_By_Roothash policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE dmverity_roothash=sha256:401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW
|
||||
|
||||
Allow any fs-verity file with a valid built-in signature
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=Allow_Signed_And_Validated_FSVerity policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE fsverity_signature=TRUE action=ALLOW
|
||||
|
||||
Allow execution of a specific fs-verity file
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
::
|
||||
|
||||
policy_name=ALLOW_FSV_By_Digest policy_version=0.0.0
|
||||
DEFAULT action=DENY
|
||||
|
||||
op=EXECUTE fsverity_digest=sha256:fd88f2b8824e197f850bf4c5109bea5cf0ee38104f710843bb72da796ba5af9e action=ALLOW
|
||||
|
||||
Additional Information
|
||||
----------------------
|
||||
|
||||
- `Github Repository <https://github.com/microsoft/ipe>`_
|
||||
- :doc:`Developer and design docs for IPE </security/ipe>`
|
||||
|
||||
FAQ
|
||||
---
|
||||
|
||||
Q:
|
||||
What's the difference between other LSMs which provide a measure of
|
||||
trust-based access control?
|
||||
|
||||
A:
|
||||
|
||||
In general, there's two other LSMs that can provide similar functionality:
|
||||
IMA, and Loadpin.
|
||||
|
||||
IMA and IPE are functionally very similar. The significant difference between
|
||||
the two is the policy. [#devdoc]_
|
||||
|
||||
Loadpin and IPE differ fairly dramatically, as Loadpin only covers the IPE's
|
||||
kernel read operations, whereas IPE is capable of controlling execution
|
||||
on top of kernel read. The trust model is also different; Loadpin roots its
|
||||
trust in the initial super-block, whereas trust in IPE is stemmed from kernel
|
||||
itself (via ``SYSTEM_TRUSTED_KEYS``).
|
||||
|
||||
-----------
|
||||
|
||||
.. [#digest_cache_lsm] https://lore.kernel.org/lkml/20240415142436.2545003-1-roberto.sassu@huaweicloud.com/
|
||||
|
||||
.. [#interpreters] There is `some interest in solving this issue <https://lore.kernel.org/lkml/20220321161557.495388-1-mic@digikod.net/>`_.
|
||||
|
||||
.. [#devdoc] Please see :doc:`the design docs </security/ipe>` for more on
|
||||
this topic.
|
||||
|
||||
.. [#switch_root] https://man7.org/linux/man-pages/man8/switch_root.8.html
|
||||
|
||||
.. [#dmveritydigests] These hash algorithms are based on values accepted by
|
||||
the Linux crypto API; IPE does not impose any
|
||||
restrictions on the digest algorithm itself;
|
||||
thus, this list may be out of date.
|
||||
|
||||
.. [#fsveritydigest] These hash algorithms are based on values accepted by the
|
||||
kernel's fsverity support; IPE does not impose any
|
||||
restrictions on the digest algorithm itself;
|
||||
thus, this list may be out of date.
|
@ -102,17 +102,41 @@ Examples::
|
||||
#select lzo compression algorithm
|
||||
echo lzo > /sys/block/zram0/comp_algorithm
|
||||
|
||||
For the time being, the `comp_algorithm` content does not necessarily
|
||||
show every compression algorithm supported by the kernel. We keep this
|
||||
list primarily to simplify device configuration and one can configure
|
||||
a new device with a compression algorithm that is not listed in
|
||||
`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
|
||||
and, if some of the algorithms were built as modules, it's impossible
|
||||
to list all of them using, for instance, /proc/crypto or any other
|
||||
method. This, however, has an advantage of permitting the usage of
|
||||
custom crypto compression modules (implementing S/W or H/W compression).
|
||||
For the time being, the `comp_algorithm` content shows only compression
|
||||
algorithms that are supported by zram.
|
||||
|
||||
4) Set Disksize
|
||||
4) Set compression algorithm parameters: Optional
|
||||
=================================================
|
||||
|
||||
Compression algorithms may support specific parameters which can be
|
||||
tweaked for particular dataset. ZRAM has an `algorithm_params` device
|
||||
attribute which provides a per-algorithm params configuration.
|
||||
|
||||
For example, several compression algorithms support `level` parameter.
|
||||
In addition, certain compression algorithms support pre-trained dictionaries,
|
||||
which significantly change algorithms' characteristics. In order to configure
|
||||
compression algorithm to use external pre-trained dictionary, pass full
|
||||
path to the `dict` along with other parameters::
|
||||
|
||||
#pass path to pre-trained zstd dictionary
|
||||
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
|
||||
|
||||
#same, but using algorithm priority
|
||||
echo "priority=1 dict=/etc/dictioary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
#pass path to pre-trained zstd dictionary and compression level
|
||||
echo "algo=zstd level=8 dict=/etc/dictioary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
Parameters are algorithm specific: not all algorithms support pre-trained
|
||||
dictionaries, not all algorithms support `level`. Furthermore, for certain
|
||||
algorithms `level` controls the compression level (the higher the value the
|
||||
better the compression ratio, it even can take negatives values for some
|
||||
algorithms), for other algorithms `level` is acceleration level (the higher
|
||||
the value the lower the compression ratio).
|
||||
|
||||
5) Set Disksize
|
||||
===============
|
||||
|
||||
Set disk size by writing the value to sysfs node 'disksize'.
|
||||
@ -132,7 +156,7 @@ There is little point creating a zram of greater than twice the size of memory
|
||||
since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
|
||||
size of the disk when not in use so a huge zram is wasteful.
|
||||
|
||||
5) Set memory limit: Optional
|
||||
6) Set memory limit: Optional
|
||||
=============================
|
||||
|
||||
Set memory limit by writing the value to sysfs node 'mem_limit'.
|
||||
@ -151,7 +175,7 @@ Examples::
|
||||
# To disable memory limit
|
||||
echo 0 > /sys/block/zram0/mem_limit
|
||||
|
||||
6) Activate
|
||||
7) Activate
|
||||
===========
|
||||
|
||||
::
|
||||
@ -162,7 +186,7 @@ Examples::
|
||||
mkfs.ext4 /dev/zram1
|
||||
mount /dev/zram1 /tmp
|
||||
|
||||
7) Add/remove zram devices
|
||||
8) Add/remove zram devices
|
||||
==========================
|
||||
|
||||
zram provides a control interface, which enables dynamic (on-demand) device
|
||||
@ -182,7 +206,7 @@ execute::
|
||||
|
||||
echo X > /sys/class/zram-control/hot_remove
|
||||
|
||||
8) Stats
|
||||
9) Stats
|
||||
========
|
||||
|
||||
Per-device statistics are exported as various nodes under /sys/block/zram<id>/
|
||||
@ -205,6 +229,7 @@ writeback_limit_enable RW show and set writeback_limit feature
|
||||
max_comp_streams RW the number of possible concurrent compress
|
||||
operations
|
||||
comp_algorithm RW show and change the compression algorithm
|
||||
algorithm_params WO setup compression algorithm parameters
|
||||
compact WO trigger memory compaction
|
||||
debug_stat RO this file is used for zram debugging purposes
|
||||
backing_dev RW set up backend storage for zram to write out
|
||||
@ -283,15 +308,15 @@ a single line of text and contains the following stats separated by whitespace:
|
||||
Unit: 4K bytes
|
||||
============== =============================================================
|
||||
|
||||
9) Deactivate
|
||||
=============
|
||||
10) Deactivate
|
||||
==============
|
||||
|
||||
::
|
||||
|
||||
swapoff /dev/zram0
|
||||
umount /dev/zram1
|
||||
|
||||
10) Reset
|
||||
11) Reset
|
||||
=========
|
||||
|
||||
Write any positive value to 'reset' sysfs node::
|
||||
@ -487,11 +512,14 @@ registered compression algorithms, increases our chances of finding the
|
||||
algorithm that successfully compresses a particular page. Sometimes, however,
|
||||
it is convenient (and sometimes even necessary) to limit recompression to
|
||||
only one particular algorithm so that it will not try any other algorithms.
|
||||
This can be achieved by providing a algo=NAME parameter:::
|
||||
This can be achieved by providing a `algo` or `priority` parameter:::
|
||||
|
||||
#use zstd algorithm only (if registered)
|
||||
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
|
||||
|
||||
#use zstd algorithm only (if zstd was registered under priority 1)
|
||||
echo "type=huge priority=1" > /sys/block/zramX/recompress
|
||||
|
||||
memory tracking
|
||||
===============
|
||||
|
||||
|
@ -1,76 +1,144 @@
|
||||
Bisecting a bug
|
||||
+++++++++++++++
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
.. [see the bottom of this file for redistribution information]
|
||||
|
||||
Last updated: 28 October 2016
|
||||
======================
|
||||
Bisecting a regression
|
||||
======================
|
||||
|
||||
Introduction
|
||||
============
|
||||
This document describes how to use a ``git bisect`` to find the source code
|
||||
change that broke something -- for example when some functionality stopped
|
||||
working after upgrading from Linux 6.0 to 6.1.
|
||||
|
||||
Always try the latest kernel from kernel.org and build from source. If you are
|
||||
not confident in doing that please report the bug to your distribution vendor
|
||||
instead of to a kernel developer.
|
||||
The text focuses on the gist of the process. If you are new to bisecting the
|
||||
kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
|
||||
instead: it depicts everything from start to finish while covering multiple
|
||||
aspects even kernel developers occasionally forget. This includes detecting
|
||||
situations early where a bisection would be a waste of time, as nobody would
|
||||
care about the result -- for example, because the problem happens after the
|
||||
kernel marked itself as 'tainted', occurs in an abandoned version, was already
|
||||
fixed, or is caused by a .config change you or your Linux distributor performed.
|
||||
|
||||
Finding bugs is not always easy. Have a go though. If you can't find it don't
|
||||
give up. Report as much as you have found to the relevant maintainer. See
|
||||
MAINTAINERS for who that is for the subsystem you have worked on.
|
||||
Finding the change causing a kernel issue using a bisection
|
||||
===========================================================
|
||||
|
||||
Before you submit a bug report read
|
||||
'Documentation/admin-guide/reporting-issues.rst'.
|
||||
*Note: the following process assumes you prepared everything for a bisection.
|
||||
This includes having a Git clone with the appropriate sources, installing the
|
||||
software required to build and install kernels, as well as a .config file stored
|
||||
in a safe place (the following example assumes '~/prepared_kernel_.config') to
|
||||
use as pristine base at each bisection step; ideally, you have also worked out
|
||||
a fully reliable and straight-forward way to reproduce the regression, too.*
|
||||
|
||||
Devices not appearing
|
||||
=====================
|
||||
* Preparation: start the bisection and tell Git about the points in the history
|
||||
you consider to be working and broken, which Git calls 'good' and 'bad'::
|
||||
|
||||
Often this is caused by udev/systemd. Check that first before blaming it
|
||||
on the kernel.
|
||||
git bisect start
|
||||
git bisect good v6.0
|
||||
git bisect bad v6.1
|
||||
|
||||
Finding patch that caused a bug
|
||||
===============================
|
||||
Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
|
||||
|
||||
Using the provided tools with ``git`` makes finding bugs easy provided the bug
|
||||
is reproducible.
|
||||
1. Copy your prepared .config into the build directory and adjust it to the
|
||||
needs of the codebase Git checked out for testing::
|
||||
|
||||
Steps to do it:
|
||||
cp ~/prepared_kernel_.config .config
|
||||
make olddefconfig
|
||||
|
||||
- build the Kernel from its git source
|
||||
- start bisect with [#f1]_::
|
||||
2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
|
||||
for example, when a compile error happens at the current stage of the
|
||||
bisection a later change resolves. In such cases run ``git bisect skip`` and
|
||||
go back to step 1.
|
||||
|
||||
$ git bisect start
|
||||
3. Check if the functionality that regressed works in the kernel you just built.
|
||||
|
||||
- mark the broken changeset with::
|
||||
If it works, execute::
|
||||
|
||||
$ git bisect bad [commit]
|
||||
git bisect good
|
||||
|
||||
- mark a changeset where the code is known to work with::
|
||||
If it is broken, run::
|
||||
|
||||
$ git bisect good [commit]
|
||||
git bisect bad
|
||||
|
||||
- rebuild the Kernel and test
|
||||
- interact with git bisect by using either::
|
||||
Note, getting this wrong just once will send the rest of the bisection
|
||||
totally off course. To prevent having to start anew later you thus want to
|
||||
ensure what you tell Git is correct; it is thus often wise to spend a few
|
||||
minutes more on testing in case your reproducer is unreliable.
|
||||
|
||||
$ git bisect good
|
||||
After issuing one of these two commands, Git will usually check out another
|
||||
bisection point and print something like 'Bisecting: 675 revisions left to
|
||||
test after this (roughly 10 steps)'. In that case go back to step 1.
|
||||
|
||||
or::
|
||||
If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
|
||||
is the first bad commit', then you have finished the bisection. In that case
|
||||
move to the next point below. Note, right after displaying that line Git will
|
||||
show some details about the culprit including its patch description; this can
|
||||
easily fill your terminal, so you might need to scroll up to see the message
|
||||
mentioning the culprit's commit-id.
|
||||
|
||||
$ git bisect bad
|
||||
In case you missed Git's output, you can always run ``git bisect log`` to
|
||||
print the status: it will show how many steps remain or mention the result of
|
||||
the bisection.
|
||||
|
||||
depending if the bug happened on the changeset you're testing
|
||||
- After some interactions, git bisect will give you the changeset that
|
||||
likely caused the bug.
|
||||
* Recommended complementary task: put the bisection log and the current .config
|
||||
file aside for the bug report; furthermore tell Git to reset the sources to
|
||||
the state before the bisection::
|
||||
|
||||
- For example, if you know that the current version is bad, and version
|
||||
4.8 is good, you could do::
|
||||
git bisect log > ~/bisection-log
|
||||
cp .config ~/bisection-config-culprit
|
||||
git bisect reset
|
||||
|
||||
$ git bisect start
|
||||
$ git bisect bad # Current version is bad
|
||||
$ git bisect good v4.8
|
||||
* Recommended optional task: try reverting the culprit on top of the latest
|
||||
codebase and check if that fixes your bug; if that is the case, it validates
|
||||
the bisection and enables developers to resolve the regression through a
|
||||
revert.
|
||||
|
||||
To try this, update your clone and check out latest mainline. Then tell Git
|
||||
to revert the change by specifying its commit-id::
|
||||
|
||||
git revert --no-edit cafec0cacaca0
|
||||
|
||||
Git might reject this, for example when the bisection landed on a merge
|
||||
commit. In that case, abandon the attempt. Do the same, if Git fails to revert
|
||||
the culprit on its own because later changes depend on it -- at least unless
|
||||
you bisected a stable or longterm kernel series, in which case you want to
|
||||
check out its latest codebase and try a revert there.
|
||||
|
||||
If a revert succeeds, build and test another kernel to check if reverting
|
||||
resolved your regression.
|
||||
|
||||
With that the process is complete. Now report the regression as described by
|
||||
Documentation/admin-guide/reporting-issues.rst.
|
||||
|
||||
|
||||
.. [#f1] You can, optionally, provide both good and bad arguments at git
|
||||
start with ``git bisect start [BAD] [GOOD]``
|
||||
Additional reading material
|
||||
---------------------------
|
||||
|
||||
For further references, please read:
|
||||
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
|
||||
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
|
||||
in the Git documentation.
|
||||
* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
|
||||
from kernel developer Nathan Chancellor.
|
||||
* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
|
||||
* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
|
||||
|
||||
- The man page for ``git-bisect``
|
||||
- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
|
||||
- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
|
||||
- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
|
||||
..
|
||||
end-of-content
|
||||
..
|
||||
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
|
||||
you spot a typo or small mistake, feel free to let him know directly and
|
||||
he'll fix it. You are free to do the same in a mostly informal way if you
|
||||
want to contribute changes to the text -- but for copyright reasons please CC
|
||||
linux-doc@vger.kernel.org and 'sign-off' your contribution as
|
||||
Documentation/process/submitting-patches.rst explains in the section 'Sign
|
||||
your work - the Developer's Certificate of Origin'.
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use 'The Linux kernel development community' for author attribution
|
||||
and link this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
|
||||
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
@ -244,14 +244,14 @@ Reporting the bug
|
||||
Once you find where the bug happened, by inspecting its location,
|
||||
you could either try to fix it yourself or report it upstream.
|
||||
|
||||
In order to report it upstream, you should identify the mailing list
|
||||
used for the development of the affected code. This can be done by using
|
||||
the ``get_maintainer.pl`` script.
|
||||
In order to report it upstream, you should identify the bug tracker, if any, or
|
||||
mailing list used for the development of the affected code. This can be done by
|
||||
using the ``get_maintainer.pl`` script.
|
||||
|
||||
For example, if you find a bug at the gspca's sonixj.c file, you can get
|
||||
its maintainers with::
|
||||
|
||||
$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
|
||||
$ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
|
||||
Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
|
||||
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
|
||||
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
|
||||
@ -267,11 +267,12 @@ Please notice that it will point to:
|
||||
- The driver maintainer (Hans Verkuil);
|
||||
- The subsystem maintainer (Mauro Carvalho Chehab);
|
||||
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
|
||||
- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
|
||||
- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
|
||||
- The bug reporting URIs for the driver/subsystem (none in the above example).
|
||||
|
||||
Usually, the fastest way to have your bug fixed is to report it to mailing
|
||||
list used for the development of the code (linux-media ML) copying the
|
||||
driver maintainer (Hans).
|
||||
If the listing contains bug reporting URIs at the end, please prefer them over
|
||||
email. Otherwise, please report bugs to the mailing list used for the
|
||||
development of the code (linux-media ML) copying the driver maintainer (Hans).
|
||||
|
||||
If you are totally stumped as to whom to send the report, and
|
||||
``get_maintainer.pl`` didn't provide you anything useful, send it to
|
||||
|
@ -78,18 +78,24 @@ Brief summary of control files.
|
||||
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
||||
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.stat show various statistics
|
||||
memory.use_hierarchy set/show hierarchical account enabled
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.force_empty trigger forced page reclaim
|
||||
memory.pressure_level set memory pressure notifications
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.swappiness set/show swappiness parameter of vmscan
|
||||
(See sysctl's vm.swappiness)
|
||||
memory.move_charge_at_immigrate set/show controls of moving charges
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.oom_control set/show oom controls.
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.numa_stat show the number of memory usage per numa
|
||||
node
|
||||
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
|
||||
@ -105,10 +111,18 @@ Brief summary of control files.
|
||||
memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
|
||||
|
||||
memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
|
||||
hits limits
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
==================================== ==========================================
|
||||
|
||||
1. History
|
||||
@ -693,8 +707,10 @@ For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
|
||||
|
||||
# echo 1 > memory.use_hierarchy
|
||||
|
||||
7. Soft limits
|
||||
==============
|
||||
7. Soft limits (DEPRECATED)
|
||||
===========================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
Soft limits allow for greater sharing of memory. The idea behind soft limits
|
||||
is to allow control groups to use as much of the memory as needed, provided
|
||||
@ -834,8 +850,10 @@ It's applicable for root and non-root cgroup.
|
||||
|
||||
.. _cgroup-v1-memory-oom-control:
|
||||
|
||||
10. OOM Control
|
||||
===============
|
||||
10. OOM Control (DEPRECATED)
|
||||
============================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
memory.oom_control file is for OOM notification and other controls.
|
||||
|
||||
@ -882,8 +900,10 @@ At reading, current status of OOM is shown.
|
||||
The number of processes belonging to this cgroup killed by any
|
||||
kind of OOM killer.
|
||||
|
||||
11. Memory Pressure
|
||||
===================
|
||||
11. Memory Pressure (DEPRECATED)
|
||||
================================
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
The pressure level notifications can be used to monitor the memory
|
||||
allocation cost; based on the pressure, applications can implement
|
||||
|
@ -533,10 +533,12 @@ cgroup namespace on namespace creation.
|
||||
Because the resource control interface files in a given directory
|
||||
control the distribution of the parent's resources, the delegatee
|
||||
shouldn't be allowed to write to them. For the first method, this is
|
||||
achieved by not granting access to these files. For the second, the
|
||||
kernel rejects writes to all files other than "cgroup.procs" and
|
||||
"cgroup.subtree_control" on a namespace root from inside the
|
||||
namespace.
|
||||
achieved by not granting access to these files. For the second, files
|
||||
outside the namespace should be hidden from the delegatee by the means
|
||||
of at least mount namespacing, and the kernel rejects writes to all
|
||||
files on a namespace root from inside the cgroup namespace, except for
|
||||
those files listed in "/sys/kernel/cgroup/delegate" (including
|
||||
"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
|
||||
|
||||
The end results are equivalent for both delegation types. Once
|
||||
delegated, the user can build sub-hierarchy under the directory,
|
||||
@ -981,6 +983,14 @@ All cgroup core files are prefixed with "cgroup."
|
||||
A dying cgroup can consume system resources not exceeding
|
||||
limits, which were active at the moment of cgroup deletion.
|
||||
|
||||
nr_subsys_<cgroup_subsys>
|
||||
Total number of live cgroup subsystems (e.g memory
|
||||
cgroup) at and beneath the current cgroup.
|
||||
|
||||
nr_dying_subsys_<cgroup_subsys>
|
||||
Total number of dying cgroup subsystems (e.g. memory
|
||||
cgroup) at and beneath the current cgroup.
|
||||
|
||||
cgroup.freeze
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
Allowed values are "0" and "1". The default is "0".
|
||||
@ -1333,11 +1343,14 @@ The following nested keys are defined.
|
||||
all the existing limitations and potential future extensions.
|
||||
|
||||
memory.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
|
||||
The max memory usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
The max memory usage recorded for the cgroup and its descendants since
|
||||
either the creation of the cgroup or the most recent reset for that FD.
|
||||
|
||||
A write of any non-empty string to this file resets it to the
|
||||
current memory usage for subsequent reads through the same
|
||||
file descriptor.
|
||||
|
||||
memory.oom.group
|
||||
A read-write single value file which exists on non-root
|
||||
@ -1614,6 +1627,25 @@ The following nested keys are defined.
|
||||
Usually because failed to allocate some continuous swap space
|
||||
for the huge page.
|
||||
|
||||
numa_pages_migrated (npn)
|
||||
Number of pages migrated by NUMA balancing.
|
||||
|
||||
numa_pte_updates (npn)
|
||||
Number of pages whose page table entries are modified by
|
||||
NUMA balancing to produce NUMA hinting faults on access.
|
||||
|
||||
numa_hint_faults (npn)
|
||||
Number of NUMA hinting faults.
|
||||
|
||||
pgdemote_kswapd
|
||||
Number of pages demoted by kswapd.
|
||||
|
||||
pgdemote_direct
|
||||
Number of pages demoted directly.
|
||||
|
||||
pgdemote_khugepaged
|
||||
Number of pages demoted by khugepaged.
|
||||
|
||||
memory.numa_stat
|
||||
A read-only nested-keyed file which exists on non-root cgroups.
|
||||
|
||||
@ -1663,11 +1695,14 @@ The following nested keys are defined.
|
||||
Healthy workloads are not expected to reach this limit.
|
||||
|
||||
memory.swap.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
A read-write single value file which exists on non-root cgroups.
|
||||
|
||||
The max swap usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
The max swap usage recorded for the cgroup and its descendants since
|
||||
the creation of the cgroup or the most recent reset for that FD.
|
||||
|
||||
A write of any non-empty string to this file resets it to the
|
||||
current memory usage for subsequent reads through the same
|
||||
file descriptor.
|
||||
|
||||
memory.swap.max
|
||||
A read-write single value file which exists on non-root
|
||||
@ -1731,6 +1766,8 @@ The following nested keys are defined.
|
||||
|
||||
Note that this is subtly different from setting memory.swap.max to
|
||||
0, as it still allows for pages to be written to the zswap pool.
|
||||
This setting has no effect if zswap is disabled, and swapping
|
||||
is allowed unless memory.swap.max is set to 0.
|
||||
|
||||
memory.pressure
|
||||
A read-only nested-keyed file.
|
||||
@ -2940,8 +2977,8 @@ Deprecated v1 Core Features
|
||||
|
||||
- "cgroup.clone_children" is removed.
|
||||
|
||||
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
|
||||
at the root instead.
|
||||
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
|
||||
"cgroup.stat" files at the root instead.
|
||||
|
||||
|
||||
Issues with v1 and Rationales for v2
|
||||
|
@ -3,29 +3,52 @@ dm-delay
|
||||
========
|
||||
|
||||
Device-Mapper's "delay" target delays reads and/or writes
|
||||
and maps them to different devices.
|
||||
and/or flushs and optionally maps them to different devices.
|
||||
|
||||
Parameters::
|
||||
Arguments::
|
||||
|
||||
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
|
||||
[<flush_device> <flush_offset> <flush_delay>]]
|
||||
|
||||
With separate write parameters, the first set is only used for reads.
|
||||
Table line has to either have 3, 6 or 9 arguments:
|
||||
|
||||
3: apply offset and delay to read, write and flush operations on device
|
||||
|
||||
6: apply offset and delay to device, also apply write_offset and write_delay
|
||||
to write and flush operations on optionally different write_device with
|
||||
optionally different sector offset
|
||||
|
||||
9: same as 6 arguments plus define flush_offset and flush_delay explicitely
|
||||
on/with optionally different flush_device/flush_offset.
|
||||
|
||||
Offsets are specified in sectors.
|
||||
|
||||
Delays are specified in milliseconds.
|
||||
|
||||
|
||||
Example scripts
|
||||
===============
|
||||
|
||||
::
|
||||
|
||||
#!/bin/sh
|
||||
# Create device delaying rw operation for 500ms
|
||||
echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
|
||||
#
|
||||
# Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
|
||||
|
||||
::
|
||||
|
||||
#!/bin/sh
|
||||
# Create device delaying only write operation for 500ms and
|
||||
# splitting reads and writes to different devices $1 $2
|
||||
echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
|
||||
#
|
||||
# Create mapped device delaying write and flush operations for 400ms and
|
||||
# splitting reads to device $1 but writes and flushs to different device $2
|
||||
# to different offsets of 2048 and 4096 sectors respectively.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
|
||||
|
||||
::
|
||||
#!/bin/sh
|
||||
#
|
||||
# Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms
|
||||
# onto the same backing device at offset 0 sectors.
|
||||
#
|
||||
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"
|
||||
|
@ -160,15 +160,24 @@ iv_large_sectors
|
||||
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
|
||||
if this flag is specified.
|
||||
|
||||
integrity_key_size:<bytes>
|
||||
Use an integrity key of <bytes> size instead of using an integrity key size
|
||||
of the digest size of the used HMAC algorithm.
|
||||
|
||||
|
||||
Module parameters::
|
||||
|
||||
max_read_size
|
||||
max_write_size
|
||||
Maximum size of read or write requests. When a request larger than this size
|
||||
Maximum size of read requests. When a request larger than this size
|
||||
is received, dm-crypt will split the request. The splitting improves
|
||||
concurrency (the split requests could be encrypted in parallel by multiple
|
||||
cores), but it also causes overhead. The user should tune these parameters to
|
||||
cores), but it also causes overhead. The user should tune this parameters to
|
||||
fit the actual workload.
|
||||
|
||||
max_write_size
|
||||
Maximum size of write requests. When a request larger than this size
|
||||
is received, dm-crypt will split the request. The splitting improves
|
||||
concurrency (the split requests could be encrypted in parallel by multiple
|
||||
cores), but it also causes overhead. The user should tune this parameters to
|
||||
fit the actual workload.
|
||||
|
||||
|
||||
|
@ -251,7 +251,12 @@ The messages are:
|
||||
by the vdostats userspace program to interpret the output
|
||||
buffer.
|
||||
|
||||
dump:
|
||||
config:
|
||||
Outputs useful vdo configuration information. Mostly used
|
||||
by users who want to recreate a similar VDO volume and
|
||||
want to know the creation configuration used.
|
||||
|
||||
dump:
|
||||
Dumps many internal structures to the system log. This is
|
||||
not always safe to run, so it should only be used to debug
|
||||
a hung vdo. Optional parameters to specify structures to
|
||||
|
@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
|
||||
that ext4's inode table readahead algorithm will pre-read into the
|
||||
buffer cache. The default value is 32 blocks.
|
||||
|
||||
nouser_xattr
|
||||
Disables Extended User Attributes. See the attr(5) manual page for
|
||||
more information about extended attributes.
|
||||
|
||||
noacl
|
||||
This option disables POSIX Access Control List support. If ACL support
|
||||
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
|
||||
is enabled by default on mount. See the acl(5) manual page for more
|
||||
information about acl.
|
||||
|
||||
bsddf (*)
|
||||
Make 'df' act like BSD.
|
||||
|
||||
|
@ -158,3 +158,72 @@ poisoned BTB entry and using that safe one for all function returns.
|
||||
In older Zen1 and Zen2, this is accomplished using a reinterpretation
|
||||
technique similar to Retbleed one: srso_untrain_ret() and
|
||||
srso_safe_ret().
|
||||
|
||||
Checking the safe RET mitigation actually works
|
||||
-----------------------------------------------
|
||||
|
||||
In case one wants to validate whether the SRSO safe RET mitigation works
|
||||
on a kernel, one could use two performance counters
|
||||
|
||||
* PMC_0xc8 - Count of RET/RET lw retired
|
||||
* PMC_0xc9 - Count of RET/RET lw retired mispredicted
|
||||
|
||||
and compare the number of RETs retired properly vs those retired
|
||||
mispredicted, in kernel mode. Another way of specifying those events
|
||||
is::
|
||||
|
||||
# perf list ex_ret_near_ret
|
||||
|
||||
List of pre-defined events (to be used in -e or -M):
|
||||
|
||||
core:
|
||||
ex_ret_near_ret
|
||||
[Retired Near Returns]
|
||||
ex_ret_near_ret_mispred
|
||||
[Retired Near Returns Mispredicted]
|
||||
|
||||
Either the command using the event mnemonics::
|
||||
|
||||
# perf stat -e ex_ret_near_ret:k -e ex_ret_near_ret_mispred:k sleep 10s
|
||||
|
||||
or using the raw PMC numbers::
|
||||
|
||||
# perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
|
||||
|
||||
should give the same amount. I.e., every RET retired should be
|
||||
mispredicted::
|
||||
|
||||
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
|
||||
|
||||
Performance counter stats for 'sleep 10s':
|
||||
|
||||
137,167 cpu/event=0xc8,umask=0/k
|
||||
137,173 cpu/event=0xc9,umask=0/k
|
||||
|
||||
10.004110303 seconds time elapsed
|
||||
|
||||
0.000000000 seconds user
|
||||
0.004462000 seconds sys
|
||||
|
||||
vs the case when the mitigation is disabled (spec_rstack_overflow=off)
|
||||
or not functioning properly, showing usually a lot smaller number of
|
||||
mispredicted retired RETs vs the overall count of retired RETs during
|
||||
a workload::
|
||||
|
||||
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
|
||||
|
||||
Performance counter stats for 'sleep 10s':
|
||||
|
||||
201,627 cpu/event=0xc8,umask=0/k
|
||||
4,074 cpu/event=0xc9,umask=0/k
|
||||
|
||||
10.003267252 seconds time elapsed
|
||||
|
||||
0.002729000 seconds user
|
||||
0.000000000 seconds sys
|
||||
|
||||
Also, there is a selftest which performs the above, go to
|
||||
tools/testing/selftests/x86/ and do::
|
||||
|
||||
make srso
|
||||
./srso
|
||||
|
@ -333,12 +333,17 @@
|
||||
allowed anymore to lift isolation
|
||||
requirements as needed. This option
|
||||
does not override iommu=pt
|
||||
force_enable - Force enable the IOMMU on platforms known
|
||||
to be buggy with IOMMU enabled. Use this
|
||||
option with care.
|
||||
pgtbl_v1 - Use v1 page table for DMA-API (Default).
|
||||
pgtbl_v2 - Use v2 page table for DMA-API.
|
||||
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
|
||||
force_enable - Force enable the IOMMU on platforms known
|
||||
to be buggy with IOMMU enabled. Use this
|
||||
option with care.
|
||||
pgtbl_v1 - Use v1 page table for DMA-API (Default).
|
||||
pgtbl_v2 - Use v2 page table for DMA-API.
|
||||
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
|
||||
nohugepages - Limit page-sizes used for v1 page-tables
|
||||
to 4 KiB.
|
||||
v2_pgsizes_only - Limit page-sizes used for v1 page-tables
|
||||
to 4KiB/2Mib/1GiB.
|
||||
|
||||
|
||||
amd_iommu_dump= [HW,X86-64]
|
||||
Enable AMD IOMMU driver option to dump the ACPI table
|
||||
@ -517,6 +522,18 @@
|
||||
Format: <io>,<irq>,<mode>
|
||||
See header of drivers/net/hamradio/baycom_ser_hdx.c.
|
||||
|
||||
bdev_allow_write_mounted=
|
||||
Format: <bool>
|
||||
Control the ability to open a mounted block device
|
||||
for writing, i.e., allow / disallow writes that bypass
|
||||
the FS. This was implemented as a means to prevent
|
||||
fuzzers from crashing the kernel by overwriting the
|
||||
metadata underneath a mounted FS without its awareness.
|
||||
This also prevents destructive formatting of mounted
|
||||
filesystems by naive storage tooling that don't use
|
||||
O_EXCL. Default is Y and can be changed through the
|
||||
Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
|
||||
|
||||
bert_disable [ACPI]
|
||||
Disable BERT OS support on buggy BIOSes.
|
||||
|
||||
@ -2350,6 +2367,18 @@
|
||||
ipcmni_extend [KNL,EARLY] Extend the maximum number of unique System V
|
||||
IPC identifiers from 32,768 to 16,777,216.
|
||||
|
||||
ipe.enforce= [IPE]
|
||||
Format: <bool>
|
||||
Determine whether IPE starts in permissive (0) or
|
||||
enforce (1) mode. The default is enforce.
|
||||
|
||||
ipe.success_audit=
|
||||
[IPE]
|
||||
Format: <bool>
|
||||
Start IPE with success auditing enabled, emitting
|
||||
an audit event when a binary is allowed. The default
|
||||
is 0.
|
||||
|
||||
irqaffinity= [SMP] Set the default irq affinity mask
|
||||
The argument is a cpu list, as described above.
|
||||
|
||||
@ -2648,6 +2677,23 @@
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
|
||||
If enabled, KVM will enable virtualization in hardware
|
||||
when KVM is loaded, and disable virtualization when KVM
|
||||
is unloaded (if KVM is built as a module).
|
||||
|
||||
If disabled, KVM will dynamically enable and disable
|
||||
virtualization on-demand when creating and destroying
|
||||
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
|
||||
number of VMs.
|
||||
|
||||
Enabling virtualization at module lode avoids potential
|
||||
latency for creation of the 0=>1 VM, as KVM serializes
|
||||
virtualization enabling across all online CPUs. The
|
||||
"cost" of enabling virtualization when KVM is loaded,
|
||||
is that doing so may interfere with using out-of-tree
|
||||
hypervisors that want to "own" virtualization hardware.
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
@ -4123,6 +4169,21 @@
|
||||
Disable NUMA, Only set up a single NUMA node
|
||||
spanning all memory.
|
||||
|
||||
numa=fake=<size>[MG]
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as a memory unit, fills all system RAM with
|
||||
nodes of size interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as an integer, fills all system RAM with N
|
||||
fake nodes interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>U
|
||||
[KNL, ARM64, RISCV, X86, EARLY]
|
||||
If given as an integer followed by 'U', it will
|
||||
divide each physical node into N emulated nodes.
|
||||
|
||||
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
|
||||
NUMA balancing.
|
||||
Allowed values are enable and disable
|
||||
@ -4788,6 +4849,16 @@
|
||||
printk.time= Show timing data prefixed to each printk message line
|
||||
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
|
||||
|
||||
proc_mem.force_override= [KNL]
|
||||
Format: {always | ptrace | never}
|
||||
Traditionally /proc/pid/mem allows memory permissions to be
|
||||
overridden without restrictions. This option may be set to
|
||||
restrict that. Can be one of:
|
||||
- 'always': traditional behavior always allows mem overrides.
|
||||
- 'ptrace': only allow mem overrides for active ptracers.
|
||||
- 'never': never allow mem overrides.
|
||||
If not specified, default is the CONFIG_PROC_MEM_* choice.
|
||||
|
||||
processor.max_cstate= [HW,ACPI]
|
||||
Limit processor to maximum C-state
|
||||
max_cstate=9 overrides any DMI blacklist limit.
|
||||
@ -4935,6 +5006,10 @@
|
||||
Set maximum number of finished RCU callbacks to
|
||||
process in one batch.
|
||||
|
||||
rcutree.csd_lock_suppress_rcu_stall= [KNL]
|
||||
Do only a one-line RCU CPU stall warning when
|
||||
there is an ongoing too-long CSD-lock wait.
|
||||
|
||||
rcutree.do_rcu_barrier= [KNL]
|
||||
Request a call to rcu_barrier(). This is
|
||||
throttled so that userspace tests can safely
|
||||
@ -5382,7 +5457,13 @@
|
||||
Time to wait (s) after boot before inducing stall.
|
||||
|
||||
rcutorture.stall_cpu_irqsoff= [KNL]
|
||||
Disable interrupts while stalling if set.
|
||||
Disable interrupts while stalling if set, but only
|
||||
on the first stall in the set.
|
||||
|
||||
rcutorture.stall_cpu_repeat= [KNL]
|
||||
Number of times to repeat the stall sequence,
|
||||
so that rcutorture.stall_cpu_repeat=3 will result
|
||||
in four stall sequences.
|
||||
|
||||
rcutorture.stall_gp_kthread= [KNL]
|
||||
Duration (s) of forced sleep within RCU
|
||||
@ -5570,14 +5651,6 @@
|
||||
of zero will disable batching. Batching is
|
||||
always disabled for synchronize_rcu_tasks().
|
||||
|
||||
rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
|
||||
Set timeout in milliseconds RCU Tasks
|
||||
Rude asynchronous callback batching for
|
||||
call_rcu_tasks_rude(). A negative value
|
||||
will take the default. A value of zero will
|
||||
disable batching. Batching is always disabled
|
||||
for synchronize_rcu_tasks_rude().
|
||||
|
||||
rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
|
||||
Set timeout in milliseconds RCU Tasks
|
||||
Trace asynchronous callback batching for
|
||||
@ -6614,6 +6687,15 @@
|
||||
<deci-seconds>: poll all this frequency
|
||||
0: no polling (default)
|
||||
|
||||
thp_anon= [KNL]
|
||||
Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>
|
||||
state is one of "always", "madvise", "never" or "inherit".
|
||||
Control the default behavior of the system with respect
|
||||
to anonymous transparent hugepages.
|
||||
Can be used multiple times for multiple anon THP sizes.
|
||||
See Documentation/admin-guide/mm/transhuge.rst for more
|
||||
details.
|
||||
|
||||
threadirqs [KNL,EARLY]
|
||||
Force threading of all interrupt handlers except those
|
||||
marked explicitly IRQF_NO_THREAD.
|
||||
@ -6743,6 +6825,51 @@
|
||||
the same thing would happen if it was left off). The irq_handler_entry
|
||||
event, and all events under the "initcall" system.
|
||||
|
||||
Flags can be added to the instance to modify its behavior when it is
|
||||
created. The flags are separated by '^'.
|
||||
|
||||
The available flags are:
|
||||
|
||||
traceoff - Have the tracing instance tracing disabled after it is created.
|
||||
traceprintk - Have trace_printk() write into this trace instance
|
||||
(note, "printk" and "trace_printk" can also be used)
|
||||
|
||||
trace_instance=foo^traceoff^traceprintk,sched,irq
|
||||
|
||||
The flags must come before the defined events.
|
||||
|
||||
If memory has been reserved (see memmap for x86), the instance
|
||||
can use that memory:
|
||||
|
||||
memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
|
||||
|
||||
The above will create a "boot_map" instance that uses the physical
|
||||
memory at 0x284500000 that is 12Megs. The per CPU buffers of that
|
||||
instance will be split up accordingly.
|
||||
|
||||
Alternatively, the memory can be reserved by the reserve_mem option:
|
||||
|
||||
reserve_mem=12M:4096:trace trace_instance=boot_map@trace
|
||||
|
||||
This will reserve 12 megabytes at boot up with a 4096 byte alignment
|
||||
and place the ring buffer in this memory. Note that due to KASLR, the
|
||||
memory may not be the same location each time, which will not preserve
|
||||
the buffer content.
|
||||
|
||||
Also note that the layout of the ring buffer data may change between
|
||||
kernel versions where the validator will fail and reset the ring buffer
|
||||
if the layout is not the same as the previous kernel.
|
||||
|
||||
If the ring buffer is used for persistent bootups and has events enabled,
|
||||
it is recommend to disable tracing so that events from a previous boot do not
|
||||
mix with events of the current boot (unless you are debugging a random crash
|
||||
at boot up).
|
||||
|
||||
reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
|
||||
|
||||
See also Documentation/trace/debugging.rst
|
||||
|
||||
|
||||
trace_options=[option-list]
|
||||
[FTRACE] Enable or disable tracer options at boot.
|
||||
The option-list is a comma delimited list of options
|
||||
@ -7352,6 +7479,13 @@
|
||||
it can be updated at runtime by writing to the
|
||||
corresponding sysfs file.
|
||||
|
||||
workqueue.panic_on_stall=<uint>
|
||||
Panic when workqueue stall is detected by
|
||||
CONFIG_WQ_WATCHDOG. It sets the number times of the
|
||||
stall to trigger panic.
|
||||
|
||||
The default is 0, which disables the panic on stall.
|
||||
|
||||
workqueue.cpu_intensive_thresh_us=
|
||||
Per-cpu work items which run for longer than this
|
||||
threshold are automatically considered CPU intensive
|
||||
|
@ -42,10 +42,14 @@ dongles):
|
||||
``persistent_config``: by default this is off, but when set to 1 the driver
|
||||
will store the current settings to the device's internal eeprom and restore
|
||||
it the next time the device is connected to the USB port.
|
||||
|
||||
- RainShadow Tech. Note: this driver does not support the persistent_config
|
||||
module option of the Pulse-Eight driver. The hardware supports it, but I
|
||||
have no plans to add this feature. But I accept patches :-)
|
||||
|
||||
- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
|
||||
:ref:`extron_da_hd_4k_plus` for more information.
|
||||
|
||||
Miscellaneous:
|
||||
|
||||
- vivid: emulates a CEC receiver and CEC transmitter.
|
||||
@ -378,3 +382,86 @@ it later using ``--analyze-pin``.
|
||||
|
||||
You can also use this as a full-fledged CEC device by configuring it
|
||||
using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
|
||||
|
||||
.. _extron_da_hd_4k_plus:
|
||||
|
||||
Extron DA HD 4K PLUS CEC Adapter driver
|
||||
=======================================
|
||||
|
||||
This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
|
||||
Amplifiers: https://www.extron.com/product/dahd4kplusseries
|
||||
|
||||
The 2, 4 and 6 port models are supported.
|
||||
|
||||
Firmware version 1.02.0001 or higher is required.
|
||||
|
||||
Note that older Extron hardware revisions have a problem with the CEC voltage,
|
||||
which may mean that CEC will not work. This is fixed in hardware revisions
|
||||
E34814 and up.
|
||||
|
||||
The CEC support has two modes: the first is a manual mode where userspace has
|
||||
to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
|
||||
full control, it is also complicated.
|
||||
|
||||
The second mode is an automatic mode, which is selected if the module option
|
||||
``vendor_id`` is set. In that case the driver controls CEC and CEC messages
|
||||
received in the input will be distributed to the outputs. It is still possible
|
||||
to use the /dev/cecX devices to talk to the connected devices directly, but it is
|
||||
the driver that configures everything and deals with things like Hotplug Detect
|
||||
changes.
|
||||
|
||||
The driver also takes care of the EDIDs: /dev/videoX devices are created to
|
||||
read the EDIDs and (for the HDMI Input port) to set the EDID.
|
||||
|
||||
By default userspace is responsible to set the EDID for the HDMI Input
|
||||
according to the EDIDs of the connected displays. But if the ``manufacturer_name``
|
||||
module option is set, then the driver will take care of setting the EDID
|
||||
of the HDMI Input based on the supported resolutions of the connected displays.
|
||||
Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
|
||||
displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
|
||||
it will fall back to an EDID that just reports 1080p60.
|
||||
|
||||
The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
|
||||
|
||||
The extron-da-hd-4k-plus driver implements the following module options:
|
||||
|
||||
``debug``
|
||||
---------
|
||||
|
||||
If set to 1, then all serial port traffic is shown.
|
||||
|
||||
``vendor_id``
|
||||
-------------
|
||||
|
||||
The CEC Vendor ID to report to connected displays.
|
||||
|
||||
If set, then the driver will take care of distributing CEC messages received
|
||||
on the input to the HDMI outputs. This is done for the following CEC messages:
|
||||
|
||||
- <Standby>
|
||||
- <Image View On> and <Text View On>
|
||||
- <Give Device Power Status>
|
||||
- <Set System Audio Mode>
|
||||
- <Request Current Latency>
|
||||
|
||||
If not set, then userspace is responsible for this, and it will have to
|
||||
configure the CEC devices for HDMI Input and the HDMI Outputs manually.
|
||||
|
||||
``manufacturer_name``
|
||||
---------------------
|
||||
|
||||
A three character manufacturer name that is used in the EDID for the HDMI
|
||||
Input. If not set, then userspace is reponsible for configuring an EDID.
|
||||
If set, then the driver will update the EDID automatically based on the
|
||||
resolutions supported by the connected displays, and it will not be possible
|
||||
anymore to manually set the EDID for the HDMI Input.
|
||||
|
||||
``hpd_never_low``
|
||||
-----------------
|
||||
|
||||
If set, then the Hotplug Detect pin of the HDMI Input will always be high,
|
||||
even if nothing is connected to the HDMI Outputs. If not set (the default)
|
||||
then the Hotplug Detect pin of the HDMI input will go low if all the detected
|
||||
Hotplug Detect pins of the HDMI Outputs are also low.
|
||||
|
||||
This option may be changed dynamically.
|
||||
|
@ -227,8 +227,13 @@ Common FPDL3/GMSL output parameters
|
||||
open.*
|
||||
|
||||
**frame_rate** (RW):
|
||||
Output video frame rate in frames per second. The default frame rate is
|
||||
60Hz.
|
||||
Output video signal frame rate limit in frames per second. Due to
|
||||
the limited output pixel clock steps, the card can not always generate
|
||||
a frame rate perfectly matching the value required by the connected display.
|
||||
Using this parameter one can limit the frame rate by "crippling" the signal
|
||||
so that the lines are not equal (the porches of the last line differ) but
|
||||
the signal appears like having the exact frame rate to the connected display.
|
||||
The default frame rate limit is 60Hz.
|
||||
|
||||
**hsync_polarity** (RW):
|
||||
HSYNC signal polarity.
|
||||
@ -253,33 +258,33 @@ Common FPDL3/GMSL output parameters
|
||||
and there is a non-linear stepping between two consecutive allowed
|
||||
frequencies. The driver finds the nearest allowed frequency to the given
|
||||
value and sets it. When reading this property, you get the exact
|
||||
frequency set by the driver. The default frequency is 70000kHz.
|
||||
frequency set by the driver. The default frequency is 61150kHz.
|
||||
|
||||
*Note: This parameter can not be changed while the output v4l2 device is
|
||||
open.*
|
||||
|
||||
**hsync_width** (RW):
|
||||
Width of the HSYNC signal in pixels. The default value is 16.
|
||||
Width of the HSYNC signal in pixels. The default value is 40.
|
||||
|
||||
**vsync_width** (RW):
|
||||
Width of the VSYNC signal in video lines. The default value is 2.
|
||||
Width of the VSYNC signal in video lines. The default value is 20.
|
||||
|
||||
**hback_porch** (RW):
|
||||
Number of PCLK pulses between deassertion of the HSYNC signal and the first
|
||||
valid pixel in the video line (marked by DE=1). The default value is 32.
|
||||
valid pixel in the video line (marked by DE=1). The default value is 50.
|
||||
|
||||
**hfront_porch** (RW):
|
||||
Number of PCLK pulses between the end of the last valid pixel in the video
|
||||
line (marked by DE=1) and assertion of the HSYNC signal. The default value
|
||||
is 32.
|
||||
is 50.
|
||||
|
||||
**vback_porch** (RW):
|
||||
Number of video lines between deassertion of the VSYNC signal and the video
|
||||
line with the first valid pixel (marked by DE=1). The default value is 2.
|
||||
line with the first valid pixel (marked by DE=1). The default value is 31.
|
||||
|
||||
**vfront_porch** (RW):
|
||||
Number of video lines between the end of the last valid pixel line (marked
|
||||
by DE=1) and assertion of the VSYNC signal. The default value is 2.
|
||||
by DE=1) and assertion of the VSYNC signal. The default value is 30.
|
||||
|
||||
FPDL3 specific input parameters
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -114,11 +114,18 @@ to be applied to the hardware during a video stream, allowing userspace
|
||||
to dynamically modify values such as black level, cross talk corrections
|
||||
and others.
|
||||
|
||||
The buffer format is defined by struct :c:type:`rkisp1_params_cfg`, and
|
||||
userspace should set
|
||||
The ISP driver supports two different parameters configuration methods, the
|
||||
`fixed parameters format` or the `extensible parameters format`.
|
||||
|
||||
When using the `fixed parameters` method the buffer format is defined by struct
|
||||
:c:type:`rkisp1_params_cfg`, and userspace should set
|
||||
:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
|
||||
dataformat.
|
||||
|
||||
When using the `extensible parameters` method the buffer format is defined by
|
||||
struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
|
||||
:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
|
||||
the dataformat.
|
||||
|
||||
Capturing Video Frames Example
|
||||
==============================
|
||||
|
@ -328,7 +328,7 @@ and an HDMI input, one input for each input type. Those are described in more
|
||||
detail below.
|
||||
|
||||
Special attention has been given to the rate at which new frames become
|
||||
available. The jitter will be around 1 jiffie (that depends on the HZ
|
||||
available. The jitter will be around 1 jiffy (that depends on the HZ
|
||||
configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second),
|
||||
but the long-term behavior is exactly following the framerate. So a
|
||||
framerate of 59.94 Hz is really different from 60 Hz. If the framerate
|
||||
@ -1343,7 +1343,7 @@ Some Future Improvements
|
||||
Just as a reminder and in no particular order:
|
||||
|
||||
- Add a virtual alsa driver to test audio
|
||||
- Add virtual sub-devices and media controller support
|
||||
- Add virtual sub-devices
|
||||
- Some support for testing compressed video
|
||||
- Add support to loop raw VBI output to raw VBI input
|
||||
- Add support to loop teletext sliced VBI output to VBI input
|
||||
@ -1358,4 +1358,4 @@ Just as a reminder and in no particular order:
|
||||
- Make a thread for the RDS generation, that would help in particular for the
|
||||
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
|
||||
in real-time.
|
||||
- Changing the EDID should cause hotplug detect emulation to happen.
|
||||
- Changing the EDID doesn't wait 100 ms before setting the HPD signal.
|
||||
|
@ -7,7 +7,7 @@ Getting Started
|
||||
This document briefly describes how you can use DAMON by demonstrating its
|
||||
default user space tool. Please note that this document describes only a part
|
||||
of its features for brevity. Please refer to the usage `doc
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_ of the tool for more
|
||||
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
|
||||
details.
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ User Space Tool
|
||||
|
||||
For the demonstration, we will use the default user space tool for DAMON,
|
||||
called DAMON Operator (DAMO). It is available at
|
||||
https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
|
||||
https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
|
||||
your ``$PATH``. It's not mandatory, though.
|
||||
|
||||
Because DAMO is using the sysfs interface (refer to :doc:`usage` for the
|
||||
|
@ -7,19 +7,19 @@ Detailed Usages
|
||||
DAMON provides below interfaces for different users.
|
||||
|
||||
- *DAMON user space tool.*
|
||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||
`This <https://github.com/damonitor/damo>`_ is for privileged people such as
|
||||
system administrators who want a just-working human-friendly interface.
|
||||
Using this, users can use the DAMON’s major features in a human-friendly way.
|
||||
It may not be highly tuned for special cases, though. For more detail,
|
||||
please refer to its `usage document
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
|
||||
- *sysfs interface.*
|
||||
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||
features by reading from and writing to special sysfs files. Therefore,
|
||||
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs.
|
||||
<https://github.com/damonitor/damo>`_ is one example of such programs.
|
||||
- *Kernel Space Programming Interface.*
|
||||
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
|
||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||
@ -543,7 +543,7 @@ memory rate becomes larger than 60%, or lower than 30%". ::
|
||||
# echo 300 > watermarks/low
|
||||
|
||||
Please note that it's highly recommended to use user space tools like `damo
|
||||
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
|
||||
<https://github.com/damonitor/damo>`_ rather than manually reading and writing
|
||||
the files as above. Above is only for an example.
|
||||
|
||||
.. _tracepoint:
|
||||
|
@ -294,8 +294,9 @@ The following files are currently defined:
|
||||
``crash_hotplug`` read-only: when changes to the system memory map
|
||||
occur due to hot un/plug of memory, this file contains
|
||||
'1' if the kernel updates the kdump capture kernel memory
|
||||
map itself (via elfcorehdr), or '0' if userspace must update
|
||||
the kdump capture kernel memory map.
|
||||
map itself (via elfcorehdr and other relevant kexec
|
||||
segments), or '0' if userspace must update the kdump
|
||||
capture kernel memory map.
|
||||
|
||||
Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
|
||||
configuration option.
|
||||
|
@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
|
||||
|
||||
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
|
||||
|
||||
All THPs at fault and collapse time will be added to _deferred_list,
|
||||
and will therefore be split under memory presure if they are considered
|
||||
"underused". A THP is underused if the number of zero-filled pages in
|
||||
the THP is above max_ptes_none (see below). It is possible to disable
|
||||
this behaviour by writing 0 to shrink_underused, and enable it by writing
|
||||
1 to it::
|
||||
|
||||
echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
|
||||
echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
|
||||
|
||||
khugepaged will be automatically started when PMD-sized THP is enabled
|
||||
(either of the per-size anon control or the top-level control are set
|
||||
to "always" or "madvise"), and it'll be automatically shutdown when
|
||||
@ -284,13 +294,37 @@ that THP is shared. Exceeding the number would block the collapse::
|
||||
|
||||
A higher value may increase memory footprint for some workloads.
|
||||
|
||||
Boot parameter
|
||||
==============
|
||||
Boot parameters
|
||||
===============
|
||||
|
||||
You can change the sysfs boot time defaults of Transparent Hugepage
|
||||
Support by passing the parameter ``transparent_hugepage=always`` or
|
||||
``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
|
||||
to the kernel command line.
|
||||
You can change the sysfs boot time default for the top-level "enabled"
|
||||
control by passing the parameter ``transparent_hugepage=always`` or
|
||||
``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
|
||||
kernel command line.
|
||||
|
||||
Alternatively, each supported anonymous THP size can be controlled by
|
||||
passing ``thp_anon=<size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>``,
|
||||
where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
|
||||
supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
|
||||
``never`` or ``inherit``.
|
||||
|
||||
For example, the following will set 16K, 32K, 64K THP to ``always``,
|
||||
set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
|
||||
to ``never``::
|
||||
|
||||
thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
|
||||
|
||||
``thp_anon=`` may be specified multiple times to configure all THP sizes as
|
||||
required. If ``thp_anon=`` is specified at least once, any anon THP sizes
|
||||
not explicitly configured on the command line are implicitly set to
|
||||
``never``.
|
||||
|
||||
``transparent_hugepage`` setting only affects the global toggle. If
|
||||
``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
|
||||
However, if a valid ``thp_anon`` setting is provided by the user, the
|
||||
PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
|
||||
is not defined within a valid ``thp_anon``, its policy will default to
|
||||
``never``.
|
||||
|
||||
Hugepages in tmpfs/shmem
|
||||
========================
|
||||
@ -447,6 +481,12 @@ thp_deferred_split_page
|
||||
splitting it would free up some memory. Pages on split queue are
|
||||
going to be split under memory pressure.
|
||||
|
||||
thp_underused_split_page
|
||||
is incremented when a huge page on the split queue was split
|
||||
because it was underused. A THP is underused if the number of
|
||||
zero pages in the THP is above a certain threshold
|
||||
(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
|
||||
|
||||
thp_split_pmd
|
||||
is incremented every time a PMD split into table of PTEs.
|
||||
This can happen, for instance, when application calls mprotect() or
|
||||
@ -527,6 +567,18 @@ split_deferred
|
||||
it would free up some memory. Pages on split queue are going to
|
||||
be split under memory pressure, if splitting is possible.
|
||||
|
||||
nr_anon
|
||||
the number of anonymous THP we have in the whole system. These THPs
|
||||
might be currently entirely mapped or have partially unmapped/unused
|
||||
subpages.
|
||||
|
||||
nr_anon_partially_mapped
|
||||
the number of anonymous THP which are likely partially mapped, possibly
|
||||
wasting memory, and have been queued for deferred memory reclamation.
|
||||
Note that in corner some cases (e.g., failed migration), we might detect
|
||||
an anonymous THP as "partially mapped" and count it here, even though it
|
||||
is not actually partially mapped anymore.
|
||||
|
||||
As the system ages, allocating huge pages may be expensive as the
|
||||
system uses memory compaction to copy data around memory to free a
|
||||
huge page for use. There are some counters in ``/proc/vmstat`` to help
|
||||
|
17
Documentation/admin-guide/perf/arm-ni.rst
Normal file
@ -0,0 +1,17 @@
|
||||
====================================
|
||||
Arm Network-on Chip Interconnect PMU
|
||||
====================================
|
||||
|
||||
NI-700 and friends implement a distinct PMU for each clock domain within the
|
||||
interconnect. Correspondingly, the driver exposes multiple PMU devices named
|
||||
arm_ni_<x>_cd_<y>, where <x> is an (arbitrary) instance identifier and <y> is
|
||||
the clock domain ID within that particular instance. If multiple NI instances
|
||||
exist within a system, the PMU devices can be correlated with the underlying
|
||||
hardware instance via sysfs parentage.
|
||||
|
||||
Each PMU exposes base event aliases for the interface types present in its clock
|
||||
domain. These require qualifying with the "eventid" and "nodeid" parameters
|
||||
to specify the event code to count and the interface at which to count it
|
||||
(per the configured hardware ID as reflected in the xxNI_NODE_INFO register).
|
||||
The exception is the "cycles" alias for the PMU cycle counter, which is encoded
|
||||
with the PMU node type and needs no further qualification.
|
@ -46,16 +46,16 @@ Some of the events only exist for specific configurations.
|
||||
DesignWare Cores (DWC) PCIe PMU Driver
|
||||
=======================================
|
||||
|
||||
This driver adds PMU devices for each PCIe Root Port named based on the BDF of
|
||||
This driver adds PMU devices for each PCIe Root Port named based on the SBDF of
|
||||
the Root Port. For example,
|
||||
|
||||
30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
|
||||
0001:30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
|
||||
|
||||
the PMU device name for this Root Port is dwc_rootport_3018.
|
||||
the PMU device name for this Root Port is dwc_rootport_13018.
|
||||
|
||||
The DWC PCIe PMU driver registers a perf PMU driver, which provides
|
||||
description of available events and configuration options in sysfs, see
|
||||
/sys/bus/event_source/devices/dwc_rootport_{bdf}.
|
||||
/sys/bus/event_source/devices/dwc_rootport_{sbdf}.
|
||||
|
||||
The "format" directory describes format of the config fields of the
|
||||
perf_event_attr structure. The "events" directory provides configuration
|
||||
@ -66,16 +66,16 @@ The "perf list" command shall list the available events from sysfs, e.g.::
|
||||
|
||||
$# perf list | grep dwc_rootport
|
||||
<...>
|
||||
dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
|
||||
dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
|
||||
<...>
|
||||
dwc_rootport_3018/rx_memory_read,lane=?/ [Kernel PMU event]
|
||||
dwc_rootport_13018/rx_memory_read,lane=?/ [Kernel PMU event]
|
||||
|
||||
Time Based Analysis Event Usage
|
||||
-------------------------------
|
||||
|
||||
Example usage of counting PCIe RX TLP data payload (Units of bytes)::
|
||||
|
||||
$# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
|
||||
$# perf stat -a -e dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/
|
||||
|
||||
The average RX/TX bandwidth can be calculated using the following formula:
|
||||
|
||||
@ -88,7 +88,7 @@ Lane Event Usage
|
||||
Each lane has the same event set and to avoid generating a list of hundreds
|
||||
of events, the user need to specify the lane ID explicitly, e.g.::
|
||||
|
||||
$# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/
|
||||
$# perf stat -a -e dwc_rootport_13018/rx_memory_read,lane=4/
|
||||
|
||||
The driver does not support sampling, therefore "perf record" will not
|
||||
work. Per-task (without "-a") perf sessions are not supported.
|
||||
|
@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the
|
||||
PMU hardware device.
|
||||
|
||||
The "bus" sysfs file allows users to get the bus number of Root Ports
|
||||
monitored by PMU.
|
||||
monitored by PMU. Furthermore users can get the Root Ports range in
|
||||
[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes
|
||||
respectively.
|
||||
|
||||
Example usage of perf::
|
||||
|
||||
|
@ -16,6 +16,7 @@ Performance monitor support
|
||||
starfive_starlink_pmu
|
||||
arm-ccn
|
||||
arm-cmn
|
||||
arm-ni
|
||||
xgene-pmu
|
||||
arm_dsu_pmu
|
||||
thunderx2-pmu
|
||||
|
@ -251,7 +251,9 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``.
|
||||
``cpuinfo``. On systems that support preferred core, the driver will have
|
||||
different values for some cores than others and this will reflect the values
|
||||
advertised by the platform at bootup.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
@ -262,6 +264,17 @@ lowest non-linear performance in `AMD CPPC Performance Capability
|
||||
<perf_cap_>`_.)
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_hw_prefcore``
|
||||
|
||||
Whether the platform supports the preferred core feature and it has been
|
||||
enabled. This attribute is read-only.
|
||||
|
||||
``amd_pstate_prefcore_ranking``
|
||||
|
||||
The performance ranking of the core. This number doesn't have any unit, but
|
||||
larger numbers are preferred at the time of reading. This can change at
|
||||
runtime based on platform conditions. This attribute is read-only.
|
||||
|
||||
``energy_performance_available_preferences``
|
||||
|
||||
A list of all the supported EPP preferences that could be used for
|
||||
|
@ -113,3 +113,62 @@ to apply at each uncore* level.
|
||||
|
||||
Support for "current_freq_khz" is available only at each fabric cluster
|
||||
level (i.e., in uncore* directory).
|
||||
|
||||
Efficiency vs. Latency Tradeoff
|
||||
-------------------------------
|
||||
|
||||
The Efficiency Latency Control (ELC) feature improves performance
|
||||
per watt. With this feature hardware power management algorithms
|
||||
optimize trade-off between latency and power consumption. For some
|
||||
latency sensitive workloads further tuning can be done by SW to
|
||||
get desired performance.
|
||||
|
||||
The hardware monitors the average CPU utilization across all cores
|
||||
in a power domain at regular intervals and decides an uncore frequency.
|
||||
While this may result in the best performance per watt, workload may be
|
||||
expecting higher performance at the expense of power. Consider an
|
||||
application that intermittently wakes up to perform memory reads on an
|
||||
otherwise idle system. In such cases, if hardware lowers uncore
|
||||
frequency, then there may be delay in ramp up of frequency to meet
|
||||
target performance.
|
||||
|
||||
The ELC control defines some parameters which can be changed from SW.
|
||||
If the average CPU utilization is below a user-defined threshold
|
||||
(elc_low_threshold_percent attribute below), the user-defined uncore
|
||||
floor frequency will be used (elc_floor_freq_khz attribute below)
|
||||
instead of hardware calculated minimum.
|
||||
|
||||
Similarly in high load scenario where the CPU utilization goes above
|
||||
the high threshold value (elc_high_threshold_percent attribute below)
|
||||
instead of jumping to maximum uncore frequency, frequency is increased
|
||||
in 100MHz steps. This avoids consuming unnecessarily high power
|
||||
immediately with CPU utilization spikes.
|
||||
|
||||
Attributes for efficiency latency control:
|
||||
|
||||
``elc_floor_freq_khz``
|
||||
This attribute is used to get/set the efficiency latency floor frequency.
|
||||
If this variable is lower than the 'min_freq_khz', it is ignored by
|
||||
the firmware.
|
||||
|
||||
``elc_low_threshold_percent``
|
||||
This attribute is used to get/set the efficiency latency control low
|
||||
threshold. This attribute is in percentages of CPU utilization.
|
||||
|
||||
``elc_high_threshold_percent``
|
||||
This attribute is used to get/set the efficiency latency control high
|
||||
threshold. This attribute is in percentages of CPU utilization.
|
||||
|
||||
``elc_high_threshold_enable``
|
||||
This attribute is used to enable/disable the efficiency latency control
|
||||
high threshold. Write '1' to enable, '0' to disable.
|
||||
|
||||
Example system configuration below, which does following:
|
||||
* when CPU utilization is less than 10%: sets uncore frequency to 800MHz
|
||||
* when CPU utilization is higher than 95%: increases uncore frequency in
|
||||
100MHz steps, until power limit is reached
|
||||
|
||||
elc_floor_freq_khz:800000
|
||||
elc_high_threshold_percent:95
|
||||
elc_high_threshold_enable:1
|
||||
elc_low_threshold_percent:10
|
||||
|
@ -129,7 +129,7 @@ Setting the ramoops parameters can be done in several different manners:
|
||||
takes a size, alignment and name as arguments. The name is used
|
||||
to map the memory to a label that can be retrieved by ramoops.
|
||||
|
||||
reserver_mem=2M:4096:oops ramoops.mem_name=oops
|
||||
reserve_mem=2M:4096:oops ramoops.mem_name=oops
|
||||
|
||||
You can specify either RAM memory or peripheral devices' memory. However, when
|
||||
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
|
||||
|
@ -182,3 +182,5 @@ More detailed explanation for tainting
|
||||
produce extremely unusual kernel structure layouts (even performance
|
||||
pathological ones), which is important to know when debugging. Set at
|
||||
build time.
|
||||
|
||||
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
|
||||
|
@ -359,7 +359,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
|
||||
descriptor you want a callback to be called at the end of the transfer
|
||||
(dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
|
||||
Depending on the direction, set the callback on the descriptor that finishes
|
||||
the overal transfer:
|
||||
the overall transfer:
|
||||
|
||||
* DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
|
||||
* DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
|
||||
@ -371,7 +371,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
|
||||
As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
|
||||
STM32 MDMA channel before STM32 DMA channel.
|
||||
|
||||
If any, your callback will be called to warn you about the end of the overal
|
||||
If any, your callback will be called to warn you about the end of the overall
|
||||
transfer or the period completion.
|
||||
|
||||
Don't forget to terminate both channels. STM32 DMA channel is configured in
|
||||
|
@ -26,7 +26,7 @@ There are no systems that support the physical addition (or removal) of CPUs
|
||||
while the system is running, and ACPI is not able to sufficiently describe
|
||||
them.
|
||||
|
||||
e.g. New CPUs come with new caches, but the platform's cache toplogy is
|
||||
e.g. New CPUs come with new caches, but the platform's cache topology is
|
||||
described in a static table, the PPTT. How caches are shared between CPUs is
|
||||
not discoverable, and must be described by firmware.
|
||||
|
||||
|
@ -365,6 +365,8 @@ HWCAP2_SME_SF8DP2
|
||||
HWCAP2_SME_SF8DP4
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
|
||||
|
||||
HWCAP2_POE
|
||||
Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
@ -55,6 +55,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
@ -249,8 +251,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A |
|
||||
| | Hip09 SMMU PMCG | | |
|
||||
| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A |
|
||||
| | ,11} SMMU PMCG | | |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
|
||||
|
@ -85,6 +85,38 @@ to CPUINTC directly::
|
||||
| Devices |
|
||||
+---------+
|
||||
|
||||
Advanced Extended IRQ model
|
||||
===========================
|
||||
|
||||
In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
|
||||
to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
|
||||
to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
|
||||
go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
|
||||
|
||||
+-----+ +-----------------------+ +-------+
|
||||
| IPI | --> | CPUINTC | <-- | Timer |
|
||||
+-----+ +-----------------------+ +-------+
|
||||
^ ^ ^
|
||||
| | |
|
||||
+---------+ +----------+ +---------+ +-------+
|
||||
| EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
|
||||
+---------+ +----------+ +---------+ +-------+
|
||||
^ ^
|
||||
| |
|
||||
+---------+ +---------+
|
||||
| PCH-PIC | | PCH-MSI |
|
||||
+---------+ +---------+
|
||||
^ ^ ^
|
||||
| | |
|
||||
+---------+ +---------+ +---------+
|
||||
| Devices | | PCH-LPC | | Devices |
|
||||
+---------+ +---------+ +---------+
|
||||
^
|
||||
|
|
||||
+---------+
|
||||
| Devices |
|
||||
+---------+
|
||||
|
||||
ACPI-related definitions
|
||||
========================
|
||||
|
||||
|
@ -134,7 +134,7 @@ Hardware
|
||||
|
||||
* PTCR and partition table entries (partition table is in secure
|
||||
memory). An attempt to write to PTCR will cause a Hypervisor
|
||||
Emulation Assitance interrupt.
|
||||
Emulation Assistance interrupt.
|
||||
|
||||
* LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
|
||||
non-architected registers. An attempt to write to them will cause a
|
||||
|
@ -15,7 +15,7 @@ status for the use of Vector in userspace. The intended usage guideline for
|
||||
these interfaces is to give init systems a way to modify the availability of V
|
||||
for processes running under its domain. Calling these interfaces is not
|
||||
recommended in libraries routines because libraries should not override policies
|
||||
configured from the parant process. Also, users must noted that these interfaces
|
||||
configured from the parent process. Also, users must note that these interfaces
|
||||
are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
|
||||
to use in a portable code. To get the availability of V in an ELF program,
|
||||
please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the
|
||||
|
@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
|
||||
resulting from plugging it in references a queue device bound to the vfio_ap
|
||||
device driver.
|
||||
|
||||
Driver Features
|
||||
===============
|
||||
The vfio_ap driver exposes a sysfs file containing supported features.
|
||||
This exists so third party tools (like Libvirt and mdevctl) can query the
|
||||
availability of specific features.
|
||||
|
||||
The features list can be found here: /sys/bus/matrix/devices/matrix/features
|
||||
|
||||
Entries are space delimited. Each entry consists of a combination of
|
||||
alphanumeric and underscore characters.
|
||||
|
||||
Example:
|
||||
cat /sys/bus/matrix/devices/matrix/features
|
||||
guest_matrix dyn ap_config
|
||||
|
||||
the following features are advertised:
|
||||
|
||||
---------------+---------------------------------------------------------------+
|
||||
| Flag | Description |
|
||||
+==============+===============================================================+
|
||||
| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
|
||||
| | adapters and domains that are or will be passed through to a |
|
||||
| | guest when the mdev is attached to it. |
|
||||
+--------------+---------------------------------------------------------------+
|
||||
| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
|
||||
| | domains for a guest to which the mdev is attached. |
|
||||
+------------+-----------------------------------------------------------------+
|
||||
| ap_config | ap_config interface for one-shot modifications to mdev config |
|
||||
+--------------+---------------------------------------------------------------+
|
||||
|
||||
Limitations
|
||||
===========
|
||||
Live guest migration is not supported for guests using AP devices without
|
||||
|
@ -162,7 +162,7 @@ Mitigation points
|
||||
3. It would take a large number of these precisely-timed NMIs to mount
|
||||
an actual attack. There's presumably not enough bandwidth.
|
||||
4. The NMI in question occurs after a VERW, i.e. when user state is
|
||||
restored and most interesting data is already scrubbed. Whats left
|
||||
restored and most interesting data is already scrubbed. What's left
|
||||
is only the data that NMI touches, and that may or may not be of
|
||||
any interest.
|
||||
|
||||
|
@ -170,18 +170,6 @@ NUMA
|
||||
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
|
||||
partitioning.
|
||||
|
||||
numa=fake=<size>[MG]
|
||||
If given as a memory unit, fills all system RAM with nodes of
|
||||
size interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>
|
||||
If given as an integer, fills all system RAM with N fake nodes
|
||||
interleaved over physical nodes.
|
||||
|
||||
numa=fake=<N>U
|
||||
If given as an integer followed by 'U', it will divide each
|
||||
physical node into N emulated nodes.
|
||||
|
||||
ACPI
|
||||
====
|
||||
|
||||
|
@ -125,7 +125,7 @@ FSGSBASE instructions enablement
|
||||
FSGSBASE instructions compiler support
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
|
||||
GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
|
||||
instructions. Clang 5 supports them as well.
|
||||
|
||||
=================== ===========================
|
||||
@ -135,7 +135,7 @@ instructions. Clang 5 supports them as well.
|
||||
_writegsbase_u64() Write the GS base register
|
||||
=================== ===========================
|
||||
|
||||
To utilize these instrinsics <immintrin.h> must be included in the source
|
||||
To utilize these intrinsics <immintrin.h> must be included in the source
|
||||
code and the compiler option -mfsgsbase has to be added.
|
||||
|
||||
Compiler support for FS/GS based addressing
|
||||
|
@ -9,7 +9,7 @@ controllers), BFQ's main features are:
|
||||
- BFQ guarantees a high system and application responsiveness, and a
|
||||
low latency for time-sensitive applications, such as audio or video
|
||||
players;
|
||||
- BFQ distributes bandwidth, and not just time, among processes or
|
||||
- BFQ distributes bandwidth, not just time, among processes or
|
||||
groups (switching back to time distribution when needed to keep
|
||||
throughput high).
|
||||
|
||||
@ -111,7 +111,7 @@ Higher speed for code-development tasks
|
||||
|
||||
If some additional workload happens to be executed in parallel, then
|
||||
BFQ executes the I/O-related components of typical code-development
|
||||
tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
|
||||
tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
|
||||
NOOP or DEADLINE.
|
||||
|
||||
High throughput
|
||||
@ -127,9 +127,9 @@ Strong fairness, bandwidth and delay guarantees
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
BFQ distributes the device throughput, and not just the device time,
|
||||
among I/O-bound applications in proportion their weights, with any
|
||||
among I/O-bound applications in proportion to their weights, with any
|
||||
workload and regardless of the device parameters. From these bandwidth
|
||||
guarantees, it is possible to compute tight per-I/O-request delay
|
||||
guarantees, it is possible to compute a tight per-I/O-request delay
|
||||
guarantees by a simple formula. If not configured for strict service
|
||||
guarantees, BFQ switches to time-based resource sharing (only) for
|
||||
applications that would otherwise cause a throughput loss.
|
||||
@ -199,7 +199,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
|
||||
- On flash-based storage with internal queueing of commands
|
||||
(typically NCQ), device idling happens to be always detrimental
|
||||
for throughput. So, with these devices, BFQ performs idling
|
||||
to throughput. So, with these devices, BFQ performs idling
|
||||
only when strictly needed for service guarantees, i.e., for
|
||||
guaranteeing low latency or fairness. In these cases, overall
|
||||
throughput may be sub-optimal. No solution currently exists to
|
||||
@ -212,7 +212,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
and to reduce their latency. The most important action taken to
|
||||
achieve this goal is to give to the queues associated with these
|
||||
applications more than their fair share of the device
|
||||
throughput. For brevity, we call just "weight-raising" the whole
|
||||
throughput. For brevity, we call it just "weight-raising" the whole
|
||||
sets of actions taken by BFQ to privilege these queues. In
|
||||
particular, BFQ provides a milder form of weight-raising for
|
||||
interactive applications, and a stronger form for soft real-time
|
||||
@ -231,7 +231,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
responsive in detecting interleaved I/O (cooperating processes),
|
||||
that it enables BFQ to achieve a high throughput, by queue
|
||||
merging, even for queues for which CFQ needs a different
|
||||
mechanism, preemption, to get a high throughput. As such EQM is a
|
||||
mechanism, preemption, to get a high throughput. As such, EQM is a
|
||||
unified mechanism to achieve a high throughput with interleaved
|
||||
I/O.
|
||||
|
||||
@ -254,7 +254,7 @@ plus a lot of code, are borrowed from CFQ.
|
||||
- First, with any proportional-share scheduler, the maximum
|
||||
deviation with respect to an ideal service is proportional to
|
||||
the maximum budget (slice) assigned to queues. As a consequence,
|
||||
BFQ can keep this deviation tight not only because of the
|
||||
BFQ can keep this deviation tight, not only because of the
|
||||
accurate service of B-WF2Q+, but also because BFQ *does not*
|
||||
need to assign a larger budget to a queue to let the queue
|
||||
receive a higher fraction of the device throughput.
|
||||
@ -327,7 +327,7 @@ applications. Unset this tunable if you need/want to control weights.
|
||||
slice_idle
|
||||
----------
|
||||
|
||||
This parameter specifies how long BFQ should idle for next I/O
|
||||
This parameter specifies how long BFQ should idle for the next I/O
|
||||
request, when certain sync BFQ queues become empty. By default
|
||||
slice_idle is a non-zero value. Idling has a double purpose: boosting
|
||||
throughput and making sure that the desired throughput distribution is
|
||||
@ -365,7 +365,7 @@ terms of I/O-request dispatches. To guarantee that the actual service
|
||||
order then corresponds to the dispatch order, the strict_guarantees
|
||||
tunable must be set too.
|
||||
|
||||
There is an important flipside for idling: apart from the above cases
|
||||
There is an important flip side to idling: apart from the above cases
|
||||
where it is beneficial also for throughput, idling can severely impact
|
||||
throughput. One important case is random workload. Because of this
|
||||
issue, BFQ tends to avoid idling as much as possible, when it is not
|
||||
@ -475,7 +475,7 @@ max_budget
|
||||
|
||||
Maximum amount of service, measured in sectors, that can be provided
|
||||
to a BFQ queue once it is set in service (of course within the limits
|
||||
of the above timeout). According to what said in the description of
|
||||
of the above timeout). According to what was said in the description of
|
||||
the algorithm, larger values increase the throughput in proportion to
|
||||
the percentage of sequential I/O requests issued. The price of larger
|
||||
values is that they coarsen the granularity of short-term bandwidth
|
||||
|
@ -368,7 +368,7 @@ No additional type data follow ``btf_type``.
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC
|
||||
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
|
||||
or BTF_FUNC_EXTERN)
|
||||
or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
|
||||
* ``type``: a BTF_KIND_FUNC_PROTO type
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
@ -424,9 +424,8 @@ following data::
|
||||
__u32 linkage;
|
||||
};
|
||||
|
||||
``struct btf_var`` encoding:
|
||||
* ``linkage``: currently only static variable 0, or globally allocated
|
||||
variable in ELF sections 1
|
||||
``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
|
||||
see :ref:`BTF_Var_Linkage_Constants`.
|
||||
|
||||
Not all type of global variables are supported by LLVM at this point.
|
||||
The following is currently available:
|
||||
@ -549,6 +548,38 @@ The ``btf_enum64`` encoding:
|
||||
If the original enum value is signed and the size is less than 8,
|
||||
that value will be sign extended into 8 bytes.
|
||||
|
||||
2.3 Constant Values
|
||||
-------------------
|
||||
|
||||
.. _BTF_Function_Linkage_Constants:
|
||||
|
||||
2.3.1 Function Linkage Constant Values
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. table:: Function Linkage Values and Meanings
|
||||
|
||||
=================== ===== ===========
|
||||
kind value description
|
||||
=================== ===== ===========
|
||||
``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
|
||||
``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
|
||||
``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
|
||||
=================== ===== ===========
|
||||
|
||||
|
||||
.. _BTF_Var_Linkage_Constants:
|
||||
|
||||
2.3.2 Variable Linkage Constant Values
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. table:: Variable Linkage Values and Meanings
|
||||
|
||||
============================ ===== ===========
|
||||
kind value description
|
||||
============================ ===== ===========
|
||||
``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
|
||||
``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
|
||||
``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
|
||||
============================ ===== ===========
|
||||
|
||||
3. BTF Kernel API
|
||||
=================
|
||||
|
||||
|
@ -121,6 +121,8 @@ described in more detail in the footnotes.
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
|
||||
@ -131,11 +133,23 @@ described in more detail in the footnotes.
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``raw_tracepoint+`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
|
||||
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
|
||||
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``tc`` | |
|
||||
| | | ``tc`` [#tc_legacy]_ | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
|
||||
+ +----------------------------------------+----------------------------------+-----------+
|
||||
| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
@ -155,7 +169,9 @@ described in more detail in the footnotes.
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
|
||||
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
|
||||
+ + +----------------------------------+-----------+
|
||||
| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
|
||||
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
|
||||
@ -209,5 +225,11 @@ described in more detail in the footnotes.
|
||||
``a-zA-Z0-9_.*?``.
|
||||
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
|
||||
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
|
||||
.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
|
||||
``tcx/*`` instead.
|
||||
.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
|
||||
but ``name`` is ignored and it is recommended to just use plain
|
||||
``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
|
||||
that is tagged with ``SEC(".struct_ops[.link]")``.
|
||||
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
|
||||
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
|
||||
|
@ -418,7 +418,7 @@ The rules for correspondence between registers / stack slots are as follows:
|
||||
linked to the registers and stack slots of the parent state with the same
|
||||
indices.
|
||||
|
||||
* For the outer stack frames, only caller saved registers (r6-r9) and stack
|
||||
* For the outer stack frames, only callee saved registers (r6-r9) and stack
|
||||
slots are linked to the registers and stack slots of the parent state with the
|
||||
same indices.
|
||||
|
||||
|
8
Documentation/core-api/cleanup.rst
Normal file
@ -0,0 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================
|
||||
Scope-based Cleanup Helpers
|
||||
===========================
|
||||
|
||||
.. kernel-doc:: include/linux/cleanup.h
|
||||
:doc: scope-based cleanup helpers
|
@ -737,8 +737,9 @@ can process the event further.
|
||||
|
||||
When changes to the CPUs in the system occur, the sysfs file
|
||||
/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel
|
||||
updates the kdump capture kernel list of CPUs itself (via elfcorehdr),
|
||||
or '0' if userspace must update the kdump capture kernel list of CPUs.
|
||||
updates the kdump capture kernel list of CPUs itself (via elfcorehdr and
|
||||
other relevant kexec segment), or '0' if userspace must update the kdump
|
||||
capture kernel list of CPUs.
|
||||
|
||||
The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration
|
||||
option.
|
||||
@ -750,8 +751,9 @@ file can be used in a udev rule as follows:
|
||||
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
|
||||
|
||||
For a CPU hot un/plug event, if the architecture supports kernel updates
|
||||
of the elfcorehdr (which contains the list of CPUs), then the rule skips
|
||||
the unload-then-reload of the kdump capture kernel.
|
||||
of the elfcorehdr (which contains the list of CPUs) and other relevant
|
||||
kexec segments, then the rule skips the unload-then-reload of the kdump
|
||||
capture kernel.
|
||||
|
||||
Kernel Inline Documentations Reference
|
||||
======================================
|
||||
|
@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel.
|
||||
|
||||
kobject
|
||||
kref
|
||||
cleanup
|
||||
assoc_array
|
||||
xarray
|
||||
maple_tree
|
||||
@ -49,6 +50,7 @@ Library functionality that is used throughout the kernel.
|
||||
wrappers/atomic_t
|
||||
wrappers/atomic_bitops
|
||||
floating-point
|
||||
union_find
|
||||
|
||||
Low level entry and exit
|
||||
========================
|
||||
|
@ -45,8 +45,9 @@ here we briefly outline their recommended usage:
|
||||
* If the allocation is performed from an atomic context, e.g interrupt
|
||||
handler, use ``GFP_NOWAIT``. This flag prevents direct reclaim and
|
||||
IO or filesystem operations. Consequently, under memory pressure
|
||||
``GFP_NOWAIT`` allocation is likely to fail. Allocations which
|
||||
have a reasonable fallback should be using ``GFP_NOWARN``.
|
||||
``GFP_NOWAIT`` allocation is likely to fail. Users of this flag need
|
||||
to provide a suitable fallback to cope with such failures where
|
||||
appropriate.
|
||||
* If you think that accessing memory reserves is justified and the kernel
|
||||
will be stressed unless allocation succeeds, you may use ``GFP_ATOMIC``.
|
||||
* Untrusted allocations triggered from userspace should be a subject
|
||||
|
@ -576,13 +576,12 @@ The field width is passed by value, the bitmap is passed by reference.
|
||||
Helper macros cpumask_pr_args() and nodemask_pr_args() are available to ease
|
||||
printing cpumask and nodemask.
|
||||
|
||||
Flags bitfields such as page flags, page_type, gfp_flags
|
||||
Flags bitfields such as page flags and gfp_flags
|
||||
--------------------------------------------------------
|
||||
|
||||
::
|
||||
|
||||
%pGp 0x17ffffc0002036(referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff)
|
||||
%pGt 0xffffff7f(buddy)
|
||||
%pGg GFP_USER|GFP_DMA32|GFP_NOWARN
|
||||
%pGv read|exec|mayread|maywrite|mayexec|denywrite
|
||||
|
||||
@ -591,7 +590,6 @@ would construct the value. The type of flags is given by the third
|
||||
character. Currently supported are:
|
||||
|
||||
- p - [p]age flags, expects value of type (``unsigned long *``)
|
||||
- t - page [t]ype, expects value of type (``unsigned int *``)
|
||||
- v - [v]ma_flags, expects value of type (``unsigned long *``)
|
||||
- g - [g]fp_flags, expects value of type (``gfp_t *``)
|
||||
|
||||
|
106
Documentation/core-api/union_find.rst
Normal file
@ -0,0 +1,106 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Union-Find in Linux
|
||||
====================
|
||||
|
||||
|
||||
:Date: June 21, 2024
|
||||
:Author: Xavier <xavier_qy@163.com>
|
||||
|
||||
What is union-find, and what is it used for?
|
||||
------------------------------------------------
|
||||
|
||||
Union-find is a data structure used to handle the merging and querying
|
||||
of disjoint sets. The primary operations supported by union-find are:
|
||||
|
||||
Initialization: Resetting each element as an individual set, with
|
||||
each set's initial parent node pointing to itself.
|
||||
|
||||
Find: Determine which set a particular element belongs to, usually by
|
||||
returning a “representative element” of that set. This operation
|
||||
is used to check if two elements are in the same set.
|
||||
|
||||
Union: Merge two sets into one.
|
||||
|
||||
As a data structure used to maintain sets (groups), union-find is commonly
|
||||
utilized to solve problems related to offline queries, dynamic connectivity,
|
||||
and graph theory. It is also a key component in Kruskal's algorithm for
|
||||
computing the minimum spanning tree, which is crucial in scenarios like
|
||||
network routing. Consequently, union-find is widely referenced. Additionally,
|
||||
union-find has applications in symbolic computation, register allocation,
|
||||
and more.
|
||||
|
||||
Space Complexity: O(n), where n is the number of nodes.
|
||||
|
||||
Time Complexity: Using path compression can reduce the time complexity of
|
||||
the find operation, and using union by rank can reduce the time complexity
|
||||
of the union operation. These optimizations reduce the average time
|
||||
complexity of each find and union operation to O(α(n)), where α(n) is the
|
||||
inverse Ackermann function. This can be roughly considered a constant time
|
||||
complexity for practical purposes.
|
||||
|
||||
This document covers use of the Linux union-find implementation. For more
|
||||
information on the nature and implementation of union-find, see:
|
||||
|
||||
Wikipedia entry on union-find
|
||||
https://en.wikipedia.org/wiki/Disjoint-set_data_structure
|
||||
|
||||
Linux implementation of union-find
|
||||
-----------------------------------
|
||||
|
||||
Linux's union-find implementation resides in the file "lib/union_find.c".
|
||||
To use it, "#include <linux/union_find.h>".
|
||||
|
||||
The union-find data structure is defined as follows::
|
||||
|
||||
struct uf_node {
|
||||
struct uf_node *parent;
|
||||
unsigned int rank;
|
||||
};
|
||||
|
||||
In this structure, parent points to the parent node of the current node.
|
||||
The rank field represents the height of the current tree. During a union
|
||||
operation, the tree with the smaller rank is attached under the tree with the
|
||||
larger rank to maintain balance.
|
||||
|
||||
Initializing union-find
|
||||
-----------------------
|
||||
|
||||
You can complete the initialization using either static or initialization
|
||||
interface. Initialize the parent pointer to point to itself and set the rank
|
||||
to 0.
|
||||
Example::
|
||||
|
||||
struct uf_node my_node = UF_INIT_NODE(my_node);
|
||||
|
||||
or
|
||||
|
||||
uf_node_init(&my_node);
|
||||
|
||||
Find the Root Node of union-find
|
||||
--------------------------------
|
||||
|
||||
This operation is mainly used to determine whether two nodes belong to the same
|
||||
set in the union-find. If they have the same root, they are in the same set.
|
||||
During the find operation, path compression is performed to improve the
|
||||
efficiency of subsequent find operations.
|
||||
Example::
|
||||
|
||||
int connected;
|
||||
struct uf_node *root1 = uf_find(&node_1);
|
||||
struct uf_node *root2 = uf_find(&node_2);
|
||||
if (root1 == root2)
|
||||
connected = 1;
|
||||
else
|
||||
connected = 0;
|
||||
|
||||
Union Two Sets in union-find
|
||||
----------------------------
|
||||
|
||||
To union two sets in the union-find, you first find their respective root nodes
|
||||
and then link the smaller node to the larger node based on the rank of the root
|
||||
nodes.
|
||||
Example::
|
||||
|
||||
uf_union(&node_1, &node_2);
|
@ -75,6 +75,17 @@ Only files which are linked to the main kernel image or are compiled as
|
||||
kernel modules are supported by this mechanism.
|
||||
|
||||
|
||||
Module specific configs
|
||||
-----------------------
|
||||
|
||||
Gcov kernel configs for specific modules are described below:
|
||||
|
||||
CONFIG_GCOV_PROFILE_RDS:
|
||||
Enables GCOV profiling on RDS for checking which functions or
|
||||
lines are executed. This config is used by the rds selftest to
|
||||
generate coverage reports. If left unset the report is omitted.
|
||||
|
||||
|
||||
Files
|
||||
-----
|
||||
|
||||
|
@ -361,7 +361,8 @@ Alternatives Considered
|
||||
-----------------------
|
||||
|
||||
An alternative data race detection approach for the kernel can be found in the
|
||||
`Kernel Thread Sanitizer (KTSAN) <https://github.com/google/ktsan/wiki>`_.
|
||||
`Kernel Thread Sanitizer (KTSAN)
|
||||
<https://github.com/google/kernel-sanitizers/blob/master/KTSAN.md>`_.
|
||||
KTSAN is a happens-before data race detector, which explicitly establishes the
|
||||
happens-before order between memory operations, which can then be used to
|
||||
determine data races as defined in `Data Races`_.
|
||||
|
@ -53,6 +53,13 @@ configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
|
||||
The KUnit test suite is very likely to fail when using a deferrable timer
|
||||
since it currently causes very unpredictable sample intervals.
|
||||
|
||||
By default KFENCE will only sample 1 heap allocation within each sample
|
||||
interval. *Burst mode* allows to sample successive heap allocations, where the
|
||||
kernel boot parameter ``kfence.burst`` can be set to a non-zero value which
|
||||
denotes the *additional* successive allocations within a sample interval;
|
||||
setting ``kfence.burst=N`` means that ``1 + N`` successive allocations are
|
||||
attempted through KFENCE for each sample interval.
|
||||
|
||||
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
|
||||
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
|
||||
255), the number of available guarded objects can be controlled. Each object
|
||||
|
10
Documentation/dev-tools/kunit/api/clk.rst
Normal file
@ -0,0 +1,10 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========
|
||||
Clk API
|
||||
========
|
||||
|
||||
The KUnit clk API is used to test clk providers and clk consumers.
|
||||
|
||||
.. kernel-doc:: drivers/clk/clk_kunit_helpers.c
|
||||
:export:
|
@ -9,11 +9,17 @@ API Reference
|
||||
test
|
||||
resource
|
||||
functionredirection
|
||||
clk
|
||||
of
|
||||
platformdevice
|
||||
|
||||
|
||||
This page documents the KUnit kernel testing API. It is divided into the
|
||||
following sections:
|
||||
|
||||
Core KUnit API
|
||||
==============
|
||||
|
||||
Documentation/dev-tools/kunit/api/test.rst
|
||||
|
||||
- Documents all of the standard testing API
|
||||
@ -25,3 +31,18 @@ Documentation/dev-tools/kunit/api/resource.rst
|
||||
Documentation/dev-tools/kunit/api/functionredirection.rst
|
||||
|
||||
- Documents the KUnit Function Redirection API
|
||||
|
||||
Driver KUnit API
|
||||
================
|
||||
|
||||
Documentation/dev-tools/kunit/api/clk.rst
|
||||
|
||||
- Documents the KUnit clk API
|
||||
|
||||
Documentation/dev-tools/kunit/api/of.rst
|
||||
|
||||
- Documents the KUnit device tree (OF) API
|
||||
|
||||
Documentation/dev-tools/kunit/api/platformdevice.rst
|
||||
|
||||
- Documents the KUnit platform device API
|
||||
|
13
Documentation/dev-tools/kunit/api/of.rst
Normal file
@ -0,0 +1,13 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Device Tree (OF) API
|
||||
====================
|
||||
|
||||
The KUnit device tree API is used to test device tree (of_*) dependent code.
|
||||
|
||||
.. kernel-doc:: include/kunit/of.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: drivers/of/of_kunit_helpers.c
|
||||
:export:
|
10
Documentation/dev-tools/kunit/api/platformdevice.rst
Normal file
@ -0,0 +1,10 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================
|
||||
Platform Device API
|
||||
===================
|
||||
|
||||
The KUnit platform device API is used to test platform devices.
|
||||
|
||||
.. kernel-doc:: lib/kunit/platform.c
|
||||
:export:
|
@ -188,15 +188,26 @@ For example, a Kconfig entry might look like:
|
||||
Test File and Module Names
|
||||
==========================
|
||||
|
||||
KUnit tests can often be compiled as a module. These modules should be named
|
||||
after the test suite, followed by ``_test``. If this is likely to conflict with
|
||||
non-KUnit tests, the suffix ``_kunit`` can also be used.
|
||||
KUnit tests are often compiled as a separate module. To avoid conflicting
|
||||
with regular modules, KUnit modules should be named after the test suite,
|
||||
followed by ``_kunit`` (e.g. if "foobar" is the core module, then
|
||||
"foobar_kunit" is the KUnit test module).
|
||||
|
||||
The easiest way of achieving this is to name the file containing the test suite
|
||||
``<suite>_test.c`` (or, as above, ``<suite>_kunit.c``). This file should be
|
||||
placed next to the code under test.
|
||||
Test source files, whether compiled as a separate module or an
|
||||
``#include`` in another source file, are best kept in a ``tests/``
|
||||
subdirectory to not conflict with other source files (e.g. for
|
||||
tab-completion).
|
||||
|
||||
Note that the ``_test`` suffix has also been used in some existing
|
||||
tests. The ``_kunit`` suffix is preferred, as it makes the distinction
|
||||
between KUnit and non-KUnit tests clearer.
|
||||
|
||||
So for the common case, name the file containing the test suite
|
||||
``tests/<suite>_kunit.c``. The ``tests`` directory should be placed at
|
||||
the same level as the code under test. For example, tests for
|
||||
``lib/string.c`` live in ``lib/tests/string_kunit.c``.
|
||||
|
||||
If the suite name contains some or all of the name of the test's parent
|
||||
directory, it may make sense to modify the source filename to reduce redundancy.
|
||||
For example, a ``foo_firmware`` suite could be in the ``foo/firmware_test.c``
|
||||
file.
|
||||
directory, it may make sense to modify the source filename to reduce
|
||||
redundancy. For example, a ``foo_firmware`` suite could be in the
|
||||
``foo/tests/firmware_kunit.c`` file.
|
||||
|
@ -1,17 +0,0 @@
|
||||
* ARC HS Performance Counters
|
||||
|
||||
The ARC HS can be configured with a pipeline performance monitor for counting
|
||||
CPU and cache events like cache misses and hits. Like conventional PCT there
|
||||
are 100+ hardware conditions dynamically mapped to up to 32 counters.
|
||||
It also supports overflow interrupts.
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should contain
|
||||
"snps,archs-pct"
|
||||
|
||||
Example:
|
||||
|
||||
pmu {
|
||||
compatible = "snps,archs-pct";
|
||||
};
|
33
Documentation/devicetree/bindings/arc/snps,archs-pct.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arc/snps,archs-pct.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ARC HS Performance Counters
|
||||
|
||||
maintainers:
|
||||
- Aryabhatta Dey <aryabhattadey35@gmail.com>
|
||||
|
||||
description:
|
||||
The ARC HS can be configured with a pipeline performance monitor for counting
|
||||
CPU and cache events like cache misses and hits. Like conventional PCT there
|
||||
are 100+ hardware conditions dynamically mapped to up to 32 counters.
|
||||
It also supports overflow interrupts.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: snps,archs-pct
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- clocks
|
||||
|
||||
additionalProperties: false
|
@ -25,10 +25,18 @@ select:
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: amlogic,meson-gx-ao-secure
|
||||
- const: syscon
|
||||
|
||||
oneOf:
|
||||
- items:
|
||||
- const: amlogic,meson-gx-ao-secure
|
||||
- const: syscon
|
||||
- items:
|
||||
- enum:
|
||||
- amlogic,a4-ao-secure
|
||||
- amlogic,c3-ao-secure
|
||||
- amlogic,s4-ao-secure
|
||||
- amlogic,t7-ao-secure
|
||||
- const: amlogic,meson-gx-ao-secure
|
||||
- const: syscon
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
|
@ -17,7 +17,7 @@ description: |
|
||||
The Coresight dummy source component is for the specific coresight source
|
||||
devices kernel don't have permission to access or configure. For some SOCs,
|
||||
there would be Coresight source trace components on sub-processor which
|
||||
are conneted to AP processor via debug bus. For these devices, a dummy driver
|
||||
are connected to AP processor via debug bus. For these devices, a dummy driver
|
||||
is needed to register them as Coresight source devices, so that paths can be
|
||||
created in the driver. It provides Coresight API for operations on dummy
|
||||
source devices, such as enabling and disabling them. It also provides the
|
||||
|
@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: ARM Corstone1000
|
||||
|
||||
maintainers:
|
||||
- Vishnu Banavath <vishnu.banavath@arm.com>
|
||||
- Rui Miguel Silva <rui.silva@linaro.org>
|
||||
- Abdellatif El Khlifi <abdellatif.elkhlifi@arm.com>
|
||||
- Hugues Kamba Mpiana <hugues.kambampiana@arm.com>
|
||||
|
||||
description: |+
|
||||
ARM's Corstone1000 includes pre-verified Corstone SSE-710 subsystem that
|
||||
|