Merge drm/drm-next into drm-intel-next

Sync to v6.12-rc1.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
This commit is contained in:
Jani Nikula 2024-09-30 11:49:10 +03:00
commit e056857125
10332 changed files with 414254 additions and 170170 deletions

View File

@ -141,11 +141,13 @@ ForEachMacros:
- 'damon_for_each_target_safe'
- 'damos_for_each_filter'
- 'damos_for_each_filter_safe'
- 'damos_for_each_quota_goal'
- 'damos_for_each_quota_goal_safe'
- 'data__for_each_file'
- 'data__for_each_file_new'
- 'data__for_each_file_start'
- 'device_for_each_child_node'
- 'displayid_iter_for_each'
- 'device_for_each_child_node_scoped'
- 'dma_fence_array_for_each'
- 'dma_fence_chain_for_each'
- 'dma_fence_unwrap_for_each'
@ -172,11 +174,14 @@ ForEachMacros:
- 'drm_for_each_plane'
- 'drm_for_each_plane_mask'
- 'drm_for_each_privobj'
- 'drm_gem_for_each_gpuva'
- 'drm_gem_for_each_gpuva_safe'
- 'drm_gem_for_each_gpuvm_bo'
- 'drm_gem_for_each_gpuvm_bo_safe'
- 'drm_gpuva_for_each_op'
- 'drm_gpuva_for_each_op_from_reverse'
- 'drm_gpuva_for_each_op_reverse'
- 'drm_gpuva_for_each_op_safe'
- 'drm_gpuvm_bo_for_each_va'
- 'drm_gpuvm_bo_for_each_va_safe'
- 'drm_gpuvm_for_each_va'
- 'drm_gpuvm_for_each_va_range'
- 'drm_gpuvm_for_each_va_range_safe'
@ -192,11 +197,11 @@ ForEachMacros:
- 'dsa_switch_for_each_port_continue_reverse'
- 'dsa_switch_for_each_port_safe'
- 'dsa_switch_for_each_user_port'
- 'dsa_switch_for_each_user_port_continue_reverse'
- 'dsa_tree_for_each_cpu_port'
- 'dsa_tree_for_each_user_port'
- 'dsa_tree_for_each_user_port_continue_reverse'
- 'dso__for_each_symbol'
- 'dsos__for_each_with_build_id'
- 'elf_hash_for_each_possible'
- 'elf_symtab__for_each_symbol'
- 'evlist__for_each_cpu'
@ -216,6 +221,7 @@ ForEachMacros:
- 'for_each_and_bit'
- 'for_each_andnot_bit'
- 'for_each_available_child_of_node'
- 'for_each_available_child_of_node_scoped'
- 'for_each_bench'
- 'for_each_bio'
- 'for_each_board_func_rsrc'
@ -234,6 +240,7 @@ ForEachMacros:
- 'for_each_card_widgets_safe'
- 'for_each_cgroup_storage_type'
- 'for_each_child_of_node'
- 'for_each_child_of_node_scoped'
- 'for_each_clear_bit'
- 'for_each_clear_bit_from'
- 'for_each_clear_bitrange'
@ -251,6 +258,7 @@ ForEachMacros:
- 'for_each_cpu'
- 'for_each_cpu_and'
- 'for_each_cpu_andnot'
- 'for_each_cpu_from'
- 'for_each_cpu_or'
- 'for_each_cpu_wrap'
- 'for_each_dapm_widgets'
@ -269,13 +277,14 @@ ForEachMacros:
- 'for_each_element'
- 'for_each_element_extid'
- 'for_each_element_id'
- 'for_each_enabled_cpu'
- 'for_each_endpoint_of_node'
- 'for_each_event'
- 'for_each_event_tps'
- 'for_each_evictable_lru'
- 'for_each_fib6_node_rt_rcu'
- 'for_each_fib6_walker_rt'
- 'for_each_free_mem_pfn_range_in_zone'
- 'for_each_file_lock'
- 'for_each_free_mem_pfn_range_in_zone_from'
- 'for_each_free_mem_range'
- 'for_each_free_mem_range_reverse'
@ -286,15 +295,18 @@ ForEachMacros:
- 'for_each_group_member'
- 'for_each_group_member_head'
- 'for_each_hstate'
- 'for_each_hwgpio'
- 'for_each_if'
- 'for_each_inject_fn'
- 'for_each_insn'
- 'for_each_insn_op_loc'
- 'for_each_insn_prefix'
- 'for_each_intid'
- 'for_each_iommu'
- 'for_each_ip_tunnel_rcu'
- 'for_each_irq_nr'
- 'for_each_lang'
- 'for_each_link_ch_maps'
- 'for_each_link_codecs'
- 'for_each_link_cpus'
- 'for_each_link_platforms'
@ -332,6 +344,9 @@ ForEachMacros:
- 'for_each_new_plane_in_state_reverse'
- 'for_each_new_private_obj_in_state'
- 'for_each_new_reg'
- 'for_each_nhlt_endpoint'
- 'for_each_nhlt_endpoint_fmtcfg'
- 'for_each_nhlt_fmtcfg'
- 'for_each_node'
- 'for_each_node_by_name'
- 'for_each_node_by_type'
@ -387,12 +402,15 @@ ForEachMacros:
- 'for_each_reloc_from'
- 'for_each_requested_gpio'
- 'for_each_requested_gpio_in_range'
- 'for_each_reserved_child_of_node'
- 'for_each_reserved_mem_range'
- 'for_each_reserved_mem_region'
- 'for_each_rtd_ch_maps'
- 'for_each_rtd_codec_dais'
- 'for_each_rtd_components'
- 'for_each_rtd_cpu_dais'
- 'for_each_rtd_dais'
- 'for_each_rtd_dais_reverse'
- 'for_each_sband_iftype_data'
- 'for_each_script'
- 'for_each_sec'
@ -533,8 +551,6 @@ ForEachMacros:
- 'lwq_for_each_safe'
- 'map__for_each_symbol'
- 'map__for_each_symbol_by_name'
- 'maps__for_each_entry'
- 'maps__for_each_entry_safe'
- 'mas_for_each'
- 'mci_for_each_dimm'
- 'media_device_for_each_entity'
@ -560,7 +576,9 @@ ForEachMacros:
- 'netdev_hw_addr_list_for_each'
- 'nft_rule_for_each_expr'
- 'nla_for_each_attr'
- 'nla_for_each_attr_type'
- 'nla_for_each_nested'
- 'nla_for_each_nested_type'
- 'nlmsg_for_each_attr'
- 'nlmsg_for_each_msg'
- 'nr_neigh_for_each'
@ -579,6 +597,7 @@ ForEachMacros:
- 'perf_config_sections__for_each_entry'
- 'perf_config_set__for_each_entry'
- 'perf_cpu_map__for_each_cpu'
- 'perf_cpu_map__for_each_cpu_skip_any'
- 'perf_cpu_map__for_each_idx'
- 'perf_evlist__for_each_entry'
- 'perf_evlist__for_each_entry_reverse'
@ -639,7 +658,6 @@ ForEachMacros:
- 'shost_for_each_device'
- 'sk_for_each'
- 'sk_for_each_bound'
- 'sk_for_each_bound_bhash2'
- 'sk_for_each_entry_offset_rcu'
- 'sk_for_each_from'
- 'sk_for_each_rcu'
@ -653,6 +671,7 @@ ForEachMacros:
- 'snd_soc_dapm_widget_for_each_path_safe'
- 'snd_soc_dapm_widget_for_each_sink_path'
- 'snd_soc_dapm_widget_for_each_source_path'
- 'sparsebit_for_each_set_range'
- 'strlist__for_each_entry'
- 'strlist__for_each_entry_safe'
- 'sym_for_each_insn'
@ -662,7 +681,6 @@ ForEachMacros:
- 'tcf_act_for_each_action'
- 'tcf_exts_for_each_action'
- 'ttm_resource_manager_for_each_res'
- 'twsk_for_each_bound_bhash2'
- 'udp_portaddr_for_each_entry'
- 'udp_portaddr_for_each_entry_rcu'
- 'usb_hub_for_each_child'
@ -686,6 +704,9 @@ ForEachMacros:
- 'xbc_node_for_each_child'
- 'xbc_node_for_each_key_value'
- 'xbc_node_for_each_subkey'
- 'ynl_attr_for_each'
- 'ynl_attr_for_each_nested'
- 'ynl_attr_for_each_payload'
- 'zorro_for_each_dev'
IncludeBlocks: Preserve

4
.gitignore vendored
View File

@ -24,6 +24,7 @@
*.dwo
*.elf
*.gcno
*.gcda
*.gz
*.i
*.ko
@ -46,7 +47,6 @@
*.so.dbg
*.su
*.symtypes
*.symversions
*.tab.[ch]
*.tar
*.xz
@ -70,6 +70,7 @@ modules.order
/Module.markers
/modules.builtin
/modules.builtin.modinfo
/modules.builtin.ranges
/modules.nsdeps
#
@ -142,7 +143,6 @@ GTAGS
# id-utils files
ID
*.orig
*~
\#*#

View File

@ -154,6 +154,9 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de>
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
Chuck Lever <chuck.lever@oracle.com> <cel@citi.umich.edu>
Claudiu Beznea <claudiu.beznea@tuxon.dev> <claudiu.beznea@microchip.com>
Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com>
Corey Minyard <minyard@acm.org>
@ -313,6 +316,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
@ -529,6 +533,7 @@ Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
Peter A Jonsson <pj@ludd.ltu.se>
Peter Oruba <peter.oruba@amd.com>
Peter Oruba <peter@oruba.de>
Pierre-Louis Bossart <pierre-louis.bossart@linux.dev> <pierre-louis.bossart@linux.intel.com>
Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
Praveen BP <praveenbp@ti.com>
Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>

View File

@ -378,6 +378,9 @@ S: 1549 Hiironen Rd.
S: Brimson, MN 55602
S: USA
N: Arnd Bergmann
D: Maintainer of Cell Broadband Engine Architecture
N: Hennus Bergman
P: 1024/77D50909 76 99 FD 31 91 E1 96 1C 90 BB 22 80 62 F6 BD 63
D: Author and maintainer of the QIC-02 tape driver
@ -1869,6 +1872,9 @@ S: K osmidomkum 723
S: 160 00 Praha 6
S: Czech Republic
N: Jeremy Kerr
D: Maintainer of SPU File System
N: Michael Kerrisk
E: mtk.manpages@gmail.com
W: https://man7.org/

View File

@ -11,7 +11,7 @@ Description:
Read returns '0' or '1' for read-write or read-only modes
respectively.
Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
and "off", i.e. what kstrbool() supports.
and "off", i.e. what kstrtobool() supports.
Note: This file is only present if CONFIG_NVMEM_SYSFS
is enabled.

View File

@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called
the vDSO and it often contains useful and highly-optimized alternatives
to real syscalls.
These functions are called just like ordinary C function according to
your platform's ABI. Call them from a sensible context. (For example,
if you set CS on x86 to something strange, the vDSO functions are
These functions are called according to your platform's ABI. On many
platforms they are called just like ordinary C function. On other platforms
(ex: powerpc) they are called with the same convention as system calls which
is different from ordinary C functions. Call them from a sensible context.
(For example, if you set CS on x86 to something strange, the vDSO functions are
within their rights to crash.) In addition, if you pass a bad
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.

View File

@ -6,3 +6,10 @@ Description:
This item contains just one readonly attribute: port_num.
It contains the port number of the /dev/ttyGS<n> device
associated with acm function's instance "name".
What: /config/usb-gadget/gadget/functions/acm.name/protocol
Date: Aug 2024
KernelVersion: 6.13
Description:
Reported bInterfaceProtocol for the ACM device. For legacy
reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).

View File

@ -30,4 +30,12 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
p_it_name playback input terminal name
p_it_ch_name playback channels name
p_ot_name playback output terminal name
p_fu_vol_name playback mute/volume functional unit name
c_it_name capture input terminal name
c_it_ch_name capture channels name
c_ot_name capture output terminal name
c_fu_vol_name capture mute/volume functional unit name
===================== =======================================

View File

@ -35,6 +35,17 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
if_ctrl_name topology control name
clksrc_in_name input clock name
clksrc_out_name output clock name
p_it_name playback input terminal name
p_it_ch_name playback input first channel name
p_ot_name playback output terminal name
p_fu_vol_name playback mute/volume function unit name
c_it_name capture input terminal name
c_it_ch_name capture input first channel name
c_ot_name capture output terminal name
c_fu_vol_name capture mute/volume functional unit name
c_terminal_type code of the capture terminal type
p_terminal_type code of the playback terminal type
===================== =======================================

View File

@ -0,0 +1,39 @@
What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
This dumps the calibration table that was filled during the
digital interface tuning process.
What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
List all the available test tones:
- off
- midscale_short
- pos_fullscale
- neg_fullscale
- checkerboard
- prbs23
- prbs9
- one_zero_toggle
- user
- bit_toggle
- sync
- one_bit_high
- mixed_bit_frequency
- ramp
Note that depending on the actual device being used, some of the
above might not be available (and they won't be listed when
reading the file).
What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Writing to this file will initiate one of available test tone on
channel Y. Reading it, shows which test is running. In cases
where an IIO backend is available and supports the test tone,
additional information about the data correctness is given.

View File

@ -0,0 +1,20 @@
What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Name of Backend Y connected to device X.
What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
KernelVersion: 6.11
Contact: linux-iio@vger.kernel.org
Description:
Directly access the registers of backend Y. Typical usage is:
Reading address 0x50
echo 0x50 > direct_reg_access
cat direct_reg_access
Writing address 0x50
echo 0x50 0x3 > direct_reg_access
//readback address 0x50
cat direct_reg_access

View File

@ -151,3 +151,10 @@ Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
Description:
The recompress file is write-only and triggers re-compression
with secondary compression algorithms.
What: /sys/block/zram<id>/algorithm_params
Date: August 2024
Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
Description:
The algorithm_params file is write-only and is used to setup
compression algorithm parameters.

View File

@ -523,13 +523,27 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -541,6 +555,10 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
KernelVersion: 5.8
Contact: linux-iio@vger.kernel.org
Description:
@ -549,25 +567,34 @@ Description:
- a small discrete set of values like "0 2 4 6 8"
- a range specified as "[min step max]"
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -575,6 +602,20 @@ Description:
production inaccuracies). If shared across all channels,
<type>_calibscale is used.
What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
KernelVersion: 4.8
Contact: linux-iio@vger.kernel.org
Description:
Available values of calibscale. Maybe expressed as either of:
- a small discrete set of values like "1 8 16"
- a range specified as "[min step max]"
If shared across all channels, <type>_calibscale_available is used.
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
@ -708,6 +749,7 @@ Description:
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
42kohm_to_gnd: connected to ground via a 42kOhm resistor,
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
@ -2289,3 +2331,11 @@ KernelVersion: 6.7
Contact: linux-iio@vger.kernel.org
Description:
List of available timeout value for tap gesture confirmation.
What: /sys/.../iio:deviceX/in_shunt_resistor
What: /sys/.../iio:deviceX/in_current_shunt_resistor
What: /sys/.../iio:deviceX/in_power_shunt_resistor
KernelVersion: 6.10
Contact: linux-iio@vger.kernel.org
Description:
The value of current sense resistor in Ohms.

View File

@ -1,17 +0,0 @@
What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
Date: March 2017
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description: The value of the shunt resistor used to compute power drain on
common input voltage pin (RS+). In Ohms.
What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
Date: March 2017
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description: The value of the shunt resistor used to compute current flowing
between RS+ and RS- voltage sense inputs. In Ohms.
These attributes describe a single physical component, exposed as two distinct
attributes as it is used to calculate two different values: power load and
current flowing between RS+ and RS- inputs.

View File

@ -15,17 +15,3 @@ Description:
Set the relative humidity. This value is sent to the sensor for
humidity compensation.
Default value: 50000 (50 % relative humidity)
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
Date: August 2021
KernelVersion: 5.15
Contact: Andreas Klinger <ak@it-klinger.de>
Description:
Set the bias value for the resistance which is used for
calculation of in_concentration_input as follows:
x = (in_resistance_raw - in_resistance_calibbias) * 0.65
in_concentration_input = 500 / (1 + e^x)
Default value: 30000

View File

@ -0,0 +1,61 @@
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
is useful when one wants to change the DAC output codes. For
autonomous toggling, the way it should be done is:
- disable toggle operation;
- change out_currentY_rawN, where N is the integer value of the symbol;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
This attribute has the same meaning as out_currentY_raw. It is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_N (_rawN), where N is the integer value of the symbol.
The same scale and offset as in out_currentY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW switch to a predefined output symbol. This attribute
is specific to toggle enabled channels and allows switching between
multiple predefined symbols. Each symbol corresponds to a different
output, denoted as out_currentY_rawN, where N is the integer value
of the symbol. Writing an integer value N will select out_currentY_rawN.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This
is useful when one wants to change the DAC output codes. For
autonomous toggling, the way it should be done is:
- disable toggle operation;
- change out_voltageY_rawN, where N is the integer value of the symbol;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
This attribute has the same meaning as out_currentY_raw. It is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_N (_rawN), where N is the integer value of the symbol.
The same scale and offset as in out_currentY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW switch to a predefined output symbol. This attribute
is specific to toggle enabled channels and allows switching between
multiple predefined symbols. Each symbol corresponds to a different
output, denoted as out_voltageY_rawN, where N is the integer value
of the symbol. Writing an integer value N will select out_voltageY_rawN.

View File

@ -53,34 +53,3 @@ KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Returns the available values for the dither phase.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
useful when one wants to change the DAC output codes. The way it should
be done is:
- disable toggle operation;
- change out_voltageY_raw0 and out_voltageY_raw1;
- enable toggle operation.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
It has the same meaning as out_voltageY_raw. This attribute is
specific to toggle enabled channels and refers to the DAC output
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
as in out_voltageY_raw applies.
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Performs a SW toggle. This attribute is specific to toggle
enabled channels and allows to toggle between out_voltageY_raw0
and out_voltageY_raw1 through software. Writing 0 will select
out_voltageY_raw0 while 1 selects out_voltageY_raw1.

View File

@ -3,7 +3,7 @@ KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Reading this returns the valid values that can be written to the
on_altvoltage0_mode attribute:
filter_mode attribute:
- auto -> Adjust bandpass filter to track changes in input clock rate.
- manual -> disable/unregister the clock rate notifier / input clock tracking.

View File

@ -13,12 +13,3 @@ Description:
available for reading data. However, samples can be occasionally skipped
or repeated, depending on the beat between the capture and conversion
rates.
What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
Date: December 2015
KernelVersion: 4.4
Contact: linux-iio@vger.kernel.org
Description:
The value of the shunt resistor may be known only at runtime fom an
eeprom content read by a client application. This attribute allows to
set its value in ohms.

View File

@ -500,3 +500,75 @@ Description:
console drivers from the device. Raw users of pci-sysfs
resourceN attributes must be terminated prior to resizing.
Success of the resizing operation is not guaranteed.
What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
What: /sys/class/leds/*:enclosure:*/brightness
Date: August 2024
KernelVersion: 6.12
Description:
LED indications on PCIe storage enclosures which are controlled
through the NPEM interface (Native PCIe Enclosure Management,
PCIe r6.1 sec 6.28) are accessible as led class devices, both
below /sys/class/leds and below NPEM-capable PCI devices.
Although these led class devices could be manipulated manually,
in practice they are typically manipulated automatically by an
application such as ledmon(8).
The name of a led class device is as follows:
<bdf>:enclosure:<indication>
where:
- <bdf> is the domain, bus, device and function number
(e.g. 10000:02:05.0)
- <indication> is a short description of the LED indication
Valid indications per PCIe r6.1 table 6-27 are:
- ok (drive is functioning normally)
- locate (drive is being identified by an admin)
- fail (drive is not functioning properly)
- rebuild (drive is part of an array that is rebuilding)
- pfa (drive is predicted to fail soon)
- hotspare (drive is marked to be used as a replacement)
- ica (drive is part of an array that is degraded)
- ifa (drive is part of an array that is failed)
- idt (drive is not the right type for the connector)
- disabled (drive is disabled, removal is safe)
- specific0 to specific7 (enclosure-specific indications)
Broadly, the indications fall into one of these categories:
- to signify drive state (ok, locate, fail, idt, disabled)
- to signify drive role or state in a software RAID array
(rebuild, pfa, hotspare, ica, ifa)
- to signify any other role or state (specific0 to specific7)
Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
ok, locate, fail, rebuild. All others are optional.
A led class device is only visible if the corresponding
indication is supported by the device.
To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
to the "brightness" file. Note that manipulating an indication
may implicitly manipulate other indications at the vendor's
discretion. E.g. when the user lights up the "ok" indication,
the vendor may choose to automatically turn off the "fail"
indication. The current state of an indication can be
retrieved by reading its "brightness" file.
The PCIe Base Specification allows vendors leeway to choose
different colors or blinking patterns for the indications,
but they typically follow the IBPI standard. E.g. the "locate"
indication is usually presented as one or two LEDs blinking at
4 Hz frequency:
https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
to facilitate shared access by operating system and platform
firmware to a device's NPEM registers. The kernel will use
this DSM interface where available, instead of accessing NPEM
registers directly. The DSM interface does not support the
enclosure-specific indications "specific0" to "specific7",
hence the corresponding led class devices are unavailable if
the DSM interface is used.

View File

@ -377,17 +377,33 @@ What: /sys/class/power_supply/<supply_name>/charge_type
Date: July 2009
Contact: linux-pm@vger.kernel.org
Description:
Represents the type of charging currently being applied to the
battery. "Trickle", "Fast", and "Standard" all mean different
charging speeds. "Adaptive" means that the charger uses some
algorithm to adjust the charge rate dynamically, without
any user configuration required. "Custom" means that the charger
uses the charge_control_* properties as configuration for some
different algorithm. "Long Life" means the charger reduces its
charging rate in order to prolong the battery health. "Bypass"
means the charger bypasses the charging path around the
integrated converter allowing for a "smart" wall adaptor to
perform the power conversion externally.
Select the charging algorithm to use for a battery.
Standard:
Fully charge the battery at a moderate rate.
Fast:
Quickly charge the battery using fast-charge
technology. This is typically harder on the battery
than standard charging and may lower its lifespan.
Trickle:
Users who primarily operate the system while
plugged into an external power source can extend
battery life with this mode. Vendor tooling may
call this "Primarily AC Use".
Adaptive:
Automatically optimize battery charge rate based
on typical usage pattern.
Custom:
Use the charge_control_* properties to determine
when to start and stop charging. Advanced users
can use this to drastically extend battery life.
Long Life:
The charger reduces its charging rate in order to
prolong the battery health.
Bypass:
The charger bypasses the charging path around the
integrated converter allowing for a "smart" wall
adaptor to perform the power conversion externally.
Access: Read, Write
@ -592,7 +608,12 @@ Description:
the supply, for example it can show if USB-PD capable source
is attached.
Access: Read-Only
Access: For power-supplies which consume USB power such
as battery charger chips, this indicates the type of
the connected USB power source and is Read-Only.
For power-supplies which act as a USB power-source such as
e.g. the UCS1002 USB Port Power Controller this is writable.
Valid values:
"Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",

View File

@ -0,0 +1,15 @@
What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
Date: May 2024
KernelVersion: 6.10
Contact: op-tee@lists.trustedfirmware.org
Description:
RPMB frames can be routed to the RPMB device via the
user-space daemon tee-supplicant or the RPMB subsystem
in the kernel. The value "user" means that the driver
will route the RPMB frames via user space. Conversely,
"kernel" means that the frames are routed via the RPMB
subsystem without assistance from tee-supplicant. It
should be assumed that RPMB frames are routed via user
space if the variable is absent. The primary purpose
of this variable is to let systemd know whether
tee-supplicant is needed in the early boot with initramfs.

View File

@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
(RO) indicates whether or not the kernel directly supports
modifying the crash elfcorehdr for memory hot un/plug and/or
on/offline changes.
(RO) indicates whether or not the kernel updates relevant kexec
segments on memory hot un/plug and/or on/offline events, avoiding the
need to reload kdump kernel.

View File

@ -704,9 +704,9 @@ What: /sys/devices/system/cpu/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
(RO) indicates whether or not the kernel directly supports
modifying the crash elfcorehdr for CPU hot un/plug and/or
on/offline changes.
(RO) indicates whether or not the kernel updates relevant kexec
segments on memory hot un/plug and/or on/offline events, avoiding the
need to reload kdump kernel.
What: /sys/devices/system/cpu/enabled
Date: Nov 2022

View File

@ -1532,3 +1532,30 @@ Contact: Bean Huo <beanhuo@micron.com>
Description:
rtc_update_ms indicates how often the host should synchronize or update the
UFS RTC. If set to 0, this will disable UFS RTC periodic update.
What: /sys/devices/platform/.../ufshci_capabilities/version
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: UFS version register.
Symbol - VER. This file shows the UFSHCD version.
Example: Version 3.12 would be represented as 0000_0312h.
The file is read only.
What: /sys/devices/platform/.../ufshci_capabilities/product_id
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: product ID register.
Symbol - HCPID. This file shows the UFSHCD product id.
The content of this register is vendor specific.
The file is read only.
What: /sys/devices/platform/.../ufshci_capabilities/man_id
Date: August 2024
Contact: Avri Altman <avri.altman@wdc.com>
Description:
Host Capabilities register group: manufacturer ID register.
Symbol - HCMID. This file shows the UFSHCD manufacturer id.
The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
The file is read only.

View File

@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young
candidates whose age is not beyond the threshold, by default it was
initialized as 604800 seconds (equals to 7 days).
What: /sys/fs/f2fs/<disk>/atgc_enabled
Date: Feb 2024
Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
Description: It represents whether ATGC is on or off. The value is 1 which
indicates that ATGC is on, and 0 indicates that it is off.
What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
@ -763,3 +769,53 @@ Date: November 2023
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls to enable/disable IO aware feature for background discard.
By default, the value is 1 which indicates IO aware is on.
What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
Date: July 2024
Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
Description: The zone UFS we are currently using consists of two parts:
conventional zones and sequential zones. It can be used to control which part
to prioritize for writes, with a default value of 0.
======================== =========================================
value description
blkzone_alloc_policy = 0 Prioritize writing to sequential zones
blkzone_alloc_policy = 1 Only allow writing to sequential zones
blkzone_alloc_policy = 2 Prioritize writing to conventional zones
======================== =========================================
What: /sys/fs/f2fs/<disk>/migration_window_granularity
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Controls migration window granularity of garbage collection on large
section. it can control the scanning window granularity for GC migration
in a unit of segment, while migration_granularity controls the number
of segments which can be migrated at the same turn.
What: /sys/fs/f2fs/<disk>/reserved_segments
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: In order to fine tune GC behavior, we can control the number of
reserved segments.
What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: If the percentage of free sections over total sections is above this
number, F2FS do not garbage collection for zoned devices through the
background GC thread. the default number is "60".
What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: If the percentage of free sections over total sections is under this
number, F2FS boosts garbage collection for zoned devices through the
background GC thread. the default number is "25".
What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: It controls the valid block ratio threshold not to trigger excessive GC
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
background GC thread from intiating GC for sections having valid blocks
exceeding the ratio.

View File

@ -52,7 +52,7 @@ driver generally needs to perform the following initialization:
- Enable DMA/processing engines
When done using the device, and perhaps the module needs to be unloaded,
the driver needs to take the follow steps:
the driver needs to take the following steps:
- Disable the device from generating IRQs
- Release the IRQ (free_irq())

View File

@ -921,10 +921,10 @@ This portion of the ``rcu_data`` structure is declared as follows:
::
1 int dynticks_snap;
1 int watching_snap;
2 unsigned long dynticks_fqs;
The ``->dynticks_snap`` field is used to take a snapshot of the
The ``->watching_snap`` field is used to take a snapshot of the
corresponding CPU's dyntick-idle state when forcing quiescent states,
and is therefore accessed from other CPUs. Finally, the
``->dynticks_fqs`` field is used to count the number of times this CPU
@ -935,8 +935,8 @@ This portion of the rcu_data structure is declared as follows:
::
1 long dynticks_nesting;
2 long dynticks_nmi_nesting;
1 long nesting;
2 long nmi_nesting;
3 atomic_t dynticks;
4 bool rcu_need_heavy_qs;
5 bool rcu_urgent_qs;
@ -945,14 +945,14 @@ These fields in the rcu_data structure maintain the per-CPU dyntick-idle
state for the corresponding CPU. The fields may be accessed only from
the corresponding CPU (and from tracing) unless otherwise stated.
The ``->dynticks_nesting`` field counts the nesting depth of process
The ``->nesting`` field counts the nesting depth of process
execution, so that in normal circumstances this counter has value zero
or one. NMIs, irqs, and tracers are counted by the
``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
``->nmi_nesting`` field. Because NMIs cannot be masked, changes
to this variable have to be undertaken carefully using an algorithm
provided by Andy Lutomirski. The initial transition from idle adds one,
and nested transitions add two, so that a nesting level of five is
represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
represented by a ``->nmi_nesting`` value of nine. This counter
can therefore be thought of as counting the number of reasons why this
CPU cannot be permitted to enter dyntick-idle mode, aside from
process-level transitions.
@ -960,12 +960,12 @@ process-level transitions.
However, it turns out that when running in non-idle kernel context, the
Linux kernel is fully capable of entering interrupt handlers that never
exit and perhaps also vice versa. Therefore, whenever the
``->dynticks_nesting`` field is incremented up from zero, the
``->dynticks_nmi_nesting`` field is set to a large positive number, and
whenever the ``->dynticks_nesting`` field is decremented down to zero,
the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
``->nesting`` field is incremented up from zero, the
``->nmi_nesting`` field is set to a large positive number, and
whenever the ``->nesting`` field is decremented down to zero,
the ``->nmi_nesting`` field is set to zero. Assuming that
the number of misnested interrupts is not sufficient to overflow the
counter, this approach corrects the ``->dynticks_nmi_nesting`` field
counter, this approach corrects the ``->nmi_nesting`` field
every time the corresponding CPU enters the idle loop from process
context.
@ -992,8 +992,8 @@ code.
+-----------------------------------------------------------------------+
| **Quick Quiz**: |
+-----------------------------------------------------------------------+
| Why not simply combine the ``->dynticks_nesting`` and |
| ``->dynticks_nmi_nesting`` counters into a single counter that just |
| Why not simply combine the ``->nesting`` and |
| ``->nmi_nesting`` counters into a single counter that just |
| counts the number of reasons that the corresponding CPU is non-idle? |
+-----------------------------------------------------------------------+
| **Answer**: |

View File

@ -147,10 +147,10 @@ RCU read-side critical sections preceding and following the current
idle sojourn.
This case is handled by calls to the strongly ordered
``atomic_add_return()`` read-modify-write atomic operation that
is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
is invoked within ``ct_kernel_exit_state()`` at idle-entry
time and within ``ct_kernel_enter_state()`` at idle-exit time.
The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
(both of which rely on acquire semantics) to detect idle CPUs.
+-----------------------------------------------------------------------+

View File

@ -528,7 +528,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@ -537,7 +537,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
</g>
<g
id="g4504"
@ -607,7 +607,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -638,7 +638,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -844,7 +844,7 @@
font-style="normal"
y="1547.8876"
x="4417.6396"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(6501.9719,-10685.904)"
@ -899,7 +899,7 @@
font-style="normal"
y="1858.8729"
x="4414.1836"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="14659.87"
@ -977,7 +977,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -1008,7 +1008,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 50 KiB

View File

@ -2974,7 +2974,7 @@
font-style="normal"
y="38114.047"
x="-334.33856"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(1749.9916,25880.249)"
@ -3029,7 +3029,7 @@
font-style="normal"
y="38425.035"
x="-337.79462"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="9907.8887"
@ -3107,7 +3107,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@ -3138,7 +3138,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"

Before

Width:  |  Height:  |  Size: 208 KiB

After

Width:  |  Height:  |  Size: 208 KiB

View File

@ -516,7 +516,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
xml:space="preserve">dyntick_save_progress_counter()</text>
xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@ -525,7 +525,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
xml:space="preserve">rcu_implicit_dynticks_qs()</text>
xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
sodipodi:linespacing="125%"
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -2649,8 +2649,7 @@ those that are idle from RCU's perspective) and then Tasks Rude RCU can
be removed from the kernel.
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
and rcu_barrier_tasks_rude().
consisting solely of synchronize_rcu_tasks_rude().
Tasks Trace RCU
~~~~~~~~~~~~~~~

View File

@ -194,14 +194,13 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
the callback function may be invoked from softirq context,
and in any case with bottom halves disabled. In particular,
this callback function cannot block. If you need the callback
to block, run that code in a workqueue handler scheduled from
the callback. The queue_rcu_work() function does this for you
in the case of call_rcu().
5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
call_rcu_tasks_trace() is used, the callback function may be
invoked from softirq context, and in any case with bottom halves
disabled. In particular, this callback function cannot block.
If you need the callback to block, run that code in a workqueue
handler scheduled from the callback. The queue_rcu_work()
function does this for you in the case of call_rcu().
6. Since synchronize_rcu() can block, it cannot be called
from any sort of irq context. The same rule applies
@ -254,10 +253,10 @@ over a rather long period of time, but improvements are always welcome!
corresponding readers must use rcu_read_lock_trace()
and rcu_read_unlock_trace().
c. If an updater uses call_rcu_tasks_rude() or
synchronize_rcu_tasks_rude(), then the corresponding
readers must use anything that disables preemption,
for example, preempt_disable() and preempt_enable().
c. If an updater uses synchronize_rcu_tasks_rude(),
then the corresponding readers must use anything that
disables preemption, for example, preempt_disable()
and preempt_enable().
Mixing things up will result in confusion and broken kernels, and
has even resulted in an exploitable security issue. Therefore,
@ -326,11 +325,9 @@ over a rather long period of time, but improvements are always welcome!
d. Periodically invoke rcu_barrier(), permitting a limited
number of updates per grace period.
The same cautions apply to call_srcu(), call_rcu_tasks(),
call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
why there is an srcu_barrier(), rcu_barrier_tasks(),
rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
respectively.
The same cautions apply to call_srcu(), call_rcu_tasks(), and
call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
Note that although these primitives do take action to avoid
memory exhaustion when any given CPU has too many callbacks,
@ -383,17 +380,17 @@ over a rather long period of time, but improvements are always welcome!
must use whatever locking or other synchronization is required
to safely access and/or modify that data structure.
Do not assume that RCU callbacks will be executed on
the same CPU that executed the corresponding call_rcu(),
call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or
call_rcu_tasks_trace(). For example, if a given CPU goes offline
while having an RCU callback pending, then that RCU callback
will execute on some surviving CPU. (If this was not the case,
a self-spawning RCU callback would prevent the victim CPU from
ever going offline.) Furthermore, CPUs designated by rcu_nocbs=
might well *always* have their RCU callbacks executed on some
other CPUs, in fact, for some real-time workloads, this is the
whole point of using the rcu_nocbs= kernel boot parameter.
Do not assume that RCU callbacks will be executed on the same
CPU that executed the corresponding call_rcu(), call_srcu(),
call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
a given CPU goes offline while having an RCU callback pending,
then that RCU callback will execute on some surviving CPU.
(If this was not the case, a self-spawning RCU callback would
prevent the victim CPU from ever going offline.) Furthermore,
CPUs designated by rcu_nocbs= might well *always* have their
RCU callbacks executed on some other CPUs, in fact, for some
real-time workloads, this is the whole point of using the
rcu_nocbs= kernel boot parameter.
In addition, do not assume that callbacks queued in a given order
will be invoked in that order, even if they all are queued on the
@ -507,9 +504,9 @@ over a rather long period of time, but improvements are always welcome!
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
17. If you pass a callback function defined within a module to one of
call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
or call_rcu_tasks_trace(), then it is necessary to wait for all
17. If you pass a callback function defined within a module
to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
call_rcu_tasks_trace(), then it is necessary to wait for all
pending callbacks to be invoked before unloading that module.
Note that it is absolutely *not* sufficient to wait for a grace
period! For example, synchronize_rcu() implementation is *not*
@ -522,7 +519,6 @@ over a rather long period of time, but improvements are always welcome!
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
- call_rcu_tasks() -> rcu_barrier_tasks()
- call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
- call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
However, these barrier functions are absolutely *not* guaranteed
@ -539,7 +535,6 @@ over a rather long period of time, but improvements are always welcome!
- Either synchronize_srcu() or synchronize_srcu_expedited(),
together with and srcu_barrier()
- synchronize_rcu_tasks() and rcu_barrier_tasks()
- synchronize_tasks_rude() and rcu_barrier_tasks_rude()
- synchronize_tasks_trace() and rcu_barrier_tasks_trace()
If necessary, you can use something like workqueues to execute

View File

@ -1103,7 +1103,7 @@ RCU-Tasks-Rude::
Critical sections Grace period Barrier
N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
N/A N/A
synchronize_rcu_tasks_rude

View File

@ -93,7 +93,7 @@ commands (does not impact QAIC).
uAPI
====
QAIC creates an accel device per phsyical PCIe device. This accel device exists
QAIC creates an accel device per physical PCIe device. This accel device exists
for as long as the PCIe device is known to Linux.
The PCIe device may not be in the state to accept requests from userspace at

View File

@ -47,3 +47,4 @@ subdirectories.
tomoyo
Yama
SafeSetID
ipe

View File

@ -0,0 +1,790 @@
.. SPDX-License-Identifier: GPL-2.0
Integrity Policy Enforcement (IPE)
==================================
.. NOTE::
This is the documentation for admins, system builders, or individuals
attempting to use IPE. If you're looking for more developer-focused
documentation about IPE please see :doc:`the design docs </security/ipe>`.
Overview
--------
Integrity Policy Enforcement (IPE) is a Linux Security Module that takes a
complementary approach to access control. Unlike traditional access control
mechanisms that rely on labels and paths for decision-making, IPE focuses
on the immutable security properties inherent to system components. These
properties are fundamental attributes or features of a system component
that cannot be altered, ensuring a consistent and reliable basis for
security decisions.
To elaborate, in the context of IPE, system components primarily refer to
files or the devices these files reside on. However, this is just a
starting point. The concept of system components is flexible and can be
extended to include new elements as the system evolves. The immutable
properties include the origin of a file, which remains constant and
unchangeable over time. For example, IPE policies can be crafted to trust
files originating from the initramfs. Since initramfs is typically verified
by the bootloader, its files are deemed trustworthy; "file is from
initramfs" becomes an immutable property under IPE's consideration.
The immutable property concept extends to the security features enabled on
a file's origin, such as dm-verity or fs-verity, which provide a layer of
integrity and trust. For example, IPE allows the definition of policies
that trust files from a dm-verity protected device. dm-verity ensures the
integrity of an entire device by providing a verifiable and immutable state
of its contents. Similarly, fs-verity offers filesystem-level integrity
checks, allowing IPE to enforce policies that trust files protected by
fs-verity. These two features cannot be turned off once established, so
they are considered immutable properties. These examples demonstrate how
IPE leverages immutable properties, such as a file's origin and its
integrity protection mechanisms, to make access control decisions.
For the IPE policy, specifically, it grants the ability to enforce
stringent access controls by assessing security properties against
reference values defined within the policy. This assessment can be based on
the existence of a security property (e.g., verifying if a file originates
from initramfs) or evaluating the internal state of an immutable security
property. The latter includes checking the roothash of a dm-verity
protected device, determining whether dm-verity possesses a valid
signature, assessing the digest of a fs-verity protected file, or
determining whether fs-verity possesses a valid built-in signature. This
nuanced approach to policy enforcement enables a highly secure and
customizable system defense mechanism, tailored to specific security
requirements and trust models.
To enable IPE, ensure that ``CONFIG_SECURITY_IPE`` (under
:menuselection:`Security -> Integrity Policy Enforcement (IPE)`) config
option is enabled.
Use Cases
---------
IPE works best in fixed-function devices: devices in which their purpose
is clearly defined and not supposed to be changed (e.g. network firewall
device in a data center, an IoT device, etcetera), where all software and
configuration is built and provisioned by the system owner.
IPE is a long-way off for use in general-purpose computing: the Linux
community as a whole tends to follow a decentralized trust model (known as
the web of trust), which IPE has no support for it yet. Instead, IPE
supports PKI (public key infrastructure), which generally designates a
set of trusted entities that provide a measure of absolute trust.
Additionally, while most packages are signed today, the files inside
the packages (for instance, the executables), tend to be unsigned. This
makes it difficult to utilize IPE in systems where a package manager is
expected to be functional, without major changes to the package manager
and ecosystem behind it.
The digest_cache LSM [#digest_cache_lsm]_ is a system that when combined with IPE,
could be used to enable and support general-purpose computing use cases.
Known Limitations
-----------------
IPE cannot verify the integrity of anonymous executable memory, such as
the trampolines created by gcc closures and libffi (<3.4.2), or JIT'd code.
Unfortunately, as this is dynamically generated code, there is no way
for IPE to ensure the integrity of this code to form a trust basis.
IPE cannot verify the integrity of programs written in interpreted
languages when these scripts are invoked by passing these program files
to the interpreter. This is because the way interpreters execute these
files; the scripts themselves are not evaluated as executable code
through one of IPE's hooks, but they are merely text files that are read
(as opposed to compiled executables) [#interpreters]_.
Threat Model
------------
IPE specifically targets the risk of tampering with user-space executable
code after the kernel has initially booted, including the kernel modules
loaded from userspace via ``modprobe`` or ``insmod``.
To illustrate, consider a scenario where an untrusted binary, possibly
malicious, is downloaded along with all necessary dependencies, including a
loader and libc. The primary function of IPE in this context is to prevent
the execution of such binaries and their dependencies.
IPE achieves this by verifying the integrity and authenticity of all
executable code before allowing them to run. It conducts a thorough
check to ensure that the code's integrity is intact and that they match an
authorized reference value (digest, signature, etc) as per the defined
policy. If a binary does not pass this verification process, either
because its integrity has been compromised or it does not meet the
authorization criteria, IPE will deny its execution. Additionally, IPE
generates audit logs which may be utilized to detect and analyze failures
resulting from policy violation.
Tampering threat scenarios include modification or replacement of
executable code by a range of actors including:
- Actors with physical access to the hardware
- Actors with local network access to the system
- Actors with access to the deployment system
- Compromised internal systems under external control
- Malicious end users of the system
- Compromised end users of the system
- Remote (external) compromise of the system
IPE does not mitigate threats arising from malicious but authorized
developers (with access to a signing certificate), or compromised
developer tools used by them (i.e. return-oriented programming attacks).
Additionally, IPE draws hard security boundary between userspace and
kernelspace. As a result, kernel-level exploits are considered outside
the scope of IPE and mitigation is left to other mechanisms.
Policy
------
IPE policy is a plain-text [#devdoc]_ policy composed of multiple statements
over several lines. There is one required line, at the top of the
policy, indicating the policy name, and the policy version, for
instance::
policy_name=Ex_Policy policy_version=0.0.0
The policy name is a unique key identifying this policy in a human
readable name. This is used to create nodes under securityfs as well as
uniquely identify policies to deploy new policies vs update existing
policies.
The policy version indicates the current version of the policy (NOT the
policy syntax version). This is used to prevent rollback of policy to
potentially insecure previous versions of the policy.
The next portion of IPE policy are rules. Rules are formed by key=value
pairs, known as properties. IPE rules require two properties: ``action``,
which determines what IPE does when it encounters a match against the
rule, and ``op``, which determines when the rule should be evaluated.
The ordering is significant, a rule must start with ``op``, and end with
``action``. Thus, a minimal rule is::
op=EXECUTE action=ALLOW
This example will allow any execution. Additional properties are used to
assess immutable security properties about the files being evaluated.
These properties are intended to be descriptions of systems within the
kernel that can provide a measure of integrity verification, such that IPE
can determine the trust of the resource based on the value of the property.
Rules are evaluated top-to-bottom. As a result, any revocation rules,
or denies should be placed early in the file to ensure that these rules
are evaluated before a rule with ``action=ALLOW``.
IPE policy supports comments. The character '#' will function as a
comment, ignoring all characters to the right of '#' until the newline.
The default behavior of IPE evaluations can also be expressed in policy,
through the ``DEFAULT`` statement. This can be done at a global level,
or a per-operation level::
# Global
DEFAULT action=ALLOW
# Operation Specific
DEFAULT op=EXECUTE action=ALLOW
A default must be set for all known operations in IPE. If you want to
preserve older policies being compatible with newer kernels that can introduce
new operations, set a global default of ``ALLOW``, then override the
defaults on a per-operation basis (as above).
With configurable policy-based LSMs, there's several issues with
enforcing the configurable policies at startup, around reading and
parsing the policy:
1. The kernel *should* not read files from userspace, so directly reading
the policy file is prohibited.
2. The kernel command line has a character limit, and one kernel module
should not reserve the entire character limit for its own
configuration.
3. There are various boot loaders in the kernel ecosystem, so handing
off a memory block would be costly to maintain.
As a result, IPE has addressed this problem through a concept of a "boot
policy". A boot policy is a minimal policy which is compiled into the
kernel. This policy is intended to get the system to a state where
userspace is set up and ready to receive commands, at which point a more
complex policy can be deployed via securityfs. The boot policy can be
specified via ``SECURITY_IPE_BOOT_POLICY`` config option, which accepts
a path to a plain-text version of the IPE policy to apply. This policy
will be compiled into the kernel. If not specified, IPE will be disabled
until a policy is deployed and activated through securityfs.
Deploying Policies
~~~~~~~~~~~~~~~~~~
Policies can be deployed from userspace through securityfs. These policies
are signed through the PKCS#7 message format to enforce some level of
authorization of the policies (prohibiting an attacker from gaining
unconstrained root, and deploying an "allow all" policy). These
policies must be signed by a certificate that chains to the
``SYSTEM_TRUSTED_KEYRING``. With openssl, the policy can be signed by::
openssl smime -sign \
-in "$MY_POLICY" \
-signer "$MY_CERTIFICATE" \
-inkey "$MY_PRIVATE_KEY" \
-noattr \
-nodetach \
-nosmimecap \
-outform der \
-out "$MY_POLICY.p7b"
Deploying the policies is done through securityfs, through the
``new_policy`` node. To deploy a policy, simply cat the file into the
securityfs node::
cat "$MY_POLICY.p7b" > /sys/kernel/security/ipe/new_policy
Upon success, this will create one subdirectory under
``/sys/kernel/security/ipe/policies/``. The subdirectory will be the
``policy_name`` field of the policy deployed, so for the example above,
the directory will be ``/sys/kernel/security/ipe/policies/Ex_Policy``.
Within this directory, there will be seven files: ``pkcs7``, ``policy``,
``name``, ``version``, ``active``, ``update``, and ``delete``.
The ``pkcs7`` file is read-only. Reading it returns the raw PKCS#7 data
that was provided to the kernel, representing the policy. If the policy being
read is the boot policy, this will return ``ENOENT``, as it is not signed.
The ``policy`` file is read only. Reading it returns the PKCS#7 inner
content of the policy, which will be the plain text policy.
The ``active`` file is used to set a policy as the currently active policy.
This file is rw, and accepts a value of ``"1"`` to set the policy as active.
Since only a single policy can be active at one time, all other policies
will be marked inactive. The policy being marked active must have a policy
version greater or equal to the currently-running version.
The ``update`` file is used to update a policy that is already present
in the kernel. This file is write-only and accepts a PKCS#7 signed
policy. Two checks will always be performed on this policy: First, the
``policy_names`` must match with the updated version and the existing
version. Second the updated policy must have a policy version greater than
or equal to the currently-running version. This is to prevent rollback attacks.
The ``delete`` file is used to remove a policy that is no longer needed.
This file is write-only and accepts a value of ``1`` to delete the policy.
On deletion, the securityfs node representing the policy will be removed.
However, delete the current active policy is not allowed and will return
an operation not permitted error.
Similarly, writing to both ``update`` and ``new_policy`` could result in
bad message(policy syntax error) or file exists error. The latter error happens
when trying to deploy a policy with a ``policy_name`` while the kernel already
has a deployed policy with the same ``policy_name``.
Deploying a policy will *not* cause IPE to start enforcing the policy. IPE will
only enforce the policy marked active. Note that only one policy can be active
at a time.
Once deployment is successful, the policy can be activated, by writing file
``/sys/kernel/security/ipe/policies/$policy_name/active``.
For example, the ``Ex_Policy`` can be activated by::
echo 1 > "/sys/kernel/security/ipe/policies/Ex_Policy/active"
From above point on, ``Ex_Policy`` is now the enforced policy on the
system.
IPE also provides a way to delete policies. This can be done via the
``delete`` securityfs node,
``/sys/kernel/security/ipe/policies/$policy_name/delete``.
Writing ``1`` to that file deletes the policy::
echo 1 > "/sys/kernel/security/ipe/policies/$policy_name/delete"
There is only one requirement to delete a policy: the policy being deleted
must be inactive.
.. NOTE::
If a traditional MAC system is enabled (SELinux, apparmor, smack), all
writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
Modes
~~~~~
IPE supports two modes of operation: permissive (similar to SELinux's
permissive mode) and enforced. In permissive mode, all events are
checked and policy violations are logged, but the policy is not really
enforced. This allows users to test policies before enforcing them.
The default mode is enforce, and can be changed via the kernel command
line parameter ``ipe.enforce=(0|1)``, or the securityfs node
``/sys/kernel/security/ipe/enforce``.
.. NOTE::
If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
Audit Events
~~~~~~~~~~~~
1420 AUDIT_IPE_ACCESS
^^^^^^^^^^^^^^^^^^^^^
Event Examples::
type=1420 audit(1653364370.067:61): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2241 comm="ld-linux.so" path="/deny/lib/libc.so.6" dev="sda2" ino=14549020 rule="DEFAULT action=DENY"
type=1300 audit(1653364370.067:61): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=7f1105a28000 a1=195000 a2=5 a3=812 items=0 ppid=2219 pid=2241 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="ld-linux.so" exe="/tmp/ipe-test/lib/ld-linux.so" subj=unconfined key=(null)
type=1327 audit(1653364370.067:61): 707974686F6E3300746573742F6D61696E2E7079002D6E00
type=1420 audit(1653364735.161:64): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2472 comm="mmap_test" path=? dev=? ino=? rule="DEFAULT action=DENY"
type=1300 audit(1653364735.161:64): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=0 a1=1000 a2=4 a3=21 items=0 ppid=2219 pid=2472 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="mmap_test" exe="/root/overlake_test/upstream_test/vol_fsverity/bin/mmap_test" subj=unconfined key=(null)
type=1327 audit(1653364735.161:64): 707974686F6E3300746573742F6D61696E2E7079002D6E00
This event indicates that IPE made an access control decision; the IPE
specific record (1420) is always emitted in conjunction with a
``AUDITSYSCALL`` record.
Determining whether IPE is in permissive or enforced mode can be derived
from ``success`` property and exit code of the ``AUDITSYSCALL`` record.
Field descriptions:
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| Field | Value Type | Optional? | Description of Value |
+===========+============+===========+=================================================================================+
| ipe_op | string | No | The IPE operation name associated with the log |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| ipe_hook | string | No | The name of the LSM hook that triggered the IPE event |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| enforcing | integer | No | The current IPE enforcing state 1 is in enforcing mode, 0 is in permissive mode |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| pid | integer | No | The pid of the process that triggered the IPE event. |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| comm | string | No | The command line program name of the process that triggered the IPE event |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| path | string | Yes | The absolute path to the evaluated file |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| ino | integer | Yes | The inode number of the evaluated file |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| dev | string | Yes | The device name of the evaluated file, e.g. vda |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
| rule | string | No | The matched policy rule |
+-----------+------------+-----------+---------------------------------------------------------------------------------+
1421 AUDIT_IPE_CONFIG_CHANGE
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Event Example::
type=1421 audit(1653425583.136:54): old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
type=1300 audit(1653425583.136:54): SYSCALL arch=c000003e syscall=1 success=yes exit=2 a0=3 a1=5596fcae1fb0 a2=2 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
type=1327 audit(1653425583.136:54): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2
This event indicates that IPE switched the active poliy from one to another
along with the version and the hash digest of the two policies.
Note IPE can only have one policy active at a time, all access decision
evaluation is based on the current active policy.
The normal procedure to deploy a new policy is loading the policy to deploy
into the kernel first, then switch the active policy to it.
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
Field descriptions:
+------------------------+------------+-----------+---------------------------------------------------+
| Field | Value Type | Optional? | Description of Value |
+========================+============+===========+===================================================+
| old_active_pol_name | string | Yes | The name of previous active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| old_active_pol_version | string | Yes | The version of previous active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| old_policy_digest | string | Yes | The hash of previous active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| new_active_pol_name | string | No | The name of current active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| new_active_pol_version | string | No | The version of current active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| new_policy_digest | string | No | The hash of current active policy |
+------------------------+------------+-----------+---------------------------------------------------+
| auid | integer | No | The login user ID |
+------------------------+------------+-----------+---------------------------------------------------+
| ses | integer | No | The login session ID |
+------------------------+------------+-----------+---------------------------------------------------+
| lsm | string | No | The lsm name associated with the event |
+------------------------+------------+-----------+---------------------------------------------------+
| res | integer | No | The result of the audited operation(success/fail) |
+------------------------+------------+-----------+---------------------------------------------------+
1422 AUDIT_IPE_POLICY_LOAD
^^^^^^^^^^^^^^^^^^^^^^^^^^
Event Example::
type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
type=1300 audit(1653425529.927:53): arch=c000003e syscall=1 success=yes exit=2567 a0=3 a1=5596fcae1fb0 a2=a07 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
type=1327 audit(1653425529.927:53): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2E
This record indicates a new policy has been loaded into the kernel with the policy name, policy version and policy hash.
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
Field descriptions:
+----------------+------------+-----------+---------------------------------------------------+
| Field | Value Type | Optional? | Description of Value |
+================+============+===========+===================================================+
| policy_name | string | No | The policy_name |
+----------------+------------+-----------+---------------------------------------------------+
| policy_version | string | No | The policy_version |
+----------------+------------+-----------+---------------------------------------------------+
| policy_digest | string | No | The policy hash |
+----------------+------------+-----------+---------------------------------------------------+
| auid | integer | No | The login user ID |
+----------------+------------+-----------+---------------------------------------------------+
| ses | integer | No | The login session ID |
+----------------+------------+-----------+---------------------------------------------------+
| lsm | string | No | The lsm name associated with the event |
+----------------+------------+-----------+---------------------------------------------------+
| res | integer | No | The result of the audited operation(success/fail) |
+----------------+------------+-----------+---------------------------------------------------+
1404 AUDIT_MAC_STATUS
^^^^^^^^^^^^^^^^^^^^^
Event Examples::
type=1404 audit(1653425689.008:55): enforcing=0 old_enforcing=1 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
type=1327 audit(1653425689.008:55): proctitle="-bash"
type=1404 audit(1653425689.008:55): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
type=1327 audit(1653425689.008:55): proctitle="-bash"
This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
Field descriptions:
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| Field | Value Type | Optional? | Description of Value |
+===============+============+===========+=================================================================================================+
| enforcing | integer | No | The enforcing state IPE is being switched to, 1 is in enforcing mode, 0 is in permissive mode |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| old_enforcing | integer | No | The enforcing state IPE is being switched from, 1 is in enforcing mode, 0 is in permissive mode |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| auid | integer | No | The login user ID |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| ses | integer | No | The login session ID |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| enabled | integer | No | The new TTY audit enabled setting |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| old-enabled | integer | No | The old TTY audit enabled setting |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| lsm | string | No | The lsm name associated with the event |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
| res | integer | No | The result of the audited operation(success/fail) |
+---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
Success Auditing
^^^^^^^^^^^^^^^^
IPE supports success auditing. When enabled, all events that pass IPE
policy and are not blocked will emit an audit event. This is disabled by
default, and can be enabled via the kernel command line
``ipe.success_audit=(0|1)`` or
``/sys/kernel/security/ipe/success_audit`` securityfs file.
This is *very* noisy, as IPE will check every userspace binary on the
system, but is useful for debugging policies.
.. NOTE::
If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
Properties
----------
As explained above, IPE properties are ``key=value`` pairs expressed in IPE
policy. Two properties are built-into the policy parser: 'op' and 'action'.
The other properties are used to restrict immutable security properties
about the files being evaluated. Currently those properties are:
'``boot_verified``', '``dmverity_signature``', '``dmverity_roothash``',
'``fsverity_signature``', '``fsverity_digest``'. A description of all
properties supported by IPE are listed below:
op
~~
Indicates the operation for a rule to apply to. Must be in every rule,
as the first token. IPE supports the following operations:
``EXECUTE``
Pertains to any file attempting to be executed, or loaded as an
executable.
``FIRMWARE``:
Pertains to firmware being loaded via the firmware_class interface.
This covers both the preallocated buffer and the firmware file
itself.
``KMODULE``:
Pertains to loading kernel modules via ``modprobe`` or ``insmod``.
``KEXEC_IMAGE``:
Pertains to kernel images loading via ``kexec``.
``KEXEC_INITRAMFS``
Pertains to initrd images loading via ``kexec --initrd``.
``POLICY``:
Controls loading policies via reading a kernel-space initiated read.
An example of such is loading IMA policies by writing the path
to the policy file to ``$securityfs/ima/policy``
``X509_CERT``:
Controls loading IMA certificates through the Kconfigs,
``CONFIG_IMA_X509_PATH`` and ``CONFIG_EVM_X509_PATH``.
action
~~~~~~
Determines what IPE should do when a rule matches. Must be in every
rule, as the final clause. Can be one of:
``ALLOW``:
If the rule matches, explicitly allow access to the resource to proceed
without executing any more rules.
``DENY``:
If the rule matches, explicitly prohibit access to the resource to
proceed without executing any more rules.
boot_verified
~~~~~~~~~~~~~
This property can be utilized for authorization of files from initramfs.
The format of this property is::
boot_verified=(TRUE|FALSE)
.. WARNING::
This property will trust files from initramfs(rootfs). It should
only be used during early booting stage. Before mounting the real
rootfs on top of the initramfs, initramfs script will recursively
remove all files and directories on the initramfs. This is typically
implemented by using switch_root(8) [#switch_root]_. Therefore the
initramfs will be empty and not accessible after the real
rootfs takes over. It is advised to switch to a different policy
that doesn't rely on the property after this point.
This ensures that the trust policies remain relevant and effective
throughout the system's operation.
dmverity_roothash
~~~~~~~~~~~~~~~~~
This property can be utilized for authorization or revocation of
specific dm-verity volumes, identified via their root hashes. It has a
dependency on the DM_VERITY module. This property is controlled by
the ``IPE_PROP_DM_VERITY`` config option, it will be automatically
selected when ``SECURITY_IPE`` and ``DM_VERITY`` are all enabled.
The format of this property is::
dmverity_roothash=DigestName:HexadecimalString
The supported DigestNames for dmverity_roothash are [#dmveritydigests]_
+ blake2b-512
+ blake2s-256
+ sha256
+ sha384
+ sha512
+ sha3-224
+ sha3-256
+ sha3-384
+ sha3-512
+ sm3
+ rmd160
dmverity_signature
~~~~~~~~~~~~~~~~~~
This property can be utilized for authorization of all dm-verity
volumes that have a signed roothash that validated by a keyring
specified by dm-verity's configuration, either the system trusted
keyring, or the secondary keyring. It depends on
``DM_VERITY_VERIFY_ROOTHASH_SIG`` config option and is controlled by
the ``IPE_PROP_DM_VERITY_SIGNATURE`` config option, it will be automatically
selected when ``SECURITY_IPE``, ``DM_VERITY`` and
``DM_VERITY_VERIFY_ROOTHASH_SIG`` are all enabled.
The format of this property is::
dmverity_signature=(TRUE|FALSE)
fsverity_digest
~~~~~~~~~~~~~~~
This property can be utilized for authorization of specific fsverity
enabled files, identified via their fsverity digests.
It depends on ``FS_VERITY`` config option and is controlled by
the ``IPE_PROP_FS_VERITY`` config option, it will be automatically
selected when ``SECURITY_IPE`` and ``FS_VERITY`` are all enabled.
The format of this property is::
fsverity_digest=DigestName:HexadecimalString
The supported DigestNames for fsverity_digest are [#fsveritydigest]_
+ sha256
+ sha512
fsverity_signature
~~~~~~~~~~~~~~~~~~
This property is used to authorize all fs-verity enabled files that have
been verified by fs-verity's built-in signature mechanism. The signature
verification relies on a key stored within the ".fs-verity" keyring. It
depends on ``FS_VERITY_BUILTIN_SIGNATURES`` config option and
it is controlled by the ``IPE_PROP_FS_VERITY`` config option,
it will be automatically selected when ``SECURITY_IPE``, ``FS_VERITY``
and ``FS_VERITY_BUILTIN_SIGNATURES`` are all enabled.
The format of this property is::
fsverity_signature=(TRUE|FALSE)
Policy Examples
---------------
Allow all
~~~~~~~~~
::
policy_name=Allow_All policy_version=0.0.0
DEFAULT action=ALLOW
Allow only initramfs
~~~~~~~~~~~~~~~~~~~~
::
policy_name=Allow_Initramfs policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE boot_verified=TRUE action=ALLOW
Allow any signed and validated dm-verity volume and the initramfs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
policy_name=Allow_Signed_DMV_And_Initramfs policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE boot_verified=TRUE action=ALLOW
op=EXECUTE dmverity_signature=TRUE action=ALLOW
Prohibit execution from a specific dm-verity volume
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
policy_name=Deny_DMV_By_Roothash policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE dmverity_roothash=sha256:cd2c5bae7c6c579edaae4353049d58eb5f2e8be0244bf05345bc8e5ed257baff action=DENY
op=EXECUTE boot_verified=TRUE action=ALLOW
op=EXECUTE dmverity_signature=TRUE action=ALLOW
Allow only a specific dm-verity volume
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
policy_name=Allow_DMV_By_Roothash policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE dmverity_roothash=sha256:401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW
Allow any fs-verity file with a valid built-in signature
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
policy_name=Allow_Signed_And_Validated_FSVerity policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE fsverity_signature=TRUE action=ALLOW
Allow execution of a specific fs-verity file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
policy_name=ALLOW_FSV_By_Digest policy_version=0.0.0
DEFAULT action=DENY
op=EXECUTE fsverity_digest=sha256:fd88f2b8824e197f850bf4c5109bea5cf0ee38104f710843bb72da796ba5af9e action=ALLOW
Additional Information
----------------------
- `Github Repository <https://github.com/microsoft/ipe>`_
- :doc:`Developer and design docs for IPE </security/ipe>`
FAQ
---
Q:
What's the difference between other LSMs which provide a measure of
trust-based access control?
A:
In general, there's two other LSMs that can provide similar functionality:
IMA, and Loadpin.
IMA and IPE are functionally very similar. The significant difference between
the two is the policy. [#devdoc]_
Loadpin and IPE differ fairly dramatically, as Loadpin only covers the IPE's
kernel read operations, whereas IPE is capable of controlling execution
on top of kernel read. The trust model is also different; Loadpin roots its
trust in the initial super-block, whereas trust in IPE is stemmed from kernel
itself (via ``SYSTEM_TRUSTED_KEYS``).
-----------
.. [#digest_cache_lsm] https://lore.kernel.org/lkml/20240415142436.2545003-1-roberto.sassu@huaweicloud.com/
.. [#interpreters] There is `some interest in solving this issue <https://lore.kernel.org/lkml/20220321161557.495388-1-mic@digikod.net/>`_.
.. [#devdoc] Please see :doc:`the design docs </security/ipe>` for more on
this topic.
.. [#switch_root] https://man7.org/linux/man-pages/man8/switch_root.8.html
.. [#dmveritydigests] These hash algorithms are based on values accepted by
the Linux crypto API; IPE does not impose any
restrictions on the digest algorithm itself;
thus, this list may be out of date.
.. [#fsveritydigest] These hash algorithms are based on values accepted by the
kernel's fsverity support; IPE does not impose any
restrictions on the digest algorithm itself;
thus, this list may be out of date.

View File

@ -102,17 +102,41 @@ Examples::
#select lzo compression algorithm
echo lzo > /sys/block/zram0/comp_algorithm
For the time being, the `comp_algorithm` content does not necessarily
show every compression algorithm supported by the kernel. We keep this
list primarily to simplify device configuration and one can configure
a new device with a compression algorithm that is not listed in
`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
and, if some of the algorithms were built as modules, it's impossible
to list all of them using, for instance, /proc/crypto or any other
method. This, however, has an advantage of permitting the usage of
custom crypto compression modules (implementing S/W or H/W compression).
For the time being, the `comp_algorithm` content shows only compression
algorithms that are supported by zram.
4) Set Disksize
4) Set compression algorithm parameters: Optional
=================================================
Compression algorithms may support specific parameters which can be
tweaked for particular dataset. ZRAM has an `algorithm_params` device
attribute which provides a per-algorithm params configuration.
For example, several compression algorithms support `level` parameter.
In addition, certain compression algorithms support pre-trained dictionaries,
which significantly change algorithms' characteristics. In order to configure
compression algorithm to use external pre-trained dictionary, pass full
path to the `dict` along with other parameters::
#pass path to pre-trained zstd dictionary
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
#same, but using algorithm priority
echo "priority=1 dict=/etc/dictioary" > \
/sys/block/zram0/algorithm_params
#pass path to pre-trained zstd dictionary and compression level
echo "algo=zstd level=8 dict=/etc/dictioary" > \
/sys/block/zram0/algorithm_params
Parameters are algorithm specific: not all algorithms support pre-trained
dictionaries, not all algorithms support `level`. Furthermore, for certain
algorithms `level` controls the compression level (the higher the value the
better the compression ratio, it even can take negatives values for some
algorithms), for other algorithms `level` is acceleration level (the higher
the value the lower the compression ratio).
5) Set Disksize
===============
Set disk size by writing the value to sysfs node 'disksize'.
@ -132,7 +156,7 @@ There is little point creating a zram of greater than twice the size of memory
since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
size of the disk when not in use so a huge zram is wasteful.
5) Set memory limit: Optional
6) Set memory limit: Optional
=============================
Set memory limit by writing the value to sysfs node 'mem_limit'.
@ -151,7 +175,7 @@ Examples::
# To disable memory limit
echo 0 > /sys/block/zram0/mem_limit
6) Activate
7) Activate
===========
::
@ -162,7 +186,7 @@ Examples::
mkfs.ext4 /dev/zram1
mount /dev/zram1 /tmp
7) Add/remove zram devices
8) Add/remove zram devices
==========================
zram provides a control interface, which enables dynamic (on-demand) device
@ -182,7 +206,7 @@ execute::
echo X > /sys/class/zram-control/hot_remove
8) Stats
9) Stats
========
Per-device statistics are exported as various nodes under /sys/block/zram<id>/
@ -205,6 +229,7 @@ writeback_limit_enable RW show and set writeback_limit feature
max_comp_streams RW the number of possible concurrent compress
operations
comp_algorithm RW show and change the compression algorithm
algorithm_params WO setup compression algorithm parameters
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
@ -283,15 +308,15 @@ a single line of text and contains the following stats separated by whitespace:
Unit: 4K bytes
============== =============================================================
9) Deactivate
=============
10) Deactivate
==============
::
swapoff /dev/zram0
umount /dev/zram1
10) Reset
11) Reset
=========
Write any positive value to 'reset' sysfs node::
@ -487,11 +512,14 @@ registered compression algorithms, increases our chances of finding the
algorithm that successfully compresses a particular page. Sometimes, however,
it is convenient (and sometimes even necessary) to limit recompression to
only one particular algorithm so that it will not try any other algorithms.
This can be achieved by providing a algo=NAME parameter:::
This can be achieved by providing a `algo` or `priority` parameter:::
#use zstd algorithm only (if registered)
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
#use zstd algorithm only (if zstd was registered under priority 1)
echo "type=huge priority=1" > /sys/block/zramX/recompress
memory tracking
===============

View File

@ -1,76 +1,144 @@
Bisecting a bug
+++++++++++++++
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
.. [see the bottom of this file for redistribution information]
Last updated: 28 October 2016
======================
Bisecting a regression
======================
Introduction
============
This document describes how to use a ``git bisect`` to find the source code
change that broke something -- for example when some functionality stopped
working after upgrading from Linux 6.0 to 6.1.
Always try the latest kernel from kernel.org and build from source. If you are
not confident in doing that please report the bug to your distribution vendor
instead of to a kernel developer.
The text focuses on the gist of the process. If you are new to bisecting the
kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
instead: it depicts everything from start to finish while covering multiple
aspects even kernel developers occasionally forget. This includes detecting
situations early where a bisection would be a waste of time, as nobody would
care about the result -- for example, because the problem happens after the
kernel marked itself as 'tainted', occurs in an abandoned version, was already
fixed, or is caused by a .config change you or your Linux distributor performed.
Finding bugs is not always easy. Have a go though. If you can't find it don't
give up. Report as much as you have found to the relevant maintainer. See
MAINTAINERS for who that is for the subsystem you have worked on.
Finding the change causing a kernel issue using a bisection
===========================================================
Before you submit a bug report read
'Documentation/admin-guide/reporting-issues.rst'.
*Note: the following process assumes you prepared everything for a bisection.
This includes having a Git clone with the appropriate sources, installing the
software required to build and install kernels, as well as a .config file stored
in a safe place (the following example assumes '~/prepared_kernel_.config') to
use as pristine base at each bisection step; ideally, you have also worked out
a fully reliable and straight-forward way to reproduce the regression, too.*
Devices not appearing
=====================
* Preparation: start the bisection and tell Git about the points in the history
you consider to be working and broken, which Git calls 'good' and 'bad'::
Often this is caused by udev/systemd. Check that first before blaming it
on the kernel.
git bisect start
git bisect good v6.0
git bisect bad v6.1
Finding patch that caused a bug
===============================
Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
Using the provided tools with ``git`` makes finding bugs easy provided the bug
is reproducible.
1. Copy your prepared .config into the build directory and adjust it to the
needs of the codebase Git checked out for testing::
Steps to do it:
cp ~/prepared_kernel_.config .config
make olddefconfig
- build the Kernel from its git source
- start bisect with [#f1]_::
2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
for example, when a compile error happens at the current stage of the
bisection a later change resolves. In such cases run ``git bisect skip`` and
go back to step 1.
$ git bisect start
3. Check if the functionality that regressed works in the kernel you just built.
- mark the broken changeset with::
If it works, execute::
$ git bisect bad [commit]
git bisect good
- mark a changeset where the code is known to work with::
If it is broken, run::
$ git bisect good [commit]
git bisect bad
- rebuild the Kernel and test
- interact with git bisect by using either::
Note, getting this wrong just once will send the rest of the bisection
totally off course. To prevent having to start anew later you thus want to
ensure what you tell Git is correct; it is thus often wise to spend a few
minutes more on testing in case your reproducer is unreliable.
$ git bisect good
After issuing one of these two commands, Git will usually check out another
bisection point and print something like 'Bisecting: 675 revisions left to
test after this (roughly 10 steps)'. In that case go back to step 1.
or::
If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
is the first bad commit', then you have finished the bisection. In that case
move to the next point below. Note, right after displaying that line Git will
show some details about the culprit including its patch description; this can
easily fill your terminal, so you might need to scroll up to see the message
mentioning the culprit's commit-id.
$ git bisect bad
In case you missed Git's output, you can always run ``git bisect log`` to
print the status: it will show how many steps remain or mention the result of
the bisection.
depending if the bug happened on the changeset you're testing
- After some interactions, git bisect will give you the changeset that
likely caused the bug.
* Recommended complementary task: put the bisection log and the current .config
file aside for the bug report; furthermore tell Git to reset the sources to
the state before the bisection::
- For example, if you know that the current version is bad, and version
4.8 is good, you could do::
git bisect log > ~/bisection-log
cp .config ~/bisection-config-culprit
git bisect reset
$ git bisect start
$ git bisect bad # Current version is bad
$ git bisect good v4.8
* Recommended optional task: try reverting the culprit on top of the latest
codebase and check if that fixes your bug; if that is the case, it validates
the bisection and enables developers to resolve the regression through a
revert.
To try this, update your clone and check out latest mainline. Then tell Git
to revert the change by specifying its commit-id::
git revert --no-edit cafec0cacaca0
Git might reject this, for example when the bisection landed on a merge
commit. In that case, abandon the attempt. Do the same, if Git fails to revert
the culprit on its own because later changes depend on it -- at least unless
you bisected a stable or longterm kernel series, in which case you want to
check out its latest codebase and try a revert there.
If a revert succeeds, build and test another kernel to check if reverting
resolved your regression.
With that the process is complete. Now report the regression as described by
Documentation/admin-guide/reporting-issues.rst.
.. [#f1] You can, optionally, provide both good and bad arguments at git
start with ``git bisect start [BAD] [GOOD]``
Additional reading material
---------------------------
For further references, please read:
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
in the Git documentation.
* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
from kernel developer Nathan Chancellor.
* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
- The man page for ``git-bisect``
- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
..
end-of-content
..
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
you spot a typo or small mistake, feel free to let him know directly and
he'll fix it. You are free to do the same in a mostly informal way if you
want to contribute changes to the text -- but for copyright reasons please CC
linux-doc@vger.kernel.org and 'sign-off' your contribution as
Documentation/process/submitting-patches.rst explains in the section 'Sign
your work - the Developer's Certificate of Origin'.
..
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
of the file. If you want to distribute this text under CC-BY-4.0 only,
please use 'The Linux kernel development community' for author attribution
and link this as source:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
..
Note: Only the content of this RST file as found in the Linux kernel sources
is available under CC-BY-4.0, as versions of this text that were processed
(for example by the kernel's build system) might contain content taken from
files which use a more restrictive license.

View File

@ -244,14 +244,14 @@ Reporting the bug
Once you find where the bug happened, by inspecting its location,
you could either try to fix it yourself or report it upstream.
In order to report it upstream, you should identify the mailing list
used for the development of the affected code. This can be done by using
the ``get_maintainer.pl`` script.
In order to report it upstream, you should identify the bug tracker, if any, or
mailing list used for the development of the affected code. This can be done by
using the ``get_maintainer.pl`` script.
For example, if you find a bug at the gspca's sonixj.c file, you can get
its maintainers with::
$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
$ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
@ -267,11 +267,12 @@ Please notice that it will point to:
- The driver maintainer (Hans Verkuil);
- The subsystem maintainer (Mauro Carvalho Chehab);
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
- The bug reporting URIs for the driver/subsystem (none in the above example).
Usually, the fastest way to have your bug fixed is to report it to mailing
list used for the development of the code (linux-media ML) copying the
driver maintainer (Hans).
If the listing contains bug reporting URIs at the end, please prefer them over
email. Otherwise, please report bugs to the mailing list used for the
development of the code (linux-media ML) copying the driver maintainer (Hans).
If you are totally stumped as to whom to send the report, and
``get_maintainer.pl`` didn't provide you anything useful, send it to

View File

@ -78,18 +78,24 @@ Brief summary of control files.
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
memory.soft_limit_in_bytes set/show soft limit of memory usage
This knob is not available on CONFIG_PREEMPT_RT systems.
This knob is deprecated and shouldn't be
used.
memory.stat show various statistics
memory.use_hierarchy set/show hierarchical account enabled
This knob is deprecated and shouldn't be
used.
memory.force_empty trigger forced page reclaim
memory.pressure_level set memory pressure notifications
This knob is deprecated and shouldn't be
used.
memory.swappiness set/show swappiness parameter of vmscan
(See sysctl's vm.swappiness)
memory.move_charge_at_immigrate set/show controls of moving charges
This knob is deprecated and shouldn't be
used.
memory.oom_control set/show oom controls.
This knob is deprecated and shouldn't be
used.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
@ -105,10 +111,18 @@ Brief summary of control files.
memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
hits limits
This knob is deprecated and shouldn't be
used.
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
This knob is deprecated and shouldn't be
used.
==================================== ==========================================
1. History
@ -693,8 +707,10 @@ For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
# echo 1 > memory.use_hierarchy
7. Soft limits
==============
7. Soft limits (DEPRECATED)
===========================
THIS IS DEPRECATED!
Soft limits allow for greater sharing of memory. The idea behind soft limits
is to allow control groups to use as much of the memory as needed, provided
@ -834,8 +850,10 @@ It's applicable for root and non-root cgroup.
.. _cgroup-v1-memory-oom-control:
10. OOM Control
===============
10. OOM Control (DEPRECATED)
============================
THIS IS DEPRECATED!
memory.oom_control file is for OOM notification and other controls.
@ -882,8 +900,10 @@ At reading, current status of OOM is shown.
The number of processes belonging to this cgroup killed by any
kind of OOM killer.
11. Memory Pressure
===================
11. Memory Pressure (DEPRECATED)
================================
THIS IS DEPRECATED!
The pressure level notifications can be used to monitor the memory
allocation cost; based on the pressure, applications can implement

View File

@ -533,10 +533,12 @@ cgroup namespace on namespace creation.
Because the resource control interface files in a given directory
control the distribution of the parent's resources, the delegatee
shouldn't be allowed to write to them. For the first method, this is
achieved by not granting access to these files. For the second, the
kernel rejects writes to all files other than "cgroup.procs" and
"cgroup.subtree_control" on a namespace root from inside the
namespace.
achieved by not granting access to these files. For the second, files
outside the namespace should be hidden from the delegatee by the means
of at least mount namespacing, and the kernel rejects writes to all
files on a namespace root from inside the cgroup namespace, except for
those files listed in "/sys/kernel/cgroup/delegate" (including
"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
The end results are equivalent for both delegation types. Once
delegated, the user can build sub-hierarchy under the directory,
@ -981,6 +983,14 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
nr_subsys_<cgroup_subsys>
Total number of live cgroup subsystems (e.g memory
cgroup) at and beneath the current cgroup.
nr_dying_subsys_<cgroup_subsys>
Total number of dying cgroup subsystems (e.g. memory
cgroup) at and beneath the current cgroup.
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
@ -1333,11 +1343,14 @@ The following nested keys are defined.
all the existing limitations and potential future extensions.
memory.peak
A read-only single value file which exists on non-root
cgroups.
A read-write single value file which exists on non-root cgroups.
The max memory usage recorded for the cgroup and its
descendants since the creation of the cgroup.
The max memory usage recorded for the cgroup and its descendants since
either the creation of the cgroup or the most recent reset for that FD.
A write of any non-empty string to this file resets it to the
current memory usage for subsequent reads through the same
file descriptor.
memory.oom.group
A read-write single value file which exists on non-root
@ -1614,6 +1627,25 @@ The following nested keys are defined.
Usually because failed to allocate some continuous swap space
for the huge page.
numa_pages_migrated (npn)
Number of pages migrated by NUMA balancing.
numa_pte_updates (npn)
Number of pages whose page table entries are modified by
NUMA balancing to produce NUMA hinting faults on access.
numa_hint_faults (npn)
Number of NUMA hinting faults.
pgdemote_kswapd
Number of pages demoted by kswapd.
pgdemote_direct
Number of pages demoted directly.
pgdemote_khugepaged
Number of pages demoted by khugepaged.
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
@ -1663,11 +1695,14 @@ The following nested keys are defined.
Healthy workloads are not expected to reach this limit.
memory.swap.peak
A read-only single value file which exists on non-root
cgroups.
A read-write single value file which exists on non-root cgroups.
The max swap usage recorded for the cgroup and its
descendants since the creation of the cgroup.
The max swap usage recorded for the cgroup and its descendants since
the creation of the cgroup or the most recent reset for that FD.
A write of any non-empty string to this file resets it to the
current memory usage for subsequent reads through the same
file descriptor.
memory.swap.max
A read-write single value file which exists on non-root
@ -1731,6 +1766,8 @@ The following nested keys are defined.
Note that this is subtly different from setting memory.swap.max to
0, as it still allows for pages to be written to the zswap pool.
This setting has no effect if zswap is disabled, and swapping
is allowed unless memory.swap.max is set to 0.
memory.pressure
A read-only nested-keyed file.
@ -2940,8 +2977,8 @@ Deprecated v1 Core Features
- "cgroup.clone_children" is removed.
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
at the root instead.
- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
"cgroup.stat" files at the root instead.
Issues with v1 and Rationales for v2

View File

@ -3,29 +3,52 @@ dm-delay
========
Device-Mapper's "delay" target delays reads and/or writes
and maps them to different devices.
and/or flushs and optionally maps them to different devices.
Parameters::
Arguments::
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
[<flush_device> <flush_offset> <flush_delay>]]
With separate write parameters, the first set is only used for reads.
Table line has to either have 3, 6 or 9 arguments:
3: apply offset and delay to read, write and flush operations on device
6: apply offset and delay to device, also apply write_offset and write_delay
to write and flush operations on optionally different write_device with
optionally different sector offset
9: same as 6 arguments plus define flush_offset and flush_delay explicitely
on/with optionally different flush_device/flush_offset.
Offsets are specified in sectors.
Delays are specified in milliseconds.
Example scripts
===============
::
#!/bin/sh
# Create device delaying rw operation for 500ms
echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
#
# Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
::
#!/bin/sh
# Create device delaying only write operation for 500ms and
# splitting reads and writes to different devices $1 $2
echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
#
# Create mapped device delaying write and flush operations for 400ms and
# splitting reads to device $1 but writes and flushs to different device $2
# to different offsets of 2048 and 4096 sectors respectively.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
::
#!/bin/sh
#
# Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms
# onto the same backing device at offset 0 sectors.
#
dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"

View File

@ -160,15 +160,24 @@ iv_large_sectors
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
if this flag is specified.
integrity_key_size:<bytes>
Use an integrity key of <bytes> size instead of using an integrity key size
of the digest size of the used HMAC algorithm.
Module parameters::
max_read_size
max_write_size
Maximum size of read or write requests. When a request larger than this size
Maximum size of read requests. When a request larger than this size
is received, dm-crypt will split the request. The splitting improves
concurrency (the split requests could be encrypted in parallel by multiple
cores), but it also causes overhead. The user should tune these parameters to
cores), but it also causes overhead. The user should tune this parameters to
fit the actual workload.
max_write_size
Maximum size of write requests. When a request larger than this size
is received, dm-crypt will split the request. The splitting improves
concurrency (the split requests could be encrypted in parallel by multiple
cores), but it also causes overhead. The user should tune this parameters to
fit the actual workload.

View File

@ -251,7 +251,12 @@ The messages are:
by the vdostats userspace program to interpret the output
buffer.
dump:
config:
Outputs useful vdo configuration information. Mostly used
by users who want to recreate a similar VDO volume and
want to know the creation configuration used.
dump:
Dumps many internal structures to the system log. This is
not always safe to run, so it should only be used to debug
a hung vdo. Optional parameters to specify structures to

View File

@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
nouser_xattr
Disables Extended User Attributes. See the attr(5) manual page for
more information about extended attributes.
noacl
This option disables POSIX Access Control List support. If ACL support
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
is enabled by default on mount. See the acl(5) manual page for more
information about acl.
bsddf (*)
Make 'df' act like BSD.

View File

@ -158,3 +158,72 @@ poisoned BTB entry and using that safe one for all function returns.
In older Zen1 and Zen2, this is accomplished using a reinterpretation
technique similar to Retbleed one: srso_untrain_ret() and
srso_safe_ret().
Checking the safe RET mitigation actually works
-----------------------------------------------
In case one wants to validate whether the SRSO safe RET mitigation works
on a kernel, one could use two performance counters
* PMC_0xc8 - Count of RET/RET lw retired
* PMC_0xc9 - Count of RET/RET lw retired mispredicted
and compare the number of RETs retired properly vs those retired
mispredicted, in kernel mode. Another way of specifying those events
is::
# perf list ex_ret_near_ret
List of pre-defined events (to be used in -e or -M):
core:
ex_ret_near_ret
[Retired Near Returns]
ex_ret_near_ret_mispred
[Retired Near Returns Mispredicted]
Either the command using the event mnemonics::
# perf stat -e ex_ret_near_ret:k -e ex_ret_near_ret_mispred:k sleep 10s
or using the raw PMC numbers::
# perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
should give the same amount. I.e., every RET retired should be
mispredicted::
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
Performance counter stats for 'sleep 10s':
137,167 cpu/event=0xc8,umask=0/k
137,173 cpu/event=0xc9,umask=0/k
10.004110303 seconds time elapsed
0.000000000 seconds user
0.004462000 seconds sys
vs the case when the mitigation is disabled (spec_rstack_overflow=off)
or not functioning properly, showing usually a lot smaller number of
mispredicted retired RETs vs the overall count of retired RETs during
a workload::
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
Performance counter stats for 'sleep 10s':
201,627 cpu/event=0xc8,umask=0/k
4,074 cpu/event=0xc9,umask=0/k
10.003267252 seconds time elapsed
0.002729000 seconds user
0.000000000 seconds sys
Also, there is a selftest which performs the above, go to
tools/testing/selftests/x86/ and do::
make srso
./srso

View File

@ -333,12 +333,17 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
force_enable - Force enable the IOMMU on platforms known
to be buggy with IOMMU enabled. Use this
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
force_enable - Force enable the IOMMU on platforms known
to be buggy with IOMMU enabled. Use this
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
nohugepages - Limit page-sizes used for v1 page-tables
to 4 KiB.
v2_pgsizes_only - Limit page-sizes used for v1 page-tables
to 4KiB/2Mib/1GiB.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@ -517,6 +522,18 @@
Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c.
bdev_allow_write_mounted=
Format: <bool>
Control the ability to open a mounted block device
for writing, i.e., allow / disallow writes that bypass
the FS. This was implemented as a means to prevent
fuzzers from crashing the kernel by overwriting the
metadata underneath a mounted FS without its awareness.
This also prevents destructive formatting of mounted
filesystems by naive storage tooling that don't use
O_EXCL. Default is Y and can be changed through the
Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
bert_disable [ACPI]
Disable BERT OS support on buggy BIOSes.
@ -2350,6 +2367,18 @@
ipcmni_extend [KNL,EARLY] Extend the maximum number of unique System V
IPC identifiers from 32,768 to 16,777,216.
ipe.enforce= [IPE]
Format: <bool>
Determine whether IPE starts in permissive (0) or
enforce (1) mode. The default is enforce.
ipe.success_audit=
[IPE]
Format: <bool>
Start IPE with success auditing enabled, emitting
an audit event when a binary is allowed. The default
is 0.
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
@ -2648,6 +2677,23 @@
Default is Y (on).
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
If enabled, KVM will enable virtualization in hardware
when KVM is loaded, and disable virtualization when KVM
is unloaded (if KVM is built as a module).
If disabled, KVM will dynamically enable and disable
virtualization on-demand when creating and destroying
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
number of VMs.
Enabling virtualization at module lode avoids potential
latency for creation of the 0=>1 VM, as KVM serializes
virtualization enabling across all online CPUs. The
"cost" of enabling virtualization when KVM is loaded,
is that doing so may interfere with using out-of-tree
hypervisors that want to "own" virtualization hardware.
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
Default is false (don't support).
@ -4123,6 +4169,21 @@
Disable NUMA, Only set up a single NUMA node
spanning all memory.
numa=fake=<size>[MG]
[KNL, ARM64, RISCV, X86, EARLY]
If given as a memory unit, fills all system RAM with
nodes of size interleaved over physical nodes.
numa=fake=<N>
[KNL, ARM64, RISCV, X86, EARLY]
If given as an integer, fills all system RAM with N
fake nodes interleaved over physical nodes.
numa=fake=<N>U
[KNL, ARM64, RISCV, X86, EARLY]
If given as an integer followed by 'U', it will
divide each physical node into N emulated nodes.
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
NUMA balancing.
Allowed values are enable and disable
@ -4788,6 +4849,16 @@
printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
proc_mem.force_override= [KNL]
Format: {always | ptrace | never}
Traditionally /proc/pid/mem allows memory permissions to be
overridden without restrictions. This option may be set to
restrict that. Can be one of:
- 'always': traditional behavior always allows mem overrides.
- 'ptrace': only allow mem overrides for active ptracers.
- 'never': never allow mem overrides.
If not specified, default is the CONFIG_PROC_MEM_* choice.
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit.
@ -4935,6 +5006,10 @@
Set maximum number of finished RCU callbacks to
process in one batch.
rcutree.csd_lock_suppress_rcu_stall= [KNL]
Do only a one-line RCU CPU stall warning when
there is an ongoing too-long CSD-lock wait.
rcutree.do_rcu_barrier= [KNL]
Request a call to rcu_barrier(). This is
throttled so that userspace tests can safely
@ -5382,7 +5457,13 @@
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
Disable interrupts while stalling if set.
Disable interrupts while stalling if set, but only
on the first stall in the set.
rcutorture.stall_cpu_repeat= [KNL]
Number of times to repeat the stall sequence,
so that rcutorture.stall_cpu_repeat=3 will result
in four stall sequences.
rcutorture.stall_gp_kthread= [KNL]
Duration (s) of forced sleep within RCU
@ -5570,14 +5651,6 @@
of zero will disable batching. Batching is
always disabled for synchronize_rcu_tasks().
rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
Set timeout in milliseconds RCU Tasks
Rude asynchronous callback batching for
call_rcu_tasks_rude(). A negative value
will take the default. A value of zero will
disable batching. Batching is always disabled
for synchronize_rcu_tasks_rude().
rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
Set timeout in milliseconds RCU Tasks
Trace asynchronous callback batching for
@ -6614,6 +6687,15 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
thp_anon= [KNL]
Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>
state is one of "always", "madvise", "never" or "inherit".
Control the default behavior of the system with respect
to anonymous transparent hugepages.
Can be used multiple times for multiple anon THP sizes.
See Documentation/admin-guide/mm/transhuge.rst for more
details.
threadirqs [KNL,EARLY]
Force threading of all interrupt handlers except those
marked explicitly IRQF_NO_THREAD.
@ -6743,6 +6825,51 @@
the same thing would happen if it was left off). The irq_handler_entry
event, and all events under the "initcall" system.
Flags can be added to the instance to modify its behavior when it is
created. The flags are separated by '^'.
The available flags are:
traceoff - Have the tracing instance tracing disabled after it is created.
traceprintk - Have trace_printk() write into this trace instance
(note, "printk" and "trace_printk" can also be used)
trace_instance=foo^traceoff^traceprintk,sched,irq
The flags must come before the defined events.
If memory has been reserved (see memmap for x86), the instance
can use that memory:
memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
The above will create a "boot_map" instance that uses the physical
memory at 0x284500000 that is 12Megs. The per CPU buffers of that
instance will be split up accordingly.
Alternatively, the memory can be reserved by the reserve_mem option:
reserve_mem=12M:4096:trace trace_instance=boot_map@trace
This will reserve 12 megabytes at boot up with a 4096 byte alignment
and place the ring buffer in this memory. Note that due to KASLR, the
memory may not be the same location each time, which will not preserve
the buffer content.
Also note that the layout of the ring buffer data may change between
kernel versions where the validator will fail and reset the ring buffer
if the layout is not the same as the previous kernel.
If the ring buffer is used for persistent bootups and has events enabled,
it is recommend to disable tracing so that events from a previous boot do not
mix with events of the current boot (unless you are debugging a random crash
at boot up).
reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
See also Documentation/trace/debugging.rst
trace_options=[option-list]
[FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options
@ -7352,6 +7479,13 @@
it can be updated at runtime by writing to the
corresponding sysfs file.
workqueue.panic_on_stall=<uint>
Panic when workqueue stall is detected by
CONFIG_WQ_WATCHDOG. It sets the number times of the
stall to trigger panic.
The default is 0, which disables the panic on stall.
workqueue.cpu_intensive_thresh_us=
Per-cpu work items which run for longer than this
threshold are automatically considered CPU intensive

View File

@ -42,10 +42,14 @@ dongles):
``persistent_config``: by default this is off, but when set to 1 the driver
will store the current settings to the device's internal eeprom and restore
it the next time the device is connected to the USB port.
- RainShadow Tech. Note: this driver does not support the persistent_config
module option of the Pulse-Eight driver. The hardware supports it, but I
have no plans to add this feature. But I accept patches :-)
- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
:ref:`extron_da_hd_4k_plus` for more information.
Miscellaneous:
- vivid: emulates a CEC receiver and CEC transmitter.
@ -378,3 +382,86 @@ it later using ``--analyze-pin``.
You can also use this as a full-fledged CEC device by configuring it
using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
.. _extron_da_hd_4k_plus:
Extron DA HD 4K PLUS CEC Adapter driver
=======================================
This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
Amplifiers: https://www.extron.com/product/dahd4kplusseries
The 2, 4 and 6 port models are supported.
Firmware version 1.02.0001 or higher is required.
Note that older Extron hardware revisions have a problem with the CEC voltage,
which may mean that CEC will not work. This is fixed in hardware revisions
E34814 and up.
The CEC support has two modes: the first is a manual mode where userspace has
to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
full control, it is also complicated.
The second mode is an automatic mode, which is selected if the module option
``vendor_id`` is set. In that case the driver controls CEC and CEC messages
received in the input will be distributed to the outputs. It is still possible
to use the /dev/cecX devices to talk to the connected devices directly, but it is
the driver that configures everything and deals with things like Hotplug Detect
changes.
The driver also takes care of the EDIDs: /dev/videoX devices are created to
read the EDIDs and (for the HDMI Input port) to set the EDID.
By default userspace is responsible to set the EDID for the HDMI Input
according to the EDIDs of the connected displays. But if the ``manufacturer_name``
module option is set, then the driver will take care of setting the EDID
of the HDMI Input based on the supported resolutions of the connected displays.
Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
it will fall back to an EDID that just reports 1080p60.
The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
The extron-da-hd-4k-plus driver implements the following module options:
``debug``
---------
If set to 1, then all serial port traffic is shown.
``vendor_id``
-------------
The CEC Vendor ID to report to connected displays.
If set, then the driver will take care of distributing CEC messages received
on the input to the HDMI outputs. This is done for the following CEC messages:
- <Standby>
- <Image View On> and <Text View On>
- <Give Device Power Status>
- <Set System Audio Mode>
- <Request Current Latency>
If not set, then userspace is responsible for this, and it will have to
configure the CEC devices for HDMI Input and the HDMI Outputs manually.
``manufacturer_name``
---------------------
A three character manufacturer name that is used in the EDID for the HDMI
Input. If not set, then userspace is reponsible for configuring an EDID.
If set, then the driver will update the EDID automatically based on the
resolutions supported by the connected displays, and it will not be possible
anymore to manually set the EDID for the HDMI Input.
``hpd_never_low``
-----------------
If set, then the Hotplug Detect pin of the HDMI Input will always be high,
even if nothing is connected to the HDMI Outputs. If not set (the default)
then the Hotplug Detect pin of the HDMI input will go low if all the detected
Hotplug Detect pins of the HDMI Outputs are also low.
This option may be changed dynamically.

View File

@ -227,8 +227,13 @@ Common FPDL3/GMSL output parameters
open.*
**frame_rate** (RW):
Output video frame rate in frames per second. The default frame rate is
60Hz.
Output video signal frame rate limit in frames per second. Due to
the limited output pixel clock steps, the card can not always generate
a frame rate perfectly matching the value required by the connected display.
Using this parameter one can limit the frame rate by "crippling" the signal
so that the lines are not equal (the porches of the last line differ) but
the signal appears like having the exact frame rate to the connected display.
The default frame rate limit is 60Hz.
**hsync_polarity** (RW):
HSYNC signal polarity.
@ -253,33 +258,33 @@ Common FPDL3/GMSL output parameters
and there is a non-linear stepping between two consecutive allowed
frequencies. The driver finds the nearest allowed frequency to the given
value and sets it. When reading this property, you get the exact
frequency set by the driver. The default frequency is 70000kHz.
frequency set by the driver. The default frequency is 61150kHz.
*Note: This parameter can not be changed while the output v4l2 device is
open.*
**hsync_width** (RW):
Width of the HSYNC signal in pixels. The default value is 16.
Width of the HSYNC signal in pixels. The default value is 40.
**vsync_width** (RW):
Width of the VSYNC signal in video lines. The default value is 2.
Width of the VSYNC signal in video lines. The default value is 20.
**hback_porch** (RW):
Number of PCLK pulses between deassertion of the HSYNC signal and the first
valid pixel in the video line (marked by DE=1). The default value is 32.
valid pixel in the video line (marked by DE=1). The default value is 50.
**hfront_porch** (RW):
Number of PCLK pulses between the end of the last valid pixel in the video
line (marked by DE=1) and assertion of the HSYNC signal. The default value
is 32.
is 50.
**vback_porch** (RW):
Number of video lines between deassertion of the VSYNC signal and the video
line with the first valid pixel (marked by DE=1). The default value is 2.
line with the first valid pixel (marked by DE=1). The default value is 31.
**vfront_porch** (RW):
Number of video lines between the end of the last valid pixel line (marked
by DE=1) and assertion of the VSYNC signal. The default value is 2.
by DE=1) and assertion of the VSYNC signal. The default value is 30.
FPDL3 specific input parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -114,11 +114,18 @@ to be applied to the hardware during a video stream, allowing userspace
to dynamically modify values such as black level, cross talk corrections
and others.
The buffer format is defined by struct :c:type:`rkisp1_params_cfg`, and
userspace should set
The ISP driver supports two different parameters configuration methods, the
`fixed parameters format` or the `extensible parameters format`.
When using the `fixed parameters` method the buffer format is defined by struct
:c:type:`rkisp1_params_cfg`, and userspace should set
:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
dataformat.
When using the `extensible parameters` method the buffer format is defined by
struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
the dataformat.
Capturing Video Frames Example
==============================

View File

@ -328,7 +328,7 @@ and an HDMI input, one input for each input type. Those are described in more
detail below.
Special attention has been given to the rate at which new frames become
available. The jitter will be around 1 jiffie (that depends on the HZ
available. The jitter will be around 1 jiffy (that depends on the HZ
configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second),
but the long-term behavior is exactly following the framerate. So a
framerate of 59.94 Hz is really different from 60 Hz. If the framerate
@ -1343,7 +1343,7 @@ Some Future Improvements
Just as a reminder and in no particular order:
- Add a virtual alsa driver to test audio
- Add virtual sub-devices and media controller support
- Add virtual sub-devices
- Some support for testing compressed video
- Add support to loop raw VBI output to raw VBI input
- Add support to loop teletext sliced VBI output to VBI input
@ -1358,4 +1358,4 @@ Just as a reminder and in no particular order:
- Make a thread for the RDS generation, that would help in particular for the
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
in real-time.
- Changing the EDID should cause hotplug detect emulation to happen.
- Changing the EDID doesn't wait 100 ms before setting the HPD signal.

View File

@ -7,7 +7,7 @@ Getting Started
This document briefly describes how you can use DAMON by demonstrating its
default user space tool. Please note that this document describes only a part
of its features for brevity. Please refer to the usage `doc
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_ of the tool for more
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
details.
@ -26,7 +26,7 @@ User Space Tool
For the demonstration, we will use the default user space tool for DAMON,
called DAMON Operator (DAMO). It is available at
https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
your ``$PATH``. It's not mandatory, though.
Because DAMO is using the sysfs interface (refer to :doc:`usage` for the

View File

@ -7,19 +7,19 @@ Detailed Usages
DAMON provides below interfaces for different users.
- *DAMON user space tool.*
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
`This <https://github.com/damonitor/damo>`_ is for privileged people such as
system administrators who want a just-working human-friendly interface.
Using this, users can use the DAMONs major features in a human-friendly way.
It may not be highly tuned for special cases, though. For more detail,
please refer to its `usage document
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
<https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
- *sysfs interface.*
:ref:`This <sysfs_interface>` is for privileged user space programmers who
want more optimized use of DAMON. Using this, users can use DAMONs major
features by reading from and writing to special sysfs files. Therefore,
you can write and use your personalized DAMON sysfs wrapper programs that
reads/writes the sysfs files instead of you. The `DAMON user space tool
<https://github.com/awslabs/damo>`_ is one example of such programs.
<https://github.com/damonitor/damo>`_ is one example of such programs.
- *Kernel Space Programming Interface.*
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
users can utilize every feature of DAMON most flexibly and efficiently by
@ -543,7 +543,7 @@ memory rate becomes larger than 60%, or lower than 30%". ::
# echo 300 > watermarks/low
Please note that it's highly recommended to use user space tools like `damo
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
<https://github.com/damonitor/damo>`_ rather than manually reading and writing
the files as above. Above is only for an example.
.. _tracepoint:

View File

@ -294,8 +294,9 @@ The following files are currently defined:
``crash_hotplug`` read-only: when changes to the system memory map
occur due to hot un/plug of memory, this file contains
'1' if the kernel updates the kdump capture kernel memory
map itself (via elfcorehdr), or '0' if userspace must update
the kdump capture kernel memory map.
map itself (via elfcorehdr and other relevant kexec
segments), or '0' if userspace must update the kdump
capture kernel memory map.
Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
configuration option.

View File

@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
All THPs at fault and collapse time will be added to _deferred_list,
and will therefore be split under memory presure if they are considered
"underused". A THP is underused if the number of zero-filled pages in
the THP is above max_ptes_none (see below). It is possible to disable
this behaviour by writing 0 to shrink_underused, and enable it by writing
1 to it::
echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
khugepaged will be automatically started when PMD-sized THP is enabled
(either of the per-size anon control or the top-level control are set
to "always" or "madvise"), and it'll be automatically shutdown when
@ -284,13 +294,37 @@ that THP is shared. Exceeding the number would block the collapse::
A higher value may increase memory footprint for some workloads.
Boot parameter
==============
Boot parameters
===============
You can change the sysfs boot time defaults of Transparent Hugepage
Support by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
to the kernel command line.
You can change the sysfs boot time default for the top-level "enabled"
control by passing the parameter ``transparent_hugepage=always`` or
``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
kernel command line.
Alternatively, each supported anonymous THP size can be controlled by
passing ``thp_anon=<size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>``,
where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
``never`` or ``inherit``.
For example, the following will set 16K, 32K, 64K THP to ``always``,
set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
to ``never``::
thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
``thp_anon=`` may be specified multiple times to configure all THP sizes as
required. If ``thp_anon=`` is specified at least once, any anon THP sizes
not explicitly configured on the command line are implicitly set to
``never``.
``transparent_hugepage`` setting only affects the global toggle. If
``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
However, if a valid ``thp_anon`` setting is provided by the user, the
PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
is not defined within a valid ``thp_anon``, its policy will default to
``never``.
Hugepages in tmpfs/shmem
========================
@ -447,6 +481,12 @@ thp_deferred_split_page
splitting it would free up some memory. Pages on split queue are
going to be split under memory pressure.
thp_underused_split_page
is incremented when a huge page on the split queue was split
because it was underused. A THP is underused if the number of
zero pages in the THP is above a certain threshold
(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
thp_split_pmd
is incremented every time a PMD split into table of PTEs.
This can happen, for instance, when application calls mprotect() or
@ -527,6 +567,18 @@ split_deferred
it would free up some memory. Pages on split queue are going to
be split under memory pressure, if splitting is possible.
nr_anon
the number of anonymous THP we have in the whole system. These THPs
might be currently entirely mapped or have partially unmapped/unused
subpages.
nr_anon_partially_mapped
the number of anonymous THP which are likely partially mapped, possibly
wasting memory, and have been queued for deferred memory reclamation.
Note that in corner some cases (e.g., failed migration), we might detect
an anonymous THP as "partially mapped" and count it here, even though it
is not actually partially mapped anymore.
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help

View File

@ -0,0 +1,17 @@
====================================
Arm Network-on Chip Interconnect PMU
====================================
NI-700 and friends implement a distinct PMU for each clock domain within the
interconnect. Correspondingly, the driver exposes multiple PMU devices named
arm_ni_<x>_cd_<y>, where <x> is an (arbitrary) instance identifier and <y> is
the clock domain ID within that particular instance. If multiple NI instances
exist within a system, the PMU devices can be correlated with the underlying
hardware instance via sysfs parentage.
Each PMU exposes base event aliases for the interface types present in its clock
domain. These require qualifying with the "eventid" and "nodeid" parameters
to specify the event code to count and the interface at which to count it
(per the configured hardware ID as reflected in the xxNI_NODE_INFO register).
The exception is the "cycles" alias for the PMU cycle counter, which is encoded
with the PMU node type and needs no further qualification.

View File

@ -46,16 +46,16 @@ Some of the events only exist for specific configurations.
DesignWare Cores (DWC) PCIe PMU Driver
=======================================
This driver adds PMU devices for each PCIe Root Port named based on the BDF of
This driver adds PMU devices for each PCIe Root Port named based on the SBDF of
the Root Port. For example,
30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
0001:30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
the PMU device name for this Root Port is dwc_rootport_3018.
the PMU device name for this Root Port is dwc_rootport_13018.
The DWC PCIe PMU driver registers a perf PMU driver, which provides
description of available events and configuration options in sysfs, see
/sys/bus/event_source/devices/dwc_rootport_{bdf}.
/sys/bus/event_source/devices/dwc_rootport_{sbdf}.
The "format" directory describes format of the config fields of the
perf_event_attr structure. The "events" directory provides configuration
@ -66,16 +66,16 @@ The "perf list" command shall list the available events from sysfs, e.g.::
$# perf list | grep dwc_rootport
<...>
dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
<...>
dwc_rootport_3018/rx_memory_read,lane=?/ [Kernel PMU event]
dwc_rootport_13018/rx_memory_read,lane=?/ [Kernel PMU event]
Time Based Analysis Event Usage
-------------------------------
Example usage of counting PCIe RX TLP data payload (Units of bytes)::
$# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
$# perf stat -a -e dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/
The average RX/TX bandwidth can be calculated using the following formula:
@ -88,7 +88,7 @@ Lane Event Usage
Each lane has the same event set and to avoid generating a list of hundreds
of events, the user need to specify the lane ID explicitly, e.g.::
$# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/
$# perf stat -a -e dwc_rootport_13018/rx_memory_read,lane=4/
The driver does not support sampling, therefore "perf record" will not
work. Per-task (without "-a") perf sessions are not supported.

View File

@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the
PMU hardware device.
The "bus" sysfs file allows users to get the bus number of Root Ports
monitored by PMU.
monitored by PMU. Furthermore users can get the Root Ports range in
[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes
respectively.
Example usage of perf::

View File

@ -16,6 +16,7 @@ Performance monitor support
starfive_starlink_pmu
arm-ccn
arm-cmn
arm-ni
xgene-pmu
arm_dsu_pmu
thunderx2-pmu

View File

@ -251,7 +251,9 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
table, so we need to expose it to sysfs. If boost is not active, but
still supported, this maximum frequency will be larger than the one in
``cpuinfo``.
``cpuinfo``. On systems that support preferred core, the driver will have
different values for some cores than others and this will reflect the values
advertised by the platform at bootup.
This attribute is read-only.
``amd_pstate_lowest_nonlinear_freq``
@ -262,6 +264,17 @@ lowest non-linear performance in `AMD CPPC Performance Capability
<perf_cap_>`_.)
This attribute is read-only.
``amd_pstate_hw_prefcore``
Whether the platform supports the preferred core feature and it has been
enabled. This attribute is read-only.
``amd_pstate_prefcore_ranking``
The performance ranking of the core. This number doesn't have any unit, but
larger numbers are preferred at the time of reading. This can change at
runtime based on platform conditions. This attribute is read-only.
``energy_performance_available_preferences``
A list of all the supported EPP preferences that could be used for

View File

@ -113,3 +113,62 @@ to apply at each uncore* level.
Support for "current_freq_khz" is available only at each fabric cluster
level (i.e., in uncore* directory).
Efficiency vs. Latency Tradeoff
-------------------------------
The Efficiency Latency Control (ELC) feature improves performance
per watt. With this feature hardware power management algorithms
optimize trade-off between latency and power consumption. For some
latency sensitive workloads further tuning can be done by SW to
get desired performance.
The hardware monitors the average CPU utilization across all cores
in a power domain at regular intervals and decides an uncore frequency.
While this may result in the best performance per watt, workload may be
expecting higher performance at the expense of power. Consider an
application that intermittently wakes up to perform memory reads on an
otherwise idle system. In such cases, if hardware lowers uncore
frequency, then there may be delay in ramp up of frequency to meet
target performance.
The ELC control defines some parameters which can be changed from SW.
If the average CPU utilization is below a user-defined threshold
(elc_low_threshold_percent attribute below), the user-defined uncore
floor frequency will be used (elc_floor_freq_khz attribute below)
instead of hardware calculated minimum.
Similarly in high load scenario where the CPU utilization goes above
the high threshold value (elc_high_threshold_percent attribute below)
instead of jumping to maximum uncore frequency, frequency is increased
in 100MHz steps. This avoids consuming unnecessarily high power
immediately with CPU utilization spikes.
Attributes for efficiency latency control:
``elc_floor_freq_khz``
This attribute is used to get/set the efficiency latency floor frequency.
If this variable is lower than the 'min_freq_khz', it is ignored by
the firmware.
``elc_low_threshold_percent``
This attribute is used to get/set the efficiency latency control low
threshold. This attribute is in percentages of CPU utilization.
``elc_high_threshold_percent``
This attribute is used to get/set the efficiency latency control high
threshold. This attribute is in percentages of CPU utilization.
``elc_high_threshold_enable``
This attribute is used to enable/disable the efficiency latency control
high threshold. Write '1' to enable, '0' to disable.
Example system configuration below, which does following:
* when CPU utilization is less than 10%: sets uncore frequency to 800MHz
* when CPU utilization is higher than 95%: increases uncore frequency in
100MHz steps, until power limit is reached
elc_floor_freq_khz:800000
elc_high_threshold_percent:95
elc_high_threshold_enable:1
elc_low_threshold_percent:10

View File

@ -129,7 +129,7 @@ Setting the ramoops parameters can be done in several different manners:
takes a size, alignment and name as arguments. The name is used
to map the memory to a label that can be retrieved by ramoops.
reserver_mem=2M:4096:oops ramoops.mem_name=oops
reserve_mem=2M:4096:oops ramoops.mem_name=oops
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()

View File

@ -182,3 +182,5 @@ More detailed explanation for tainting
produce extremely unusual kernel structure layouts (even performance
pathological ones), which is important to know when debugging. Set at
build time.
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.

View File

@ -359,7 +359,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
descriptor you want a callback to be called at the end of the transfer
(dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
Depending on the direction, set the callback on the descriptor that finishes
the overal transfer:
the overall transfer:
* DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
* DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
@ -371,7 +371,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
STM32 MDMA channel before STM32 DMA channel.
If any, your callback will be called to warn you about the end of the overal
If any, your callback will be called to warn you about the end of the overall
transfer or the period completion.
Don't forget to terminate both channels. STM32 DMA channel is configured in

View File

@ -26,7 +26,7 @@ There are no systems that support the physical addition (or removal) of CPUs
while the system is running, and ACPI is not able to sufficiently describe
them.
e.g. New CPUs come with new caches, but the platform's cache toplogy is
e.g. New CPUs come with new caches, but the platform's cache topology is
described in a static table, the PPTT. How caches are shared between CPUs is
not discoverable, and must be described by firmware.

View File

@ -365,6 +365,8 @@ HWCAP2_SME_SF8DP2
HWCAP2_SME_SF8DP4
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
HWCAP2_POE
Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
4. Unused AT_HWCAP bits
-----------------------

View File

@ -55,6 +55,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
+----------------+-----------------+-----------------+-----------------------------+
| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
+----------------+-----------------+-----------------+-----------------------------+
@ -249,8 +251,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A |
| | Hip09 SMMU PMCG | | |
| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A |
| | ,11} SMMU PMCG | | |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |

View File

@ -85,6 +85,38 @@ to CPUINTC directly::
| Devices |
+---------+
Advanced Extended IRQ model
===========================
In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
+-----+ +-----------------------+ +-------+
| IPI | --> | CPUINTC | <-- | Timer |
+-----+ +-----------------------+ +-------+
^ ^ ^
| | |
+---------+ +----------+ +---------+ +-------+
| EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+---------+ +----------+ +---------+ +-------+
^ ^
| |
+---------+ +---------+
| PCH-PIC | | PCH-MSI |
+---------+ +---------+
^ ^ ^
| | |
+---------+ +---------+ +---------+
| Devices | | PCH-LPC | | Devices |
+---------+ +---------+ +---------+
^
|
+---------+
| Devices |
+---------+
ACPI-related definitions
========================

View File

@ -134,7 +134,7 @@ Hardware
* PTCR and partition table entries (partition table is in secure
memory). An attempt to write to PTCR will cause a Hypervisor
Emulation Assitance interrupt.
Emulation Assistance interrupt.
* LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
non-architected registers. An attempt to write to them will cause a

View File

@ -15,7 +15,7 @@ status for the use of Vector in userspace. The intended usage guideline for
these interfaces is to give init systems a way to modify the availability of V
for processes running under its domain. Calling these interfaces is not
recommended in libraries routines because libraries should not override policies
configured from the parant process. Also, users must noted that these interfaces
configured from the parent process. Also, users must note that these interfaces
are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
to use in a portable code. To get the availability of V in an ELF program,
please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the

View File

@ -999,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
resulting from plugging it in references a queue device bound to the vfio_ap
device driver.
Driver Features
===============
The vfio_ap driver exposes a sysfs file containing supported features.
This exists so third party tools (like Libvirt and mdevctl) can query the
availability of specific features.
The features list can be found here: /sys/bus/matrix/devices/matrix/features
Entries are space delimited. Each entry consists of a combination of
alphanumeric and underscore characters.
Example:
cat /sys/bus/matrix/devices/matrix/features
guest_matrix dyn ap_config
the following features are advertised:
---------------+---------------------------------------------------------------+
| Flag | Description |
+==============+===============================================================+
| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
| | adapters and domains that are or will be passed through to a |
| | guest when the mdev is attached to it. |
+--------------+---------------------------------------------------------------+
| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
| | domains for a guest to which the mdev is attached. |
+------------+-----------------------------------------------------------------+
| ap_config | ap_config interface for one-shot modifications to mdev config |
+--------------+---------------------------------------------------------------+
Limitations
===========
Live guest migration is not supported for guests using AP devices without

View File

@ -162,7 +162,7 @@ Mitigation points
3. It would take a large number of these precisely-timed NMIs to mount
an actual attack. There's presumably not enough bandwidth.
4. The NMI in question occurs after a VERW, i.e. when user state is
restored and most interesting data is already scrubbed. Whats left
restored and most interesting data is already scrubbed. What's left
is only the data that NMI touches, and that may or may not be of
any interest.

View File

@ -170,18 +170,6 @@ NUMA
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
numa=fake=<size>[MG]
If given as a memory unit, fills all system RAM with nodes of
size interleaved over physical nodes.
numa=fake=<N>
If given as an integer, fills all system RAM with N fake nodes
interleaved over physical nodes.
numa=fake=<N>U
If given as an integer followed by 'U', it will divide each
physical node into N emulated nodes.
ACPI
====

View File

@ -125,7 +125,7 @@ FSGSBASE instructions enablement
FSGSBASE instructions compiler support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
instructions. Clang 5 supports them as well.
=================== ===========================
@ -135,7 +135,7 @@ instructions. Clang 5 supports them as well.
_writegsbase_u64() Write the GS base register
=================== ===========================
To utilize these instrinsics <immintrin.h> must be included in the source
To utilize these intrinsics <immintrin.h> must be included in the source
code and the compiler option -mfsgsbase has to be added.
Compiler support for FS/GS based addressing

View File

@ -9,7 +9,7 @@ controllers), BFQ's main features are:
- BFQ guarantees a high system and application responsiveness, and a
low latency for time-sensitive applications, such as audio or video
players;
- BFQ distributes bandwidth, and not just time, among processes or
- BFQ distributes bandwidth, not just time, among processes or
groups (switching back to time distribution when needed to keep
throughput high).
@ -111,7 +111,7 @@ Higher speed for code-development tasks
If some additional workload happens to be executed in parallel, then
BFQ executes the I/O-related components of typical code-development
tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
NOOP or DEADLINE.
High throughput
@ -127,9 +127,9 @@ Strong fairness, bandwidth and delay guarantees
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
BFQ distributes the device throughput, and not just the device time,
among I/O-bound applications in proportion their weights, with any
among I/O-bound applications in proportion to their weights, with any
workload and regardless of the device parameters. From these bandwidth
guarantees, it is possible to compute tight per-I/O-request delay
guarantees, it is possible to compute a tight per-I/O-request delay
guarantees by a simple formula. If not configured for strict service
guarantees, BFQ switches to time-based resource sharing (only) for
applications that would otherwise cause a throughput loss.
@ -199,7 +199,7 @@ plus a lot of code, are borrowed from CFQ.
- On flash-based storage with internal queueing of commands
(typically NCQ), device idling happens to be always detrimental
for throughput. So, with these devices, BFQ performs idling
to throughput. So, with these devices, BFQ performs idling
only when strictly needed for service guarantees, i.e., for
guaranteeing low latency or fairness. In these cases, overall
throughput may be sub-optimal. No solution currently exists to
@ -212,7 +212,7 @@ plus a lot of code, are borrowed from CFQ.
and to reduce their latency. The most important action taken to
achieve this goal is to give to the queues associated with these
applications more than their fair share of the device
throughput. For brevity, we call just "weight-raising" the whole
throughput. For brevity, we call it just "weight-raising" the whole
sets of actions taken by BFQ to privilege these queues. In
particular, BFQ provides a milder form of weight-raising for
interactive applications, and a stronger form for soft real-time
@ -231,7 +231,7 @@ plus a lot of code, are borrowed from CFQ.
responsive in detecting interleaved I/O (cooperating processes),
that it enables BFQ to achieve a high throughput, by queue
merging, even for queues for which CFQ needs a different
mechanism, preemption, to get a high throughput. As such EQM is a
mechanism, preemption, to get a high throughput. As such, EQM is a
unified mechanism to achieve a high throughput with interleaved
I/O.
@ -254,7 +254,7 @@ plus a lot of code, are borrowed from CFQ.
- First, with any proportional-share scheduler, the maximum
deviation with respect to an ideal service is proportional to
the maximum budget (slice) assigned to queues. As a consequence,
BFQ can keep this deviation tight not only because of the
BFQ can keep this deviation tight, not only because of the
accurate service of B-WF2Q+, but also because BFQ *does not*
need to assign a larger budget to a queue to let the queue
receive a higher fraction of the device throughput.
@ -327,7 +327,7 @@ applications. Unset this tunable if you need/want to control weights.
slice_idle
----------
This parameter specifies how long BFQ should idle for next I/O
This parameter specifies how long BFQ should idle for the next I/O
request, when certain sync BFQ queues become empty. By default
slice_idle is a non-zero value. Idling has a double purpose: boosting
throughput and making sure that the desired throughput distribution is
@ -365,7 +365,7 @@ terms of I/O-request dispatches. To guarantee that the actual service
order then corresponds to the dispatch order, the strict_guarantees
tunable must be set too.
There is an important flipside for idling: apart from the above cases
There is an important flip side to idling: apart from the above cases
where it is beneficial also for throughput, idling can severely impact
throughput. One important case is random workload. Because of this
issue, BFQ tends to avoid idling as much as possible, when it is not
@ -475,7 +475,7 @@ max_budget
Maximum amount of service, measured in sectors, that can be provided
to a BFQ queue once it is set in service (of course within the limits
of the above timeout). According to what said in the description of
of the above timeout). According to what was said in the description of
the algorithm, larger values increase the throughput in proportion to
the percentage of sequential I/O requests issued. The price of larger
values is that they coarsen the granularity of short-term bandwidth

View File

@ -368,7 +368,7 @@ No additional type data follow ``btf_type``.
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
or BTF_FUNC_EXTERN)
or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
@ -424,9 +424,8 @@ following data::
__u32 linkage;
};
``struct btf_var`` encoding:
* ``linkage``: currently only static variable 0, or globally allocated
variable in ELF sections 1
``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
see :ref:`BTF_Var_Linkage_Constants`.
Not all type of global variables are supported by LLVM at this point.
The following is currently available:
@ -549,6 +548,38 @@ The ``btf_enum64`` encoding:
If the original enum value is signed and the size is less than 8,
that value will be sign extended into 8 bytes.
2.3 Constant Values
-------------------
.. _BTF_Function_Linkage_Constants:
2.3.1 Function Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Function Linkage Values and Meanings
=================== ===== ===========
kind value description
=================== ===== ===========
``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
=================== ===== ===========
.. _BTF_Var_Linkage_Constants:
2.3.2 Variable Linkage Constant Values
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table:: Variable Linkage Values and Meanings
============================ ===== ===========
kind value description
============================ ===== ===========
``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
============================ ===== ===========
3. BTF Kernel API
=================

View File

@ -121,6 +121,8 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
@ -131,11 +133,23 @@ described in more detail in the footnotes.
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
+ + +----------------------------------+-----------+
| | | ``tc`` | |
| | | ``tc`` [#tc_legacy]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -155,7 +169,9 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
+ + +----------------------------------+-----------+
| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@ -209,5 +225,11 @@ described in more detail in the footnotes.
``a-zA-Z0-9_.*?``.
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
``tcx/*`` instead.
.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
but ``name`` is ignored and it is recommended to just use plain
``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
that is tagged with ``SEC(".struct_ops[.link]")``.
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.

View File

@ -418,7 +418,7 @@ The rules for correspondence between registers / stack slots are as follows:
linked to the registers and stack slots of the parent state with the same
indices.
* For the outer stack frames, only caller saved registers (r6-r9) and stack
* For the outer stack frames, only callee saved registers (r6-r9) and stack
slots are linked to the registers and stack slots of the parent state with the
same indices.

View File

@ -0,0 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
===========================
Scope-based Cleanup Helpers
===========================
.. kernel-doc:: include/linux/cleanup.h
:doc: scope-based cleanup helpers

View File

@ -737,8 +737,9 @@ can process the event further.
When changes to the CPUs in the system occur, the sysfs file
/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel
updates the kdump capture kernel list of CPUs itself (via elfcorehdr),
or '0' if userspace must update the kdump capture kernel list of CPUs.
updates the kdump capture kernel list of CPUs itself (via elfcorehdr and
other relevant kexec segment), or '0' if userspace must update the kdump
capture kernel list of CPUs.
The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration
option.
@ -750,8 +751,9 @@ file can be used in a udev rule as follows:
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
For a CPU hot un/plug event, if the architecture supports kernel updates
of the elfcorehdr (which contains the list of CPUs), then the rule skips
the unload-then-reload of the kdump capture kernel.
of the elfcorehdr (which contains the list of CPUs) and other relevant
kexec segments, then the rule skips the unload-then-reload of the kdump
capture kernel.
Kernel Inline Documentations Reference
======================================

View File

@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel.
kobject
kref
cleanup
assoc_array
xarray
maple_tree
@ -49,6 +50,7 @@ Library functionality that is used throughout the kernel.
wrappers/atomic_t
wrappers/atomic_bitops
floating-point
union_find
Low level entry and exit
========================

View File

@ -45,8 +45,9 @@ here we briefly outline their recommended usage:
* If the allocation is performed from an atomic context, e.g interrupt
handler, use ``GFP_NOWAIT``. This flag prevents direct reclaim and
IO or filesystem operations. Consequently, under memory pressure
``GFP_NOWAIT`` allocation is likely to fail. Allocations which
have a reasonable fallback should be using ``GFP_NOWARN``.
``GFP_NOWAIT`` allocation is likely to fail. Users of this flag need
to provide a suitable fallback to cope with such failures where
appropriate.
* If you think that accessing memory reserves is justified and the kernel
will be stressed unless allocation succeeds, you may use ``GFP_ATOMIC``.
* Untrusted allocations triggered from userspace should be a subject

View File

@ -576,13 +576,12 @@ The field width is passed by value, the bitmap is passed by reference.
Helper macros cpumask_pr_args() and nodemask_pr_args() are available to ease
printing cpumask and nodemask.
Flags bitfields such as page flags, page_type, gfp_flags
Flags bitfields such as page flags and gfp_flags
--------------------------------------------------------
::
%pGp 0x17ffffc0002036(referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff)
%pGt 0xffffff7f(buddy)
%pGg GFP_USER|GFP_DMA32|GFP_NOWARN
%pGv read|exec|mayread|maywrite|mayexec|denywrite
@ -591,7 +590,6 @@ would construct the value. The type of flags is given by the third
character. Currently supported are:
- p - [p]age flags, expects value of type (``unsigned long *``)
- t - page [t]ype, expects value of type (``unsigned int *``)
- v - [v]ma_flags, expects value of type (``unsigned long *``)
- g - [g]fp_flags, expects value of type (``gfp_t *``)

View File

@ -0,0 +1,106 @@
.. SPDX-License-Identifier: GPL-2.0
====================
Union-Find in Linux
====================
:Date: June 21, 2024
:Author: Xavier <xavier_qy@163.com>
What is union-find, and what is it used for?
------------------------------------------------
Union-find is a data structure used to handle the merging and querying
of disjoint sets. The primary operations supported by union-find are:
Initialization: Resetting each element as an individual set, with
each set's initial parent node pointing to itself.
Find: Determine which set a particular element belongs to, usually by
returning a “representative element” of that set. This operation
is used to check if two elements are in the same set.
Union: Merge two sets into one.
As a data structure used to maintain sets (groups), union-find is commonly
utilized to solve problems related to offline queries, dynamic connectivity,
and graph theory. It is also a key component in Kruskal's algorithm for
computing the minimum spanning tree, which is crucial in scenarios like
network routing. Consequently, union-find is widely referenced. Additionally,
union-find has applications in symbolic computation, register allocation,
and more.
Space Complexity: O(n), where n is the number of nodes.
Time Complexity: Using path compression can reduce the time complexity of
the find operation, and using union by rank can reduce the time complexity
of the union operation. These optimizations reduce the average time
complexity of each find and union operation to O(α(n)), where α(n) is the
inverse Ackermann function. This can be roughly considered a constant time
complexity for practical purposes.
This document covers use of the Linux union-find implementation. For more
information on the nature and implementation of union-find, see:
Wikipedia entry on union-find
https://en.wikipedia.org/wiki/Disjoint-set_data_structure
Linux implementation of union-find
-----------------------------------
Linux's union-find implementation resides in the file "lib/union_find.c".
To use it, "#include <linux/union_find.h>".
The union-find data structure is defined as follows::
struct uf_node {
struct uf_node *parent;
unsigned int rank;
};
In this structure, parent points to the parent node of the current node.
The rank field represents the height of the current tree. During a union
operation, the tree with the smaller rank is attached under the tree with the
larger rank to maintain balance.
Initializing union-find
-----------------------
You can complete the initialization using either static or initialization
interface. Initialize the parent pointer to point to itself and set the rank
to 0.
Example::
struct uf_node my_node = UF_INIT_NODE(my_node);
or
uf_node_init(&my_node);
Find the Root Node of union-find
--------------------------------
This operation is mainly used to determine whether two nodes belong to the same
set in the union-find. If they have the same root, they are in the same set.
During the find operation, path compression is performed to improve the
efficiency of subsequent find operations.
Example::
int connected;
struct uf_node *root1 = uf_find(&node_1);
struct uf_node *root2 = uf_find(&node_2);
if (root1 == root2)
connected = 1;
else
connected = 0;
Union Two Sets in union-find
----------------------------
To union two sets in the union-find, you first find their respective root nodes
and then link the smaller node to the larger node based on the rank of the root
nodes.
Example::
uf_union(&node_1, &node_2);

View File

@ -75,6 +75,17 @@ Only files which are linked to the main kernel image or are compiled as
kernel modules are supported by this mechanism.
Module specific configs
-----------------------
Gcov kernel configs for specific modules are described below:
CONFIG_GCOV_PROFILE_RDS:
Enables GCOV profiling on RDS for checking which functions or
lines are executed. This config is used by the rds selftest to
generate coverage reports. If left unset the report is omitted.
Files
-----

View File

@ -361,7 +361,8 @@ Alternatives Considered
-----------------------
An alternative data race detection approach for the kernel can be found in the
`Kernel Thread Sanitizer (KTSAN) <https://github.com/google/ktsan/wiki>`_.
`Kernel Thread Sanitizer (KTSAN)
<https://github.com/google/kernel-sanitizers/blob/master/KTSAN.md>`_.
KTSAN is a happens-before data race detector, which explicitly establishes the
happens-before order between memory operations, which can then be used to
determine data races as defined in `Data Races`_.

View File

@ -53,6 +53,13 @@ configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
The KUnit test suite is very likely to fail when using a deferrable timer
since it currently causes very unpredictable sample intervals.
By default KFENCE will only sample 1 heap allocation within each sample
interval. *Burst mode* allows to sample successive heap allocations, where the
kernel boot parameter ``kfence.burst`` can be set to a non-zero value which
denotes the *additional* successive allocations within a sample interval;
setting ``kfence.burst=N`` means that ``1 + N`` successive allocations are
attempted through KFENCE for each sample interval.
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
255), the number of available guarded objects can be controlled. Each object

View File

@ -0,0 +1,10 @@
.. SPDX-License-Identifier: GPL-2.0
========
Clk API
========
The KUnit clk API is used to test clk providers and clk consumers.
.. kernel-doc:: drivers/clk/clk_kunit_helpers.c
:export:

View File

@ -9,11 +9,17 @@ API Reference
test
resource
functionredirection
clk
of
platformdevice
This page documents the KUnit kernel testing API. It is divided into the
following sections:
Core KUnit API
==============
Documentation/dev-tools/kunit/api/test.rst
- Documents all of the standard testing API
@ -25,3 +31,18 @@ Documentation/dev-tools/kunit/api/resource.rst
Documentation/dev-tools/kunit/api/functionredirection.rst
- Documents the KUnit Function Redirection API
Driver KUnit API
================
Documentation/dev-tools/kunit/api/clk.rst
- Documents the KUnit clk API
Documentation/dev-tools/kunit/api/of.rst
- Documents the KUnit device tree (OF) API
Documentation/dev-tools/kunit/api/platformdevice.rst
- Documents the KUnit platform device API

View File

@ -0,0 +1,13 @@
.. SPDX-License-Identifier: GPL-2.0
====================
Device Tree (OF) API
====================
The KUnit device tree API is used to test device tree (of_*) dependent code.
.. kernel-doc:: include/kunit/of.h
:internal:
.. kernel-doc:: drivers/of/of_kunit_helpers.c
:export:

View File

@ -0,0 +1,10 @@
.. SPDX-License-Identifier: GPL-2.0
===================
Platform Device API
===================
The KUnit platform device API is used to test platform devices.
.. kernel-doc:: lib/kunit/platform.c
:export:

View File

@ -188,15 +188,26 @@ For example, a Kconfig entry might look like:
Test File and Module Names
==========================
KUnit tests can often be compiled as a module. These modules should be named
after the test suite, followed by ``_test``. If this is likely to conflict with
non-KUnit tests, the suffix ``_kunit`` can also be used.
KUnit tests are often compiled as a separate module. To avoid conflicting
with regular modules, KUnit modules should be named after the test suite,
followed by ``_kunit`` (e.g. if "foobar" is the core module, then
"foobar_kunit" is the KUnit test module).
The easiest way of achieving this is to name the file containing the test suite
``<suite>_test.c`` (or, as above, ``<suite>_kunit.c``). This file should be
placed next to the code under test.
Test source files, whether compiled as a separate module or an
``#include`` in another source file, are best kept in a ``tests/``
subdirectory to not conflict with other source files (e.g. for
tab-completion).
Note that the ``_test`` suffix has also been used in some existing
tests. The ``_kunit`` suffix is preferred, as it makes the distinction
between KUnit and non-KUnit tests clearer.
So for the common case, name the file containing the test suite
``tests/<suite>_kunit.c``. The ``tests`` directory should be placed at
the same level as the code under test. For example, tests for
``lib/string.c`` live in ``lib/tests/string_kunit.c``.
If the suite name contains some or all of the name of the test's parent
directory, it may make sense to modify the source filename to reduce redundancy.
For example, a ``foo_firmware`` suite could be in the ``foo/firmware_test.c``
file.
directory, it may make sense to modify the source filename to reduce
redundancy. For example, a ``foo_firmware`` suite could be in the
``foo/tests/firmware_kunit.c`` file.

View File

@ -1,17 +0,0 @@
* ARC HS Performance Counters
The ARC HS can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. Like conventional PCT there
are 100+ hardware conditions dynamically mapped to up to 32 counters.
It also supports overflow interrupts.
Required properties:
- compatible : should contain
"snps,archs-pct"
Example:
pmu {
compatible = "snps,archs-pct";
};

View File

@ -0,0 +1,33 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/arc/snps,archs-pct.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: ARC HS Performance Counters
maintainers:
- Aryabhatta Dey <aryabhattadey35@gmail.com>
description:
The ARC HS can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. Like conventional PCT there
are 100+ hardware conditions dynamically mapped to up to 32 counters.
It also supports overflow interrupts.
properties:
compatible:
const: snps,archs-pct
reg:
maxItems: 1
clocks:
maxItems: 1
required:
- compatible
- reg
- clocks
additionalProperties: false

View File

@ -25,10 +25,18 @@ select:
properties:
compatible:
items:
- const: amlogic,meson-gx-ao-secure
- const: syscon
oneOf:
- items:
- const: amlogic,meson-gx-ao-secure
- const: syscon
- items:
- enum:
- amlogic,a4-ao-secure
- amlogic,c3-ao-secure
- amlogic,s4-ao-secure
- amlogic,t7-ao-secure
- const: amlogic,meson-gx-ao-secure
- const: syscon
reg:
maxItems: 1

View File

@ -17,7 +17,7 @@ description: |
The Coresight dummy source component is for the specific coresight source
devices kernel don't have permission to access or configure. For some SOCs,
there would be Coresight source trace components on sub-processor which
are conneted to AP processor via debug bus. For these devices, a dummy driver
are connected to AP processor via debug bus. For these devices, a dummy driver
is needed to register them as Coresight source devices, so that paths can be
created in the driver. It provides Coresight API for operations on dummy
source devices, such as enabling and disabling them. It also provides the

View File

@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: ARM Corstone1000
maintainers:
- Vishnu Banavath <vishnu.banavath@arm.com>
- Rui Miguel Silva <rui.silva@linaro.org>
- Abdellatif El Khlifi <abdellatif.elkhlifi@arm.com>
- Hugues Kamba Mpiana <hugues.kambampiana@arm.com>
description: |+
ARM's Corstone1000 includes pre-verified Corstone SSE-710 subsystem that

Some files were not shown because too many files have changed in this diff Show More