mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 12:16:41 +00:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
No conflicts. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
93817be8b6
@ -40,9 +40,8 @@ properties:
|
||||
value to be used for converting remote channel measurements to
|
||||
temperature.
|
||||
$ref: /schemas/types.yaml#/definitions/int32
|
||||
items:
|
||||
minimum: -128
|
||||
maximum: 127
|
||||
minimum: -128
|
||||
maximum: 127
|
||||
|
||||
ti,beta-compensation:
|
||||
description:
|
||||
|
@ -30,6 +30,7 @@ properties:
|
||||
- socionext,uniphier-ld11-aidet
|
||||
- socionext,uniphier-ld20-aidet
|
||||
- socionext,uniphier-pxs3-aidet
|
||||
- socionext,uniphier-nx1-aidet
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
@ -13,8 +13,8 @@ disappeared as of Linux 3.0.
|
||||
|
||||
There are two places where extended attributes can be found. The first
|
||||
place is between the end of each inode entry and the beginning of the
|
||||
next inode entry. For example, if inode.i\_extra\_isize = 28 and
|
||||
sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes
|
||||
next inode entry. For example, if inode.i_extra_isize = 28 and
|
||||
sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes
|
||||
available for in-inode extended attribute storage. The second place
|
||||
where extended attributes can be found is in the block pointed to by
|
||||
``inode.i_file_acl``. As of Linux 3.11, it is not possible for this
|
||||
@ -38,8 +38,8 @@ Extended attributes, when stored after the inode, have a header
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_magic
|
||||
- __le32
|
||||
- h_magic
|
||||
- Magic number for identification, 0xEA020000. This value is set by the
|
||||
Linux driver, though e2fsprogs doesn't seem to check it(?)
|
||||
|
||||
@ -55,28 +55,28 @@ The beginning of an extended attribute block is in
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_magic
|
||||
- __le32
|
||||
- h_magic
|
||||
- Magic number for identification, 0xEA020000.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- h\_refcount
|
||||
- __le32
|
||||
- h_refcount
|
||||
- Reference count.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- h\_blocks
|
||||
- __le32
|
||||
- h_blocks
|
||||
- Number of disk blocks used.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- h\_hash
|
||||
- __le32
|
||||
- h_hash
|
||||
- Hash value of all attributes.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- h\_checksum
|
||||
- __le32
|
||||
- h_checksum
|
||||
- Checksum of the extended attribute block.
|
||||
* - 0x14
|
||||
- \_\_u32
|
||||
- h\_reserved[3]
|
||||
- __u32
|
||||
- h_reserved[3]
|
||||
- Zero.
|
||||
|
||||
The checksum is calculated against the FS UUID, the 64-bit block number
|
||||
@ -100,46 +100,46 @@ Attributes stored inside an inode do not need be stored in sorted order.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_u8
|
||||
- e\_name\_len
|
||||
- __u8
|
||||
- e_name_len
|
||||
- Length of name.
|
||||
* - 0x1
|
||||
- \_\_u8
|
||||
- e\_name\_index
|
||||
- __u8
|
||||
- e_name_index
|
||||
- Attribute name index. There is a discussion of this below.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- e\_value\_offs
|
||||
- __le16
|
||||
- e_value_offs
|
||||
- Location of this attribute's value on the disk block where it is stored.
|
||||
Multiple attributes can share the same value. For an inode attribute
|
||||
this value is relative to the start of the first entry; for a block this
|
||||
value is relative to the start of the block (i.e. the header).
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- e\_value\_inum
|
||||
- __le32
|
||||
- e_value_inum
|
||||
- The inode where the value is stored. Zero indicates the value is in the
|
||||
same block as this entry. This field is only used if the
|
||||
INCOMPAT\_EA\_INODE feature is enabled.
|
||||
INCOMPAT_EA_INODE feature is enabled.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- e\_value\_size
|
||||
- __le32
|
||||
- e_value_size
|
||||
- Length of attribute value.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- e\_hash
|
||||
- __le32
|
||||
- e_hash
|
||||
- Hash value of attribute name and attribute value. The kernel doesn't
|
||||
update the hash for in-inode attributes, so for that case this value
|
||||
must be zero, because e2fsck validates any non-zero hash regardless of
|
||||
where the xattr lives.
|
||||
* - 0x10
|
||||
- char
|
||||
- e\_name[e\_name\_len]
|
||||
- e_name[e_name_len]
|
||||
- Attribute name. Does not include trailing NULL.
|
||||
|
||||
Attribute values can follow the end of the entry table. There appears to
|
||||
be a requirement that they be aligned to 4-byte boundaries. The values
|
||||
are stored starting at the end of the block and grow towards the
|
||||
xattr\_header/xattr\_entry table. When the two collide, the overflow is
|
||||
xattr_header/xattr_entry table. When the two collide, the overflow is
|
||||
put into a separate disk block. If the disk block fills up, the
|
||||
filesystem returns -ENOSPC.
|
||||
|
||||
@ -167,15 +167,15 @@ the key name. Here is a map of name index values to key prefixes:
|
||||
* - 1
|
||||
- “user.”
|
||||
* - 2
|
||||
- “system.posix\_acl\_access”
|
||||
- “system.posix_acl_access”
|
||||
* - 3
|
||||
- “system.posix\_acl\_default”
|
||||
- “system.posix_acl_default”
|
||||
* - 4
|
||||
- “trusted.”
|
||||
* - 6
|
||||
- “security.”
|
||||
* - 7
|
||||
- “system.” (inline\_data only?)
|
||||
- “system.” (inline_data only?)
|
||||
* - 8
|
||||
- “system.richacl” (SuSE kernels only?)
|
||||
|
||||
|
@ -23,7 +23,7 @@ means that a block group addresses 32 gigabytes instead of 128 megabytes,
|
||||
also shrinking the amount of file system overhead for metadata.
|
||||
|
||||
The administrator can set a block cluster size at mkfs time (which is
|
||||
stored in the s\_log\_cluster\_size field in the superblock); from then
|
||||
stored in the s_log_cluster_size field in the superblock); from then
|
||||
on, the block bitmaps track clusters, not individual blocks. This means
|
||||
that block groups can be several gigabytes in size (instead of just
|
||||
128MiB); however, the minimum allocation unit becomes a cluster, not a
|
||||
|
@ -9,15 +9,15 @@ group.
|
||||
The inode bitmap records which entries in the inode table are in use.
|
||||
|
||||
As with most bitmaps, one bit represents the usage status of one data
|
||||
block or inode table entry. This implies a block group size of 8 \*
|
||||
number\_of\_bytes\_in\_a\_logical\_block.
|
||||
block or inode table entry. This implies a block group size of 8 *
|
||||
number_of_bytes_in_a_logical_block.
|
||||
|
||||
NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts
|
||||
of the kernel and e2fsprogs code pretend that the block bitmap contains
|
||||
zeros (i.e. all blocks in the group are free). However, it is not
|
||||
necessarily the case that no blocks are in use -- if ``meta_bg`` is set,
|
||||
the bitmaps and group descriptor live inside the group. Unfortunately,
|
||||
ext2fs\_test\_block\_bitmap2() will return '0' for those locations,
|
||||
ext2fs_test_block_bitmap2() will return '0' for those locations,
|
||||
which produces confusing debugfs output.
|
||||
|
||||
Inode Table
|
||||
|
@ -56,39 +56,39 @@ established that the super block and the group descriptor table, if
|
||||
present, will be at the beginning of the block group. The bitmaps and
|
||||
the inode table can be anywhere, and it is quite possible for the
|
||||
bitmaps to come after the inode table, or for both to be in different
|
||||
groups (flex\_bg). Leftover space is used for file data blocks, indirect
|
||||
groups (flex_bg). Leftover space is used for file data blocks, indirect
|
||||
block maps, extent tree blocks, and extended attributes.
|
||||
|
||||
Flexible Block Groups
|
||||
---------------------
|
||||
|
||||
Starting in ext4, there is a new feature called flexible block groups
|
||||
(flex\_bg). In a flex\_bg, several block groups are tied together as one
|
||||
(flex_bg). In a flex_bg, several block groups are tied together as one
|
||||
logical block group; the bitmap spaces and the inode table space in the
|
||||
first block group of the flex\_bg are expanded to include the bitmaps
|
||||
and inode tables of all other block groups in the flex\_bg. For example,
|
||||
if the flex\_bg size is 4, then group 0 will contain (in order) the
|
||||
first block group of the flex_bg are expanded to include the bitmaps
|
||||
and inode tables of all other block groups in the flex_bg. For example,
|
||||
if the flex_bg size is 4, then group 0 will contain (in order) the
|
||||
superblock, group descriptors, data block bitmaps for groups 0-3, inode
|
||||
bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
|
||||
space in group 0 is for file data. The effect of this is to group the
|
||||
block group metadata close together for faster loading, and to enable
|
||||
large files to be continuous on disk. Backup copies of the superblock
|
||||
and group descriptors are always at the beginning of block groups, even
|
||||
if flex\_bg is enabled. The number of block groups that make up a
|
||||
flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
|
||||
if flex_bg is enabled. The number of block groups that make up a
|
||||
flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
|
||||
|
||||
Meta Block Groups
|
||||
-----------------
|
||||
|
||||
Without the option META\_BG, for safety concerns, all block group
|
||||
Without the option META_BG, for safety concerns, all block group
|
||||
descriptor copies are kept in the first block group. Given the default
|
||||
128MiB(2^27 bytes) block group size and 64-byte group descriptors, ext4
|
||||
can have at most 2^27/64 = 2^21 block groups. This limits the entire
|
||||
filesystem size to 2^21 * 2^27 = 2^48 bytes or 256TiB.
|
||||
|
||||
The solution to this problem is to use the metablock group feature
|
||||
(META\_BG), which is already in ext3 for all 2.6 releases. With the
|
||||
META\_BG feature, ext4 filesystems are partitioned into many metablock
|
||||
(META_BG), which is already in ext3 for all 2.6 releases. With the
|
||||
META_BG feature, ext4 filesystems are partitioned into many metablock
|
||||
groups. Each metablock group is a cluster of block groups whose group
|
||||
descriptor structures can be stored in a single disk block. For ext4
|
||||
filesystems with 4 KB block size, a single metablock group partition
|
||||
@ -110,7 +110,7 @@ bytes, a meta-block group contains 32 block groups for filesystems with
|
||||
a 1KB block size, and 128 block groups for filesystems with a 4KB
|
||||
blocksize. Filesystems can either be created using this new block group
|
||||
descriptor layout, or existing filesystems can be resized on-line, and
|
||||
the field s\_first\_meta\_bg in the superblock will indicate the first
|
||||
the field s_first_meta_bg in the superblock will indicate the first
|
||||
block group using this new layout.
|
||||
|
||||
Please see an important note about ``BLOCK_UNINIT`` in the section about
|
||||
@ -121,15 +121,15 @@ Lazy Block Group Initialization
|
||||
|
||||
A new feature for ext4 is a set of three block group descriptor flags that
|
||||
enable mkfs to skip initializing other parts of the block group
|
||||
metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean
|
||||
metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean
|
||||
that the inode and block bitmaps for that group can be calculated and
|
||||
therefore the on-disk bitmap blocks are not initialized. This is
|
||||
generally the case for an empty block group or a block group containing
|
||||
only fixed-location block group metadata. The INODE\_ZEROED flag means
|
||||
only fixed-location block group metadata. The INODE_ZEROED flag means
|
||||
that the inode table has been initialized; mkfs will unset this flag and
|
||||
rely on the kernel to initialize the inode tables in the background.
|
||||
|
||||
By not writing zeroes to the bitmaps and inode table, mkfs time is
|
||||
reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM,
|
||||
but the dumpe2fs output prints this as “uninit\_bg”. They are the same
|
||||
reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM,
|
||||
but the dumpe2fs output prints this as “uninit_bg”. They are the same
|
||||
thing.
|
||||
|
@ -1,7 +1,7 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| i.i\_block Offset | Where It Points |
|
||||
| i.i_block Offset | Where It Points |
|
||||
+=====================+==============================================================================================================================================================================================================================+
|
||||
| 0 to 11 | Direct map to file blocks 0 to 11. |
|
||||
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
@ -4,7 +4,7 @@ Checksums
|
||||
---------
|
||||
|
||||
Starting in early 2012, metadata checksums were added to all major ext4
|
||||
and jbd2 data structures. The associated feature flag is metadata\_csum.
|
||||
and jbd2 data structures. The associated feature flag is metadata_csum.
|
||||
The desired checksum algorithm is indicated in the superblock, though as
|
||||
of October 2012 the only supported algorithm is crc32c. Some data
|
||||
structures did not have space to fit a full 32-bit checksum, so only the
|
||||
@ -20,7 +20,7 @@ encounters directory blocks that lack sufficient empty space to add a
|
||||
checksum, it will request that you run ``e2fsck -D`` to have the
|
||||
directories rebuilt with checksums. This has the added benefit of
|
||||
removing slack space from the directory files and rebalancing the htree
|
||||
indexes. If you \_ignore\_ this step, your directories will not be
|
||||
indexes. If you _ignore_ this step, your directories will not be
|
||||
protected by a checksum!
|
||||
|
||||
The following table describes the data elements that go into each type
|
||||
@ -35,39 +35,39 @@ of checksum. The checksum function is whatever the superblock describes
|
||||
- Length
|
||||
- Ingredients
|
||||
* - Superblock
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- The entire superblock up to the checksum field. The UUID lives inside
|
||||
the superblock.
|
||||
* - MMP
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + the entire MMP block up to the checksum field.
|
||||
* - Extended Attributes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + the entire extended attribute block. The checksum field is set to
|
||||
zero.
|
||||
* - Directory Entries
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the directory block up to the
|
||||
fake entry enclosing the checksum field.
|
||||
* - HTREE Nodes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + all valid extents + HTREE tail.
|
||||
The checksum field is set to zero.
|
||||
* - Extents
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the entire extent block up to
|
||||
the checksum field.
|
||||
* - Bitmaps
|
||||
- \_\_le32 or \_\_le16
|
||||
- __le32 or __le16
|
||||
- UUID + the entire bitmap. Checksums are stored in the group descriptor,
|
||||
and truncated if the group descriptor size is 32 bytes (i.e. ^64bit)
|
||||
* - Inodes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the entire inode. The checksum
|
||||
field is set to zero. Each inode has its own checksum.
|
||||
* - Group Descriptors
|
||||
- \_\_le16
|
||||
- If metadata\_csum, then UUID + group number + the entire descriptor;
|
||||
else if gdt\_csum, then crc16(UUID + group number + the entire
|
||||
- __le16
|
||||
- If metadata_csum, then UUID + group number + the entire descriptor;
|
||||
else if gdt_csum, then crc16(UUID + group number + the entire
|
||||
descriptor). In all cases, only the lower 16 bits are stored.
|
||||
|
||||
|
@ -42,24 +42,24 @@ is at most 263 bytes long, though on disk you'll need to reference
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- inode
|
||||
- Number of the inode that this directory entry points to.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- rec\_len
|
||||
- __le16
|
||||
- rec_len
|
||||
- Length of this directory entry. Must be a multiple of 4.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- name\_len
|
||||
- __le16
|
||||
- name_len
|
||||
- Length of the file name.
|
||||
* - 0x8
|
||||
- char
|
||||
- name[EXT4\_NAME\_LEN]
|
||||
- name[EXT4_NAME_LEN]
|
||||
- File name.
|
||||
|
||||
Since file names cannot be longer than 255 bytes, the new directory
|
||||
entry format shortens the name\_len field and uses the space for a file
|
||||
entry format shortens the name_len field and uses the space for a file
|
||||
type flag, probably to avoid having to load every inode during directory
|
||||
tree traversal. This format is ``ext4_dir_entry_2``, which is at most
|
||||
263 bytes long, though on disk you'll need to reference
|
||||
@ -74,24 +74,24 @@ tree traversal. This format is ``ext4_dir_entry_2``, which is at most
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- inode
|
||||
- Number of the inode that this directory entry points to.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- rec\_len
|
||||
- __le16
|
||||
- rec_len
|
||||
- Length of this directory entry.
|
||||
* - 0x6
|
||||
- \_\_u8
|
||||
- name\_len
|
||||
- __u8
|
||||
- name_len
|
||||
- Length of the file name.
|
||||
* - 0x7
|
||||
- \_\_u8
|
||||
- file\_type
|
||||
- __u8
|
||||
- file_type
|
||||
- File type code, see ftype_ table below.
|
||||
* - 0x8
|
||||
- char
|
||||
- name[EXT4\_NAME\_LEN]
|
||||
- name[EXT4_NAME_LEN]
|
||||
- File name.
|
||||
|
||||
.. _ftype:
|
||||
@ -137,19 +137,19 @@ entry uses this extension, it may be up to 271 bytes.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- hash
|
||||
- The hash of the directory name
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- minor\_hash
|
||||
- __le32
|
||||
- minor_hash
|
||||
- The minor hash of the directory name
|
||||
|
||||
|
||||
In order to add checksums to these classic directory blocks, a phony
|
||||
``struct ext4_dir_entry`` is placed at the end of each leaf block to
|
||||
hold the checksum. The directory entry is 12 bytes long. The inode
|
||||
number and name\_len fields are set to zero to fool old software into
|
||||
number and name_len fields are set to zero to fool old software into
|
||||
ignoring an apparently empty directory entry, and the checksum is stored
|
||||
in the place where the name normally goes. The structure is
|
||||
``struct ext4_dir_entry_tail``:
|
||||
@ -163,24 +163,24 @@ in the place where the name normally goes. The structure is
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- det\_reserved\_zero1
|
||||
- __le32
|
||||
- det_reserved_zero1
|
||||
- Inode number, which must be zero.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- det\_rec\_len
|
||||
- __le16
|
||||
- det_rec_len
|
||||
- Length of this directory entry, which must be 12.
|
||||
* - 0x6
|
||||
- \_\_u8
|
||||
- det\_reserved\_zero2
|
||||
- __u8
|
||||
- det_reserved_zero2
|
||||
- Length of the file name, which must be zero.
|
||||
* - 0x7
|
||||
- \_\_u8
|
||||
- det\_reserved\_ft
|
||||
- __u8
|
||||
- det_reserved_ft
|
||||
- File type, which must be 0xDE.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- det\_checksum
|
||||
- __le32
|
||||
- det_checksum
|
||||
- Directory leaf block checksum.
|
||||
|
||||
The leaf directory block checksum is calculated against the FS UUID, the
|
||||
@ -194,7 +194,7 @@ Hash Tree Directories
|
||||
A linear array of directory entries isn't great for performance, so a
|
||||
new feature was added to ext3 to provide a faster (but peculiar)
|
||||
balanced tree keyed off a hash of the directory entry name. If the
|
||||
EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a
|
||||
EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a
|
||||
hashed btree (htree) to organize and find directory entries. For
|
||||
backwards read-only compatibility with ext2, this tree is actually
|
||||
hidden inside the directory file, masquerading as “empty” directory data
|
||||
@ -206,14 +206,14 @@ rest of the directory block is empty so that it moves on.
|
||||
The root of the tree always lives in the first data block of the
|
||||
directory. By ext2 custom, the '.' and '..' entries must appear at the
|
||||
beginning of this first block, so they are put here as two
|
||||
``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of
|
||||
``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of
|
||||
the root node contains metadata about the tree and finally a hash->block
|
||||
map to find nodes that are lower in the htree. If
|
||||
``dx_root.info.indirect_levels`` is non-zero then the htree has two
|
||||
levels; the data block pointed to by the root node's map is an interior
|
||||
node, which is indexed by a minor hash. Interior nodes in this tree
|
||||
contain a zeroed out ``struct ext4_dir_entry_2`` followed by a
|
||||
minor\_hash->block map to find leaf nodes. Leaf nodes contain a linear
|
||||
minor_hash->block map to find leaf nodes. Leaf nodes contain a linear
|
||||
array of all ``struct ext4_dir_entry_2``; all of these entries
|
||||
(presumably) hash to the same value. If there is an overflow, the
|
||||
entries simply overflow into the next leaf node, and the
|
||||
@ -245,83 +245,83 @@ of a data block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- dot.inode
|
||||
- inode number of this directory.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- dot.rec\_len
|
||||
- __le16
|
||||
- dot.rec_len
|
||||
- Length of this record, 12.
|
||||
* - 0x6
|
||||
- u8
|
||||
- dot.name\_len
|
||||
- dot.name_len
|
||||
- Length of the name, 1.
|
||||
* - 0x7
|
||||
- u8
|
||||
- dot.file\_type
|
||||
- dot.file_type
|
||||
- File type of this entry, 0x2 (directory) (if the feature flag is set).
|
||||
* - 0x8
|
||||
- char
|
||||
- dot.name[4]
|
||||
- “.\\0\\0\\0”
|
||||
- “.\0\0\0”
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- dotdot.inode
|
||||
- inode number of parent directory.
|
||||
* - 0x10
|
||||
- \_\_le16
|
||||
- dotdot.rec\_len
|
||||
- block\_size - 12. The record length is long enough to cover all htree
|
||||
- __le16
|
||||
- dotdot.rec_len
|
||||
- block_size - 12. The record length is long enough to cover all htree
|
||||
data.
|
||||
* - 0x12
|
||||
- u8
|
||||
- dotdot.name\_len
|
||||
- dotdot.name_len
|
||||
- Length of the name, 2.
|
||||
* - 0x13
|
||||
- u8
|
||||
- dotdot.file\_type
|
||||
- dotdot.file_type
|
||||
- File type of this entry, 0x2 (directory) (if the feature flag is set).
|
||||
* - 0x14
|
||||
- char
|
||||
- dotdot\_name[4]
|
||||
- “..\\0\\0”
|
||||
- dotdot_name[4]
|
||||
- “..\0\0”
|
||||
* - 0x18
|
||||
- \_\_le32
|
||||
- struct dx\_root\_info.reserved\_zero
|
||||
- __le32
|
||||
- struct dx_root_info.reserved_zero
|
||||
- Zero.
|
||||
* - 0x1C
|
||||
- u8
|
||||
- struct dx\_root\_info.hash\_version
|
||||
- struct dx_root_info.hash_version
|
||||
- Hash type, see dirhash_ table below.
|
||||
* - 0x1D
|
||||
- u8
|
||||
- struct dx\_root\_info.info\_length
|
||||
- struct dx_root_info.info_length
|
||||
- Length of the tree information, 0x8.
|
||||
* - 0x1E
|
||||
- u8
|
||||
- struct dx\_root\_info.indirect\_levels
|
||||
- Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR
|
||||
- struct dx_root_info.indirect_levels
|
||||
- Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR
|
||||
feature is set; cannot be larger than 2 otherwise.
|
||||
* - 0x1F
|
||||
- u8
|
||||
- struct dx\_root\_info.unused\_flags
|
||||
- struct dx_root_info.unused_flags
|
||||
-
|
||||
* - 0x20
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- limit
|
||||
- Maximum number of dx\_entries that can follow this header, plus 1 for
|
||||
- Maximum number of dx_entries that can follow this header, plus 1 for
|
||||
the header itself.
|
||||
* - 0x22
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- count
|
||||
- Actual number of dx\_entries that follow this header, plus 1 for the
|
||||
- Actual number of dx_entries that follow this header, plus 1 for the
|
||||
header itself.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- The block number (within the directory file) that goes with hash=0.
|
||||
* - 0x28
|
||||
- struct dx\_entry
|
||||
- struct dx_entry
|
||||
- entries[0]
|
||||
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
|
||||
|
||||
@ -362,38 +362,38 @@ also the full length of a data block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- fake.inode
|
||||
- Zero, to make it look like this entry is not in use.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- fake.rec\_len
|
||||
- The size of the block, in order to hide all of the dx\_node data.
|
||||
- __le16
|
||||
- fake.rec_len
|
||||
- The size of the block, in order to hide all of the dx_node data.
|
||||
* - 0x6
|
||||
- u8
|
||||
- name\_len
|
||||
- name_len
|
||||
- Zero. There is no name for this “unused” directory entry.
|
||||
* - 0x7
|
||||
- u8
|
||||
- file\_type
|
||||
- file_type
|
||||
- Zero. There is no file type for this “unused” directory entry.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- limit
|
||||
- Maximum number of dx\_entries that can follow this header, plus 1 for
|
||||
- Maximum number of dx_entries that can follow this header, plus 1 for
|
||||
the header itself.
|
||||
* - 0xA
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- count
|
||||
- Actual number of dx\_entries that follow this header, plus 1 for the
|
||||
- Actual number of dx_entries that follow this header, plus 1 for the
|
||||
header itself.
|
||||
* - 0xE
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- The block number (within the directory file) that goes with the lowest
|
||||
hash value of this block. This value is stored in the parent block.
|
||||
* - 0x12
|
||||
- struct dx\_entry
|
||||
- struct dx_entry
|
||||
- entries[0]
|
||||
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
|
||||
|
||||
@ -410,11 +410,11 @@ long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- hash
|
||||
- Hash code.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- Block number (within the directory file, not filesystem blocks) of the
|
||||
next node in the htree.
|
||||
@ -423,13 +423,13 @@ long:
|
||||
author.)
|
||||
|
||||
If metadata checksums are enabled, the last 8 bytes of the directory
|
||||
block (precisely the length of one dx\_entry) are used to store a
|
||||
block (precisely the length of one dx_entry) are used to store a
|
||||
``struct dx_tail``, which contains the checksum. The ``limit`` and
|
||||
``count`` entries in the dx\_root/dx\_node structures are adjusted as
|
||||
necessary to fit the dx\_tail into the block. If there is no space for
|
||||
the dx\_tail, the user is notified to run e2fsck -D to rebuild the
|
||||
``count`` entries in the dx_root/dx_node structures are adjusted as
|
||||
necessary to fit the dx_tail into the block. If there is no space for
|
||||
the dx_tail, the user is notified to run e2fsck -D to rebuild the
|
||||
directory index (which will ensure that there's space for the checksum).
|
||||
The dx\_tail structure is 8 bytes long and looks like this:
|
||||
The dx_tail structure is 8 bytes long and looks like this:
|
||||
|
||||
.. list-table::
|
||||
:widths: 8 8 24 40
|
||||
@ -441,13 +441,13 @@ The dx\_tail structure is 8 bytes long and looks like this:
|
||||
- Description
|
||||
* - 0x0
|
||||
- u32
|
||||
- dt\_reserved
|
||||
- dt_reserved
|
||||
- Zero.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- dt\_checksum
|
||||
- __le32
|
||||
- dt_checksum
|
||||
- Checksum of the htree directory block.
|
||||
|
||||
The checksum is calculated against the FS UUID, the htree index header
|
||||
(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in
|
||||
use, and the tail block (dx\_tail).
|
||||
(dx_root or dx_node), all of the htree indices (dx_entry) that are in
|
||||
use, and the tail block (dx_tail).
|
||||
|
@ -5,14 +5,14 @@ Large Extended Attribute Values
|
||||
|
||||
To enable ext4 to store extended attribute values that do not fit in the
|
||||
inode or in the single extended attribute block attached to an inode,
|
||||
the EA\_INODE feature allows us to store the value in the data blocks of
|
||||
the EA_INODE feature allows us to store the value in the data blocks of
|
||||
a regular file inode. This “EA inode” is linked only from the extended
|
||||
attribute name index and must not appear in a directory entry. The
|
||||
inode's i\_atime field is used to store a checksum of the xattr value;
|
||||
and i\_ctime/i\_version store a 64-bit reference count, which enables
|
||||
inode's i_atime field is used to store a checksum of the xattr value;
|
||||
and i_ctime/i_version store a 64-bit reference count, which enables
|
||||
sharing of large xattr values between multiple owning inodes. For
|
||||
backward compatibility with older versions of this feature, the
|
||||
i\_mtime/i\_generation *may* store a back-reference to the inode number
|
||||
and i\_generation of the **one** owning inode (in cases where the EA
|
||||
i_mtime/i_generation *may* store a back-reference to the inode number
|
||||
and i_generation of the **one** owning inode (in cases where the EA
|
||||
inode is not referenced by multiple inodes) to verify that the EA inode
|
||||
is the correct one being accessed.
|
||||
|
@ -7,34 +7,34 @@ Each block group on the filesystem has one of these descriptors
|
||||
associated with it. As noted in the Layout section above, the group
|
||||
descriptors (if present) are the second item in the block group. The
|
||||
standard configuration is for each block group to contain a full copy of
|
||||
the block group descriptor table unless the sparse\_super feature flag
|
||||
the block group descriptor table unless the sparse_super feature flag
|
||||
is set.
|
||||
|
||||
Notice how the group descriptor records the location of both bitmaps and
|
||||
the inode table (i.e. they can float). This means that within a block
|
||||
group, the only data structures with fixed locations are the superblock
|
||||
and the group descriptor table. The flex\_bg mechanism uses this
|
||||
and the group descriptor table. The flex_bg mechanism uses this
|
||||
property to group several block groups into a flex group and lay out all
|
||||
of the groups' bitmaps and inode tables into one long run in the first
|
||||
group of the flex group.
|
||||
|
||||
If the meta\_bg feature flag is set, then several block groups are
|
||||
grouped together into a meta group. Note that in the meta\_bg case,
|
||||
If the meta_bg feature flag is set, then several block groups are
|
||||
grouped together into a meta group. Note that in the meta_bg case,
|
||||
however, the first and last two block groups within the larger meta
|
||||
group contain only group descriptors for the groups inside the meta
|
||||
group.
|
||||
|
||||
flex\_bg and meta\_bg do not appear to be mutually exclusive features.
|
||||
flex_bg and meta_bg do not appear to be mutually exclusive features.
|
||||
|
||||
In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the
|
||||
block group descriptor was only 32 bytes long and therefore ends at
|
||||
bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the
|
||||
bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the
|
||||
block group descriptor expands to at least the 64 bytes described below;
|
||||
the size is stored in the superblock.
|
||||
|
||||
If gdt\_csum is set and metadata\_csum is not set, the block group
|
||||
If gdt_csum is set and metadata_csum is not set, the block group
|
||||
checksum is the crc16 of the FS UUID, the group number, and the group
|
||||
descriptor structure. If metadata\_csum is set, then the block group
|
||||
descriptor structure. If metadata_csum is set, then the block group
|
||||
checksum is the lower 16 bits of the checksum of the FS UUID, the group
|
||||
number, and the group descriptor structure. Both block and inode bitmap
|
||||
checksums are calculated against the FS UUID, the group number, and the
|
||||
@ -51,59 +51,59 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- bg\_block\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_block_bitmap_lo
|
||||
- Lower 32-bits of location of block bitmap.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- bg\_inode\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_inode_bitmap_lo
|
||||
- Lower 32-bits of location of inode bitmap.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- bg\_inode\_table\_lo
|
||||
- __le32
|
||||
- bg_inode_table_lo
|
||||
- Lower 32-bits of location of inode table.
|
||||
* - 0xC
|
||||
- \_\_le16
|
||||
- bg\_free\_blocks\_count\_lo
|
||||
- __le16
|
||||
- bg_free_blocks_count_lo
|
||||
- Lower 16-bits of free block count.
|
||||
* - 0xE
|
||||
- \_\_le16
|
||||
- bg\_free\_inodes\_count\_lo
|
||||
- __le16
|
||||
- bg_free_inodes_count_lo
|
||||
- Lower 16-bits of free inode count.
|
||||
* - 0x10
|
||||
- \_\_le16
|
||||
- bg\_used\_dirs\_count\_lo
|
||||
- __le16
|
||||
- bg_used_dirs_count_lo
|
||||
- Lower 16-bits of directory count.
|
||||
* - 0x12
|
||||
- \_\_le16
|
||||
- bg\_flags
|
||||
- __le16
|
||||
- bg_flags
|
||||
- Block group flags. See the bgflags_ table below.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- bg\_exclude\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_exclude_bitmap_lo
|
||||
- Lower 32-bits of location of snapshot exclusion bitmap.
|
||||
* - 0x18
|
||||
- \_\_le16
|
||||
- bg\_block\_bitmap\_csum\_lo
|
||||
- __le16
|
||||
- bg_block_bitmap_csum_lo
|
||||
- Lower 16-bits of the block bitmap checksum.
|
||||
* - 0x1A
|
||||
- \_\_le16
|
||||
- bg\_inode\_bitmap\_csum\_lo
|
||||
- __le16
|
||||
- bg_inode_bitmap_csum_lo
|
||||
- Lower 16-bits of the inode bitmap checksum.
|
||||
* - 0x1C
|
||||
- \_\_le16
|
||||
- bg\_itable\_unused\_lo
|
||||
- __le16
|
||||
- bg_itable_unused_lo
|
||||
- Lower 16-bits of unused inode count. If set, we needn't scan past the
|
||||
``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the
|
||||
``(sb.s_inodes_per_group - gdt.bg_itable_unused)`` th entry in the
|
||||
inode table for this group.
|
||||
* - 0x1E
|
||||
- \_\_le16
|
||||
- bg\_checksum
|
||||
- Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the
|
||||
RO\_COMPAT\_GDT\_CSUM feature is set, or
|
||||
crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the
|
||||
RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum
|
||||
field in bg\_desc is skipped when calculating crc16 checksum,
|
||||
- __le16
|
||||
- bg_checksum
|
||||
- Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the
|
||||
RO_COMPAT_GDT_CSUM feature is set, or
|
||||
crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the
|
||||
RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum
|
||||
field in bg_desc is skipped when calculating crc16 checksum,
|
||||
and set to zero if crc32c checksum is used.
|
||||
* -
|
||||
-
|
||||
@ -111,48 +111,48 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
|
||||
- These fields only exist if the 64bit feature is enabled and s_desc_size
|
||||
> 32.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- bg\_block\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_block_bitmap_hi
|
||||
- Upper 32-bits of location of block bitmap.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- bg\_inode\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_inode_bitmap_hi
|
||||
- Upper 32-bits of location of inodes bitmap.
|
||||
* - 0x28
|
||||
- \_\_le32
|
||||
- bg\_inode\_table\_hi
|
||||
- __le32
|
||||
- bg_inode_table_hi
|
||||
- Upper 32-bits of location of inodes table.
|
||||
* - 0x2C
|
||||
- \_\_le16
|
||||
- bg\_free\_blocks\_count\_hi
|
||||
- __le16
|
||||
- bg_free_blocks_count_hi
|
||||
- Upper 16-bits of free block count.
|
||||
* - 0x2E
|
||||
- \_\_le16
|
||||
- bg\_free\_inodes\_count\_hi
|
||||
- __le16
|
||||
- bg_free_inodes_count_hi
|
||||
- Upper 16-bits of free inode count.
|
||||
* - 0x30
|
||||
- \_\_le16
|
||||
- bg\_used\_dirs\_count\_hi
|
||||
- __le16
|
||||
- bg_used_dirs_count_hi
|
||||
- Upper 16-bits of directory count.
|
||||
* - 0x32
|
||||
- \_\_le16
|
||||
- bg\_itable\_unused\_hi
|
||||
- __le16
|
||||
- bg_itable_unused_hi
|
||||
- Upper 16-bits of unused inode count.
|
||||
* - 0x34
|
||||
- \_\_le32
|
||||
- bg\_exclude\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_exclude_bitmap_hi
|
||||
- Upper 32-bits of location of snapshot exclusion bitmap.
|
||||
* - 0x38
|
||||
- \_\_le16
|
||||
- bg\_block\_bitmap\_csum\_hi
|
||||
- __le16
|
||||
- bg_block_bitmap_csum_hi
|
||||
- Upper 16-bits of the block bitmap checksum.
|
||||
* - 0x3A
|
||||
- \_\_le16
|
||||
- bg\_inode\_bitmap\_csum\_hi
|
||||
- __le16
|
||||
- bg_inode_bitmap_csum_hi
|
||||
- Upper 16-bits of the inode bitmap checksum.
|
||||
* - 0x3C
|
||||
- \_\_u32
|
||||
- bg\_reserved
|
||||
- __u32
|
||||
- bg_reserved
|
||||
- Padding to 64 bytes.
|
||||
|
||||
.. _bgflags:
|
||||
@ -166,8 +166,8 @@ Block group flags can be any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT).
|
||||
- inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT).
|
||||
* - 0x2
|
||||
- block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT).
|
||||
- block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT).
|
||||
* - 0x4
|
||||
- inode table is zeroed (EXT4\_BG\_INODE\_ZEROED).
|
||||
- inode table is zeroed (EXT4_BG_INODE_ZEROED).
|
||||
|
@ -1,6 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
The Contents of inode.i\_block
|
||||
The Contents of inode.i_block
|
||||
------------------------------
|
||||
|
||||
Depending on the type of file an inode describes, the 60 bytes of
|
||||
@ -47,7 +47,7 @@ In ext4, the file to logical block map has been replaced with an extent
|
||||
tree. Under the old scheme, allocating a contiguous run of 1,000 blocks
|
||||
requires an indirect block to map all 1,000 entries; with extents, the
|
||||
mapping is reduced to a single ``struct ext4_extent`` with
|
||||
``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate
|
||||
``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate
|
||||
very large files with a single extent, at a considerable reduction in
|
||||
metadata block use, and some improvement in disk efficiency. The inode
|
||||
must have the extents flag (0x80000) set for this feature to be in
|
||||
@ -76,28 +76,28 @@ which is 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- eh\_magic
|
||||
- __le16
|
||||
- eh_magic
|
||||
- Magic number, 0xF30A.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- eh\_entries
|
||||
- __le16
|
||||
- eh_entries
|
||||
- Number of valid entries following the header.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- eh\_max
|
||||
- __le16
|
||||
- eh_max
|
||||
- Maximum number of entries that could follow the header.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- eh\_depth
|
||||
- __le16
|
||||
- eh_depth
|
||||
- Depth of this extent node in the extent tree. 0 = this extent node
|
||||
points to data blocks; otherwise, this extent node points to other
|
||||
extent nodes. The extent tree can be at most 5 levels deep: a logical
|
||||
block number can be at most ``2^32``, and the smallest ``n`` that
|
||||
satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- eh\_generation
|
||||
- __le32
|
||||
- eh_generation
|
||||
- Generation of the tree. (Used by Lustre, but not standard ext4).
|
||||
|
||||
Internal nodes of the extent tree, also known as index nodes, are
|
||||
@ -112,22 +112,22 @@ recorded as ``struct ext4_extent_idx``, and are 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- ei\_block
|
||||
- __le32
|
||||
- ei_block
|
||||
- This index node covers file blocks from 'block' onward.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- ei\_leaf\_lo
|
||||
- __le32
|
||||
- ei_leaf_lo
|
||||
- Lower 32-bits of the block number of the extent node that is the next
|
||||
level lower in the tree. The tree node pointed to can be either another
|
||||
internal node or a leaf node, described below.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- ei\_leaf\_hi
|
||||
- __le16
|
||||
- ei_leaf_hi
|
||||
- Upper 16-bits of the previous field.
|
||||
* - 0xA
|
||||
- \_\_u16
|
||||
- ei\_unused
|
||||
- __u16
|
||||
- ei_unused
|
||||
-
|
||||
|
||||
Leaf nodes of the extent tree are recorded as ``struct ext4_extent``,
|
||||
@ -142,24 +142,24 @@ and are also 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- ee\_block
|
||||
- __le32
|
||||
- ee_block
|
||||
- First file block number that this extent covers.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- ee\_len
|
||||
- __le16
|
||||
- ee_len
|
||||
- Number of blocks covered by extent. If the value of this field is <=
|
||||
32768, the extent is initialized. If the value of the field is > 32768,
|
||||
the extent is uninitialized and the actual extent length is ``ee_len`` -
|
||||
32768. Therefore, the maximum length of an initialized extent is 32768
|
||||
blocks, and the maximum length of an uninitialized extent is 32767.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- ee\_start\_hi
|
||||
- __le16
|
||||
- ee_start_hi
|
||||
- Upper 16-bits of the block number to which this extent points.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- ee\_start\_lo
|
||||
- __le32
|
||||
- ee_start_lo
|
||||
- Lower 32-bits of the block number to which this extent points.
|
||||
|
||||
Prior to the introduction of metadata checksums, the extent header +
|
||||
@ -182,8 +182,8 @@ including) the checksum itself.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- eb\_checksum
|
||||
- __le32
|
||||
- eb_checksum
|
||||
- Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock)
|
||||
|
||||
Inline Data
|
||||
|
@ -11,12 +11,12 @@ file is smaller than 60 bytes, then the data are stored inline in
|
||||
attribute space, then it might be found as an extended attribute
|
||||
“system.data” within the inode body (“ibody EA”). This of course
|
||||
constrains the amount of extended attributes one can attach to an inode.
|
||||
If the data size increases beyond i\_block + ibody EA, a regular block
|
||||
If the data size increases beyond i_block + ibody EA, a regular block
|
||||
is allocated and the contents moved to that block.
|
||||
|
||||
Pending a change to compact the extended attribute key used to store
|
||||
inline data, one ought to be able to store 160 bytes of data in a
|
||||
256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to
|
||||
256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to
|
||||
that, the limit was 156 bytes due to inefficient use of inode space.
|
||||
|
||||
The inline data feature requires the presence of an extended attribute
|
||||
@ -25,12 +25,12 @@ for “system.data”, even if the attribute value is zero length.
|
||||
Inline Directories
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The first four bytes of i\_block are the inode number of the parent
|
||||
The first four bytes of i_block are the inode number of the parent
|
||||
directory. Following that is a 56-byte space for an array of directory
|
||||
entries; see ``struct ext4_dir_entry``. If there is a “system.data”
|
||||
attribute in the inode body, the EA value is an array of
|
||||
``struct ext4_dir_entry`` as well. Note that for inline directories, the
|
||||
i\_block and EA space are treated as separate dirent blocks; directory
|
||||
i_block and EA space are treated as separate dirent blocks; directory
|
||||
entries cannot span the two.
|
||||
|
||||
Inline directory entries are not checksummed, as the inode checksum
|
||||
|
@ -38,138 +38,138 @@ The inode table entry is laid out in ``struct ext4_inode``.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- i\_mode
|
||||
- __le16
|
||||
- i_mode
|
||||
- File mode. See the table i_mode_ below.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- i\_uid
|
||||
- __le16
|
||||
- i_uid
|
||||
- Lower 16-bits of Owner UID.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- i\_size\_lo
|
||||
- __le32
|
||||
- i_size_lo
|
||||
- Lower 32-bits of size in bytes.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- i\_atime
|
||||
- Last access time, in seconds since the epoch. However, if the EA\_INODE
|
||||
- __le32
|
||||
- i_atime
|
||||
- Last access time, in seconds since the epoch. However, if the EA_INODE
|
||||
inode flag is set, this inode stores an extended attribute value and
|
||||
this field contains the checksum of the value.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- i\_ctime
|
||||
- __le32
|
||||
- i_ctime
|
||||
- Last inode change time, in seconds since the epoch. However, if the
|
||||
EA\_INODE inode flag is set, this inode stores an extended attribute
|
||||
EA_INODE inode flag is set, this inode stores an extended attribute
|
||||
value and this field contains the lower 32 bits of the attribute value's
|
||||
reference count.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- i\_mtime
|
||||
- __le32
|
||||
- i_mtime
|
||||
- Last data modification time, in seconds since the epoch. However, if the
|
||||
EA\_INODE inode flag is set, this inode stores an extended attribute
|
||||
EA_INODE inode flag is set, this inode stores an extended attribute
|
||||
value and this field contains the number of the inode that owns the
|
||||
extended attribute.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- i\_dtime
|
||||
- __le32
|
||||
- i_dtime
|
||||
- Deletion Time, in seconds since the epoch.
|
||||
* - 0x18
|
||||
- \_\_le16
|
||||
- i\_gid
|
||||
- __le16
|
||||
- i_gid
|
||||
- Lower 16-bits of GID.
|
||||
* - 0x1A
|
||||
- \_\_le16
|
||||
- i\_links\_count
|
||||
- __le16
|
||||
- i_links_count
|
||||
- Hard link count. Normally, ext4 does not permit an inode to have more
|
||||
than 65,000 hard links. This applies to files as well as directories,
|
||||
which means that there cannot be more than 64,998 subdirectories in a
|
||||
directory (each subdirectory's '..' entry counts as a hard link, as does
|
||||
the '.' entry in the directory itself). With the DIR\_NLINK feature
|
||||
the '.' entry in the directory itself). With the DIR_NLINK feature
|
||||
enabled, ext4 supports more than 64,998 subdirectories by setting this
|
||||
field to 1 to indicate that the number of hard links is not known.
|
||||
* - 0x1C
|
||||
- \_\_le32
|
||||
- i\_blocks\_lo
|
||||
- Lower 32-bits of “block” count. If the huge\_file feature flag is not
|
||||
- __le32
|
||||
- i_blocks_lo
|
||||
- Lower 32-bits of “block” count. If the huge_file feature flag is not
|
||||
set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks
|
||||
on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in
|
||||
on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in
|
||||
``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi
|
||||
<< 32)`` 512-byte blocks on disk. If huge\_file is set and
|
||||
EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file
|
||||
<< 32)`` 512-byte blocks on disk. If huge_file is set and
|
||||
EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file
|
||||
consumes ``i_blocks_lo + (i_blocks_hi << 32)`` filesystem blocks on
|
||||
disk.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- i\_flags
|
||||
- __le32
|
||||
- i_flags
|
||||
- Inode flags. See the table i_flags_ below.
|
||||
* - 0x24
|
||||
- 4 bytes
|
||||
- i\_osd1
|
||||
- i_osd1
|
||||
- See the table i_osd1_ for more details.
|
||||
* - 0x28
|
||||
- 60 bytes
|
||||
- i\_block[EXT4\_N\_BLOCKS=15]
|
||||
- Block map or extent tree. See the section “The Contents of inode.i\_block”.
|
||||
- i_block[EXT4_N_BLOCKS=15]
|
||||
- Block map or extent tree. See the section “The Contents of inode.i_block”.
|
||||
* - 0x64
|
||||
- \_\_le32
|
||||
- i\_generation
|
||||
- __le32
|
||||
- i_generation
|
||||
- File version (for NFS).
|
||||
* - 0x68
|
||||
- \_\_le32
|
||||
- i\_file\_acl\_lo
|
||||
- __le32
|
||||
- i_file_acl_lo
|
||||
- Lower 32-bits of extended attribute block. ACLs are of course one of
|
||||
many possible extended attributes; I think the name of this field is a
|
||||
result of the first use of extended attributes being for ACLs.
|
||||
* - 0x6C
|
||||
- \_\_le32
|
||||
- i\_size\_high / i\_dir\_acl
|
||||
- __le32
|
||||
- i_size_high / i_dir_acl
|
||||
- Upper 32-bits of file/directory size. In ext2/3 this field was named
|
||||
i\_dir\_acl, though it was usually set to zero and never used.
|
||||
i_dir_acl, though it was usually set to zero and never used.
|
||||
* - 0x70
|
||||
- \_\_le32
|
||||
- i\_obso\_faddr
|
||||
- __le32
|
||||
- i_obso_faddr
|
||||
- (Obsolete) fragment address.
|
||||
* - 0x74
|
||||
- 12 bytes
|
||||
- i\_osd2
|
||||
- i_osd2
|
||||
- See the table i_osd2_ for more details.
|
||||
* - 0x80
|
||||
- \_\_le16
|
||||
- i\_extra\_isize
|
||||
- __le16
|
||||
- i_extra_isize
|
||||
- Size of this inode - 128. Alternately, the size of the extended inode
|
||||
fields beyond the original ext2 inode, including this field.
|
||||
* - 0x82
|
||||
- \_\_le16
|
||||
- i\_checksum\_hi
|
||||
- __le16
|
||||
- i_checksum_hi
|
||||
- Upper 16-bits of the inode checksum.
|
||||
* - 0x84
|
||||
- \_\_le32
|
||||
- i\_ctime\_extra
|
||||
- __le32
|
||||
- i_ctime_extra
|
||||
- Extra change time bits. This provides sub-second precision. See Inode
|
||||
Timestamps section.
|
||||
* - 0x88
|
||||
- \_\_le32
|
||||
- i\_mtime\_extra
|
||||
- __le32
|
||||
- i_mtime_extra
|
||||
- Extra modification time bits. This provides sub-second precision.
|
||||
* - 0x8C
|
||||
- \_\_le32
|
||||
- i\_atime\_extra
|
||||
- __le32
|
||||
- i_atime_extra
|
||||
- Extra access time bits. This provides sub-second precision.
|
||||
* - 0x90
|
||||
- \_\_le32
|
||||
- i\_crtime
|
||||
- __le32
|
||||
- i_crtime
|
||||
- File creation time, in seconds since the epoch.
|
||||
* - 0x94
|
||||
- \_\_le32
|
||||
- i\_crtime\_extra
|
||||
- __le32
|
||||
- i_crtime_extra
|
||||
- Extra file creation time bits. This provides sub-second precision.
|
||||
* - 0x98
|
||||
- \_\_le32
|
||||
- i\_version\_hi
|
||||
- __le32
|
||||
- i_version_hi
|
||||
- Upper 32-bits for version number.
|
||||
* - 0x9C
|
||||
- \_\_le32
|
||||
- i\_projid
|
||||
- __le32
|
||||
- i_projid
|
||||
- Project ID.
|
||||
|
||||
.. _i_mode:
|
||||
@ -183,45 +183,45 @@ The ``i_mode`` value is a combination of the following flags:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- S\_IXOTH (Others may execute)
|
||||
- S_IXOTH (Others may execute)
|
||||
* - 0x2
|
||||
- S\_IWOTH (Others may write)
|
||||
- S_IWOTH (Others may write)
|
||||
* - 0x4
|
||||
- S\_IROTH (Others may read)
|
||||
- S_IROTH (Others may read)
|
||||
* - 0x8
|
||||
- S\_IXGRP (Group members may execute)
|
||||
- S_IXGRP (Group members may execute)
|
||||
* - 0x10
|
||||
- S\_IWGRP (Group members may write)
|
||||
- S_IWGRP (Group members may write)
|
||||
* - 0x20
|
||||
- S\_IRGRP (Group members may read)
|
||||
- S_IRGRP (Group members may read)
|
||||
* - 0x40
|
||||
- S\_IXUSR (Owner may execute)
|
||||
- S_IXUSR (Owner may execute)
|
||||
* - 0x80
|
||||
- S\_IWUSR (Owner may write)
|
||||
- S_IWUSR (Owner may write)
|
||||
* - 0x100
|
||||
- S\_IRUSR (Owner may read)
|
||||
- S_IRUSR (Owner may read)
|
||||
* - 0x200
|
||||
- S\_ISVTX (Sticky bit)
|
||||
- S_ISVTX (Sticky bit)
|
||||
* - 0x400
|
||||
- S\_ISGID (Set GID)
|
||||
- S_ISGID (Set GID)
|
||||
* - 0x800
|
||||
- S\_ISUID (Set UID)
|
||||
- S_ISUID (Set UID)
|
||||
* -
|
||||
- These are mutually-exclusive file types:
|
||||
* - 0x1000
|
||||
- S\_IFIFO (FIFO)
|
||||
- S_IFIFO (FIFO)
|
||||
* - 0x2000
|
||||
- S\_IFCHR (Character device)
|
||||
- S_IFCHR (Character device)
|
||||
* - 0x4000
|
||||
- S\_IFDIR (Directory)
|
||||
- S_IFDIR (Directory)
|
||||
* - 0x6000
|
||||
- S\_IFBLK (Block device)
|
||||
- S_IFBLK (Block device)
|
||||
* - 0x8000
|
||||
- S\_IFREG (Regular file)
|
||||
- S_IFREG (Regular file)
|
||||
* - 0xA000
|
||||
- S\_IFLNK (Symbolic link)
|
||||
- S_IFLNK (Symbolic link)
|
||||
* - 0xC000
|
||||
- S\_IFSOCK (Socket)
|
||||
- S_IFSOCK (Socket)
|
||||
|
||||
.. _i_flags:
|
||||
|
||||
@ -234,56 +234,56 @@ The ``i_flags`` field is a combination of these values:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented)
|
||||
- This file requires secure deletion (EXT4_SECRM_FL). (not implemented)
|
||||
* - 0x2
|
||||
- This file should be preserved, should undeletion be desired
|
||||
(EXT4\_UNRM\_FL). (not implemented)
|
||||
(EXT4_UNRM_FL). (not implemented)
|
||||
* - 0x4
|
||||
- File is compressed (EXT4\_COMPR\_FL). (not really implemented)
|
||||
- File is compressed (EXT4_COMPR_FL). (not really implemented)
|
||||
* - 0x8
|
||||
- All writes to the file must be synchronous (EXT4\_SYNC\_FL).
|
||||
- All writes to the file must be synchronous (EXT4_SYNC_FL).
|
||||
* - 0x10
|
||||
- File is immutable (EXT4\_IMMUTABLE\_FL).
|
||||
- File is immutable (EXT4_IMMUTABLE_FL).
|
||||
* - 0x20
|
||||
- File can only be appended (EXT4\_APPEND\_FL).
|
||||
- File can only be appended (EXT4_APPEND_FL).
|
||||
* - 0x40
|
||||
- The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL).
|
||||
- The dump(1) utility should not dump this file (EXT4_NODUMP_FL).
|
||||
* - 0x80
|
||||
- Do not update access time (EXT4\_NOATIME\_FL).
|
||||
- Do not update access time (EXT4_NOATIME_FL).
|
||||
* - 0x100
|
||||
- Dirty compressed file (EXT4\_DIRTY\_FL). (not used)
|
||||
- Dirty compressed file (EXT4_DIRTY_FL). (not used)
|
||||
* - 0x200
|
||||
- File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used)
|
||||
- File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used)
|
||||
* - 0x400
|
||||
- Do not compress file (EXT4\_NOCOMPR\_FL). (not used)
|
||||
- Do not compress file (EXT4_NOCOMPR_FL). (not used)
|
||||
* - 0x800
|
||||
- Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was
|
||||
EXT4\_ECOMPR\_FL (compression error), which was never used.
|
||||
- Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was
|
||||
EXT4_ECOMPR_FL (compression error), which was never used.
|
||||
* - 0x1000
|
||||
- Directory has hashed indexes (EXT4\_INDEX\_FL).
|
||||
- Directory has hashed indexes (EXT4_INDEX_FL).
|
||||
* - 0x2000
|
||||
- AFS magic directory (EXT4\_IMAGIC\_FL).
|
||||
- AFS magic directory (EXT4_IMAGIC_FL).
|
||||
* - 0x4000
|
||||
- File data must always be written through the journal
|
||||
(EXT4\_JOURNAL\_DATA\_FL).
|
||||
(EXT4_JOURNAL_DATA_FL).
|
||||
* - 0x8000
|
||||
- File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4)
|
||||
- File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4)
|
||||
* - 0x10000
|
||||
- All directory entry data should be written synchronously (see
|
||||
``dirsync``) (EXT4\_DIRSYNC\_FL).
|
||||
``dirsync``) (EXT4_DIRSYNC_FL).
|
||||
* - 0x20000
|
||||
- Top of directory hierarchy (EXT4\_TOPDIR\_FL).
|
||||
- Top of directory hierarchy (EXT4_TOPDIR_FL).
|
||||
* - 0x40000
|
||||
- This is a huge file (EXT4\_HUGE\_FILE\_FL).
|
||||
- This is a huge file (EXT4_HUGE_FILE_FL).
|
||||
* - 0x80000
|
||||
- Inode uses extents (EXT4\_EXTENTS\_FL).
|
||||
- Inode uses extents (EXT4_EXTENTS_FL).
|
||||
* - 0x100000
|
||||
- Verity protected file (EXT4\_VERITY\_FL).
|
||||
- Verity protected file (EXT4_VERITY_FL).
|
||||
* - 0x200000
|
||||
- Inode stores a large extended attribute value in its data blocks
|
||||
(EXT4\_EA\_INODE\_FL).
|
||||
(EXT4_EA_INODE_FL).
|
||||
* - 0x400000
|
||||
- This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL).
|
||||
- This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL).
|
||||
(deprecated)
|
||||
* - 0x01000000
|
||||
- Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline)
|
||||
@ -294,21 +294,21 @@ The ``i_flags`` field is a combination of these values:
|
||||
- Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in
|
||||
mainline)
|
||||
* - 0x10000000
|
||||
- Inode has inline data (EXT4\_INLINE\_DATA\_FL).
|
||||
- Inode has inline data (EXT4_INLINE_DATA_FL).
|
||||
* - 0x20000000
|
||||
- Create children with the same project ID (EXT4\_PROJINHERIT\_FL).
|
||||
- Create children with the same project ID (EXT4_PROJINHERIT_FL).
|
||||
* - 0x80000000
|
||||
- Reserved for ext4 library (EXT4\_RESERVED\_FL).
|
||||
- Reserved for ext4 library (EXT4_RESERVED_FL).
|
||||
* -
|
||||
- Aggregate flags:
|
||||
* - 0x705BDFFF
|
||||
- User-visible flags.
|
||||
* - 0x604BC0FF
|
||||
- User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and
|
||||
EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's
|
||||
EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of
|
||||
- User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and
|
||||
EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's
|
||||
EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of
|
||||
these flags in a special manner and they are masked out of the set of
|
||||
flags that are saved directly to i\_flags.
|
||||
flags that are saved directly to i_flags.
|
||||
|
||||
.. _i_osd1:
|
||||
|
||||
@ -325,9 +325,9 @@ Linux:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- l\_i\_version
|
||||
- Inode version. However, if the EA\_INODE inode flag is set, this inode
|
||||
- __le32
|
||||
- l_i_version
|
||||
- Inode version. However, if the EA_INODE inode flag is set, this inode
|
||||
stores an extended attribute value and this field contains the upper 32
|
||||
bits of the attribute value's reference count.
|
||||
|
||||
@ -342,8 +342,8 @@ Hurd:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_i\_translator
|
||||
- __le32
|
||||
- h_i_translator
|
||||
- ??
|
||||
|
||||
Masix:
|
||||
@ -357,8 +357,8 @@ Masix:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- m\_i\_reserved
|
||||
- __le32
|
||||
- m_i_reserved
|
||||
- ??
|
||||
|
||||
.. _i_osd2:
|
||||
@ -376,30 +376,30 @@ Linux:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- l\_i\_blocks\_high
|
||||
- __le16
|
||||
- l_i_blocks_high
|
||||
- Upper 16-bits of the block count. Please see the note attached to
|
||||
i\_blocks\_lo.
|
||||
i_blocks_lo.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- l\_i\_file\_acl\_high
|
||||
- __le16
|
||||
- l_i_file_acl_high
|
||||
- Upper 16-bits of the extended attribute block (historically, the file
|
||||
ACL location). See the Extended Attributes section below.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- l\_i\_uid\_high
|
||||
- __le16
|
||||
- l_i_uid_high
|
||||
- Upper 16-bits of the Owner UID.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- l\_i\_gid\_high
|
||||
- __le16
|
||||
- l_i_gid_high
|
||||
- Upper 16-bits of the GID.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- l\_i\_checksum\_lo
|
||||
- __le16
|
||||
- l_i_checksum_lo
|
||||
- Lower 16-bits of the inode checksum.
|
||||
* - 0xA
|
||||
- \_\_le16
|
||||
- l\_i\_reserved
|
||||
- __le16
|
||||
- l_i_reserved
|
||||
- Unused.
|
||||
|
||||
Hurd:
|
||||
@ -413,24 +413,24 @@ Hurd:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- h\_i\_reserved1
|
||||
- __le16
|
||||
- h_i_reserved1
|
||||
- ??
|
||||
* - 0x2
|
||||
- \_\_u16
|
||||
- h\_i\_mode\_high
|
||||
- __u16
|
||||
- h_i_mode_high
|
||||
- Upper 16-bits of the file mode.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- h\_i\_uid\_high
|
||||
- __le16
|
||||
- h_i_uid_high
|
||||
- Upper 16-bits of the Owner UID.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- h\_i\_gid\_high
|
||||
- __le16
|
||||
- h_i_gid_high
|
||||
- Upper 16-bits of the GID.
|
||||
* - 0x8
|
||||
- \_\_u32
|
||||
- h\_i\_author
|
||||
- __u32
|
||||
- h_i_author
|
||||
- Author code?
|
||||
|
||||
Masix:
|
||||
@ -444,17 +444,17 @@ Masix:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- h\_i\_reserved1
|
||||
- __le16
|
||||
- h_i_reserved1
|
||||
- ??
|
||||
* - 0x2
|
||||
- \_\_u16
|
||||
- m\_i\_file\_acl\_high
|
||||
- __u16
|
||||
- m_i_file_acl_high
|
||||
- Upper 16-bits of the extended attribute block (historically, the file
|
||||
ACL location).
|
||||
* - 0x4
|
||||
- \_\_u32
|
||||
- m\_i\_reserved2[2]
|
||||
- __u32
|
||||
- m_i_reserved2[2]
|
||||
- ??
|
||||
|
||||
Inode Size
|
||||
@ -466,11 +466,11 @@ In ext2 and ext3, the inode structure size was fixed at 128 bytes
|
||||
on-disk inode at format time for all inodes in the filesystem to provide
|
||||
space beyond the end of the original ext2 inode. The on-disk inode
|
||||
record size is recorded in the superblock as ``s_inode_size``. The
|
||||
number of bytes actually used by struct ext4\_inode beyond the original
|
||||
number of bytes actually used by struct ext4_inode beyond the original
|
||||
128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each
|
||||
inode, which allows struct ext4\_inode to grow for a new kernel without
|
||||
inode, which allows struct ext4_inode to grow for a new kernel without
|
||||
having to upgrade all of the on-disk inodes. Access to fields beyond
|
||||
EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within
|
||||
EXT2_GOOD_OLD_INODE_SIZE should be verified to be within
|
||||
``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
|
||||
of August 2019) the inode structure is 160 bytes
|
||||
(``i_extra_isize = 32``). The extra space between the end of the inode
|
||||
@ -516,7 +516,7 @@ creation time (crtime); this field is 64-bits wide and decoded in the
|
||||
same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible
|
||||
through the regular stat() interface, though debugfs will report them.
|
||||
|
||||
We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)).
|
||||
We use the 32-bit signed time value plus (2^32 * (extra epoch bits)).
|
||||
In other words:
|
||||
|
||||
.. list-table::
|
||||
@ -525,8 +525,8 @@ In other words:
|
||||
|
||||
* - Extra epoch bits
|
||||
- MSB of 32-bit time
|
||||
- Adjustment for signed 32-bit to 64-bit tv\_sec
|
||||
- Decoded 64-bit tv\_sec
|
||||
- Adjustment for signed 32-bit to 64-bit tv_sec
|
||||
- Decoded 64-bit tv_sec
|
||||
- valid time range
|
||||
* - 0 0
|
||||
- 1
|
||||
|
@ -63,8 +63,8 @@ Generally speaking, the journal has this format:
|
||||
:header-rows: 1
|
||||
|
||||
* - Superblock
|
||||
- descriptor\_block (data\_blocks or revocation\_block) [more data or
|
||||
revocations] commmit\_block
|
||||
- descriptor_block (data_blocks or revocation_block) [more data or
|
||||
revocations] commit_block
|
||||
- [more transactions...]
|
||||
* -
|
||||
- One transaction
|
||||
@ -93,8 +93,8 @@ superblock.
|
||||
* - 1024 bytes of padding
|
||||
- ext4 Superblock
|
||||
- Journal Superblock
|
||||
- descriptor\_block (data\_blocks or revocation\_block) [more data or
|
||||
revocations] commmit\_block
|
||||
- descriptor_block (data_blocks or revocation_block) [more data or
|
||||
revocations] commit_block
|
||||
- [more transactions...]
|
||||
* -
|
||||
-
|
||||
@ -117,17 +117,17 @@ Every block in the journal starts with a common 12-byte header
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- h\_magic
|
||||
- __be32
|
||||
- h_magic
|
||||
- jbd2 magic number, 0xC03B3998.
|
||||
* - 0x4
|
||||
- \_\_be32
|
||||
- h\_blocktype
|
||||
- __be32
|
||||
- h_blocktype
|
||||
- Description of what this block contains. See the jbd2_blocktype_ table
|
||||
below.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- h\_sequence
|
||||
- __be32
|
||||
- h_sequence
|
||||
- The transaction ID that goes with this block.
|
||||
|
||||
.. _jbd2_blocktype:
|
||||
@ -177,99 +177,99 @@ which is 1024 bytes long:
|
||||
-
|
||||
- Static information describing the journal.
|
||||
* - 0x0
|
||||
- journal\_header\_t (12 bytes)
|
||||
- s\_header
|
||||
- journal_header_t (12 bytes)
|
||||
- s_header
|
||||
- Common header identifying this as a superblock.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- s\_blocksize
|
||||
- __be32
|
||||
- s_blocksize
|
||||
- Journal device block size.
|
||||
* - 0x10
|
||||
- \_\_be32
|
||||
- s\_maxlen
|
||||
- __be32
|
||||
- s_maxlen
|
||||
- Total number of blocks in this journal.
|
||||
* - 0x14
|
||||
- \_\_be32
|
||||
- s\_first
|
||||
- __be32
|
||||
- s_first
|
||||
- First block of log information.
|
||||
* -
|
||||
-
|
||||
-
|
||||
- Dynamic information describing the current state of the log.
|
||||
* - 0x18
|
||||
- \_\_be32
|
||||
- s\_sequence
|
||||
- __be32
|
||||
- s_sequence
|
||||
- First commit ID expected in log.
|
||||
* - 0x1C
|
||||
- \_\_be32
|
||||
- s\_start
|
||||
- __be32
|
||||
- s_start
|
||||
- Block number of the start of log. Contrary to the comments, this field
|
||||
being zero does not imply that the journal is clean!
|
||||
* - 0x20
|
||||
- \_\_be32
|
||||
- s\_errno
|
||||
- Error value, as set by jbd2\_journal\_abort().
|
||||
- __be32
|
||||
- s_errno
|
||||
- Error value, as set by jbd2_journal_abort().
|
||||
* -
|
||||
-
|
||||
-
|
||||
- The remaining fields are only valid in a v2 superblock.
|
||||
* - 0x24
|
||||
- \_\_be32
|
||||
- s\_feature\_compat;
|
||||
- __be32
|
||||
- s_feature_compat;
|
||||
- Compatible feature set. See the table jbd2_compat_ below.
|
||||
* - 0x28
|
||||
- \_\_be32
|
||||
- s\_feature\_incompat
|
||||
- __be32
|
||||
- s_feature_incompat
|
||||
- Incompatible feature set. See the table jbd2_incompat_ below.
|
||||
* - 0x2C
|
||||
- \_\_be32
|
||||
- s\_feature\_ro\_compat
|
||||
- __be32
|
||||
- s_feature_ro_compat
|
||||
- Read-only compatible feature set. There aren't any of these currently.
|
||||
* - 0x30
|
||||
- \_\_u8
|
||||
- s\_uuid[16]
|
||||
- __u8
|
||||
- s_uuid[16]
|
||||
- 128-bit uuid for journal. This is compared against the copy in the ext4
|
||||
super block at mount time.
|
||||
* - 0x40
|
||||
- \_\_be32
|
||||
- s\_nr\_users
|
||||
- __be32
|
||||
- s_nr_users
|
||||
- Number of file systems sharing this journal.
|
||||
* - 0x44
|
||||
- \_\_be32
|
||||
- s\_dynsuper
|
||||
- __be32
|
||||
- s_dynsuper
|
||||
- Location of dynamic super block copy. (Not used?)
|
||||
* - 0x48
|
||||
- \_\_be32
|
||||
- s\_max\_transaction
|
||||
- __be32
|
||||
- s_max_transaction
|
||||
- Limit of journal blocks per transaction. (Not used?)
|
||||
* - 0x4C
|
||||
- \_\_be32
|
||||
- s\_max\_trans\_data
|
||||
- __be32
|
||||
- s_max_trans_data
|
||||
- Limit of data blocks per transaction. (Not used?)
|
||||
* - 0x50
|
||||
- \_\_u8
|
||||
- s\_checksum\_type
|
||||
- __u8
|
||||
- s_checksum_type
|
||||
- Checksum algorithm used for the journal. See jbd2_checksum_type_ for
|
||||
more info.
|
||||
* - 0x51
|
||||
- \_\_u8[3]
|
||||
- s\_padding2
|
||||
- __u8[3]
|
||||
- s_padding2
|
||||
-
|
||||
* - 0x54
|
||||
- \_\_be32
|
||||
- s\_num\_fc\_blocks
|
||||
- __be32
|
||||
- s_num_fc_blocks
|
||||
- Number of fast commit blocks in the journal.
|
||||
* - 0x58
|
||||
- \_\_u32
|
||||
- s\_padding[42]
|
||||
- __u32
|
||||
- s_padding[42]
|
||||
-
|
||||
* - 0xFC
|
||||
- \_\_be32
|
||||
- s\_checksum
|
||||
- __be32
|
||||
- s_checksum
|
||||
- Checksum of the entire superblock, with this field set to zero.
|
||||
* - 0x100
|
||||
- \_\_u8
|
||||
- s\_users[16\*48]
|
||||
- __u8
|
||||
- s_users[16*48]
|
||||
- ids of all file systems sharing the log. e2fsprogs/Linux don't allow
|
||||
shared external journals, but I imagine Lustre (or ocfs2?), which use
|
||||
the jbd2 code, might.
|
||||
@ -286,7 +286,7 @@ The journal compat features are any combination of the following:
|
||||
- Description
|
||||
* - 0x1
|
||||
- Journal maintains checksums on the data blocks.
|
||||
(JBD2\_FEATURE\_COMPAT\_CHECKSUM)
|
||||
(JBD2_FEATURE_COMPAT_CHECKSUM)
|
||||
|
||||
.. _jbd2_incompat:
|
||||
|
||||
@ -299,23 +299,23 @@ The journal incompat features are any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE)
|
||||
- Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE)
|
||||
* - 0x2
|
||||
- Journal can deal with 64-bit block numbers.
|
||||
(JBD2\_FEATURE\_INCOMPAT\_64BIT)
|
||||
(JBD2_FEATURE_INCOMPAT_64BIT)
|
||||
* - 0x4
|
||||
- Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT)
|
||||
- Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
|
||||
* - 0x8
|
||||
- This journal uses v2 of the checksum on-disk format. Each journal
|
||||
metadata block gets its own checksum, and the block tags in the
|
||||
descriptor table contain checksums for each of the data blocks in the
|
||||
journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2)
|
||||
journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2)
|
||||
* - 0x10
|
||||
- This journal uses v3 of the checksum on-disk format. This is the same as
|
||||
v2, but the journal block tag size is fixed regardless of the size of
|
||||
block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
|
||||
block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
|
||||
* - 0x20
|
||||
- Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
|
||||
- Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
|
||||
|
||||
.. _jbd2_checksum_type:
|
||||
|
||||
@ -355,11 +355,11 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- journal\_header\_t
|
||||
- journal_header_t
|
||||
- (open coded)
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- struct journal\_block\_tag\_s
|
||||
- struct journal_block_tag_s
|
||||
- open coded array[]
|
||||
- Enough tags either to fill up the block or to describe all the data
|
||||
blocks that follow this descriptor block.
|
||||
@ -367,7 +367,7 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
|
||||
Journal block tags have any of the following formats, depending on which
|
||||
journal feature and block tag flags are set.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is
|
||||
defined as ``struct journal_block_tag3_s``, which looks like the
|
||||
following. The size is 16 or 32 bytes.
|
||||
|
||||
@ -380,24 +380,24 @@ following. The size is 16 or 32 bytes.
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_blocknr
|
||||
- __be32
|
||||
- t_blocknr
|
||||
- Lower 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* - 0x4
|
||||
- \_\_be32
|
||||
- t\_flags
|
||||
- __be32
|
||||
- t_flags
|
||||
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
|
||||
more info.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- t\_blocknr\_high
|
||||
- __be32
|
||||
- t_blocknr_high
|
||||
- Upper 32-bits of the location of where the corresponding data block
|
||||
should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is
|
||||
should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is
|
||||
not enabled.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- t\_checksum
|
||||
- __be32
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID, the sequence number, and the data block.
|
||||
* -
|
||||
-
|
||||
@ -433,7 +433,7 @@ The journal tag flags are any combination of the following:
|
||||
* - 0x8
|
||||
- This is the last tag in this descriptor block.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag
|
||||
is defined as ``struct journal_block_tag_s``, which looks like the
|
||||
following. The size is 8, 12, 24, or 28 bytes:
|
||||
|
||||
@ -446,18 +446,18 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_blocknr
|
||||
- __be32
|
||||
- t_blocknr
|
||||
- Lower 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* - 0x4
|
||||
- \_\_be16
|
||||
- t\_checksum
|
||||
- __be16
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID, the sequence number, and the data block.
|
||||
Note that only the lower 16 bits are stored.
|
||||
* - 0x6
|
||||
- \_\_be16
|
||||
- t\_flags
|
||||
- __be16
|
||||
- t_flags
|
||||
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
|
||||
more info.
|
||||
* -
|
||||
@ -466,8 +466,8 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
- This next field is only present if the super block indicates support for
|
||||
64-bit block numbers.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- t\_blocknr\_high
|
||||
- __be32
|
||||
- t_blocknr_high
|
||||
- Upper 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* -
|
||||
@ -483,8 +483,8 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that
|
||||
field.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a
|
||||
``struct jbd2_journal_block_tail``, which looks like this:
|
||||
|
||||
.. list-table::
|
||||
@ -496,8 +496,8 @@ JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_checksum
|
||||
- __be32
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID + the descriptor block, with this field set
|
||||
to zero.
|
||||
|
||||
@ -538,25 +538,25 @@ length, but use a full block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- journal\_header\_t
|
||||
- r\_header
|
||||
- journal_header_t
|
||||
- r_header
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- r\_count
|
||||
- __be32
|
||||
- r_count
|
||||
- Number of bytes used in this block.
|
||||
* - 0x10
|
||||
- \_\_be32 or \_\_be64
|
||||
- __be32 or __be64
|
||||
- blocks[0]
|
||||
- Blocks to revoke.
|
||||
|
||||
After r\_count is a linear array of block numbers that are effectively
|
||||
After r_count is a linear array of block numbers that are effectively
|
||||
revoked by this transaction. The size of each block number is 8 bytes if
|
||||
the superblock advertises 64-bit block number support, or 4 bytes
|
||||
otherwise.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation
|
||||
block is a ``struct jbd2_journal_revoke_tail``, which has this format:
|
||||
|
||||
.. list-table::
|
||||
@ -568,8 +568,8 @@ block is a ``struct jbd2_journal_revoke_tail``, which has this format:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- r\_checksum
|
||||
- __be32
|
||||
- r_checksum
|
||||
- Checksum of the journal UUID + revocation block
|
||||
|
||||
Commit Block
|
||||
@ -592,38 +592,38 @@ bytes long (but uses a full block):
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- journal\_header\_s
|
||||
- journal_header_s
|
||||
- (open coded)
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- unsigned char
|
||||
- h\_chksum\_type
|
||||
- h_chksum_type
|
||||
- The type of checksum to use to verify the integrity of the data blocks
|
||||
in the transaction. See jbd2_checksum_type_ for more info.
|
||||
* - 0xD
|
||||
- unsigned char
|
||||
- h\_chksum\_size
|
||||
- h_chksum_size
|
||||
- The number of bytes used by the checksum. Most likely 4.
|
||||
* - 0xE
|
||||
- unsigned char
|
||||
- h\_padding[2]
|
||||
- h_padding[2]
|
||||
-
|
||||
* - 0x10
|
||||
- \_\_be32
|
||||
- h\_chksum[JBD2\_CHECKSUM\_BYTES]
|
||||
- __be32
|
||||
- h_chksum[JBD2_CHECKSUM_BYTES]
|
||||
- 32 bytes of space to store checksums. If
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3
|
||||
are set, the first ``__be32`` is the checksum of the journal UUID and
|
||||
the entire commit block, with this field zeroed. If
|
||||
JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the
|
||||
JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the
|
||||
crc32 of all the blocks already written to the transaction.
|
||||
* - 0x30
|
||||
- \_\_be64
|
||||
- h\_commit\_sec
|
||||
- __be64
|
||||
- h_commit_sec
|
||||
- The time that the transaction was committed, in seconds since the epoch.
|
||||
* - 0x38
|
||||
- \_\_be32
|
||||
- h\_commit\_nsec
|
||||
- __be32
|
||||
- h_commit_nsec
|
||||
- Nanoseconds component of the above timestamp.
|
||||
|
||||
Fast commits
|
||||
|
@ -7,8 +7,8 @@ Multiple mount protection (MMP) is a feature that protects the
|
||||
filesystem against multiple hosts trying to use the filesystem
|
||||
simultaneously. When a filesystem is opened (for mounting, or fsck,
|
||||
etc.), the MMP code running on the node (call it node A) checks a
|
||||
sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the
|
||||
open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then
|
||||
sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the
|
||||
open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then
|
||||
fsck is (hopefully) running, and open fails immediately. Otherwise, the
|
||||
open code will wait for twice the specified MMP check interval and check
|
||||
the sequence number again. If the sequence number has changed, then the
|
||||
@ -40,38 +40,38 @@ The MMP structure (``struct mmp_struct``) is as follows:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- mmp\_magic
|
||||
- __le32
|
||||
- mmp_magic
|
||||
- Magic number for MMP, 0x004D4D50 (“MMP”).
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- mmp\_seq
|
||||
- __le32
|
||||
- mmp_seq
|
||||
- Sequence number, updated periodically.
|
||||
* - 0x8
|
||||
- \_\_le64
|
||||
- mmp\_time
|
||||
- __le64
|
||||
- mmp_time
|
||||
- Time that the MMP block was last updated.
|
||||
* - 0x10
|
||||
- char[64]
|
||||
- mmp\_nodename
|
||||
- mmp_nodename
|
||||
- Hostname of the node that opened the filesystem.
|
||||
* - 0x50
|
||||
- char[32]
|
||||
- mmp\_bdevname
|
||||
- mmp_bdevname
|
||||
- Block device name of the filesystem.
|
||||
* - 0x70
|
||||
- \_\_le16
|
||||
- mmp\_check\_interval
|
||||
- __le16
|
||||
- mmp_check_interval
|
||||
- The MMP re-check interval, in seconds.
|
||||
* - 0x72
|
||||
- \_\_le16
|
||||
- mmp\_pad1
|
||||
- __le16
|
||||
- mmp_pad1
|
||||
- Zero.
|
||||
* - 0x74
|
||||
- \_\_le32[226]
|
||||
- mmp\_pad2
|
||||
- __le32[226]
|
||||
- mmp_pad2
|
||||
- Zero.
|
||||
* - 0x3FC
|
||||
- \_\_le32
|
||||
- mmp\_checksum
|
||||
- __le32
|
||||
- mmp_checksum
|
||||
- Checksum of the MMP block.
|
||||
|
@ -7,7 +7,7 @@ An ext4 file system is split into a series of block groups. To reduce
|
||||
performance difficulties due to fragmentation, the block allocator tries
|
||||
very hard to keep each file's blocks within the same group, thereby
|
||||
reducing seek times. The size of a block group is specified in
|
||||
``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 \*
|
||||
``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 *
|
||||
``block_size_in_bytes``. With the default block size of 4KiB, each group
|
||||
will contain 32,768 blocks, for a length of 128MiB. The number of block
|
||||
groups is the size of the device divided by the size of a block group.
|
||||
|
@ -34,7 +34,7 @@ ext4 reserves some inode for special features, as follows:
|
||||
* - 10
|
||||
- Replica inode, used for some non-upstream feature?
|
||||
* - 11
|
||||
- Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock.
|
||||
- Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock.
|
||||
|
||||
Note that there are also some inodes allocated from non-reserved inode numbers
|
||||
for other filesystem features which are not referenced from standard directory
|
||||
@ -47,9 +47,9 @@ hierarchy. These are generally reference from the superblock. They are:
|
||||
* - Superblock field
|
||||
- Description
|
||||
|
||||
* - s\_lpf\_ino
|
||||
* - s_lpf_ino
|
||||
- Inode number of lost+found directory.
|
||||
* - s\_prj\_quota\_inum
|
||||
* - s_prj_quota_inum
|
||||
- Inode number of quota file tracking project quotas
|
||||
* - s\_orphan\_file\_inum
|
||||
* - s_orphan_file_inum
|
||||
- Inode number of file tracking orphan inodes.
|
||||
|
@ -7,7 +7,7 @@ The superblock records various information about the enclosing
|
||||
filesystem, such as block counts, inode counts, supported features,
|
||||
maintenance information, and more.
|
||||
|
||||
If the sparse\_super feature flag is set, redundant copies of the
|
||||
If the sparse_super feature flag is set, redundant copies of the
|
||||
superblock and group descriptors are kept only in the groups whose group
|
||||
number is either 0 or a power of 3, 5, or 7. If the flag is not set,
|
||||
redundant copies are kept in all groups.
|
||||
@ -27,107 +27,107 @@ The ext4 superblock is laid out as follows in
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- s\_inodes\_count
|
||||
- __le32
|
||||
- s_inodes_count
|
||||
- Total inode count.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- s\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_blocks_count_lo
|
||||
- Total block count.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- s\_r\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_r_blocks_count_lo
|
||||
- This number of blocks can only be allocated by the super-user.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- s\_free\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_free_blocks_count_lo
|
||||
- Free block count.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- s\_free\_inodes\_count
|
||||
- __le32
|
||||
- s_free_inodes_count
|
||||
- Free inode count.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- s\_first\_data\_block
|
||||
- __le32
|
||||
- s_first_data_block
|
||||
- First data block. This must be at least 1 for 1k-block filesystems and
|
||||
is typically 0 for all other block sizes.
|
||||
* - 0x18
|
||||
- \_\_le32
|
||||
- s\_log\_block\_size
|
||||
- Block size is 2 ^ (10 + s\_log\_block\_size).
|
||||
- __le32
|
||||
- s_log_block_size
|
||||
- Block size is 2 ^ (10 + s_log_block_size).
|
||||
* - 0x1C
|
||||
- \_\_le32
|
||||
- s\_log\_cluster\_size
|
||||
- Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is
|
||||
enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size.
|
||||
- __le32
|
||||
- s_log_cluster_size
|
||||
- Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is
|
||||
enabled. Otherwise s_log_cluster_size must equal s_log_block_size.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- s\_blocks\_per\_group
|
||||
- __le32
|
||||
- s_blocks_per_group
|
||||
- Blocks per group.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- s\_clusters\_per\_group
|
||||
- __le32
|
||||
- s_clusters_per_group
|
||||
- Clusters per group, if bigalloc is enabled. Otherwise
|
||||
s\_clusters\_per\_group must equal s\_blocks\_per\_group.
|
||||
s_clusters_per_group must equal s_blocks_per_group.
|
||||
* - 0x28
|
||||
- \_\_le32
|
||||
- s\_inodes\_per\_group
|
||||
- __le32
|
||||
- s_inodes_per_group
|
||||
- Inodes per group.
|
||||
* - 0x2C
|
||||
- \_\_le32
|
||||
- s\_mtime
|
||||
- __le32
|
||||
- s_mtime
|
||||
- Mount time, in seconds since the epoch.
|
||||
* - 0x30
|
||||
- \_\_le32
|
||||
- s\_wtime
|
||||
- __le32
|
||||
- s_wtime
|
||||
- Write time, in seconds since the epoch.
|
||||
* - 0x34
|
||||
- \_\_le16
|
||||
- s\_mnt\_count
|
||||
- __le16
|
||||
- s_mnt_count
|
||||
- Number of mounts since the last fsck.
|
||||
* - 0x36
|
||||
- \_\_le16
|
||||
- s\_max\_mnt\_count
|
||||
- __le16
|
||||
- s_max_mnt_count
|
||||
- Number of mounts beyond which a fsck is needed.
|
||||
* - 0x38
|
||||
- \_\_le16
|
||||
- s\_magic
|
||||
- __le16
|
||||
- s_magic
|
||||
- Magic signature, 0xEF53
|
||||
* - 0x3A
|
||||
- \_\_le16
|
||||
- s\_state
|
||||
- __le16
|
||||
- s_state
|
||||
- File system state. See super_state_ for more info.
|
||||
* - 0x3C
|
||||
- \_\_le16
|
||||
- s\_errors
|
||||
- __le16
|
||||
- s_errors
|
||||
- Behaviour when detecting errors. See super_errors_ for more info.
|
||||
* - 0x3E
|
||||
- \_\_le16
|
||||
- s\_minor\_rev\_level
|
||||
- __le16
|
||||
- s_minor_rev_level
|
||||
- Minor revision level.
|
||||
* - 0x40
|
||||
- \_\_le32
|
||||
- s\_lastcheck
|
||||
- __le32
|
||||
- s_lastcheck
|
||||
- Time of last check, in seconds since the epoch.
|
||||
* - 0x44
|
||||
- \_\_le32
|
||||
- s\_checkinterval
|
||||
- __le32
|
||||
- s_checkinterval
|
||||
- Maximum time between checks, in seconds.
|
||||
* - 0x48
|
||||
- \_\_le32
|
||||
- s\_creator\_os
|
||||
- __le32
|
||||
- s_creator_os
|
||||
- Creator OS. See the table super_creator_ for more info.
|
||||
* - 0x4C
|
||||
- \_\_le32
|
||||
- s\_rev\_level
|
||||
- __le32
|
||||
- s_rev_level
|
||||
- Revision level. See the table super_revision_ for more info.
|
||||
* - 0x50
|
||||
- \_\_le16
|
||||
- s\_def\_resuid
|
||||
- __le16
|
||||
- s_def_resuid
|
||||
- Default uid for reserved blocks.
|
||||
* - 0x52
|
||||
- \_\_le16
|
||||
- s\_def\_resgid
|
||||
- __le16
|
||||
- s_def_resgid
|
||||
- Default gid for reserved blocks.
|
||||
* -
|
||||
-
|
||||
@ -143,50 +143,50 @@ The ext4 superblock is laid out as follows in
|
||||
about a feature in either the compatible or incompatible feature set, it
|
||||
must abort and not try to meddle with things it doesn't understand...
|
||||
* - 0x54
|
||||
- \_\_le32
|
||||
- s\_first\_ino
|
||||
- __le32
|
||||
- s_first_ino
|
||||
- First non-reserved inode.
|
||||
* - 0x58
|
||||
- \_\_le16
|
||||
- s\_inode\_size
|
||||
- __le16
|
||||
- s_inode_size
|
||||
- Size of inode structure, in bytes.
|
||||
* - 0x5A
|
||||
- \_\_le16
|
||||
- s\_block\_group\_nr
|
||||
- __le16
|
||||
- s_block_group_nr
|
||||
- Block group # of this superblock.
|
||||
* - 0x5C
|
||||
- \_\_le32
|
||||
- s\_feature\_compat
|
||||
- __le32
|
||||
- s_feature_compat
|
||||
- Compatible feature set flags. Kernel can still read/write this fs even
|
||||
if it doesn't understand a flag; fsck should not do that. See the
|
||||
super_compat_ table for more info.
|
||||
* - 0x60
|
||||
- \_\_le32
|
||||
- s\_feature\_incompat
|
||||
- __le32
|
||||
- s_feature_incompat
|
||||
- Incompatible feature set. If the kernel or fsck doesn't understand one
|
||||
of these bits, it should stop. See the super_incompat_ table for more
|
||||
info.
|
||||
* - 0x64
|
||||
- \_\_le32
|
||||
- s\_feature\_ro\_compat
|
||||
- __le32
|
||||
- s_feature_ro_compat
|
||||
- Readonly-compatible feature set. If the kernel doesn't understand one of
|
||||
these bits, it can still mount read-only. See the super_rocompat_ table
|
||||
for more info.
|
||||
* - 0x68
|
||||
- \_\_u8
|
||||
- s\_uuid[16]
|
||||
- __u8
|
||||
- s_uuid[16]
|
||||
- 128-bit UUID for volume.
|
||||
* - 0x78
|
||||
- char
|
||||
- s\_volume\_name[16]
|
||||
- s_volume_name[16]
|
||||
- Volume label.
|
||||
* - 0x88
|
||||
- char
|
||||
- s\_last\_mounted[64]
|
||||
- s_last_mounted[64]
|
||||
- Directory where filesystem was last mounted.
|
||||
* - 0xC8
|
||||
- \_\_le32
|
||||
- s\_algorithm\_usage\_bitmap
|
||||
- __le32
|
||||
- s_algorithm_usage_bitmap
|
||||
- For compression (Not used in e2fsprogs/Linux)
|
||||
* -
|
||||
-
|
||||
@ -194,18 +194,18 @@ The ext4 superblock is laid out as follows in
|
||||
- Performance hints. Directory preallocation should only happen if the
|
||||
EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
|
||||
* - 0xCC
|
||||
- \_\_u8
|
||||
- s\_prealloc\_blocks
|
||||
- __u8
|
||||
- s_prealloc_blocks
|
||||
- #. of blocks to try to preallocate for ... files? (Not used in
|
||||
e2fsprogs/Linux)
|
||||
* - 0xCD
|
||||
- \_\_u8
|
||||
- s\_prealloc\_dir\_blocks
|
||||
- __u8
|
||||
- s_prealloc_dir_blocks
|
||||
- #. of blocks to preallocate for directories. (Not used in
|
||||
e2fsprogs/Linux)
|
||||
* - 0xCE
|
||||
- \_\_le16
|
||||
- s\_reserved\_gdt\_blocks
|
||||
- __le16
|
||||
- s_reserved_gdt_blocks
|
||||
- Number of reserved GDT entries for future filesystem expansion.
|
||||
* -
|
||||
-
|
||||
@ -213,281 +213,281 @@ The ext4 superblock is laid out as follows in
|
||||
- Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is
|
||||
set.
|
||||
* - 0xD0
|
||||
- \_\_u8
|
||||
- s\_journal\_uuid[16]
|
||||
- __u8
|
||||
- s_journal_uuid[16]
|
||||
- UUID of journal superblock
|
||||
* - 0xE0
|
||||
- \_\_le32
|
||||
- s\_journal\_inum
|
||||
- __le32
|
||||
- s_journal_inum
|
||||
- inode number of journal file.
|
||||
* - 0xE4
|
||||
- \_\_le32
|
||||
- s\_journal\_dev
|
||||
- __le32
|
||||
- s_journal_dev
|
||||
- Device number of journal file, if the external journal feature flag is
|
||||
set.
|
||||
* - 0xE8
|
||||
- \_\_le32
|
||||
- s\_last\_orphan
|
||||
- __le32
|
||||
- s_last_orphan
|
||||
- Start of list of orphaned inodes to delete.
|
||||
* - 0xEC
|
||||
- \_\_le32
|
||||
- s\_hash\_seed[4]
|
||||
- __le32
|
||||
- s_hash_seed[4]
|
||||
- HTREE hash seed.
|
||||
* - 0xFC
|
||||
- \_\_u8
|
||||
- s\_def\_hash\_version
|
||||
- __u8
|
||||
- s_def_hash_version
|
||||
- Default hash algorithm to use for directory hashes. See super_def_hash_
|
||||
for more info.
|
||||
* - 0xFD
|
||||
- \_\_u8
|
||||
- s\_jnl\_backup\_type
|
||||
- If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the
|
||||
- __u8
|
||||
- s_jnl_backup_type
|
||||
- If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the
|
||||
``s_jnl_blocks`` field contains a duplicate copy of the inode's
|
||||
``i_block[]`` array and ``i_size``.
|
||||
* - 0xFE
|
||||
- \_\_le16
|
||||
- s\_desc\_size
|
||||
- __le16
|
||||
- s_desc_size
|
||||
- Size of group descriptors, in bytes, if the 64bit incompat feature flag
|
||||
is set.
|
||||
* - 0x100
|
||||
- \_\_le32
|
||||
- s\_default\_mount\_opts
|
||||
- __le32
|
||||
- s_default_mount_opts
|
||||
- Default mount options. See the super_mountopts_ table for more info.
|
||||
* - 0x104
|
||||
- \_\_le32
|
||||
- s\_first\_meta\_bg
|
||||
- First metablock block group, if the meta\_bg feature is enabled.
|
||||
- __le32
|
||||
- s_first_meta_bg
|
||||
- First metablock block group, if the meta_bg feature is enabled.
|
||||
* - 0x108
|
||||
- \_\_le32
|
||||
- s\_mkfs\_time
|
||||
- __le32
|
||||
- s_mkfs_time
|
||||
- When the filesystem was created, in seconds since the epoch.
|
||||
* - 0x10C
|
||||
- \_\_le32
|
||||
- s\_jnl\_blocks[17]
|
||||
- __le32
|
||||
- s_jnl_blocks[17]
|
||||
- Backup copy of the journal inode's ``i_block[]`` array in the first 15
|
||||
elements and i\_size\_high and i\_size in the 16th and 17th elements,
|
||||
elements and i_size_high and i_size in the 16th and 17th elements,
|
||||
respectively.
|
||||
* -
|
||||
-
|
||||
-
|
||||
- 64bit support is valid only if EXT4_FEATURE_INCOMPAT_64BIT is set.
|
||||
* - 0x150
|
||||
- \_\_le32
|
||||
- s\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_blocks_count_hi
|
||||
- High 32-bits of the block count.
|
||||
* - 0x154
|
||||
- \_\_le32
|
||||
- s\_r\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_r_blocks_count_hi
|
||||
- High 32-bits of the reserved block count.
|
||||
* - 0x158
|
||||
- \_\_le32
|
||||
- s\_free\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_free_blocks_count_hi
|
||||
- High 32-bits of the free block count.
|
||||
* - 0x15C
|
||||
- \_\_le16
|
||||
- s\_min\_extra\_isize
|
||||
- __le16
|
||||
- s_min_extra_isize
|
||||
- All inodes have at least # bytes.
|
||||
* - 0x15E
|
||||
- \_\_le16
|
||||
- s\_want\_extra\_isize
|
||||
- __le16
|
||||
- s_want_extra_isize
|
||||
- New inodes should reserve # bytes.
|
||||
* - 0x160
|
||||
- \_\_le32
|
||||
- s\_flags
|
||||
- __le32
|
||||
- s_flags
|
||||
- Miscellaneous flags. See the super_flags_ table for more info.
|
||||
* - 0x164
|
||||
- \_\_le16
|
||||
- s\_raid\_stride
|
||||
- __le16
|
||||
- s_raid_stride
|
||||
- RAID stride. This is the number of logical blocks read from or written
|
||||
to the disk before moving to the next disk. This affects the placement
|
||||
of filesystem metadata, which will hopefully make RAID storage faster.
|
||||
* - 0x166
|
||||
- \_\_le16
|
||||
- s\_mmp\_interval
|
||||
- __le16
|
||||
- s_mmp_interval
|
||||
- #. seconds to wait in multi-mount prevention (MMP) checking. In theory,
|
||||
MMP is a mechanism to record in the superblock which host and device
|
||||
have mounted the filesystem, in order to prevent multiple mounts. This
|
||||
feature does not seem to be implemented...
|
||||
* - 0x168
|
||||
- \_\_le64
|
||||
- s\_mmp\_block
|
||||
- __le64
|
||||
- s_mmp_block
|
||||
- Block # for multi-mount protection data.
|
||||
* - 0x170
|
||||
- \_\_le32
|
||||
- s\_raid\_stripe\_width
|
||||
- __le32
|
||||
- s_raid_stripe_width
|
||||
- RAID stripe width. This is the number of logical blocks read from or
|
||||
written to the disk before coming back to the current disk. This is used
|
||||
by the block allocator to try to reduce the number of read-modify-write
|
||||
operations in a RAID5/6.
|
||||
* - 0x174
|
||||
- \_\_u8
|
||||
- s\_log\_groups\_per\_flex
|
||||
- __u8
|
||||
- s_log_groups_per_flex
|
||||
- Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``.
|
||||
* - 0x175
|
||||
- \_\_u8
|
||||
- s\_checksum\_type
|
||||
- __u8
|
||||
- s_checksum_type
|
||||
- Metadata checksum algorithm type. The only valid value is 1 (crc32c).
|
||||
* - 0x176
|
||||
- \_\_le16
|
||||
- s\_reserved\_pad
|
||||
- __le16
|
||||
- s_reserved_pad
|
||||
-
|
||||
* - 0x178
|
||||
- \_\_le64
|
||||
- s\_kbytes\_written
|
||||
- __le64
|
||||
- s_kbytes_written
|
||||
- Number of KiB written to this filesystem over its lifetime.
|
||||
* - 0x180
|
||||
- \_\_le32
|
||||
- s\_snapshot\_inum
|
||||
- __le32
|
||||
- s_snapshot_inum
|
||||
- inode number of active snapshot. (Not used in e2fsprogs/Linux.)
|
||||
* - 0x184
|
||||
- \_\_le32
|
||||
- s\_snapshot\_id
|
||||
- __le32
|
||||
- s_snapshot_id
|
||||
- Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.)
|
||||
* - 0x188
|
||||
- \_\_le64
|
||||
- s\_snapshot\_r\_blocks\_count
|
||||
- __le64
|
||||
- s_snapshot_r_blocks_count
|
||||
- Number of blocks reserved for active snapshot's future use. (Not used in
|
||||
e2fsprogs/Linux.)
|
||||
* - 0x190
|
||||
- \_\_le32
|
||||
- s\_snapshot\_list
|
||||
- __le32
|
||||
- s_snapshot_list
|
||||
- inode number of the head of the on-disk snapshot list. (Not used in
|
||||
e2fsprogs/Linux.)
|
||||
* - 0x194
|
||||
- \_\_le32
|
||||
- s\_error\_count
|
||||
- __le32
|
||||
- s_error_count
|
||||
- Number of errors seen.
|
||||
* - 0x198
|
||||
- \_\_le32
|
||||
- s\_first\_error\_time
|
||||
- __le32
|
||||
- s_first_error_time
|
||||
- First time an error happened, in seconds since the epoch.
|
||||
* - 0x19C
|
||||
- \_\_le32
|
||||
- s\_first\_error\_ino
|
||||
- __le32
|
||||
- s_first_error_ino
|
||||
- inode involved in first error.
|
||||
* - 0x1A0
|
||||
- \_\_le64
|
||||
- s\_first\_error\_block
|
||||
- __le64
|
||||
- s_first_error_block
|
||||
- Number of block involved in first error.
|
||||
* - 0x1A8
|
||||
- \_\_u8
|
||||
- s\_first\_error\_func[32]
|
||||
- __u8
|
||||
- s_first_error_func[32]
|
||||
- Name of function where the error happened.
|
||||
* - 0x1C8
|
||||
- \_\_le32
|
||||
- s\_first\_error\_line
|
||||
- __le32
|
||||
- s_first_error_line
|
||||
- Line number where error happened.
|
||||
* - 0x1CC
|
||||
- \_\_le32
|
||||
- s\_last\_error\_time
|
||||
- __le32
|
||||
- s_last_error_time
|
||||
- Time of most recent error, in seconds since the epoch.
|
||||
* - 0x1D0
|
||||
- \_\_le32
|
||||
- s\_last\_error\_ino
|
||||
- __le32
|
||||
- s_last_error_ino
|
||||
- inode involved in most recent error.
|
||||
* - 0x1D4
|
||||
- \_\_le32
|
||||
- s\_last\_error\_line
|
||||
- __le32
|
||||
- s_last_error_line
|
||||
- Line number where most recent error happened.
|
||||
* - 0x1D8
|
||||
- \_\_le64
|
||||
- s\_last\_error\_block
|
||||
- __le64
|
||||
- s_last_error_block
|
||||
- Number of block involved in most recent error.
|
||||
* - 0x1E0
|
||||
- \_\_u8
|
||||
- s\_last\_error\_func[32]
|
||||
- __u8
|
||||
- s_last_error_func[32]
|
||||
- Name of function where the most recent error happened.
|
||||
* - 0x200
|
||||
- \_\_u8
|
||||
- s\_mount\_opts[64]
|
||||
- __u8
|
||||
- s_mount_opts[64]
|
||||
- ASCIIZ string of mount options.
|
||||
* - 0x240
|
||||
- \_\_le32
|
||||
- s\_usr\_quota\_inum
|
||||
- __le32
|
||||
- s_usr_quota_inum
|
||||
- Inode number of user `quota <quota>`__ file.
|
||||
* - 0x244
|
||||
- \_\_le32
|
||||
- s\_grp\_quota\_inum
|
||||
- __le32
|
||||
- s_grp_quota_inum
|
||||
- Inode number of group `quota <quota>`__ file.
|
||||
* - 0x248
|
||||
- \_\_le32
|
||||
- s\_overhead\_blocks
|
||||
- __le32
|
||||
- s_overhead_blocks
|
||||
- Overhead blocks/clusters in fs. (Huh? This field is always zero, which
|
||||
means that the kernel calculates it dynamically.)
|
||||
* - 0x24C
|
||||
- \_\_le32
|
||||
- s\_backup\_bgs[2]
|
||||
- Block groups containing superblock backups (if sparse\_super2)
|
||||
- __le32
|
||||
- s_backup_bgs[2]
|
||||
- Block groups containing superblock backups (if sparse_super2)
|
||||
* - 0x254
|
||||
- \_\_u8
|
||||
- s\_encrypt\_algos[4]
|
||||
- __u8
|
||||
- s_encrypt_algos[4]
|
||||
- Encryption algorithms in use. There can be up to four algorithms in use
|
||||
at any time; valid algorithm codes are given in the super_encrypt_ table
|
||||
below.
|
||||
* - 0x258
|
||||
- \_\_u8
|
||||
- s\_encrypt\_pw\_salt[16]
|
||||
- __u8
|
||||
- s_encrypt_pw_salt[16]
|
||||
- Salt for the string2key algorithm for encryption.
|
||||
* - 0x268
|
||||
- \_\_le32
|
||||
- s\_lpf\_ino
|
||||
- __le32
|
||||
- s_lpf_ino
|
||||
- Inode number of lost+found
|
||||
* - 0x26C
|
||||
- \_\_le32
|
||||
- s\_prj\_quota\_inum
|
||||
- __le32
|
||||
- s_prj_quota_inum
|
||||
- Inode that tracks project quotas.
|
||||
* - 0x270
|
||||
- \_\_le32
|
||||
- s\_checksum\_seed
|
||||
- Checksum seed used for metadata\_csum calculations. This value is
|
||||
crc32c(~0, $orig\_fs\_uuid).
|
||||
- __le32
|
||||
- s_checksum_seed
|
||||
- Checksum seed used for metadata_csum calculations. This value is
|
||||
crc32c(~0, $orig_fs_uuid).
|
||||
* - 0x274
|
||||
- \_\_u8
|
||||
- s\_wtime_hi
|
||||
- __u8
|
||||
- s_wtime_hi
|
||||
- Upper 8 bits of the s_wtime field.
|
||||
* - 0x275
|
||||
- \_\_u8
|
||||
- s\_mtime_hi
|
||||
- __u8
|
||||
- s_mtime_hi
|
||||
- Upper 8 bits of the s_mtime field.
|
||||
* - 0x276
|
||||
- \_\_u8
|
||||
- s\_mkfs_time_hi
|
||||
- __u8
|
||||
- s_mkfs_time_hi
|
||||
- Upper 8 bits of the s_mkfs_time field.
|
||||
* - 0x277
|
||||
- \_\_u8
|
||||
- s\_lastcheck_hi
|
||||
- __u8
|
||||
- s_lastcheck_hi
|
||||
- Upper 8 bits of the s_lastcheck field.
|
||||
* - 0x278
|
||||
- \_\_u8
|
||||
- s\_first_error_time_hi
|
||||
- __u8
|
||||
- s_first_error_time_hi
|
||||
- Upper 8 bits of the s_first_error_time field.
|
||||
* - 0x279
|
||||
- \_\_u8
|
||||
- s\_last_error_time_hi
|
||||
- __u8
|
||||
- s_last_error_time_hi
|
||||
- Upper 8 bits of the s_last_error_time field.
|
||||
* - 0x27A
|
||||
- \_\_u8
|
||||
- s\_pad[2]
|
||||
- __u8
|
||||
- s_pad[2]
|
||||
- Zero padding.
|
||||
* - 0x27C
|
||||
- \_\_le16
|
||||
- s\_encoding
|
||||
- __le16
|
||||
- s_encoding
|
||||
- Filename charset encoding.
|
||||
* - 0x27E
|
||||
- \_\_le16
|
||||
- s\_encoding_flags
|
||||
- __le16
|
||||
- s_encoding_flags
|
||||
- Filename charset encoding flags.
|
||||
* - 0x280
|
||||
- \_\_le32
|
||||
- s\_orphan\_file\_inum
|
||||
- __le32
|
||||
- s_orphan_file_inum
|
||||
- Orphan file inode number.
|
||||
* - 0x284
|
||||
- \_\_le32
|
||||
- s\_reserved[94]
|
||||
- __le32
|
||||
- s_reserved[94]
|
||||
- Padding to the end of the block.
|
||||
* - 0x3FC
|
||||
- \_\_le32
|
||||
- s\_checksum
|
||||
- __le32
|
||||
- s_checksum
|
||||
- Superblock checksum.
|
||||
|
||||
.. _super_state:
|
||||
@ -574,44 +574,44 @@ following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Directory preallocation (COMPAT\_DIR\_PREALLOC).
|
||||
- Directory preallocation (COMPAT_DIR_PREALLOC).
|
||||
* - 0x2
|
||||
- “imagic inodes”. Not clear from the code what this does
|
||||
(COMPAT\_IMAGIC\_INODES).
|
||||
(COMPAT_IMAGIC_INODES).
|
||||
* - 0x4
|
||||
- Has a journal (COMPAT\_HAS\_JOURNAL).
|
||||
- Has a journal (COMPAT_HAS_JOURNAL).
|
||||
* - 0x8
|
||||
- Supports extended attributes (COMPAT\_EXT\_ATTR).
|
||||
- Supports extended attributes (COMPAT_EXT_ATTR).
|
||||
* - 0x10
|
||||
- Has reserved GDT blocks for filesystem expansion
|
||||
(COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER.
|
||||
(COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER.
|
||||
* - 0x20
|
||||
- Has directory indices (COMPAT\_DIR\_INDEX).
|
||||
- Has directory indices (COMPAT_DIR_INDEX).
|
||||
* - 0x40
|
||||
- “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized
|
||||
block groups? (COMPAT\_LAZY\_BG)
|
||||
block groups? (COMPAT_LAZY_BG)
|
||||
* - 0x80
|
||||
- “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE).
|
||||
- “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE).
|
||||
* - 0x100
|
||||
- “Exclude bitmap”. Seems to be used to indicate the presence of
|
||||
snapshot-related exclude bitmaps? Not defined in kernel or used in
|
||||
e2fsprogs (COMPAT\_EXCLUDE\_BITMAP).
|
||||
e2fsprogs (COMPAT_EXCLUDE_BITMAP).
|
||||
* - 0x200
|
||||
- Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
|
||||
- Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
|
||||
points to the two block groups that contain backup superblocks
|
||||
(COMPAT\_SPARSE\_SUPER2).
|
||||
(COMPAT_SPARSE_SUPER2).
|
||||
* - 0x400
|
||||
- Fast commits supported. Although fast commits blocks are
|
||||
backward incompatible, fast commit blocks are not always
|
||||
present in the journal. If fast commit blocks are present in
|
||||
the journal, JBD2 incompat feature
|
||||
(JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
|
||||
set (COMPAT\_FAST\_COMMIT).
|
||||
(JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
|
||||
set (COMPAT_FAST_COMMIT).
|
||||
* - 0x1000
|
||||
- Orphan file allocated. This is the special file for more efficient
|
||||
tracking of unlinked but still open inodes. When there may be any
|
||||
entries in the file, we additionally set proper rocompat feature
|
||||
(RO\_COMPAT\_ORPHAN\_PRESENT).
|
||||
(RO_COMPAT_ORPHAN_PRESENT).
|
||||
|
||||
.. _super_incompat:
|
||||
|
||||
@ -625,45 +625,45 @@ following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Compression (INCOMPAT\_COMPRESSION).
|
||||
- Compression (INCOMPAT_COMPRESSION).
|
||||
* - 0x2
|
||||
- Directory entries record the file type. See ext4\_dir\_entry\_2 below
|
||||
(INCOMPAT\_FILETYPE).
|
||||
- Directory entries record the file type. See ext4_dir_entry_2 below
|
||||
(INCOMPAT_FILETYPE).
|
||||
* - 0x4
|
||||
- Filesystem needs recovery (INCOMPAT\_RECOVER).
|
||||
- Filesystem needs recovery (INCOMPAT_RECOVER).
|
||||
* - 0x8
|
||||
- Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV).
|
||||
- Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV).
|
||||
* - 0x10
|
||||
- Meta block groups. See the earlier discussion of this feature
|
||||
(INCOMPAT\_META\_BG).
|
||||
(INCOMPAT_META_BG).
|
||||
* - 0x40
|
||||
- Files in this filesystem use extents (INCOMPAT\_EXTENTS).
|
||||
- Files in this filesystem use extents (INCOMPAT_EXTENTS).
|
||||
* - 0x80
|
||||
- Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT).
|
||||
- Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT).
|
||||
* - 0x100
|
||||
- Multiple mount protection (INCOMPAT\_MMP).
|
||||
- Multiple mount protection (INCOMPAT_MMP).
|
||||
* - 0x200
|
||||
- Flexible block groups. See the earlier discussion of this feature
|
||||
(INCOMPAT\_FLEX\_BG).
|
||||
(INCOMPAT_FLEX_BG).
|
||||
* - 0x400
|
||||
- Inodes can be used to store large extended attribute values
|
||||
(INCOMPAT\_EA\_INODE).
|
||||
(INCOMPAT_EA_INODE).
|
||||
* - 0x1000
|
||||
- Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?)
|
||||
- Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?)
|
||||
* - 0x2000
|
||||
- Metadata checksum seed is stored in the superblock. This feature enables
|
||||
the administrator to change the UUID of a metadata\_csum filesystem
|
||||
the administrator to change the UUID of a metadata_csum filesystem
|
||||
while the filesystem is mounted; without it, the checksum definition
|
||||
requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED).
|
||||
requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED).
|
||||
* - 0x4000
|
||||
- Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to
|
||||
- Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to
|
||||
this feature, directories could not be larger than 4GiB and could not
|
||||
have an htree more than 2 levels deep. If this feature is enabled,
|
||||
directories can be larger than 4GiB and have a maximum htree depth of 3.
|
||||
* - 0x8000
|
||||
- Data in inode (INCOMPAT\_INLINE\_DATA).
|
||||
- Data in inode (INCOMPAT_INLINE_DATA).
|
||||
* - 0x10000
|
||||
- Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT).
|
||||
- Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT).
|
||||
|
||||
.. _super_rocompat:
|
||||
|
||||
@ -678,54 +678,54 @@ the following:
|
||||
- Description
|
||||
* - 0x1
|
||||
- Sparse superblocks. See the earlier discussion of this feature
|
||||
(RO\_COMPAT\_SPARSE\_SUPER).
|
||||
(RO_COMPAT_SPARSE_SUPER).
|
||||
* - 0x2
|
||||
- This filesystem has been used to store a file greater than 2GiB
|
||||
(RO\_COMPAT\_LARGE\_FILE).
|
||||
(RO_COMPAT_LARGE_FILE).
|
||||
* - 0x4
|
||||
- Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR).
|
||||
- Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR).
|
||||
* - 0x8
|
||||
- This filesystem has files whose sizes are represented in units of
|
||||
logical blocks, not 512-byte sectors. This implies a very large file
|
||||
indeed! (RO\_COMPAT\_HUGE\_FILE)
|
||||
indeed! (RO_COMPAT_HUGE_FILE)
|
||||
* - 0x10
|
||||
- Group descriptors have checksums. In addition to detecting corruption,
|
||||
this is useful for lazy formatting with uninitialized groups
|
||||
(RO\_COMPAT\_GDT\_CSUM).
|
||||
(RO_COMPAT_GDT_CSUM).
|
||||
* - 0x20
|
||||
- Indicates that the old ext3 32,000 subdirectory limit no longer applies
|
||||
(RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1
|
||||
(RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1
|
||||
if it is incremented past 64,999.
|
||||
* - 0x40
|
||||
- Indicates that large inodes exist on this filesystem
|
||||
(RO\_COMPAT\_EXTRA\_ISIZE).
|
||||
(RO_COMPAT_EXTRA_ISIZE).
|
||||
* - 0x80
|
||||
- This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT).
|
||||
- This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT).
|
||||
* - 0x100
|
||||
- `Quota <Quota>`__ (RO\_COMPAT\_QUOTA).
|
||||
- `Quota <Quota>`__ (RO_COMPAT_QUOTA).
|
||||
* - 0x200
|
||||
- This filesystem supports “bigalloc”, which means that file extents are
|
||||
tracked in units of clusters (of blocks) instead of blocks
|
||||
(RO\_COMPAT\_BIGALLOC).
|
||||
(RO_COMPAT_BIGALLOC).
|
||||
* - 0x400
|
||||
- This filesystem supports metadata checksumming.
|
||||
(RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though
|
||||
GDT\_CSUM must not be set)
|
||||
(RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though
|
||||
GDT_CSUM must not be set)
|
||||
* - 0x800
|
||||
- Filesystem supports replicas. This feature is neither in the kernel nor
|
||||
e2fsprogs. (RO\_COMPAT\_REPLICA)
|
||||
e2fsprogs. (RO_COMPAT_REPLICA)
|
||||
* - 0x1000
|
||||
- Read-only filesystem image; the kernel will not mount this image
|
||||
read-write and most tools will refuse to write to the image.
|
||||
(RO\_COMPAT\_READONLY)
|
||||
(RO_COMPAT_READONLY)
|
||||
* - 0x2000
|
||||
- Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
|
||||
- Filesystem tracks project quotas. (RO_COMPAT_PROJECT)
|
||||
* - 0x8000
|
||||
- Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
|
||||
- Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY)
|
||||
* - 0x10000
|
||||
- Indicates orphan file may have valid orphan entries and thus we need
|
||||
to clean them up when mounting the filesystem
|
||||
(RO\_COMPAT\_ORPHAN\_PRESENT).
|
||||
(RO_COMPAT_ORPHAN_PRESENT).
|
||||
|
||||
.. _super_def_hash:
|
||||
|
||||
@ -761,36 +761,36 @@ The ``s_default_mount_opts`` field is any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x0001
|
||||
- Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG)
|
||||
- Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG)
|
||||
* - 0x0002
|
||||
- New files take the gid of the containing directory (instead of the fsgid
|
||||
of the current process). (EXT4\_DEFM\_BSDGROUPS)
|
||||
of the current process). (EXT4_DEFM_BSDGROUPS)
|
||||
* - 0x0004
|
||||
- Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER)
|
||||
- Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER)
|
||||
* - 0x0008
|
||||
- Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL)
|
||||
- Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL)
|
||||
* - 0x0010
|
||||
- Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16)
|
||||
- Do not support 32-bit UIDs. (EXT4_DEFM_UID16)
|
||||
* - 0x0020
|
||||
- All data and metadata are committed to the journal.
|
||||
(EXT4\_DEFM\_JMODE\_DATA)
|
||||
(EXT4_DEFM_JMODE_DATA)
|
||||
* - 0x0040
|
||||
- All data are flushed to the disk before metadata are committed to the
|
||||
journal. (EXT4\_DEFM\_JMODE\_ORDERED)
|
||||
journal. (EXT4_DEFM_JMODE_ORDERED)
|
||||
* - 0x0060
|
||||
- Data ordering is not preserved; data may be written after the metadata
|
||||
has been written. (EXT4\_DEFM\_JMODE\_WBACK)
|
||||
has been written. (EXT4_DEFM_JMODE_WBACK)
|
||||
* - 0x0100
|
||||
- Disable write flushes. (EXT4\_DEFM\_NOBARRIER)
|
||||
- Disable write flushes. (EXT4_DEFM_NOBARRIER)
|
||||
* - 0x0200
|
||||
- Track which blocks in a filesystem are metadata and therefore should not
|
||||
be used as data blocks. This option will be enabled by default on 3.18,
|
||||
hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY)
|
||||
hopefully. (EXT4_DEFM_BLOCK_VALIDITY)
|
||||
* - 0x0400
|
||||
- Enable DISCARD support, where the storage device is told about blocks
|
||||
becoming unused. (EXT4\_DEFM\_DISCARD)
|
||||
becoming unused. (EXT4_DEFM_DISCARD)
|
||||
* - 0x0800
|
||||
- Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC)
|
||||
- Disable delayed allocation. (EXT4_DEFM_NODELALLOC)
|
||||
|
||||
.. _super_flags:
|
||||
|
||||
@ -820,12 +820,12 @@ The ``s_encrypt_algos`` list can contain any of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0
|
||||
- Invalid algorithm (ENCRYPTION\_MODE\_INVALID).
|
||||
- Invalid algorithm (ENCRYPTION_MODE_INVALID).
|
||||
* - 1
|
||||
- 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS).
|
||||
- 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS).
|
||||
* - 2
|
||||
- 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM).
|
||||
- 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM).
|
||||
* - 3
|
||||
- 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC).
|
||||
- 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC).
|
||||
|
||||
Total size of the superblock is 1024 bytes.
|
||||
|
@ -45,10 +45,12 @@ Name Alias Usage Preserved
|
||||
``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
|
||||
================= =============== =================== ============
|
||||
|
||||
Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
|
||||
kernel for storing the percpu base address. It normally has no ABI name, but is
|
||||
called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code,
|
||||
however they are deprecated aliases of ``$a0`` and ``$a1`` respectively.
|
||||
.. Note::
|
||||
The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
|
||||
kernel for storing the percpu base address. It normally has no ABI name,
|
||||
but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
|
||||
in some old code, however they are deprecated aliases of ``$a0`` and ``$a1``
|
||||
respectively.
|
||||
|
||||
FPRs
|
||||
----
|
||||
@ -69,8 +71,9 @@ Name Alias Usage Preserved
|
||||
``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
|
||||
================= ================== =================== ============
|
||||
|
||||
Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated
|
||||
aliases of ``$fa0`` and ``$fa1`` respectively.
|
||||
.. Note::
|
||||
You may see ``$fv0`` or ``$fv1`` in some old code, however they are
|
||||
deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
|
||||
|
||||
VRs
|
||||
----
|
||||
|
@ -145,12 +145,16 @@ Documentation of Loongson's LS7A chipset:
|
||||
|
||||
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
|
||||
|
||||
Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
|
||||
in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O
|
||||
Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference
|
||||
Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
|
||||
"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport
|
||||
Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference
|
||||
Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
|
||||
"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in
|
||||
Section 24.3 of "Loongson 7A1000 Bridge User Manual".
|
||||
.. Note::
|
||||
- CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
|
||||
in Section 7.4 of "LoongArch Reference Manual, Vol 1";
|
||||
- LIOINTC is "Legacy I/O Interrupts" described in Section 11.1 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
|
||||
"Loongson 7A1000 Bridge User Manual";
|
||||
- PCH-LPC is "LPC Interrupts" described in Section 24.3 of
|
||||
"Loongson 7A1000 Bridge User Manual".
|
||||
|
@ -46,10 +46,11 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其
|
||||
``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是
|
||||
================= =============== =================== ==========
|
||||
|
||||
注意:``$r21``寄存器在ELF psABI中保留未使用,但是在Linux内核用于保存每CPU
|
||||
变量基地址。该寄存器没有ABI命名,不过在内核中称为``$u0``。在一些遗留代码
|
||||
中有时可能见到``$v0``和``$v1``,它们是``$a0``和``$a1``的别名,属于已经废弃
|
||||
的用法。
|
||||
.. note::
|
||||
注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux内核用于保
|
||||
存每CPU变量基地址。该寄存器没有ABI命名,不过在内核中称为 ``$u0`` 。在
|
||||
一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` 和
|
||||
``$a1`` 的别名,属于已经废弃的用法。
|
||||
|
||||
浮点寄存器
|
||||
----------
|
||||
@ -68,8 +69,9 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其
|
||||
``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是
|
||||
================= ================== =================== ==========
|
||||
|
||||
注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0``
|
||||
和 ``$a1`` 的别名,属于已经废弃的用法。
|
||||
.. note::
|
||||
注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是
|
||||
``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。
|
||||
|
||||
|
||||
向量寄存器
|
||||
|
@ -147,9 +147,11 @@ PCH-LPC::
|
||||
|
||||
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版)
|
||||
|
||||
注:CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断
|
||||
控制逻辑;LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;EIOINTC
|
||||
即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;HTVECINTC即《龙芯3A5000
|
||||
处理器使用手册》第14.3节所描述的“HyperTransport中断”;PCH-PIC/PCH-MSI即《龙芯7A1000桥
|
||||
片用户手册》第5章所描述的“中断控制器”;PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所
|
||||
描述的“LPC中断”。
|
||||
.. note::
|
||||
- CPUINTC:即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其
|
||||
中断控制逻辑;
|
||||
- LIOINTC:即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;
|
||||
- EIOINTC:即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;
|
||||
- HTVECINTC:即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”;
|
||||
- PCH-PIC/PCH-MSI:即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”;
|
||||
- PCH-LPC:即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。
|
||||
|
46
MAINTAINERS
46
MAINTAINERS
@ -3661,7 +3661,7 @@ BPF JIT for ARM
|
||||
M: Shubham Bansal <illusionist.neo@gmail.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/arm/net/
|
||||
|
||||
BPF JIT for ARM64
|
||||
@ -3685,14 +3685,15 @@ BPF JIT for NFP NICs
|
||||
M: Jakub Kicinski <kuba@kernel.org>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Supported
|
||||
S: Odd Fixes
|
||||
F: drivers/net/ethernet/netronome/nfp/bpf/
|
||||
|
||||
BPF JIT for POWERPC (32-BIT AND 64-BIT)
|
||||
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
|
||||
M: Michael Ellerman <mpe@ellerman.id.au>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: arch/powerpc/net/
|
||||
|
||||
BPF JIT for RISC-V (32-bit)
|
||||
@ -3718,7 +3719,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
|
||||
M: Vasily Gorbik <gor@linux.ibm.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: arch/s390/net/
|
||||
X: arch/s390/net/pnet.c
|
||||
|
||||
@ -3726,14 +3727,14 @@ BPF JIT for SPARC (32-BIT AND 64-BIT)
|
||||
M: David S. Miller <davem@davemloft.net>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/sparc/net/
|
||||
|
||||
BPF JIT for X86 32-BIT
|
||||
M: Wang YanQing <udknight@gmail.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/x86/net/bpf_jit_comp32.c
|
||||
|
||||
BPF JIT for X86 64-BIT
|
||||
@ -3756,6 +3757,19 @@ F: include/linux/bpf_lsm.h
|
||||
F: kernel/bpf/bpf_lsm.c
|
||||
F: security/bpf/
|
||||
|
||||
BPF L7 FRAMEWORK
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Jakub Sitnicki <jakub@cloudflare.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
F: include/linux/skmsg.h
|
||||
F: net/core/skmsg.c
|
||||
F: net/core/sock_map.c
|
||||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
F: net/unix/unix_bpf.c
|
||||
|
||||
BPFTOOL
|
||||
M: Quentin Monnet <quentin@isovalent.com>
|
||||
L: bpf@vger.kernel.org
|
||||
@ -9275,6 +9289,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
|
||||
F: Documentation/devicetree/bindings/i2c/i2c.txt
|
||||
F: Documentation/i2c/
|
||||
F: drivers/i2c/*
|
||||
F: include/dt-bindings/i2c/i2c.h
|
||||
F: include/linux/i2c-dev.h
|
||||
F: include/linux/i2c-smbus.h
|
||||
F: include/linux/i2c.h
|
||||
@ -9290,6 +9305,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
|
||||
F: Documentation/devicetree/bindings/i2c/
|
||||
F: drivers/i2c/algos/
|
||||
F: drivers/i2c/busses/
|
||||
F: include/dt-bindings/i2c/
|
||||
|
||||
I2C-TAOS-EVM DRIVER
|
||||
M: Jean Delvare <jdelvare@suse.com>
|
||||
@ -11095,20 +11111,6 @@ S: Maintained
|
||||
F: include/net/l3mdev.h
|
||||
F: net/l3mdev
|
||||
|
||||
L7 BPF FRAMEWORK
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Jakub Sitnicki <jakub@cloudflare.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
F: include/linux/skmsg.h
|
||||
F: net/core/skmsg.c
|
||||
F: net/core/sock_map.c
|
||||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
F: net/unix/unix_bpf.c
|
||||
|
||||
LANDLOCK SECURITY MODULE
|
||||
M: Mickaël Salaün <mic@digikod.net>
|
||||
L: linux-security-module@vger.kernel.org
|
||||
@ -13951,7 +13953,6 @@ F: net/ipv6/tcp*.c
|
||||
NETWORKING [TLS]
|
||||
M: Boris Pismenny <borisp@nvidia.com>
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Jakub Kicinski <kuba@kernel.org>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -14868,6 +14869,7 @@ F: include/dt-bindings/
|
||||
|
||||
OPENCOMPUTE PTP CLOCK DRIVER
|
||||
M: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
M: Vadim Fedorenko <vadfed@fb.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/ptp/ptp_ocp.c
|
||||
@ -19304,7 +19306,7 @@ R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
||||
R: Mika Westerberg <mika.westerberg@linux.intel.com>
|
||||
R: Jan Dabros <jsd@semihalf.com>
|
||||
L: linux-i2c@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: drivers/i2c/busses/i2c-designware-*
|
||||
|
||||
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
|
||||
|
2
Makefile
2
Makefile
@ -2,7 +2,7 @@
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 19
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc2
|
||||
EXTRAVERSION = -rc3
|
||||
NAME = Superb Owl
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -3101,7 +3101,6 @@ void cpu_set_feature(unsigned int num)
|
||||
WARN_ON(num >= MAX_CPU_FEATURES);
|
||||
elf_hwcap |= BIT(num);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpu_set_feature);
|
||||
|
||||
bool cpu_have_feature(unsigned int num)
|
||||
{
|
||||
|
@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
|
||||
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
|
||||
* to restore x0-x8, x29, and x30.
|
||||
*/
|
||||
ftrace_common_return:
|
||||
/* Restore function arguments */
|
||||
ldp x0, x1, [sp]
|
||||
ldp x2, x3, [sp, #S_X2]
|
||||
|
@ -77,6 +77,66 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the address the callsite must branch to in order to reach '*addr'.
|
||||
*
|
||||
* Due to the limited range of 'BL' instructions, modules may be placed too far
|
||||
* away to branch directly and must use a PLT.
|
||||
*
|
||||
* Returns true when '*addr' contains a reachable target address, or has been
|
||||
* modified to contain a PLT address. Returns false otherwise.
|
||||
*/
|
||||
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
|
||||
struct module *mod,
|
||||
unsigned long *addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
long offset = (long)*addr - (long)pc;
|
||||
struct plt_entry *plt;
|
||||
|
||||
/*
|
||||
* When the target is within range of the 'BL' instruction, use 'addr'
|
||||
* as-is and branch to that directly.
|
||||
*/
|
||||
if (offset >= -SZ_128M && offset < SZ_128M)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* When the target is outside of the range of a 'BL' instruction, we
|
||||
* must use a PLT to reach it. We can only place PLTs for modules, and
|
||||
* only when module PLT support is built-in.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* 'mod' is only set at module load time, but if we end up
|
||||
* dealing with an out-of-range condition, we can assume it
|
||||
* is due to a module being loaded far away from the kernel.
|
||||
*
|
||||
* NOTE: __module_text_address() must be called with preemption
|
||||
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
||||
* retains its validity throughout the remainder of this code.
|
||||
*/
|
||||
if (!mod) {
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return false;
|
||||
|
||||
plt = get_ftrace_plt(mod, *addr);
|
||||
if (!plt) {
|
||||
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
*addr = (unsigned long)plt;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn on the call to ftrace_caller() in instrumented function
|
||||
*/
|
||||
@ -84,40 +144,9 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
long offset = (long)pc - (long)addr;
|
||||
|
||||
if (offset < -SZ_128M || offset >= SZ_128M) {
|
||||
struct module *mod;
|
||||
struct plt_entry *plt;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* On kernels that support module PLTs, the offset between the
|
||||
* branch instruction and its target may legally exceed the
|
||||
* range of an ordinary relative 'bl' opcode. In this case, we
|
||||
* need to branch via a trampoline in the module.
|
||||
*
|
||||
* NOTE: __module_text_address() must be called with preemption
|
||||
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
||||
* retains its validity throughout the remainder of this code.
|
||||
*/
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return -EINVAL;
|
||||
|
||||
plt = get_ftrace_plt(mod, addr);
|
||||
if (!plt) {
|
||||
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
addr = (unsigned long)plt;
|
||||
}
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
old = aarch64_insn_gen_nop();
|
||||
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
|
||||
return -EINVAL;
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
old = aarch64_insn_gen_branch_imm(pc, old_addr,
|
||||
AARCH64_INSN_BRANCH_LINK);
|
||||
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
||||
unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
bool validate = true;
|
||||
u32 old = 0, new;
|
||||
long offset = (long)pc - (long)addr;
|
||||
|
||||
if (offset < -SZ_128M || offset >= SZ_128M) {
|
||||
u32 replaced;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* 'mod' is only set at module load time, but if we end up
|
||||
* dealing with an out-of-range condition, we can assume it
|
||||
* is due to a module being loaded far away from the kernel.
|
||||
*/
|
||||
if (!mod) {
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The instruction we are about to patch may be a branch and
|
||||
* link instruction that was redirected via a PLT entry. In
|
||||
* this case, the normal validation will fail, but we can at
|
||||
* least check that we are dealing with a branch and link
|
||||
* instruction that points into the right module.
|
||||
*/
|
||||
if (aarch64_insn_read((void *)pc, &replaced))
|
||||
return -EFAULT;
|
||||
|
||||
if (!aarch64_insn_is_bl(replaced) ||
|
||||
!within_module(pc + aarch64_get_branch_offset(replaced),
|
||||
mod))
|
||||
return -EINVAL;
|
||||
|
||||
validate = false;
|
||||
} else {
|
||||
old = aarch64_insn_gen_branch_imm(pc, addr,
|
||||
AARCH64_INSN_BRANCH_LINK);
|
||||
}
|
||||
if (!ftrace_find_callable_addr(rec, mod, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
new = aarch64_insn_gen_nop();
|
||||
|
||||
return ftrace_modify_code(pc, old, new, validate);
|
||||
return ftrace_modify_code(pc, old, new, true);
|
||||
}
|
||||
|
||||
void arch_ftrace_update_code(int command)
|
||||
|
@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
||||
early_fixmap_init();
|
||||
early_ioremap_init();
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code, early parameters, and DT setup.
|
||||
*/
|
||||
jump_label_init();
|
||||
|
||||
setup_machine_fdt(__fdt_pointer);
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code and early parameters.
|
||||
*/
|
||||
jump_label_init();
|
||||
parse_early_param();
|
||||
|
||||
/*
|
||||
|
@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
|
||||
*/
|
||||
SYM_FUNC_START(__pi___dma_map_area)
|
||||
add x1, x0, x1
|
||||
cmp w2, #DMA_FROM_DEVICE
|
||||
b.eq __pi_dcache_inval_poc
|
||||
b __pi_dcache_clean_poc
|
||||
SYM_FUNC_END(__pi___dma_map_area)
|
||||
SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
|
||||
|
@ -101,6 +101,7 @@ SECTIONS
|
||||
|
||||
STABS_DEBUG
|
||||
DWARF_DEBUG
|
||||
ELF_DETAILS
|
||||
|
||||
.gptab.sdata : {
|
||||
*(.gptab.data)
|
||||
|
@ -364,8 +364,13 @@ config RISCV_ISA_SVPBMT
|
||||
select RISCV_ALTERNATIVE
|
||||
default y
|
||||
help
|
||||
Adds support to dynamically detect the presence of the SVPBMT extension
|
||||
(Supervisor-mode: page-based memory types) and enable its usage.
|
||||
Adds support to dynamically detect the presence of the SVPBMT
|
||||
ISA-extension (Supervisor-mode: page-based memory types) and
|
||||
enable its usage.
|
||||
|
||||
The memory type for a page contains a combination of attributes
|
||||
that indicate the cacheability, idempotency, and ordering
|
||||
properties for access to that page.
|
||||
|
||||
The SVPBMT extension is only available on 64-bit CPUs.
|
||||
|
||||
|
@ -35,6 +35,7 @@ config ERRATA_SIFIVE_CIP_1200
|
||||
|
||||
config ERRATA_THEAD
|
||||
bool "T-HEAD errata"
|
||||
depends on !XIP_KERNEL
|
||||
select RISCV_ALTERNATIVE
|
||||
help
|
||||
All T-HEAD errata Kconfig depend on this Kconfig. Disabling
|
||||
|
@ -192,6 +192,15 @@ plic: interrupt-controller@c000000 {
|
||||
riscv,ndev = <186>;
|
||||
};
|
||||
|
||||
pdma: dma-controller@3000000 {
|
||||
compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
|
||||
reg = <0x0 0x3000000 0x0 0x8000>;
|
||||
interrupt-parent = <&plic>;
|
||||
interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
|
||||
dma-channels = <4>;
|
||||
#dma-cells = <1>;
|
||||
};
|
||||
|
||||
clkcfg: clkcfg@20002000 {
|
||||
compatible = "microchip,mpfs-clkcfg";
|
||||
reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
|
||||
|
@ -293,7 +293,6 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
|
||||
unsigned int stage)
|
||||
{
|
||||
u32 cpu_req_feature = cpufeature_probe(stage);
|
||||
u32 cpu_apply_feature = 0;
|
||||
struct alt_entry *alt;
|
||||
u32 tmp;
|
||||
|
||||
@ -307,10 +306,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
|
||||
}
|
||||
|
||||
tmp = (1U << alt->errata_id);
|
||||
if (cpu_req_feature & tmp) {
|
||||
if (cpu_req_feature & tmp)
|
||||
patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
|
||||
cpu_apply_feature |= tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
|
||||
return BIT_ULL(gpa_width - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The TDX module spec states that #VE may be injected for a limited set of
|
||||
* reasons:
|
||||
*
|
||||
* - Emulation of the architectural #VE injection on EPT violation;
|
||||
*
|
||||
* - As a result of guest TD execution of a disallowed instruction,
|
||||
* a disallowed MSR access, or CPUID virtualization;
|
||||
*
|
||||
* - A notification to the guest TD about anomalous behavior;
|
||||
*
|
||||
* The last one is opt-in and is not used by the kernel.
|
||||
*
|
||||
* The Intel Software Developer's Manual describes cases when instruction
|
||||
* length field can be used in section "Information for VM Exits Due to
|
||||
* Instruction Execution".
|
||||
*
|
||||
* For TDX, it ultimately means GET_VEINFO provides reliable instruction length
|
||||
* information if #VE occurred due to instruction execution, but not for EPT
|
||||
* violations.
|
||||
*/
|
||||
static int ve_instr_len(struct ve_info *ve)
|
||||
{
|
||||
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_HLT:
|
||||
case EXIT_REASON_MSR_READ:
|
||||
case EXIT_REASON_MSR_WRITE:
|
||||
case EXIT_REASON_CPUID:
|
||||
case EXIT_REASON_IO_INSTRUCTION:
|
||||
/* It is safe to use ve->instr_len for #VEs due to instruction execution */
|
||||
return ve->instr_len;
|
||||
case EXIT_REASON_EPT_VIOLATION:
|
||||
/*
|
||||
* For EPT violations, ve->instr_len is not defined. For those,
|
||||
* the kernel must decode instructions manually and should not
|
||||
* be using this function.
|
||||
*/
|
||||
WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
|
||||
return 0;
|
||||
default:
|
||||
WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
|
||||
return ve->instr_len;
|
||||
}
|
||||
}
|
||||
|
||||
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
|
||||
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
|
||||
}
|
||||
|
||||
static bool handle_halt(void)
|
||||
static int handle_halt(struct ve_info *ve)
|
||||
{
|
||||
/*
|
||||
* Since non safe halt is mainly used in CPU offlining
|
||||
@ -158,9 +203,9 @@ static bool handle_halt(void)
|
||||
const bool do_sti = false;
|
||||
|
||||
if (__halt(irq_disabled, do_sti))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
void __cpuidle tdx_safe_halt(void)
|
||||
@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
|
||||
WARN_ONCE(1, "HLT instruction emulation failed\n");
|
||||
}
|
||||
|
||||
static bool read_msr(struct pt_regs *regs)
|
||||
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
|
||||
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
|
||||
*/
|
||||
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
regs->ax = lower_32_bits(args.r11);
|
||||
regs->dx = upper_32_bits(args.r11);
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool write_msr(struct pt_regs *regs)
|
||||
static int write_msr(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
|
||||
* can be found in TDX Guest-Host-Communication Interface
|
||||
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
|
||||
*/
|
||||
return !__tdx_hypercall(&args, 0);
|
||||
if (__tdx_hypercall(&args, 0))
|
||||
return -EIO;
|
||||
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool handle_cpuid(struct pt_regs *regs)
|
||||
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
*/
|
||||
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
|
||||
regs->ax = regs->bx = regs->cx = regs->dx = 0;
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
|
||||
*/
|
||||
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
|
||||
@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
regs->cx = args.r14;
|
||||
regs->dx = args.r15;
|
||||
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
|
||||
@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
|
||||
EPT_WRITE, addr, val);
|
||||
}
|
||||
|
||||
static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
unsigned long *reg, val, vaddr;
|
||||
char buffer[MAX_INSN_SIZE];
|
||||
unsigned long *reg, val;
|
||||
struct insn insn = {};
|
||||
enum mmio_type mmio;
|
||||
int size, extend_size;
|
||||
@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
|
||||
/* Only in-kernel MMIO is supported */
|
||||
if (WARN_ON_ONCE(user_mode(regs)))
|
||||
return false;
|
||||
return -EFAULT;
|
||||
|
||||
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
|
||||
return false;
|
||||
return -EFAULT;
|
||||
|
||||
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
|
||||
return false;
|
||||
return -EINVAL;
|
||||
|
||||
mmio = insn_decode_mmio(&insn, &size);
|
||||
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
|
||||
return false;
|
||||
return -EINVAL;
|
||||
|
||||
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
|
||||
reg = insn_get_modrm_reg_ptr(&insn, regs);
|
||||
if (!reg)
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ve->instr_len = insn.length;
|
||||
/*
|
||||
* Reject EPT violation #VEs that split pages.
|
||||
*
|
||||
* MMIO accesses are supposed to be naturally aligned and therefore
|
||||
* never cross page boundaries. Seeing split page accesses indicates
|
||||
* a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
|
||||
*
|
||||
* load_unaligned_zeropad() will recover using exception fixups.
|
||||
*/
|
||||
vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
|
||||
if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
/* Handle writes first */
|
||||
switch (mmio) {
|
||||
case MMIO_WRITE:
|
||||
memcpy(&val, reg, size);
|
||||
return mmio_write(size, ve->gpa, val);
|
||||
if (!mmio_write(size, ve->gpa, val))
|
||||
return -EIO;
|
||||
return insn.length;
|
||||
case MMIO_WRITE_IMM:
|
||||
val = insn.immediate.value;
|
||||
return mmio_write(size, ve->gpa, val);
|
||||
if (!mmio_write(size, ve->gpa, val))
|
||||
return -EIO;
|
||||
return insn.length;
|
||||
case MMIO_READ:
|
||||
case MMIO_READ_ZERO_EXTEND:
|
||||
case MMIO_READ_SIGN_EXTEND:
|
||||
@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
* decoded or handled properly. It was likely not using io.h
|
||||
* helpers or accessed MMIO accidentally.
|
||||
*/
|
||||
return false;
|
||||
return -EINVAL;
|
||||
default:
|
||||
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Handle reads */
|
||||
if (!mmio_read(size, ve->gpa, &val))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
switch (mmio) {
|
||||
case MMIO_READ:
|
||||
@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
default:
|
||||
/* All other cases have to be covered by the first switch() */
|
||||
WARN_ON_ONCE(1);
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (extend_size)
|
||||
memset(reg, extend_val, extend_size);
|
||||
memcpy(reg, &val, size);
|
||||
return true;
|
||||
return insn.length;
|
||||
}
|
||||
|
||||
static bool handle_in(struct pt_regs *regs, int size, int port)
|
||||
@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
|
||||
*
|
||||
* Return True on success or False on failure.
|
||||
*/
|
||||
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
u32 exit_qual = ve->exit_qual;
|
||||
int size, port;
|
||||
bool in;
|
||||
bool in, ret;
|
||||
|
||||
if (VE_IS_IO_STRING(exit_qual))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
in = VE_IS_IO_IN(exit_qual);
|
||||
size = VE_GET_IO_SIZE(exit_qual);
|
||||
@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
|
||||
|
||||
if (in)
|
||||
return handle_in(regs, size, port);
|
||||
ret = handle_in(regs, size, port);
|
||||
else
|
||||
return handle_out(regs, size, port);
|
||||
ret = handle_out(regs, size, port);
|
||||
if (!ret)
|
||||
return -EIO;
|
||||
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
__init bool tdx_early_handle_ve(struct pt_regs *regs)
|
||||
{
|
||||
struct ve_info ve;
|
||||
int insn_len;
|
||||
|
||||
tdx_get_ve_info(&ve);
|
||||
|
||||
if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
|
||||
return false;
|
||||
|
||||
return handle_io(regs, ve.exit_qual);
|
||||
insn_len = handle_io(regs, &ve);
|
||||
if (insn_len < 0)
|
||||
return false;
|
||||
|
||||
regs->ip += insn_len;
|
||||
return true;
|
||||
}
|
||||
|
||||
void tdx_get_ve_info(struct ve_info *ve)
|
||||
@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
|
||||
ve->instr_info = upper_32_bits(out.r10);
|
||||
}
|
||||
|
||||
/* Handle the user initiated #VE */
|
||||
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
|
||||
/*
|
||||
* Handle the user initiated #VE.
|
||||
*
|
||||
* On success, returns the number of bytes RIP should be incremented (>=0)
|
||||
* or -errno on error.
|
||||
*/
|
||||
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_CPUID:
|
||||
return handle_cpuid(regs);
|
||||
return handle_cpuid(regs, ve);
|
||||
default:
|
||||
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
|
||||
return false;
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle the kernel #VE */
|
||||
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
|
||||
/*
|
||||
* Handle the kernel #VE.
|
||||
*
|
||||
* On success, returns the number of bytes RIP should be incremented (>=0)
|
||||
* or -errno on error.
|
||||
*/
|
||||
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_HLT:
|
||||
return handle_halt();
|
||||
return handle_halt(ve);
|
||||
case EXIT_REASON_MSR_READ:
|
||||
return read_msr(regs);
|
||||
return read_msr(regs, ve);
|
||||
case EXIT_REASON_MSR_WRITE:
|
||||
return write_msr(regs);
|
||||
return write_msr(regs, ve);
|
||||
case EXIT_REASON_CPUID:
|
||||
return handle_cpuid(regs);
|
||||
return handle_cpuid(regs, ve);
|
||||
case EXIT_REASON_EPT_VIOLATION:
|
||||
return handle_mmio(regs, ve);
|
||||
case EXIT_REASON_IO_INSTRUCTION:
|
||||
return handle_io(regs, ve->exit_qual);
|
||||
return handle_io(regs, ve);
|
||||
default:
|
||||
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
|
||||
return false;
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
bool ret;
|
||||
int insn_len;
|
||||
|
||||
if (user_mode(regs))
|
||||
ret = virt_exception_user(regs, ve);
|
||||
insn_len = virt_exception_user(regs, ve);
|
||||
else
|
||||
ret = virt_exception_kernel(regs, ve);
|
||||
insn_len = virt_exception_kernel(regs, ve);
|
||||
if (insn_len < 0)
|
||||
return false;
|
||||
|
||||
/* After successful #VE handling, move the IP */
|
||||
if (ret)
|
||||
regs->ip += ve->instr_len;
|
||||
regs->ip += insn_len;
|
||||
|
||||
return ret;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tdx_tlb_flush_required(bool private)
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/io.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/sev.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/mshyperv.h>
|
||||
@ -405,6 +406,11 @@ void __init hyperv_init(void)
|
||||
}
|
||||
|
||||
if (hv_isolation_type_snp()) {
|
||||
/* Negotiate GHCB Version. */
|
||||
if (!hv_ghcb_negotiate_protocol())
|
||||
hv_ghcb_terminate(SEV_TERM_SET_GEN,
|
||||
GHCB_SEV_ES_PROT_UNSUPPORTED);
|
||||
|
||||
hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
|
||||
if (!hv_ghcb_pg)
|
||||
goto free_vp_assist_page;
|
||||
|
@ -53,6 +53,8 @@ union hv_ghcb {
|
||||
} hypercall;
|
||||
} __packed __aligned(HV_HYP_PAGE_SIZE);
|
||||
|
||||
static u16 hv_ghcb_version __ro_after_init;
|
||||
|
||||
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
|
||||
{
|
||||
union hv_ghcb *hv_ghcb;
|
||||
@ -96,12 +98,85 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
|
||||
return status;
|
||||
}
|
||||
|
||||
static inline u64 rd_ghcb_msr(void)
|
||||
{
|
||||
return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
|
||||
}
|
||||
|
||||
static inline void wr_ghcb_msr(u64 val)
|
||||
{
|
||||
native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
|
||||
}
|
||||
|
||||
static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
|
||||
u64 exit_info_1, u64 exit_info_2)
|
||||
{
|
||||
/* Fill in protocol and format specifiers */
|
||||
ghcb->protocol_version = hv_ghcb_version;
|
||||
ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
|
||||
|
||||
ghcb_set_sw_exit_code(ghcb, exit_code);
|
||||
ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
|
||||
ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
|
||||
|
||||
VMGEXIT();
|
||||
|
||||
if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
|
||||
return ES_VMM_ERROR;
|
||||
else
|
||||
return ES_OK;
|
||||
}
|
||||
|
||||
void hv_ghcb_terminate(unsigned int set, unsigned int reason)
|
||||
{
|
||||
u64 val = GHCB_MSR_TERM_REQ;
|
||||
|
||||
/* Tell the hypervisor what went wrong. */
|
||||
val |= GHCB_SEV_TERM_REASON(set, reason);
|
||||
|
||||
/* Request Guest Termination from Hypervisor */
|
||||
wr_ghcb_msr(val);
|
||||
VMGEXIT();
|
||||
|
||||
while (true)
|
||||
asm volatile("hlt\n" : : : "memory");
|
||||
}
|
||||
|
||||
bool hv_ghcb_negotiate_protocol(void)
|
||||
{
|
||||
u64 ghcb_gpa;
|
||||
u64 val;
|
||||
|
||||
/* Save ghcb page gpa. */
|
||||
ghcb_gpa = rd_ghcb_msr();
|
||||
|
||||
/* Do the GHCB protocol version negotiation */
|
||||
wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
|
||||
VMGEXIT();
|
||||
val = rd_ghcb_msr();
|
||||
|
||||
if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
|
||||
return false;
|
||||
|
||||
if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
|
||||
GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
|
||||
return false;
|
||||
|
||||
hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
|
||||
GHCB_PROTOCOL_MAX);
|
||||
|
||||
/* Write ghcb page back after negotiating protocol. */
|
||||
wr_ghcb_msr(ghcb_gpa);
|
||||
VMGEXIT();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void hv_ghcb_msr_write(u64 msr, u64 value)
|
||||
{
|
||||
union hv_ghcb *hv_ghcb;
|
||||
void **ghcb_base;
|
||||
unsigned long flags;
|
||||
struct es_em_ctxt ctxt;
|
||||
|
||||
if (!hv_ghcb_pg)
|
||||
return;
|
||||
@ -120,8 +195,7 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
|
||||
ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
|
||||
ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));
|
||||
|
||||
if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
|
||||
SVM_EXIT_MSR, 1, 0))
|
||||
if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
|
||||
pr_warn("Fail to write msr via ghcb %llx.\n", msr);
|
||||
|
||||
local_irq_restore(flags);
|
||||
@ -133,7 +207,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
|
||||
union hv_ghcb *hv_ghcb;
|
||||
void **ghcb_base;
|
||||
unsigned long flags;
|
||||
struct es_em_ctxt ctxt;
|
||||
|
||||
/* Check size of union hv_ghcb here. */
|
||||
BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);
|
||||
@ -152,8 +225,7 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
|
||||
}
|
||||
|
||||
ghcb_set_rcx(&hv_ghcb->ghcb, msr);
|
||||
if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
|
||||
SVM_EXIT_MSR, 0, 0))
|
||||
if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
|
||||
pr_warn("Fail to read msr via ghcb %llx.\n", msr);
|
||||
else
|
||||
*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
|
||||
|
@ -4,9 +4,6 @@
|
||||
|
||||
#include <asm/e820/types.h>
|
||||
|
||||
struct device;
|
||||
struct resource;
|
||||
|
||||
extern struct e820_table *e820_table;
|
||||
extern struct e820_table *e820_table_kexec;
|
||||
extern struct e820_table *e820_table_firmware;
|
||||
@ -46,8 +43,6 @@ extern void e820__register_nosave_regions(unsigned long limit_pfn);
|
||||
|
||||
extern int e820__get_entry_type(u64 start, u64 end);
|
||||
|
||||
extern void remove_e820_regions(struct device *dev, struct resource *avail);
|
||||
|
||||
/*
|
||||
* Returns true iff the specified range [start,end) is completely contained inside
|
||||
* the ISA region.
|
||||
|
@ -323,7 +323,7 @@ static inline u32 efi64_convert_status(efi_status_t status)
|
||||
#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \
|
||||
(__efi64_split(phys), (desc))
|
||||
|
||||
#define __efi64_argmap_set_memory_space_descriptor(phys, size, flags) \
|
||||
#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
|
||||
(__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
|
||||
|
||||
/*
|
||||
|
@ -179,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
void hv_ghcb_msr_write(u64 msr, u64 value);
|
||||
void hv_ghcb_msr_read(u64 msr, u64 *value);
|
||||
bool hv_ghcb_negotiate_protocol(void);
|
||||
void hv_ghcb_terminate(unsigned int set, unsigned int reason);
|
||||
#else
|
||||
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
|
||||
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
|
||||
static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
|
||||
static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
|
||||
#endif
|
||||
|
||||
extern bool hv_isolation_type_snp(void);
|
||||
|
@ -69,6 +69,8 @@ void pcibios_scan_specific_bus(int busn);
|
||||
|
||||
/* pci-irq.c */
|
||||
|
||||
struct pci_dev;
|
||||
|
||||
struct irq_info {
|
||||
u8 bus, devfn; /* Bus, device and function */
|
||||
struct {
|
||||
@ -246,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
|
||||
# define x86_default_pci_init_irq NULL
|
||||
# define x86_default_pci_fixup_irqs NULL
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
|
||||
extern bool pci_use_e820;
|
||||
#else
|
||||
#define pci_use_e820 false
|
||||
#endif
|
||||
|
@ -108,19 +108,16 @@ extern unsigned long _brk_end;
|
||||
void *extend_brk(size_t size, size_t align);
|
||||
|
||||
/*
|
||||
* Reserve space in the brk section. The name must be unique within the file,
|
||||
* and somewhat descriptive. The size is in bytes.
|
||||
* Reserve space in the .brk section, which is a block of memory from which the
|
||||
* caller is allowed to allocate very early (before even memblock is available)
|
||||
* by calling extend_brk(). All allocated memory will be eventually converted
|
||||
* to memblock. Any leftover unallocated memory will be freed.
|
||||
*
|
||||
* The allocation is done using inline asm (rather than using a section
|
||||
* attribute on a normal variable) in order to allow the use of @nobits, so
|
||||
* that it doesn't take up any space in the vmlinux file.
|
||||
* The size is in bytes.
|
||||
*/
|
||||
#define RESERVE_BRK(name, size) \
|
||||
asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \
|
||||
".brk." #name ":\n\t" \
|
||||
".skip " __stringify(size) "\n\t" \
|
||||
".size .brk." #name ", " __stringify(size) "\n\t" \
|
||||
".popsection\n\t")
|
||||
#define RESERVE_BRK(name, size) \
|
||||
__section(".bss..brk") __aligned(1) __used \
|
||||
static char __brk_##name[size]
|
||||
|
||||
extern void probe_roms(void);
|
||||
#ifdef __i386__
|
||||
@ -133,12 +130,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
|
||||
|
||||
#endif /* __i386__ */
|
||||
#endif /* _SETUP */
|
||||
#else
|
||||
#define RESERVE_BRK(name,sz) \
|
||||
.pushsection .brk_reservation,"aw",@nobits; \
|
||||
.brk.name: \
|
||||
1: .skip sz; \
|
||||
.size .brk.name,.-1b; \
|
||||
|
||||
#else /* __ASSEMBLY */
|
||||
|
||||
.macro __RESERVE_BRK name, size
|
||||
.pushsection .bss..brk, "aw"
|
||||
SYM_DATA_START(__brk_\name)
|
||||
.skip \size
|
||||
SYM_DATA_END(__brk_\name)
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_X86_SETUP_H */
|
||||
|
@ -36,10 +36,6 @@ KCSAN_SANITIZE := n
|
||||
|
||||
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
||||
|
||||
ifdef CONFIG_FRAME_POINTER
|
||||
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
||||
endif
|
||||
|
||||
# If instrumentation of this dir is enabled, boot hangs during first second.
|
||||
# Probably could be more selective here, but note that files related to irqs,
|
||||
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
|
||||
|
@ -175,6 +175,7 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
|
||||
|
||||
jmp ftrace_epilogue
|
||||
SYM_FUNC_END(ftrace_caller);
|
||||
STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
|
||||
|
||||
SYM_FUNC_START(ftrace_epilogue)
|
||||
/*
|
||||
@ -282,6 +283,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
|
||||
jmp ftrace_epilogue
|
||||
|
||||
SYM_FUNC_END(ftrace_regs_caller)
|
||||
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
|
||||
|
||||
|
||||
#else /* ! CONFIG_DYNAMIC_FTRACE */
|
||||
@ -311,10 +313,14 @@ trace:
|
||||
jmp ftrace_stub
|
||||
SYM_FUNC_END(__fentry__)
|
||||
EXPORT_SYMBOL(__fentry__)
|
||||
STACK_FRAME_NON_STANDARD_FP(__fentry__)
|
||||
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
SYM_FUNC_START(return_to_handler)
|
||||
SYM_CODE_START(return_to_handler)
|
||||
UNWIND_HINT_EMPTY
|
||||
ANNOTATE_NOENDBR
|
||||
subq $16, %rsp
|
||||
|
||||
/* Save the return values */
|
||||
@ -339,7 +345,6 @@ SYM_FUNC_START(return_to_handler)
|
||||
int3
|
||||
.Ldo_rop:
|
||||
mov %rdi, (%rsp)
|
||||
UNWIND_HINT_FUNC
|
||||
RET
|
||||
SYM_FUNC_END(return_to_handler)
|
||||
SYM_CODE_END(return_to_handler)
|
||||
#endif
|
||||
|
@ -1,7 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/dev_printk.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/printk.h>
|
||||
#include <asm/e820/api.h>
|
||||
#include <asm/pci_x86.h>
|
||||
|
||||
static void resource_clip(struct resource *res, resource_size_t start,
|
||||
resource_size_t end)
|
||||
@ -24,14 +25,14 @@ static void resource_clip(struct resource *res, resource_size_t start,
|
||||
res->start = end + 1;
|
||||
}
|
||||
|
||||
void remove_e820_regions(struct device *dev, struct resource *avail)
|
||||
static void remove_e820_regions(struct resource *avail)
|
||||
{
|
||||
int i;
|
||||
struct e820_entry *entry;
|
||||
u64 e820_start, e820_end;
|
||||
struct resource orig = *avail;
|
||||
|
||||
if (!(avail->flags & IORESOURCE_MEM))
|
||||
if (!pci_use_e820)
|
||||
return;
|
||||
|
||||
for (i = 0; i < e820_table->nr_entries; i++) {
|
||||
@ -41,7 +42,7 @@ void remove_e820_regions(struct device *dev, struct resource *avail)
|
||||
|
||||
resource_clip(avail, e820_start, e820_end);
|
||||
if (orig.start != avail->start || orig.end != avail->end) {
|
||||
dev_info(dev, "clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
|
||||
pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
|
||||
&orig, avail, e820_start, e820_end);
|
||||
orig = *avail;
|
||||
}
|
||||
@ -55,6 +56,9 @@ void arch_remove_reservations(struct resource *avail)
|
||||
* the low 1MB unconditionally, as this area is needed for some ISA
|
||||
* cards requiring a memory range, e.g. the i82365 PCMCIA controller.
|
||||
*/
|
||||
if (avail->flags & IORESOURCE_MEM)
|
||||
if (avail->flags & IORESOURCE_MEM) {
|
||||
resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
|
||||
|
||||
remove_e820_regions(avail);
|
||||
}
|
||||
}
|
||||
|
@ -67,11 +67,6 @@ RESERVE_BRK(dmi_alloc, 65536);
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Range of the BSS area. The size of the BSS area is determined
|
||||
* at link time, with RESERVE_BRK() facility reserving additional
|
||||
* chunks.
|
||||
*/
|
||||
unsigned long _brk_start = (unsigned long)__brk_base;
|
||||
unsigned long _brk_end = (unsigned long)__brk_base;
|
||||
|
||||
|
@ -385,10 +385,10 @@ SECTIONS
|
||||
__end_of_kernel_reserve = .;
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
.brk (NOLOAD) : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
__brk_base = .;
|
||||
. += 64 * 1024; /* 64k alignment slop space */
|
||||
*(.brk_reservation) /* areas brk users have reserved */
|
||||
*(.bss..brk) /* areas brk users have reserved */
|
||||
__brk_limit = .;
|
||||
}
|
||||
|
||||
|
@ -1420,8 +1420,9 @@ st: if (is_imm8(insn->off))
|
||||
case BPF_JMP | BPF_CALL:
|
||||
func = (u8 *) __bpf_call_base + imm32;
|
||||
if (tail_call_reachable) {
|
||||
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
|
||||
EMIT3_off32(0x48, 0x8B, 0x85,
|
||||
-(bpf_prog->aux->stack_depth + 8));
|
||||
-round_up(bpf_prog->aux->stack_depth, 8) - 8);
|
||||
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
|
||||
return -EINVAL;
|
||||
} else {
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <linux/pci-acpi.h>
|
||||
#include <asm/numa.h>
|
||||
#include <asm/pci_x86.h>
|
||||
#include <asm/e820/api.h>
|
||||
|
||||
struct pci_root_info {
|
||||
struct acpi_pci_root_info common;
|
||||
@ -20,7 +19,7 @@ struct pci_root_info {
|
||||
#endif
|
||||
};
|
||||
|
||||
static bool pci_use_e820 = true;
|
||||
bool pci_use_e820 = true;
|
||||
static bool pci_use_crs = true;
|
||||
static bool pci_ignore_seg;
|
||||
|
||||
@ -387,11 +386,6 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
|
||||
|
||||
status = acpi_pci_probe_root_resources(ci);
|
||||
|
||||
if (pci_use_e820) {
|
||||
resource_list_for_each_entry(entry, &ci->resources)
|
||||
remove_e820_regions(&device->dev, entry->res);
|
||||
}
|
||||
|
||||
if (pci_use_crs) {
|
||||
resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
|
||||
if (resource_is_pcicfg_ioport(entry->res))
|
||||
|
@ -7046,6 +7046,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
#endif
|
||||
|
||||
blk_stat_disable_accounting(bfqd->queue);
|
||||
wbt_enable_default(bfqd->queue);
|
||||
|
||||
kfree(bfqd);
|
||||
@ -7188,7 +7189,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
|
||||
bfq_init_root_group(bfqd->root_group, bfqd);
|
||||
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
|
||||
|
||||
/* We dispatch from request queue wide instead of hw queue */
|
||||
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
|
||||
|
||||
wbt_disable_default(q);
|
||||
blk_stat_enable_accounting(q);
|
||||
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
|
@ -564,6 +564,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
int ret;
|
||||
|
||||
if (!e) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
|
||||
q->elevator = NULL;
|
||||
q->nr_requests = q->tag_set->queue_depth;
|
||||
return 0;
|
||||
|
@ -579,6 +579,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
if (!blk_mq_hw_queue_mapped(data.hctx))
|
||||
goto out_queue_exit;
|
||||
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
|
||||
if (cpu >= nr_cpu_ids)
|
||||
goto out_queue_exit;
|
||||
data.ctx = __blk_mq_get_ctx(q, cpu);
|
||||
|
||||
if (!q->elevator)
|
||||
@ -2140,20 +2142,6 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
|
||||
/*
|
||||
* Is the request queue handled by an IO scheduler that does not respect
|
||||
* hardware queues when dispatching?
|
||||
*/
|
||||
static bool blk_mq_has_sqsched(struct request_queue *q)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.dispatch_request &&
|
||||
!(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return preferred queue to dispatch from (if any) for non-mq aware IO
|
||||
* scheduler.
|
||||
@ -2186,7 +2174,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
||||
unsigned long i;
|
||||
|
||||
sq_hctx = NULL;
|
||||
if (blk_mq_has_sqsched(q))
|
||||
if (blk_queue_sq_sched(q))
|
||||
sq_hctx = blk_mq_get_sq_hctx(q);
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
@ -2214,7 +2202,7 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
|
||||
unsigned long i;
|
||||
|
||||
sq_hctx = NULL;
|
||||
if (blk_mq_has_sqsched(q))
|
||||
if (blk_queue_sq_sched(q))
|
||||
sq_hctx = blk_mq_get_sq_hctx(q);
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
@ -3443,8 +3431,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
|
||||
if (blk_mq_hw_queue_mapped(hctx))
|
||||
blk_mq_tag_idle(hctx);
|
||||
|
||||
blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
|
||||
set->queue_depth, flush_rq);
|
||||
if (blk_queue_init_done(q))
|
||||
blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
|
||||
set->queue_depth, flush_rq);
|
||||
if (set->ops->exit_request)
|
||||
set->ops->exit_request(set, flush_rq, hctx_idx);
|
||||
|
||||
@ -4438,12 +4427,14 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||
if (!qe)
|
||||
return false;
|
||||
|
||||
/* q->elevator needs protection from ->sysfs_lock */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
||||
INIT_LIST_HEAD(&qe->node);
|
||||
qe->q = q;
|
||||
qe->type = q->elevator->type;
|
||||
list_add(&qe->node, head);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
/*
|
||||
* After elevator_switch_mq, the previous elevator_queue will be
|
||||
* released by elevator_release. The reference of the io scheduler
|
||||
|
@ -421,6 +421,8 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
|
||||
blk_stat_enable_accounting(q);
|
||||
|
||||
blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
|
||||
|
||||
eq->elevator_data = kqd;
|
||||
q->elevator = eq;
|
||||
|
||||
@ -1033,7 +1035,6 @@ static struct elevator_type kyber_sched = {
|
||||
#endif
|
||||
.elevator_attrs = kyber_sched_attrs,
|
||||
.elevator_name = "kyber",
|
||||
.elevator_features = ELEVATOR_F_MQ_AWARE,
|
||||
.elevator_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
|
@ -642,6 +642,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
spin_lock_init(&dd->lock);
|
||||
spin_lock_init(&dd->zone_lock);
|
||||
|
||||
/* We dispatch from request queue wide instead of hw queue */
|
||||
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
|
||||
|
||||
q->elevator = eq;
|
||||
return 0;
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
# Makefile for the linux kernel signature checking certificates.
|
||||
#
|
||||
|
||||
obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
|
||||
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
|
||||
obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
|
||||
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o
|
||||
obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
|
||||
ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
|
||||
|
||||
|
@ -15,10 +15,9 @@
|
||||
#include <linux/err.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/uidgid.h>
|
||||
#include <linux/verification.h>
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <keys/system_keyring.h>
|
||||
#include "blacklist.h"
|
||||
#include "common.h"
|
||||
|
||||
/*
|
||||
* According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(),
|
||||
@ -365,8 +364,9 @@ static __init int load_revocation_certificate_list(void)
|
||||
if (revocation_certificate_list_size)
|
||||
pr_notice("Loading compiled-in revocation X.509 certificates\n");
|
||||
|
||||
return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size,
|
||||
blacklist_keyring);
|
||||
return x509_load_certificate_list(revocation_certificate_list,
|
||||
revocation_certificate_list_size,
|
||||
blacklist_keyring);
|
||||
}
|
||||
late_initcall(load_revocation_certificate_list);
|
||||
#endif
|
||||
|
@ -1,9 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#ifndef _CERT_COMMON_H
|
||||
#define _CERT_COMMON_H
|
||||
|
||||
int load_certificate_list(const u8 cert_list[], const unsigned long list_size,
|
||||
const struct key *keyring);
|
||||
|
||||
#endif
|
@ -16,7 +16,6 @@
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <keys/system_keyring.h>
|
||||
#include <crypto/pkcs7.h>
|
||||
#include "common.h"
|
||||
|
||||
static struct key *builtin_trusted_keys;
|
||||
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
|
||||
@ -183,7 +182,8 @@ __init int load_module_cert(struct key *keyring)
|
||||
|
||||
pr_notice("Loading compiled-in module X.509 certificates\n");
|
||||
|
||||
return load_certificate_list(system_certificate_list, module_cert_size, keyring);
|
||||
return x509_load_certificate_list(system_certificate_list,
|
||||
module_cert_size, keyring);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -204,7 +204,7 @@ static __init int load_system_certificate_list(void)
|
||||
size = system_certificate_list_size - module_cert_size;
|
||||
#endif
|
||||
|
||||
return load_certificate_list(p, size, builtin_trusted_keys);
|
||||
return x509_load_certificate_list(p, size, builtin_trusted_keys);
|
||||
}
|
||||
late_initcall(load_system_certificate_list);
|
||||
|
||||
|
@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig"
|
||||
#
|
||||
menuconfig CRYPTO
|
||||
tristate "Cryptographic API"
|
||||
select LIB_MEMNEQ
|
||||
help
|
||||
This option provides the core Cryptographic API.
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
#
|
||||
|
||||
obj-$(CONFIG_CRYPTO) += crypto.o
|
||||
crypto-y := api.o cipher.o compress.o memneq.o
|
||||
crypto-y := api.o cipher.o compress.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
|
||||
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
|
||||
|
@ -75,4 +75,14 @@ config SIGNED_PE_FILE_VERIFICATION
|
||||
This option provides support for verifying the signature(s) on a
|
||||
signed PE binary.
|
||||
|
||||
config FIPS_SIGNATURE_SELFTEST
|
||||
bool "Run FIPS selftests on the X.509+PKCS7 signature verification"
|
||||
help
|
||||
This option causes some selftests to be run on the signature
|
||||
verification code, using some built in data. This is required
|
||||
for FIPS.
|
||||
depends on KEYS
|
||||
depends on ASYMMETRIC_KEY_TYPE
|
||||
depends on PKCS7_MESSAGE_PARSER
|
||||
|
||||
endif # ASYMMETRIC_KEY_TYPE
|
||||
|
@ -20,7 +20,9 @@ x509_key_parser-y := \
|
||||
x509.asn1.o \
|
||||
x509_akid.asn1.o \
|
||||
x509_cert_parser.o \
|
||||
x509_loader.o \
|
||||
x509_public_key.o
|
||||
x509_key_parser-$(CONFIG_FIPS_SIGNATURE_SELFTEST) += selftest.o
|
||||
|
||||
$(obj)/x509_cert_parser.o: \
|
||||
$(obj)/x509.asn1.h \
|
||||
|
224
crypto/asymmetric_keys/selftest.c
Normal file
224
crypto/asymmetric_keys/selftest.c
Normal file
@ -0,0 +1,224 @@
|
||||
/* Self-testing for signature checking.
|
||||
*
|
||||
* Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/cred.h>
|
||||
#include <linux/key.h>
|
||||
#include <crypto/pkcs7.h>
|
||||
#include "x509_parser.h"
|
||||
|
||||
/*
 * One signature verification test case: a blob of signed data plus the
 * PKCS#7 message that is parsed and verified against it.  Entries are built
 * positionally by the TEST() macro, so the field order matters.
 */
struct certs_test {
|
||||
const u8 *data;		/* data handed to pkcs7_supply_detached_data() */
|
||||
size_t data_len;	/* length of @data in bytes */
|
||||
const u8 *pkcs7;	/* message handed to pkcs7_parse_message() */
|
||||
size_t pkcs7_len;	/* length of @pkcs7 in bytes */
|
||||
};
|
||||
|
||||
/*
|
||||
* Set of X.509 certificates to provide public keys for the tests. These will
|
||||
* be loaded into a temporary keyring for the duration of the testing.
|
||||
*/
|
||||
static const __initconst u8 certs_selftest_keys[] = {
|
||||
"\x30\x82\x05\x55\x30\x82\x03\x3d\xa0\x03\x02\x01\x02\x02\x14\x73"
|
||||
"\x98\xea\x98\x2d\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a"
|
||||
"\xfc\x8c\x0a\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01\x0b"
|
||||
"\x05\x00\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29\x43"
|
||||
"\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66"
|
||||
"\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65\x73"
|
||||
"\x74\x69\x6e\x67\x20\x6b\x65\x79\x30\x20\x17\x0d\x32\x32\x30\x35"
|
||||
"\x31\x38\x32\x32\x33\x32\x34\x31\x5a\x18\x0f\x32\x31\x32\x32\x30"
|
||||
"\x34\x32\x34\x32\x32\x33\x32\x34\x31\x5a\x30\x34\x31\x32\x30\x30"
|
||||
"\x06\x03\x55\x04\x03\x0c\x29\x43\x65\x72\x74\x69\x66\x69\x63\x61"
|
||||
"\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63\x61\x74\x69\x6f\x6e\x20"
|
||||
"\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x6b\x65\x79"
|
||||
"\x30\x82\x02\x22\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01"
|
||||
"\x01\x05\x00\x03\x82\x02\x0f\x00\x30\x82\x02\x0a\x02\x82\x02\x01"
|
||||
"\x00\xcc\xac\x49\xdd\x3b\xca\xb0\x15\x7e\x84\x6a\xb2\x0a\x69\x5f"
|
||||
"\x1c\x0a\x61\x82\x3b\x4f\x2c\xa3\x95\x2c\x08\x58\x4b\xb1\x5d\x99"
|
||||
"\xe0\xc3\xc1\x79\xc2\xb3\xeb\xc0\x1e\x6d\x3e\x54\x1d\xbd\xb7\x92"
|
||||
"\x7b\x4d\xb5\x95\x58\xb2\x52\x2e\xc6\x24\x4b\x71\x63\x80\x32\x77"
|
||||
"\xa7\x38\x5e\xdb\x72\xae\x6e\x0d\xec\xfb\xb6\x6d\x01\x7f\xe9\x55"
|
||||
"\x66\xdf\xbf\x1d\x76\x78\x02\x31\xe8\xe5\x07\xf8\xb7\x82\x5c\x0d"
|
||||
"\xd4\xbb\xfb\xa2\x59\x0d\x2e\x3a\x78\x95\x3a\x8b\x46\x06\x47\x44"
|
||||
"\x46\xd7\xcd\x06\x6a\x41\x13\xe3\x19\xf6\xbb\x6e\x38\xf4\x83\x01"
|
||||
"\xa3\xbf\x4a\x39\x4f\xd7\x0a\xe9\x38\xb3\xf5\x94\x14\x4e\xdd\xf7"
|
||||
"\x43\xfd\x24\xb2\x49\x3c\xa5\xf7\x7a\x7c\xd4\x45\x3d\x97\x75\x68"
|
||||
"\xf1\xed\x4c\x42\x0b\x70\xca\x85\xf3\xde\xe5\x88\x2c\xc5\xbe\xb6"
|
||||
"\x97\x34\xba\x24\x02\xcd\x8b\x86\x9f\xa9\x73\xca\x73\xcf\x92\x81"
|
||||
"\xee\x75\x55\xbb\x18\x67\x5c\xff\x3f\xb5\xdd\x33\x1b\x0c\xe9\x78"
|
||||
"\xdb\x5c\xcf\xaa\x5c\x43\x42\xdf\x5e\xa9\x6d\xec\xd7\xd7\xff\xe6"
|
||||
"\xa1\x3a\x92\x1a\xda\xae\xf6\x8c\x6f\x7b\xd5\xb4\x6e\x06\xe9\x8f"
|
||||
"\xe8\xde\x09\x31\x89\xed\x0e\x11\xa1\xfa\x8a\xe9\xe9\x64\x59\x62"
|
||||
"\x53\xda\xd1\x70\xbe\x11\xd4\x99\x97\x11\xcf\x99\xde\x0b\x9d\x94"
|
||||
"\x7e\xaa\xb8\x52\xea\x37\xdb\x90\x7e\x35\xbd\xd9\xfe\x6d\x0a\x48"
|
||||
"\x70\x28\xdd\xd5\x0d\x7f\x03\x80\x93\x14\x23\x8f\xb9\x22\xcd\x7c"
|
||||
"\x29\xfe\xf1\x72\xb5\x5c\x0b\x12\xcf\x9c\x15\xf6\x11\x4c\x7a\x45"
|
||||
"\x25\x8c\x45\x0a\x34\xac\x2d\x9a\x81\xca\x0b\x13\x22\xcd\xeb\x1a"
|
||||
"\x38\x88\x18\x97\x96\x08\x81\xaa\xcc\x8f\x0f\x8a\x32\x7b\x76\x68"
|
||||
"\x03\x68\x43\xbf\x11\xba\x55\x60\xfd\x80\x1c\x0d\x9b\x69\xb6\x09"
|
||||
"\x72\xbc\x0f\x41\x2f\x07\x82\xc6\xe3\xb2\x13\x91\xc4\x6d\x14\x95"
|
||||
"\x31\xbe\x19\xbd\xbc\xed\xe1\x4c\x74\xa2\xe0\x78\x0b\xbb\x94\xec"
|
||||
"\x4c\x53\x3a\xa2\xb5\x84\x1d\x4b\x65\x7e\xdc\xf7\xdb\x36\x7d\xbe"
|
||||
"\x9e\x3b\x36\x66\x42\x66\x76\x35\xbf\xbe\xf0\xc1\x3c\x7c\xe9\x42"
|
||||
"\x5c\x24\x53\x03\x05\xa8\x67\x24\x50\x02\x75\xff\x24\x46\x3b\x35"
|
||||
"\x89\x76\xe6\x70\xda\xc5\x51\x8c\x9a\xe5\x05\xb0\x0b\xd0\x2d\xd4"
|
||||
"\x7d\x57\x75\x94\x6b\xf9\x0a\xad\x0e\x41\x00\x15\xd0\x4f\xc0\x7f"
|
||||
"\x90\x2d\x18\x48\x8f\x28\xfe\x5d\xa7\xcd\x99\x9e\xbd\x02\x6c\x8a"
|
||||
"\x31\xf3\x1c\xc7\x4b\xe6\x93\xcd\x42\xa2\xe4\x68\x10\x47\x9d\xfc"
|
||||
"\x21\x02\x03\x01\x00\x01\xa3\x5d\x30\x5b\x30\x0c\x06\x03\x55\x1d"
|
||||
"\x13\x01\x01\xff\x04\x02\x30\x00\x30\x0b\x06\x03\x55\x1d\x0f\x04"
|
||||
"\x04\x03\x02\x07\x80\x30\x1d\x06\x03\x55\x1d\x0e\x04\x16\x04\x14"
|
||||
"\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88\x17"
|
||||
"\x51\x8f\xe3\xdb\x30\x1f\x06\x03\x55\x1d\x23\x04\x18\x30\x16\x80"
|
||||
"\x14\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88"
|
||||
"\x17\x51\x8f\xe3\xdb\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01"
|
||||
"\x01\x0b\x05\x00\x03\x82\x02\x01\x00\xc0\x2e\x12\x41\x7b\x73\x85"
|
||||
"\x16\xc8\xdb\x86\x79\xe8\xf5\xcd\x44\xf4\xc6\xe2\x81\x23\x5e\x47"
|
||||
"\xcb\xab\x25\xf1\x1e\x58\x3e\x31\x7f\x78\xad\x85\xeb\xfe\x14\x88"
|
||||
"\x60\xf7\x7f\xd2\x26\xa2\xf4\x98\x2a\xfd\xba\x05\x0c\x20\x33\x12"
|
||||
"\xcc\x4d\x14\x61\x64\x81\x93\xd3\x33\xed\xc8\xff\xf1\x78\xcc\x5f"
|
||||
"\x51\x9f\x09\xd7\xbe\x0d\x5c\x74\xfd\x9b\xdf\x52\x4a\xc9\xa8\x71"
|
||||
"\x25\x33\x04\x10\x67\x36\xd0\xb3\x0b\xc9\xa1\x40\x72\xae\x41\x7b"
|
||||
"\x68\xe6\xe4\x7b\xd0\x28\xf7\x6d\xe7\x3f\x50\xfc\x91\x7c\x91\x56"
|
||||
"\xd4\xdf\xa6\xbb\xe8\x4d\x1b\x58\xaa\x28\xfa\xc1\x19\xeb\x11\x2f"
|
||||
"\x24\x8b\x7c\xc5\xa9\x86\x26\xaa\x6e\xb7\x9b\xd5\xf8\x06\xfb\x02"
|
||||
"\x52\x7b\x9c\x9e\xa1\xe0\x07\x8b\x5e\xe4\xb8\x55\x29\xf6\x48\x52"
|
||||
"\x1c\x1b\x54\x2d\x46\xd8\xe5\x71\xb9\x60\xd1\x45\xb5\x92\x89\x8a"
|
||||
"\x63\x58\x2a\xb3\xc6\xb2\x76\xe2\x3c\x82\x59\x04\xae\x5a\xc4\x99"
|
||||
"\x7b\x2e\x4b\x46\x57\xb8\x29\x24\xb2\xfd\xee\x2c\x0d\xa4\x83\xfa"
|
||||
"\x65\x2a\x07\x35\x8b\x97\xcf\xbd\x96\x2e\xd1\x7e\x6c\xc2\x1e\x87"
|
||||
"\xb6\x6c\x76\x65\xb5\xb2\x62\xda\x8b\xe9\x73\xe3\xdb\x33\xdd\x13"
|
||||
"\x3a\x17\x63\x6a\x76\xde\x8d\x8f\xe0\x47\x61\x28\x3a\x83\xff\x8f"
|
||||
"\xe7\xc7\xe0\x4a\xa3\xe5\x07\xcf\xe9\x8c\x35\x35\x2e\xe7\x80\x66"
|
||||
"\x31\xbf\x91\x58\x0a\xe1\x25\x3d\x38\xd3\xa4\xf0\x59\x34\x47\x07"
|
||||
"\x62\x0f\xbe\x30\xdd\x81\x88\x58\xf0\x28\xb0\x96\xe5\x82\xf8\x05"
|
||||
"\xb7\x13\x01\xbc\xfa\xc6\x1f\x86\x72\xcc\xf9\xee\x8e\xd9\xd6\x04"
|
||||
"\x8c\x24\x6c\xbf\x0f\x5d\x37\x39\xcf\x45\xc1\x93\x3a\xd2\xed\x5c"
|
||||
"\x58\x79\x74\x86\x62\x30\x7e\x8e\xbb\xdd\x7a\xa9\xed\xca\x40\xcb"
|
||||
"\x62\x47\xf4\xb4\x9f\x52\x7f\x72\x63\xa8\xf0\x2b\xaf\x45\x2a\x48"
|
||||
"\x19\x6d\xe3\xfb\xf9\x19\x66\x69\xc8\xcc\x62\x87\x6c\x53\x2b\x2d"
|
||||
"\x6e\x90\x6c\x54\x3a\x82\x25\x41\xcb\x18\x6a\xa4\x22\xa8\xa1\xc4"
|
||||
"\x47\xd7\x81\x00\x1c\x15\x51\x0f\x1a\xaf\xef\x9f\xa6\x61\x8c\xbd"
|
||||
"\x6b\x8b\xed\xe6\xac\x0e\xb6\x3a\x4c\x92\xe6\x0f\x91\x0a\x0f\x71"
|
||||
"\xc7\xa0\xb9\x0d\x3a\x17\x5a\x6f\x35\xc8\xe7\x50\x4f\x46\xe8\x70"
|
||||
"\x60\x48\x06\x82\x8b\x66\x58\xe6\x73\x91\x9c\x12\x3d\x35\x8e\x46"
|
||||
"\xad\x5a\xf5\xb3\xdb\x69\x21\x04\xfd\xd3\x1c\xdf\x94\x9d\x56\xb0"
|
||||
"\x0a\xd1\x95\x76\x8d\xec\x9e\xdd\x0b\x15\x97\x64\xad\xe5\xf2\x62"
|
||||
"\x02\xfc\x9e\x5f\x56\x42\x39\x05\xb3"
|
||||
};
|
||||
|
||||
/*
|
||||
* Signed data and detached signature blobs that form the verification tests.
|
||||
*/
|
||||
/*
 * Test payload #1.  The bytes are the ASCII text
 * "This is some test data used for self-testing certificate verification."
 * followed by a newline (0x0a).  Because the initializer is a string
 * literal, the array also carries a trailing NUL byte.
 */
static const __initconst u8 certs_selftest_1_data[] = {
|
||||
"\x54\x68\x69\x73\x20\x69\x73\x20\x73\x6f\x6d\x65\x20\x74\x65\x73"
|
||||
"\x74\x20\x64\x61\x74\x61\x20\x75\x73\x65\x64\x20\x66\x6f\x72\x20"
|
||||
"\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x63\x65\x72"
|
||||
"\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63"
|
||||
"\x61\x74\x69\x6f\x6e\x2e\x0a"
|
||||
};
|
||||
|
||||
static const __initconst u8 certs_selftest_1_pkcs7[] = {
|
||||
"\x30\x82\x02\xab\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x07\x02\xa0"
|
||||
"\x82\x02\x9c\x30\x82\x02\x98\x02\x01\x01\x31\x0d\x30\x0b\x06\x09"
|
||||
"\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0b\x06\x09\x2a\x86\x48"
|
||||
"\x86\xf7\x0d\x01\x07\x01\x31\x82\x02\x75\x30\x82\x02\x71\x02\x01"
|
||||
"\x01\x30\x4c\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29"
|
||||
"\x43\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69"
|
||||
"\x66\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65"
|
||||
"\x73\x74\x69\x6e\x67\x20\x6b\x65\x79\x02\x14\x73\x98\xea\x98\x2d"
|
||||
"\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a\xfc\x8c\x0a\x30"
|
||||
"\x0b\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0d\x06\x09"
|
||||
"\x2a\x86\x48\x86\xf7\x0d\x01\x01\x01\x05\x00\x04\x82\x02\x00\xac"
|
||||
"\xb0\xf2\x07\xd6\x99\x6d\xc0\xc0\xd9\x8d\x31\x0d\x7e\x04\xeb\xc3"
|
||||
"\x88\x90\xc4\x58\x46\xd4\xe2\xa0\xa3\x25\xe3\x04\x50\x37\x85\x8c"
|
||||
"\x91\xc6\xfc\xc5\xd4\x92\xfd\x05\xd8\xb8\xa3\xb8\xba\x89\x13\x00"
|
||||
"\x88\x79\x99\x51\x6b\x5b\x28\x31\xc0\xb3\x1b\x7a\x68\x2c\x00\xdb"
|
||||
"\x4b\x46\x11\xf3\xfa\x50\x8e\x19\x89\xa2\x4c\xda\x4c\x89\x01\x11"
|
||||
"\x89\xee\xd3\xc8\xc1\xe7\xa7\xf6\xb2\xa2\xf8\x65\xb8\x35\x20\x33"
|
||||
"\xba\x12\x62\xd5\xbd\xaa\x71\xe5\x5b\xc0\x6a\x32\xff\x6a\x2e\x23"
|
||||
"\xef\x2b\xb6\x58\xb1\xfb\x5f\x82\x34\x40\x6d\x9f\xbc\x27\xac\x37"
|
||||
"\x23\x99\xcf\x7d\x20\xb2\x39\x01\xc0\x12\xce\xd7\x5d\x2f\xb6\xab"
|
||||
"\xb5\x56\x4f\xef\xf4\x72\x07\x58\x65\xa9\xeb\x1f\x75\x1c\x5f\x0c"
|
||||
"\x88\xe0\xa4\xe2\xcd\x73\x2b\x9e\xb2\x05\x7e\x12\xf8\xd0\x66\x41"
|
||||
"\xcc\x12\x63\xd4\xd6\xac\x9b\x1d\x14\x77\x8d\x1c\x57\xd5\x27\xc6"
|
||||
"\x49\xa2\x41\x43\xf3\x59\x29\xe5\xcb\xd1\x75\xbc\x3a\x97\x2a\x72"
|
||||
"\x22\x66\xc5\x3b\xc1\xba\xfc\x53\x18\x98\xe2\x21\x64\xc6\x52\x87"
|
||||
"\x13\xd5\x7c\x42\xe8\xfb\x9c\x9a\x45\x32\xd5\xa5\x22\x62\x9d\xd4"
|
||||
"\xcb\xa4\xfa\x77\xbb\x50\x24\x0b\x8b\x88\x99\x15\x56\xa9\x1e\x92"
|
||||
"\xbf\x5d\x94\x77\xb6\xf1\x67\x01\x60\x06\x58\x5c\xdf\x18\x52\x79"
|
||||
"\x37\x30\x93\x7d\x87\x04\xf1\xe0\x55\x59\x52\xf3\xc2\xb1\x1c\x5b"
|
||||
"\x12\x7c\x49\x87\xfb\xf7\xed\xdd\x95\x71\xec\x4b\x1a\x85\x08\xb0"
|
||||
"\xa0\x36\xc4\x7b\xab\x40\xe0\xf1\x98\xcc\xaf\x19\x40\x8f\x47\x6f"
|
||||
"\xf0\x6c\x84\x29\x7f\x7f\x04\x46\xcb\x08\x0f\xe0\xc1\xc9\x70\x6e"
|
||||
"\x95\x3b\xa4\xbc\x29\x2b\x53\x67\x45\x1b\x0d\xbc\x13\xa5\x76\x31"
|
||||
"\xaf\xb9\xd0\xe0\x60\x12\xd2\xf4\xb7\x7c\x58\x7e\xf6\x2d\xbb\x24"
|
||||
"\x14\x5a\x20\x24\xa8\x12\xdf\x25\xbd\x42\xce\x96\x7c\x2e\xba\x14"
|
||||
"\x1b\x81\x9f\x18\x45\xa4\xc6\x70\x3e\x0e\xf0\xd3\x7b\x9c\x10\xbe"
|
||||
"\xb8\x7a\x89\xc5\x9e\xd9\x97\xdf\xd7\xe7\xc6\x1d\xc0\x20\x6c\xb8"
|
||||
"\x1e\x3a\x63\xb8\x39\x8e\x8e\x62\xd5\xd2\xb4\xcd\xff\x46\xfc\x8e"
|
||||
"\xec\x07\x35\x0c\xff\xb0\x05\xe6\xf4\xe5\xfe\xa2\xe3\x0a\xe6\x36"
|
||||
"\xa7\x4a\x7e\x62\x1d\xc4\x50\x39\x35\x4e\x28\xcb\x4a\xfb\x9d\xdb"
|
||||
"\xdd\x23\xd6\x53\xb1\x74\x77\x12\xf7\x9c\xf0\x9a\x6b\xf7\xa9\x64"
|
||||
"\x2d\x86\x21\x2a\xcf\xc6\x54\xf5\xc9\xad\xfa\xb5\x12\xb4\xf3\x51"
|
||||
"\x77\x55\x3c\x6f\x0c\x32\xd3\x8c\x44\x39\x71\x25\xfe\x96\xd2"
|
||||
};
|
||||
|
||||
/*
|
||||
* List of tests to be run.
|
||||
*/
|
||||
/*
 * TEST(data, pkcs7) expands to a positional struct certs_test initializer.
 * Both arguments must be arrays (not pointers) because their lengths are
 * taken with sizeof().  The blobs are initialised from string literals, so
 * sizeof() also counts the implicit trailing NUL; the "- 1" drops it.
 */
#define TEST(data, pkcs7) { data, sizeof(data) - 1, pkcs7, sizeof(pkcs7) - 1 }
|
||||
static const struct certs_test certs_tests[] __initconst = {
|
||||
TEST(certs_selftest_1_data, certs_selftest_1_pkcs7),
|
||||
};
|
||||
|
||||
/*
 * Run the built-in signature verification selftests.
 *
 * Allocates a temporary keyring, loads the selftest X.509 certificate list
 * into it, and then for every entry in certs_tests[]:
 *   - parses the PKCS#7 message,
 *   - supplies the detached data it is supposed to sign,
 *   - verifies the signature, and
 *   - validates trust against the temporary keyring.
 *
 * Any failure is fatal: the function panics instead of returning an error.
 * Returns 0 once every test has passed.
 */
int __init fips_signature_selftest(void)
{
	struct key *keyring;
	int ret, i;

	pr_notice("Running certificate verification selftests\n");

	keyring = keyring_alloc(".certs_selftest",
				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
				KEY_USR_VIEW | KEY_USR_READ |
				KEY_USR_SEARCH,
				KEY_ALLOC_NOT_IN_QUOTA,
				NULL, NULL);
	if (IS_ERR(keyring))
		panic("Can't allocate certs selftest keyring: %ld\n",
		      PTR_ERR(keyring));

	/* "- 1" strips the NUL that the string-literal initializer appends. */
	ret = x509_load_certificate_list(certs_selftest_keys,
					 sizeof(certs_selftest_keys) - 1, keyring);
	if (ret < 0)
		panic("Can't load certs selftest keys: %d\n", ret);

	for (i = 0; i < ARRAY_SIZE(certs_tests); i++) {
		const struct certs_test *test = &certs_tests[i];
		struct pkcs7_message *pkcs7;

		pkcs7 = pkcs7_parse_message(test->pkcs7, test->pkcs7_len);
		if (IS_ERR(pkcs7))
			/*
			 * Report the error carried by the returned pointer;
			 * 'ret' still holds the result of an earlier,
			 * successful call at this point.
			 */
			panic("Certs selftest %d: pkcs7_parse_message() = %ld\n",
			      i, PTR_ERR(pkcs7));

		pkcs7_supply_detached_data(pkcs7, test->data, test->data_len);

		ret = pkcs7_verify(pkcs7, VERIFYING_MODULE_SIGNATURE);
		if (ret < 0)
			panic("Certs selftest %d: pkcs7_verify() = %d\n", i, ret);

		ret = pkcs7_validate_trust(pkcs7, keyring);
		if (ret < 0)
			panic("Certs selftest %d: pkcs7_validate_trust() = %d\n", i, ret);

		pkcs7_free_message(pkcs7);
	}

	key_put(keyring);
	return 0;
}
|
@ -2,11 +2,11 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/key.h>
|
||||
#include "common.h"
|
||||
#include <keys/asymmetric-type.h>
|
||||
|
||||
int load_certificate_list(const u8 cert_list[],
|
||||
const unsigned long list_size,
|
||||
const struct key *keyring)
|
||||
int x509_load_certificate_list(const u8 cert_list[],
|
||||
const unsigned long list_size,
|
||||
const struct key *keyring)
|
||||
{
|
||||
key_ref_t key;
|
||||
const u8 *p, *end;
|
@ -40,6 +40,15 @@ struct x509_certificate {
|
||||
bool blacklisted;
|
||||
};
|
||||
|
||||
/*
|
||||
* selftest.c
|
||||
*/
|
||||
#ifdef CONFIG_FIPS_SIGNATURE_SELFTEST
|
||||
extern int __init fips_signature_selftest(void);
|
||||
#else
|
||||
static inline int fips_signature_selftest(void) { return 0; }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* x509_cert_parser.c
|
||||
*/
|
||||
|
@ -244,9 +244,15 @@ static struct asymmetric_key_parser x509_key_parser = {
|
||||
/*
|
||||
* Module stuff
|
||||
*/
|
||||
extern int __init certs_selftest(void);
|
||||
/*
 * Register the X.509 asymmetric key parser and, only if registration
 * succeeds, run the signature selftest.  Returns the registration error if
 * that step fails, otherwise whatever fips_signature_selftest() returns.
 *
 * There must be no unconditional return ahead of the error check, or the
 * selftest call becomes unreachable.
 */
static int __init x509_key_init(void)
{
	int ret;

	ret = register_asymmetric_key_parser(&x509_key_parser);
	if (ret < 0)
		return ret;

	return fips_signature_selftest();
}
|
||||
|
||||
static void __exit x509_key_exit(void)
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include "base.h"
|
||||
|
||||
@ -20,6 +21,7 @@
|
||||
void __init driver_init(void)
|
||||
{
|
||||
/* These are the core pieces */
|
||||
bdi_init(&noop_backing_dev_info);
|
||||
devtmpfs_init();
|
||||
devices_init();
|
||||
buses_init();
|
||||
|
@ -1239,14 +1239,14 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
|
||||
static int fsl_mc_bus_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct fsl_mc *mc = platform_get_drvdata(pdev);
|
||||
struct fsl_mc_io *mc_io;
|
||||
|
||||
if (!fsl_mc_is_root_dprc(&mc->root_mc_bus_dev->dev))
|
||||
return -EINVAL;
|
||||
|
||||
mc_io = mc->root_mc_bus_dev->mc_io;
|
||||
fsl_mc_device_remove(mc->root_mc_bus_dev);
|
||||
|
||||
fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io);
|
||||
mc->root_mc_bus_dev->mc_io = NULL;
|
||||
fsl_destroy_mc_io(mc_io);
|
||||
|
||||
bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb);
|
||||
|
||||
|
@ -1019,7 +1019,7 @@ static struct parport_driver lp_driver = {
|
||||
|
||||
static int __init lp_init(void)
|
||||
{
|
||||
int i, err = 0;
|
||||
int i, err;
|
||||
|
||||
if (parport_nr[0] == LP_PARPORT_OFF)
|
||||
return 0;
|
||||
|
@ -565,4 +565,3 @@ void __init hv_init_clocksource(void)
|
||||
hv_sched_clock_offset = hv_read_reference_counter();
|
||||
hv_setup_sched_clock(read_hv_sched_clock_msr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_init_clocksource);
|
||||
|
@ -684,7 +684,7 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev)
|
||||
if (!devpriv->usb_rx_buf)
|
||||
return -ENOMEM;
|
||||
|
||||
size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE);
|
||||
size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE);
|
||||
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
|
||||
if (!devpriv->usb_tx_buf)
|
||||
return -ENOMEM;
|
||||
|
@ -32,8 +32,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct udmabuf *ubuf = vma->vm_private_data;
|
||||
pgoff_t pgoff = vmf->pgoff;
|
||||
|
||||
vmf->page = ubuf->pages[vmf->pgoff];
|
||||
if (pgoff >= ubuf->pagecount)
|
||||
return VM_FAULT_SIGBUS;
|
||||
vmf->page = ubuf->pages[pgoff];
|
||||
get_page(vmf->page);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1211,7 +1211,7 @@ static int ioctl_get_cycle_timer2(struct client *client, union ioctl_arg *arg)
|
||||
struct fw_cdev_get_cycle_timer2 *a = &arg->get_cycle_timer2;
|
||||
struct fw_card *card = client->device->card;
|
||||
struct timespec64 ts = {0, 0};
|
||||
u32 cycle_time;
|
||||
u32 cycle_time = 0;
|
||||
int ret = 0;
|
||||
|
||||
local_irq_disable();
|
||||
|
@ -372,8 +372,7 @@ static ssize_t rom_index_show(struct device *dev,
|
||||
struct fw_device *device = fw_device(dev->parent);
|
||||
struct fw_unit *unit = fw_unit(dev);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n",
|
||||
(int)(unit->directory - device->config_rom));
|
||||
return sysfs_emit(buf, "%td\n", unit->directory - device->config_rom);
|
||||
}
|
||||
|
||||
static struct device_attribute fw_unit_attributes[] = {
|
||||
@ -403,8 +402,7 @@ static ssize_t guid_show(struct device *dev,
|
||||
int ret;
|
||||
|
||||
down_read(&fw_device_rwsem);
|
||||
ret = snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
|
||||
device->config_rom[3], device->config_rom[4]);
|
||||
ret = sysfs_emit(buf, "0x%08x%08x\n", device->config_rom[3], device->config_rom[4]);
|
||||
up_read(&fw_device_rwsem);
|
||||
|
||||
return ret;
|
||||
|
@ -26,8 +26,6 @@
|
||||
#include <linux/sysfb.h>
|
||||
#include <video/vga.h>
|
||||
|
||||
#include <asm/efi.h>
|
||||
|
||||
enum {
|
||||
OVERRIDE_NONE = 0x0,
|
||||
OVERRIDE_BASE = 0x1,
|
||||
|
@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
atomic64_read(&adev->visible_pin_size),
|
||||
vram_gtt.vram_size);
|
||||
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
|
||||
vram_gtt.gtt_size *= PAGE_SIZE;
|
||||
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
|
||||
return copy_to_user(out, &vram_gtt,
|
||||
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
|
||||
@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
|
||||
|
||||
mem.gtt.total_heap_size = gtt_man->size;
|
||||
mem.gtt.total_heap_size *= PAGE_SIZE;
|
||||
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
|
||||
atomic64_read(&adev->gart_pin_size);
|
||||
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
|
||||
|
@ -2812,7 +2812,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
|
||||
|
||||
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
|
||||
{
|
||||
u32 max_cll, min_cll, max, min, q, r;
|
||||
u32 max_avg, min_cll, max, min, q, r;
|
||||
struct amdgpu_dm_backlight_caps *caps;
|
||||
struct amdgpu_display_manager *dm;
|
||||
struct drm_connector *conn_base;
|
||||
@ -2842,7 +2842,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
|
||||
caps = &dm->backlight_caps[i];
|
||||
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
|
||||
caps->aux_support = false;
|
||||
max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
|
||||
max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
|
||||
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
|
||||
|
||||
if (caps->ext_caps->bits.oled == 1 /*||
|
||||
@ -2870,8 +2870,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
|
||||
* The results of the above expressions can be verified at
|
||||
* pre_computed_values.
|
||||
*/
|
||||
q = max_cll >> 5;
|
||||
r = max_cll % 32;
|
||||
q = max_avg >> 5;
|
||||
r = max_avg % 32;
|
||||
max = (1 << q) * pre_computed_values[r];
|
||||
|
||||
// min luminance: maxLum * (CV/255)^2 / 100
|
||||
|
@ -176,15 +176,15 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
|
||||
}, {
|
||||
DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER),
|
||||
DRM_COMPONENT_DRIVER
|
||||
}, {
|
||||
DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
|
||||
DRM_COMPONENT_DRIVER
|
||||
}, {
|
||||
DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP),
|
||||
DRM_COMPONENT_DRIVER
|
||||
}, {
|
||||
DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI),
|
||||
DRM_COMPONENT_DRIVER
|
||||
}, {
|
||||
DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
|
||||
DRM_COMPONENT_DRIVER
|
||||
}, {
|
||||
DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI),
|
||||
DRM_COMPONENT_DRIVER
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <drm/drm_print.h>
|
||||
|
||||
#include "exynos_drm_drv.h"
|
||||
#include "exynos_drm_crtc.h"
|
||||
|
||||
/* Sysreg registers for MIC */
|
||||
#define DSD_CFG_MUX 0x1004
|
||||
@ -100,9 +101,7 @@ struct exynos_mic {
|
||||
|
||||
bool i80_mode;
|
||||
struct videomode vm;
|
||||
struct drm_encoder *encoder;
|
||||
struct drm_bridge bridge;
|
||||
struct drm_bridge *next_bridge;
|
||||
|
||||
bool enabled;
|
||||
};
|
||||
@ -229,8 +228,6 @@ static void mic_set_reg_on(struct exynos_mic *mic, bool enable)
|
||||
writel(reg, mic->reg + MIC_OP);
|
||||
}
|
||||
|
||||
static void mic_disable(struct drm_bridge *bridge) { }
|
||||
|
||||
static void mic_post_disable(struct drm_bridge *bridge)
|
||||
{
|
||||
struct exynos_mic *mic = bridge->driver_private;
|
||||
@ -297,34 +294,30 @@ static void mic_pre_enable(struct drm_bridge *bridge)
|
||||
mutex_unlock(&mic_mutex);
|
||||
}
|
||||
|
||||
static void mic_enable(struct drm_bridge *bridge) { }
|
||||
|
||||
static int mic_attach(struct drm_bridge *bridge,
|
||||
enum drm_bridge_attach_flags flags)
|
||||
{
|
||||
struct exynos_mic *mic = bridge->driver_private;
|
||||
|
||||
return drm_bridge_attach(bridge->encoder, mic->next_bridge,
|
||||
&mic->bridge, flags);
|
||||
}
|
||||
|
||||
static const struct drm_bridge_funcs mic_bridge_funcs = {
|
||||
.disable = mic_disable,
|
||||
.post_disable = mic_post_disable,
|
||||
.mode_set = mic_mode_set,
|
||||
.pre_enable = mic_pre_enable,
|
||||
.enable = mic_enable,
|
||||
.attach = mic_attach,
|
||||
};
|
||||
|
||||
static int exynos_mic_bind(struct device *dev, struct device *master,
|
||||
void *data)
|
||||
{
|
||||
struct exynos_mic *mic = dev_get_drvdata(dev);
|
||||
struct drm_device *drm_dev = data;
|
||||
struct exynos_drm_crtc *crtc = exynos_drm_crtc_get_by_type(drm_dev,
|
||||
EXYNOS_DISPLAY_TYPE_LCD);
|
||||
struct drm_encoder *e, *encoder = NULL;
|
||||
|
||||
drm_for_each_encoder(e, drm_dev)
|
||||
if (e->possible_crtcs == drm_crtc_mask(&crtc->base))
|
||||
encoder = e;
|
||||
if (!encoder)
|
||||
return -ENODEV;
|
||||
|
||||
mic->bridge.driver_private = mic;
|
||||
|
||||
return 0;
|
||||
return drm_bridge_attach(encoder, &mic->bridge, NULL, 0);
|
||||
}
|
||||
|
||||
static void exynos_mic_unbind(struct device *dev, struct device *master,
|
||||
@ -388,7 +381,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct exynos_mic *mic;
|
||||
struct device_node *remote;
|
||||
struct resource res;
|
||||
int ret, i;
|
||||
|
||||
@ -432,16 +424,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
|
||||
}
|
||||
}
|
||||
|
||||
remote = of_graph_get_remote_node(dev->of_node, 1, 0);
|
||||
mic->next_bridge = of_drm_find_bridge(remote);
|
||||
if (IS_ERR(mic->next_bridge)) {
|
||||
DRM_DEV_ERROR(dev, "mic: Failed to find next bridge\n");
|
||||
ret = PTR_ERR(mic->next_bridge);
|
||||
goto err;
|
||||
}
|
||||
|
||||
of_node_put(remote);
|
||||
|
||||
platform_set_drvdata(pdev, mic);
|
||||
|
||||
mic->bridge.funcs = &mic_bridge_funcs;
|
||||
|
@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
|
||||
}
|
||||
}
|
||||
|
||||
err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
|
||||
/* Reserve enough slots to accommodate composite fences */
|
||||
err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
|
||||
{
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
intel_gt_sysfs_unregister(gt);
|
||||
intel_rps_driver_unregister(&gt->rps);
|
||||
intel_gsc_fini(&gt->gsc);
|
||||
|
||||
|
@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
|
||||
|
||||
static struct intel_gt *kobj_to_gt(struct kobject *kobj)
|
||||
{
|
||||
return container_of(kobj, struct kobj_gt, base)->gt;
|
||||
return container_of(kobj, struct intel_gt, sysfs_gt);
|
||||
}
|
||||
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
|
||||
@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
|
||||
};
|
||||
ATTRIBUTE_GROUPS(id);
|
||||
|
||||
/* A kobject needs a release() method even if it does nothing */
|
||||
static void kobj_gt_release(struct kobject *kobj)
|
||||
{
|
||||
kfree(kobj);
|
||||
}
|
||||
|
||||
static struct kobj_type kobj_gt_type = {
|
||||
@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
|
||||
|
||||
void intel_gt_sysfs_register(struct intel_gt *gt)
|
||||
{
|
||||
struct kobj_gt *kg;
|
||||
|
||||
/*
|
||||
* We need to make things right with the
|
||||
* ABI compatibility. The files were originally
|
||||
@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
|
||||
if (gt_is_root(gt))
|
||||
intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
|
||||
|
||||
kg = kzalloc(sizeof(*kg), GFP_KERNEL);
|
||||
if (!kg)
|
||||
/* init and xfer ownership to sysfs tree */
|
||||
if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
|
||||
gt->i915->sysfs_gt, "gt%d", gt->info.id))
|
||||
goto exit_fail;
|
||||
|
||||
kobject_init(&kg->base, &kobj_gt_type);
|
||||
kg->gt = gt;
|
||||
|
||||
/* xfer ownership to sysfs tree */
|
||||
if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
|
||||
goto exit_kobj_put;
|
||||
|
||||
intel_gt_sysfs_pm_init(gt, &kg->base);
|
||||
intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
|
||||
|
||||
return;
|
||||
|
||||
exit_kobj_put:
|
||||
kobject_put(&kg->base);
|
||||
|
||||
exit_fail:
|
||||
kobject_put(&gt->sysfs_gt);
|
||||
drm_warn(&gt->i915->drm,
|
||||
"failed to initialize gt%d sysfs root\n", gt->info.id);
|
||||
}
|
||||
|
||||
void intel_gt_sysfs_unregister(struct intel_gt *gt)
|
||||
{
|
||||
kobject_put(&gt->sysfs_gt);
|
||||
}
|
||||
|
@ -13,11 +13,6 @@
|
||||
|
||||
struct intel_gt;
|
||||
|
||||
struct kobj_gt {
|
||||
struct kobject base;
|
||||
struct intel_gt *gt;
|
||||
};
|
||||
|
||||
bool is_object_gt(struct kobject *kobj);
|
||||
|
||||
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
|
||||
@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
|
||||
const char *name);
|
||||
|
||||
void intel_gt_sysfs_register(struct intel_gt *gt);
|
||||
void intel_gt_sysfs_unregister(struct intel_gt *gt);
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
|
||||
const char *name);
|
||||
|
||||
|
@ -224,6 +224,9 @@ struct intel_gt {
|
||||
} mocs;
|
||||
|
||||
struct intel_pxp pxp;
|
||||
|
||||
/* gt/gtN sysfs */
|
||||
struct kobject sysfs_gt;
|
||||
};
|
||||
|
||||
enum intel_gt_scratch_field {
|
||||
|
@ -156,7 +156,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
|
||||
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
|
||||
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
|
||||
};
|
||||
static const struct uc_fw_platform_requirement *fw_blobs;
|
||||
const struct uc_fw_platform_requirement *fw_blobs;
|
||||
enum intel_platform p = INTEL_INFO(i915)->platform;
|
||||
u32 fw_count;
|
||||
u8 rev = INTEL_REVID(i915);
|
||||
|
@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
|
||||
struct device *kdev = kobj_to_dev(kobj);
|
||||
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
|
||||
struct i915_gpu_coredump *gpu;
|
||||
ssize_t ret;
|
||||
ssize_t ret = 0;
|
||||
|
||||
/*
|
||||
* FIXME: Concurrent clients triggering resets and reading + clearing
|
||||
* dumps can cause inconsistent sysfs reads when a user calls in with a
|
||||
* non-zero offset to complete a prior partial read but the
|
||||
* gpu_coredump has been cleared or replaced.
|
||||
*/
|
||||
|
||||
gpu = i915_first_error_state(i915);
|
||||
if (IS_ERR(gpu)) {
|
||||
@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
|
||||
const char *str = "No error state collected\n";
|
||||
size_t len = strlen(str);
|
||||
|
||||
ret = min_t(size_t, count, len - off);
|
||||
memcpy(buf, str + off, ret);
|
||||
if (off < len) {
|
||||
ret = min_t(size_t, count, len - off);
|
||||
memcpy(buf, str + off, ret);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -259,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
|
||||
|
||||
device_remove_bin_file(kdev, &dpf_attrs_1);
|
||||
device_remove_bin_file(kdev, &dpf_attrs);
|
||||
|
||||
kobject_put(dev_priv->sysfs_gt);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/dma-fence-array.h>
|
||||
#include <drm/drm_gem.h>
|
||||
|
||||
#include "display/intel_frontbuffer.h"
|
||||
@ -1823,6 +1824,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
/*
|
||||
* Reserve fences slot early to prevent an allocation after preparing
|
||||
* the workload and associating fences with dma_resv.
|
||||
*/
|
||||
if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
|
||||
struct dma_fence *curr;
|
||||
int idx;
|
||||
|
||||
dma_fence_array_for_each(curr, idx, fence)
|
||||
;
|
||||
err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
}
|
||||
|
||||
if (flags & EXEC_OBJECT_WRITE) {
|
||||
struct intel_frontbuffer *front;
|
||||
|
||||
@ -1832,31 +1848,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
|
||||
i915_active_add_request(&front->write, rq);
|
||||
intel_frontbuffer_put(front);
|
||||
}
|
||||
}
|
||||
|
||||
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
|
||||
err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
}
|
||||
if (fence) {
|
||||
struct dma_fence *curr;
|
||||
enum dma_resv_usage usage;
|
||||
int idx;
|
||||
|
||||
if (fence) {
|
||||
dma_resv_add_fence(vma->obj->base.resv, fence,
|
||||
DMA_RESV_USAGE_WRITE);
|
||||
obj->read_domains = 0;
|
||||
if (flags & EXEC_OBJECT_WRITE) {
|
||||
usage = DMA_RESV_USAGE_WRITE;
|
||||
obj->write_domain = I915_GEM_DOMAIN_RENDER;
|
||||
obj->read_domains = 0;
|
||||
}
|
||||
} else {
|
||||
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
|
||||
err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
} else {
|
||||
usage = DMA_RESV_USAGE_READ;
|
||||
}
|
||||
|
||||
if (fence) {
|
||||
dma_resv_add_fence(vma->obj->base.resv, fence,
|
||||
DMA_RESV_USAGE_READ);
|
||||
obj->write_domain = 0;
|
||||
}
|
||||
dma_fence_array_for_each(curr, idx, fence)
|
||||
dma_resv_add_fence(vma->obj->base.resv, curr, usage);
|
||||
}
|
||||
|
||||
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
|
||||
|
@ -109,11 +109,11 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
|
||||
return;
|
||||
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
if (bo->bulk_move && bo->resource)
|
||||
ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
|
||||
if (bo->resource)
|
||||
ttm_resource_del_bulk_move(bo->resource, bo);
|
||||
bo->bulk_move = bulk;
|
||||
if (bo->bulk_move && bo->resource)
|
||||
ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
|
||||
if (bo->resource)
|
||||
ttm_resource_add_bulk_move(bo->resource, bo);
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(ttm_bo_set_bulk_move);
|
||||
@ -689,8 +689,11 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
|
||||
{
|
||||
dma_resv_assert_held(bo->base.resv);
|
||||
WARN_ON_ONCE(!kref_read(&bo->kref));
|
||||
if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
|
||||
ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
if (bo->resource)
|
||||
ttm_resource_del_bulk_move(bo->resource, bo);
|
||||
++bo->pin_count;
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(ttm_bo_pin);
|
||||
|
||||
@ -707,8 +710,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
|
||||
if (WARN_ON_ONCE(!bo->pin_count))
|
||||
return;
|
||||
|
||||
if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
|
||||
ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
--bo->pin_count;
|
||||
if (bo->resource)
|
||||
ttm_resource_add_bulk_move(bo->resource, bo);
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(ttm_bo_unpin);
|
||||
|
||||
|
@ -156,8 +156,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
|
||||
|
||||
ttm_resource_manager_for_each_res(man, &cursor, res) {
|
||||
struct ttm_buffer_object *bo = res->bo;
|
||||
uint32_t num_pages = PFN_UP(bo->base.size);
|
||||
uint32_t num_pages;
|
||||
|
||||
if (!bo)
|
||||
continue;
|
||||
|
||||
num_pages = PFN_UP(bo->base.size);
|
||||
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
|
||||
/* ttm_bo_swapout has dropped the lru_lock */
|
||||
if (!ret)
|
||||
|
@ -91,8 +91,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
|
||||
}
|
||||
|
||||
/* Add the resource to a bulk_move cursor */
|
||||
void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
|
||||
struct ttm_resource *res)
|
||||
static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
|
||||
struct ttm_resource *res)
|
||||
{
|
||||
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
|
||||
|
||||
@ -105,8 +105,8 @@ void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
|
||||
}
|
||||
|
||||
/* Remove the resource from a bulk_move range */
|
||||
void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
|
||||
struct ttm_resource *res)
|
||||
static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
|
||||
struct ttm_resource *res)
|
||||
{
|
||||
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
|
||||
|
||||
@ -122,6 +122,22 @@ void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
|
||||
}
|
||||
}
|
||||
|
||||
/* Add the resource to a bulk move if the BO is configured for it */
|
||||
void ttm_resource_add_bulk_move(struct ttm_resource *res,
|
||||
struct ttm_buffer_object *bo)
|
||||
{
|
||||
if (bo->bulk_move && !bo->pin_count)
|
||||
ttm_lru_bulk_move_add(bo->bulk_move, res);
|
||||
}
|
||||
|
||||
/* Remove the resource from a bulk move if the BO is configured for it */
|
||||
void ttm_resource_del_bulk_move(struct ttm_resource *res,
|
||||
struct ttm_buffer_object *bo)
|
||||
{
|
||||
if (bo->bulk_move && !bo->pin_count)
|
||||
ttm_lru_bulk_move_del(bo->bulk_move, res);
|
||||
}
|
||||
|
||||
/* Move a resource to the LRU or bulk tail */
|
||||
void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
|
||||
{
|
||||
@ -169,15 +185,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
|
||||
res->bus.is_iomem = false;
|
||||
res->bus.caching = ttm_cached;
|
||||
res->bo = bo;
|
||||
INIT_LIST_HEAD(&res->lru);
|
||||
|
||||
man = ttm_manager_type(bo->bdev, place->mem_type);
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
man->usage += res->num_pages << PAGE_SHIFT;
|
||||
if (bo->bulk_move)
|
||||
ttm_lru_bulk_move_add(bo->bulk_move, res);
|
||||
if (bo->pin_count)
|
||||
list_add_tail(&res->lru, &bo->bdev->pinned);
|
||||
else
|
||||
ttm_resource_move_to_lru_tail(res);
|
||||
list_add_tail(&res->lru, &man->lru[bo->priority]);
|
||||
man->usage += res->num_pages << PAGE_SHIFT;
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(ttm_resource_init);
|
||||
@ -210,8 +225,16 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo,
|
||||
{
|
||||
struct ttm_resource_manager *man =
|
||||
ttm_manager_type(bo->bdev, place->mem_type);
|
||||
int ret;
|
||||
|
||||
return man->func->alloc(man, bo, place, res_ptr);
|
||||
ret = man->func->alloc(man, bo, place, res_ptr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
ttm_resource_add_bulk_move(*res_ptr, bo);
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
|
||||
@ -221,12 +244,9 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
|
||||
if (!*res)
|
||||
return;
|
||||
|
||||
if (bo->bulk_move) {
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
ttm_lru_bulk_move_del(bo->bulk_move, *res);
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
}
|
||||
|
||||
spin_lock(&bo->bdev->lru_lock);
|
||||
ttm_resource_del_bulk_move(*res, bo);
|
||||
spin_unlock(&bo->bdev->lru_lock);
|
||||
man = ttm_manager_type(bo->bdev, (*res)->mem_type);
|
||||
man->func->free(man, *res);
|
||||
*res = NULL;
|
||||
|
@ -199,7 +199,8 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
|
||||
if (!input_device->hid_desc)
|
||||
goto cleanup;
|
||||
|
||||
input_device->report_desc_size = desc->desc[0].wDescriptorLength;
|
||||
input_device->report_desc_size = le16_to_cpu(
|
||||
desc->desc[0].wDescriptorLength);
|
||||
if (input_device->report_desc_size == 0) {
|
||||
input_device->dev_info_status = -EINVAL;
|
||||
goto cleanup;
|
||||
@ -217,7 +218,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
|
||||
|
||||
memcpy(input_device->report_desc,
|
||||
((unsigned char *)desc) + desc->bLength,
|
||||
desc->desc[0].wDescriptorLength);
|
||||
le16_to_cpu(desc->desc[0].wDescriptorLength));
|
||||
|
||||
/* Send the ack */
|
||||
memset(&ack, 0, sizeof(struct mousevsc_prt_msg));
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/hyperv.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
|
||||
#include "hyperv_vmbus.h"
|
||||
|
||||
@ -638,6 +639,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
|
||||
*/
|
||||
if (newchannel->offermsg.offer.sub_channel_index == 0) {
|
||||
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||
cpus_read_unlock();
|
||||
/*
|
||||
* Don't call free_channel(), because newchannel->kobj
|
||||
* is not initialized yet.
|
||||
@ -728,16 +730,20 @@ static void init_vp_index(struct vmbus_channel *channel)
|
||||
u32 i, ncpu = num_online_cpus();
|
||||
cpumask_var_t available_mask;
|
||||
struct cpumask *allocated_mask;
|
||||
const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
|
||||
u32 target_cpu;
|
||||
int numa_node;
|
||||
|
||||
if (!perf_chn ||
|
||||
!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
|
||||
!alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
|
||||
cpumask_empty(hk_mask)) {
|
||||
/*
|
||||
* If the channel is not a performance critical
|
||||
* channel, bind it to VMBUS_CONNECT_CPU.
|
||||
* In case alloc_cpumask_var() fails, bind it to
|
||||
* VMBUS_CONNECT_CPU.
|
||||
* If all the cpus are isolated, bind it to
|
||||
* VMBUS_CONNECT_CPU.
|
||||
*/
|
||||
channel->target_cpu = VMBUS_CONNECT_CPU;
|
||||
if (perf_chn)
|
||||
@ -758,17 +764,19 @@ static void init_vp_index(struct vmbus_channel *channel)
|
||||
}
|
||||
allocated_mask = &hv_context.hv_numa_map[numa_node];
|
||||
|
||||
if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
|
||||
retry:
|
||||
cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
|
||||
cpumask_and(available_mask, available_mask, hk_mask);
|
||||
|
||||
if (cpumask_empty(available_mask)) {
|
||||
/*
|
||||
* We have cycled through all the CPUs in the node;
|
||||
* reset the allocated map.
|
||||
*/
|
||||
cpumask_clear(allocated_mask);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
cpumask_xor(available_mask, allocated_mask,
|
||||
cpumask_of_node(numa_node));
|
||||
|
||||
target_cpu = cpumask_first(available_mask);
|
||||
cpumask_set_cpu(target_cpu, allocated_mask);
|
||||
|
||||
|
@ -394,7 +394,7 @@ kvp_send_key(struct work_struct *dummy)
|
||||
in_msg = kvp_transaction.kvp_msg;
|
||||
|
||||
/*
|
||||
* The key/value strings sent from the host are encoded in
|
||||
* The key/value strings sent from the host are encoded
|
||||
* in utf16; convert it to utf8 strings.
|
||||
* The host assures us that the utf16 strings will not exceed
|
||||
* the max lengths specified. We will however, reserve room
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
#include <linux/delay.h>
|
||||
@ -1770,6 +1771,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
||||
if (target_cpu >= nr_cpumask_bits)
|
||||
return -EINVAL;
|
||||
|
||||
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
|
||||
return -EINVAL;
|
||||
|
||||
/* No CPUs should come up or down during this. */
|
||||
cpus_read_lock();
|
||||
|
||||
|
@ -259,7 +259,7 @@ static const struct ec_board_info board_info[] = {
|
||||
},
|
||||
{
|
||||
.board_names = {
|
||||
"ROG CROSSHAIR VIII FORMULA"
|
||||
"ROG CROSSHAIR VIII FORMULA",
|
||||
"ROG CROSSHAIR VIII HERO",
|
||||
"ROG CROSSHAIR VIII HERO (WI-FI)",
|
||||
},
|
||||
|
@ -1228,10 +1228,15 @@ EXPORT_SYMBOL_GPL(occ_setup);
|
||||
|
||||
void occ_shutdown(struct occ *occ)
|
||||
{
|
||||
mutex_lock(&occ->lock);
|
||||
|
||||
occ_shutdown_sysfs(occ);
|
||||
|
||||
if (occ->hwmon)
|
||||
hwmon_device_unregister(occ->hwmon);
|
||||
occ->hwmon = NULL;
|
||||
|
||||
mutex_unlock(&occ->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(occ_shutdown);
|
||||
|
||||
|
@ -477,9 +477,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (IS_ERR(dev->clk))
|
||||
return PTR_ERR(dev->clk);
|
||||
|
||||
if (prepare) {
|
||||
/* Optional interface clock */
|
||||
ret = clk_prepare_enable(dev->pclk);
|
||||
|
@ -320,8 +320,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
|
||||
goto exit_reset;
|
||||
}
|
||||
|
||||
dev->clk = devm_clk_get(&pdev->dev, NULL);
|
||||
if (!i2c_dw_prepare_clk(dev, true)) {
|
||||
dev->clk = devm_clk_get_optional(&pdev->dev, NULL);
|
||||
if (IS_ERR(dev->clk)) {
|
||||
ret = PTR_ERR(dev->clk);
|
||||
goto exit_reset;
|
||||
}
|
||||
|
||||
ret = i2c_dw_prepare_clk(dev, true);
|
||||
if (ret)
|
||||
goto exit_reset;
|
||||
|
||||
if (dev->clk) {
|
||||
u64 clk_khz;
|
||||
|
||||
dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
|
||||
|
@ -1420,17 +1420,22 @@ static int mtk_i2c_probe(struct platform_device *pdev)
|
||||
if (ret < 0) {
|
||||
dev_err(&pdev->dev,
|
||||
"Request I2C IRQ %d fail\n", irq);
|
||||
return ret;
|
||||
goto err_bulk_unprepare;
|
||||
}
|
||||
|
||||
i2c_set_adapdata(&i2c->adap, i2c);
|
||||
ret = i2c_add_adapter(&i2c->adap);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err_bulk_unprepare;
|
||||
|
||||
platform_set_drvdata(pdev, i2c);
|
||||
|
||||
return 0;
|
||||
|
||||
err_bulk_unprepare:
|
||||
clk_bulk_unprepare(I2C_MT65XX_CLK_MAX, i2c->clocks);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mtk_i2c_remove(struct platform_device *pdev)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user