mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 07:23:14 +00:00
8dcc1a9d90
zonefs is a very simple file system exposing each zone of a zoned block device as a file. Unlike a regular file system with zoned block device support (e.g. f2fs), zonefs does not hide the sequential write constraint of zoned block devices to the user. Files representing sequential write zones of the device must be written sequentially starting from the end of the file (append only writes). As such, zonefs is in essence closer to a raw block device access interface than to a full featured POSIX file system. The goal of zonefs is to simplify the implementation of zoned block device support in applications by replacing raw block device file accesses with a richer file API, avoiding relying on direct block device file ioctls which may be more obscure to developers. One example of this approach is the implementation of LSM (log-structured merge) tree structures (such as used in RocksDB and LevelDB) on zoned block devices by allowing SSTables to be stored in a zone file similarly to a regular file system rather than as a range of sectors of a zoned device. The introduction of the higher level construct "one file is one zone" can help reducing the amount of changes needed in the application as well as introducing support for different application programming languages. Zonefs on-disk metadata is reduced to an immutable super block to persistently store a magic number and optional feature flags and values. On mount, zonefs uses blkdev_report_zones() to obtain the device zone configuration and populates the mount point with a static file tree solely based on this information. E.g. file sizes come from the device zone type and write pointer offset managed by the device itself. The zone files created on mount have the following characteristics. 1) Files representing zones of the same type are grouped together under a common sub-directory: * For conventional zones, the sub-directory "cnv" is used. * For sequential write zones, the sub-directory "seq" is used. These two directories are the only directories that exist in zonefs. Users cannot create other directories and cannot rename nor delete the "cnv" and "seq" sub-directories. 2) The name of zone files is the number of the file within the zone type sub-directory, in order of increasing zone start sector. 3) The size of conventional zone files is fixed to the device zone size. Conventional zone files cannot be truncated. 4) The size of sequential zone files represent the file's zone write pointer position relative to the zone start sector. Truncating these files is allowed only down to 0, in which case, the zone is reset to rewind the zone write pointer position to the start of the zone, or up to the zone size, in which case the file's zone is transitioned to the FULL state (finish zone operation). 5) All read and write operations to files are not allowed beyond the file zone size. Any access exceeding the zone size is failed with the -EFBIG error. 6) Creating, deleting, renaming or modifying any attribute of files and sub-directories is not allowed. 7) There are no restrictions on the type of read and write operations that can be issued to conventional zone files. Buffered, direct and mmap read & write operations are accepted. For sequential zone files, there are no restrictions on read operations, but all write operations must be direct IO append writes. mmap write of sequential files is not allowed. Several optional features of zonefs can be enabled at format time. * Conventional zone aggregation: ranges of contiguous conventional zones can be aggregated into a single larger file instead of the default one file per zone. * File ownership: The owner UID and GID of zone files is by default 0 (root) but can be changed to any valid UID/GID. * File access permissions: the default 640 access permissions can be changed. The mkzonefs tool is used to format zoned block devices for use with zonefs. This tool is available on Github at: git@github.com:damien-lemoal/zonefs-tools.git. zonefs-tools also includes a test suite which can be run against any zoned block device, including null_blk block device created with zoned mode. Example: the following formats a 15TB host-managed SMR HDD with 256 MB zones with the conventional zones aggregation feature enabled. $ sudo mkzonefs -o aggr_cnv /dev/sdX $ sudo mount -t zonefs /dev/sdX /mnt $ ls -l /mnt/ total 0 dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq The size of the zone files sub-directories indicate the number of files existing for each type of zones. In this example, there is only one conventional zone file (all conventional zones are aggregated under a single file). $ ls -l /mnt/cnv total 137101312 -rw-r----- 1 root root 140391743488 Nov 25 13:23 0 This aggregated conventional zone file can be used as a regular file. $ sudo mkfs.ext4 /mnt/cnv/0 $ sudo mount -o loop /mnt/cnv/0 /data The "seq" sub-directory grouping files for sequential write zones has in this example 55356 zones. $ ls -lv /mnt/seq total 14511243264 -rw-r----- 1 root root 0 Nov 25 13:23 0 -rw-r----- 1 root root 0 Nov 25 13:23 1 -rw-r----- 1 root root 0 Nov 25 13:23 2 ... -rw-r----- 1 root root 0 Nov 25 13:23 55354 -rw-r----- 1 root root 0 Nov 25 13:23 55355 For sequential write zone files, the file size changes as data is appended at the end of the file, similarly to any regular file system. $ dd if=/dev/zero of=/mnt/seq/0 bs=4K count=1 conv=notrunc oflag=direct 1+0 records in 1+0 records out 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.000452219 s, 9.1 MB/s $ ls -l /mnt/seq/0 -rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0 The written file can be truncated to the zone size, preventing any further write operation. $ truncate -s 268435456 /mnt/seq/0 $ ls -l /mnt/seq/0 -rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0 Truncation to 0 size allows freeing the file zone storage space and restart append-writes to the file. $ truncate -s 0 /mnt/seq/0 $ ls -l /mnt/seq/0 -rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0 Since files are statically mapped to zones on the disk, the number of blocks of a file as reported by stat() and fstat() indicates the size of the file zone. $ stat /mnt/seq/0 File: /mnt/seq/0 Size: 0 Blocks: 524288 IO Block: 4096 regular empty file Device: 870h/2160d Inode: 50431 Links: 1 Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2019-11-25 13:23:57.048971997 +0900 Modify: 2019-11-25 13:52:25.553805765 +0900 Change: 2019-11-25 13:52:25.553805765 +0900 Birth: - The number of blocks of the file ("Blocks") in units of 512B blocks gives the maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone size in this example. Of note is that the "IO block" field always indicates the minimum IO size for writes and corresponds to the device physical sector size. This code contains contributions from: * Johannes Thumshirn <jthumshirn@suse.de>, * Darrick J. Wong <darrick.wong@oracle.com>, * Christoph Hellwig <hch@lst.de>, * Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> and * Ting Yao <tingyao@hust.edu.cn>. Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Reviewed-by: Dave Chinner <dchinner@redhat.com>
330 lines
7.7 KiB
Plaintext
330 lines
7.7 KiB
Plaintext
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# File system configuration
|
|
#
|
|
|
|
menu "File systems"
|
|
|
|
# Use unaligned word dcache accesses
|
|
config DCACHE_WORD_ACCESS
|
|
bool
|
|
|
|
config VALIDATE_FS_PARSER
|
|
bool "Validate filesystem parameter description"
|
|
help
|
|
Enable this to perform validation of the parameter description for a
|
|
filesystem when it is registered.
|
|
|
|
if BLOCK
|
|
|
|
config FS_IOMAP
|
|
bool
|
|
|
|
source "fs/ext2/Kconfig"
|
|
source "fs/ext4/Kconfig"
|
|
source "fs/jbd2/Kconfig"
|
|
|
|
config FS_MBCACHE
|
|
# Meta block cache for Extended Attributes (ext2/ext3/ext4)
|
|
tristate
|
|
default y if EXT2_FS=y && EXT2_FS_XATTR
|
|
default y if EXT4_FS=y
|
|
default m if EXT2_FS_XATTR || EXT4_FS
|
|
|
|
source "fs/reiserfs/Kconfig"
|
|
source "fs/jfs/Kconfig"
|
|
|
|
source "fs/xfs/Kconfig"
|
|
source "fs/gfs2/Kconfig"
|
|
source "fs/ocfs2/Kconfig"
|
|
source "fs/btrfs/Kconfig"
|
|
source "fs/nilfs2/Kconfig"
|
|
source "fs/f2fs/Kconfig"
|
|
source "fs/zonefs/Kconfig"
|
|
|
|
config FS_DAX
|
|
bool "Direct Access (DAX) support"
|
|
depends on MMU
|
|
depends on !(ARM || MIPS || SPARC)
|
|
select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
|
|
select FS_IOMAP
|
|
select DAX
|
|
help
|
|
Direct Access (DAX) can be used on memory-backed block devices.
|
|
If the block device supports DAX and the filesystem supports DAX,
|
|
then you can avoid using the pagecache to buffer I/Os. Turning
|
|
on this option will compile in support for DAX; you will need to
|
|
mount the filesystem using the -o dax option.
|
|
|
|
If you do not have a block device that is capable of using this,
|
|
or if unsure, say N. Saying Y will increase the size of the kernel
|
|
by about 5kB.
|
|
|
|
config FS_DAX_PMD
|
|
bool
|
|
default FS_DAX
|
|
depends on FS_DAX
|
|
depends on ZONE_DEVICE
|
|
depends on TRANSPARENT_HUGEPAGE
|
|
|
|
# Selected by DAX drivers that do not expect filesystem DAX to support
|
|
# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
|
|
# for fork() of processes with MAP_SHARED mappings or support for
|
|
# direct-I/O to a DAX mapping.
|
|
config FS_DAX_LIMITED
|
|
bool
|
|
|
|
endif # BLOCK
|
|
|
|
# Posix ACL utility routines
|
|
#
|
|
# Note: Posix ACLs can be implemented without these helpers. Never use
|
|
# this symbol for ifdefs in core code.
|
|
#
|
|
config FS_POSIX_ACL
|
|
def_bool n
|
|
|
|
config EXPORTFS
|
|
tristate
|
|
|
|
config EXPORTFS_BLOCK_OPS
|
|
bool "Enable filesystem export operations for block IO"
|
|
help
|
|
This option enables the export operations for a filesystem to support
|
|
external block IO.
|
|
|
|
config FILE_LOCKING
|
|
bool "Enable POSIX file locking API" if EXPERT
|
|
default y
|
|
help
|
|
This option enables standard file locking support, required
|
|
for filesystems like NFS and for the flock() system
|
|
call. Disabling this option saves about 11k.
|
|
|
|
config MANDATORY_FILE_LOCKING
|
|
bool "Enable Mandatory file locking"
|
|
depends on FILE_LOCKING
|
|
default y
|
|
help
|
|
This option enables files appropriately marked files on appropriely
|
|
mounted filesystems to support mandatory locking.
|
|
|
|
To the best of my knowledge this is dead code that no one cares about.
|
|
|
|
source "fs/crypto/Kconfig"
|
|
|
|
source "fs/verity/Kconfig"
|
|
|
|
source "fs/notify/Kconfig"
|
|
|
|
source "fs/quota/Kconfig"
|
|
|
|
source "fs/autofs/Kconfig"
|
|
source "fs/fuse/Kconfig"
|
|
source "fs/overlayfs/Kconfig"
|
|
|
|
menu "Caches"
|
|
|
|
source "fs/fscache/Kconfig"
|
|
source "fs/cachefiles/Kconfig"
|
|
|
|
endmenu
|
|
|
|
if BLOCK
|
|
menu "CD-ROM/DVD Filesystems"
|
|
|
|
source "fs/isofs/Kconfig"
|
|
source "fs/udf/Kconfig"
|
|
|
|
endmenu
|
|
endif # BLOCK
|
|
|
|
if BLOCK
|
|
menu "DOS/FAT/NT Filesystems"
|
|
|
|
source "fs/fat/Kconfig"
|
|
source "fs/ntfs/Kconfig"
|
|
|
|
endmenu
|
|
endif # BLOCK
|
|
|
|
menu "Pseudo filesystems"
|
|
|
|
source "fs/proc/Kconfig"
|
|
source "fs/kernfs/Kconfig"
|
|
source "fs/sysfs/Kconfig"
|
|
|
|
config TMPFS
|
|
bool "Tmpfs virtual memory file system support (former shm fs)"
|
|
depends on SHMEM
|
|
help
|
|
Tmpfs is a file system which keeps all files in virtual memory.
|
|
|
|
Everything in tmpfs is temporary in the sense that no files will be
|
|
created on your hard drive. The files live in memory and swap
|
|
space. If you unmount a tmpfs instance, everything stored therein is
|
|
lost.
|
|
|
|
See <file:Documentation/filesystems/tmpfs.txt> for details.
|
|
|
|
config TMPFS_POSIX_ACL
|
|
bool "Tmpfs POSIX Access Control Lists"
|
|
depends on TMPFS
|
|
select TMPFS_XATTR
|
|
select FS_POSIX_ACL
|
|
help
|
|
POSIX Access Control Lists (ACLs) support additional access rights
|
|
for users and groups beyond the standard owner/group/world scheme,
|
|
and this option selects support for ACLs specifically for tmpfs
|
|
filesystems.
|
|
|
|
If you've selected TMPFS, it's possible that you'll also need
|
|
this option as there are a number of Linux distros that require
|
|
POSIX ACL support under /dev for certain features to work properly.
|
|
For example, some distros need this feature for ALSA-related /dev
|
|
files for sound to work properly. In short, if you're not sure,
|
|
say Y.
|
|
|
|
config TMPFS_XATTR
|
|
bool "Tmpfs extended attributes"
|
|
depends on TMPFS
|
|
default n
|
|
help
|
|
Extended attributes are name:value pairs associated with inodes by
|
|
the kernel or by users (see the attr(5) manual page for details).
|
|
|
|
Currently this enables support for the trusted.* and
|
|
security.* namespaces.
|
|
|
|
You need this for POSIX ACL support on tmpfs.
|
|
|
|
If unsure, say N.
|
|
|
|
config HUGETLBFS
|
|
bool "HugeTLB file system support"
|
|
depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
|
|
SYS_SUPPORTS_HUGETLBFS || BROKEN
|
|
help
|
|
hugetlbfs is a filesystem backing for HugeTLB pages, based on
|
|
ramfs. For architectures that support it, say Y here and read
|
|
<file:Documentation/admin-guide/mm/hugetlbpage.rst> for details.
|
|
|
|
If unsure, say N.
|
|
|
|
config HUGETLB_PAGE
|
|
def_bool HUGETLBFS
|
|
|
|
config MEMFD_CREATE
|
|
def_bool TMPFS || HUGETLBFS
|
|
|
|
config ARCH_HAS_GIGANTIC_PAGE
|
|
bool
|
|
|
|
source "fs/configfs/Kconfig"
|
|
source "fs/efivarfs/Kconfig"
|
|
|
|
endmenu
|
|
|
|
menuconfig MISC_FILESYSTEMS
|
|
bool "Miscellaneous filesystems"
|
|
default y
|
|
---help---
|
|
Say Y here to get to see options for various miscellaneous
|
|
filesystems, such as filesystems that came from other
|
|
operating systems.
|
|
|
|
This option alone does not add any kernel code.
|
|
|
|
If you say N, all options in this submenu will be skipped and
|
|
disabled; if unsure, say Y here.
|
|
|
|
if MISC_FILESYSTEMS
|
|
|
|
source "fs/orangefs/Kconfig"
|
|
source "fs/adfs/Kconfig"
|
|
source "fs/affs/Kconfig"
|
|
source "fs/ecryptfs/Kconfig"
|
|
source "fs/hfs/Kconfig"
|
|
source "fs/hfsplus/Kconfig"
|
|
source "fs/befs/Kconfig"
|
|
source "fs/bfs/Kconfig"
|
|
source "fs/efs/Kconfig"
|
|
source "fs/jffs2/Kconfig"
|
|
# UBIFS File system configuration
|
|
source "fs/ubifs/Kconfig"
|
|
source "fs/cramfs/Kconfig"
|
|
source "fs/squashfs/Kconfig"
|
|
source "fs/freevxfs/Kconfig"
|
|
source "fs/minix/Kconfig"
|
|
source "fs/omfs/Kconfig"
|
|
source "fs/hpfs/Kconfig"
|
|
source "fs/qnx4/Kconfig"
|
|
source "fs/qnx6/Kconfig"
|
|
source "fs/romfs/Kconfig"
|
|
source "fs/pstore/Kconfig"
|
|
source "fs/sysv/Kconfig"
|
|
source "fs/ufs/Kconfig"
|
|
source "fs/erofs/Kconfig"
|
|
|
|
endif # MISC_FILESYSTEMS
|
|
|
|
menuconfig NETWORK_FILESYSTEMS
|
|
bool "Network File Systems"
|
|
default y
|
|
depends on NET
|
|
---help---
|
|
Say Y here to get to see options for network filesystems and
|
|
filesystem-related networking code, such as NFS daemon and
|
|
RPCSEC security modules.
|
|
|
|
This option alone does not add any kernel code.
|
|
|
|
If you say N, all options in this submenu will be skipped and
|
|
disabled; if unsure, say Y here.
|
|
|
|
if NETWORK_FILESYSTEMS
|
|
|
|
source "fs/nfs/Kconfig"
|
|
source "fs/nfsd/Kconfig"
|
|
|
|
config GRACE_PERIOD
|
|
tristate
|
|
|
|
config LOCKD
|
|
tristate
|
|
depends on FILE_LOCKING
|
|
select GRACE_PERIOD
|
|
|
|
config LOCKD_V4
|
|
bool
|
|
depends on NFSD_V3 || NFS_V3
|
|
depends on FILE_LOCKING
|
|
default y
|
|
|
|
config NFS_ACL_SUPPORT
|
|
tristate
|
|
select FS_POSIX_ACL
|
|
|
|
config NFS_COMMON
|
|
bool
|
|
depends on NFSD || NFS_FS || LOCKD
|
|
default y
|
|
|
|
source "net/sunrpc/Kconfig"
|
|
source "fs/ceph/Kconfig"
|
|
source "fs/cifs/Kconfig"
|
|
source "fs/coda/Kconfig"
|
|
source "fs/afs/Kconfig"
|
|
source "fs/9p/Kconfig"
|
|
|
|
endif # NETWORK_FILESYSTEMS
|
|
|
|
source "fs/nls/Kconfig"
|
|
source "fs/dlm/Kconfig"
|
|
source "fs/unicode/Kconfig"
|
|
|
|
config IO_WQ
|
|
bool
|
|
|
|
endmenu
|