vfs-6.10.misc

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZj3HuwAKCRCRxhvAZXjc
 orYvAQCZOr68uJaEaXAArYTdnMdQ6HIzG+FVlwrqtrhz0BV07wEAqgmtSR9XKh+L
 0+DNepg4R8PZOHH371eSSsLNRCUCkAs=
 =SVsU
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "This contains the usual miscellaneous features, cleanups, and fixes
  for vfs and individual fses.

  Features:

   - Free up FMODE_* bits. I've freed up bits 6, 7, 8, and 24. That
     means we now have six free FMODE_* bits in total (but bit #6
     already got used for FMODE_WRITE_RESTRICTED)

   - Add FOP_HUGE_PAGES flag (follow-up to FMODE_* cleanup)

   - Add fd_raw cleanup class so we can make use of automatic cleanup
     provided by CLASS(fd_raw, f)(fd) for O_PATH fds as well

   - Optimize seq_puts()

   - Simplify __seq_puts()

   - Add new anon_inode_getfile_fmode() api to allow specifying f_mode
     instead of open-coding it in multiple places

   - Annotate struct file_handle with __counted_by() and use
     struct_size()

   - Warn in get_file() when f_count resurrection from zero is
     attempted (epoll/drm discussion)

   - Folio-sophize aio

   - Export the subvolume id in statx() for both btrfs and bcachefs

   - Relax linkat(AT_EMPTY_PATH) requirements

   - Add F_DUPFD_QUERY fcntl() so that two file descriptors can be
     compared for dup*() equality without needing kcmp()

  Cleanups:

   - Compile out swapfile inode checks when swap isn't enabled

   - Use (1 << n) notation for FMODE_* bitshifts for clarity

   - Remove redundant variable assignment in fs/direct-io

   - Clean up uses of strncpy in orangefs

   - Speed up and clean up writeback

   - Move fsparam_string_empty() helper into header since it's currently
     open-coded in multiple places

   - Add kernel-doc comments to proc_create_net_data_write()

   - Don't needlessly read dentry->d_flags twice

  Fixes:

   - Fix out-of-range warning in nilfs2

   - Fix ecryptfs overflow due to wrong encryption packet size
     calculation

   - Fix overly long line in xfs file_operations (follow-up to FMODE_*
     cleanup)

   - Don't raise FOP_BUFFER_{R,W}ASYNC for directories in xfs (follow-up
     to FMODE_* cleanup)

   - Don't call xfs_file_open from xfs_dir_open (follow-up to FMODE_*
     cleanup)

   - Fix stable offset api to prevent endless loops

   - Fix afs file server rotations

   - Prevent xattr node from overflowing the eraseblock in jffs2

   - Move fdinfo PTRACE_MODE_READ procfs check into the .permission()
     operation instead of .open() operation since this caused userspace
     regressions"

* tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits)
  afs: Fix fileserver rotation getting stuck
  selftests: add F_DUPFD_QUERY selftests
  fcntl: add F_DUPFD_QUERY fcntl()
  file: add fd_raw cleanup class
  fs: WARN when f_count resurrection is attempted
  seq_file: Simplify __seq_puts()
  seq_file: Optimize seq_puts()
  proc: Move fdinfo PTRACE_MODE_READ check into the inode .permission operation
  fs: Create anon_inode_getfile_fmode()
  xfs: don't call xfs_file_open from xfs_dir_open
  xfs: drop fop_flags for directories
  xfs: fix overly long line in the file_operations
  shmem: Fix shmem_rename2()
  libfs: Add simple_offset_rename() API
  libfs: Fix simple_offset_rename_exchange()
  jffs2: prevent xattr node from overflowing the eraseblock
  vfs, swap: compile out IS_SWAPFILE() on swapless configs
  vfs: relax linkat() AT_EMPTY_PATH - aka flink() - requirements
  fs/direct-io: remove redundant assignment to variable retval
  fs/dcache: Re-use value stored to dentry->d_flags instead of re-reading
  ...
This commit is contained in:
Linus Torvalds 2024-05-13 11:40:06 -07:00
commit 1b0aabcc9a
50 changed files with 439 additions and 250 deletions

View File

@ -912,7 +912,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
disk_unblock_events(disk);
bdev_file->f_flags |= O_LARGEFILE;
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
bdev_file->f_mode |= FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT;
if (mode & BLK_OPEN_RESTRICT_WRITES)

View File

@ -863,6 +863,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
.fop_flags = FOP_BUFFER_RASYNC,
};
static __init int blkdev_init(void)

View File

@ -377,7 +377,7 @@ static const struct file_operations dax_fops = {
.release = dax_release,
.get_unmapped_area = dax_get_unmapped_area,
.mmap = dax_mmap,
.mmap_supported_flags = MAP_SYNC,
.fop_flags = FOP_MMAP_SYNC,
};
static void dev_dax_cdev_del(void *cdev)

View File

@ -541,11 +541,13 @@ bool afs_select_fileserver(struct afs_operation *op)
test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue;
es = op->server_states->endpoint_state;
es = op->server_states[i].endpoint_state;
sal = es->addresses;
afs_get_address_preferences_rcu(op->net, sal);
for (j = 0; j < sal->nr_addrs; j++) {
if (es->failed_set & (1 << j))
continue;
if (!sal->addrs[j].peer)
continue;
if (sal->addrs[j].prio > best_prio) {
@ -605,6 +607,8 @@ bool afs_select_fileserver(struct afs_operation *op)
best_prio = -1;
addr_index = 0;
for (i = 0; i < alist->nr_addrs; i++) {
if (!(set & (1 << i)))
continue;
if (alist->addrs[i].prio > best_prio) {
addr_index = i;
best_prio = alist->addrs[i].prio;
@ -674,7 +678,7 @@ bool afs_select_fileserver(struct afs_operation *op)
for (i = 0; i < op->server_list->nr_servers; i++) {
struct afs_endpoint_state *estate;
estate = op->server_states->endpoint_state;
estate = op->server_states[i].endpoint_state;
error = READ_ONCE(estate->error);
if (error < 0)
afs_op_accumulate_error(op, error, estate->abort_code);

View File

@ -122,7 +122,7 @@ struct kioctx {
unsigned long mmap_base;
unsigned long mmap_size;
struct page **ring_pages;
struct folio **ring_folios;
long nr_pages;
struct rcu_work free_rwork; /* see free_ioctx() */
@ -160,7 +160,7 @@ struct kioctx {
spinlock_t completion_lock;
} ____cacheline_aligned_in_smp;
struct page *internal_pages[AIO_RING_PAGES];
struct folio *internal_folios[AIO_RING_PAGES];
struct file *aio_ring_file;
unsigned id;
@ -334,19 +334,20 @@ static void aio_free_ring(struct kioctx *ctx)
put_aio_ring_file(ctx);
for (i = 0; i < ctx->nr_pages; i++) {
struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
page = ctx->ring_pages[i];
if (!page)
struct folio *folio = ctx->ring_folios[i];
if (!folio)
continue;
ctx->ring_pages[i] = NULL;
put_page(page);
pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
ctx->ring_folios[i] = NULL;
folio_put(folio);
}
if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
kfree(ctx->ring_pages);
ctx->ring_pages = NULL;
if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) {
kfree(ctx->ring_folios);
ctx->ring_folios = NULL;
}
}
@ -441,7 +442,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
idx = src->index;
if (idx < (pgoff_t)ctx->nr_pages) {
/* Make sure the old folio hasn't already been changed */
if (ctx->ring_pages[idx] != &src->page)
if (ctx->ring_folios[idx] != src)
rc = -EAGAIN;
} else
rc = -EINVAL;
@ -465,8 +466,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
folio_migrate_copy(dst, src);
BUG_ON(ctx->ring_pages[idx] != &src->page);
ctx->ring_pages[idx] = &dst->page;
BUG_ON(ctx->ring_folios[idx] != src);
ctx->ring_folios[idx] = dst;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
/* The old folio is no longer accessible. */
@ -516,28 +517,30 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event);
ctx->ring_pages = ctx->internal_pages;
ctx->ring_folios = ctx->internal_folios;
if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *),
GFP_KERNEL);
if (!ctx->ring_pages) {
if (!ctx->ring_folios) {
put_aio_ring_file(ctx);
return -ENOMEM;
}
}
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_mapping,
i, GFP_USER | __GFP_ZERO);
if (!page)
break;
pr_debug("pid(%d) page[%d]->count=%d\n",
current->pid, i, page_count(page));
SetPageUptodate(page);
unlock_page(page);
struct folio *folio;
ctx->ring_pages[i] = page;
folio = __filemap_get_folio(file->f_mapping, i,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
GFP_USER | __GFP_ZERO);
if (IS_ERR(folio))
break;
pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
folio_end_read(folio, true);
ctx->ring_folios[i] = folio;
}
ctx->nr_pages = i;
@ -570,7 +573,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->nr = nr_events; /* user copy */
ring->id = ~0U;
ring->head = ring->tail = 0;
@ -578,7 +581,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);
return 0;
}
@ -689,9 +692,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
/* While kioctx setup is in progress,
* we are protected from page migration
* changes ring_pages by ->ring_lock.
* changes ring_folios by ->ring_lock.
*/
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->id = ctx->id;
return 0;
}
@ -1033,7 +1036,7 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;
refill_reqs_available(ctx, head, ctx->tail);
@ -1145,12 +1148,12 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events)
tail = 0;
ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res;
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
(void __user *)(unsigned long)iocb->ki_res.obj,
@ -1163,10 +1166,10 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail;
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;
ring->tail = tail;
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);
ctx->completed_events++;
if (ctx->completed_events > 1)
@ -1238,8 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
sched_annotate_sleep();
mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */
ring = page_address(ctx->ring_pages[0]);
/* Access to ->ring_folios here is protected by ctx->ring_lock. */
ring = folio_address(ctx->ring_folios[0]);
head = ring->head;
tail = ring->tail;
@ -1260,20 +1263,20 @@ static long aio_read_events_ring(struct kioctx *ctx,
while (ret < nr) {
long avail;
struct io_event *ev;
struct page *page;
struct folio *folio;
avail = (head <= tail ? tail : ctx->nr_events) - head;
if (head == tail)
break;
pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE;
avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
ev = page_address(page);
ev = folio_address(folio);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
@ -1287,9 +1290,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events;
}
ring = page_address(ctx->ring_pages[0]);
ring = folio_address(ctx->ring_folios[0]);
ring->head = head;
flush_dcache_page(ctx->ring_pages[0]);
flush_dcache_folio(ctx->ring_folios[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
out:

View File

@ -148,6 +148,38 @@ struct file *anon_inode_getfile(const char *name,
}
EXPORT_SYMBOL_GPL(anon_inode_getfile);
/**
 * anon_inode_getfile_fmode - create a new file instance hooked up to an
 *                            anonymous inode and a dentry describing the
 *                            "class" of the file, with extra f_mode bits
 *
 * @name:    [in]    name of the "class" of the new file
 * @fops:    [in]    file operations for the new file
 * @priv:    [in]    private data for the new file (will be file's private_data)
 * @flags:   [in]    flags
 * @f_mode:  [in]    fmode bits that are OR-ed into the new file's f_mode
 *
 * Behaves like anon_inode_getfile() in that the file is hooked onto a single
 * inode, which suits files that do not need a full-fledged inode to operate
 * correctly. All files created with anon_inode_getfile() share that single
 * inode, saving memory and avoiding duplicated file/inode/dentry setup code.
 * In addition, the caller may specify f_mode bits instead of open-coding the
 * assignment after the call.
 *
 * Return: the newly created file, or an error pointer on failure.
 */
struct file *anon_inode_getfile_fmode(const char *name,
				      const struct file_operations *fops,
				      void *priv, int flags, fmode_t f_mode)
{
	struct file *new_file = __anon_inode_getfile(name, fops, priv, flags,
						     NULL, false);

	/* Hand error pointers straight back; there is no file to adjust. */
	if (IS_ERR(new_file))
		return new_file;

	/* Bits are added on top of whatever the helper already set. */
	new_file->f_mode |= f_mode;
	return new_file;
}
EXPORT_SYMBOL_GPL(anon_inode_getfile_fmode);
/**
* anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new
* !S_PRIVATE anon inode rather than reuse the
@ -271,6 +303,7 @@ int anon_inode_create_getfd(const char *name, const struct file_operations *fops
return __anon_inode_getfd(name, fops, priv, flags, context_inode, true);
}
static int __init anon_inode_init(void)
{
anon_inode_mnt = kern_mount(&anon_inode_fs_type);

View File

@ -844,6 +844,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->blksize = block_bytes(c);
stat->blocks = inode->v.i_blocks;
stat->subvol = inode->ei_subvol;
stat->result_mask |= STATX_SUBVOL;
if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME;
stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);

View File

@ -3719,8 +3719,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_CAN_ODIRECT;
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
if (ret)
@ -3850,6 +3849,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)

View File

@ -8789,6 +8789,9 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
generic_fillattr(idmap, request_mask, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
stat->result_mask |= STATX_SUBVOL;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
inode_bytes = inode_get_bytes(inode);

View File

@ -355,7 +355,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
if (dentry->d_flags & DCACHE_LRU_LIST)
if (flags & DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}

View File

@ -1217,7 +1217,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/
inode_dio_begin(inode);
retval = 0;
sdio.blkbits = blkbits;
sdio.blkfactor = i_blkbits - blkbits;
sdio.block_in_file = offset >> blkbits;

View File

@ -300,9 +300,11 @@ write_tag_66_packet(char *signature, u8 cipher_code,
* | Key Identifier Size | 1 or 2 bytes |
* | Key Identifier | arbitrary |
* | File Encryption Key Size | 1 or 2 bytes |
* | Cipher Code | 1 byte |
* | File Encryption Key | arbitrary |
* | Checksum | 2 bytes |
*/
data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
*packet = kmalloc(data_len, GFP_KERNEL);
message = *packet;
if (!message) {

View File

@ -885,8 +885,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC |
FMODE_DIO_PARALLEL_WRITE;
filp->f_mode |= FMODE_NOWAIT;
return dquot_file_open(inode, filp);
}
@ -938,7 +937,6 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
@ -946,6 +944,8 @@ const struct file_operations ext4_file_operations = {
.splice_read = ext4_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
FOP_DIO_PARALLEL_WRITE,
};
const struct inode_operations ext4_file_inode_operations = {

View File

@ -1723,10 +1723,6 @@ static const struct constant_table ext4_param_dax[] = {
{}
};
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
/*
* Mount option specification
* We don't use fsparam_flag_no because of the way we set the

View File

@ -569,7 +569,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
@ -5045,4 +5045,5 @@ const struct file_operations f2fs_file_operations = {
.splice_read = f2fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fadvise = f2fs_file_fadvise,
.fop_flags = FOP_BUFFER_RASYNC,
};

View File

@ -327,6 +327,22 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
return 0;
}
/*
 * Is the file descriptor @fd a dup of @filp?
 *
 * Returns 1 when @fd resolves to the very same struct file as @filp and
 * 0 otherwise.
 */
static long f_dupfd_query(int fd, struct file *filp)
{
	/*
	 * The reference taken here is released automatically by the fd_raw
	 * cleanup class. Only the pointer value matters for the comparison
	 * below, and dropping the reference does not change it.
	 *
	 * Technically no reference is needed at all, but given the lockless
	 * file pointer lookup, the alternatives are complicated.
	 */
	CLASS(fd_raw, f)(fd);

	if (f.file != filp)
		return 0;
	return 1;
}
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@ -342,6 +358,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_DUPFD_CLOEXEC:
err = f_dupfd(argi, filp, O_CLOEXEC);
break;
case F_DUPFD_QUERY:
err = f_dupfd_query(argi, filp);
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
break;
@ -446,6 +465,7 @@ static int check_fcntl_cmd(unsigned cmd)
switch (cmd) {
case F_DUPFD:
case F_DUPFD_CLOEXEC:
case F_DUPFD_QUERY:
case F_GETFD:
case F_SETFD:
case F_GETFL:

View File

@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path,
if (f_handle.handle_bytes > MAX_HANDLE_SZ)
return -EINVAL;
handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL);
if (!handle)
return -ENOMEM;
@ -71,7 +71,7 @@ static long do_sys_name_to_handle(const struct path *path,
/* copy the mount id */
if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
copy_to_user(ufh, handle,
sizeof(struct file_handle) + handle_bytes))
struct_size(handle, f_handle, handle_bytes)))
retval = -EFAULT;
kfree(handle);
return retval;
@ -192,7 +192,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
retval = -EINVAL;
goto out_err;
}
handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL);
if (!handle) {
retval = -ENOMEM;

View File

@ -166,8 +166,7 @@ static void wb_wakeup_delayed(struct bdi_writeback *wb)
spin_unlock_irq(&wb->work_lock);
}
static void finish_writeback_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
static void finish_writeback_work(struct wb_writeback_work *work)
{
struct wb_completion *done = work->done;
@ -196,7 +195,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
list_add_tail(&work->list, &wb->work_list);
mod_delayed_work(bdi_wq, &wb->dwork, 0);
} else
finish_writeback_work(wb, work);
finish_writeback_work(work);
spin_unlock_irq(&wb->work_lock);
}
@ -1561,7 +1560,8 @@ static void inode_sleep_on_writeback(struct inode *inode)
* thread's back can have unexpected consequences.
*/
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
struct writeback_control *wbc)
struct writeback_control *wbc,
unsigned long dirtied_before)
{
if (inode->i_state & I_FREEING)
return;
@ -1594,7 +1594,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* We didn't write back all the pages. nfs_writepages()
* sometimes bales out without doing anything.
*/
if (wbc->nr_to_write <= 0) {
if (wbc->nr_to_write <= 0 &&
!inode_dirtied_after(inode, dirtied_before)) {
/* Slice used up. Queue for next turn. */
requeue_io(inode, wb);
} else {
@ -1862,6 +1863,11 @@ static long writeback_sb_inodes(struct super_block *sb,
unsigned long start_time = jiffies;
long write_chunk;
long total_wrote = 0; /* count both pages and inodes */
unsigned long dirtied_before = jiffies;
if (work->for_kupdate)
dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
@ -1967,7 +1973,7 @@ static long writeback_sb_inodes(struct super_block *sb,
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL))
total_wrote++;
requeue_inode(inode, tmp_wb, &wbc);
requeue_inode(inode, tmp_wb, &wbc, dirtied_before);
inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
@ -2069,6 +2075,7 @@ static long wb_writeback(struct bdi_writeback *wb,
struct inode *inode;
long progress;
struct blk_plug plug;
bool queued = false;
blk_start_plug(&plug);
for (;;) {
@ -2098,21 +2105,24 @@ static long wb_writeback(struct bdi_writeback *wb,
spin_lock(&wb->list_lock);
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io)) {
/*
* Kupdate and background works are special and we want to
* include all inodes that need writing. Livelock avoidance is
* handled by these works yielding to any other work so we are
* safe.
* Kupdate and background works are special and we want
* to include all inodes that need writing. Livelock
* avoidance is handled by these works yielding to any
* other work so we are safe.
*/
if (work->for_kupdate) {
dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
msecs_to_jiffies(dirty_expire_interval *
10);
} else if (work->for_background)
dirtied_before = jiffies;
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
queue_io(wb, work, dirtied_before);
queued = true;
}
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@ -2127,7 +2137,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* mean the overall work is done. So we keep looping as long
* as made some progress on cleaning pages or inodes.
*/
if (progress) {
if (progress || !queued) {
spin_unlock(&wb->list_lock);
continue;
}
@ -2262,7 +2272,7 @@ static long wb_do_writeback(struct bdi_writeback *wb)
while ((work = get_next_work_item(wb)) != NULL) {
trace_writeback_exec(wb, work);
wrote += wb_writeback(wb, work);
finish_writeback_work(wb, work);
finish_writeback_work(work);
}
/*
@ -2322,8 +2332,7 @@ void wb_workfn(struct work_struct *work)
}
/*
* Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
* write back the whole world.
* Start writeback of all dirty pages on this bdi.
*/
static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
enum wb_reason reason)
@ -2726,7 +2735,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
*/
void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(writeback_inodes_sb);

View File

@ -40,7 +40,7 @@
#include <linux/sched/mm.h>
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
@ -1301,13 +1301,14 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
const struct file_operations hugetlbfs_file_operations = {
static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
.mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
.fallocate = hugetlbfs_fallocate,
.fop_flags = FOP_HUGE_PAGES,
};
static const struct inode_operations hugetlbfs_dir_inode_operations = {

View File

@ -1110,6 +1110,9 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
return rc;
request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size);
if (request > c->sector_size - c->cleanmarker_size)
return -ERANGE;
rc = jffs2_reserve_space(c, request, &length,
ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE);
if (rc) {

View File

@ -295,6 +295,18 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
return 0;
}
/*
 * Store @dentry at index @offset in the directory's offset map and, if the
 * store succeeded, record @offset on the dentry via offset_set().
 *
 * Returns zero on success, or the non-zero value returned by mtree_store().
 */
static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry,
				 long offset)
{
	int err = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL);

	/* Only update the dentry once the map actually holds the entry. */
	if (!err)
		offset_set(dentry, offset);
	return err;
}
/**
* simple_offset_remove - Remove an entry to a directory's offset map
* @octx: directory offset ctx to be updated
@ -345,6 +357,36 @@ int simple_offset_empty(struct dentry *dentry)
return ret;
}
/**
 * simple_offset_rename - handle directory offsets for rename
 * @old_dir: parent directory of source entry
 * @old_dentry: dentry of source entry
 * @new_dir: parent directory of destination entry
 * @new_dentry: dentry of destination
 *
 * Caller provides appropriate serialization.
 *
 * User space expects the directory offset value of the replaced
 * (new) directory entry to be unchanged after a rename, so when
 * @new_dentry already carries a non-zero offset, @old_dentry is
 * stored at that same offset instead of being given a fresh one.
 *
 * Returns zero on success, a negative errno value on failure.
 */
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	struct offset_ctx *src_ctx = old_dir->i_op->get_offset_ctx(old_dir);
	struct offset_ctx *dst_ctx = new_dir->i_op->get_offset_ctx(new_dir);
	long target_offset = dentry2offset(new_dentry);

	/* The source entry leaves its old directory's map in every case. */
	simple_offset_remove(src_ctx, old_dentry);

	/* No offset recorded on the destination: add a new entry. */
	if (!target_offset)
		return simple_offset_add(dst_ctx, old_dentry);

	/*
	 * Destination has an offset: clear it on the dentry being replaced
	 * and reuse that offset for the renamed entry.
	 */
	offset_set(new_dentry, 0);
	return simple_offset_replace(dst_ctx, old_dentry, target_offset);
}
/**
* simple_offset_rename_exchange - exchange rename with directory offsets
* @old_dir: parent of dentry being moved
@ -352,6 +394,9 @@ int simple_offset_empty(struct dentry *dentry)
* @new_dir: destination parent
* @new_dentry: destination dentry
*
* This API preserves the directory offset values. Caller provides
* appropriate serialization.
*
* Returns zero on success. Otherwise a negative errno is returned and the
* rename is rolled back.
*/
@ -369,11 +414,11 @@ int simple_offset_rename_exchange(struct inode *old_dir,
simple_offset_remove(old_ctx, old_dentry);
simple_offset_remove(new_ctx, new_dentry);
ret = simple_offset_add(new_ctx, old_dentry);
ret = simple_offset_replace(new_ctx, old_dentry, new_index);
if (ret)
goto out_restore;
ret = simple_offset_add(old_ctx, new_dentry);
ret = simple_offset_replace(old_ctx, new_dentry, old_index);
if (ret) {
simple_offset_remove(new_ctx, old_dentry);
goto out_restore;
@ -388,10 +433,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
return 0;
out_restore:
offset_set(old_dentry, old_index);
mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
offset_set(new_dentry, new_index);
mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
(void)simple_offset_replace(old_ctx, old_dentry, old_index);
(void)simple_offset_replace(new_ctx, new_dentry, new_index);
return ret;
}

View File

@ -2422,6 +2422,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
if (!f.file)
return ERR_PTR(-EBADF);
if (flags & LOOKUP_LINKAT_EMPTY) {
if (f.file->f_cred != current_cred() &&
!ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) {
fdput(f);
return ERR_PTR(-ENOENT);
}
}
dentry = f.file->f_path.dentry;
if (*s && unlikely(!d_can_lookup(dentry))) {
@ -4641,14 +4649,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
goto out_putnames;
}
/*
* To use null names we require CAP_DAC_READ_SEARCH
* To use null names we require CAP_DAC_READ_SEARCH or
* that the open-time creds of the dfd matches current.
* This ensures that not everyone will be able to create
* handlink using the passed filedescriptor.
* a hardlink using the passed file descriptor.
*/
if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
error = -ENOENT;
goto out_putnames;
}
if (flags & AT_EMPTY_PATH)
how |= LOOKUP_LINKAT_EMPTY;
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;

View File

@ -60,7 +60,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_nmembs == 0)
return 0;
if (argv->v_size > PAGE_SIZE)
if ((size_t)argv->v_size > PAGE_SIZE)
return -EINVAL;
/*

View File

@ -33,9 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name,
dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name);
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d interrupt flag [%d]\n",

View File

@ -41,8 +41,7 @@ static int orangefs_create(struct mnt_idmap *idmap,
fill_default_sys_attrs(new_op->upcall.req.create.attributes,
ORANGEFS_TYPE_METAFILE, mode);
strncpy(new_op->upcall.req.create.d_name,
dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.create.d_name, dentry->d_name.name);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -137,8 +136,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
&parent->refn.khandle);
new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name);
gossip_debug(GOSSIP_NAME_DEBUG,
"%s: doing lookup on %s under %pU,%d\n",
@ -192,8 +190,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
return -ENOMEM;
new_op->upcall.req.remove.parent_refn = parent->refn;
strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.remove.d_name, dentry->d_name.name);
ret = service_operation(new_op, "orangefs_unlink",
get_interruptible_flag(inode));
@ -247,10 +244,8 @@ static int orangefs_symlink(struct mnt_idmap *idmap,
ORANGEFS_TYPE_SYMLINK,
mode);
strncpy(new_op->upcall.req.sym.entry_name,
dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name);
strscpy(new_op->upcall.req.sym.target, symname);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -324,8 +319,7 @@ static int orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
ORANGEFS_TYPE_DIRECTORY, mode);
strncpy(new_op->upcall.req.mkdir.d_name,
dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.mkdir.d_name, dentry->d_name.name);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -405,12 +399,8 @@ static int orangefs_rename(struct mnt_idmap *idmap,
new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn;
new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn;
strncpy(new_op->upcall.req.rename.d_old_name,
old_dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strncpy(new_op->upcall.req.rename.d_new_name,
new_dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
strscpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name);
strscpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name);
ret = service_operation(new_op,
"orangefs_rename",

View File

@ -253,9 +253,8 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
if (!new_op)
return -ENOMEM;
strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
orangefs_sb->devname,
ORANGEFS_MAX_SERVER_ADDR_LEN);
strscpy(new_op->upcall.req.fs_mount.orangefs_config_server,
orangefs_sb->devname);
gossip_debug(GOSSIP_SUPER_DEBUG,
"Attempting ORANGEFS Remount via host %s\n",
@ -400,8 +399,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
return -ENOMEM;
op->upcall.req.fs_umount.id = id;
op->upcall.req.fs_umount.fs_id = fs_id;
strncpy(op->upcall.req.fs_umount.orangefs_config_server,
devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
strscpy(op->upcall.req.fs_umount.orangefs_config_server, devname);
r = service_operation(op, "orangefs_fs_umount", 0);
/* Not much to do about an error here. */
if (r)
@ -494,9 +492,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
if (!new_op)
return ERR_PTR(-ENOMEM);
strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
devname,
ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname);
gossip_debug(GOSSIP_SUPER_DEBUG,
"Attempting ORANGEFS Mount via host %s\n",
@ -543,9 +539,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
* on successful mount, store the devname and data
* used
*/
strncpy(ORANGEFS_SB(sb)->devname,
devname,
ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
strscpy(ORANGEFS_SB(sb)->devname, devname);
/* mount_pending must be cleared */
ORANGEFS_SB(sb)->mount_pending = 0;

View File

@ -139,10 +139,6 @@ static int ovl_verity_mode_def(void)
return OVL_VERITY_OFF;
}
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_string("lowerdir+", Opt_lowerdir_add),

View File

@ -74,7 +74,18 @@ static int seq_show(struct seq_file *m, void *v)
return 0;
}
static int proc_fdinfo_access_allowed(struct inode *inode)
static int seq_fdinfo_open(struct inode *inode, struct file *file)
{
return single_open(file, seq_show, inode);
}
/**
* Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure
* that the current task has PTRACE_MODE_READ in addition to the normal
* POSIX-like checks.
*/
static int proc_fdinfo_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
bool allowed = false;
struct task_struct *task = get_proc_task(inode);
@ -88,18 +99,13 @@ static int proc_fdinfo_access_allowed(struct inode *inode)
if (!allowed)
return -EACCES;
return 0;
return generic_permission(idmap, inode, mask);
}
static int seq_fdinfo_open(struct inode *inode, struct file *file)
{
int ret = proc_fdinfo_access_allowed(inode);
if (ret)
return ret;
return single_open(file, seq_show, inode);
}
static const struct inode_operations proc_fdinfo_file_inode_operations = {
.permission = proc_fdinfo_permission,
.setattr = proc_setattr,
};
static const struct file_operations proc_fdinfo_file_operations = {
.open = seq_fdinfo_open,
@ -388,6 +394,8 @@ static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
ei = PROC_I(inode);
ei->fd = data->fd;
inode->i_op = &proc_fdinfo_file_inode_operations;
inode->i_fop = &proc_fdinfo_file_operations;
tid_fd_update_inode(task, inode, 0);
@ -407,23 +415,13 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
proc_fdinfo_instantiate);
}
static int proc_open_fdinfo(struct inode *inode, struct file *file)
{
int ret = proc_fdinfo_access_allowed(inode);
if (ret)
return ret;
return 0;
}
const struct inode_operations proc_fdinfo_inode_operations = {
.lookup = proc_lookupfdinfo,
.permission = proc_fdinfo_permission,
.setattr = proc_setattr,
};
const struct file_operations proc_fdinfo_operations = {
.open = proc_open_fdinfo,
.read = generic_read_dir,
.iterate_shared = proc_readfdinfo,
.llseek = generic_file_llseek,

View File

@ -135,6 +135,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* @parent: The parent directory in which to create.
* @ops: The seq_file ops with which to read the file.
* @write: The write method with which to 'modify' the file.
* @state_size: The size of the per-file private state to allocate.
* @data: Data for retrieval by pde_data().
*
* Create a network namespaced proc file in the @parent directory with the

View File

@ -1685,7 +1685,7 @@ int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
if ((iocb->ki_flags & IOCB_NOWAIT) &&
!((iocb->ki_flags & IOCB_DIRECT) ||
(file->f_mode & FMODE_BUF_WASYNC)))
(file->f_op->fop_flags & FOP_BUFFER_WASYNC)))
return -EINVAL;
return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);

View File

@ -669,18 +669,11 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);
void seq_puts(struct seq_file *m, const char *s)
void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);
if (m->count + len >= m->size) {
seq_set_overflow(m);
return;
}
memcpy(m->buf + m->count, s, len);
m->count += len;
seq_write(m, s, strlen(s));
}
EXPORT_SYMBOL(seq_puts);
EXPORT_SYMBOL(__seq_puts);
/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers

View File

@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_mnt_id = stat->mnt_id;
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
tmp.stx_subvol = stat->subvol;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}

View File

@ -1230,8 +1230,7 @@ xfs_file_open(
{
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return generic_file_open(inode, file);
}
@ -1244,7 +1243,9 @@ xfs_dir_open(
unsigned int mode;
int error;
error = xfs_file_open(inode, file);
if (xfs_is_shutdown(ip->i_mount))
return -EIO;
error = generic_file_open(inode, file);
if (error)
return error;
@ -1490,7 +1491,6 @@ const struct file_operations xfs_file_operations = {
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.mmap = xfs_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
@ -1498,6 +1498,8 @@ const struct file_operations xfs_file_operations = {
.fallocate = xfs_file_fallocate,
.fadvise = xfs_file_fadvise,
.remap_file_range = xfs_file_remap_range,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE,
};
const struct file_operations xfs_dir_file_operations = {

View File

@ -9,12 +9,17 @@
#ifndef _LINUX_ANON_INODES_H
#define _LINUX_ANON_INODES_H
#include <linux/types.h>
struct file_operations;
struct inode;
struct file *anon_inode_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags);
struct file *anon_inode_getfile_fmode(const char *name,
const struct file_operations *fops,
void *priv, int flags, fmode_t f_mode);
struct file *anon_inode_create_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags,

View File

@ -84,6 +84,7 @@ static inline void fdput_pos(struct fd f)
}
DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd)
DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd)
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);

View File

@ -110,23 +110,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
*/
/* file is open for reading */
#define FMODE_READ ((__force fmode_t)0x1)
#define FMODE_READ ((__force fmode_t)(1 << 0))
/* file is open for writing */
#define FMODE_WRITE ((__force fmode_t)0x2)
#define FMODE_WRITE ((__force fmode_t)(1 << 1))
/* file is seekable */
#define FMODE_LSEEK ((__force fmode_t)0x4)
#define FMODE_LSEEK ((__force fmode_t)(1 << 2))
/* file can be accessed using pread */
#define FMODE_PREAD ((__force fmode_t)0x8)
#define FMODE_PREAD ((__force fmode_t)(1 << 3))
/* file can be accessed using pwrite */
#define FMODE_PWRITE ((__force fmode_t)0x10)
#define FMODE_PWRITE ((__force fmode_t)(1 << 4))
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
#define FMODE_EXEC ((__force fmode_t)(1 << 5))
/* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40)
#define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6))
/* FMODE_* bits 7 to 8 */
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
#define FMODE_32BITHASH ((__force fmode_t)(1 << 9))
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400)
#define FMODE_64BITHASH ((__force fmode_t)(1 << 10))
/*
* Don't update ctime and mtime.
@ -134,60 +137,53 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
* Currently a special hack for the XFS open_by_handle ioctl, but we'll
* hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
*/
#define FMODE_NOCMTIME ((__force fmode_t)0x800)
#define FMODE_NOCMTIME ((__force fmode_t)(1 << 11))
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)0x1000)
#define FMODE_RANDOM ((__force fmode_t)(1 << 12))
/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13))
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000)
#define FMODE_PATH ((__force fmode_t)(1 << 14))
/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
#define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15))
/* Write access to underlying fs */
#define FMODE_WRITER ((__force fmode_t)0x10000)
#define FMODE_WRITER ((__force fmode_t)(1 << 16))
/* Has read method(s) */
#define FMODE_CAN_READ ((__force fmode_t)0x20000)
#define FMODE_CAN_READ ((__force fmode_t)(1 << 17))
/* Has write method(s) */
#define FMODE_CAN_WRITE ((__force fmode_t)0x40000)
#define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18))
#define FMODE_OPENED ((__force fmode_t)0x80000)
#define FMODE_CREATED ((__force fmode_t)0x100000)
#define FMODE_OPENED ((__force fmode_t)(1 << 19))
#define FMODE_CREATED ((__force fmode_t)(1 << 20))
/* File is stream-like */
#define FMODE_STREAM ((__force fmode_t)0x200000)
#define FMODE_STREAM ((__force fmode_t)(1 << 21))
/* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
#define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22))
#define FMODE_NOREUSE ((__force fmode_t)0x800000)
#define FMODE_NOREUSE ((__force fmode_t)(1 << 23))
/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000)
/* FMODE_* bit 24 */
/* File is embedded in backing_file object */
#define FMODE_BACKING ((__force fmode_t)0x2000000)
#define FMODE_BACKING ((__force fmode_t)(1 << 25))
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26))
/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
#define FMODE_NOWAIT ((__force fmode_t)(1 << 27))
/* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000)
#define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28))
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
/* File supports async buffered reads */
#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
#define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29))
/*
* Attribute flags. These should be or-ed together to figure out what
@ -1035,12 +1031,13 @@ struct file_handle {
__u32 handle_bytes;
int handle_type;
/* file identifier */
unsigned char f_handle[];
unsigned char f_handle[] __counted_by(handle_bytes);
};
static inline struct file *get_file(struct file *f)
{
atomic_long_inc(&f->f_count);
long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
return f;
}
@ -2003,8 +2000,11 @@ struct iov_iter;
struct io_uring_cmd;
struct offset_ctx;
typedef unsigned int __bitwise fop_flags_t;
struct file_operations {
struct module *owner;
fop_flags_t fop_flags;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
@ -2017,7 +2017,6 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
@ -2048,6 +2047,17 @@ struct file_operations {
unsigned int poll_flags);
} __randomize_layout;
/* Supports async buffered reads */
#define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0))
/* Supports async buffered writes */
#define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1))
/* Supports synchronous page faults for mappings */
#define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2))
/* Supports non-exclusive O_DIRECT writes from multiple threads */
#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3))
/* Contains huge pages */
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
int (*) (struct file *, struct dir_context *));
@ -2253,7 +2263,13 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
/*
 * With CONFIG_SWAP the macro tests the S_SWAPFILE bit in i_flags.
 * Without it the macro still references its argument (cast to void)
 * but always yields the constant 0U, so callers' swapfile checks
 * become compile-time false.
 */
#ifdef CONFIG_SWAP
#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
#else
#define IS_SWAPFILE(inode) ((void)(inode), 0U)
#endif
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
@ -3340,6 +3356,8 @@ void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,

View File

@ -132,4 +132,8 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
#endif /* _LINUX_FS_PARSER_H */

View File

@ -554,17 +554,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}
extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
int creat_flags, int page_size_log);
static inline bool is_file_hugepages(struct file *file)
static inline bool is_file_hugepages(const struct file *file)
{
if (file->f_op == &hugetlbfs_file_operations)
return true;
return is_file_shm_hugepages(file);
return file->f_op->fop_flags & FOP_HUGE_PAGES;
}
static inline struct hstate *hstate_inode(struct inode *i)

View File

@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */
#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)

View File

@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
void seq_puts(struct seq_file *m, const char *s);
void __seq_puts(struct seq_file *m, const char *s);
/*
 * Write the NUL-terminated string @s to the seq_file @m.
 *
 * The helper is forced inline and picks one of three paths:
 *  - the first character of @s is not a compile-time constant:
 *    hand the string to the out-of-line __seq_puts();
 *  - @s is a constant string of exactly one character:
 *    emit that character with seq_putc();
 *  - any other constant string (including the empty string):
 *    call seq_write() with the length from __builtin_strlen().
 */
static __always_inline void seq_puts(struct seq_file *m, const char *s)
{
/* Non-constant strings take the generic out-of-line path. */
if (!__builtin_constant_p(*s))
__seq_puts(m, s);
/* s[0] != 0 and s[1] == 0: a one-character string. */
else if (s[0] && !s[1])
seq_putc(m, s[0]);
else
seq_write(m, s, __builtin_strlen(s));
}
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,

View File

@ -16,7 +16,6 @@ struct sysv_shm {
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
bool is_file_shm_hugepages(struct file *file);
void exit_shm(struct task_struct *task);
#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else
@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr,
{
return -ENOSYS;
}
static inline bool is_file_shm_hugepages(struct file *file)
{
return false;
}
static inline void exit_shm(struct task_struct *task)
{
}

View File

@ -53,6 +53,7 @@ struct kstat {
u32 dio_mem_align;
u32 dio_offset_align;
u64 change_cookie;
u64 subvol;
};
/* These definitions are internal to the kernel for now. Mainly used by nfsd. */

View File

@ -8,6 +8,14 @@
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
/*
* Request notifications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE + 2)
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
/*
* Cancel a blocking posix lock; internal use only until we expose an
* asynchronous lock api to userspace:
@ -17,12 +25,6 @@
/* Create a file descriptor with FD_CLOEXEC set. */
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
/*
* Request notifications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
/*
* Set and get of pipe page size array
*/

View File

@ -126,8 +126,9 @@ struct statx {
__u64 stx_mnt_id;
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
__u64 stx_subvol; /* Subvolume identifier */
/* 0xa0 */
__u64 __spare3[12]; /* Spare space for future expansion */
__u64 __spare3[11]; /* Spare space for future expansion */
/* 0x100 */
};
@ -155,6 +156,7 @@ struct statx {
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */

View File

@ -471,7 +471,7 @@ static void io_prep_async_work(struct io_kiocb *req)
/* don't serialize this request if the fs doesn't need it */
if (should_hash && (req->file->f_flags & O_DIRECT) &&
(req->file->f_mode & FMODE_DIO_PARALLEL_WRITE))
(req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE))
should_hash = false;
if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));

View File

@ -683,7 +683,8 @@ static bool io_rw_should_retry(struct io_kiocb *req)
* just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks
*/
if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC))
if (io_file_can_poll(req) ||
!(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC))
return false;
wait->wait.func = io_async_buf_func;
@ -1029,9 +1030,9 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(!io_file_supports_nowait(req)))
goto copy_iov;
/* File path supports NOWAIT for non-direct_IO only for block devices. */
/* Check if we can support NOWAIT. */
if (!(kiocb->ki_flags & IOCB_DIRECT) &&
!(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) &&
!(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) &&
(req->flags & REQ_F_ISREG))
goto copy_iov;

View File

@ -662,8 +662,8 @@ static const struct file_operations shm_file_operations = {
};
/*
* shm_file_operations_huge is now identical to shm_file_operations,
* but we keep it distinct for the sake of is_file_shm_hugepages().
* shm_file_operations_huge is now identical to shm_file_operations
* except for fop_flags
*/
static const struct file_operations shm_file_operations_huge = {
.mmap = shm_mmap,
@ -672,13 +672,9 @@ static const struct file_operations shm_file_operations_huge = {
.get_unmapped_area = shm_get_unmapped_area,
.llseek = noop_llseek,
.fallocate = shm_fallocate,
.fop_flags = FOP_HUGE_PAGES,
};
bool is_file_shm_hugepages(struct file *file)
{
return file->f_op == &shm_file_operations_huge;
}
static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */

View File

@ -1294,7 +1294,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (!file_mmap_ok(file, inode, pgoff, len))
return -EOVERFLOW;
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
flags_mask = LEGACY_MAP_MASK;
if (file->f_op->fop_flags & FOP_MMAP_SYNC)
flags_mask |= MAP_SYNC;
switch (flags & MAP_TYPE) {
case MAP_SHARED:

View File

@ -3467,8 +3467,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return error;
}
simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
return error;

View File

@ -17,6 +17,15 @@
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif
#ifndef F_DUPFD_QUERY
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@ -45,6 +54,15 @@ TEST(core_close_range)
SKIP(return, "close_range() syscall not supported");
}
for (i = 0; i < 100; i++) {
ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
if (ret < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(ret, 0);
}
}
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++)
@ -358,7 +376,7 @@ TEST(close_range_cloexec_unshare)
*/
TEST(close_range_cloexec_syzbot)
{
int fd1, fd2, fd3, flags, ret, status;
int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
@ -372,6 +390,13 @@ TEST(close_range_cloexec_syzbot)
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@ -396,6 +421,15 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
@ -426,6 +460,24 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
fd4 = open("/dev/null", O_RDWR);
EXPECT_GT(fd4, 0);
/* Same inode, different file pointers. */
flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 0);
}
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
@ -433,6 +485,7 @@ TEST(close_range_cloexec_syzbot)
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
EXPECT_EQ(close(fd4), 0);
}
/*