vfs-6.10.misc

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZj3HuwAKCRCRxhvAZXjc
 orYvAQCZOr68uJaEaXAArYTdnMdQ6HIzG+FVlwrqtrhz0BV07wEAqgmtSR9XKh+L
 0+DNepg4R8PZOHH371eSSsLNRCUCkAs=
 =SVsU
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "This contains the usual miscellaneous features, cleanups, and fixes
  for vfs and individual fses.

  Features:

   - Free up FMODE_* bits. I've freed up bits 6, 7, 8, and 24. That
     means we now have six free FMODE_* bits in total (but bit #6
     already got used for FMODE_WRITE_RESTRICTED)

   - Add FOP_HUGE_PAGES flag (follow-up to FMODE_* cleanup)

   - Add fd_raw cleanup class so we can make use of automatic cleanup
     provided by CLASS(fd_raw, f)(fd) for O_PATH fds as well

   - Optimize seq_puts()

   - Simplify __seq_puts()

   - Add new anon_inode_getfile_fmode() api to allow specifying f_mode
     instead of open-coding it in multiple places

   - Annotate struct file_handle with __counted_by() and use
     struct_size()

   - Warn in get_file() if f_count resurrection from zero is
     attempted (epoll/drm discussion)

   - Folio-sophize aio

   - Export the subvolume id in statx() for both btrfs and bcachefs

   - Relax linkat(AT_EMPTY_PATH) requirements

   - Add F_DUPFD_QUERY fcntl(), which allows comparing two file
     descriptors for dup*() equality, replacing kcmp() for that purpose

  Cleanups:

   - Compile out swapfile inode checks when swap isn't enabled

   - Use (1 << n) notation for FMODE_* bitshifts for clarity

   - Remove redundant variable assignment in fs/direct-io

   - Clean up uses of strncpy in orangefs

   - Speed up and clean up writeback

   - Move fsparam_string_empty() helper into header since it's currently
     open-coded in multiple places

   - Add kernel-doc comments to proc_create_net_data_write()

   - Don't needlessly read dentry->d_flags twice

  Fixes:

   - Fix out-of-range warning in nilfs2

   - Fix ecryptfs overflow due to wrong encryption packet size
     calculation

   - Fix overly long line in xfs file_operations (follow-up to FMODE_*
     cleanup)

   - Don't raise FOP_BUFFER_{R,W}ASYNC for directories in xfs (follow-up
     to FMODE_* cleanup)

   - Don't call xfs_file_open from xfs_dir_open (follow-up to FMODE_*
     cleanup)

   - Fix stable offset api to prevent endless loops

   - Fix afs file server rotations

   - Prevent xattr node from overflowing the eraseblock in jffs2

   - Move fdinfo PTRACE_MODE_READ procfs check into the .permission()
     operation instead of the .open() operation, since doing the check
     in .open() caused userspace regressions"

* tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits)
  afs: Fix fileserver rotation getting stuck
  selftests: add F_DUPDFD_QUERY selftests
  fcntl: add F_DUPFD_QUERY fcntl()
  file: add fd_raw cleanup class
  fs: WARN when f_count resurrection is attempted
  seq_file: Simplify __seq_puts()
  seq_file: Optimize seq_puts()
  proc: Move fdinfo PTRACE_MODE_READ check into the inode .permission operation
  fs: Create anon_inode_getfile_fmode()
  xfs: don't call xfs_file_open from xfs_dir_open
  xfs: drop fop_flags for directories
  xfs: fix overly long line in the file_operations
  shmem: Fix shmem_rename2()
  libfs: Add simple_offset_rename() API
  libfs: Fix simple_offset_rename_exchange()
  jffs2: prevent xattr node from overflowing the eraseblock
  vfs, swap: compile out IS_SWAPFILE() on swapless configs
  vfs: relax linkat() AT_EMPTY_PATH - aka flink() - requirements
  fs/direct-io: remove redundant assignment to variable retval
  fs/dcache: Re-use value stored to dentry->d_flags instead of re-reading
  ...
This commit is contained in:
Linus Torvalds 2024-05-13 11:40:06 -07:00
commit 1b0aabcc9a
50 changed files with 439 additions and 250 deletions

View File

@ -912,7 +912,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
disk_unblock_events(disk); disk_unblock_events(disk);
bdev_file->f_flags |= O_LARGEFILE; bdev_file->f_flags |= O_LARGEFILE;
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; bdev_file->f_mode |= FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev)) if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT; bdev_file->f_mode |= FMODE_NOWAIT;
if (mode & BLK_OPEN_RESTRICT_WRITES) if (mode & BLK_OPEN_RESTRICT_WRITES)

View File

@ -863,6 +863,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read, .splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write, .splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate, .fallocate = blkdev_fallocate,
.fop_flags = FOP_BUFFER_RASYNC,
}; };
static __init int blkdev_init(void) static __init int blkdev_init(void)

View File

@ -377,7 +377,7 @@ static const struct file_operations dax_fops = {
.release = dax_release, .release = dax_release,
.get_unmapped_area = dax_get_unmapped_area, .get_unmapped_area = dax_get_unmapped_area,
.mmap = dax_mmap, .mmap = dax_mmap,
.mmap_supported_flags = MAP_SYNC, .fop_flags = FOP_MMAP_SYNC,
}; };
static void dev_dax_cdev_del(void *cdev) static void dev_dax_cdev_del(void *cdev)

View File

@ -541,11 +541,13 @@ bool afs_select_fileserver(struct afs_operation *op)
test_bit(AFS_SE_EXCLUDED, &se->flags) || test_bit(AFS_SE_EXCLUDED, &se->flags) ||
!test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
continue; continue;
es = op->server_states->endpoint_state; es = op->server_states[i].endpoint_state;
sal = es->addresses; sal = es->addresses;
afs_get_address_preferences_rcu(op->net, sal); afs_get_address_preferences_rcu(op->net, sal);
for (j = 0; j < sal->nr_addrs; j++) { for (j = 0; j < sal->nr_addrs; j++) {
if (es->failed_set & (1 << j))
continue;
if (!sal->addrs[j].peer) if (!sal->addrs[j].peer)
continue; continue;
if (sal->addrs[j].prio > best_prio) { if (sal->addrs[j].prio > best_prio) {
@ -605,6 +607,8 @@ bool afs_select_fileserver(struct afs_operation *op)
best_prio = -1; best_prio = -1;
addr_index = 0; addr_index = 0;
for (i = 0; i < alist->nr_addrs; i++) { for (i = 0; i < alist->nr_addrs; i++) {
if (!(set & (1 << i)))
continue;
if (alist->addrs[i].prio > best_prio) { if (alist->addrs[i].prio > best_prio) {
addr_index = i; addr_index = i;
best_prio = alist->addrs[i].prio; best_prio = alist->addrs[i].prio;
@ -674,7 +678,7 @@ bool afs_select_fileserver(struct afs_operation *op)
for (i = 0; i < op->server_list->nr_servers; i++) { for (i = 0; i < op->server_list->nr_servers; i++) {
struct afs_endpoint_state *estate; struct afs_endpoint_state *estate;
estate = op->server_states->endpoint_state; estate = op->server_states[i].endpoint_state;
error = READ_ONCE(estate->error); error = READ_ONCE(estate->error);
if (error < 0) if (error < 0)
afs_op_accumulate_error(op, error, estate->abort_code); afs_op_accumulate_error(op, error, estate->abort_code);

View File

@ -122,7 +122,7 @@ struct kioctx {
unsigned long mmap_base; unsigned long mmap_base;
unsigned long mmap_size; unsigned long mmap_size;
struct page **ring_pages; struct folio **ring_folios;
long nr_pages; long nr_pages;
struct rcu_work free_rwork; /* see free_ioctx() */ struct rcu_work free_rwork; /* see free_ioctx() */
@ -160,7 +160,7 @@ struct kioctx {
spinlock_t completion_lock; spinlock_t completion_lock;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
struct page *internal_pages[AIO_RING_PAGES]; struct folio *internal_folios[AIO_RING_PAGES];
struct file *aio_ring_file; struct file *aio_ring_file;
unsigned id; unsigned id;
@ -334,19 +334,20 @@ static void aio_free_ring(struct kioctx *ctx)
put_aio_ring_file(ctx); put_aio_ring_file(ctx);
for (i = 0; i < ctx->nr_pages; i++) { for (i = 0; i < ctx->nr_pages; i++) {
struct page *page; struct folio *folio = ctx->ring_folios[i];
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i])); if (!folio)
page = ctx->ring_pages[i];
if (!page)
continue; continue;
ctx->ring_pages[i] = NULL;
put_page(page); pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
ctx->ring_folios[i] = NULL;
folio_put(folio);
} }
if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) {
kfree(ctx->ring_pages); kfree(ctx->ring_folios);
ctx->ring_pages = NULL; ctx->ring_folios = NULL;
} }
} }
@ -441,7 +442,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
idx = src->index; idx = src->index;
if (idx < (pgoff_t)ctx->nr_pages) { if (idx < (pgoff_t)ctx->nr_pages) {
/* Make sure the old folio hasn't already been changed */ /* Make sure the old folio hasn't already been changed */
if (ctx->ring_pages[idx] != &src->page) if (ctx->ring_folios[idx] != src)
rc = -EAGAIN; rc = -EAGAIN;
} else } else
rc = -EINVAL; rc = -EINVAL;
@ -465,8 +466,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
*/ */
spin_lock_irqsave(&ctx->completion_lock, flags); spin_lock_irqsave(&ctx->completion_lock, flags);
folio_migrate_copy(dst, src); folio_migrate_copy(dst, src);
BUG_ON(ctx->ring_pages[idx] != &src->page); BUG_ON(ctx->ring_folios[idx] != src);
ctx->ring_pages[idx] = &dst->page; ctx->ring_folios[idx] = dst;
spin_unlock_irqrestore(&ctx->completion_lock, flags); spin_unlock_irqrestore(&ctx->completion_lock, flags);
/* The old folio is no longer accessible. */ /* The old folio is no longer accessible. */
@ -516,28 +517,30 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
/ sizeof(struct io_event); / sizeof(struct io_event);
ctx->ring_pages = ctx->internal_pages; ctx->ring_folios = ctx->internal_folios;
if (nr_pages > AIO_RING_PAGES) { if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *),
GFP_KERNEL); GFP_KERNEL);
if (!ctx->ring_pages) { if (!ctx->ring_folios) {
put_aio_ring_file(ctx); put_aio_ring_file(ctx);
return -ENOMEM; return -ENOMEM;
} }
} }
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
struct page *page; struct folio *folio;
page = find_or_create_page(file->f_mapping,
i, GFP_USER | __GFP_ZERO);
if (!page)
break;
pr_debug("pid(%d) page[%d]->count=%d\n",
current->pid, i, page_count(page));
SetPageUptodate(page);
unlock_page(page);
ctx->ring_pages[i] = page; folio = __filemap_get_folio(file->f_mapping, i,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
GFP_USER | __GFP_ZERO);
if (IS_ERR(folio))
break;
pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
folio_ref_count(folio));
folio_end_read(folio, true);
ctx->ring_folios[i] = folio;
} }
ctx->nr_pages = i; ctx->nr_pages = i;
@ -570,7 +573,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base; ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */ ctx->nr_events = nr_events; /* trusted copy */
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
ring->nr = nr_events; /* user copy */ ring->nr = nr_events; /* user copy */
ring->id = ~0U; ring->id = ~0U;
ring->head = ring->tail = 0; ring->head = ring->tail = 0;
@ -578,7 +581,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring); ring->header_length = sizeof(struct aio_ring);
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_folio(ctx->ring_folios[0]);
return 0; return 0;
} }
@ -689,9 +692,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
/* While kioctx setup is in progress, /* While kioctx setup is in progress,
* we are protected from page migration * we are protected from page migration
* changes ring_pages by ->ring_lock. * changes ring_folios by ->ring_lock.
*/ */
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
ring->id = ctx->id; ring->id = ctx->id;
return 0; return 0;
} }
@ -1033,7 +1036,7 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the * against ctx->completed_events below will make sure we do the
* safe/right thing. * safe/right thing.
*/ */
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
head = ring->head; head = ring->head;
refill_reqs_available(ctx, head, ctx->tail); refill_reqs_available(ctx, head, ctx->tail);
@ -1145,12 +1148,12 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events) if (++tail >= ctx->nr_events)
tail = 0; tail = 0;
ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE; event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res; *event = iocb->ki_res;
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
(void __user *)(unsigned long)iocb->ki_res.obj, (void __user *)(unsigned long)iocb->ki_res.obj,
@ -1163,10 +1166,10 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail; ctx->tail = tail;
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
head = ring->head; head = ring->head;
ring->tail = tail; ring->tail = tail;
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_folio(ctx->ring_folios[0]);
ctx->completed_events++; ctx->completed_events++;
if (ctx->completed_events > 1) if (ctx->completed_events > 1)
@ -1238,8 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
sched_annotate_sleep(); sched_annotate_sleep();
mutex_lock(&ctx->ring_lock); mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */ /* Access to ->ring_folios here is protected by ctx->ring_lock. */
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
head = ring->head; head = ring->head;
tail = ring->tail; tail = ring->tail;
@ -1260,20 +1263,20 @@ static long aio_read_events_ring(struct kioctx *ctx,
while (ret < nr) { while (ret < nr) {
long avail; long avail;
struct io_event *ev; struct io_event *ev;
struct page *page; struct folio *folio;
avail = (head <= tail ? tail : ctx->nr_events) - head; avail = (head <= tail ? tail : ctx->nr_events) - head;
if (head == tail) if (head == tail)
break; break;
pos = head + AIO_EVENTS_OFFSET; pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE; pos %= AIO_EVENTS_PER_PAGE;
avail = min(avail, nr - ret); avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
ev = page_address(page); ev = folio_address(folio);
copy_ret = copy_to_user(event + ret, ev + pos, copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail); sizeof(*ev) * avail);
@ -1287,9 +1290,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events; head %= ctx->nr_events;
} }
ring = page_address(ctx->ring_pages[0]); ring = folio_address(ctx->ring_folios[0]);
ring->head = head; ring->head = head;
flush_dcache_page(ctx->ring_pages[0]); flush_dcache_folio(ctx->ring_folios[0]);
pr_debug("%li h%u t%u\n", ret, head, tail); pr_debug("%li h%u t%u\n", ret, head, tail);
out: out:

View File

@ -148,6 +148,38 @@ struct file *anon_inode_getfile(const char *name,
} }
EXPORT_SYMBOL_GPL(anon_inode_getfile); EXPORT_SYMBOL_GPL(anon_inode_getfile);
/**
* anon_inode_getfile_fmode - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
* of the file
*
* @name: [in] name of the "class" of the new file
* @fops: [in] file operations for the new file
* @priv: [in] private data for the new file (will be file's private_data)
* @flags: [in] flags
* @f_mode: [in] fmode
*
* Creates a new file by hooking it on a single inode. This is useful for files
* that do not need to have a full-fledged inode in order to operate correctly.
* All the files created with anon_inode_getfile() will share a single inode,
* hence saving memory and avoiding code duplication for the file/inode/dentry
* setup. Allows setting the fmode. Returns the newly created file* or an error
* pointer.
*/
struct file *anon_inode_getfile_fmode(const char *name,
const struct file_operations *fops,
void *priv, int flags, fmode_t f_mode)
{
struct file *file;
file = __anon_inode_getfile(name, fops, priv, flags, NULL, false);
if (!IS_ERR(file))
file->f_mode |= f_mode;
return file;
}
EXPORT_SYMBOL_GPL(anon_inode_getfile_fmode);
/** /**
* anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new * anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new
* !S_PRIVATE anon inode rather than reuse the * !S_PRIVATE anon inode rather than reuse the
@ -271,6 +303,7 @@ int anon_inode_create_getfd(const char *name, const struct file_operations *fops
return __anon_inode_getfd(name, fops, priv, flags, context_inode, true); return __anon_inode_getfd(name, fops, priv, flags, context_inode, true);
} }
static int __init anon_inode_init(void) static int __init anon_inode_init(void)
{ {
anon_inode_mnt = kern_mount(&anon_inode_fs_type); anon_inode_mnt = kern_mount(&anon_inode_fs_type);

View File

@ -844,6 +844,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->blksize = block_bytes(c); stat->blksize = block_bytes(c);
stat->blocks = inode->v.i_blocks; stat->blocks = inode->v.i_blocks;
stat->subvol = inode->ei_subvol;
stat->result_mask |= STATX_SUBVOL;
if (request_mask & STATX_BTIME) { if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME; stat->result_mask |= STATX_BTIME;
stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);

View File

@ -3719,8 +3719,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{ {
int ret; int ret;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp); ret = fsverity_file_open(inode, filp);
if (ret) if (ret)
@ -3850,6 +3849,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl, .compat_ioctl = btrfs_compat_ioctl,
#endif #endif
.remap_file_range = btrfs_remap_file_range, .remap_file_range = btrfs_remap_file_range,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
}; };
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)

View File

@ -8789,6 +8789,9 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
generic_fillattr(idmap, request_mask, inode, stat); generic_fillattr(idmap, request_mask, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev; stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
stat->result_mask |= STATX_SUBVOL;
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes; delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
inode_bytes = inode_get_bytes(inode); inode_bytes = inode_get_bytes(inode);

View File

@ -355,7 +355,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE; flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags); WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL; dentry->d_inode = NULL;
if (dentry->d_flags & DCACHE_LRU_LIST) if (flags & DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative); this_cpu_inc(nr_dentry_negative);
} }

View File

@ -1217,7 +1217,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/ */
inode_dio_begin(inode); inode_dio_begin(inode);
retval = 0;
sdio.blkbits = blkbits; sdio.blkbits = blkbits;
sdio.blkfactor = i_blkbits - blkbits; sdio.blkfactor = i_blkbits - blkbits;
sdio.block_in_file = offset >> blkbits; sdio.block_in_file = offset >> blkbits;

View File

@ -300,9 +300,11 @@ write_tag_66_packet(char *signature, u8 cipher_code,
* | Key Identifier Size | 1 or 2 bytes | * | Key Identifier Size | 1 or 2 bytes |
* | Key Identifier | arbitrary | * | Key Identifier | arbitrary |
* | File Encryption Key Size | 1 or 2 bytes | * | File Encryption Key Size | 1 or 2 bytes |
* | Cipher Code | 1 byte |
* | File Encryption Key | arbitrary | * | File Encryption Key | arbitrary |
* | Checksum | 2 bytes |
*/ */
data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
*packet = kmalloc(data_len, GFP_KERNEL); *packet = kmalloc(data_len, GFP_KERNEL);
message = *packet; message = *packet;
if (!message) { if (!message) {

View File

@ -885,8 +885,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret; return ret;
} }
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | filp->f_mode |= FMODE_NOWAIT;
FMODE_DIO_PARALLEL_WRITE;
return dquot_file_open(inode, filp); return dquot_file_open(inode, filp);
} }
@ -938,7 +937,6 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl, .compat_ioctl = ext4_compat_ioctl,
#endif #endif
.mmap = ext4_file_mmap, .mmap = ext4_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = ext4_file_open, .open = ext4_file_open,
.release = ext4_release_file, .release = ext4_release_file,
.fsync = ext4_sync_file, .fsync = ext4_sync_file,
@ -946,6 +944,8 @@ const struct file_operations ext4_file_operations = {
.splice_read = ext4_file_splice_read, .splice_read = ext4_file_splice_read,
.splice_write = iter_file_splice_write, .splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate, .fallocate = ext4_fallocate,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
FOP_DIO_PARALLEL_WRITE,
}; };
const struct inode_operations ext4_file_inode_operations = { const struct inode_operations ext4_file_inode_operations = {

View File

@ -1723,10 +1723,6 @@ static const struct constant_table ext4_param_dax[] = {
{} {}
}; };
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
/* /*
* Mount option specification * Mount option specification
* We don't use fsparam_flag_no because of the way we set the * We don't use fsparam_flag_no because of the way we set the

View File

@ -569,7 +569,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err) if (err)
return err; return err;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_CAN_ODIRECT; filp->f_mode |= FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp); return dquot_file_open(inode, filp);
@ -5045,4 +5045,5 @@ const struct file_operations f2fs_file_operations = {
.splice_read = f2fs_file_splice_read, .splice_read = f2fs_file_splice_read,
.splice_write = iter_file_splice_write, .splice_write = iter_file_splice_write,
.fadvise = f2fs_file_fadvise, .fadvise = f2fs_file_fadvise,
.fop_flags = FOP_BUFFER_RASYNC,
}; };

View File

@ -327,6 +327,22 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
return 0; return 0;
} }
/* Is the file descriptor a dup of the file? */
static long f_dupfd_query(int fd, struct file *filp)
{
CLASS(fd_raw, f)(fd);
/*
* We can do the 'fdput()' immediately, as the only thing that
* matters is the pointer value which isn't changed by the fdput.
*
* Technically we didn't need a ref at all, and 'fdget()' was
* overkill, but given our lockless file pointer lookup, the
* alternatives are complicated.
*/
return f.file == filp;
}
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp) struct file *filp)
{ {
@ -342,6 +358,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_DUPFD_CLOEXEC: case F_DUPFD_CLOEXEC:
err = f_dupfd(argi, filp, O_CLOEXEC); err = f_dupfd(argi, filp, O_CLOEXEC);
break; break;
case F_DUPFD_QUERY:
err = f_dupfd_query(argi, filp);
break;
case F_GETFD: case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
break; break;
@ -446,6 +465,7 @@ static int check_fcntl_cmd(unsigned cmd)
switch (cmd) { switch (cmd) {
case F_DUPFD: case F_DUPFD:
case F_DUPFD_CLOEXEC: case F_DUPFD_CLOEXEC:
case F_DUPFD_QUERY:
case F_GETFD: case F_GETFD:
case F_SETFD: case F_SETFD:
case F_GETFL: case F_GETFL:

View File

@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path,
if (f_handle.handle_bytes > MAX_HANDLE_SZ) if (f_handle.handle_bytes > MAX_HANDLE_SZ)
return -EINVAL; return -EINVAL;
handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL); GFP_KERNEL);
if (!handle) if (!handle)
return -ENOMEM; return -ENOMEM;
@ -71,7 +71,7 @@ static long do_sys_name_to_handle(const struct path *path,
/* copy the mount id */ /* copy the mount id */
if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
copy_to_user(ufh, handle, copy_to_user(ufh, handle,
sizeof(struct file_handle) + handle_bytes)) struct_size(handle, f_handle, handle_bytes)))
retval = -EFAULT; retval = -EFAULT;
kfree(handle); kfree(handle);
return retval; return retval;
@ -192,7 +192,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
retval = -EINVAL; retval = -EINVAL;
goto out_err; goto out_err;
} }
handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL); GFP_KERNEL);
if (!handle) { if (!handle) {
retval = -ENOMEM; retval = -ENOMEM;

View File

@ -166,8 +166,7 @@ static void wb_wakeup_delayed(struct bdi_writeback *wb)
spin_unlock_irq(&wb->work_lock); spin_unlock_irq(&wb->work_lock);
} }
static void finish_writeback_work(struct bdi_writeback *wb, static void finish_writeback_work(struct wb_writeback_work *work)
struct wb_writeback_work *work)
{ {
struct wb_completion *done = work->done; struct wb_completion *done = work->done;
@ -196,7 +195,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
list_add_tail(&work->list, &wb->work_list); list_add_tail(&work->list, &wb->work_list);
mod_delayed_work(bdi_wq, &wb->dwork, 0); mod_delayed_work(bdi_wq, &wb->dwork, 0);
} else } else
finish_writeback_work(wb, work); finish_writeback_work(work);
spin_unlock_irq(&wb->work_lock); spin_unlock_irq(&wb->work_lock);
} }
@ -1561,7 +1560,8 @@ static void inode_sleep_on_writeback(struct inode *inode)
* thread's back can have unexpected consequences. * thread's back can have unexpected consequences.
*/ */
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
struct writeback_control *wbc) struct writeback_control *wbc,
unsigned long dirtied_before)
{ {
if (inode->i_state & I_FREEING) if (inode->i_state & I_FREEING)
return; return;
@ -1594,7 +1594,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* We didn't write back all the pages. nfs_writepages() * We didn't write back all the pages. nfs_writepages()
* sometimes bales out without doing anything. * sometimes bales out without doing anything.
*/ */
if (wbc->nr_to_write <= 0) { if (wbc->nr_to_write <= 0 &&
!inode_dirtied_after(inode, dirtied_before)) {
/* Slice used up. Queue for next turn. */ /* Slice used up. Queue for next turn. */
requeue_io(inode, wb); requeue_io(inode, wb);
} else { } else {
@ -1862,6 +1863,11 @@ static long writeback_sb_inodes(struct super_block *sb,
unsigned long start_time = jiffies; unsigned long start_time = jiffies;
long write_chunk; long write_chunk;
long total_wrote = 0; /* count both pages and inodes */ long total_wrote = 0; /* count both pages and inodes */
unsigned long dirtied_before = jiffies;
if (work->for_kupdate)
dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
while (!list_empty(&wb->b_io)) { while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev); struct inode *inode = wb_inode(wb->b_io.prev);
@ -1967,7 +1973,7 @@ static long writeback_sb_inodes(struct super_block *sb,
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL)) if (!(inode->i_state & I_DIRTY_ALL))
total_wrote++; total_wrote++;
requeue_inode(inode, tmp_wb, &wbc); requeue_inode(inode, tmp_wb, &wbc, dirtied_before);
inode_sync_complete(inode); inode_sync_complete(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
@ -2069,6 +2075,7 @@ static long wb_writeback(struct bdi_writeback *wb,
struct inode *inode; struct inode *inode;
long progress; long progress;
struct blk_plug plug; struct blk_plug plug;
bool queued = false;
blk_start_plug(&plug); blk_start_plug(&plug);
for (;;) { for (;;) {
@ -2098,21 +2105,24 @@ static long wb_writeback(struct bdi_writeback *wb,
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
/*
* Kupdate and background works are special and we want to
* include all inodes that need writing. Livelock avoidance is
* handled by these works yielding to any other work so we are
* safe.
*/
if (work->for_kupdate) {
dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
dirtied_before = jiffies;
trace_writeback_start(wb, work); trace_writeback_start(wb, work);
if (list_empty(&wb->b_io)) if (list_empty(&wb->b_io)) {
/*
* Kupdate and background works are special and we want
* to include all inodes that need writing. Livelock
* avoidance is handled by these works yielding to any
* other work so we are safe.
*/
if (work->for_kupdate) {
dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval *
10);
} else if (work->for_background)
dirtied_before = jiffies;
queue_io(wb, work, dirtied_before); queue_io(wb, work, dirtied_before);
queued = true;
}
if (work->sb) if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work); progress = writeback_sb_inodes(work->sb, wb, work);
else else
@ -2127,7 +2137,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* mean the overall work is done. So we keep looping as long * mean the overall work is done. So we keep looping as long
* as made some progress on cleaning pages or inodes. * as made some progress on cleaning pages or inodes.
*/ */
if (progress) { if (progress || !queued) {
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
continue; continue;
} }
@ -2262,7 +2272,7 @@ static long wb_do_writeback(struct bdi_writeback *wb)
while ((work = get_next_work_item(wb)) != NULL) { while ((work = get_next_work_item(wb)) != NULL) {
trace_writeback_exec(wb, work); trace_writeback_exec(wb, work);
wrote += wb_writeback(wb, work); wrote += wb_writeback(wb, work);
finish_writeback_work(wb, work); finish_writeback_work(work);
} }
/* /*
@ -2322,8 +2332,7 @@ void wb_workfn(struct work_struct *work)
} }
/* /*
* Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero, * Start writeback of all dirty pages on this bdi.
* write back the whole world.
*/ */
static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
enum wb_reason reason) enum wb_reason reason)
@ -2726,7 +2735,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
*/ */
void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{ {
return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
} }
EXPORT_SYMBOL(writeback_inodes_sb); EXPORT_SYMBOL(writeback_inodes_sb);

View File

@ -40,7 +40,7 @@
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
static const struct address_space_operations hugetlbfs_aops; static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations; static const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations; static const struct inode_operations hugetlbfs_inode_operations;
@ -1301,13 +1301,14 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
const struct file_operations hugetlbfs_file_operations = { static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter, .read_iter = hugetlbfs_read_iter,
.mmap = hugetlbfs_file_mmap, .mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync, .fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area, .get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek, .llseek = default_llseek,
.fallocate = hugetlbfs_fallocate, .fallocate = hugetlbfs_fallocate,
.fop_flags = FOP_HUGE_PAGES,
}; };
static const struct inode_operations hugetlbfs_dir_inode_operations = { static const struct inode_operations hugetlbfs_dir_inode_operations = {

View File

@ -1110,6 +1110,9 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
return rc; return rc;
request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size);
if (request > c->sector_size - c->cleanmarker_size)
return -ERANGE;
rc = jffs2_reserve_space(c, request, &length, rc = jffs2_reserve_space(c, request, &length,
ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE);
if (rc) { if (rc) {

View File

@ -295,6 +295,18 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
return 0; return 0;
} }
/*
* simple_offset_replace - store @dentry at a caller-chosen directory offset
* @octx: directory offset ctx whose map (@octx->mt) is updated
* @dentry: dentry to store
* @offset: index in @octx->mt at which @dentry is stored
*
* The index is supplied by the caller; the entry is written with
* mtree_store() at exactly @offset.
*
* Returns zero on success, or the non-zero value returned by mtree_store().
*/
static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry,
long offset)
{
int ret;
ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL);
if (ret)
return ret;
/*
* Only touch the dentry once the store has succeeded, so a failed
* store leaves the dentry as it was.
* NOTE(review): offset_set() is defined outside this hunk; presumably
* it records the offset on the dentry -- confirm.
*/
offset_set(dentry, offset);
return 0;
}
/** /**
* simple_offset_remove - Remove an entry to a directory's offset map * simple_offset_remove - Remove an entry to a directory's offset map
* @octx: directory offset ctx to be updated * @octx: directory offset ctx to be updated
@ -345,6 +357,36 @@ int simple_offset_empty(struct dentry *dentry)
return ret; return ret;
} }
/**
* simple_offset_rename - handle directory offsets for rename
* @old_dir: parent directory of source entry
* @old_dentry: dentry of source entry
* @new_dir: parent directory of destination entry
* @new_dentry: dentry of destination
*
* Caller provides appropriate serialization.
*
* User space expects the directory offset value of the replaced
* (new) directory entry to be unchanged after a rename.
*
* If @new_dentry already carries a non-zero offset, @old_dentry takes over
* that same offset in @new_dir's map; otherwise @old_dentry is inserted
* with simple_offset_add(). In both cases @old_dentry is removed from
* @old_dir's map first, and this function does not put it back if the
* subsequent insertion fails.
*
* Returns zero on success, a negative errno value on failure.
*/
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
/* Sample the destination's offset before anything is modified. */
long new_offset = dentry2offset(new_dentry);
/* The source entry leaves its old directory's map unconditionally. */
simple_offset_remove(old_ctx, old_dentry);
if (new_offset) {
/*
* Destination already has an offset: clear it on new_dentry and
* hand that exact offset to old_dentry.
*/
offset_set(new_dentry, 0);
return simple_offset_replace(new_ctx, old_dentry, new_offset);
}
/* Nothing to inherit; insert old_dentry via simple_offset_add(). */
return simple_offset_add(new_ctx, old_dentry);
}
/** /**
* simple_offset_rename_exchange - exchange rename with directory offsets * simple_offset_rename_exchange - exchange rename with directory offsets
* @old_dir: parent of dentry being moved * @old_dir: parent of dentry being moved
@ -352,6 +394,9 @@ int simple_offset_empty(struct dentry *dentry)
* @new_dir: destination parent * @new_dir: destination parent
* @new_dentry: destination dentry * @new_dentry: destination dentry
* *
* This API preserves the directory offset values. Caller provides
* appropriate serialization.
*
* Returns zero on success. Otherwise a negative errno is returned and the * Returns zero on success. Otherwise a negative errno is returned and the
* rename is rolled back. * rename is rolled back.
*/ */
@ -369,11 +414,11 @@ int simple_offset_rename_exchange(struct inode *old_dir,
simple_offset_remove(old_ctx, old_dentry); simple_offset_remove(old_ctx, old_dentry);
simple_offset_remove(new_ctx, new_dentry); simple_offset_remove(new_ctx, new_dentry);
ret = simple_offset_add(new_ctx, old_dentry); ret = simple_offset_replace(new_ctx, old_dentry, new_index);
if (ret) if (ret)
goto out_restore; goto out_restore;
ret = simple_offset_add(old_ctx, new_dentry); ret = simple_offset_replace(old_ctx, new_dentry, old_index);
if (ret) { if (ret) {
simple_offset_remove(new_ctx, old_dentry); simple_offset_remove(new_ctx, old_dentry);
goto out_restore; goto out_restore;
@ -388,10 +433,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
return 0; return 0;
out_restore: out_restore:
offset_set(old_dentry, old_index); (void)simple_offset_replace(old_ctx, old_dentry, old_index);
mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL); (void)simple_offset_replace(new_ctx, new_dentry, new_index);
offset_set(new_dentry, new_index);
mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
return ret; return ret;
} }

View File

@ -2422,6 +2422,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
if (!f.file) if (!f.file)
return ERR_PTR(-EBADF); return ERR_PTR(-EBADF);
if (flags & LOOKUP_LINKAT_EMPTY) {
if (f.file->f_cred != current_cred() &&
!ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) {
fdput(f);
return ERR_PTR(-ENOENT);
}
}
dentry = f.file->f_path.dentry; dentry = f.file->f_path.dentry;
if (*s && unlikely(!d_can_lookup(dentry))) { if (*s && unlikely(!d_can_lookup(dentry))) {
@ -4641,14 +4649,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
goto out_putnames; goto out_putnames;
} }
/* /*
* To use null names we require CAP_DAC_READ_SEARCH * To use null names we require CAP_DAC_READ_SEARCH or
* that the open-time creds of the dfd matches current.
* This ensures that not everyone will be able to create * This ensures that not everyone will be able to create
* handlink using the passed filedescriptor. * a hardlink using the passed file descriptor.
*/ */
if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { if (flags & AT_EMPTY_PATH)
error = -ENOENT; how |= LOOKUP_LINKAT_EMPTY;
goto out_putnames;
}
if (flags & AT_SYMLINK_FOLLOW) if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW; how |= LOOKUP_FOLLOW;

View File

@ -60,7 +60,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_nmembs == 0) if (argv->v_nmembs == 0)
return 0; return 0;
if (argv->v_size > PAGE_SIZE) if ((size_t)argv->v_size > PAGE_SIZE)
return -EINVAL; return -EINVAL;
/* /*

View File

@ -33,9 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
new_op->upcall.req.lookup.parent_refn = parent->refn; new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name, strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name);
dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
gossip_debug(GOSSIP_DCACHE_DEBUG, gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d interrupt flag [%d]\n", "%s:%s:%d interrupt flag [%d]\n",

View File

@ -41,8 +41,7 @@ static int orangefs_create(struct mnt_idmap *idmap,
fill_default_sys_attrs(new_op->upcall.req.create.attributes, fill_default_sys_attrs(new_op->upcall.req.create.attributes,
ORANGEFS_TYPE_METAFILE, mode); ORANGEFS_TYPE_METAFILE, mode);
strncpy(new_op->upcall.req.create.d_name, strscpy(new_op->upcall.req.create.d_name, dentry->d_name.name);
dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -137,8 +136,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
&parent->refn.khandle); &parent->refn.khandle);
new_op->upcall.req.lookup.parent_refn = parent->refn; new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name);
ORANGEFS_NAME_MAX - 1);
gossip_debug(GOSSIP_NAME_DEBUG, gossip_debug(GOSSIP_NAME_DEBUG,
"%s: doing lookup on %s under %pU,%d\n", "%s: doing lookup on %s under %pU,%d\n",
@ -192,8 +190,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
return -ENOMEM; return -ENOMEM;
new_op->upcall.req.remove.parent_refn = parent->refn; new_op->upcall.req.remove.parent_refn = parent->refn;
strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, strscpy(new_op->upcall.req.remove.d_name, dentry->d_name.name);
ORANGEFS_NAME_MAX - 1);
ret = service_operation(new_op, "orangefs_unlink", ret = service_operation(new_op, "orangefs_unlink",
get_interruptible_flag(inode)); get_interruptible_flag(inode));
@ -247,10 +244,8 @@ static int orangefs_symlink(struct mnt_idmap *idmap,
ORANGEFS_TYPE_SYMLINK, ORANGEFS_TYPE_SYMLINK,
mode); mode);
strncpy(new_op->upcall.req.sym.entry_name, strscpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name);
dentry->d_name.name, strscpy(new_op->upcall.req.sym.target, symname);
ORANGEFS_NAME_MAX - 1);
strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -324,8 +319,7 @@ static int orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes, fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
ORANGEFS_TYPE_DIRECTORY, mode); ORANGEFS_TYPE_DIRECTORY, mode);
strncpy(new_op->upcall.req.mkdir.d_name, strscpy(new_op->upcall.req.mkdir.d_name, dentry->d_name.name);
dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
@ -405,12 +399,8 @@ static int orangefs_rename(struct mnt_idmap *idmap,
new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn; new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn;
new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn; new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn;
strncpy(new_op->upcall.req.rename.d_old_name, strscpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name);
old_dentry->d_name.name, strscpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name);
ORANGEFS_NAME_MAX - 1);
strncpy(new_op->upcall.req.rename.d_new_name,
new_dentry->d_name.name,
ORANGEFS_NAME_MAX - 1);
ret = service_operation(new_op, ret = service_operation(new_op,
"orangefs_rename", "orangefs_rename",

View File

@ -253,9 +253,8 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT); new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
if (!new_op) if (!new_op)
return -ENOMEM; return -ENOMEM;
strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, strscpy(new_op->upcall.req.fs_mount.orangefs_config_server,
orangefs_sb->devname, orangefs_sb->devname);
ORANGEFS_MAX_SERVER_ADDR_LEN);
gossip_debug(GOSSIP_SUPER_DEBUG, gossip_debug(GOSSIP_SUPER_DEBUG,
"Attempting ORANGEFS Remount via host %s\n", "Attempting ORANGEFS Remount via host %s\n",
@ -400,8 +399,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
return -ENOMEM; return -ENOMEM;
op->upcall.req.fs_umount.id = id; op->upcall.req.fs_umount.id = id;
op->upcall.req.fs_umount.fs_id = fs_id; op->upcall.req.fs_umount.fs_id = fs_id;
strncpy(op->upcall.req.fs_umount.orangefs_config_server, strscpy(op->upcall.req.fs_umount.orangefs_config_server, devname);
devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
r = service_operation(op, "orangefs_fs_umount", 0); r = service_operation(op, "orangefs_fs_umount", 0);
/* Not much to do about an error here. */ /* Not much to do about an error here. */
if (r) if (r)
@ -494,9 +492,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
if (!new_op) if (!new_op)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname);
devname,
ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
gossip_debug(GOSSIP_SUPER_DEBUG, gossip_debug(GOSSIP_SUPER_DEBUG,
"Attempting ORANGEFS Mount via host %s\n", "Attempting ORANGEFS Mount via host %s\n",
@ -543,9 +539,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
* on successful mount, store the devname and data * on successful mount, store the devname and data
* used * used
*/ */
strncpy(ORANGEFS_SB(sb)->devname, strscpy(ORANGEFS_SB(sb)->devname, devname);
devname,
ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
/* mount_pending must be cleared */ /* mount_pending must be cleared */
ORANGEFS_SB(sb)->mount_pending = 0; ORANGEFS_SB(sb)->mount_pending = 0;

View File

@ -139,10 +139,6 @@ static int ovl_verity_mode_def(void)
return OVL_VERITY_OFF; return OVL_VERITY_OFF;
} }
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
const struct fs_parameter_spec ovl_parameter_spec[] = { const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_string("lowerdir+", Opt_lowerdir_add), fsparam_string("lowerdir+", Opt_lowerdir_add),

View File

@ -74,7 +74,18 @@ static int seq_show(struct seq_file *m, void *v)
return 0; return 0;
} }
static int proc_fdinfo_access_allowed(struct inode *inode) static int seq_fdinfo_open(struct inode *inode, struct file *file)
{
return single_open(file, seq_show, inode);
}
/**
* Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure
* that the current task has PTRACE_MODE_READ in addition to the normal
* POSIX-like checks.
*/
static int proc_fdinfo_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{ {
bool allowed = false; bool allowed = false;
struct task_struct *task = get_proc_task(inode); struct task_struct *task = get_proc_task(inode);
@ -88,18 +99,13 @@ static int proc_fdinfo_access_allowed(struct inode *inode)
if (!allowed) if (!allowed)
return -EACCES; return -EACCES;
return 0; return generic_permission(idmap, inode, mask);
} }
static int seq_fdinfo_open(struct inode *inode, struct file *file) static const struct inode_operations proc_fdinfo_file_inode_operations = {
{ .permission = proc_fdinfo_permission,
int ret = proc_fdinfo_access_allowed(inode); .setattr = proc_setattr,
};
if (ret)
return ret;
return single_open(file, seq_show, inode);
}
static const struct file_operations proc_fdinfo_file_operations = { static const struct file_operations proc_fdinfo_file_operations = {
.open = seq_fdinfo_open, .open = seq_fdinfo_open,
@ -388,6 +394,8 @@ static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
ei = PROC_I(inode); ei = PROC_I(inode);
ei->fd = data->fd; ei->fd = data->fd;
inode->i_op = &proc_fdinfo_file_inode_operations;
inode->i_fop = &proc_fdinfo_file_operations; inode->i_fop = &proc_fdinfo_file_operations;
tid_fd_update_inode(task, inode, 0); tid_fd_update_inode(task, inode, 0);
@ -407,23 +415,13 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
proc_fdinfo_instantiate); proc_fdinfo_instantiate);
} }
static int proc_open_fdinfo(struct inode *inode, struct file *file)
{
int ret = proc_fdinfo_access_allowed(inode);
if (ret)
return ret;
return 0;
}
const struct inode_operations proc_fdinfo_inode_operations = { const struct inode_operations proc_fdinfo_inode_operations = {
.lookup = proc_lookupfdinfo, .lookup = proc_lookupfdinfo,
.permission = proc_fdinfo_permission,
.setattr = proc_setattr, .setattr = proc_setattr,
}; };
const struct file_operations proc_fdinfo_operations = { const struct file_operations proc_fdinfo_operations = {
.open = proc_open_fdinfo,
.read = generic_read_dir, .read = generic_read_dir,
.iterate_shared = proc_readfdinfo, .iterate_shared = proc_readfdinfo,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,

View File

@ -135,6 +135,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* @parent: The parent directory in which to create. * @parent: The parent directory in which to create.
* @ops: The seq_file ops with which to read the file. * @ops: The seq_file ops with which to read the file.
* @write: The write method with which to 'modify' the file. * @write: The write method with which to 'modify' the file.
* @state_size: The size of the per-file private state to allocate.
* @data: Data for retrieval by pde_data(). * @data: Data for retrieval by pde_data().
* *
* Create a network namespaced proc file in the @parent directory with the * Create a network namespaced proc file in the @parent directory with the

View File

@ -1685,7 +1685,7 @@ int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
if ((iocb->ki_flags & IOCB_NOWAIT) && if ((iocb->ki_flags & IOCB_NOWAIT) &&
!((iocb->ki_flags & IOCB_DIRECT) || !((iocb->ki_flags & IOCB_DIRECT) ||
(file->f_mode & FMODE_BUF_WASYNC))) (file->f_op->fop_flags & FOP_BUFFER_WASYNC)))
return -EINVAL; return -EINVAL;
return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);

View File

@ -669,18 +669,11 @@ void seq_putc(struct seq_file *m, char c)
} }
EXPORT_SYMBOL(seq_putc); EXPORT_SYMBOL(seq_putc);
void seq_puts(struct seq_file *m, const char *s) void __seq_puts(struct seq_file *m, const char *s)
{ {
int len = strlen(s); seq_write(m, s, strlen(s));
if (m->count + len >= m->size) {
seq_set_overflow(m);
return;
}
memcpy(m->buf + m->count, s, len);
m->count += len;
} }
EXPORT_SYMBOL(seq_puts); EXPORT_SYMBOL(__seq_puts);
/** /**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers * seq_put_decimal_ull_width - A helper routine for putting decimal numbers

View File

@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_mnt_id = stat->mnt_id; tmp.stx_mnt_id = stat->mnt_id;
tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align; tmp.stx_dio_offset_align = stat->dio_offset_align;
tmp.stx_subvol = stat->subvol;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
} }

View File

@ -1230,8 +1230,7 @@ xfs_file_open(
{ {
if (xfs_is_shutdown(XFS_M(inode->i_sb))) if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO; return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
return generic_file_open(inode, file); return generic_file_open(inode, file);
} }
@ -1244,7 +1243,9 @@ xfs_dir_open(
unsigned int mode; unsigned int mode;
int error; int error;
error = xfs_file_open(inode, file); if (xfs_is_shutdown(ip->i_mount))
return -EIO;
error = generic_file_open(inode, file);
if (error) if (error)
return error; return error;
@ -1490,7 +1491,6 @@ const struct file_operations xfs_file_operations = {
.compat_ioctl = xfs_file_compat_ioctl, .compat_ioctl = xfs_file_compat_ioctl,
#endif #endif
.mmap = xfs_file_mmap, .mmap = xfs_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = xfs_file_open, .open = xfs_file_open,
.release = xfs_file_release, .release = xfs_file_release,
.fsync = xfs_file_fsync, .fsync = xfs_file_fsync,
@ -1498,6 +1498,8 @@ const struct file_operations xfs_file_operations = {
.fallocate = xfs_file_fallocate, .fallocate = xfs_file_fallocate,
.fadvise = xfs_file_fadvise, .fadvise = xfs_file_fadvise,
.remap_file_range = xfs_file_remap_range, .remap_file_range = xfs_file_remap_range,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE,
}; };
const struct file_operations xfs_dir_file_operations = { const struct file_operations xfs_dir_file_operations = {

View File

@ -9,12 +9,17 @@
#ifndef _LINUX_ANON_INODES_H #ifndef _LINUX_ANON_INODES_H
#define _LINUX_ANON_INODES_H #define _LINUX_ANON_INODES_H
#include <linux/types.h>
struct file_operations; struct file_operations;
struct inode; struct inode;
struct file *anon_inode_getfile(const char *name, struct file *anon_inode_getfile(const char *name,
const struct file_operations *fops, const struct file_operations *fops,
void *priv, int flags); void *priv, int flags);
struct file *anon_inode_getfile_fmode(const char *name,
const struct file_operations *fops,
void *priv, int flags, fmode_t f_mode);
struct file *anon_inode_create_getfile(const char *name, struct file *anon_inode_create_getfile(const char *name,
const struct file_operations *fops, const struct file_operations *fops,
void *priv, int flags, void *priv, int flags,

View File

@ -84,6 +84,7 @@ static inline void fdput_pos(struct fd f)
} }
DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd)
DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd)
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags);

View File

@ -110,23 +110,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
*/ */
/* file is open for reading */ /* file is open for reading */
#define FMODE_READ ((__force fmode_t)0x1) #define FMODE_READ ((__force fmode_t)(1 << 0))
/* file is open for writing */ /* file is open for writing */
#define FMODE_WRITE ((__force fmode_t)0x2) #define FMODE_WRITE ((__force fmode_t)(1 << 1))
/* file is seekable */ /* file is seekable */
#define FMODE_LSEEK ((__force fmode_t)0x4) #define FMODE_LSEEK ((__force fmode_t)(1 << 2))
/* file can be accessed using pread */ /* file can be accessed using pread */
#define FMODE_PREAD ((__force fmode_t)0x8) #define FMODE_PREAD ((__force fmode_t)(1 << 3))
/* file can be accessed using pwrite */ /* file can be accessed using pwrite */
#define FMODE_PWRITE ((__force fmode_t)0x10) #define FMODE_PWRITE ((__force fmode_t)(1 << 4))
/* File is opened for execution with sys_execve / sys_uselib */ /* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20) #define FMODE_EXEC ((__force fmode_t)(1 << 5))
/* File writes are restricted (block device specific) */ /* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) #define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6))
/* FMODE_* bits 7 to 8 */
/* 32bit hashes as llseek() offset (for directories) */ /* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200) #define FMODE_32BITHASH ((__force fmode_t)(1 << 9))
/* 64bit hashes as llseek() offset (for directories) */ /* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400) #define FMODE_64BITHASH ((__force fmode_t)(1 << 10))
/* /*
* Don't update ctime and mtime. * Don't update ctime and mtime.
@ -134,60 +137,53 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
* Currently a special hack for the XFS open_by_handle ioctl, but we'll * Currently a special hack for the XFS open_by_handle ioctl, but we'll
* hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
*/ */
#define FMODE_NOCMTIME ((__force fmode_t)0x800) #define FMODE_NOCMTIME ((__force fmode_t)(1 << 11))
/* Expect random access pattern */ /* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)0x1000) #define FMODE_RANDOM ((__force fmode_t)(1 << 12))
/* File is huge (eg. /dev/mem): treat loff_t as unsigned */ /* File is huge (eg. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13))
/* File is opened with O_PATH; almost nothing can be done with it */ /* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000) #define FMODE_PATH ((__force fmode_t)(1 << 14))
/* File needs atomic accesses to f_pos */ /* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) #define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15))
/* Write access to underlying fs */ /* Write access to underlying fs */
#define FMODE_WRITER ((__force fmode_t)0x10000) #define FMODE_WRITER ((__force fmode_t)(1 << 16))
/* Has read method(s) */ /* Has read method(s) */
#define FMODE_CAN_READ ((__force fmode_t)0x20000) #define FMODE_CAN_READ ((__force fmode_t)(1 << 17))
/* Has write method(s) */ /* Has write method(s) */
#define FMODE_CAN_WRITE ((__force fmode_t)0x40000) #define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18))
#define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_OPENED ((__force fmode_t)(1 << 19))
#define FMODE_CREATED ((__force fmode_t)0x100000) #define FMODE_CREATED ((__force fmode_t)(1 << 20))
/* File is stream-like */ /* File is stream-like */
#define FMODE_STREAM ((__force fmode_t)0x200000) #define FMODE_STREAM ((__force fmode_t)(1 << 21))
/* File supports DIRECT IO */ /* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) #define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22))
#define FMODE_NOREUSE ((__force fmode_t)0x800000) #define FMODE_NOREUSE ((__force fmode_t)(1 << 23))
/* File supports non-exclusive O_DIRECT writes from multiple threads */ /* FMODE_* bit 24 */
#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000)
/* File is embedded in backing_file object */ /* File is embedded in backing_file object */
#define FMODE_BACKING ((__force fmode_t)0x2000000) #define FMODE_BACKING ((__force fmode_t)(1 << 25))
/* File was opened by fanotify and shouldn't generate fanotify events */ /* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) #define FMODE_NONOTIFY ((__force fmode_t)(1 << 26))
/* File is capable of returning -EAGAIN if I/O will block */ /* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT ((__force fmode_t)0x8000000) #define FMODE_NOWAIT ((__force fmode_t)(1 << 27))
/* File represents mount that needs unmounting */ /* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) #define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28))
/* File does not contribute to nr_files count */ /* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29))
/* File supports async buffered reads */
#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000)
/* /*
* Attribute flags. These should be or-ed together to figure out what * Attribute flags. These should be or-ed together to figure out what
@ -1035,12 +1031,13 @@ struct file_handle {
__u32 handle_bytes; __u32 handle_bytes;
int handle_type; int handle_type;
/* file identifier */ /* file identifier */
unsigned char f_handle[]; unsigned char f_handle[] __counted_by(handle_bytes);
}; };
static inline struct file *get_file(struct file *f) static inline struct file *get_file(struct file *f)
{ {
atomic_long_inc(&f->f_count); long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
return f; return f;
} }
@ -2003,8 +2000,11 @@ struct iov_iter;
struct io_uring_cmd; struct io_uring_cmd;
struct offset_ctx; struct offset_ctx;
typedef unsigned int __bitwise fop_flags_t;
struct file_operations { struct file_operations {
struct module *owner; struct module *owner;
fop_flags_t fop_flags;
loff_t (*llseek) (struct file *, loff_t, int); loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
@ -2017,7 +2017,6 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *); int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *); int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id); int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *); int (*release) (struct inode *, struct file *);
@ -2048,6 +2047,17 @@ struct file_operations {
unsigned int poll_flags); unsigned int poll_flags);
} __randomize_layout; } __randomize_layout;
/*
* Flag bits for the fop_flags member of struct file_operations
* (type fop_flags_t). Each flag occupies its own bit, so flags can be
* or-ed together in an initializer.
*/
/* Supports async buffered reads */
#define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0))
/* Supports async buffered writes */
#define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1))
/* Supports synchronous page faults for mappings */
#define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2))
/* Supports non-exclusive O_DIRECT writes from multiple threads */
#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3))
/* Contains huge pages */
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Wrap a directory iterator that needs exclusive inode access */ /* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *, int wrap_directory_iterator(struct file *, struct dir_context *,
int (*) (struct file *, struct dir_context *)); int (*) (struct file *, struct dir_context *));
@ -2253,7 +2263,13 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
#ifdef CONFIG_SWAP
#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
#else
#define IS_SWAPFILE(inode) ((void)(inode), 0U)
#endif
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_IMA(inode) ((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
@ -3340,6 +3356,8 @@ void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_empty(struct dentry *dentry); int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir, int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry, struct dentry *old_dentry,
struct inode *new_dir, struct inode *new_dir,

View File

@ -132,4 +132,8 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
#endif /* _LINUX_FS_PARSER_H */ #endif /* _LINUX_FS_PARSER_H */

View File

@ -554,17 +554,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
} }
extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops; extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
int creat_flags, int page_size_log); int creat_flags, int page_size_log);
static inline bool is_file_hugepages(struct file *file) static inline bool is_file_hugepages(const struct file *file)
{ {
if (file->f_op == &hugetlbfs_file_operations) return file->f_op->fop_flags & FOP_HUGE_PAGES;
return true;
return is_file_shm_hugepages(file);
} }
static inline struct hstate *hstate_inode(struct inode *i) static inline struct hstate *hstate_inode(struct inode *i)

View File

@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */
#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)

View File

@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3) __printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c); void seq_putc(struct seq_file *m, char c);
void seq_puts(struct seq_file *m, const char *s); void __seq_puts(struct seq_file *m, const char *s);
static __always_inline void seq_puts(struct seq_file *m, const char *s)
{
if (!__builtin_constant_p(*s))
__seq_puts(m, s);
else if (s[0] && !s[1])
seq_putc(m, s[0]);
else
seq_write(m, s, __builtin_strlen(s));
}
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width); unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,

View File

@ -16,7 +16,6 @@ struct sysv_shm {
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba); unsigned long shmlba);
bool is_file_shm_hugepages(struct file *file);
void exit_shm(struct task_struct *task); void exit_shm(struct task_struct *task);
#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else #else
@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr,
{ {
return -ENOSYS; return -ENOSYS;
} }
static inline bool is_file_shm_hugepages(struct file *file)
{
return false;
}
static inline void exit_shm(struct task_struct *task) static inline void exit_shm(struct task_struct *task)
{ {
} }

View File

@ -53,6 +53,7 @@ struct kstat {
u32 dio_mem_align; u32 dio_mem_align;
u32 dio_offset_align; u32 dio_offset_align;
u64 change_cookie; u64 change_cookie;
u64 subvol;
}; };
/* These definitions are internal to the kernel for now. Mainly used by nfsd. */ /* These definitions are internal to the kernel for now. Mainly used by nfsd. */

View File

@ -8,6 +8,14 @@
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) #define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
/*
* Request notifications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE + 2)
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
/* /*
* Cancel a blocking posix lock; internal use only until we expose an * Cancel a blocking posix lock; internal use only until we expose an
* asynchronous lock api to userspace: * asynchronous lock api to userspace:
@ -17,12 +25,6 @@
/* Create a file descriptor with FD_CLOEXEC set. */ /* Create a file descriptor with FD_CLOEXEC set. */
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) #define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
/*
* Request notifications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
/* /*
* Set and get of pipe page size array * Set and get of pipe page size array
*/ */

View File

@ -126,8 +126,9 @@ struct statx {
__u64 stx_mnt_id; __u64 stx_mnt_id;
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
__u64 stx_subvol; /* Subvolume identifier */
/* 0xa0 */ /* 0xa0 */
__u64 __spare3[12]; /* Spare space for future expansion */ __u64 __spare3[11]; /* Spare space for future expansion */
/* 0x100 */ /* 0x100 */
}; };
@ -155,6 +156,7 @@ struct statx {
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */

View File

@ -471,7 +471,7 @@ static void io_prep_async_work(struct io_kiocb *req)
/* don't serialize this request if the fs doesn't need it */ /* don't serialize this request if the fs doesn't need it */
if (should_hash && (req->file->f_flags & O_DIRECT) && if (should_hash && (req->file->f_flags & O_DIRECT) &&
(req->file->f_mode & FMODE_DIO_PARALLEL_WRITE)) (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE))
should_hash = false; should_hash = false;
if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file)); io_wq_hash_work(&req->work, file_inode(req->file));

View File

@ -683,7 +683,8 @@ static bool io_rw_should_retry(struct io_kiocb *req)
* just use poll if we can, and don't attempt if the fs doesn't * just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks * support callback based unlocks
*/ */
if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC)) if (io_file_can_poll(req) ||
!(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC))
return false; return false;
wait->wait.func = io_async_buf_func; wait->wait.func = io_async_buf_func;
@ -1029,10 +1030,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(!io_file_supports_nowait(req))) if (unlikely(!io_file_supports_nowait(req)))
goto copy_iov; goto copy_iov;
/* File path supports NOWAIT for non-direct_IO only for block devices. */ /* Check if we can support NOWAIT. */
if (!(kiocb->ki_flags & IOCB_DIRECT) && if (!(kiocb->ki_flags & IOCB_DIRECT) &&
!(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) && !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) &&
(req->flags & REQ_F_ISREG)) (req->flags & REQ_F_ISREG))
goto copy_iov; goto copy_iov;
kiocb->ki_flags |= IOCB_NOWAIT; kiocb->ki_flags |= IOCB_NOWAIT;

View File

@ -662,8 +662,8 @@ static const struct file_operations shm_file_operations = {
}; };
/* /*
* shm_file_operations_huge is now identical to shm_file_operations, * shm_file_operations_huge is now identical to shm_file_operations
* but we keep it distinct for the sake of is_file_shm_hugepages(). * except for fop_flags
*/ */
static const struct file_operations shm_file_operations_huge = { static const struct file_operations shm_file_operations_huge = {
.mmap = shm_mmap, .mmap = shm_mmap,
@ -672,13 +672,9 @@ static const struct file_operations shm_file_operations_huge = {
.get_unmapped_area = shm_get_unmapped_area, .get_unmapped_area = shm_get_unmapped_area,
.llseek = noop_llseek, .llseek = noop_llseek,
.fallocate = shm_fallocate, .fallocate = shm_fallocate,
.fop_flags = FOP_HUGE_PAGES,
}; };
bool is_file_shm_hugepages(struct file *file)
{
return file->f_op == &shm_file_operations_huge;
}
static const struct vm_operations_struct shm_vm_ops = { static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */ .open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */ .close = shm_close, /* callback for when the vm-area is released */

View File

@ -1294,7 +1294,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (!file_mmap_ok(file, inode, pgoff, len)) if (!file_mmap_ok(file, inode, pgoff, len))
return -EOVERFLOW; return -EOVERFLOW;
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; flags_mask = LEGACY_MAP_MASK;
if (file->f_op->fop_flags & FOP_MMAP_SYNC)
flags_mask |= MAP_SYNC;
switch (flags & MAP_TYPE) { switch (flags & MAP_TYPE) {
case MAP_SHARED: case MAP_SHARED:

View File

@ -3467,8 +3467,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return error; return error;
} }
simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
if (error) if (error)
return error; return error;

View File

@ -17,6 +17,15 @@
#include "../kselftest_harness.h" #include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h" #include "../clone3/clone3_selftests.h"
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif
#ifndef F_DUPFD_QUERY
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
static inline int sys_close_range(unsigned int fd, unsigned int max_fd, static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags) unsigned int flags)
{ {
@ -45,6 +54,15 @@ TEST(core_close_range)
SKIP(return, "close_range() syscall not supported"); SKIP(return, "close_range() syscall not supported");
} }
for (i = 0; i < 100; i++) {
ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
if (ret < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(ret, 0);
}
}
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++) for (i = 0; i <= 50; i++)
@ -358,7 +376,7 @@ TEST(close_range_cloexec_unshare)
*/ */
TEST(close_range_cloexec_syzbot) TEST(close_range_cloexec_syzbot)
{ {
int fd1, fd2, fd3, flags, ret, status; int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid; pid_t pid;
struct __clone_args args = { struct __clone_args args = {
.flags = CLONE_FILES, .flags = CLONE_FILES,
@ -372,6 +390,13 @@ TEST(close_range_cloexec_syzbot)
fd2 = dup2(fd1, 1000); fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0); EXPECT_GT(fd2, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
pid = sys_clone3(&args, sizeof(args)); pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0); ASSERT_GE(pid, 0);
@ -396,6 +421,15 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42); fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0); EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
/* /*
* Duplicating the file descriptor must remove the * Duplicating the file descriptor must remove the
* FD_CLOEXEC flag. * FD_CLOEXEC flag.
@ -426,6 +460,24 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42); fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0); EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
fd4 = open("/dev/null", O_RDWR);
EXPECT_GT(fd4, 0);
/* Same inode, different file pointers. */
flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 0);
}
flags = fcntl(fd3, F_GETFD); flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1); EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0); EXPECT_EQ(flags & FD_CLOEXEC, 0);
@ -433,6 +485,7 @@ TEST(close_range_cloexec_syzbot)
EXPECT_EQ(close(fd1), 0); EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0); EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0); EXPECT_EQ(close(fd3), 0);
EXPECT_EQ(close(fd4), 0);
} }
/* /*