mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 02:05:33 +00:00
7bb46a6734
Introduce a new truncate calling sequence into fs/mm subsystems. Rather than setattr > vmtruncate > truncate, have filesystems call their truncate sequence from ->setattr if filesystem specific operations are required. vmtruncate is deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced previously should be used.

simple_setattr is introduced for simple in-ram filesystems to implement the new truncate sequence. Eventually all filesystems should be converted to implement a setattr, and the default code in notify_change should go away.

simple_setsize is also introduced to perform just the ATTR_SIZE portion of simple_setattr (ie. changing i_size and trimming pagecache).

To implement the new truncate sequence:
- filesystem specific manipulations (eg freeing blocks) must be done in the setattr method rather than ->truncate.
- vmtruncate can not be used by core code to trim blocks past i_size in the event of write failure after allocation, so this must be performed in the fs code.
- convert usage of helpers block_write_begin, nobh_write_begin, cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed variants. These avoid calling vmtruncate to trim blocks (see previous).
- inode_setattr should not be used. generic_setattr is a new function to be used to copy simple attributes into the generic inode.
- make use of the better opportunity to handle errors with the new sequence.

Big problem with the previous calling sequence: the filesystem is not called until i_size has already changed. This means it is not allowed to fail the call, and also it does not know what the previous i_size was. Also, generic code calling vmtruncate to truncate allocated blocks in case of error had no good way to return a meaningful error (or, for example, atomically handle block deallocation).

Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
258 lines
6.7 KiB
C
258 lines
6.7 KiB
C
/*
|
|
* linux/fs/attr.c
|
|
*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
* changes by Thomas Schoebel-Theuer
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/time.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/string.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/security.h>
|
|
|
|
/* Taken over from the old code... */
|
|
|
|
/* POSIX UID/GID verification for setting inode attributes. */
|
|
int inode_change_ok(const struct inode *inode, struct iattr *attr)
|
|
{
|
|
int retval = -EPERM;
|
|
unsigned int ia_valid = attr->ia_valid;
|
|
|
|
/* If force is set do it anyway. */
|
|
if (ia_valid & ATTR_FORCE)
|
|
goto fine;
|
|
|
|
/* Make sure a caller can chown. */
|
|
if ((ia_valid & ATTR_UID) &&
|
|
(current_fsuid() != inode->i_uid ||
|
|
attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
|
|
goto error;
|
|
|
|
/* Make sure caller can chgrp. */
|
|
if ((ia_valid & ATTR_GID) &&
|
|
(current_fsuid() != inode->i_uid ||
|
|
(!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
|
|
!capable(CAP_CHOWN))
|
|
goto error;
|
|
|
|
/* Make sure a caller can chmod. */
|
|
if (ia_valid & ATTR_MODE) {
|
|
if (!is_owner_or_cap(inode))
|
|
goto error;
|
|
/* Also check the setgid bit! */
|
|
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
|
|
inode->i_gid) && !capable(CAP_FSETID))
|
|
attr->ia_mode &= ~S_ISGID;
|
|
}
|
|
|
|
/* Check for setting the inode time. */
|
|
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
|
|
if (!is_owner_or_cap(inode))
|
|
goto error;
|
|
}
|
|
fine:
|
|
retval = 0;
|
|
error:
|
|
return retval;
|
|
}
|
|
EXPORT_SYMBOL(inode_change_ok);
|
|
|
|
/**
|
|
* inode_newsize_ok - may this inode be truncated to a given size
|
|
* @inode: the inode to be truncated
|
|
* @offset: the new size to assign to the inode
|
|
* @Returns: 0 on success, -ve errno on failure
|
|
*
|
|
* inode_newsize_ok must be called with i_mutex held.
|
|
*
|
|
* inode_newsize_ok will check filesystem limits and ulimits to check that the
|
|
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
|
|
* when necessary. Caller must not proceed with inode size change if failure is
|
|
* returned. @inode must be a file (not directory), with appropriate
|
|
* permissions to allow truncate (inode_newsize_ok does NOT check these
|
|
* conditions).
|
|
*/
|
|
int inode_newsize_ok(const struct inode *inode, loff_t offset)
|
|
{
|
|
if (inode->i_size < offset) {
|
|
unsigned long limit;
|
|
|
|
limit = rlimit(RLIMIT_FSIZE);
|
|
if (limit != RLIM_INFINITY && offset > limit)
|
|
goto out_sig;
|
|
if (offset > inode->i_sb->s_maxbytes)
|
|
goto out_big;
|
|
} else {
|
|
/*
|
|
* truncation of in-use swapfiles is disallowed - it would
|
|
* cause subsequent swapout to scribble on the now-freed
|
|
* blocks.
|
|
*/
|
|
if (IS_SWAPFILE(inode))
|
|
return -ETXTBSY;
|
|
}
|
|
|
|
return 0;
|
|
out_sig:
|
|
send_sig(SIGXFSZ, current, 0);
|
|
out_big:
|
|
return -EFBIG;
|
|
}
|
|
EXPORT_SYMBOL(inode_newsize_ok);
|
|
|
|
/**
|
|
* generic_setattr - copy simple metadata updates into the generic inode
|
|
* @inode: the inode to be updated
|
|
* @attr: the new attributes
|
|
*
|
|
* generic_setattr must be called with i_mutex held.
|
|
*
|
|
* generic_setattr updates the inode's metadata with that specified
|
|
* in attr. Noticably missing is inode size update, which is more complex
|
|
* as it requires pagecache updates. See simple_setsize.
|
|
*
|
|
* The inode is not marked as dirty after this operation. The rationale is
|
|
* that for "simple" filesystems, the struct inode is the inode storage.
|
|
* The caller is free to mark the inode dirty afterwards if needed.
|
|
*/
|
|
void generic_setattr(struct inode *inode, const struct iattr *attr)
|
|
{
|
|
unsigned int ia_valid = attr->ia_valid;
|
|
|
|
if (ia_valid & ATTR_UID)
|
|
inode->i_uid = attr->ia_uid;
|
|
if (ia_valid & ATTR_GID)
|
|
inode->i_gid = attr->ia_gid;
|
|
if (ia_valid & ATTR_ATIME)
|
|
inode->i_atime = timespec_trunc(attr->ia_atime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_MTIME)
|
|
inode->i_mtime = timespec_trunc(attr->ia_mtime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_CTIME)
|
|
inode->i_ctime = timespec_trunc(attr->ia_ctime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_MODE) {
|
|
umode_t mode = attr->ia_mode;
|
|
|
|
if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
|
|
mode &= ~S_ISGID;
|
|
inode->i_mode = mode;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(generic_setattr);
|
|
|
|
/*
|
|
* note this function is deprecated, the new truncate sequence should be
|
|
* used instead -- see eg. simple_setsize, generic_setattr.
|
|
*/
|
|
int inode_setattr(struct inode *inode, const struct iattr *attr)
|
|
{
|
|
unsigned int ia_valid = attr->ia_valid;
|
|
|
|
if (ia_valid & ATTR_SIZE &&
|
|
attr->ia_size != i_size_read(inode)) {
|
|
int error;
|
|
|
|
error = vmtruncate(inode, attr->ia_size);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
generic_setattr(inode, attr);
|
|
|
|
mark_inode_dirty(inode);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(inode_setattr);
|
|
|
|
int notify_change(struct dentry * dentry, struct iattr * attr)
|
|
{
|
|
struct inode *inode = dentry->d_inode;
|
|
mode_t mode = inode->i_mode;
|
|
int error;
|
|
struct timespec now;
|
|
unsigned int ia_valid = attr->ia_valid;
|
|
|
|
if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
|
|
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
|
|
return -EPERM;
|
|
}
|
|
|
|
now = current_fs_time(inode->i_sb);
|
|
|
|
attr->ia_ctime = now;
|
|
if (!(ia_valid & ATTR_ATIME_SET))
|
|
attr->ia_atime = now;
|
|
if (!(ia_valid & ATTR_MTIME_SET))
|
|
attr->ia_mtime = now;
|
|
if (ia_valid & ATTR_KILL_PRIV) {
|
|
attr->ia_valid &= ~ATTR_KILL_PRIV;
|
|
ia_valid &= ~ATTR_KILL_PRIV;
|
|
error = security_inode_need_killpriv(dentry);
|
|
if (error > 0)
|
|
error = security_inode_killpriv(dentry);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* We now pass ATTR_KILL_S*ID to the lower level setattr function so
|
|
* that the function has the ability to reinterpret a mode change
|
|
* that's due to these bits. This adds an implicit restriction that
|
|
* no function will ever call notify_change with both ATTR_MODE and
|
|
* ATTR_KILL_S*ID set.
|
|
*/
|
|
if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) &&
|
|
(ia_valid & ATTR_MODE))
|
|
BUG();
|
|
|
|
if (ia_valid & ATTR_KILL_SUID) {
|
|
if (mode & S_ISUID) {
|
|
ia_valid = attr->ia_valid |= ATTR_MODE;
|
|
attr->ia_mode = (inode->i_mode & ~S_ISUID);
|
|
}
|
|
}
|
|
if (ia_valid & ATTR_KILL_SGID) {
|
|
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
|
|
if (!(ia_valid & ATTR_MODE)) {
|
|
ia_valid = attr->ia_valid |= ATTR_MODE;
|
|
attr->ia_mode = inode->i_mode;
|
|
}
|
|
attr->ia_mode &= ~S_ISGID;
|
|
}
|
|
}
|
|
if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
|
|
return 0;
|
|
|
|
error = security_inode_setattr(dentry, attr);
|
|
if (error)
|
|
return error;
|
|
|
|
if (ia_valid & ATTR_SIZE)
|
|
down_write(&dentry->d_inode->i_alloc_sem);
|
|
|
|
if (inode->i_op && inode->i_op->setattr) {
|
|
error = inode->i_op->setattr(dentry, attr);
|
|
} else {
|
|
error = inode_change_ok(inode, attr);
|
|
if (!error)
|
|
error = inode_setattr(inode, attr);
|
|
}
|
|
|
|
if (ia_valid & ATTR_SIZE)
|
|
up_write(&dentry->d_inode->i_alloc_sem);
|
|
|
|
if (!error)
|
|
fsnotify_change(dentry, ia_valid);
|
|
|
|
return error;
|
|
}
|
|
|
|
EXPORT_SYMBOL(notify_change);
|