mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
7f016edaa0
Merely checking if the directory is encrypted happens for every open when using ext4, at the moment refing and unrefing the parent, costing 2 atomics and serializing opens of different files. The most common case of encryption not being used can be checked for with RCU instead. Sample result from open1_processes -t 20 ("Separate file open/close") from will-it-scale on Sapphire Rapids (ops/s): before: 12539898 after: 25575494 (+103%) v2: - add a comment justifying rcu usage, submitted by Eric Biggers - whack spurious IS_ENCRYPTED check from the refed case Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> Link: https://lore.kernel.org/r/20240508081400.422212-1-mjguzik@gmail.com Signed-off-by: Eric Biggers <ebiggers@google.com>
472 lines
15 KiB
C
472 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* fs/crypto/hooks.c
|
|
*
|
|
* Encryption hooks for higher-level filesystem operations.
|
|
*/
|
|
|
|
#include "fscrypt_private.h"
|
|
|
|
/**
|
|
* fscrypt_file_open() - prepare to open a possibly-encrypted regular file
|
|
* @inode: the inode being opened
|
|
* @filp: the struct file being set up
|
|
*
|
|
* Currently, an encrypted regular file can only be opened if its encryption key
|
|
* is available; access to the raw encrypted contents is not supported.
|
|
* Therefore, we first set up the inode's encryption key (if not already done)
|
|
* and return an error if it's unavailable.
|
|
*
|
|
* We also verify that if the parent directory (from the path via which the file
|
|
* is being opened) is encrypted, then the inode being opened uses the same
|
|
* encryption policy. This is needed as part of the enforcement that all files
|
|
* in an encrypted directory tree use the same encryption policy, as a
|
|
* protection against certain types of offline attacks. Note that this check is
|
|
* needed even when opening an *unencrypted* file, since it's forbidden to have
|
|
* an unencrypted file in an encrypted directory.
|
|
*
|
|
* Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
|
|
*/
|
|
int fscrypt_file_open(struct inode *inode, struct file *filp)
|
|
{
|
|
int err;
|
|
struct dentry *dentry, *dentry_parent;
|
|
struct inode *inode_parent;
|
|
|
|
err = fscrypt_require_key(inode);
|
|
if (err)
|
|
return err;
|
|
|
|
dentry = file_dentry(filp);
|
|
|
|
/*
|
|
* Getting a reference to the parent dentry is needed for the actual
|
|
* encryption policy comparison, but it's expensive on multi-core
|
|
* systems. Since this function runs on unencrypted files too, start
|
|
* with a lightweight RCU-mode check for the parent directory being
|
|
* unencrypted (in which case it's fine for the child to be either
|
|
* unencrypted, or encrypted with any policy). Only continue on to the
|
|
* full policy check if the parent directory is actually encrypted.
|
|
*/
|
|
rcu_read_lock();
|
|
dentry_parent = READ_ONCE(dentry->d_parent);
|
|
inode_parent = d_inode_rcu(dentry_parent);
|
|
if (inode_parent != NULL && !IS_ENCRYPTED(inode_parent)) {
|
|
rcu_read_unlock();
|
|
return 0;
|
|
}
|
|
rcu_read_unlock();
|
|
|
|
dentry_parent = dget_parent(dentry);
|
|
if (!fscrypt_has_permitted_context(d_inode(dentry_parent), inode)) {
|
|
fscrypt_warn(inode,
|
|
"Inconsistent encryption context (parent directory: %lu)",
|
|
d_inode(dentry_parent)->i_ino);
|
|
err = -EPERM;
|
|
}
|
|
dput(dentry_parent);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(fscrypt_file_open);
|
|
|
|
int __fscrypt_prepare_link(struct inode *inode, struct inode *dir,
|
|
struct dentry *dentry)
|
|
{
|
|
if (fscrypt_is_nokey_name(dentry))
|
|
return -ENOKEY;
|
|
/*
|
|
* We don't need to separately check that the directory inode's key is
|
|
* available, as it's implied by the dentry not being a no-key name.
|
|
*/
|
|
|
|
if (!fscrypt_has_permitted_context(dir, inode))
|
|
return -EXDEV;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_prepare_link);
|
|
|
|
int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|
struct inode *new_dir, struct dentry *new_dentry,
|
|
unsigned int flags)
|
|
{
|
|
if (fscrypt_is_nokey_name(old_dentry) ||
|
|
fscrypt_is_nokey_name(new_dentry))
|
|
return -ENOKEY;
|
|
/*
|
|
* We don't need to separately check that the directory inodes' keys are
|
|
* available, as it's implied by the dentries not being no-key names.
|
|
*/
|
|
|
|
if (old_dir != new_dir) {
|
|
if (IS_ENCRYPTED(new_dir) &&
|
|
!fscrypt_has_permitted_context(new_dir,
|
|
d_inode(old_dentry)))
|
|
return -EXDEV;
|
|
|
|
if ((flags & RENAME_EXCHANGE) &&
|
|
IS_ENCRYPTED(old_dir) &&
|
|
!fscrypt_has_permitted_context(old_dir,
|
|
d_inode(new_dentry)))
|
|
return -EXDEV;
|
|
}
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename);
|
|
|
|
int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
|
|
struct fscrypt_name *fname)
|
|
{
|
|
int err = fscrypt_setup_filename(dir, &dentry->d_name, 1, fname);
|
|
|
|
if (err && err != -ENOENT)
|
|
return err;
|
|
|
|
fscrypt_prepare_dentry(dentry, fname->is_nokey_name);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
|
|
|
|
/**
|
|
* fscrypt_prepare_lookup_partial() - prepare lookup without filename setup
|
|
* @dir: the encrypted directory being searched
|
|
* @dentry: the dentry being looked up in @dir
|
|
*
|
|
* This function should be used by the ->lookup and ->atomic_open methods of
|
|
* filesystems that handle filename encryption and no-key name encoding
|
|
* themselves and thus can't use fscrypt_prepare_lookup(). Like
|
|
* fscrypt_prepare_lookup(), this will try to set up the directory's encryption
|
|
* key and will set DCACHE_NOKEY_NAME on the dentry if the key is unavailable.
|
|
* However, this function doesn't set up a struct fscrypt_name for the filename.
|
|
*
|
|
* Return: 0 on success; -errno on error. Note that the encryption key being
|
|
* unavailable is not considered an error. It is also not an error if
|
|
* the encryption policy is unsupported by this kernel; that is treated
|
|
* like the key being unavailable, so that files can still be deleted.
|
|
*/
|
|
int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry)
|
|
{
|
|
int err = fscrypt_get_encryption_info(dir, true);
|
|
bool is_nokey_name = (!err && !fscrypt_has_encryption_key(dir));
|
|
|
|
fscrypt_prepare_dentry(dentry, is_nokey_name);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(fscrypt_prepare_lookup_partial);
|
|
|
|
int __fscrypt_prepare_readdir(struct inode *dir)
|
|
{
|
|
return fscrypt_get_encryption_info(dir, true);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_prepare_readdir);
|
|
|
|
int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr)
|
|
{
|
|
if (attr->ia_valid & ATTR_SIZE)
|
|
return fscrypt_require_key(d_inode(dentry));
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_prepare_setattr);
|
|
|
|
/**
|
|
* fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS
|
|
* @inode: the inode on which flags are being changed
|
|
* @oldflags: the old flags
|
|
* @flags: the new flags
|
|
*
|
|
* The caller should be holding i_rwsem for write.
|
|
*
|
|
* Return: 0 on success; -errno if the flags change isn't allowed or if
|
|
* another error occurs.
|
|
*/
|
|
int fscrypt_prepare_setflags(struct inode *inode,
|
|
unsigned int oldflags, unsigned int flags)
|
|
{
|
|
struct fscrypt_inode_info *ci;
|
|
struct fscrypt_master_key *mk;
|
|
int err;
|
|
|
|
/*
|
|
* When the CASEFOLD flag is set on an encrypted directory, we must
|
|
* derive the secret key needed for the dirhash. This is only possible
|
|
* if the directory uses a v2 encryption policy.
|
|
*/
|
|
if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) {
|
|
err = fscrypt_require_key(inode);
|
|
if (err)
|
|
return err;
|
|
ci = inode->i_crypt_info;
|
|
if (ci->ci_policy.version != FSCRYPT_POLICY_V2)
|
|
return -EINVAL;
|
|
mk = ci->ci_master_key;
|
|
down_read(&mk->mk_sem);
|
|
if (mk->mk_present)
|
|
err = fscrypt_derive_dirhash_key(ci, mk);
|
|
else
|
|
err = -ENOKEY;
|
|
up_read(&mk->mk_sem);
|
|
return err;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* fscrypt_prepare_symlink() - prepare to create a possibly-encrypted symlink
|
|
* @dir: directory in which the symlink is being created
|
|
* @target: plaintext symlink target
|
|
* @len: length of @target excluding null terminator
|
|
* @max_len: space the filesystem has available to store the symlink target
|
|
* @disk_link: (out) the on-disk symlink target being prepared
|
|
*
|
|
* This function computes the size the symlink target will require on-disk,
|
|
* stores it in @disk_link->len, and validates it against @max_len. An
|
|
* encrypted symlink may be longer than the original.
|
|
*
|
|
* Additionally, @disk_link->name is set to @target if the symlink will be
|
|
* unencrypted, but left NULL if the symlink will be encrypted. For encrypted
|
|
* symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the
|
|
* on-disk target later. (The reason for the two-step process is that some
|
|
* filesystems need to know the size of the symlink target before creating the
|
|
* inode, e.g. to determine whether it will be a "fast" or "slow" symlink.)
|
|
*
|
|
* Return: 0 on success, -ENAMETOOLONG if the symlink target is too long,
|
|
* -ENOKEY if the encryption key is missing, or another -errno code if a problem
|
|
* occurred while setting up the encryption key.
|
|
*/
|
|
int fscrypt_prepare_symlink(struct inode *dir, const char *target,
|
|
unsigned int len, unsigned int max_len,
|
|
struct fscrypt_str *disk_link)
|
|
{
|
|
const union fscrypt_policy *policy;
|
|
|
|
/*
|
|
* To calculate the size of the encrypted symlink target we need to know
|
|
* the amount of NUL padding, which is determined by the flags set in
|
|
* the encryption policy which will be inherited from the directory.
|
|
*/
|
|
policy = fscrypt_policy_to_inherit(dir);
|
|
if (policy == NULL) {
|
|
/* Not encrypted */
|
|
disk_link->name = (unsigned char *)target;
|
|
disk_link->len = len + 1;
|
|
if (disk_link->len > max_len)
|
|
return -ENAMETOOLONG;
|
|
return 0;
|
|
}
|
|
if (IS_ERR(policy))
|
|
return PTR_ERR(policy);
|
|
|
|
/*
|
|
* Calculate the size of the encrypted symlink and verify it won't
|
|
* exceed max_len. Note that for historical reasons, encrypted symlink
|
|
* targets are prefixed with the ciphertext length, despite this
|
|
* actually being redundant with i_size. This decreases by 2 bytes the
|
|
* longest symlink target we can accept.
|
|
*
|
|
* We could recover 1 byte by not counting a null terminator, but
|
|
* counting it (even though it is meaningless for ciphertext) is simpler
|
|
* for now since filesystems will assume it is there and subtract it.
|
|
*/
|
|
if (!__fscrypt_fname_encrypted_size(policy, len,
|
|
max_len - sizeof(struct fscrypt_symlink_data) - 1,
|
|
&disk_link->len))
|
|
return -ENAMETOOLONG;
|
|
disk_link->len += sizeof(struct fscrypt_symlink_data) + 1;
|
|
|
|
disk_link->name = NULL;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(fscrypt_prepare_symlink);
|
|
|
|
int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
|
|
unsigned int len, struct fscrypt_str *disk_link)
|
|
{
|
|
int err;
|
|
struct qstr iname = QSTR_INIT(target, len);
|
|
struct fscrypt_symlink_data *sd;
|
|
unsigned int ciphertext_len;
|
|
|
|
/*
|
|
* fscrypt_prepare_new_inode() should have already set up the new
|
|
* symlink inode's encryption key. We don't wait until now to do it,
|
|
* since we may be in a filesystem transaction now.
|
|
*/
|
|
if (WARN_ON_ONCE(!fscrypt_has_encryption_key(inode)))
|
|
return -ENOKEY;
|
|
|
|
if (disk_link->name) {
|
|
/* filesystem-provided buffer */
|
|
sd = (struct fscrypt_symlink_data *)disk_link->name;
|
|
} else {
|
|
sd = kmalloc(disk_link->len, GFP_NOFS);
|
|
if (!sd)
|
|
return -ENOMEM;
|
|
}
|
|
ciphertext_len = disk_link->len - sizeof(*sd) - 1;
|
|
sd->len = cpu_to_le16(ciphertext_len);
|
|
|
|
err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path,
|
|
ciphertext_len);
|
|
if (err)
|
|
goto err_free_sd;
|
|
|
|
/*
|
|
* Null-terminating the ciphertext doesn't make sense, but we still
|
|
* count the null terminator in the length, so we might as well
|
|
* initialize it just in case the filesystem writes it out.
|
|
*/
|
|
sd->encrypted_path[ciphertext_len] = '\0';
|
|
|
|
/* Cache the plaintext symlink target for later use by get_link() */
|
|
err = -ENOMEM;
|
|
inode->i_link = kmemdup(target, len + 1, GFP_NOFS);
|
|
if (!inode->i_link)
|
|
goto err_free_sd;
|
|
|
|
if (!disk_link->name)
|
|
disk_link->name = (unsigned char *)sd;
|
|
return 0;
|
|
|
|
err_free_sd:
|
|
if (!disk_link->name)
|
|
kfree(sd);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink);
|
|
|
|
/**
|
|
* fscrypt_get_symlink() - get the target of an encrypted symlink
|
|
* @inode: the symlink inode
|
|
* @caddr: the on-disk contents of the symlink
|
|
* @max_size: size of @caddr buffer
|
|
* @done: if successful, will be set up to free the returned target if needed
|
|
*
|
|
* If the symlink's encryption key is available, we decrypt its target.
|
|
* Otherwise, we encode its target for presentation.
|
|
*
|
|
* This may sleep, so the filesystem must have dropped out of RCU mode already.
|
|
*
|
|
* Return: the presentable symlink target or an ERR_PTR()
|
|
*/
|
|
const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
|
|
unsigned int max_size,
|
|
struct delayed_call *done)
|
|
{
|
|
const struct fscrypt_symlink_data *sd;
|
|
struct fscrypt_str cstr, pstr;
|
|
bool has_key;
|
|
int err;
|
|
|
|
/* This is for encrypted symlinks only */
|
|
if (WARN_ON_ONCE(!IS_ENCRYPTED(inode)))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/* If the decrypted target is already cached, just return it. */
|
|
pstr.name = READ_ONCE(inode->i_link);
|
|
if (pstr.name)
|
|
return pstr.name;
|
|
|
|
/*
|
|
* Try to set up the symlink's encryption key, but we can continue
|
|
* regardless of whether the key is available or not.
|
|
*/
|
|
err = fscrypt_get_encryption_info(inode, false);
|
|
if (err)
|
|
return ERR_PTR(err);
|
|
has_key = fscrypt_has_encryption_key(inode);
|
|
|
|
/*
|
|
* For historical reasons, encrypted symlink targets are prefixed with
|
|
* the ciphertext length, even though this is redundant with i_size.
|
|
*/
|
|
|
|
if (max_size < sizeof(*sd) + 1)
|
|
return ERR_PTR(-EUCLEAN);
|
|
sd = caddr;
|
|
cstr.name = (unsigned char *)sd->encrypted_path;
|
|
cstr.len = le16_to_cpu(sd->len);
|
|
|
|
if (cstr.len == 0)
|
|
return ERR_PTR(-EUCLEAN);
|
|
|
|
if (cstr.len + sizeof(*sd) > max_size)
|
|
return ERR_PTR(-EUCLEAN);
|
|
|
|
err = fscrypt_fname_alloc_buffer(cstr.len, &pstr);
|
|
if (err)
|
|
return ERR_PTR(err);
|
|
|
|
err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
|
|
if (err)
|
|
goto err_kfree;
|
|
|
|
err = -EUCLEAN;
|
|
if (pstr.name[0] == '\0')
|
|
goto err_kfree;
|
|
|
|
pstr.name[pstr.len] = '\0';
|
|
|
|
/*
|
|
* Cache decrypted symlink targets in i_link for later use. Don't cache
|
|
* symlink targets encoded without the key, since those become outdated
|
|
* once the key is added. This pairs with the READ_ONCE() above and in
|
|
* the VFS path lookup code.
|
|
*/
|
|
if (!has_key ||
|
|
cmpxchg_release(&inode->i_link, NULL, pstr.name) != NULL)
|
|
set_delayed_call(done, kfree_link, pstr.name);
|
|
|
|
return pstr.name;
|
|
|
|
err_kfree:
|
|
kfree(pstr.name);
|
|
return ERR_PTR(err);
|
|
}
|
|
EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
|
|
|
|
/**
|
|
* fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks
|
|
* @path: the path for the encrypted symlink being queried
|
|
* @stat: the struct being filled with the symlink's attributes
|
|
*
|
|
* Override st_size of encrypted symlinks to be the length of the decrypted
|
|
* symlink target (or the no-key encoded symlink target, if the key is
|
|
* unavailable) rather than the length of the encrypted symlink target. This is
|
|
* necessary for st_size to match the symlink target that userspace actually
|
|
* sees. POSIX requires this, and some userspace programs depend on it.
|
|
*
|
|
* This requires reading the symlink target from disk if needed, setting up the
|
|
* inode's encryption key if possible, and then decrypting or encoding the
|
|
* symlink target. This makes lstat() more heavyweight than is normally the
|
|
* case. However, decrypted symlink targets will be cached in ->i_link, so
|
|
* usually the symlink won't have to be read and decrypted again later if/when
|
|
* it is actually followed, readlink() is called, or lstat() is called again.
|
|
*
|
|
* Return: 0 on success, -errno on failure
|
|
*/
|
|
int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat)
|
|
{
|
|
struct dentry *dentry = path->dentry;
|
|
struct inode *inode = d_inode(dentry);
|
|
const char *link;
|
|
DEFINE_DELAYED_CALL(done);
|
|
|
|
/*
|
|
* To get the symlink target that userspace will see (whether it's the
|
|
* decrypted target or the no-key encoded target), we can just get it in
|
|
* the same way the VFS does during path resolution and readlink().
|
|
*/
|
|
link = READ_ONCE(inode->i_link);
|
|
if (!link) {
|
|
link = inode->i_op->get_link(dentry, inode, &done);
|
|
if (IS_ERR(link))
|
|
return PTR_ERR(link);
|
|
}
|
|
stat->size = strlen(link);
|
|
do_delayed_call(&done);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr);
|