linux-next/fs/f2fs/xattr.c

597 lines
14 KiB
C
Raw Normal View History

/*
* fs/f2fs/xattr.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* Portions of this code from linux/fs/ext2/xattr.c
*
* Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
*
* Fix by Harrison Xing <harrison@mountainviewdata.com>.
* Extended attributes for symlinks and special files added per
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
#include <linux/security.h>
#include "f2fs.h"
#include "xattr.h"
static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
size_t list_size, const char *name, size_t name_len, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
int total_len, prefix_len = 0;
const char *prefix = NULL;
switch (type) {
case F2FS_XATTR_INDEX_USER:
if (!test_opt(sbi, XATTR_USER))
return -EOPNOTSUPP;
prefix = XATTR_USER_PREFIX;
prefix_len = XATTR_USER_PREFIX_LEN;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
prefix = XATTR_TRUSTED_PREFIX;
prefix_len = XATTR_TRUSTED_PREFIX_LEN;
break;
case F2FS_XATTR_INDEX_SECURITY:
prefix = XATTR_SECURITY_PREFIX;
prefix_len = XATTR_SECURITY_PREFIX_LEN;
break;
default:
return -EINVAL;
}
total_len = prefix_len + name_len + 1;
if (list && total_len <= list_size) {
memcpy(list, prefix, prefix_len);
memcpy(list + prefix_len, name, name_len);
list[prefix_len + name_len] = '\0';
}
return total_len;
}
static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
switch (type) {
case F2FS_XATTR_INDEX_USER:
if (!test_opt(sbi, XATTR_USER))
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
return -EINVAL;
}
if (strcmp(name, "") == 0)
return -EINVAL;
return f2fs_getxattr(dentry->d_inode, type, name, buffer, size);
}
static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
switch (type) {
case F2FS_XATTR_INDEX_USER:
if (!test_opt(sbi, XATTR_USER))
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
return -EINVAL;
}
if (strcmp(name, "") == 0)
return -EINVAL;
return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL);
}
static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
size_t list_size, const char *name, size_t name_len, int type)
{
const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
size_t size;
if (type != F2FS_XATTR_INDEX_ADVISE)
return 0;
size = strlen(xname) + 1;
if (list && size <= list_size)
memcpy(list, xname, size);
return size;
}
static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
struct inode *inode = dentry->d_inode;
if (strcmp(name, "") != 0)
return -EINVAL;
*((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
}
static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int type)
{
struct inode *inode = dentry->d_inode;
if (strcmp(name, "") != 0)
return -EINVAL;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
return 0;
}
#ifdef CONFIG_F2FS_FS_SECURITY
static int __f2fs_setxattr(struct inode *inode, int name_index,
const char *name, const void *value, size_t value_len,
struct page *ipage);
static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *page)
{
const struct xattr *xattr;
int err = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
xattr->name, xattr->value,
xattr->value_len, (struct page *)page);
if (err < 0)
break;
}
return err;
}
int f2fs_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr, struct page *ipage)
{
return security_inode_init_security(inode, dir, qstr,
&f2fs_initxattrs, ipage);
}
#endif
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
.list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = F2FS_XATTR_INDEX_TRUSTED,
.list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_advise_handler = {
.prefix = F2FS_SYSTEM_ADVISE_PREFIX,
.flags = F2FS_XATTR_INDEX_ADVISE,
.list = f2fs_xattr_advise_list,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
const struct xattr_handler f2fs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = F2FS_XATTR_INDEX_SECURITY,
.list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
static const struct xattr_handler *f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
[F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
#endif
[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
[F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler,
#endif
[F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
};
const struct xattr_handler *f2fs_xattr_handlers[] = {
&f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
&f2fs_xattr_acl_access_handler,
&f2fs_xattr_acl_default_handler,
#endif
&f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
&f2fs_xattr_security_handler,
#endif
&f2fs_xattr_advise_handler,
NULL,
};
static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
{
const struct xattr_handler *handler = NULL;
if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
handler = f2fs_xattr_handler_map[name_index];
return handler;
}
static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index,
size_t name_len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
if (entry->e_name_index != name_index)
continue;
if (entry->e_name_len != name_len)
continue;
if (!memcmp(entry->e_name, name, name_len))
break;
}
return entry;
}
static void *read_all_xattrs(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
void *txattr_addr;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
if (!txattr_addr)
return NULL;
/* read from inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page))
goto fail;
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
f2fs_put_page(page, 1);
}
/* read from xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage))
goto fail;
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
f2fs_put_page(xpage, 1);
}
header = XATTR_HDR(txattr_addr);
/* never been allocated xattrs */
if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
header->h_refcount = cpu_to_le32(1);
}
return txattr_addr;
fail:
kzfree(txattr_addr);
return NULL;
}
static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
void *txattr_addr, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
size_t inline_size = 0;
void *xattr_addr;
struct page *xpage;
nid_t new_nid = 0;
int err;
inline_size = inline_xattr_size(inode);
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
return -ENOSPC;
/* write to inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
/* no need to use xattr node block */
if (hsize <= inline_size) {
err = truncate_xattr_node(inode, ipage);
alloc_nid_failed(sbi, new_nid);
return err;
}
}
/* write to xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
f2fs_bug_on(new_nid);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
alloc_nid_done(sbi, new_nid);
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
sizeof(struct node_footer));
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
/* need to checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
size_t value_len, name_len;
if (name == NULL)
return -EINVAL;
name_len = strlen(name);
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
return -ENOMEM;
entry = __find_xattr(base_addr, name_index, name_len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
value_len = le16_to_cpu(entry->e_value_size);
if (buffer && value_len > buffer_size) {
error = -ERANGE;
goto cleanup;
}
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
memcpy(buffer, pval, value_len);
}
error = value_len;
cleanup:
kzfree(base_addr);
return error;
}
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = dentry->d_inode;
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
size_t rest = buffer_size;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
return -ENOMEM;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
size_t size;
if (!handler)
continue;
size = handler->list(dentry, buffer, rest, entry->e_name,
entry->e_name_len, handler->flags);
if (buffer && size > rest) {
error = -ERANGE;
goto cleanup;
}
if (buffer)
buffer += size;
rest -= size;
}
error = buffer_size - rest;
cleanup:
kzfree(base_addr);
return error;
}
static int __f2fs_setxattr(struct inode *inode, int name_index,
const char *name, const void *value, size_t value_len,
struct page *ipage)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
size_t name_len;
__u32 new_hsize;
int error = -ENOMEM;
if (name == NULL)
return -EINVAL;
if (value == NULL)
value_len = 0;
name_len = strlen(name);
if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
return -ERANGE;
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
goto exit;
/* find entry with wanted name. */
here = __find_xattr(base_addr, name_index, name_len, name);
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
last = here;
while (!IS_XATTR_LAST_ENTRY(last))
last = XATTR_NEXT_ENTRY(last);
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
name_len + value_len);
/* 1. Check space */
if (value) {
int free;
/*
* If value is NULL, it is remove operation.
* In case of update operation, we caculate free.
*/
free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
free = free + ENTRY_SIZE(here);
if (free < newsize) {
error = -ENOSPC;
goto exit;
}
}
/* 2. Remove old entry */
if (found) {
/*
* If entry is found, remove old entry.
* If not found, remove operation is not needed.
*/
struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
int oldsize = ENTRY_SIZE(here);
memmove(here, next, (char *)last - (char *)next);
last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
memset(last, 0, oldsize);
}
new_hsize = (char *)last - (char *)base_addr;
/* 3. Write new entry */
if (value) {
char *pval;
/*
* Before we come here, old entry is removed.
* We just write new entry.
*/
memset(last, 0, newsize);
last->e_name_index = name_index;
last->e_name_len = name_len;
memcpy(last->e_name, name, name_len);
pval = last->e_name + name_len;
memcpy(pval, value, value_len);
last->e_value_size = cpu_to_le16(value_len);
new_hsize += newsize;
}
error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
if (error)
goto exit;
if (is_inode_flag_set(fi, FI_ACL_MODE)) {
inode->i_mode = fi->i_acl_mode;
inode->i_ctime = CURRENT_TIME;
clear_inode_flag(fi, FI_ACL_MODE);
}
if (ipage)
update_inode(inode, ipage);
else
update_inode_page(inode);
exit:
kzfree(base_addr);
return error;
}
int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
const void *value, size_t value_len, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int err;
f2fs_balance_fs(sbi);
f2fs: use rw_sem instead of fs_lock(locks mutex) The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 10:08:30 +00:00
f2fs_lock_op(sbi);
err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
f2fs: use rw_sem instead of fs_lock(locks mutex) The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 10:08:30 +00:00
f2fs_unlock_op(sbi);
return err;
}