vfs: make open_with_fake_path() not contribute to nr_files

Stacking file operations in overlay will store an extra open file for each
overlay file opened.

The added overhead is just that of "struct file", which is about 256 bytes:
overlay already pins an extra dentry and inode while the file is open, and
those add up to a much larger overhead.

For fear of breaking working setups, don't start accounting the extra file.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
This commit is contained in:
Miklos Szeredi 2018-07-18 15:44:40 +02:00
parent 51e6ce820b
commit d3b1084dfd
4 changed files with 58 additions and 27 deletions

View File

@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f) static inline void file_free(struct file *f)
{ {
security_file_free(f); security_file_free(f);
percpu_counter_dec(&nr_files); if (!(f->f_mode & FMODE_NOACCOUNT))
percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
} }
@ -91,34 +92,11 @@ int proc_nr_files(struct ctl_table *table, int write,
} }
#endif #endif
/* Find an unused file structure and return a pointer to it. static struct file *__alloc_file(int flags, const struct cred *cred)
* Returns an error pointer if some error happened, e.g. we are over the file
* structures limit, run out of memory or the operation is not permitted.
*
* Be very careful using this. You are responsible for
* getting write access to any mount that you might assign
* to this filp, if it is opened for write. If this is not
* done, you will imbalance the mount's writer count
* and get a warning at __fput() time.
*/
struct file *alloc_empty_file(int flags, const struct cred *cred)
{ {
static long old_max;
struct file *f; struct file *f;
int error; int error;
/*
* Privileged users can go above max_files
*/
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
/*
* percpu_counters are inaccurate. Do an expensive check before
* we go and fail.
*/
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
goto over;
}
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
if (unlikely(!f)) if (unlikely(!f))
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
@ -138,7 +116,41 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
f->f_flags = flags; f->f_flags = flags;
f->f_mode = OPEN_FMODE(flags); f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */ /* f->f_version: 0 */
percpu_counter_inc(&nr_files);
return f;
}
/* Find an unused file structure and return a pointer to it.
* Returns an error pointer if some error happened, e.g. we are over the file
* structures limit, run out of memory or the operation is not permitted.
*
* Be very careful using this. You are responsible for
* getting write access to any mount that you might assign
* to this filp, if it is opened for write. If this is not
* done, you will imbalance the mount's writer count
* and get a warning at __fput() time.
*/
struct file *alloc_empty_file(int flags, const struct cred *cred)
{
static long old_max;
struct file *f;
/*
* Privileged users can go above max_files
*/
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
/*
* percpu_counters are inaccurate. Do an expensive check before
* we go and fail.
*/
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
goto over;
}
f = __alloc_file(flags, cred);
if (!IS_ERR(f))
percpu_counter_inc(&nr_files);
return f; return f;
over: over:
@ -150,6 +162,21 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
return ERR_PTR(-ENFILE); return ERR_PTR(-ENFILE);
} }
/*
 * Allocate an empty file like alloc_empty_file() does, but without checking
 * the max_files limit and without incrementing nr_files.
 *
 * On success the file is flagged FMODE_NOACCOUNT so that file_free() skips
 * the matching nr_files decrement.  On failure the error pointer from
 * __alloc_file() is returned unchanged.
 *
 * Should not be used unless there's a very good reason to do so.
 */
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
{
	struct file *f;

	f = __alloc_file(flags, cred);
	if (IS_ERR(f))
		return f;

	/* Mark the file so it is never counted in nr_files. */
	f->f_mode |= FMODE_NOACCOUNT;
	return f;
}
/** /**
* alloc_file - allocate and initialize a 'struct file' * alloc_file - allocate and initialize a 'struct file'
* *

View File

@ -94,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
* file_table.c * file_table.c
*/ */
extern struct file *alloc_empty_file(int, const struct cred *); extern struct file *alloc_empty_file(int, const struct cred *);
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/* /*
* super.c * super.c

View File

@ -928,7 +928,7 @@ EXPORT_SYMBOL(dentry_open);
struct file *open_with_fake_path(const struct path *path, int flags, struct file *open_with_fake_path(const struct path *path, int flags,
struct inode *inode, const struct cred *cred) struct inode *inode, const struct cred *cred)
{ {
struct file *f = alloc_empty_file(flags, cred); struct file *f = alloc_empty_file_noaccount(flags, cred);
if (!IS_ERR(f)) { if (!IS_ERR(f)) {
int error; int error;

View File

@ -156,6 +156,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File is capable of returning -EAGAIN if I/O will block */ /* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT ((__force fmode_t)0x8000000) #define FMODE_NOWAIT ((__force fmode_t)0x8000000)
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
/* /*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are * that indicates that they should check the contents of the iovec are