memcg: charge fs_context and legacy_fs_context

This patch adds accounting flags to fs_context and legacy_fs_context
allocation sites so that kernel could correctly charge these objects.

We have written a PoC to demonstrate the effect of the missing-charging
bugs.  The PoC takes around 1,200MB unaccounted memory, while it is
charged for only 362MB memory usage.  We evaluate the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster.  All the limitations
including ulimits and sysctl variables are set as default.  Specifically,
the hard NOFILE limit and nr_open in sysctl are both 1,048,576.

/*------------------------- POC code ----------------------------*/

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)

#define STACK_SIZE (8 * 1024)
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
static inline int fsopen(const char *fs_name, unsigned int flags)
{
        return syscall(__NR_fsopen, fs_name, flags);
}

static char thread_stack[512][STACK_SIZE];

int thread_fn(void* arg)
{
  for (int i = 0; i< 800000; ++i) {
    int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
    if (fsfd == -1) {
      errExit("fsopen");
    }
  }
  while(1);
  return 0;
}

int main(int argc, char *argv[]) {
  int thread_pid;
  for (int i = 0; i < 1; ++i) {
    thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
      SIGCHLD, NULL);
  }
  while(1);
  return 0;
}

/*-------------------------- end --------------------------------*/

Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com
Signed-off-by: Yutian Yang <nglaive@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <shenwenbo@zju.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Yutian Yang 2021-09-02 14:55:07 -07:00 committed by Linus Torvalds
parent aa48e47e39
commit bb902cb47c

View File

@ -254,7 +254,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
struct fs_context *fc; struct fs_context *fc;
int ret = -ENOMEM; int ret = -ENOMEM;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
if (!fc) if (!fc)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
@ -649,7 +649,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
*/ */
static int legacy_init_fs_context(struct fs_context *fc) static int legacy_init_fs_context(struct fs_context *fc)
{ {
fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
if (!fc->fs_private) if (!fc->fs_private)
return -ENOMEM; return -ENOMEM;
fc->ops = &legacy_fs_context_ops; fc->ops = &legacy_fs_context_ops;