From 2d337b7158f8c38dacf8394028ab87b8a25ed707 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Wed, 1 Mar 2023 10:06:27 +0000 Subject: [PATCH 01/29] userfaultfd: move unprivileged_userfaultfd sysctl to its own file The sysctl_unprivileged_userfaultfd is part of userfaultfd, move it to its own file. Signed-off-by: ZhangPeng Signed-off-by: Luis Chamberlain --- fs/userfaultfd.c | 20 +++++++++++++++++++- include/linux/userfaultfd_k.h | 2 -- kernel/sysctl.c | 11 ----------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 44d1ee429eb0..d01f803a6b11 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -32,7 +32,22 @@ #include #include -int sysctl_unprivileged_userfaultfd __read_mostly; +static int sysctl_unprivileged_userfaultfd __read_mostly; + +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_userfaultfd_table[] = { + { + .procname = "unprivileged_userfaultfd", + .data = &sysctl_unprivileged_userfaultfd, + .maxlen = sizeof(sysctl_unprivileged_userfaultfd), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; +#endif static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; @@ -2178,6 +2193,9 @@ static int __init userfaultfd_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once_userfaultfd_ctx); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_userfaultfd_table); +#endif return 0; } __initcall(userfaultfd_init); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 3767f18114ef..fff49fec0258 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,8 +36,6 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) -extern int sysctl_unprivileged_userfaultfd; - extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1c240d2c99bc..c14552a662ae 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2438,17 +2438,6 @@ static struct ctl_table vm_table[] = { .extra1 = (void *)&mmap_rnd_compat_bits_min, .extra2 = (void *)&mmap_rnd_compat_bits_max, }, -#endif -#ifdef CONFIG_USERFAULTFD - { - .procname = "unprivileged_userfaultfd", - .data = &sysctl_unprivileged_userfaultfd, - .maxlen = sizeof(sysctl_unprivileged_userfaultfd), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { } }; From 962de54828c5bb100eb8de881b79ed9c8b7468c0 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 9 Mar 2023 20:20:11 +0800 Subject: [PATCH 02/29] mm: hugetlb: move hugeltb sysctls to its own file This moves all hugetlb sysctls to its own file, also kill an useless hugetlb_treat_movable_handler() defination. Signed-off-by: Kefeng Wang Reviewed-by: Luis Chamberlain Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song Signed-off-by: Luis Chamberlain --- include/linux/hugetlb.h | 8 ------- kernel/sysctl.c | 32 -------------------------- mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7c977d234aba..4056b05d81ed 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -124,14 +124,6 @@ void hugepage_put_subpool(struct hugepage_subpool *spool); void hugetlb_dup_vma_private(struct vm_area_struct *vma); void clear_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, - loff_t *); - int move_hugetlb_page_tables(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c14552a662ae..ce0297acf97c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2140,38 +2140,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, -#endif -#ifdef CONFIG_HUGETLB_PAGE - { - .procname = "nr_hugepages", - .data = NULL, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = hugetlb_sysctl_handler, - }, -#ifdef CONFIG_NUMA - { - .procname = "nr_hugepages_mempolicy", - .data = NULL, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = &hugetlb_mempolicy_sysctl_handler, - }, -#endif - { - .procname = "hugetlb_shm_group", - .data = &sysctl_hugetlb_shm_group, - .maxlen = sizeof(gid_t), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nr_overcommit_hugepages", - .data = NULL, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = hugetlb_overcommit_handler, - }, #endif { .procname = "lowmem_reserve_ratio", diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 07abcb6eb203..0ee77c55e44e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4202,6 +4202,12 @@ static void __init hugetlb_sysfs_init(void) hugetlb_register_all_nodes(); } +#ifdef CONFIG_SYSCTL +static void hugetlb_sysctl_init(void); +#else +static inline void hugetlb_sysctl_init(void) { } +#endif + static int __init hugetlb_init(void) { int i; @@ -4257,6 +4263,7 @@ static int __init hugetlb_init(void) hugetlb_sysfs_init(); hugetlb_cgroup_file_init(); + hugetlb_sysctl_init(); #ifdef CONFIG_SMP num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus()); @@ -4588,7 +4595,7 @@ out: return ret; } -int hugetlb_sysctl_handler(struct ctl_table *table, int write, +static int hugetlb_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { @@ -4597,7 +4604,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, } #ifdef CONFIG_NUMA -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, +static int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { return hugetlb_sysctl_handler_common(true, table, write, @@ -4605,7 +4612,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, } #endif /* CONFIG_NUMA */ -int hugetlb_overcommit_handler(struct ctl_table *table, int write, +static int hugetlb_overcommit_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -4634,6 +4641,44 @@ out: return ret; } +static struct ctl_table hugetlb_table[] = { + { + .procname = "nr_hugepages", + .data = NULL, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = hugetlb_sysctl_handler, + }, +#ifdef CONFIG_NUMA + { + .procname = "nr_hugepages_mempolicy", + .data = NULL, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &hugetlb_mempolicy_sysctl_handler, + }, +#endif + { + .procname = "hugetlb_shm_group", + .data = &sysctl_hugetlb_shm_group, + .maxlen = sizeof(gid_t), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "nr_overcommit_hugepages", + .data = NULL, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = hugetlb_overcommit_handler, + }, + { } +}; + +static void hugetlb_sysctl_init(void) +{ + register_sysctl_init("vm", hugetlb_table); +} #endif /* CONFIG_SYSCTL */ void hugetlb_report_meminfo(struct seq_file *m) From 67ff32289acad9ed338cd9f2351b44939e55163e Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:16 -0800 Subject: [PATCH 03/29] proc_sysctl: update docs for __register_sysctl_table() Update the docs for __register_sysctl_table() to make it clear no child entries can be passed. When the child is true these are non-leaf entries on the ctl table and sysctl treats these as directories. The point to __register_sysctl_table() is to deal only with directories not part of the ctl table where thay may riside, to be simple and avoid recursion. While at it, hint towards using long on extra1 and extra2 later. Cc: stable@vger.kernel.org # v5.17 Cc: Christian Brauner Cc: Kefeng Wang Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5851eb5bc726..1df0beb50dbe 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1287,7 +1287,7 @@ out: * __register_sysctl_table - register a leaf sysctl table * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure + * @table: the top-level table structure without any child * * Register a sysctl table hierarchy. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. @@ -1308,9 +1308,12 @@ out: * proc_handler - the text handler routine (described below) * * extra1, extra2 - extra pointers usable by the proc handler routines + * XXX: we should eventually modify these to use long min / max [0] + * [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org * * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. + * under /proc; non-leaf nodes (where child is not NULL) are not allowed, + * sysctl_check_table() verifies this. * * There must be a proc_handler routine for any terminal nodes. * Several default handlers are available to cover common cases - @@ -1352,7 +1355,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); dir = &set->dir; - /* Reference moved down the diretory tree get_subdir */ + /* Reference moved down the directory tree get_subdir */ dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1369,6 +1372,11 @@ struct ctl_table_header *__register_sysctl_table( if (namelen == 0) continue; + /* + * namelen ensures if name is "foo/bar/yay" only foo is + * registered first. We traverse as if using mkdir -p and + * return a ctl_dir for the last directory entry. + */ dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) goto fail; From b2f56e5574eac0cf9e3dc382bef010298cb1f1e9 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:17 -0800 Subject: [PATCH 04/29] proc_sysctl: move helper which creates required subdirectories Move the code which creates the subdirectories for a ctl table into a helper routine so to make it easier to review. Document the goal. This creates no functional changes. Reviewed-by: John Johansen Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 56 ++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1df0beb50dbe..6b9b2694d430 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1283,6 +1283,35 @@ out: return err; } +/* Find the directory for the ctl_table. If one is not found create it. */ +static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path) +{ + const char *name, *nextname; + + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + + /* + * namelen ensures if name is "foo/bar/yay" only foo is + * registered first. We traverse as if using mkdir -p and + * return a ctl_dir for the last directory entry. + */ + dir = get_subdir(dir, name, namelen); + if (IS_ERR(dir)) + break; + } + return dir; +} + /** * __register_sysctl_table - register a leaf sysctl table * @set: Sysctl tree to register on @@ -1334,7 +1363,6 @@ struct ctl_table_header *__register_sysctl_table( { struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *header; - const char *name, *nextname; struct ctl_dir *dir; struct ctl_table *entry; struct ctl_node *node; @@ -1359,29 +1387,9 @@ struct ctl_table_header *__register_sysctl_table( dir->header.nreg++; spin_unlock(&sysctl_lock); - /* Find the directory for the ctl_table */ - for (name = path; name; name = nextname) { - int namelen; - nextname = strchr(name, '/'); - if (nextname) { - namelen = nextname - name; - nextname++; - } else { - namelen = strlen(name); - } - if (namelen == 0) - continue; - - /* - * namelen ensures if name is "foo/bar/yay" only foo is - * registered first. We traverse as if using mkdir -p and - * return a ctl_dir for the last directory entry. - */ - dir = get_subdir(dir, name, namelen); - if (IS_ERR(dir)) - goto fail; - } - + dir = sysctl_mkdir_p(dir, path); + if (IS_ERR(dir)) + goto fail; spin_lock(&sysctl_lock); if (insert_header(dir, header)) goto fail_put_dir_locked; From 228b09de936395ddd740df3522ea35ae934830d8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:18 -0800 Subject: [PATCH 05/29] sysctl: clarify register_sysctl_init() base directory order Relatively new docs which I added which hinted the base directories needed to be created before is wrong, remove that incorrect comment. This has been hinted before by Eric twice already [0] [1], I had just not verified that until now. Now that I've verified that updates the docs to relax the context described. [0] https://lkml.kernel.org/r/875ys0azt8.fsf@email.froward.int.ebiederm.org [1] https://lkml.kernel.org/r/87ftbiud6s.fsf@x220.int.ebiederm.org Cc: stable@vger.kernel.org # v5.17 Cc: Christian Brauner Cc: Kefeng Wang Suggested-by: Eric W. Biederman Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6b9b2694d430..15d5e02f1ec0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1440,10 +1440,7 @@ EXPORT_SYMBOL(register_sysctl); * register_sysctl() failing on init are extremely low, and so for both reasons * this function does not return any error as it is used by initialization code. * - * Context: Can only be called after your respective sysctl base path has been - * registered. So for instance, most base directories are registered early on - * init before init levels are processed through proc_sys_init() and - * sysctl_init_bases(). + * Context: if your base directory does not exist it will be created for you. */ void __init __register_sysctl_init(const char *path, struct ctl_table *table, const char *table_name) From 96200952abeb35c4407851bfcdcbc144cc0d027d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:19 -0800 Subject: [PATCH 06/29] apparmor: simplify sysctls with register_sysctl_init() Using register_sysctl_paths() is really only needed if you have subdirectories with entries. We can use the simple register_sysctl() instead. Acked-by: John Johansen Reviewed-by: Georgia Garcia Signed-off-by: Luis Chamberlain --- security/apparmor/lsm.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d6cc4812ca53..47c7ec7e5a80 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1764,11 +1764,6 @@ static int apparmor_dointvec(struct ctl_table *table, int write, return proc_dointvec(table, write, buffer, lenp, ppos); } -static struct ctl_path apparmor_sysctl_path[] = { - { .procname = "kernel", }, - { } -}; - static struct ctl_table apparmor_sysctl_table[] = { { .procname = "unprivileged_userns_apparmor_policy", @@ -1790,8 +1785,7 @@ static struct ctl_table apparmor_sysctl_table[] = { static int __init apparmor_init_sysctl(void) { - return register_sysctl_paths(apparmor_sysctl_path, - apparmor_sysctl_table) ? 0 : -ENOMEM; + return register_sysctl("kernel", apparmor_sysctl_table) ? 0 : -ENOMEM; } #else static inline int apparmor_init_sysctl(void) From 5df5bdc3c4733a47a818576c13a7bfd0f0715124 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:20 -0800 Subject: [PATCH 07/29] loadpin: simplify sysctls use with register_sysctl() register_sysctl_paths() is not required, we can just use register_sysctl() with the required path specified. Reviewed-by: John Johansen Acked-by: Kees Cook Signed-off-by: Luis Chamberlain --- security/loadpin/loadpin.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index d73a281adf86..c971464b4ad5 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -52,12 +52,6 @@ static bool deny_reading_verity_digests; #endif #ifdef CONFIG_SYSCTL -static struct ctl_path loadpin_sysctl_path[] = { - { .procname = "kernel", }, - { .procname = "loadpin", }, - { } -}; - static struct ctl_table loadpin_sysctl_table[] = { { .procname = "enforce", @@ -262,7 +256,7 @@ static int __init loadpin_init(void) enforce ? "" : "not "); parse_exclude(); #ifdef CONFIG_SYSCTL - if (!register_sysctl_paths(loadpin_sysctl_path, loadpin_sysctl_table)) + if (!register_sysctl("kernel/loadpin", loadpin_sysctl_table)) pr_notice("sysctl registration failed!\n"); #endif security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin"); From 98cfeb8d540aa009cd5cb973def265b6c44afa00 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:21 -0800 Subject: [PATCH 08/29] yama: simplfy sysctls with register_sysctl() register_sysctl_paths() is only need if you have directories with entries, simplify this by using register_sysctl(). Acked-by: Kees Cook Signed-off-by: Luis Chamberlain --- security/yama/yama_lsm.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 06e226166aab..90dd012b0db5 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -447,12 +447,6 @@ static int yama_dointvec_minmax(struct ctl_table *table, int write, static int max_scope = YAMA_SCOPE_NO_ATTACH; -static struct ctl_path yama_sysctl_path[] = { - { .procname = "kernel", }, - { .procname = "yama", }, - { } -}; - static struct ctl_table yama_sysctl_table[] = { { .procname = "ptrace_scope", @@ -467,7 +461,7 @@ static struct ctl_table yama_sysctl_table[] = { }; static void __init yama_init_sysctl(void) { - if (!register_sysctl_paths(yama_sysctl_path, yama_sysctl_table)) + if (!register_sysctl("kernel/yama", yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); } #else From 02a6b455fb35d29620ceaa0ed053ae9846f875ca Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:22 -0800 Subject: [PATCH 09/29] seccomp: simplify sysctls with register_sysctl_init() register_sysctl_paths() is only needed if you have childs (directories) with entries. Just use register_sysctl_init() as it also does the kmemleak check for you. Acked-by: Kees Cook Signed-off-by: Luis Chamberlain --- kernel/seccomp.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index cebf26445f9e..d3e584065c7f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -2368,12 +2368,6 @@ static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, return ret; } -static struct ctl_path seccomp_sysctl_path[] = { - { .procname = "kernel", }, - { .procname = "seccomp", }, - { } -}; - static struct ctl_table seccomp_sysctl_table[] = { { .procname = "actions_avail", @@ -2392,14 +2386,7 @@ static struct ctl_table seccomp_sysctl_table[] = { static int __init seccomp_sysctl_init(void) { - struct ctl_table_header *hdr; - - hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); - if (!hdr) - pr_warn("sysctl registration failed\n"); - else - kmemleak_not_leak(hdr); - + register_sysctl_init("kernel/seccomp", seccomp_sysctl_table); return 0; } From adf11ea8725bd7874b4aec6990c5807abcd5a00d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:28:25 -0800 Subject: [PATCH 10/29] csky: simplify alignment sysctl registration Using register_sysctl_paths() is only required if we are using leafs with entries but all we are doing is creates leafs with just one leaf and then entries and register_sysctl_init() works well with that already. The 555 permission is already retained by the new_dir() proc sysctl directory creator. Signed-off-by: Luis Chamberlain --- arch/csky/abiv1/alignment.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index 2df115d0e210..b60259daed1b 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -332,22 +332,9 @@ static struct ctl_table alignment_tbl[5] = { {} }; -static struct ctl_table sysctl_table[2] = { - { - .procname = "csky_alignment", - .mode = 0555, - .child = alignment_tbl}, - {} -}; - -static struct ctl_path sysctl_path[2] = { - {.procname = "csky"}, - {} -}; - static int __init csky_alignment_init(void) { - register_sysctl_paths(sysctl_path, sysctl_table); + register_sysctl_init("csky/csky_alignment", alignment_tbl); return 0; } From ca674057f36bcc42eea3832b8de5d0582615a472 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:46:06 -0800 Subject: [PATCH 11/29] scsi: simplify sysctl registration with register_sysctl() register_sysctl_table() is a deprecated compatibility wrapper. register_sysctl() can do the directory creation for you so just use that. Signed-off-by: Luis Chamberlain --- drivers/scsi/scsi_sysctl.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c index 7259704a7f52..7f0914ea168f 100644 --- a/drivers/scsi/scsi_sysctl.c +++ b/drivers/scsi/scsi_sysctl.c @@ -21,25 +21,11 @@ static struct ctl_table scsi_table[] = { { } }; -static struct ctl_table scsi_dir_table[] = { - { .procname = "scsi", - .mode = 0555, - .child = scsi_table }, - { } -}; - -static struct ctl_table scsi_root_table[] = { - { .procname = "dev", - .mode = 0555, - .child = scsi_dir_table }, - { } -}; - static struct ctl_table_header *scsi_table_header; int __init scsi_init_sysctl(void) { - scsi_table_header = register_sysctl_table(scsi_root_table); + scsi_table_header = register_sysctl("dev/scsi", scsi_table); if (!scsi_table_header) return -ENOMEM; return 0; From 525f23fe58b59b3a78a7044916aed9fe26918296 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:46:08 -0800 Subject: [PATCH 12/29] hv: simplify sysctl registration register_sysctl_table() is a deprecated compatibility wrapper. register_sysctl() can do the directory creation for you so just use that. Signed-off-by: Luis Chamberlain Reviewed-by: Michael Kelley Reviewed-by: Wei Liu --- drivers/hv/vmbus_drv.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d24dd65b33d4..229353f1e9c2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1460,15 +1460,6 @@ static struct ctl_table hv_ctl_table[] = { {} }; -static struct ctl_table hv_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = hv_ctl_table - }, - {} -}; - /* * vmbus_bus_init -Main vmbus driver initialization routine. * @@ -1547,7 +1538,7 @@ static int vmbus_bus_init(void) * message recording won't be available in isolated * guests should the following registration fail. */ - hv_ctl_table_hdr = register_sysctl_table(hv_root_table); + hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table); if (!hv_ctl_table_hdr) pr_err("Hyper-V: sysctl table register error"); From 9adcf9d3d5c9ed6bb9d493b59594501f79ae3bed Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:46:09 -0800 Subject: [PATCH 13/29] md: simplify sysctl registration register_sysctl_table() is a deprecated compatibility wrapper. register_sysctl() can do the directory creation for you so just use that. Signed-off-by: Luis Chamberlain Acked-by: Song Liu --- drivers/md/md.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 39e49e5d7182..0dc01f834b62 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -322,26 +322,6 @@ static struct ctl_table raid_table[] = { { } }; -static struct ctl_table raid_dir_table[] = { - { - .procname = "raid", - .maxlen = 0, - .mode = S_IRUGO|S_IXUGO, - .child = raid_table, - }, - { } -}; - -static struct ctl_table raid_root_table[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = raid_dir_table, - }, - { } -}; - static int start_readonly; /* @@ -9649,7 +9629,7 @@ static int __init md_init(void) mdp_major = ret; register_reboot_notifier(&md_notifier); - raid_table_header = register_sysctl_table(raid_root_table); + raid_table_header = register_sysctl("dev/raid", raid_table); md_geninit(); return 0; From 9f17a75b2d10ddd786b3b2c4fd732356de4f483e Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 2 Mar 2023 12:46:12 -0800 Subject: [PATCH 14/29] xen: simplify sysctl registration for balloon register_sysctl_table() is a deprecated compatibility wrapper. register_sysctl_init() can do the directory creation for you so just use that. Signed-off-by: Luis Chamberlain Reviewed-by: Juergen Gross --- drivers/xen/balloon.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 617a7f4f07a8..586a1673459e 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -97,24 +97,6 @@ static struct ctl_table balloon_table[] = { { } }; -static struct ctl_table balloon_root[] = { - { - .procname = "balloon", - .mode = 0555, - .child = balloon_table, - }, - { } -}; - -static struct ctl_table xen_root[] = { - { - .procname = "xen", - .mode = 0555, - .child = balloon_root, - }, - { } -}; - #else #define xen_hotplug_unpopulated 0 #endif @@ -747,7 +729,7 @@ static int __init balloon_init(void) #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); - register_sysctl_table(xen_root); + register_sysctl_init("xen/balloon", balloon_table); #endif balloon_add_regions(); From 1dc8689e4cc651e21566e10206a84c4006e81fb1 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 13:00:16 -0800 Subject: [PATCH 15/29] proc_sysctl: enhance documentation Expand documentation to clarify: o that paths don't need to exist for the new API callers o clarify that we *require* callers to keep the memory of the table around during the lifetime of the sysctls o annotate routines we are trying to deprecate and later remove Cc: stable@vger.kernel.org # v5.17 Cc: Christian Brauner Cc: Kefeng Wang Signed-off-by: Luis Chamberlain --- fs/proc/proc_sysctl.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 15d5e02f1ec0..7ad07435828f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1316,7 +1316,10 @@ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path) * __register_sysctl_table - register a leaf sysctl table * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure without any child + * @table: the top-level table structure without any child. This table + * should not be free'd after registration. So it should not be + * used on stack. It can either be a global or dynamically allocated + * by the caller and free'd later after sysctl unregistration. * * Register a sysctl table hierarchy. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. @@ -1410,8 +1413,15 @@ fail: /** * register_sysctl - register a sysctl table - * @path: The path to the directory the sysctl table is in. - * @table: the table structure + * @path: The path to the directory the sysctl table is in. If the path + * doesn't exist we will create it for you. + * @table: the table structure. The calller must ensure the life of the @table + * will be kept during the lifetime use of the syctl. It must not be freed + * until unregister_sysctl_table() is called with the given returned table + * with this registration. If your code is non modular then you don't need + * to call unregister_sysctl_table() and can instead use something like + * register_sysctl_init() which does not care for the result of the syctl + * registration. * * Register a sysctl table. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. @@ -1427,8 +1437,11 @@ EXPORT_SYMBOL(register_sysctl); /** * __register_sysctl_init() - register sysctl table to path - * @path: path name for sysctl base - * @table: This is the sysctl table that needs to be registered to the path + * @path: path name for sysctl base. If that path doesn't exist we will create + * it for you. + * @table: This is the sysctl table that needs to be registered to the path. + * The caller must ensure the life of the @table will be kept during the + * lifetime use of the sysctl. * @table_name: The name of sysctl table, only used for log printing when * registration fails * @@ -1570,6 +1583,7 @@ out: * * Register a sysctl table hierarchy. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. + * We are slowly deprecating this call so avoid its use. * * See __register_sysctl_table for more details. */ @@ -1641,6 +1655,7 @@ err_register_leaves: * * Register a sysctl table hierarchy. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. + * We are slowly deprecating this caller so avoid future uses of it. * * See __register_sysctl_paths for more details. */ From 37b768ce3d23f79cef906aaf7427dd345b57f477 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 00:37:19 -0800 Subject: [PATCH 16/29] lockd: simplify two-level sysctl registration for nlm_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Reviewed-by: Jeff Layton Signed-off-by: Luis Chamberlain --- fs/lockd/svc.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9a47303b2cba..bb94949bc223 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -510,24 +510,6 @@ static struct ctl_table nlm_sysctls[] = { { } }; -static struct ctl_table nlm_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nlm_sysctls, - }, - { } -}; - -static struct ctl_table nlm_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nlm_sysctl_dir, - }, - { } -}; - #endif /* CONFIG_SYSCTL */ /* @@ -644,7 +626,7 @@ static int __init init_nlm(void) #ifdef CONFIG_SYSCTL err = -ENOMEM; - nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); + nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls); if (nlm_sysctl_table == NULL) goto err_sysctl; #endif From d2235a705b48a1a86e2b8ea15091d83f7f80503c Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 00:39:18 -0800 Subject: [PATCH 17/29] nfs: simplify two-level sysctl registration for nfs4_cb_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Reviewed-by: Jeff Layton Signed-off-by: Luis Chamberlain --- fs/nfs/nfs4sysctl.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index c394e4447100..e776200e9a11 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -37,27 +37,10 @@ static struct ctl_table nfs4_cb_sysctls[] = { { } }; -static struct ctl_table nfs4_cb_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nfs4_cb_sysctls, - }, - { } -}; - -static struct ctl_table nfs4_cb_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nfs4_cb_sysctl_dir, - }, - { } -}; - int nfs4_register_sysctl(void) { - nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root); + nfs4_callback_sysctl_table = register_sysctl("fs/nfs", + nfs4_cb_sysctls); if (nfs4_callback_sysctl_table == NULL) return -ENOMEM; return 0; From a2189b77676b256b6606612fd68ed493f7080929 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 00:40:35 -0800 Subject: [PATCH 18/29] nfs: simplify two-level sysctl registration for nfs_cb_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Reviewed-by: Jeff Layton Signed-off-by: Luis Chamberlain --- fs/nfs/sysctl.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 7aea195ddb35..f39e2089bc4c 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,27 +32,9 @@ static struct ctl_table nfs_cb_sysctls[] = { { } }; -static struct ctl_table nfs_cb_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nfs_cb_sysctls, - }, - { } -}; - -static struct ctl_table nfs_cb_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nfs_cb_sysctl_dir, - }, - { } -}; - int nfs_register_sysctl(void) { - nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root); + nfs_callback_sysctl_table = register_sysctl("fs/nfs", nfs_cb_sysctls); if (nfs_callback_sysctl_table == NULL) return -ENOMEM; return 0; From f5d2b92c85d420d0020163b9ad153962d8f9fcc7 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 00:42:26 -0800 Subject: [PATCH 19/29] xfs: simplify two-level sysctl registration for xfs_table There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Reviewed-by: Dave Chinner Signed-off-by: Luis Chamberlain --- fs/xfs/xfs_sysctl.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 546a6cd96729..fade33735393 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -210,28 +210,10 @@ static struct ctl_table xfs_table[] = { {} }; -static struct ctl_table xfs_dir_table[] = { - { - .procname = "xfs", - .mode = 0555, - .child = xfs_table - }, - {} -}; - -static struct ctl_table xfs_root_table[] = { - { - .procname = "fs", - .mode = 0555, - .child = xfs_dir_table - }, - {} -}; - int xfs_sysctl_register(void) { - xfs_table_header = register_sysctl_table(xfs_root_table); + xfs_table_header = register_sysctl("fs/xfs", xfs_table); if (!xfs_table_header) return -ENOMEM; return 0; From 3d379b8d0de0492e86b93a1b8cd07403825fe23d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 13:46:23 -0800 Subject: [PATCH 20/29] fs/cachefiles: simplify one-level sysctl registration for cachefiles_sysctls There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain --- fs/cachefiles/error_inject.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c index 58f8aec964e4..18de8a876b02 100644 --- a/fs/cachefiles/error_inject.c +++ b/fs/cachefiles/error_inject.c @@ -22,18 +22,9 @@ static struct ctl_table cachefiles_sysctls[] = { {} }; -static struct ctl_table cachefiles_sysctls_root[] = { - { - .procname = "cachefiles", - .mode = 0555, - .child = cachefiles_sysctls, - }, - {} -}; - int __init cachefiles_register_error_injection(void) { - cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root); + cachefiles_sysctl = register_sysctl("cachefiles", cachefiles_sysctls); if (!cachefiles_sysctl) return -ENOMEM; return 0; From 1119aaa823e6cc35f2e8c8793cfa58d923a15c1b Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 13:48:17 -0800 Subject: [PATCH 21/29] coda: simplify one-level sysctl registration for coda_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Acked-by: Jan Harkes Signed-off-by: Luis Chamberlain --- fs/coda/sysctl.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index fda3b702b1c5..a247c14aaab7 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -39,19 +39,10 @@ static struct ctl_table coda_table[] = { {} }; -static struct ctl_table fs_table[] = { - { - .procname = "coda", - .mode = 0555, - .child = coda_table - }, - {} -}; - void coda_sysctl_init(void) { if ( !fs_table_header ) - fs_table_header = register_sysctl_table(fs_table); + fs_table_header = register_sysctl("coda", coda_table); } void coda_sysctl_clean(void) From 02148ff371b276d664780de6d1f52d52074c3e25 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 13:49:40 -0800 Subject: [PATCH 22/29] ntfs: simplfy one-level sysctl registration for ntfs_sysctls There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain --- fs/ntfs/sysctl.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c index a030d00af90c..174fe536a1c0 100644 --- a/fs/ntfs/sysctl.c +++ b/fs/ntfs/sysctl.c @@ -31,16 +31,6 @@ static struct ctl_table ntfs_sysctls[] = { {} }; -/* Define the parent directory /proc/sys/fs. */ -static struct ctl_table sysctls_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = ntfs_sysctls - }, - {} -}; - /* Storage for the sysctls header. */ static struct ctl_table_header *sysctls_root_table; @@ -54,7 +44,7 @@ int ntfs_sysctl(int add) { if (add) { BUG_ON(sysctls_root_table); - sysctls_root_table = register_sysctl_table(sysctls_root); + sysctls_root_table = register_sysctl("fs", ntfs_sysctls); if (!sysctls_root_table) return -ENOMEM; } else { From 3d51cd8ea3c832c50c22e46354ac89e0964ea2d8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 01:24:10 -0800 Subject: [PATCH 23/29] utsname: simplify one-level sysctl registration for uts_kern_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Reviewed-by: Christian Brauner Signed-off-by: Luis Chamberlain --- kernel/utsname_sysctl.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index f50398cb790d..019e3a1566cf 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -123,15 +123,6 @@ static struct ctl_table uts_kern_table[] = { {} }; -static struct ctl_table uts_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = uts_kern_table, - }, - {} -}; - #ifdef CONFIG_PROC_SYSCTL /* * Notify userspace about a change in a certain entry of uts_kern_table, @@ -147,7 +138,7 @@ void uts_proc_notify(enum uts_proc proc) static int __init utsname_sysctl_init(void) { - register_sysctl_table(uts_root_table); + register_sysctl("kernel", uts_kern_table); return 0; } From 03860ef038e6435c5ea2bda24dc5698e0da60ebc Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 01:10:11 -0800 Subject: [PATCH 24/29] ia64: simplify one-level sysctl registration for kdump_ctl_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain --- arch/ia64/kernel/crash.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 76730f34685c..88b3ce3e66cd 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -234,15 +234,6 @@ static struct ctl_table kdump_ctl_table[] = { }, { } }; - -static struct ctl_table sys_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = kdump_ctl_table, - }, - { } -}; #endif static int @@ -257,7 +248,7 @@ machine_crash_setup(void) if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) return ret; #ifdef CONFIG_SYSCTL - register_sysctl_table(sys_table); + register_sysctl("kernel", kdump_ctl_table); #endif return 0; } From ca14ccf310ee9d575c3bacbf1d61d9640d0025b6 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 00:50:50 -0800 Subject: [PATCH 25/29] arm: simplify two-level sysctl registration for ctl_isa_vars There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain --- arch/arm/kernel/isa.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c index d8a509c5d5bd..20218876bef2 100644 --- a/arch/arm/kernel/isa.c +++ b/arch/arm/kernel/isa.c @@ -40,27 +40,11 @@ static struct ctl_table ctl_isa_vars[4] = { static struct ctl_table_header *isa_sysctl_header; -static struct ctl_table ctl_isa[2] = { - { - .procname = "isa", - .mode = 0555, - .child = ctl_isa_vars, - }, {} -}; - -static struct ctl_table ctl_bus[2] = { - { - .procname = "bus", - .mode = 0555, - .child = ctl_isa, - }, {} -}; - void __init register_isa_ports(unsigned int membase, unsigned int portbase, unsigned int portshift) { isa_membase = membase; isa_portbase = portbase; isa_portshift = portshift; - isa_sysctl_header = register_sysctl_table(ctl_bus); + isa_sysctl_header = register_sysctl("bus/isa", ctl_isa_vars); } From 8cbc82f3ec0d58961cf9c1e5d99e56741f4bf134 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 20 Mar 2023 15:40:10 +0800 Subject: [PATCH 26/29] mm: memory-failure: Move memory failure sysctls to its own file The sysctl_memory_failure_early_kill and memory_failure_recovery are only used in memory-failure.c, move them to its own file. Acked-by: Naoya Horiguchi Signed-off-by: Kefeng Wang [mcgrof: fix by adding empty ctl entry, this caused a crash] Signed-off-by: Luis Chamberlain --- include/linux/mm.h | 2 -- kernel/sysctl.c | 20 -------------------- mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1f79667824eb..98da268b834a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3442,8 +3442,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); -extern int sysctl_memory_failure_early_kill; -extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ce0297acf97c..0a8a3c9c82e4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2350,26 +2350,6 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec, .extra1 = SYSCTL_ZERO, }, -#endif -#ifdef CONFIG_MEMORY_FAILURE - { - .procname = "memory_failure_early_kill", - .data = &sysctl_memory_failure_early_kill, - .maxlen = sizeof(sysctl_memory_failure_early_kill), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "memory_failure_recovery", - .data = &sysctl_memory_failure_recovery, - .maxlen = sizeof(sysctl_memory_failure_recovery), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "user_reserve_kbytes", diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fae9baf3be16..10e60b6b2447 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -62,13 +62,14 @@ #include #include #include +#include #include "swap.h" #include "internal.h" #include "ras/ras_event.h" -int sysctl_memory_failure_early_kill __read_mostly = 0; +static int sysctl_memory_failure_early_kill __read_mostly; -int sysctl_memory_failure_recovery __read_mostly = 1; +static int sysctl_memory_failure_recovery __read_mostly = 1; atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); @@ -122,6 +123,37 @@ const struct attribute_group memory_failure_attr_group = { .attrs = memory_failure_attr, }; +#ifdef CONFIG_SYSCTL +static struct ctl_table memory_failure_table[] = { + { + .procname = "memory_failure_early_kill", + .data = &sysctl_memory_failure_early_kill, + .maxlen = sizeof(sysctl_memory_failure_early_kill), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "memory_failure_recovery", + .data = &sysctl_memory_failure_recovery, + .maxlen = sizeof(sysctl_memory_failure_recovery), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static int __init memory_failure_sysctl_init(void) +{ + register_sysctl_init("vm", memory_failure_table); + return 0; +} +late_initcall(memory_failure_sysctl_init); +#endif /* CONFIG_SYSCTL */ + /* * Return values: * 1: the page is dissolved (if needed) and taken off from buddy, From 48fe8ab8d5a39c7bc49cb41d0ad92c75f48a9550 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Tue, 28 Mar 2023 14:46:28 +0800 Subject: [PATCH 27/29] mm: compaction: move compaction sysctl to its own file This moves all compaction sysctls to its own file. Move sysctl to where the functionality truly belongs to improve readability, reduce merge conflicts, and facilitate maintenance. I use x86_defconfig and linux-next-20230327 branch $ make defconfig;make all -jn CONFIG_COMPACTION=y add/remove: 1/0 grow/shrink: 1/1 up/down: 350/-256 (94) Function old new delta vm_compaction - 320 +320 kcompactd_init 180 210 +30 vm_table 2112 1856 -256 Total: Before=21119987, After=21120081, chg +0.00% Despite the addition of 94 bytes the patch still seems a worthwile cleanup. Link: https://lore.kernel.org/lkml/067f7347-ba10-5405-920c-0f5f985c84f4@suse.cz/ Signed-off-by: Minghao Chi Acked-by: Vlastimil Babka Signed-off-by: Luis Chamberlain --- include/linux/compaction.h | 7 ---- kernel/sysctl.c | 59 -------------------------- mm/compaction.c | 84 ++++++++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 78 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 52a9ff65faee..a6e512cfb670 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -81,13 +81,6 @@ static inline unsigned long compact_gap(unsigned int order) } #ifdef CONFIG_COMPACTION -extern unsigned int sysctl_compaction_proactiveness; -extern int sysctl_compaction_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); -extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table, - int write, void *buffer, size_t *length, loff_t *ppos); -extern int sysctl_extfrag_threshold; -extern int sysctl_compact_unevictable_allowed; extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0a8a3c9c82e4..bfe53e835524 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -746,27 +745,6 @@ int proc_dointvec(struct ctl_table *table, int write, void *buffer, return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); } -#ifdef CONFIG_COMPACTION -static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, - int write, void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret, old; - - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write) - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - old = *(int *)table->data; - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret) - return ret; - if (old != *(int *)table->data) - pr_warn_once("sysctl attribute %s changed by %s[%d]\n", - table->procname, current->comm, - task_pid_nr(current)); - return ret; -} -#endif - /** * proc_douintvec - read a vector of unsigned integers * @table: the sysctl table @@ -2157,43 +2135,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_FOUR, }, -#ifdef CONFIG_COMPACTION - { - .procname = "compact_memory", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = sysctl_compaction_handler, - }, - { - .procname = "compaction_proactiveness", - .data = &sysctl_compaction_proactiveness, - .maxlen = sizeof(sysctl_compaction_proactiveness), - .mode = 0644, - .proc_handler = compaction_proactiveness_sysctl_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "extfrag_threshold", - .data = &sysctl_extfrag_threshold, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_THOUSAND, - }, - { - .procname = "compact_unevictable_allowed", - .data = &sysctl_compact_unevictable_allowed, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax_warn_RT_change, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - -#endif /* CONFIG_COMPACTION */ { .procname = "min_free_kbytes", .data = &min_free_kbytes, diff --git a/mm/compaction.c b/mm/compaction.c index 5a9501e0ae01..f2ddfb1140e2 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1716,7 +1716,14 @@ typedef enum { * Allow userspace to control policy on scanning the unevictable LRU for * compactable pages. */ -int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT; +static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT; +/* + * Tunable for proactive compaction. It determines how + * aggressively the kernel should compact memory in the + * background. It takes values in the range [0, 100]. + */ +static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +static int sysctl_extfrag_threshold = 500; static inline void update_fast_start_pfn(struct compact_control *cc, unsigned long pfn) @@ -2572,8 +2579,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order, return ret; } -int sysctl_extfrag_threshold = 500; - /** * try_to_compact_pages - Direct compact to satisfy a high-order allocation * @gfp_mask: The GFP mask of the current allocation @@ -2730,14 +2735,7 @@ static void compact_nodes(void) compact_node(nid); } -/* - * Tunable for proactive compaction. It determines how - * aggressively the kernel should compact memory in the - * background. It takes values in the range [0, 100]. - */ -unsigned int __read_mostly sysctl_compaction_proactiveness = 20; - -int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, +static int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int rc, nid; @@ -2767,7 +2765,7 @@ int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, * This is the entry point for compacting all nodes via * /proc/sys/vm/compact_memory */ -int sysctl_compaction_handler(struct ctl_table *table, int write, +static int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { if (write) @@ -3063,6 +3061,65 @@ static int kcompactd_cpu_online(unsigned int cpu) return 0; } +static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, + int write, void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, old; + + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write) + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + old = *(int *)table->data; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret) + return ret; + if (old != *(int *)table->data) + pr_warn_once("sysctl attribute %s changed by %s[%d]\n", + table->procname, current->comm, + task_pid_nr(current)); + return ret; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_compaction[] = { + { + .procname = "compact_memory", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = sysctl_compaction_handler, + }, + { + .procname = "compaction_proactiveness", + .data = &sysctl_compaction_proactiveness, + .maxlen = sizeof(sysctl_compaction_proactiveness), + .mode = 0644, + .proc_handler = compaction_proactiveness_sysctl_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "extfrag_threshold", + .data = &sysctl_extfrag_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_THOUSAND, + }, + { + .procname = "compact_unevictable_allowed", + .data = &sysctl_compact_unevictable_allowed, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_warn_RT_change, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; +#endif + static int __init kcompactd_init(void) { int nid; @@ -3078,6 +3135,9 @@ static int __init kcompactd_init(void) for_each_node_state(nid, N_MEMORY) kcompactd_run(nid); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_compaction); +#endif return 0; } subsys_initcall(kcompactd_init) From b3f312c4815d7acf6c7f88f921544626c31bd24a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 29 Mar 2023 10:02:41 +0200 Subject: [PATCH 28/29] mm: compaction: remove incorrect #ifdef checks Without CONFIG_SYSCTL, the compiler warns about a few unused functions: mm/compaction.c:3076:12: error: 'proc_dointvec_minmax_warn_RT_change' defined but not used [-Werror=unused-function] mm/compaction.c:2780:12: error: 'sysctl_compaction_handler' defined but not used [-Werror=unused-function] mm/compaction.c:2750:12: error: 'compaction_proactiveness_sysctl_handler' defined but not used [-Werror=unused-function] The #ifdef is actually not necessary here, as the alternative register_sysctl_init() stub function does not use its argument, which lets the compiler drop the rest implicitly, while avoiding the warning. Fixes: c521126610c3 ("mm: compaction: move compaction sysctl to its own file") Signed-off-by: Arnd Bergmann Signed-off-by: Luis Chamberlain --- mm/compaction.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index f2ddfb1140e2..9ff71239b1fc 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3080,7 +3080,6 @@ static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, return ret; } -#ifdef CONFIG_SYSCTL static struct ctl_table vm_compaction[] = { { .procname = "compact_memory", @@ -3118,7 +3117,6 @@ static struct ctl_table vm_compaction[] = { }, { } }; -#endif static int __init kcompactd_init(void) { @@ -3135,9 +3133,7 @@ static int __init kcompactd_init(void) for_each_node_state(nid, N_MEMORY) kcompactd_run(nid); -#ifdef CONFIG_SYSCTL register_sysctl_init("vm", vm_compaction); -#endif return 0; } subsys_initcall(kcompactd_init) From e3184de9d46c2eebdb776face2e2662c6733331d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 31 Mar 2023 16:45:02 +0800 Subject: [PATCH 29/29] fs: fix sysctls.c built 'obj-$(CONFIG_SYSCTL) += sysctls.o' must be moved after "obj-y :=", or it won't be built as it is overwrited. Note that there is nothing that is going to break by linking sysctl.o later, we were just being way to cautious and patches have been updated to reflect these considerations and sent for stable as well with the whole "base" stuff needing to be linked prior to child sysctl tables that use that directory. All of the kernel sysctl APIs always share the same directory, and races against using it should end up re-using the same single created directory. And so something we can do eventually is do away with all the base stuff. For now it's fine, it's not creating an issue. It is just a bit pedantic and careful. Fixes: ab171b952c6e ("fs: move namespace sysctls and declare fs base directory") Cc: stable@vger.kernel.org # v5.17 Cc: Christian Brauner Cc: Kefeng Wang Signed-off-by: Kefeng Wang [mcgrof: enhanced commit log for stable criteria and clarify base stuff ] Signed-off-by: Luis Chamberlain --- fs/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/Makefile b/fs/Makefile index 05f89b5c962f..8d4736fcc766 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -6,7 +6,6 @@ # Rewritten to use lists instead of if-statements. # -obj-$(CONFIG_SYSCTL) += sysctls.o obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ @@ -50,7 +49,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o -obj-$(CONFIG_SYSCTL) += drop_caches.o +obj-$(CONFIG_SYSCTL) += drop_caches.o sysctls.o obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += iomap/