2019-05-27 08:55:05 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2006-06-28 04:26:45 -07:00
|
|
|
/*
|
2020-11-07 23:20:52 +03:00
|
|
|
* Copyright (C) 2002 Richard Henderson
|
|
|
|
* Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
|
2023-03-19 14:35:38 -07:00
|
|
|
* Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
|
2020-11-07 23:20:52 +03:00
|
|
|
*/
|
2020-04-19 18:55:06 +03:00
|
|
|
|
|
|
|
#define INCLUDE_VERMAGIC
|
|
|
|
|
2011-05-23 14:51:41 -04:00
|
|
|
#include <linux/export.h>
|
2016-07-23 14:01:45 -04:00
|
|
|
#include <linux/extable.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/moduleloader.h>
|
2019-07-04 15:57:34 -03:00
|
|
|
#include <linux/module_signature.h>
|
2015-04-29 14:36:05 -04:00
|
|
|
#include <linux/trace_events.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/init.h>
|
2007-05-08 00:28:38 -07:00
|
|
|
#include <linux/kallsyms.h>
|
2021-07-07 18:09:20 -07:00
|
|
|
#include <linux/buildid.h>
|
2008-10-06 13:19:27 +04:00
|
|
|
#include <linux/fs.h>
|
2005-09-13 01:25:16 -07:00
|
|
|
#include <linux/kernel.h>
|
2020-10-02 10:38:15 -07:00
|
|
|
#include <linux/kernel_read_file.h>
|
2022-11-01 22:14:06 +01:00
|
|
|
#include <linux/kstrtox.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/elf.h>
|
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/syscalls.h>
|
|
|
|
#include <linux/fcntl.h>
|
|
|
|
#include <linux/rcupdate.h>
|
2006-01-11 12:17:46 -08:00
|
|
|
#include <linux/capability.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/moduleparam.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/vermagic.h>
|
|
|
|
#include <linux/notifier.h>
|
2006-10-18 01:47:25 -04:00
|
|
|
#include <linux/sched.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <linux/device.h>
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer version than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
#include <linux/string.h>
|
2006-03-23 03:00:24 -08:00
|
|
|
#include <linux/mutex.h>
|
2008-08-30 10:09:00 +02:00
|
|
|
#include <linux/rculist.h>
|
2016-12-24 11:46:01 -08:00
|
|
|
#include <linux/uaccess.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <asm/cacheflush.h>
|
2017-07-06 15:35:58 -07:00
|
|
|
#include <linux/set_memory.h>
|
2009-09-21 17:03:57 -07:00
|
|
|
#include <asm/mmu_context.h>
|
2006-06-09 21:53:55 +02:00
|
|
|
#include <linux/license.h>
|
2008-02-08 04:18:42 -08:00
|
|
|
#include <asm/sections.h>
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connexion of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 12:16:16 -04:00
|
|
|
#include <linux/tracepoint.h>
|
2008-08-14 15:45:09 -04:00
|
|
|
#include <linux/ftrace.h>
|
2016-03-16 20:55:39 -04:00
|
|
|
#include <linux/livepatch.h>
|
2009-01-07 08:45:46 -08:00
|
|
|
#include <linux/async.h>
|
2009-02-20 16:29:08 +09:00
|
|
|
#include <linux/percpu.h>
|
2009-06-11 13:23:20 +01:00
|
|
|
#include <linux/kmemleak.h>
|
2010-09-17 11:09:00 -04:00
|
|
|
#include <linux/jump_label.h>
|
2010-11-16 22:35:16 +01:00
|
|
|
#include <linux/pfn.h>
|
2011-04-20 11:10:52 +02:00
|
|
|
#include <linux/bsearch.h>
|
2016-08-02 14:03:47 -07:00
|
|
|
#include <linux/dynamic_debug.h>
|
2017-02-04 13:10:38 -05:00
|
|
|
#include <linux/audit.h>
|
2022-09-08 14:54:47 -07:00
|
|
|
#include <linux/cfi.h>
|
2024-03-21 09:36:33 -07:00
|
|
|
#include <linux/codetag.h>
|
2023-03-28 20:03:19 -07:00
|
|
|
#include <linux/debugfs.h>
|
2024-05-05 19:06:18 +03:00
|
|
|
#include <linux/execmem.h>
|
2012-10-22 18:09:41 +10:30
|
|
|
#include <uapi/linux/module.h>
|
2022-03-22 14:03:31 +00:00
|
|
|
#include "internal.h"
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2009-08-17 16:56:28 +08:00
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
|
#include <trace/events/module.h>
|
|
|
|
|
2010-06-05 11:17:36 -06:00
|
|
|
/*
|
|
|
|
* Mutex protects:
|
|
|
|
* 1) List of modules (also safely readable with preempt_disable),
|
|
|
|
* 2) module_use links,
|
2022-02-23 13:02:15 +01:00
|
|
|
* 3) mod_tree.addr_min/mod_tree.addr_max.
|
2020-11-07 23:20:52 +03:00
|
|
|
* (delete and add uses RCU list operations).
|
|
|
|
*/
|
2022-03-22 14:03:32 +00:00
|
|
|
DEFINE_MUTEX(module_mutex);
|
|
|
|
LIST_HEAD(modules);
|
2010-05-20 21:04:21 -05:00
|
|
|
|
2019-04-25 17:11:37 -07:00
|
|
|
/* Work queue for freeing init sections in success case */
|
2020-10-08 13:32:20 -04:00
|
|
|
static void do_free_init(struct work_struct *w);
|
|
|
|
static DECLARE_WORK(init_free_wq, do_free_init);
|
|
|
|
static LLIST_HEAD(init_free_list);
|
2019-04-25 17:11:37 -07:00
|
|
|
|
2022-03-22 14:03:35 +00:00
|
|
|
struct mod_tree_root mod_tree __cacheline_aligned = {
|
2015-05-27 11:09:38 +09:30
|
|
|
.addr_min = -1UL,
|
2012-09-26 10:09:40 +01:00
|
|
|
};
|
|
|
|
|
2022-03-22 14:03:44 +00:00
|
|
|
/*
 * Describes one span of struct kernel_symbol entries together with a CRC
 * array and a mod_license value.
 *
 * NOTE(review): the users of this struct are outside this view. Presumably
 * start/stop delimit a symbol table as [start, stop) and crcs runs parallel
 * to it - confirm against the callers.
 */
struct symsearch {
|
|
|
|
const struct kernel_symbol *start, *stop;
|
2024-12-29 00:45:28 +09:00
|
|
|
const u32 *crcs;
|
2022-03-22 14:03:44 +00:00
|
|
|
enum mod_license license;
|
|
|
|
};
|
|
|
|
|
2015-05-27 11:09:38 +09:30
|
|
|
/*
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
* Bounds of module memory, for speeding up __module_address.
|
2015-05-27 11:09:38 +09:30
|
|
|
* Protected by module_mutex.
|
|
|
|
*/
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
/*
 * Widen the bounds recorded in @tree so that they cover the region
 * starting at @base and spanning @size bytes.
 *
 * @type: memory type of the region; only read when
 *        CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC is set, hence
 *        __maybe_unused.
 * @base: start address of the region.
 * @size: length of the region.
 * @tree: bounds to update.
 *
 * Bounds only ever grow here: a minimum is lowered or a maximum is raised,
 * never the reverse.
 */
static void __mod_update_bounds(enum mod_mem_type type __maybe_unused, void *base,
|
|
|
|
unsigned int size, struct mod_tree_root *tree)
|
2015-05-27 11:09:38 +09:30
|
|
|
{
|
|
|
|
unsigned long min = (unsigned long)base;
|
|
|
|
/* max is the address one past the last byte of the region. */
unsigned long max = min + size;
|
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
/*
 * With this option set, regions whose type is core data are tracked in the
 * separate data_addr_min/data_addr_max pair and skip the update below.
 */
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
|
|
|
|
if (mod_mem_type_is_core_data(type)) {
|
|
|
|
if (min < tree->data_addr_min)
|
|
|
|
tree->data_addr_min = min;
|
|
|
|
if (max > tree->data_addr_max)
|
|
|
|
tree->data_addr_max = max;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
2022-02-23 13:02:12 +01:00
|
|
|
/* Every region that reaches this point widens addr_min/addr_max. */
if (min < tree->addr_min)
|
|
|
|
tree->addr_min = min;
|
|
|
|
if (max > tree->addr_max)
|
|
|
|
tree->addr_max = max;
|
2015-05-27 11:09:38 +09:30
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fold the memory regions of @mod into the global mod_tree bounds by
 * calling __mod_update_bounds() for each entry of mod->mem[] that has a
 * non-zero size.
 *
 * NOTE(review): for_each_mod_mem_type() is defined outside this view;
 * presumably it declares the iterator and visits every mod_mem_type -
 * confirm.
 */
static void mod_update_bounds(struct module *mod)
|
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
struct module_memory *mod_mem = &mod->mem[type];
|
|
|
|
|
|
|
|
/* Empty regions are skipped and leave the bounds unchanged. */
if (mod_mem->size)
|
|
|
|
__mod_update_bounds(type, mod_mem->base, mod_mem->size, &mod_tree);
|
|
|
|
}
|
2015-05-27 11:09:38 +09:30
|
|
|
}
|
|
|
|
|
2009-04-14 17:27:18 +10:00
|
|
|
/* Block module loading/unloading? */
|
2022-06-13 08:02:01 +02:00
|
|
|
int modules_disabled;
|
2012-02-01 10:33:14 +08:00
|
|
|
core_param(nomodule, modules_disabled, bint, 0);
|
2009-04-14 17:27:18 +10:00
|
|
|
|
2008-01-29 17:13:18 -05:00
|
|
|
/* Waiting for a module to finish initializing? */
|
|
|
|
static DECLARE_WAIT_QUEUE_HEAD(module_wq);
|
|
|
|
|
[PATCH] Notifier chain update: API changes
The kernel's implementation of notifier chains is unsafe. There is no
protection against entries being added to or removed from a chain while the
chain is in use. The issues were discussed in this thread:
http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
We noticed that notifier chains in the kernel fall into two basic usage
classes:
"Blocking" chains are always called from a process context
and the callout routines are allowed to sleep;
"Atomic" chains can be called from an atomic context and
the callout routines are not allowed to sleep.
We decided to codify this distinction and make it part of the API. Therefore
this set of patches introduces three new, parallel APIs: one for blocking
notifiers, one for atomic notifiers, and one for "raw" notifiers (which is
really just the old API under a new name). New kinds of data structures are
used for the heads of the chains, and new routines are defined for
registration, unregistration, and calling a chain. The three APIs are
explained in include/linux/notifier.h and their implementation is in
kernel/sys.c.
With atomic and blocking chains, the implementation guarantees that the chain
links will not be corrupted and that chain callers will not get messed up by
entries being added or removed. For raw chains the implementation provides no
guarantees at all; users of this API must provide their own protections. (The
idea was that situations may come up where the assumptions of the atomic and
blocking APIs are not appropriate, so it should be possible for users to
handle these things in their own way.)
There are some limitations, which should not be too hard to live with. For
atomic/blocking chains, registration and unregistration must always be done in
a process context since the chain is protected by a mutex/rwsem. Also, a
callout routine for a non-raw chain must not try to register or unregister
entries on its own chain. (This did happen in a couple of places and the code
had to be changed to avoid it.)
Since atomic chains may be called from within an NMI handler, they cannot use
spinlocks for synchronization. Instead we use RCU. The overhead falls almost
entirely in the unregister routine, which is okay since unregistration is much
less frequent than calling a chain.
Here is the list of chains that we adjusted and their classifications. None
of them use the raw API, so for the moment it is only a placeholder.
ATOMIC CHAINS
-------------
arch/i386/kernel/traps.c: i386die_chain
arch/ia64/kernel/traps.c: ia64die_chain
arch/powerpc/kernel/traps.c: powerpc_die_chain
arch/sparc64/kernel/traps.c: sparc64die_chain
arch/x86_64/kernel/traps.c: die_chain
drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list
kernel/panic.c: panic_notifier_list
kernel/profile.c: task_free_notifier
net/bluetooth/hci_core.c: hci_notifier
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain
net/ipv6/addrconf.c: inet6addr_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain
net/netlink/af_netlink.c: netlink_chain
BLOCKING CHAINS
---------------
arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain
arch/s390/kernel/process.c: idle_chain
arch/x86_64/kernel/process.c idle_notifier
drivers/base/memory.c: memory_chain
drivers/cpufreq/cpufreq.c cpufreq_policy_notifier_list
drivers/cpufreq/cpufreq.c cpufreq_transition_notifier_list
drivers/macintosh/adb.c: adb_client_list
drivers/macintosh/via-pmu.c sleep_notifier_list
drivers/macintosh/via-pmu68k.c sleep_notifier_list
drivers/macintosh/windfarm_core.c wf_client_list
drivers/usb/core/notify.c usb_notifier_list
drivers/video/fbmem.c fb_notifier_list
kernel/cpu.c cpu_chain
kernel/module.c module_notify_list
kernel/profile.c munmap_notifier
kernel/profile.c task_exit_notifier
kernel/sys.c reboot_notifier_list
net/core/dev.c netdev_chain
net/decnet/dn_dev.c: dnaddr_chain
net/ipv4/devinet.c: inetaddr_chain
It's possible that some of these classifications are wrong. If they are,
please let us know or submit a patch to fix them. Note that any chain that
gets called very frequently should be atomic, because the rwsem read-locking
used for blocking chains is very likely to incur cache misses on SMP systems.
(However, if the chain's callout routines may sleep then the chain cannot be
atomic.)
The patch set was written by Alan Stern and Chandra Seetharaman, incorporating
material written by Keith Owens and suggestions from Paul McKenney and Andrew
Morton.
[jes@sgi.com: restructure the notifier chain initialization macros]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-27 01:16:30 -08:00
|
|
|
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2014-11-10 09:31:29 +10:30
|
|
|
int register_module_notifier(struct notifier_block *nb)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
[PATCH] Notifier chain update: API changes
The kernel's implementation of notifier chains is unsafe. There is no
protection against entries being added to or removed from a chain while the
chain is in use. The issues were discussed in this thread:
http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
We noticed that notifier chains in the kernel fall into two basic usage
classes:
"Blocking" chains are always called from a process context
and the callout routines are allowed to sleep;
"Atomic" chains can be called from an atomic context and
the callout routines are not allowed to sleep.
We decided to codify this distinction and make it part of the API. Therefore
this set of patches introduces three new, parallel APIs: one for blocking
notifiers, one for atomic notifiers, and one for "raw" notifiers (which is
really just the old API under a new name). New kinds of data structures are
used for the heads of the chains, and new routines are defined for
registration, unregistration, and calling a chain. The three APIs are
explained in include/linux/notifier.h and their implementation is in
kernel/sys.c.
With atomic and blocking chains, the implementation guarantees that the chain
links will not be corrupted and that chain callers will not get messed up by
entries being added or removed. For raw chains the implementation provides no
guarantees at all; users of this API must provide their own protections. (The
idea was that situations may come up where the assumptions of the atomic and
blocking APIs are not appropriate, so it should be possible for users to
handle these things in their own way.)
There are some limitations, which should not be too hard to live with. For
atomic/blocking chains, registration and unregistration must always be done in
a process context since the chain is protected by a mutex/rwsem. Also, a
callout routine for a non-raw chain must not try to register or unregister
entries on its own chain. (This did happen in a couple of places and the code
had to be changed to avoid it.)
Since atomic chains may be called from within an NMI handler, they cannot use
spinlocks for synchronization. Instead we use RCU. The overhead falls almost
entirely in the unregister routine, which is okay since unregistration is much
less frequent that calling a chain.
Here is the list of chains that we adjusted and their classifications. None
of them use the raw API, so for the moment it is only a placeholder.
ATOMIC CHAINS
-------------
arch/i386/kernel/traps.c: i386die_chain
arch/ia64/kernel/traps.c: ia64die_chain
arch/powerpc/kernel/traps.c: powerpc_die_chain
arch/sparc64/kernel/traps.c: sparc64die_chain
arch/x86_64/kernel/traps.c: die_chain
drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list
kernel/panic.c: panic_notifier_list
kernel/profile.c: task_free_notifier
net/bluetooth/hci_core.c: hci_notifier
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain
net/ipv6/addrconf.c: inet6addr_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain
net/netlink/af_netlink.c: netlink_chain
BLOCKING CHAINS
---------------
arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain
arch/s390/kernel/process.c: idle_chain
arch/x86_64/kernel/process.c idle_notifier
drivers/base/memory.c: memory_chain
drivers/cpufreq/cpufreq.c cpufreq_policy_notifier_list
drivers/cpufreq/cpufreq.c cpufreq_transition_notifier_list
drivers/macintosh/adb.c: adb_client_list
drivers/macintosh/via-pmu.c sleep_notifier_list
drivers/macintosh/via-pmu68k.c sleep_notifier_list
drivers/macintosh/windfarm_core.c wf_client_list
drivers/usb/core/notify.c usb_notifier_list
drivers/video/fbmem.c fb_notifier_list
kernel/cpu.c cpu_chain
kernel/module.c module_notify_list
kernel/profile.c munmap_notifier
kernel/profile.c task_exit_notifier
kernel/sys.c reboot_notifier_list
net/core/dev.c netdev_chain
net/decnet/dn_dev.c: dnaddr_chain
net/ipv4/devinet.c: inetaddr_chain
It's possible that some of these classifications are wrong. If they are,
please let us know or submit a patch to fix them. Note that any chain that
gets called very frequently should be atomic, because the rwsem read-locking
used for blocking chains is very likely to incur cache misses on SMP systems.
(However, if the chain's callout routines may sleep then the chain cannot be
atomic.)
The patch set was written by Alan Stern and Chandra Seetharaman, incorporating
material written by Keith Owens and suggestions from Paul McKenney and Andrew
Morton.
[jes@sgi.com: restructure the notifier chain initialization macros]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-27 01:16:30 -08:00
|
|
|
return blocking_notifier_chain_register(&module_notify_list, nb);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(register_module_notifier);
|
|
|
|
|
2014-11-10 09:31:29 +10:30
|
|
|
int unregister_module_notifier(struct notifier_block *nb)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
[PATCH] Notifier chain update: API changes
The kernel's implementation of notifier chains is unsafe. There is no
protection against entries being added to or removed from a chain while the
chain is in use. The issues were discussed in this thread:
http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
We noticed that notifier chains in the kernel fall into two basic usage
classes:
"Blocking" chains are always called from a process context
and the callout routines are allowed to sleep;
"Atomic" chains can be called from an atomic context and
the callout routines are not allowed to sleep.
We decided to codify this distinction and make it part of the API. Therefore
this set of patches introduces three new, parallel APIs: one for blocking
notifiers, one for atomic notifiers, and one for "raw" notifiers (which is
really just the old API under a new name). New kinds of data structures are
used for the heads of the chains, and new routines are defined for
registration, unregistration, and calling a chain. The three APIs are
explained in include/linux/notifier.h and their implementation is in
kernel/sys.c.
With atomic and blocking chains, the implementation guarantees that the chain
links will not be corrupted and that chain callers will not get messed up by
entries being added or removed. For raw chains the implementation provides no
guarantees at all; users of this API must provide their own protections. (The
idea was that situations may come up where the assumptions of the atomic and
blocking APIs are not appropriate, so it should be possible for users to
handle these things in their own way.)
There are some limitations, which should not be too hard to live with. For
atomic/blocking chains, registration and unregistration must always be done in
a process context since the chain is protected by a mutex/rwsem. Also, a
callout routine for a non-raw chain must not try to register or unregister
entries on its own chain. (This did happen in a couple of places and the code
had to be changed to avoid it.)
Since atomic chains may be called from within an NMI handler, they cannot use
spinlocks for synchronization. Instead we use RCU. The overhead falls almost
entirely in the unregister routine, which is okay since unregistration is much
less frequent that calling a chain.
Here is the list of chains that we adjusted and their classifications. None
of them use the raw API, so for the moment it is only a placeholder.
ATOMIC CHAINS
-------------
arch/i386/kernel/traps.c: i386die_chain
arch/ia64/kernel/traps.c: ia64die_chain
arch/powerpc/kernel/traps.c: powerpc_die_chain
arch/sparc64/kernel/traps.c: sparc64die_chain
arch/x86_64/kernel/traps.c: die_chain
drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list
kernel/panic.c: panic_notifier_list
kernel/profile.c: task_free_notifier
net/bluetooth/hci_core.c: hci_notifier
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain
net/ipv6/addrconf.c: inet6addr_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain
net/netlink/af_netlink.c: netlink_chain
BLOCKING CHAINS
---------------
arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain
arch/s390/kernel/process.c: idle_chain
arch/x86_64/kernel/process.c idle_notifier
drivers/base/memory.c: memory_chain
drivers/cpufreq/cpufreq.c cpufreq_policy_notifier_list
drivers/cpufreq/cpufreq.c cpufreq_transition_notifier_list
drivers/macintosh/adb.c: adb_client_list
drivers/macintosh/via-pmu.c sleep_notifier_list
drivers/macintosh/via-pmu68k.c sleep_notifier_list
drivers/macintosh/windfarm_core.c wf_client_list
drivers/usb/core/notify.c usb_notifier_list
drivers/video/fbmem.c fb_notifier_list
kernel/cpu.c cpu_chain
kernel/module.c module_notify_list
kernel/profile.c munmap_notifier
kernel/profile.c task_exit_notifier
kernel/sys.c reboot_notifier_list
net/core/dev.c netdev_chain
net/decnet/dn_dev.c: dnaddr_chain
net/ipv4/devinet.c: inetaddr_chain
It's possible that some of these classifications are wrong. If they are,
please let us know or submit a patch to fix them. Note that any chain that
gets called very frequently should be atomic, because the rwsem read-locking
used for blocking chains is very likely to incur cache misses on SMP systems.
(However, if the chain's callout routines may sleep then the chain cannot be
atomic.)
The patch set was written by Alan Stern and Chandra Seetharaman, incorporating
material written by Keith Owens and suggestions from Paul McKenney and Andrew
Morton.
[jes@sgi.com: restructure the notifier chain initialization macros]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-27 01:16:30 -08:00
|
|
|
return blocking_notifier_chain_unregister(&module_notify_list, nb);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(unregister_module_notifier);
|
|
|
|
|
2016-11-16 16:45:48 +01:00
|
|
|
/*
|
|
|
|
* We require a truly strong try_module_get(): 0 means success.
|
|
|
|
* Otherwise an error is returned due to ongoing or failed
|
|
|
|
* initialization etc.
|
|
|
|
*/
|
2005-04-16 15:20:36 -07:00
|
|
|
static inline int strong_try_module_get(struct module *mod)
|
|
|
|
{
|
2013-01-12 11:38:44 +10:30
|
|
|
BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (mod && mod->state == MODULE_STATE_COMING)
|
2008-01-29 17:13:18 -05:00
|
|
|
return -EBUSY;
|
|
|
|
if (try_module_get(mod))
|
2005-04-16 15:20:36 -07:00
|
|
|
return 0;
|
2008-01-29 17:13:18 -05:00
|
|
|
else
|
|
|
|
return -ENOENT;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2013-01-21 17:17:39 +10:30
|
|
|
static inline void add_taint_module(struct module *mod, unsigned flag,
|
|
|
|
enum lockdep_ok lockdep_ok)
|
2006-10-11 01:21:48 -07:00
|
|
|
{
|
2013-01-21 17:17:39 +10:30
|
|
|
add_taint(flag, lockdep_ok);
|
2016-09-21 13:47:22 +02:00
|
|
|
set_bit(flag, &mod->taints);
|
2006-10-11 01:21:48 -07:00
|
|
|
}
|
|
|
|
|
2007-05-09 07:26:28 +02:00
|
|
|
/*
|
|
|
|
* A thread that wants to hold a reference to a module only while it
|
2022-02-16 12:31:09 -05:00
|
|
|
* is running can call this to safely exit.
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
2021-12-03 11:00:19 -06:00
|
|
|
void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
module_put(mod);
|
2021-12-03 11:00:19 -06:00
|
|
|
kthread_exit(code);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2021-12-03 11:00:19 -06:00
|
|
|
EXPORT_SYMBOL(__module_put_and_kthread_exit);
|
2007-10-18 03:06:07 -07:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Find a module section: 0 means not found. */
|
2010-08-05 12:59:10 -06:00
|
|
|
static unsigned int find_sec(const struct load_info *info, const char *name)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
Elf_Shdr *shdr = &info->sechdrs[i];
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Alloc bit cleared means "ignore it." */
|
2010-08-05 12:59:10 -06:00
|
|
|
if ((shdr->sh_flags & SHF_ALLOC)
|
|
|
|
&& strcmp(info->secstrings + shdr->sh_name, name) == 0)
|
2005-04-16 15:20:36 -07:00
|
|
|
return i;
|
2010-08-05 12:59:10 -06:00
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:39 +00:00
|
|
|
/**
|
|
|
|
* find_any_unique_sec() - Find a unique section index by name
|
|
|
|
* @info: Load info for the module to scan
|
|
|
|
* @name: Name of the section we're looking for
|
|
|
|
*
|
|
|
|
* Locates a unique section by name. Ignores SHF_ALLOC.
|
|
|
|
*
|
|
|
|
* Return: Section index if found uniquely, zero if absent, negative count
|
|
|
|
* of total instances if multiple were found.
|
|
|
|
*/
|
|
|
|
static int find_any_unique_sec(const struct load_info *info, const char *name)
|
|
|
|
{
|
|
|
|
unsigned int idx;
|
|
|
|
unsigned int count = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
if (strcmp(info->secstrings + info->sechdrs[i].sh_name,
|
|
|
|
name) == 0) {
|
|
|
|
count++;
|
|
|
|
idx = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (count == 1) {
|
|
|
|
return idx;
|
|
|
|
} else if (count == 0) {
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
return -count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-10-22 10:00:13 -05:00
|
|
|
/* Find a module section, or NULL. */
|
2010-08-05 12:59:10 -06:00
|
|
|
static void *section_addr(const struct load_info *info, const char *name)
|
2008-10-22 10:00:13 -05:00
|
|
|
{
|
|
|
|
/* Section 0 has sh_addr 0. */
|
2010-08-05 12:59:10 -06:00
|
|
|
return (void *)info->sechdrs[find_sec(info, name)].sh_addr;
|
2008-10-22 10:00:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Find a module section, or NULL. Fill in number of "objects" in section. */
|
2010-08-05 12:59:10 -06:00
|
|
|
static void *section_objs(const struct load_info *info,
|
2008-10-22 10:00:13 -05:00
|
|
|
const char *name,
|
|
|
|
size_t object_size,
|
|
|
|
unsigned int *num)
|
|
|
|
{
|
2010-08-05 12:59:10 -06:00
|
|
|
unsigned int sec = find_sec(info, name);
|
2008-10-22 10:00:13 -05:00
|
|
|
|
|
|
|
/* Section 0 has sh_addr 0 and sh_size 0. */
|
2010-08-05 12:59:10 -06:00
|
|
|
*num = info->sechdrs[sec].sh_size / object_size;
|
|
|
|
return (void *)info->sechdrs[sec].sh_addr;
|
2008-10-22 10:00:13 -05:00
|
|
|
}
|
|
|
|
|
2020-11-09 17:19:31 -08:00
|
|
|
/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
|
|
|
|
static unsigned int find_any_sec(const struct load_info *info, const char *name)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
Elf_Shdr *shdr = &info->sechdrs[i];
|
|
|
|
if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find a module section, or NULL. Fill in number of "objects" in section.
|
|
|
|
* Ignores SHF_ALLOC flag.
|
|
|
|
*/
|
|
|
|
static __maybe_unused void *any_section_objs(const struct load_info *info,
|
|
|
|
const char *name,
|
|
|
|
size_t object_size,
|
|
|
|
unsigned int *num)
|
|
|
|
{
|
|
|
|
unsigned int sec = find_any_sec(info, name);
|
|
|
|
|
|
|
|
/* Section 0 has sh_addr 0 and sh_size 0. */
|
|
|
|
*num = info->sechdrs[sec].sh_size / object_size;
|
|
|
|
return (void *)info->sechdrs[sec].sh_addr;
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
 * symversion(base, idx) - entry @idx of the table starting at @base.
 *
 * Without CONFIG_MODVERSIONS this always evaluates to NULL.  With it, the
 * result is @base + @idx, or NULL when @base itself is NULL.
 *
 * Both arguments are fully parenthesized so that expressions containing
 * low-precedence operators expand as intended.  Note that @base is
 * evaluated twice, so it must not have side effects.
 */
#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
#else
#define symversion(base, idx) (((base) != NULL) ? ((base) + (idx)) : NULL)
#endif
|
|
|
|
|
module: use relative references for __ksymtab entries
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.
When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.
Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead. This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).
Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name. This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.
Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.
Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-21 21:56:09 -07:00
|
|
|
static const char *kernel_symbol_name(const struct kernel_symbol *sym)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
|
|
|
|
return offset_to_ptr(&sym->name_offset);
|
|
|
|
#else
|
|
|
|
return sym->name;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
static const char *kernel_symbol_namespace(const struct kernel_symbol *sym)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
|
2019-09-11 13:26:46 +01:00
|
|
|
if (!sym->namespace_offset)
|
|
|
|
return NULL;
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
return offset_to_ptr(&sym->namespace_offset);
|
|
|
|
#else
|
|
|
|
return sym->namespace;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2022-03-22 14:03:39 +00:00
|
|
|
/*
 * Comparison callback: orders the string @name against the name of the
 * export-table entry @sym, with strcmp() semantics.
 */
int cmp_name(const void *name, const void *sym)
{
	const char *wanted = name;
	const struct kernel_symbol *ksym = sym;

	return strcmp(wanted, kernel_symbol_name(ksym));
}
|
|
|
|
|
2018-11-19 17:43:58 +01:00
|
|
|
static bool find_exported_symbol_in_section(const struct symsearch *syms,
|
|
|
|
struct module *owner,
|
2022-05-05 12:52:10 +09:00
|
|
|
struct find_symbol_arg *fsa)
|
2011-04-19 21:49:58 +02:00
|
|
|
{
|
2011-04-20 11:10:52 +02:00
|
|
|
struct kernel_symbol *sym;
|
|
|
|
|
2022-05-05 12:52:11 +09:00
|
|
|
if (!fsa->gplok && syms->license == GPL_ONLY)
|
|
|
|
return false;
|
|
|
|
|
2011-04-20 11:10:52 +02:00
|
|
|
sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
|
|
|
|
sizeof(struct kernel_symbol), cmp_name);
|
2022-05-05 12:52:12 +09:00
|
|
|
if (!sym)
|
|
|
|
return false;
|
2011-04-20 11:10:52 +02:00
|
|
|
|
2022-05-05 12:52:12 +09:00
|
|
|
fsa->owner = owner;
|
|
|
|
fsa->crc = symversion(syms->crcs, sym - syms->start);
|
|
|
|
fsa->sym = sym;
|
|
|
|
fsa->license = syms->license;
|
2011-04-19 21:49:58 +02:00
|
|
|
|
2022-05-05 12:52:12 +09:00
|
|
|
return true;
|
2011-04-19 21:49:58 +02:00
|
|
|
}
|
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Find an exported symbol and return it, along with, (optional) crc and
|
|
|
|
* (optional) module which owns it. Needs preempt disabled or module_mutex.
|
|
|
|
*/
|
2022-03-22 14:03:44 +00:00
|
|
|
bool find_symbol(struct find_symbol_arg *fsa)
|
2008-07-22 19:24:25 -05:00
|
|
|
{
|
2021-02-02 13:13:30 +01:00
|
|
|
static const struct symsearch arr[] = {
|
|
|
|
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
|
2021-02-02 13:13:34 +01:00
|
|
|
NOT_GPL_ONLY },
|
2021-02-02 13:13:30 +01:00
|
|
|
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
|
|
|
|
__start___kcrctab_gpl,
|
2021-02-02 13:13:34 +01:00
|
|
|
GPL_ONLY },
|
2021-02-02 13:13:30 +01:00
|
|
|
};
|
|
|
|
struct module *mod;
|
|
|
|
unsigned int i;
|
2008-07-22 19:24:25 -05:00
|
|
|
|
2021-02-02 13:13:30 +01:00
|
|
|
module_assert_mutex_or_preempt();
|
2008-07-22 19:24:25 -05:00
|
|
|
|
2021-02-02 13:13:30 +01:00
|
|
|
for (i = 0; i < ARRAY_SIZE(arr); i++)
|
2021-01-20 14:58:27 +01:00
|
|
|
if (find_exported_symbol_in_section(&arr[i], NULL, fsa))
|
|
|
|
return true;
|
2021-02-02 13:13:30 +01:00
|
|
|
|
|
|
|
list_for_each_entry_rcu(mod, &modules, list,
|
|
|
|
lockdep_is_held(&module_mutex)) {
|
|
|
|
struct symsearch arr[] = {
|
|
|
|
{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
|
2021-02-02 13:13:34 +01:00
|
|
|
NOT_GPL_ONLY },
|
2021-02-02 13:13:30 +01:00
|
|
|
{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
|
|
|
|
mod->gpl_crcs,
|
2021-02-02 13:13:34 +01:00
|
|
|
GPL_ONLY },
|
2021-02-02 13:13:30 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
if (mod->state == MODULE_STATE_UNFORMED)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(arr); i++)
|
2021-01-20 14:58:27 +01:00
|
|
|
if (find_exported_symbol_in_section(&arr[i], mod, fsa))
|
|
|
|
return true;
|
2008-07-22 19:24:25 -05:00
|
|
|
}
|
|
|
|
|
2021-01-20 14:58:27 +01:00
|
|
|
pr_debug("Failed to find symbol %s\n", fsa->name);
|
|
|
|
return false;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2015-07-29 05:52:14 +09:30
|
|
|
/*
|
|
|
|
* Search for module by name: must hold module_mutex (or preempt disabled
|
|
|
|
* for read-only access).
|
|
|
|
*/
|
2022-03-22 14:03:39 +00:00
|
|
|
struct module *find_module_all(const char *name, size_t len,
|
|
|
|
bool even_unformed)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
struct module *mod;
|
|
|
|
|
2015-07-29 05:52:14 +09:30
|
|
|
module_assert_mutex_or_preempt();
|
2015-05-27 11:09:35 +09:30
|
|
|
|
2019-12-03 15:14:04 +09:00
|
|
|
list_for_each_entry_rcu(mod, &modules, list,
|
|
|
|
lockdep_is_held(&module_mutex)) {
|
2013-01-12 11:38:44 +10:30
|
|
|
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
|
|
|
|
continue;
|
2013-07-02 15:35:11 +09:30
|
|
|
if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
|
2005-04-16 15:20:36 -07:00
|
|
|
return mod;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
2013-01-12 11:38:44 +10:30
|
|
|
|
|
|
|
/*
 * Find a module by its NUL-terminated @name, skipping modules that are
 * still in MODULE_STATE_UNFORMED.  Thin wrapper around find_module_all().
 */
struct module *find_module(const char *name)
{
	size_t len = strlen(name);

	return find_module_all(name, len, false);
}
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
2009-02-20 16:29:08 +09:00
|
|
|
|
2010-03-10 18:56:10 +09:00
|
|
|
/* SMP: hand back the per-cpu pointer stored in @mod. */
static inline void __percpu *mod_percpu(struct module *mod)
{
	void __percpu *area = mod->percpu;

	return area;
}
|
2009-02-20 16:29:08 +09:00
|
|
|
|
2013-07-03 10:06:29 +09:30
|
|
|
static int percpu_modalloc(struct module *mod, struct load_info *info)
|
2010-03-10 18:56:10 +09:00
|
|
|
{
|
2013-07-03 10:06:29 +09:30
|
|
|
Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu];
|
|
|
|
unsigned long align = pcpusec->sh_addralign;
|
|
|
|
|
|
|
|
if (!pcpusec->sh_size)
|
|
|
|
return 0;
|
|
|
|
|
2009-02-20 16:29:08 +09:00
|
|
|
if (align > PAGE_SIZE) {
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: per-cpu alignment %li > %li\n",
|
|
|
|
mod->name, align, PAGE_SIZE);
|
2009-02-20 16:29:08 +09:00
|
|
|
align = PAGE_SIZE;
|
|
|
|
}
|
|
|
|
|
2013-07-03 10:06:29 +09:30
|
|
|
mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align);
|
2010-03-10 18:56:10 +09:00
|
|
|
if (!mod->percpu) {
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: Could not allocate %lu bytes percpu data\n",
|
|
|
|
mod->name, (unsigned long)pcpusec->sh_size);
|
2010-03-10 18:56:10 +09:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2013-07-03 10:06:29 +09:30
|
|
|
mod->percpu_size = pcpusec->sh_size;
|
2010-03-10 18:56:10 +09:00
|
|
|
return 0;
|
2009-02-20 16:29:08 +09:00
|
|
|
}
|
|
|
|
|
2010-03-10 18:56:10 +09:00
|
|
|
static void percpu_modfree(struct module *mod)
|
2009-02-20 16:29:08 +09:00
|
|
|
{
|
2010-03-10 18:56:10 +09:00
|
|
|
free_percpu(mod->percpu);
|
2009-02-20 16:29:08 +09:00
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
/* SMP: index of the ".data..percpu" section, or 0 if find_sec() finds none. */
static unsigned int find_pcpusec(struct load_info *info)
{
	unsigned int sec = find_sec(info, ".data..percpu");

	return sec;
}
|
|
|
|
|
2010-03-10 18:56:10 +09:00
|
|
|
static void percpu_modcopy(struct module *mod,
|
|
|
|
const void *from, unsigned long size)
|
2009-02-20 16:29:07 +09:00
|
|
|
{
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
for_each_possible_cpu(cpu)
|
2010-03-10 18:56:10 +09:00
|
|
|
memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
|
2009-02-20 16:29:07 +09:00
|
|
|
}
|
|
|
|
|
2017-02-27 15:37:36 +01:00
|
|
|
bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
|
2010-03-10 18:57:54 +09:00
|
|
|
{
|
|
|
|
struct module *mod;
|
|
|
|
unsigned int cpu;
|
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
|
|
|
|
list_for_each_entry_rcu(mod, &modules, list) {
|
2013-01-12 11:38:44 +10:30
|
|
|
if (mod->state == MODULE_STATE_UNFORMED)
|
|
|
|
continue;
|
2010-03-10 18:57:54 +09:00
|
|
|
if (!mod->percpu_size)
|
|
|
|
continue;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
void *start = per_cpu_ptr(mod->percpu, cpu);
|
2017-02-27 15:37:36 +01:00
|
|
|
void *va = (void *)addr;
|
2010-03-10 18:57:54 +09:00
|
|
|
|
2017-02-27 15:37:36 +01:00
|
|
|
if (va >= start && va < start + mod->percpu_size) {
|
2017-03-20 12:26:55 +01:00
|
|
|
if (can_addr) {
|
2017-02-27 15:37:36 +01:00
|
|
|
*can_addr = (unsigned long) (va - start);
|
2017-03-20 12:26:55 +01:00
|
|
|
*can_addr += (unsigned long)
|
|
|
|
per_cpu_ptr(mod->percpu,
|
|
|
|
get_boot_cpu_id());
|
|
|
|
}
|
2010-03-10 18:57:54 +09:00
|
|
|
preempt_enable();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
preempt_enable();
|
|
|
|
return false;
|
2009-02-20 16:29:07 +09:00
|
|
|
}
|
|
|
|
|
2017-02-27 15:37:36 +01:00
|
|
|
/**
 * is_module_percpu_address() - test whether address is from module static percpu
 * @addr: address to test
 *
 * Same check as __is_module_percpu_address(), without asking for the
 * canonical address.
 *
 * Return: %true if @addr is from module static percpu area
 */
bool is_module_percpu_address(unsigned long addr)
{
	unsigned long *no_canonical = NULL;

	return __is_module_percpu_address(addr, no_canonical);
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#else /* ... !CONFIG_SMP */
|
2009-02-20 16:29:07 +09:00
|
|
|
|
2010-03-10 18:56:10 +09:00
|
|
|
/* !SMP stub: there is no per-cpu pointer to hand out, so always NULL. */
static inline void __percpu *mod_percpu(struct module *mod)
{
	void __percpu *none = NULL;

	return none;
}
|
2013-07-03 10:06:29 +09:30
|
|
|
static int percpu_modalloc(struct module *mod, struct load_info *info)
|
2010-03-10 18:56:10 +09:00
|
|
|
{
|
2013-07-03 10:06:29 +09:30
|
|
|
/* UP modules shouldn't have this section: ENOMEM isn't quite right */
|
|
|
|
if (info->sechdrs[info->index.pcpu].sh_size != 0)
|
|
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
2010-03-10 18:56:10 +09:00
|
|
|
}
|
|
|
|
/* !CONFIG_SMP variant: nothing was allocated, so there is nothing to free. */
static inline void percpu_modfree(struct module *mod)
{
}
|
2010-08-05 12:59:10 -06:00
|
|
|
/* !CONFIG_SMP variant: always reports section index 0. */
static unsigned int find_pcpusec(struct load_info *info)
{
	return 0;
}
|
2010-03-10 18:56:10 +09:00
|
|
|
/*
 * !CONFIG_SMP variant: no copy is performed.  pcpusec should be 0 and the
 * size of that section should be 0, so any non-zero @size is a bug.
 */
static inline void percpu_modcopy(struct module *mod,
				  const void *from, unsigned long size)
{
	BUG_ON(size != 0);
}
|
2010-03-10 18:57:54 +09:00
|
|
|
bool is_module_percpu_address(unsigned long addr)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
2009-02-20 16:29:07 +09:00
|
|
|
|
2017-02-27 15:37:36 +01:00
|
|
|
bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer version than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
#define MODINFO_ATTR(field) \
|
|
|
|
static void setup_modinfo_##field(struct module *mod, const char *s) \
|
|
|
|
{ \
|
|
|
|
mod->field = kstrdup(s, GFP_KERNEL); \
|
|
|
|
} \
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_modinfo_##field(const struct module_attribute *mattr, \
|
2011-07-24 22:06:04 +09:30
|
|
|
struct module_kobject *mk, char *buffer) \
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
{ \
|
2013-08-20 15:34:21 +09:30
|
|
|
return scnprintf(buffer, PAGE_SIZE, "%s\n", mk->mod->field); \
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
} \
|
|
|
|
static int modinfo_##field##_exists(struct module *mod) \
|
|
|
|
{ \
|
|
|
|
return mod->field != NULL; \
|
|
|
|
} \
|
|
|
|
static void free_modinfo_##field(struct module *mod) \
|
|
|
|
{ \
|
2007-10-18 03:06:07 -07:00
|
|
|
kfree(mod->field); \
|
|
|
|
mod->field = NULL; \
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
} \
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_##field = { \
|
2007-06-14 03:45:17 +09:00
|
|
|
.attr = { .name = __stringify(field), .mode = 0444 }, \
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
.show = show_modinfo_##field, \
|
|
|
|
.setup = setup_modinfo_##field, \
|
|
|
|
.test = modinfo_##field##_exists, \
|
|
|
|
.free = free_modinfo_##field, \
|
|
|
|
};
|
|
|
|
|
|
|
|
MODINFO_ATTR(version);
|
|
|
|
MODINFO_ATTR(srcversion);
|
|
|
|
|
2022-07-14 16:39:33 +01:00
|
|
|
static struct {
|
|
|
|
char name[MODULE_NAME_LEN + 1];
|
|
|
|
char taints[MODULE_FLAGS_BUF_SIZE];
|
|
|
|
} last_unloaded_module;
|
2008-01-25 21:08:33 +01:00
|
|
|
|
2006-02-16 13:50:23 -08:00
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
2010-03-29 14:25:18 -04:00
|
|
|
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(module_get);
|
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
/* MODULE_REF_BASE is the base reference count set by the module loader. */
|
|
|
|
#define MODULE_REF_BASE 1
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Init the unload section of the module. */
|
2010-08-05 12:59:04 -06:00
|
|
|
static int module_unload_init(struct module *mod)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2014-11-10 09:30:29 +10:30
|
|
|
/*
|
|
|
|
* Initialize reference counter to MODULE_REF_BASE.
|
|
|
|
* refcnt == 0 means module is going.
|
|
|
|
*/
|
|
|
|
atomic_set(&mod->refcnt, MODULE_REF_BASE);
|
2010-08-05 12:59:04 -06:00
|
|
|
|
2010-05-31 12:19:37 -07:00
|
|
|
INIT_LIST_HEAD(&mod->source_list);
|
|
|
|
INIT_LIST_HEAD(&mod->target_list);
|
2010-01-05 15:34:50 +09:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Hold reference count during initialization. */
|
2014-11-10 09:30:29 +10:30
|
|
|
atomic_inc(&mod->refcnt);
|
2010-08-05 12:59:04 -06:00
|
|
|
|
|
|
|
return 0;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Does a already use b? */
|
|
|
|
static int already_uses(struct module *a, struct module *b)
|
|
|
|
{
|
|
|
|
struct module_use *use;
|
|
|
|
|
2010-05-31 12:19:37 -07:00
|
|
|
list_for_each_entry(use, &b->source_list, source_list) {
|
2023-03-21 19:36:23 -06:00
|
|
|
if (use->source == a)
|
2005-04-16 15:20:36 -07:00
|
|
|
return 1;
|
|
|
|
}
|
2011-12-06 12:11:31 -07:00
|
|
|
pr_debug("%s does not use %s!\n", a->name, b->name);
|
2005-04-16 15:20:36 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-05-31 12:19:37 -07:00
|
|
|
/*
|
|
|
|
* Module a uses b
|
|
|
|
* - we add 'a' as a "source", 'b' as a "target" of module use
|
|
|
|
* - the module_use is added to the list of 'b' sources (so
|
|
|
|
* 'b' can walk the list to see who sourced them), and of 'a'
|
|
|
|
* targets (so 'a' can see what modules it targets).
|
|
|
|
*/
|
|
|
|
static int add_module_usage(struct module *a, struct module *b)
|
|
|
|
{
|
|
|
|
struct module_use *use;
|
|
|
|
|
2011-12-06 12:11:31 -07:00
|
|
|
pr_debug("Allocating new usage for %s.\n", a->name);
|
2010-05-31 12:19:37 -07:00
|
|
|
use = kmalloc(sizeof(*use), GFP_ATOMIC);
|
2017-10-06 16:27:26 +02:00
|
|
|
if (!use)
|
2010-05-31 12:19:37 -07:00
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
use->source = a;
|
|
|
|
use->target = b;
|
|
|
|
list_add(&use->source_list, &b->source_list);
|
|
|
|
list_add(&use->target_list, &a->target_list);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-06-05 11:17:36 -06:00
|
|
|
/*
 * Make module @a take a reference on module @b and record the dependency.
 * The caller needs module_mutex.
 *
 * Nothing is done (and 0 is returned) when @b is NULL or the usage is already
 * recorded.  Otherwise a reference on @b is taken, and dropped again if the
 * usage entry cannot be added.
 *
 * Returns 0 on success or the error from strong_try_module_get() or
 * add_module_usage().
 */
static int ref_module(struct module *a, struct module *b)
{
	int err;

	if (!b || already_uses(a, b))
		return 0;

	/* If module isn't available, we fail. */
	err = strong_try_module_get(b);
	if (err)
		return err;

	err = add_module_usage(a, b);
	if (err)
		module_put(b);

	return err;
}
|
|
|
|
|
|
|
|
/* Clear the unload stuff of the module. */
|
|
|
|
static void module_unload_free(struct module *mod)
|
|
|
|
{
|
2010-05-31 12:19:37 -07:00
|
|
|
struct module_use *use, *tmp;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2010-06-05 11:17:36 -06:00
|
|
|
mutex_lock(&module_mutex);
|
2010-05-31 12:19:37 -07:00
|
|
|
list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
|
|
|
|
struct module *i = use->target;
|
2011-12-06 12:11:31 -07:00
|
|
|
pr_debug("%s unusing %s\n", mod->name, i->name);
|
2010-05-31 12:19:37 -07:00
|
|
|
module_put(i);
|
|
|
|
list_del(&use->source_list);
|
|
|
|
list_del(&use->target_list);
|
|
|
|
kfree(use);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2010-06-05 11:17:36 -06:00
|
|
|
mutex_unlock(&module_mutex);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_MODULE_FORCE_UNLOAD
/*
 * Decide whether a forced unload was requested.  Returns the O_TRUNC bit of
 * @flags.  When that bit is set the kernel is also tainted with
 * TAINT_FORCED_RMMOD.
 */
static inline int try_force_unload(unsigned int flags)
{
	int force = flags & O_TRUNC;

	if (!force)
		return 0;

	add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE);
	return force;
}
#else
/* Forced unloading is not configured: never force. */
static inline int try_force_unload(unsigned int flags)
{
	return 0;
}
#endif /* CONFIG_MODULE_FORCE_UNLOAD */
|
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
/* Try to release refcount of module, 0 means success. */
|
|
|
|
static int try_release_module_ref(struct module *mod)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2014-11-10 09:30:29 +10:30
|
|
|
int ret;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
/* Try to decrement refcnt which we set at loading */
|
|
|
|
ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
|
|
|
|
BUG_ON(ret < 0);
|
|
|
|
if (ret)
|
|
|
|
/* Someone can put this right now, recover with checking */
|
|
|
|
ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
return ret;
|
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
static int try_stop_module(struct module *mod, int flags, int *forced)
|
|
|
|
{
|
2008-07-22 19:24:25 -05:00
|
|
|
/* If it's not unused, quit unless we're forcing. */
|
2014-11-10 09:30:29 +10:30
|
|
|
if (try_release_module_ref(mod) != 0) {
|
|
|
|
*forced = try_force_unload(flags);
|
|
|
|
if (!(*forced))
|
2005-04-16 15:20:36 -07:00
|
|
|
return -EWOULDBLOCK;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Mark it as dying. */
|
2014-11-10 09:30:29 +10:30
|
|
|
mod->state = MODULE_STATE_GOING;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2014-11-10 09:30:29 +10:30
|
|
|
return 0;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2015-01-22 11:13:14 +10:30
|
|
|
/**
|
2020-11-04 23:34:59 +03:00
|
|
|
* module_refcount() - return the refcount or -1 if unloading
|
2015-01-22 11:13:14 +10:30
|
|
|
* @mod: the module we're checking
|
|
|
|
*
|
2020-11-04 23:34:59 +03:00
|
|
|
* Return:
|
2015-01-22 11:13:14 +10:30
|
|
|
* -1 if the module is in the process of unloading
|
|
|
|
* otherwise the number of references in the kernel to the module
|
|
|
|
*/
|
|
|
|
int module_refcount(struct module *mod)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2015-01-22 11:13:14 +10:30
|
|
|
return atomic_read(&mod->refcnt) - MODULE_REF_BASE;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(module_refcount);
|
|
|
|
|
|
|
|
/* This exists whether we can unload or not */
|
|
|
|
static void free_module(struct module *mod);
|
|
|
|
|
2009-01-14 14:14:10 +01:00
|
|
|
/*
 * delete_module(2): unload the module named by @name_user.
 *
 * Error returns visible in this function:
 *   -EPERM       caller lacks CAP_SYS_MODULE, or modules_disabled is set
 *   -EFAULT      the name could not be copied from user space
 *   -EINTR       interrupted while waiting for module_mutex
 *   -ENOENT      no module of that name was found
 *   -EWOULDBLOCK other modules depend on this one (or, via try_stop_module(),
 *                it is still referenced and the unload is not forced)
 *   -EBUSY       the module is not live, or it has an init function but no
 *                exit function and the unload is not forced
 *
 * Returns 0 once the module has been freed.
 */
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
		unsigned int, flags)
{
	char taint_buf[MODULE_FLAGS_BUF_SIZE];
	char name[MODULE_NAME_LEN];
	struct module *mod;
	int forced = 0;
	int ret;

	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	if (strncpy_from_user(name, name_user, sizeof(name) - 1) < 0)
		return -EFAULT;
	name[sizeof(name) - 1] = '\0';

	audit_log_kern_module(name);

	if (mutex_lock_interruptible(&module_mutex))
		return -EINTR;

	mod = find_module(name);
	if (mod == NULL) {
		ret = -ENOENT;
		goto out;
	}

	if (!list_empty(&mod->source_list)) {
		/* Other modules depend on us: get rid of them first. */
		ret = -EWOULDBLOCK;
		goto out;
	}

	/* Doing init or already dying? */
	if (mod->state != MODULE_STATE_LIVE) {
		/* FIXME: if (force), slam module count damn the torpedoes */
		pr_debug("%s already dying\n", mod->name);
		ret = -EBUSY;
		goto out;
	}

	/* If it has an init func, it must have an exit func to unload */
	if (mod->init && !mod->exit) {
		forced = try_force_unload(flags);
		if (!forced) {
			/* This module can't be removed */
			ret = -EBUSY;
			goto out;
		}
	}

	ret = try_stop_module(mod, flags, &forced);
	if (ret)
		goto out;

	/* From here on the teardown runs without module_mutex held. */
	mutex_unlock(&module_mutex);

	/* Final destruction now no one is using it. */
	if (mod->exit)
		mod->exit();
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_GOING, mod);
	klp_module_going(mod);
	ftrace_release_mod(mod);

	async_synchronize_full();

	/* Store the name and taints of the last unloaded module for diagnostic purposes */
	strscpy(last_unloaded_module.name, mod->name,
		sizeof(last_unloaded_module.name));
	strscpy(last_unloaded_module.taints,
		module_flags(mod, taint_buf, false),
		sizeof(last_unloaded_module.taints));

	free_module(mod);
	/* someone could wait for the module in add_unformed_module() */
	wake_up_all(&module_wq);
	return 0;

out:
	mutex_unlock(&module_mutex);
	return ret;
}
|
|
|
|
|
|
|
|
void __symbol_put(const char *symbol)
|
|
|
|
{
|
2021-01-20 14:58:27 +01:00
|
|
|
struct find_symbol_arg fsa = {
|
|
|
|
.name = symbol,
|
|
|
|
.gplok = true,
|
|
|
|
};
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_disable();
|
2021-05-12 07:01:57 -07:00
|
|
|
BUG_ON(!find_symbol(&fsa));
|
2021-01-20 14:58:27 +01:00
|
|
|
module_put(fsa.owner);
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_enable();
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(__symbol_put);
|
|
|
|
|
2009-08-26 22:02:54 +09:30
|
|
|
/*
 * Drop a reference on the module whose text contains @addr.
 *
 * Note this assumes addr is a function, which it currently always is.
 * Addresses in core kernel text are ignored.  An address that belongs to
 * neither the core kernel nor a module's text is a BUG.
 */
void symbol_put_addr(void *addr)
{
	unsigned long text_addr;
	struct module *owner;

	text_addr = (unsigned long)dereference_function_descriptor(addr);
	if (core_kernel_text(text_addr))
		return;

	/*
	 * Even though we hold a reference on the module; we still need to
	 * disable preemption in order to safely traverse the data structure.
	 */
	preempt_disable();
	owner = __module_text_address(text_addr);
	BUG_ON(!owner);
	module_put(owner);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(symbol_put_addr);
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_refcnt(const struct module_attribute *mattr,
|
2011-07-24 22:06:04 +09:30
|
|
|
struct module_kobject *mk, char *buffer)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2015-01-22 11:13:14 +10:30
|
|
|
return sprintf(buffer, "%i\n", module_refcount(mk->mod));
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_refcnt =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(refcnt, 0444, show_refcnt, NULL);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-03-26 12:50:52 +10:30
|
|
|
void __module_get(struct module *module)
|
|
|
|
{
|
|
|
|
if (module) {
|
2014-11-10 09:29:29 +10:30
|
|
|
atomic_inc(&module->refcnt);
|
2012-03-26 12:50:52 +10:30
|
|
|
trace_module_get(module, _RET_IP_);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(__module_get);
|
|
|
|
|
|
|
|
bool try_module_get(struct module *module)
|
|
|
|
{
|
|
|
|
bool ret = true;
|
|
|
|
|
|
|
|
if (module) {
|
2014-11-10 09:30:29 +10:30
|
|
|
/* Note: here, we can fail to get a reference */
|
|
|
|
if (likely(module_is_live(module) &&
|
|
|
|
atomic_inc_not_zero(&module->refcnt) != 0))
|
2012-03-26 12:50:52 +10:30
|
|
|
trace_module_get(module, _RET_IP_);
|
2014-11-10 09:30:29 +10:30
|
|
|
else
|
2012-03-26 12:50:52 +10:30
|
|
|
ret = false;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(try_module_get);
|
|
|
|
|
2006-10-18 01:47:25 -04:00
|
|
|
void module_put(struct module *module)
|
|
|
|
{
|
2014-11-10 09:30:29 +10:30
|
|
|
int ret;
|
|
|
|
|
2006-10-18 01:47:25 -04:00
|
|
|
if (module) {
|
2014-11-10 09:30:29 +10:30
|
|
|
ret = atomic_dec_if_positive(&module->refcnt);
|
|
|
|
WARN_ON(ret < 0); /* Failed to put refcount */
|
2010-03-24 10:57:43 +08:00
|
|
|
trace_module_put(module, _RET_IP_);
|
2006-10-18 01:47:25 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(module_put);
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#else /* !CONFIG_MODULE_UNLOAD */
|
|
|
|
/* !CONFIG_MODULE_UNLOAD variant: this is a no-op. */
static inline void module_unload_free(struct module *mod)
{
}
|
|
|
|
|
2020-07-30 08:10:20 +02:00
|
|
|
/*
 * !CONFIG_MODULE_UNLOAD variant: no usage record is kept.  This only forwards
 * to strong_try_module_get() on @b and returns its result.
 */
static int ref_module(struct module *a, struct module *b)
{
	return strong_try_module_get(b);
}
|
|
|
|
|
2010-08-05 12:59:04 -06:00
|
|
|
/* !CONFIG_MODULE_UNLOAD variant: nothing to initialise, always returns 0. */
static inline int module_unload_init(struct module *mod)
{
	return 0;
}
|
|
|
|
#endif /* CONFIG_MODULE_UNLOAD */
|
|
|
|
|
2022-05-02 21:51:03 +01:00
|
|
|
size_t module_flags_taint(unsigned long taints, char *buf)
|
2012-01-15 19:32:55 -04:00
|
|
|
{
|
|
|
|
size_t l = 0;
|
2016-09-21 13:47:22 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
|
2022-05-02 21:51:03 +01:00
|
|
|
if (taint_flags[i].module && test_bit(i, &taints))
|
2017-01-01 20:25:25 -06:00
|
|
|
buf[l++] = taint_flags[i].c_true;
|
2016-09-21 13:47:22 +02:00
|
|
|
}
|
2012-01-15 19:32:55 -04:00
|
|
|
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_initstate(const struct module_attribute *mattr,
|
2011-07-24 22:06:04 +09:30
|
|
|
struct module_kobject *mk, char *buffer)
|
2006-11-24 12:15:25 +01:00
|
|
|
{
|
|
|
|
const char *state = "unknown";
|
|
|
|
|
2011-07-24 22:06:04 +09:30
|
|
|
switch (mk->mod->state) {
|
2006-11-24 12:15:25 +01:00
|
|
|
case MODULE_STATE_LIVE:
|
|
|
|
state = "live";
|
|
|
|
break;
|
|
|
|
case MODULE_STATE_COMING:
|
|
|
|
state = "coming";
|
|
|
|
break;
|
|
|
|
case MODULE_STATE_GOING:
|
|
|
|
state = "going";
|
|
|
|
break;
|
2013-01-12 11:38:44 +10:30
|
|
|
default:
|
|
|
|
BUG();
|
2006-11-24 12:15:25 +01:00
|
|
|
}
|
|
|
|
return sprintf(buffer, "%s\n", state);
|
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_initstate =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(initstate, 0444, show_initstate, NULL);
|
2006-11-24 12:15:25 +01:00
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t store_uevent(const struct module_attribute *mattr,
|
2011-07-24 22:06:04 +09:30
|
|
|
struct module_kobject *mk,
|
|
|
|
const char *buffer, size_t count)
|
|
|
|
{
|
2018-12-05 12:27:44 +01:00
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = kobject_synth_uevent(&mk->kobj, buffer, count);
|
|
|
|
return rc ? rc : count;
|
2011-07-24 22:06:04 +09:30
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
const struct module_attribute module_uevent =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(uevent, 0200, NULL, store_uevent);
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_coresize(const struct module_attribute *mattr,
|
2012-01-13 09:32:15 +10:30
|
|
|
struct module_kobject *mk, char *buffer)
|
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
unsigned int size = mk->mod->mem[MOD_TEXT].size;
|
|
|
|
|
|
|
|
if (!IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC)) {
|
|
|
|
for_class_mod_mem_type(type, core_data)
|
|
|
|
size += mk->mod->mem[type].size;
|
|
|
|
}
|
|
|
|
return sprintf(buffer, "%u\n", size);
|
2012-01-13 09:32:15 +10:30
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_coresize =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(coresize, 0444, show_coresize, NULL);
|
|
|
|
|
2022-02-23 13:02:14 +01:00
|
|
|
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_datasize(const struct module_attribute *mattr,
|
2022-02-23 13:02:14 +01:00
|
|
|
struct module_kobject *mk, char *buffer)
|
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
unsigned int size = 0;
|
|
|
|
|
|
|
|
for_class_mod_mem_type(type, core_data)
|
|
|
|
size += mk->mod->mem[type].size;
|
|
|
|
return sprintf(buffer, "%u\n", size);
|
2022-02-23 13:02:14 +01:00
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_datasize =
|
2022-02-23 13:02:14 +01:00
|
|
|
__ATTR(datasize, 0444, show_datasize, NULL);
|
|
|
|
#endif
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_initsize(const struct module_attribute *mattr,
|
2012-01-13 09:32:15 +10:30
|
|
|
struct module_kobject *mk, char *buffer)
|
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
unsigned int size = 0;
|
|
|
|
|
|
|
|
for_class_mod_mem_type(type, init)
|
|
|
|
size += mk->mod->mem[type].size;
|
|
|
|
return sprintf(buffer, "%u\n", size);
|
2012-01-13 09:32:15 +10:30
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_initsize =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(initsize, 0444, show_initsize, NULL);
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static ssize_t show_taint(const struct module_attribute *mattr,
|
2012-01-13 09:32:15 +10:30
|
|
|
struct module_kobject *mk, char *buffer)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
|
2022-05-02 21:51:03 +01:00
|
|
|
l = module_flags_taint(mk->mod->taints, buffer);
|
2012-01-13 09:32:15 +10:30
|
|
|
buffer[l++] = '\n';
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
static const struct module_attribute modinfo_taint =
|
2012-01-13 09:32:15 +10:30
|
|
|
__ATTR(taint, 0444, show_taint, NULL);
|
2011-07-24 22:06:04 +09:30
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
const struct module_attribute *const modinfo_attrs[] = {
|
2012-01-13 09:32:15 +10:30
|
|
|
&module_uevent,
|
2006-02-16 13:50:23 -08:00
|
|
|
&modinfo_version,
|
|
|
|
&modinfo_srcversion,
|
2012-01-13 09:32:15 +10:30
|
|
|
&modinfo_initstate,
|
|
|
|
&modinfo_coresize,
|
2022-02-23 13:02:14 +01:00
|
|
|
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
|
|
|
|
&modinfo_datasize,
|
|
|
|
#endif
|
2012-01-13 09:32:15 +10:30
|
|
|
&modinfo_initsize,
|
|
|
|
&modinfo_taint,
|
2006-02-16 13:50:23 -08:00
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
2012-01-13 09:32:15 +10:30
|
|
|
&modinfo_refcnt,
|
2006-02-16 13:50:23 -08:00
|
|
|
#endif
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
2024-12-16 18:25:10 +01:00
|
|
|
const size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs);
|
2022-03-22 14:03:42 +00:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Local copy of VERMAGIC_STRING. */
static const char vermagic[] = VERMAGIC_STRING;
|
|
|
|
|
2022-03-22 14:03:44 +00:00
|
|
|
int try_to_force_load(struct module *mod, const char *reason)
|
2008-05-04 17:04:16 -07:00
|
|
|
{
|
|
|
|
#ifdef CONFIG_MODULE_FORCE_LOAD
|
2008-10-15 22:01:41 -07:00
|
|
|
if (!test_taint(TAINT_FORCED_MODULE))
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: %s: kernel tainted.\n", mod->name, reason);
|
2013-01-21 17:17:39 +10:30
|
|
|
add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE);
|
2008-05-04 17:04:16 -07:00
|
|
|
return 0;
|
|
|
|
#else
|
|
|
|
return -ENOEXEC;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2023-03-19 14:27:35 -07:00
|
|
|
/*
 * Parse tag=value strings from .modinfo section.
 *
 * Starting at @string, step over the current NUL-terminated entry and any
 * zero padding after it.  *@secsize is the number of bytes left in the
 * section and is decremented for every byte stepped over.
 *
 * Returns the start of the next entry, or NULL once the remaining size is
 * used up.
 */
char *module_next_tag_pair(char *string, unsigned long *secsize)
{
	unsigned long left;

	/* Skip non-zero chars */
	for (; *string != '\0'; ) {
		string++;
		left = *secsize;
		*secsize = left - 1;
		if (left <= 1)
			return NULL;
	}

	/* Skip any zero padding. */
	for (; *string == '\0'; ) {
		string++;
		left = *secsize;
		*secsize = left - 1;
		if (left <= 1)
			return NULL;
	}

	return string;
}
|
|
|
|
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
static char *get_next_modinfo(const struct load_info *info, const char *tag,
|
2023-03-19 14:27:35 -07:00
|
|
|
char *prev)
|
|
|
|
{
|
|
|
|
char *p;
|
|
|
|
unsigned int taglen = strlen(tag);
|
|
|
|
Elf_Shdr *infosec = &info->sechdrs[info->index.info];
|
|
|
|
unsigned long size = infosec->sh_size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_modinfo() calls made before rewrite_section_headers()
|
|
|
|
* must use sh_offset, as sh_addr isn't set!
|
|
|
|
*/
|
|
|
|
char *modinfo = (char *)info->hdr + infosec->sh_offset;
|
|
|
|
|
|
|
|
if (prev) {
|
|
|
|
size -= prev - modinfo;
|
2023-03-19 14:27:36 -07:00
|
|
|
modinfo = module_next_tag_pair(prev, &size);
|
2023-03-19 14:27:35 -07:00
|
|
|
}
|
|
|
|
|
2023-03-19 14:27:36 -07:00
|
|
|
for (p = modinfo; p; p = module_next_tag_pair(p, &size)) {
|
2023-03-19 14:27:35 -07:00
|
|
|
if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
|
|
|
|
return p + taglen + 1;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *get_modinfo(const struct load_info *info, const char *tag)
|
|
|
|
{
|
|
|
|
return get_next_modinfo(info, tag, NULL);
|
|
|
|
}
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
|
|
|
|
static int verify_namespace_is_imported(const struct load_info *info,
|
|
|
|
const struct kernel_symbol *sym,
|
|
|
|
struct module *mod)
|
|
|
|
{
|
|
|
|
const char *namespace;
|
|
|
|
char *imported_namespace;
|
|
|
|
|
|
|
|
namespace = kernel_symbol_namespace(sym);
|
2019-10-18 10:31:43 +01:00
|
|
|
if (namespace && namespace[0]) {
|
2023-03-19 14:27:37 -07:00
|
|
|
for_each_modinfo_entry(imported_namespace, info, "import_ns") {
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
if (strcmp(namespace, imported_namespace) == 0)
|
|
|
|
return 0;
|
|
|
|
}
|
2019-09-06 11:32:29 +01:00
|
|
|
#ifdef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
|
|
|
|
pr_warn(
|
|
|
|
#else
|
|
|
|
pr_err(
|
|
|
|
#endif
|
|
|
|
"%s: module uses symbol (%s) from namespace %s, but does not import it.\n",
|
|
|
|
mod->name, kernel_symbol_name(sym), namespace);
|
|
|
|
#ifndef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
return -EINVAL;
|
2019-09-06 11:32:29 +01:00
|
|
|
#endif
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-04-27 15:36:06 +08:00
|
|
|
static bool inherit_taint(struct module *mod, struct module *owner, const char *name)
|
2020-07-28 23:33:33 +02:00
|
|
|
{
|
|
|
|
if (!owner || !test_bit(TAINT_PROPRIETARY_MODULE, &owner->taints))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (mod->using_gplonly_symbols) {
|
2022-04-27 15:36:06 +08:00
|
|
|
pr_err("%s: module using GPL-only symbols uses symbols %s from proprietary module %s.\n",
|
|
|
|
mod->name, name, owner->name);
|
2020-07-28 23:33:33 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!test_bit(TAINT_PROPRIETARY_MODULE, &mod->taints)) {
|
2022-04-27 15:36:06 +08:00
|
|
|
pr_warn("%s: module uses symbols %s from proprietary module %s, inheriting taint.\n",
|
|
|
|
mod->name, name, owner->name);
|
2020-07-28 23:33:33 +02:00
|
|
|
set_bit(TAINT_PROPRIETARY_MODULE, &mod->taints);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
|
2010-06-05 11:17:36 -06:00
|
|
|
/* Resolve a symbol for this module. I.e. if we find one, record usage. */
|
2010-08-05 12:59:10 -06:00
|
|
|
static const struct kernel_symbol *resolve_symbol(struct module *mod,
|
|
|
|
const struct load_info *info,
|
2008-12-05 19:03:56 -05:00
|
|
|
const char *name,
|
2010-06-05 11:17:37 -06:00
|
|
|
char ownername[])
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
2021-01-20 14:58:27 +01:00
|
|
|
struct find_symbol_arg fsa = {
|
|
|
|
.name = name,
|
|
|
|
.gplok = !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)),
|
|
|
|
.warn = true,
|
|
|
|
};
|
2010-06-05 11:17:37 -06:00
|
|
|
int err;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2015-02-11 15:01:13 +10:30
|
|
|
/*
|
|
|
|
* The module_mutex should not be a heavily contended lock;
|
|
|
|
* if we get the occasional sleep here, we'll go an extra iteration
|
|
|
|
* in the wait_event_interruptible(), which is harmless.
|
|
|
|
*/
|
|
|
|
sched_annotate_sleep();
|
2010-06-05 11:17:36 -06:00
|
|
|
mutex_lock(&module_mutex);
|
2021-01-20 14:58:27 +01:00
|
|
|
if (!find_symbol(&fsa))
|
2010-06-05 11:17:37 -06:00
|
|
|
goto unlock;
|
|
|
|
|
2021-01-20 14:58:27 +01:00
|
|
|
if (fsa.license == GPL_ONLY)
|
2020-07-28 23:33:33 +02:00
|
|
|
mod->using_gplonly_symbols = true;
|
|
|
|
|
2022-04-27 15:36:06 +08:00
|
|
|
if (!inherit_taint(mod, fsa.owner, name)) {
|
2021-01-20 14:58:27 +01:00
|
|
|
fsa.sym = NULL;
|
2020-07-28 23:33:33 +02:00
|
|
|
goto getname;
|
|
|
|
}
|
|
|
|
|
2021-01-20 14:58:27 +01:00
|
|
|
if (!check_version(info, name, mod, fsa.crc)) {
|
|
|
|
fsa.sym = ERR_PTR(-EINVAL);
|
2010-06-05 11:17:37 -06:00
|
|
|
goto getname;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2010-06-05 11:17:37 -06:00
|
|
|
|
2021-01-20 14:58:27 +01:00
|
|
|
err = verify_namespace_is_imported(info, fsa.sym, mod);
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
if (err) {
|
2021-01-20 14:58:27 +01:00
|
|
|
fsa.sym = ERR_PTR(err);
|
module: add support for symbol namespaces.
The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace. There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.
I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.
A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.
MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.
The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'. This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.
On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.
Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2019-09-06 11:32:27 +01:00
|
|
|
goto getname;
|
|
|
|
}
|
|
|
|
|
2021-01-20 14:58:27 +01:00
|
|
|
err = ref_module(mod, fsa.owner);
|
2010-06-05 11:17:37 -06:00
|
|
|
if (err) {
|
2021-01-20 14:58:27 +01:00
|
|
|
fsa.sym = ERR_PTR(err);
|
2010-06-05 11:17:37 -06:00
|
|
|
goto getname;
|
|
|
|
}
|
|
|
|
|
|
|
|
getname:
|
|
|
|
/* We must make copy under the lock if we failed to get ref. */
|
2021-01-20 14:58:27 +01:00
|
|
|
strncpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN);
|
2010-06-05 11:17:37 -06:00
|
|
|
unlock:
|
2010-06-05 11:17:36 -06:00
|
|
|
mutex_unlock(&module_mutex);
|
2021-01-20 14:58:27 +01:00
|
|
|
return fsa.sym;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
static const struct kernel_symbol *
|
|
|
|
resolve_symbol_wait(struct module *mod,
|
|
|
|
const struct load_info *info,
|
|
|
|
const char *name)
|
2010-06-05 11:17:37 -06:00
|
|
|
{
|
|
|
|
const struct kernel_symbol *ksym;
|
2010-08-05 12:59:10 -06:00
|
|
|
char owner[MODULE_NAME_LEN];
|
2010-06-05 11:17:37 -06:00
|
|
|
|
|
|
|
if (wait_event_interruptible_timeout(module_wq,
|
2010-08-05 12:59:10 -06:00
|
|
|
!IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
|
|
|
|
|| PTR_ERR(ksym) != -EBUSY,
|
2010-06-05 11:17:37 -06:00
|
|
|
30 * HZ) <= 0) {
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: gave up waiting for init of module %s.\n",
|
|
|
|
mod->name, owner);
|
2010-06-05 11:17:37 -06:00
|
|
|
}
|
|
|
|
return ksym;
|
|
|
|
}
|
|
|
|
|
2011-06-30 21:22:11 +02:00
|
|
|
/* Weak default that does nothing. */
void __weak module_arch_cleanup(struct module *mod)
{
	/* Intentionally empty. */
}
|
|
|
|
|
2015-01-20 09:07:04 +10:30
|
|
|
/* Weak default that does nothing. */
void __weak module_arch_freeing_init(struct module *mod)
{
	/* Intentionally empty. */
}
|
|
|
|
|
2024-10-23 19:27:07 +03:00
|
|
|
void *__module_writable_address(struct module *mod, void *loc)
|
|
|
|
{
|
|
|
|
for_class_mod_mem_type(type, text) {
|
|
|
|
struct module_memory *mem = &mod->mem[type];
|
|
|
|
|
|
|
|
if (loc >= mem->base && loc < mem->base + mem->size)
|
|
|
|
return loc + (mem->rw_copy - mem->base);
|
|
|
|
}
|
|
|
|
|
|
|
|
return loc;
|
|
|
|
}
|
|
|
|
|
2024-05-05 19:06:17 +03:00
|
|
|
static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
{
|
2024-05-05 19:06:17 +03:00
|
|
|
unsigned int size = PAGE_ALIGN(mod->mem[type].size);
|
2024-05-05 19:06:20 +03:00
|
|
|
enum execmem_type execmem_type;
|
2024-05-05 19:06:17 +03:00
|
|
|
void *ptr;
|
|
|
|
|
|
|
|
mod->mem[type].size = size;
|
|
|
|
|
2024-05-05 19:06:20 +03:00
|
|
|
if (mod_mem_type_is_data(type))
|
|
|
|
execmem_type = EXECMEM_MODULE_DATA;
|
2024-05-05 19:06:17 +03:00
|
|
|
else
|
2024-05-05 19:06:20 +03:00
|
|
|
execmem_type = EXECMEM_MODULE_TEXT;
|
2024-05-05 19:06:17 +03:00
|
|
|
|
2024-05-05 19:06:20 +03:00
|
|
|
ptr = execmem_alloc(execmem_type, size);
|
2024-05-05 19:06:17 +03:00
|
|
|
if (!ptr)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2024-10-23 19:27:07 +03:00
|
|
|
mod->mem[type].base = ptr;
|
|
|
|
|
|
|
|
if (execmem_is_rox(execmem_type)) {
|
|
|
|
ptr = vzalloc(size);
|
|
|
|
|
|
|
|
if (!ptr) {
|
|
|
|
execmem_free(mod->mem[type].base);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
mod->mem[type].rw_copy = ptr;
|
|
|
|
mod->mem[type].is_rox = true;
|
|
|
|
} else {
|
|
|
|
mod->mem[type].rw_copy = mod->mem[type].base;
|
|
|
|
memset(mod->mem[type].base, 0, size);
|
|
|
|
}
|
|
|
|
|
2024-05-05 19:06:17 +03:00
|
|
|
/*
|
|
|
|
* The pointer to these blocks of memory are stored on the module
|
|
|
|
* structure and we keep that around so long as the module is
|
|
|
|
* around. We only free that memory when we unload the module.
|
|
|
|
* Just mark them as not being a leak then. The .init* ELF
|
|
|
|
* sections *do* get freed after boot so we *could* treat them
|
|
|
|
* slightly differently with kmemleak_ignore() and only grey
|
|
|
|
* them out as they work as typical memory allocations which
|
|
|
|
* *do* eventually get freed, but let's just keep things simple
|
|
|
|
* and avoid *any* false positives.
|
|
|
|
*/
|
|
|
|
kmemleak_not_leak(ptr);
|
|
|
|
|
|
|
|
return 0;
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
}
|
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
static void module_memory_free(struct module *mod, enum mod_mem_type type)
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
{
|
2024-10-23 19:27:07 +03:00
|
|
|
struct module_memory *mem = &mod->mem[type];
|
2024-05-05 19:06:17 +03:00
|
|
|
|
2024-10-23 19:27:07 +03:00
|
|
|
if (mem->is_rox)
|
|
|
|
vfree(mem->rw_copy);
|
2024-03-21 09:36:34 -07:00
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
execmem_free(mem->base);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
}
|
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
static void free_mod_mem(struct module *mod)
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
{
|
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
struct module_memory *mod_mem = &mod->mem[type];
|
|
|
|
|
|
|
|
if (type == MOD_DATA)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Free lock-classes; relies on the preceding sync_rcu(). */
|
|
|
|
lockdep_free_key_range(mod_mem->base, mod_mem->size);
|
|
|
|
if (mod_mem->size)
|
2024-10-23 10:07:56 -07:00
|
|
|
module_memory_free(mod, type);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* MOD_DATA hosts mod, so free it at last */
|
|
|
|
lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size);
|
2024-10-23 10:07:56 -07:00
|
|
|
module_memory_free(mod, MOD_DATA);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
}
|
|
|
|
|
2010-06-05 11:17:36 -06:00
|
|
|
/* Free a module, remove from lists, etc. */
|
2005-04-16 15:20:36 -07:00
|
|
|
static void free_module(struct module *mod)
|
|
|
|
{
|
2009-08-17 16:56:28 +08:00
|
|
|
trace_module_free(mod);
|
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
codetag_unload_module(mod);
|
2024-03-21 09:36:34 -07:00
|
|
|
|
2010-08-05 12:59:09 -06:00
|
|
|
mod_sysfs_teardown(mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* We leave it in list to prevent duplicate loads, but make sure
|
|
|
|
* that noone uses it while it's being deconstructed.
|
|
|
|
*/
|
modules, lock around setting of MODULE_STATE_UNFORMED
A panic was seen in the following sitation.
There are two threads running on the system. The first thread is a system
monitoring thread that is reading /proc/modules. The second thread is
loading and unloading a module (in this example I'm using my simple
dummy-module.ko). Note, in the "real world" this occurred with the qlogic
driver module.
When doing this, the following panic occurred:
------------[ cut here ]------------
kernel BUG at kernel/module.c:3739!
invalid opcode: 0000 [#1] SMP
Modules linked in: binfmt_misc sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache intel_powerclamp coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw igb gf128mul glue_helper iTCO_wdt iTCO_vendor_support ablk_helper ptp sb_edac cryptd pps_core edac_core shpchp i2c_i801 pcspkr wmi lpc_ich ioatdma mfd_core dca ipmi_si nfsd ipmi_msghandler auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sr_mod cdrom sd_mod crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ttm isci drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod [last unloaded: dummy_module]
CPU: 37 PID: 186343 Comm: cat Tainted: GF O-------------- 3.10.0+ #7
Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
task: ffff8807fd2d8000 ti: ffff88080fa7c000 task.ti: ffff88080fa7c000
RIP: 0010:[<ffffffff810d64c5>] [<ffffffff810d64c5>] module_flags+0xb5/0xc0
RSP: 0018:ffff88080fa7fe18 EFLAGS: 00010246
RAX: 0000000000000003 RBX: ffffffffa03b5200 RCX: 0000000000000000
RDX: 0000000000001000 RSI: ffff88080fa7fe38 RDI: ffffffffa03b5000
RBP: ffff88080fa7fe28 R08: 0000000000000010 R09: 0000000000000000
R10: 0000000000000000 R11: 000000000000000f R12: ffffffffa03b5000
R13: ffffffffa03b5008 R14: ffffffffa03b5200 R15: ffffffffa03b5000
FS: 00007f6ae57ef740(0000) GS:ffff88101e7a0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000404f70 CR3: 0000000ffed48000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffffffffa03b5200 ffff8810101e4800 ffff88080fa7fe70 ffffffff810d666c
ffff88081e807300 000000002e0f2fbf 0000000000000000 ffff88100f257b00
ffffffffa03b5008 ffff88080fa7ff48 ffff8810101e4800 ffff88080fa7fee0
Call Trace:
[<ffffffff810d666c>] m_show+0x19c/0x1e0
[<ffffffff811e4d7e>] seq_read+0x16e/0x3b0
[<ffffffff812281ed>] proc_reg_read+0x3d/0x80
[<ffffffff811c0f2c>] vfs_read+0x9c/0x170
[<ffffffff811c1a58>] SyS_read+0x58/0xb0
[<ffffffff81605829>] system_call_fastpath+0x16/0x1b
Code: 48 63 c2 83 c2 01 c6 04 03 29 48 63 d2 eb d9 0f 1f 80 00 00 00 00 48 63 d2 c6 04 13 2d 41 8b 0c 24 8d 50 02 83 f9 01 75 b2 eb cb <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41
RIP [<ffffffff810d64c5>] module_flags+0xb5/0xc0
RSP <ffff88080fa7fe18>
Consider the two processes running on the system.
CPU 0 (/proc/modules reader)
CPU 1 (loading/unloading module)
CPU 0 opens /proc/modules, and starts displaying data for each module by
traversing the modules list via fs/seq_file.c:seq_open() and
fs/seq_file.c:seq_read(). For each module in the modules list, seq_read
does
op->start() <-- this is a pointer to m_start()
op->show() <- this is a pointer to m_show()
op->stop() <-- this is a pointer to m_stop()
The m_start(), m_show(), and m_stop() module functions are defined in
kernel/module.c. The m_start() and m_stop() functions acquire and release
the module_mutex respectively.
ie) When reading /proc/modules, the module_mutex is acquired and released
for each module.
m_show() is called with the module_mutex held. It accesses the module
struct data and attempts to write out module data. It is in this code
path that the above BUG_ON() warning is encountered, specifically m_show()
calls
static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
...
The other thread, CPU 1, in unloading the module calls the syscall
delete_module() defined in kernel/module.c. The module_mutex is acquired
for a short time, and then released. free_module() is called without the
module_mutex. free_module() then sets mod->state = MODULE_STATE_UNFORMED,
also without the module_mutex. Some additional code is called and then the
module_mutex is reacquired to remove the module from the modules list:
/* Now we can delete it from the lists */
mutex_lock(&module_mutex);
stop_machine(__unlink_module, mod, NULL);
mutex_unlock(&module_mutex);
This is the sequence of events that leads to the panic.
CPU 1 is removing dummy_module via delete_module(). It acquires the
module_mutex, and then releases it. CPU 1 has NOT set dummy_module->state to
MODULE_STATE_UNFORMED yet.
CPU 0, which is reading the /proc/modules, acquires the module_mutex and
acquires a pointer to the dummy_module which is still in the modules list.
CPU 0 calls m_show for dummy_module. The check in m_show() for
MODULE_STATE_UNFORMED passed for dummy_module even though it is being
torn down.
Meanwhile CPU 1, which has been continuing to remove dummy_module without
holding the module_mutex, now calls free_module() and sets
dummy_module->state to MODULE_STATE_UNFORMED.
CPU 0 now calls module_flags() with dummy_module and ...
static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
and BOOM.
Acquire and release the module_mutex lock around the setting of
MODULE_STATE_UNFORMED in the teardown path, which should resolve the
problem.
Testing: In the unpatched kernel I can panic the system within 1 minute by
doing
while (true) do insmod dummy_module.ko; rmmod dummy_module.ko; done
and
while (true) do cat /proc/modules; done
in separate terminals.
In the patched kernel I was able to run just over one hour without seeing
any issues. I also verified the output of panic via sysrq-c and the output
of /proc/modules looks correct for all three states for the dummy_module.
dummy_module 12661 0 - Unloading 0xffffffffa03a5000 (OE-)
dummy_module 12661 0 - Live 0xffffffffa03bb000 (OE)
dummy_module 14015 1 - Loading 0xffffffffa03a5000 (OE+)
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@kernel.org
2014-10-14 02:51:39 +10:30
|
|
|
mutex_lock(&module_mutex);
|
2013-04-17 13:20:03 +09:30
|
|
|
mod->state = MODULE_STATE_UNFORMED;
|
modules, lock around setting of MODULE_STATE_UNFORMED
A panic was seen in the following sitation.
There are two threads running on the system. The first thread is a system
monitoring thread that is reading /proc/modules. The second thread is
loading and unloading a module (in this example I'm using my simple
dummy-module.ko). Note, in the "real world" this occurred with the qlogic
driver module.
When doing this, the following panic occurred:
------------[ cut here ]------------
kernel BUG at kernel/module.c:3739!
invalid opcode: 0000 [#1] SMP
Modules linked in: binfmt_misc sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache intel_powerclamp coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw igb gf128mul glue_helper iTCO_wdt iTCO_vendor_support ablk_helper ptp sb_edac cryptd pps_core edac_core shpchp i2c_i801 pcspkr wmi lpc_ich ioatdma mfd_core dca ipmi_si nfsd ipmi_msghandler auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sr_mod cdrom sd_mod crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ttm isci drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod [last unloaded: dummy_module]
CPU: 37 PID: 186343 Comm: cat Tainted: GF O-------------- 3.10.0+ #7
Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
task: ffff8807fd2d8000 ti: ffff88080fa7c000 task.ti: ffff88080fa7c000
RIP: 0010:[<ffffffff810d64c5>] [<ffffffff810d64c5>] module_flags+0xb5/0xc0
RSP: 0018:ffff88080fa7fe18 EFLAGS: 00010246
RAX: 0000000000000003 RBX: ffffffffa03b5200 RCX: 0000000000000000
RDX: 0000000000001000 RSI: ffff88080fa7fe38 RDI: ffffffffa03b5000
RBP: ffff88080fa7fe28 R08: 0000000000000010 R09: 0000000000000000
R10: 0000000000000000 R11: 000000000000000f R12: ffffffffa03b5000
R13: ffffffffa03b5008 R14: ffffffffa03b5200 R15: ffffffffa03b5000
FS: 00007f6ae57ef740(0000) GS:ffff88101e7a0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000404f70 CR3: 0000000ffed48000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffffffffa03b5200 ffff8810101e4800 ffff88080fa7fe70 ffffffff810d666c
ffff88081e807300 000000002e0f2fbf 0000000000000000 ffff88100f257b00
ffffffffa03b5008 ffff88080fa7ff48 ffff8810101e4800 ffff88080fa7fee0
Call Trace:
[<ffffffff810d666c>] m_show+0x19c/0x1e0
[<ffffffff811e4d7e>] seq_read+0x16e/0x3b0
[<ffffffff812281ed>] proc_reg_read+0x3d/0x80
[<ffffffff811c0f2c>] vfs_read+0x9c/0x170
[<ffffffff811c1a58>] SyS_read+0x58/0xb0
[<ffffffff81605829>] system_call_fastpath+0x16/0x1b
Code: 48 63 c2 83 c2 01 c6 04 03 29 48 63 d2 eb d9 0f 1f 80 00 00 00 00 48 63 d2 c6 04 13 2d 41 8b 0c 24 8d 50 02 83 f9 01 75 b2 eb cb <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41
RIP [<ffffffff810d64c5>] module_flags+0xb5/0xc0
RSP <ffff88080fa7fe18>
Consider the two processes running on the system.
CPU 0 (/proc/modules reader)
CPU 1 (loading/unloading module)
CPU 0 opens /proc/modules, and starts displaying data for each module by
traversing the modules list via fs/seq_file.c:seq_open() and
fs/seq_file.c:seq_read(). For each module in the modules list, seq_read
does
op->start() <-- this is a pointer to m_start()
op->show() <- this is a pointer to m_show()
op->stop() <-- this is a pointer to m_stop()
The m_start(), m_show(), and m_stop() module functions are defined in
kernel/module.c. The m_start() and m_stop() functions acquire and release
the module_mutex respectively.
ie) When reading /proc/modules, the module_mutex is acquired and released
for each module.
m_show() is called with the module_mutex held. It accesses the module
struct data and attempts to write out module data. It is in this code
path that the above BUG_ON() warning is encountered, specifically m_show()
calls
static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
...
The other thread, CPU 1, in unloading the module calls the syscall
delete_module() defined in kernel/module.c. The module_mutex is acquired
for a short time, and then released. free_module() is called without the
module_mutex. free_module() then sets mod->state = MODULE_STATE_UNFORMED,
also without the module_mutex. Some additional code is called and then the
module_mutex is reacquired to remove the module from the modules list:
/* Now we can delete it from the lists */
mutex_lock(&module_mutex);
stop_machine(__unlink_module, mod, NULL);
mutex_unlock(&module_mutex);
This is the sequence of events that leads to the panic.
CPU 1 is removing dummy_module via delete_module(). It acquires the
module_mutex, and then releases it. CPU 1 has NOT set dummy_module->state to
MODULE_STATE_UNFORMED yet.
CPU 0, which is reading the /proc/modules, acquires the module_mutex and
acquires a pointer to the dummy_module which is still in the modules list.
CPU 0 calls m_show for dummy_module. The check in m_show() for
MODULE_STATE_UNFORMED passed for dummy_module even though it is being
torn down.
Meanwhile CPU 1, which has been continuing to remove dummy_module without
holding the module_mutex, now calls free_module() and sets
dummy_module->state to MODULE_STATE_UNFORMED.
CPU 0 now calls module_flags() with dummy_module and ...
static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
and BOOM.
Acquire and release the module_mutex lock around the setting of
MODULE_STATE_UNFORMED in the teardown path, which should resolve the
problem.
Testing: In the unpatched kernel I can panic the system within 1 minute by
doing
while (true) do insmod dummy_module.ko; rmmod dummy_module.ko; done
and
while (true) do cat /proc/modules; done
in separate terminals.
In the patched kernel I was able to run just over one hour without seeing
any issues. I also verified the output of panic via sysrq-c and the output
of /proc/modules looks correct for all three states for the dummy_module.
dummy_module 12661 0 - Unloading 0xffffffffa03a5000 (OE-)
dummy_module 12661 0 - Live 0xffffffffa03bb000 (OE)
dummy_module 14015 1 - Loading 0xffffffffa03a5000 (OE+)
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@kernel.org
2014-10-14 02:51:39 +10:30
|
|
|
mutex_unlock(&module_mutex);
|
2013-04-17 13:20:03 +09:30
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Arch-specific cleanup. */
|
|
|
|
module_arch_cleanup(mod);
|
|
|
|
|
|
|
|
/* Module unload stuff */
|
|
|
|
module_unload_free(mod);
|
|
|
|
|
2009-03-31 13:05:29 -06:00
|
|
|
/* Free any allocated parameters. */
|
|
|
|
destroy_params(mod->kp, mod->num_kp);
|
|
|
|
|
2016-03-22 20:03:16 -04:00
|
|
|
if (is_livepatch_module(mod))
|
|
|
|
free_module_elf(mod);
|
|
|
|
|
2013-04-17 13:20:03 +09:30
|
|
|
/* Now we can delete it from the lists */
|
|
|
|
mutex_lock(&module_mutex);
|
2014-11-10 09:27:29 +10:30
|
|
|
/* Unlink carefully: kallsyms could be walking list. */
|
|
|
|
list_del_rcu(&mod->list);
|
2015-05-27 11:09:37 +09:30
|
|
|
mod_tree_remove(mod);
|
2014-11-10 09:28:29 +10:30
|
|
|
/* Remove this module from bug list, this uses list_del_rcu */
|
2014-11-10 09:27:29 +10:30
|
|
|
module_bug_cleanup(mod);
|
2015-05-27 11:09:35 +09:30
|
|
|
/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
|
2018-11-06 19:17:01 -08:00
|
|
|
synchronize_rcu();
|
2022-05-02 21:52:52 +01:00
|
|
|
if (try_add_tainted_module(mod))
|
|
|
|
pr_err("%s: adding tainted module to the unloaded tainted modules list failed.\n",
|
|
|
|
mod->name);
|
2013-04-17 13:20:03 +09:30
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
|
2015-11-26 09:45:08 +10:30
|
|
|
/* This may be empty, but that's OK */
|
2015-01-20 09:07:04 +10:30
|
|
|
module_arch_freeing_init(mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
kfree(mod->args);
|
2010-03-10 18:56:10 +09:00
|
|
|
percpu_modfree(mod);
|
2010-08-05 12:59:04 -06:00
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
free_mod_mem(mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
void *__symbol_get(const char *symbol)
|
|
|
|
{
|
2021-01-20 14:58:27 +01:00
|
|
|
struct find_symbol_arg fsa = {
|
|
|
|
.name = symbol,
|
|
|
|
.gplok = true,
|
|
|
|
.warn = true,
|
|
|
|
};
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_disable();
|
2023-08-01 19:35:44 +02:00
|
|
|
if (!find_symbol(&fsa))
|
|
|
|
goto fail;
|
|
|
|
if (fsa.license != GPL_ONLY) {
|
|
|
|
pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n",
|
|
|
|
symbol);
|
|
|
|
goto fail;
|
2021-01-20 14:58:27 +01:00
|
|
|
}
|
2023-08-01 19:35:44 +02:00
|
|
|
if (strong_try_module_get(fsa.owner))
|
|
|
|
goto fail;
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_enable();
|
2021-01-20 14:58:27 +01:00
|
|
|
return (void *)kernel_symbol_value(fsa.sym);
|
2023-08-01 19:35:44 +02:00
|
|
|
fail:
|
|
|
|
preempt_enable();
|
|
|
|
return NULL;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(__symbol_get);
|
|
|
|
|
2006-01-08 01:04:25 -08:00
|
|
|
/*
|
|
|
|
* Ensure that an exported symbol [global namespace] does not already exist
|
2007-05-09 07:26:28 +02:00
|
|
|
* in the kernel or in some other module's exported symbol table.
|
2010-06-05 11:17:37 -06:00
|
|
|
*
|
|
|
|
* You must hold the module_mutex.
|
2006-01-08 01:04:25 -08:00
|
|
|
*/
|
2018-11-19 17:43:58 +01:00
|
|
|
static int verify_exported_symbols(struct module *mod)
|
2006-01-08 01:04:25 -08:00
|
|
|
{
|
2008-05-01 21:15:00 -05:00
|
|
|
unsigned int i;
|
|
|
|
const struct kernel_symbol *s;
|
|
|
|
struct {
|
|
|
|
const struct kernel_symbol *sym;
|
|
|
|
unsigned int num;
|
|
|
|
} arr[] = {
|
|
|
|
{ mod->syms, mod->num_syms },
|
|
|
|
{ mod->gpl_syms, mod->num_gpl_syms },
|
|
|
|
};
|
2006-01-08 01:04:25 -08:00
|
|
|
|
2008-05-01 21:15:00 -05:00
|
|
|
for (i = 0; i < ARRAY_SIZE(arr); i++) {
|
|
|
|
for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
|
2021-01-20 14:58:27 +01:00
|
|
|
struct find_symbol_arg fsa = {
|
|
|
|
.name = kernel_symbol_name(s),
|
|
|
|
.gplok = true,
|
|
|
|
};
|
|
|
|
if (find_symbol(&fsa)) {
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_err("%s: exports duplicate symbol %s"
|
2008-05-01 21:15:00 -05:00
|
|
|
" (owned by %s)\n",
|
module: use relative references for __ksymtab entries
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.
When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.
Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead. This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).
Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name. This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.
Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.
Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-21 21:56:09 -07:00
|
|
|
mod->name, kernel_symbol_name(s),
|
2021-01-20 14:58:27 +01:00
|
|
|
module_name(fsa.owner));
|
2008-05-01 21:15:00 -05:00
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
2006-01-08 01:04:25 -08:00
|
|
|
}
|
2008-05-01 21:15:00 -05:00
|
|
|
}
|
|
|
|
return 0;
|
2006-01-08 01:04:25 -08:00
|
|
|
}
|
|
|
|
|
2021-01-15 11:52:22 -08:00
|
|
|
/*
 * Decide whether an unresolved symbol may be silently tolerated.
 *
 * @emachine: ELF e_machine value of the module being loaded.
 * @name:     name of the undefined symbol.
 *
 * Returns true only for "_GLOBAL_OFFSET_TABLE_" on EM_386 / EM_X86_64,
 * false for everything else.
 */
static bool ignore_undef_symbol(Elf_Half emachine, const char *name)
{
	switch (emachine) {
	case EM_386:
	case EM_X86_64:
		/*
		 * On x86, PIC code and Clang non-PIC code may have
		 * call foo@PLT.  GNU as before 2.37 produces an unreferenced
		 * _GLOBAL_OFFSET_TABLE_ on x86-64.  i386 has a similar problem
		 * but may not deserve a fix.
		 *
		 * If we ever have to ignore many symbols, consider refactoring
		 * the code to only warn if referenced by a relocation.
		 */
		return strcmp(name, "_GLOBAL_OFFSET_TABLE_") == 0;
	default:
		return false;
	}
}
|
|
|
|
|
2007-11-08 08:37:38 -08:00
|
|
|
/* Change all symbols so that st_value encodes the pointer directly. */
|
2010-08-05 12:59:10 -06:00
|
|
|
static int simplify_symbols(struct module *mod, const struct load_info *info)
|
|
|
|
{
|
|
|
|
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
|
|
|
|
Elf_Sym *sym = (void *)symsec->sh_addr;
|
2005-04-16 15:20:36 -07:00
|
|
|
unsigned long secbase;
|
2010-08-05 12:59:10 -06:00
|
|
|
unsigned int i;
|
2005-04-16 15:20:36 -07:00
|
|
|
int ret = 0;
|
2008-12-05 19:03:56 -05:00
|
|
|
const struct kernel_symbol *ksym;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
|
|
|
|
const char *name = info->strtab + sym[i].st_name;
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
switch (sym[i].st_shndx) {
|
|
|
|
case SHN_COMMON:
|
2014-02-08 09:01:09 +01:00
|
|
|
/* Ignore common symbols */
|
|
|
|
if (!strncmp(name, "__gnu_lto", 9))
|
|
|
|
break;
|
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* We compiled with -fno-common. These are not
|
|
|
|
* supposed to happen.
|
|
|
|
*/
|
2011-12-06 12:11:31 -07:00
|
|
|
pr_debug("Common symbol: %s\n", name);
|
2014-11-10 09:31:29 +10:30
|
|
|
pr_warn("%s: please compile with -fno-common\n",
|
2005-04-16 15:20:36 -07:00
|
|
|
mod->name);
|
|
|
|
ret = -ENOEXEC;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SHN_ABS:
|
|
|
|
/* Don't need to do anything */
|
2023-03-21 19:36:21 -06:00
|
|
|
pr_debug("Absolute symbol: 0x%08lx %s\n",
|
|
|
|
(long)sym[i].st_value, name);
|
2005-04-16 15:20:36 -07:00
|
|
|
break;
|
|
|
|
|
2016-03-22 20:03:16 -04:00
|
|
|
case SHN_LIVEPATCH:
|
|
|
|
/* Livepatch symbols are resolved by livepatch */
|
|
|
|
break;
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
case SHN_UNDEF:
|
2010-08-05 12:59:10 -06:00
|
|
|
ksym = resolve_symbol_wait(mod, info, name);
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Ok if resolved. */
|
2010-06-05 11:17:37 -06:00
|
|
|
if (ksym && !IS_ERR(ksym)) {
|
module: use relative references for __ksymtab entries
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.
When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.
Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead. This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).
Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name. This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.
Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.
Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-21 21:56:09 -07:00
|
|
|
sym[i].st_value = kernel_symbol_value(ksym);
|
2005-04-16 15:20:36 -07:00
|
|
|
break;
|
2008-12-05 19:03:56 -05:00
|
|
|
}
|
|
|
|
|
2021-01-15 11:52:22 -08:00
|
|
|
/* Ok if weak or ignored. */
|
|
|
|
if (!ksym &&
|
|
|
|
(ELF_ST_BIND(sym[i].st_info) == STB_WEAK ||
|
|
|
|
ignore_undef_symbol(info->hdr->e_machine, name)))
|
2005-04-16 15:20:36 -07:00
|
|
|
break;
|
|
|
|
|
2010-06-05 11:17:37 -06:00
|
|
|
ret = PTR_ERR(ksym) ?: -ENOENT;
|
2018-06-22 17:38:50 +02:00
|
|
|
pr_warn("%s: Unknown symbol %s (err %d)\n",
|
|
|
|
mod->name, name, ret);
|
2005-04-16 15:20:36 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* Divert to percpu allocation if a percpu var. */
|
2010-08-05 12:59:10 -06:00
|
|
|
if (sym[i].st_shndx == info->index.pcpu)
|
2010-03-10 18:56:10 +09:00
|
|
|
secbase = (unsigned long)mod_percpu(mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
else
|
2010-08-05 12:59:10 -06:00
|
|
|
secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
|
2005-04-16 15:20:36 -07:00
|
|
|
sym[i].st_value += secbase;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
static int apply_relocations(struct module *mod, const struct load_info *info)
|
2010-08-05 12:59:05 -06:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* Now do relocations. */
|
2010-08-05 12:59:10 -06:00
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
unsigned int infosec = info->sechdrs[i].sh_info;
|
2010-08-05 12:59:05 -06:00
|
|
|
|
|
|
|
/* Not a valid relocation section? */
|
2010-08-05 12:59:10 -06:00
|
|
|
if (infosec >= info->hdr->e_shnum)
|
2010-08-05 12:59:05 -06:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Don't bother with non-allocated sections */
|
2010-08-05 12:59:10 -06:00
|
|
|
if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
|
2010-08-05 12:59:05 -06:00
|
|
|
continue;
|
|
|
|
|
2016-03-22 20:03:16 -04:00
|
|
|
if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH)
|
livepatch: Apply vmlinux-specific KLP relocations early
KLP relocations are livepatch-specific relocations which are applied to
a KLP module's text or data. They exist for two reasons:
1) Unexported symbols: replacement functions often need to access
unexported symbols (e.g. static functions), which "normal"
relocations don't allow.
2) Late module patching: this is the ability for a KLP module to
bypass normal module dependencies, such that the KLP module can be
loaded *before* a to-be-patched module. This means that
relocations which need to access symbols in the to-be-patched
module might need to be applied to the KLP module well after it has
been loaded.
Non-late-patched KLP relocations are applied from the KLP module's init
function. That usually works fine, unless the patched code wants to use
alternatives, paravirt patching, jump tables, or some other special
section which needs relocations. Then we run into ordering issues and
crashes.
In order for those special sections to work properly, the KLP
relocations should be applied *before* the special section init code
runs, such as apply_paravirt(), apply_alternatives(), or
jump_label_apply_nops().
You might think the obvious solution would be to move the KLP relocation
initialization earlier, but it's not necessarily that simple. The
problem is the above-mentioned late module patching, for which KLP
relocations can get applied well after the KLP module is loaded.
To "fix" this issue in the past, we created .klp.arch sections:
.klp.arch.{module}..altinstructions
.klp.arch.{module}..parainstructions
Those sections allow KLP late module patching code to call
apply_paravirt() and apply_alternatives() after the module-specific KLP
relocations (.klp.rela.{module}.{section}) have been applied.
But that has a lot of drawbacks, including code complexity, the need for
arch-specific code, and the (per-arch) danger that we missed some
special section -- for example the __jump_table section which is used
for jump labels.
It turns out there's a simpler and more functional approach. There are
two kinds of KLP relocation sections:
1) vmlinux-specific KLP relocation sections
.klp.rela.vmlinux.{sec}
These are relocations (applied to the KLP module) which reference
unexported vmlinux symbols.
2) module-specific KLP relocation sections
.klp.rela.{module}.{sec}:
These are relocations (applied to the KLP module) which reference
unexported or exported module symbols.
Up until now, these have been treated the same. However, they're
inherently different.
Because of late module patching, module-specific KLP relocations can be
applied very late, thus they can create the ordering headaches described
above.
But vmlinux-specific KLP relocations don't have that problem. There's
nothing to prevent them from being applied earlier. So apply them at
the same time as normal relocations, when the KLP module is being
loaded.
This means that for vmlinux-specific KLP relocations, we no longer have
any ordering issues. vmlinux-referencing jump labels, alternatives, and
paravirt patching will work automatically, without the need for the
.klp.arch hacks.
All that said, for module-specific KLP relocations, the ordering
problems still exist and we *do* still need .klp.arch. Or do we? Stay
tuned.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2020-04-29 10:24:44 -05:00
|
|
|
err = klp_apply_section_relocs(mod, info->sechdrs,
|
|
|
|
info->secstrings,
|
|
|
|
info->strtab,
|
|
|
|
info->index.sym, i,
|
|
|
|
NULL);
|
|
|
|
else if (info->sechdrs[i].sh_type == SHT_REL)
|
2010-08-05 12:59:10 -06:00
|
|
|
err = apply_relocate(info->sechdrs, info->strtab,
|
|
|
|
info->index.sym, i, mod);
|
|
|
|
else if (info->sechdrs[i].sh_type == SHT_RELA)
|
|
|
|
err = apply_relocate_add(info->sechdrs, info->strtab,
|
|
|
|
info->index.sym, i, mod);
|
2010-08-05 12:59:05 -06:00
|
|
|
if (err < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2008-12-31 12:31:18 +01:00
|
|
|
/*
 * Additional bytes needed by arch in front of individual sections.
 *
 * Weak default: an architecture may supply its own definition.  This
 * generic version ignores both arguments and reserves nothing.
 */
unsigned int __weak arch_mod_section_prepend(struct module *mod,
					     unsigned int section)
{
	return 0U;
}
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
long module_get_offset_and_type(struct module *mod, enum mod_mem_type type,
|
|
|
|
Elf_Shdr *sechdr, unsigned int section)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
long offset;
|
|
|
|
long mask = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK) << SH_ENTSIZE_TYPE_SHIFT;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
mod->mem[type].size += arch_mod_section_prepend(mod, section);
|
|
|
|
offset = ALIGN(mod->mem[type].size, sechdr->sh_addralign ?: 1);
|
|
|
|
mod->mem[type].size = offset + sechdr->sh_size;
|
|
|
|
|
|
|
|
WARN_ON_ONCE(offset & mask);
|
|
|
|
return offset | mask;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2023-08-01 14:54:07 +00:00
|
|
|
/*
 * Tell whether the section called @sname is laid out with the init
 * sections.
 *
 * That is the case for everything module_init_section() accepts.  When
 * CONFIG_MODULE_UNLOAD is not set, sections accepted by
 * module_exit_section() are treated the same way.
 */
bool module_init_layout_section(const char *sname)
{
#ifdef CONFIG_MODULE_UNLOAD
	return module_init_section(sname);
#else
	/* Exit sections are checked first, init sections second. */
	return module_exit_section(sname) || module_init_section(sname);
#endif
}
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
static void __layout_sections(struct module *mod, struct load_info *info, bool is_init)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
unsigned int m, i;
|
|
|
|
|
|
|
|
static const unsigned long masks[][2] = {
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* NOTE: all executable code must be the first section
|
2005-04-16 15:20:36 -07:00
|
|
|
* in this array; otherwise modify the text_size
|
2020-11-07 23:20:52 +03:00
|
|
|
* finder in the two loops below
|
|
|
|
*/
|
2005-04-16 15:20:36 -07:00
|
|
|
{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
|
|
|
|
{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
|
2016-07-27 12:06:21 +09:30
|
|
|
{ SHF_RO_AFTER_INIT | SHF_ALLOC, ARCH_SHF_SMALL },
|
2005-04-16 15:20:36 -07:00
|
|
|
{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
|
|
|
|
{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
|
|
|
|
};
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
static const int core_m_to_mem_type[] = {
|
|
|
|
MOD_TEXT,
|
|
|
|
MOD_RODATA,
|
|
|
|
MOD_RO_AFTER_INIT,
|
|
|
|
MOD_DATA,
|
2023-05-28 16:00:41 -07:00
|
|
|
MOD_DATA,
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
};
|
|
|
|
static const int init_m_to_mem_type[] = {
|
|
|
|
MOD_INIT_TEXT,
|
|
|
|
MOD_INIT_RODATA,
|
|
|
|
MOD_INVALID,
|
|
|
|
MOD_INIT_DATA,
|
2023-05-28 16:00:41 -07:00
|
|
|
MOD_INIT_DATA,
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
};
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
enum mod_mem_type type = is_init ? init_m_to_mem_type[m] : core_m_to_mem_type[m];
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
for (i = 0; i < info->hdr->e_shnum; ++i) {
|
|
|
|
Elf_Shdr *s = &info->sechdrs[i];
|
|
|
|
const char *sname = info->secstrings + s->sh_name;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|
|
|
|
|| (s->sh_flags & masks[m][1])
|
|
|
|
|| s->sh_entsize != ~0UL
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
|| is_init != module_init_layout_section(sname))
|
2005-04-16 15:20:36 -07:00
|
|
|
continue;
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
if (WARN_ON_ONCE(type == MOD_INVALID))
|
2005-04-16 15:20:36 -07:00
|
|
|
continue;
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
/*
|
|
|
|
* Do not allocate codetag memory as we load it into
|
|
|
|
* preallocated contiguous memory.
|
|
|
|
*/
|
|
|
|
if (codetag_needs_module_section(mod, sname, s->sh_size)) {
|
|
|
|
/*
|
|
|
|
* s->sh_entsize won't be used but populate the
|
|
|
|
* type field to avoid confusion.
|
|
|
|
*/
|
|
|
|
s->sh_entsize = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK)
|
|
|
|
<< SH_ENTSIZE_TYPE_SHIFT;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
s->sh_entsize = module_get_offset_and_type(mod, type, s, i);
|
2011-12-06 12:11:31 -07:00
|
|
|
pr_debug("\t%s\n", sname);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
/*
|
|
|
|
* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld
|
|
|
|
* might -- code, read-only data, read-write data, small data. Tally
|
|
|
|
* sizes, and place the offsets into sh_entsize fields: high bit means it
|
|
|
|
* belongs in init.
|
|
|
|
*/
|
|
|
|
static void layout_sections(struct module *mod, struct load_info *info)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < info->hdr->e_shnum; i++)
|
|
|
|
info->sechdrs[i].sh_entsize = ~0UL;
|
|
|
|
|
2023-03-21 19:36:20 -06:00
|
|
|
pr_debug("Core section allocation order for %s:\n", mod->name);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
__layout_sections(mod, info, false);
|
|
|
|
|
2023-03-21 19:36:20 -06:00
|
|
|
pr_debug("Init section allocation order for %s:\n", mod->name);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
__layout_sections(mod, info, true);
|
|
|
|
}
|
|
|
|
|
2023-03-19 14:27:40 -07:00
|
|
|
static void module_license_taint_check(struct module *mod, const char *license)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
if (!license)
|
|
|
|
license = "unspecified";
|
|
|
|
|
2006-10-11 01:21:48 -07:00
|
|
|
if (!license_is_gpl_compatible(license)) {
|
2008-10-15 22:01:41 -07:00
|
|
|
if (!test_taint(TAINT_PROPRIETARY_MODULE))
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: module license '%s' taints kernel.\n",
|
|
|
|
mod->name, license);
|
2013-01-21 17:17:39 +10:30
|
|
|
add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
|
|
|
|
LOCKDEP_NOW_UNRELIABLE);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
static void setup_modinfo(struct module *mod, struct load_info *info)
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
{
|
2024-12-16 18:25:10 +01:00
|
|
|
const struct module_attribute *attr;
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; (attr = modinfo_attrs[i]); i++) {
|
|
|
|
if (attr->setup)
|
2010-08-05 12:59:10 -06:00
|
|
|
attr->setup(mod, get_modinfo(info, attr->attr.name));
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-25 00:32:58 -06:00
|
|
|
static void free_modinfo(struct module *mod)
|
|
|
|
{
|
2024-12-16 18:25:10 +01:00
|
|
|
const struct module_attribute *attr;
|
2009-09-25 00:32:58 -06:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; (attr = modinfo_attrs[i]); i++) {
|
|
|
|
if (attr->free)
|
|
|
|
attr->free(mod);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-14 11:36:41 +01:00
|
|
|
/*
 * Report whether the section called @name is an init section, i.e. whether
 * its name begins with ".init".
 *
 * Declared __weak -- presumably so that an architecture can supply its own
 * definition; confirm against the arch code before relying on that.
 */
bool __weak module_init_section(const char *name)
{
	static const char init_prefix[] = ".init";

	return strstarts(name, init_prefix);
}
|
|
|
|
|
2019-06-07 12:49:11 +02:00
|
|
|
/*
 * Report whether the section called @name is an exit section, i.e. whether
 * its name begins with ".exit".
 *
 * Declared __weak -- presumably so that an architecture can supply its own
 * definition; confirm against the arch code before relying on that.
 */
bool __weak module_exit_section(const char *name)
{
	static const char exit_prefix[] = ".exit";

	return strstarts(name, exit_prefix);
}
|
|
|
|
|
2024-10-15 23:16:35 +00:00
|
|
|
static int validate_section_offset(const struct load_info *info, Elf_Shdr *shdr)
|
2010-08-05 12:59:03 -06:00
|
|
|
{
|
2021-10-15 14:57:41 -06:00
|
|
|
#if defined(CONFIG_64BIT)
|
|
|
|
unsigned long long secend;
|
|
|
|
#else
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
unsigned long secend;
|
2021-10-15 14:57:41 -06:00
|
|
|
#endif
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for both overflow and offset/size being
|
|
|
|
* too large.
|
|
|
|
*/
|
|
|
|
secend = shdr->sh_offset + shdr->sh_size;
|
|
|
|
if (secend < shdr->sh_offset || secend > info->len)
|
|
|
|
return -ENOEXEC;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:36 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_ehdr() - Checks an ELF header for module validity
|
|
|
|
* @info: Load info containing the ELF header to check
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
*
|
2024-10-15 23:16:36 +00:00
|
|
|
* Checks whether an ELF header could belong to a valid module. Checks:
|
2023-03-19 14:35:41 -07:00
|
|
|
*
|
2024-10-15 23:16:36 +00:00
|
|
|
* * ELF header is within the data the user provided
|
|
|
|
* * ELF magic is present
|
|
|
|
* * It is relocatable (not final linked, not core file, etc.)
|
|
|
|
* * The header's machine type matches what the architecture expects.
|
|
|
|
* * Optional arch-specific hook for other properties
|
|
|
|
* - module_elf_check_arch() is currently only used by PPC to check
|
|
|
|
* ELF ABI version, but may be used by others in the future.
|
2023-03-19 14:35:41 -07:00
|
|
|
*
|
2024-10-15 23:16:36 +00:00
|
|
|
* Return: %0 if valid, %-ENOEXEC on failure.
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
*/
|
2024-10-15 23:16:36 +00:00
|
|
|
static int elf_validity_ehdr(const struct load_info *info)
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
{
|
2021-10-15 14:57:40 -06:00
|
|
|
if (info->len < sizeof(*(info->hdr))) {
|
|
|
|
pr_err("Invalid ELF header len %lu\n", info->len);
|
2024-10-15 23:16:36 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
|
|
|
if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) {
|
|
|
|
pr_err("Invalid ELF header magic: != %s\n", ELFMAG);
|
2024-10-15 23:16:36 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
|
|
|
if (info->hdr->e_type != ET_REL) {
|
|
|
|
pr_err("Invalid ELF header type: %u != %u\n",
|
|
|
|
info->hdr->e_type, ET_REL);
|
2024-10-15 23:16:36 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
|
|
|
if (!elf_check_arch(info->hdr)) {
|
|
|
|
pr_err("Invalid architecture in ELF header: %u\n",
|
|
|
|
info->hdr->e_machine);
|
2024-10-15 23:16:36 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
2022-11-28 14:15:36 +10:00
|
|
|
if (!module_elf_check_arch(info->hdr)) {
|
|
|
|
pr_err("Invalid module architecture in ELF header: %u\n",
|
|
|
|
info->hdr->e_machine);
|
2024-10-15 23:16:36 +00:00
|
|
|
return -ENOEXEC;
|
2022-11-28 14:15:36 +10:00
|
|
|
}
|
2024-10-15 23:16:36 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:37 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_sechdrs() - Cache section headers if valid
|
|
|
|
* @info: Load info to compute section headers from
|
|
|
|
*
|
|
|
|
* Checks:
|
|
|
|
*
|
|
|
|
* * ELF header is valid (see elf_validity_ehdr())
|
|
|
|
* * Section headers are the size we expect
|
|
|
|
* * Section array fits in the user provided data
|
|
|
|
* * Section index 0 is NULL
|
|
|
|
* * Section contents are inbounds
|
|
|
|
*
|
|
|
|
* Then updates @info with a &load_info->sechdrs pointer if valid.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, negative error code if validation failed.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_sechdrs(struct load_info *info)
|
|
|
|
{
|
|
|
|
Elf_Shdr *sechdrs;
|
|
|
|
Elf_Shdr *shdr;
|
|
|
|
int i;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = elf_validity_ehdr(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
2021-10-15 14:57:40 -06:00
|
|
|
if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) {
|
|
|
|
pr_err("Invalid ELF section header size\n");
|
2024-10-15 23:16:37 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
/*
|
|
|
|
* e_shnum is 16 bits, and sizeof(Elf_Shdr) is
|
|
|
|
* known and small. So e_shnum * sizeof(Elf_Shdr)
|
|
|
|
* will not overflow unsigned long on any platform.
|
|
|
|
*/
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
if (info->hdr->e_shoff >= info->len
|
|
|
|
|| (info->hdr->e_shnum * sizeof(Elf_Shdr) >
|
2021-10-15 14:57:40 -06:00
|
|
|
info->len - info->hdr->e_shoff)) {
|
|
|
|
pr_err("Invalid ELF section header overflow\n");
|
2024-10-15 23:16:37 +00:00
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
sechdrs = (void *)info->hdr + info->hdr->e_shoff;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The code assumes that section 0 has a length of zero and
|
|
|
|
* an addr of zero, so check for it.
|
|
|
|
*/
|
|
|
|
if (sechdrs[0].sh_type != SHT_NULL
|
|
|
|
|| sechdrs[0].sh_size != 0
|
|
|
|
|| sechdrs[0].sh_addr != 0) {
|
|
|
|
pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n",
|
|
|
|
sechdrs[0].sh_type);
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Validate contents are inbounds */
|
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
shdr = &sechdrs[i];
|
|
|
|
switch (shdr->sh_type) {
|
|
|
|
case SHT_NULL:
|
|
|
|
case SHT_NOBITS:
|
|
|
|
/* No contents, offset/size don't mean anything */
|
|
|
|
continue;
|
|
|
|
default:
|
|
|
|
err = validate_section_offset(info, shdr);
|
|
|
|
if (err < 0) {
|
|
|
|
pr_err("Invalid ELF section in module (section %u type %u)\n",
|
|
|
|
i, shdr->sh_type);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
2010-08-05 12:59:03 -06:00
|
|
|
|
2024-10-15 23:16:37 +00:00
|
|
|
info->sechdrs = sechdrs;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:38 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_secstrings() - Caches section names if valid
|
|
|
|
* @info: Load info to cache section names from. Must have valid sechdrs.
|
|
|
|
*
|
|
|
|
* Specifically checks:
|
|
|
|
*
|
|
|
|
* * Section name table index is inbounds of section headers
|
|
|
|
* * Section name table is not empty
|
|
|
|
* * Section name table is NUL terminated
|
|
|
|
* * All section name offsets are inbounds of the section
|
|
|
|
*
|
|
|
|
* Then updates @info with a &load_info->secstrings pointer if valid.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, negative error code if validation failed.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_secstrings(struct load_info *info)
|
|
|
|
{
|
|
|
|
Elf_Shdr *strhdr, *shdr;
|
|
|
|
char *secstrings;
|
|
|
|
int i;
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify if the section name table index is valid.
|
|
|
|
*/
|
|
|
|
if (info->hdr->e_shstrndx == SHN_UNDEF
|
2021-10-15 14:57:40 -06:00
|
|
|
|| info->hdr->e_shstrndx >= info->hdr->e_shnum) {
|
|
|
|
pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n",
|
|
|
|
info->hdr->e_shstrndx, info->hdr->e_shstrndx,
|
|
|
|
info->hdr->e_shnum);
|
2024-10-15 23:16:38 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
|
|
|
|
strhdr = &info->sechdrs[info->hdr->e_shstrndx];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The section name table must be NUL-terminated, as required
|
|
|
|
* by the spec. This makes strcmp and pr_* calls that access
|
|
|
|
* strings in the section safe.
|
|
|
|
*/
|
2024-10-15 23:16:38 +00:00
|
|
|
secstrings = (void *)info->hdr + strhdr->sh_offset;
|
2022-05-04 12:54:20 +03:00
|
|
|
if (strhdr->sh_size == 0) {
|
|
|
|
pr_err("empty section name table\n");
|
2024-10-15 23:16:38 +00:00
|
|
|
return -ENOEXEC;
|
2022-05-04 12:54:20 +03:00
|
|
|
}
|
2024-10-15 23:16:38 +00:00
|
|
|
if (secstrings[strhdr->sh_size - 1] != '\0') {
|
2021-10-15 14:57:40 -06:00
|
|
|
pr_err("ELF Spec violation: section name table isn't null terminated\n");
|
2024-10-15 23:16:38 +00:00
|
|
|
return -ENOEXEC;
|
2021-10-15 14:57:40 -06:00
|
|
|
}
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
|
2024-10-15 23:16:38 +00:00
|
|
|
for (i = 0; i < info->hdr->e_shnum; i++) {
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
shdr = &info->sechdrs[i];
|
2024-10-15 23:16:38 +00:00
|
|
|
/* SHT_NULL means sh_name has an undefined value */
|
|
|
|
if (shdr->sh_type == SHT_NULL)
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
continue;
|
2024-10-15 23:16:38 +00:00
|
|
|
if (shdr->sh_name >= strhdr->sh_size) {
|
|
|
|
pr_err("Invalid ELF section name in module (section %u type %u)\n",
|
|
|
|
i, shdr->sh_type);
|
|
|
|
return -ENOEXEC;
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:38 +00:00
|
|
|
info->secstrings = secstrings;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:39 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index_info() - Validate and cache modinfo section
|
|
|
|
* @info: Load info to populate the modinfo index on.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated
|
|
|
|
*
|
|
|
|
* Checks that if there is a .modinfo section, it is unique.
|
|
|
|
* Then, it caches its index in &load_info->index.info.
|
|
|
|
* Finally, it tries to populate the name to improve error messages.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, %-ENOEXEC if multiple modinfo sections were found.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index_info(struct load_info *info)
|
|
|
|
{
|
|
|
|
int info_idx;
|
|
|
|
|
|
|
|
info_idx = find_any_unique_sec(info, ".modinfo");
|
|
|
|
|
|
|
|
if (info_idx == 0)
|
|
|
|
/* Early return, no .modinfo */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (info_idx < 0) {
|
2023-03-19 14:35:40 -07:00
|
|
|
pr_err("Only one .modinfo section must exist.\n");
|
2024-10-15 23:16:39 +00:00
|
|
|
return -ENOEXEC;
|
2023-03-19 14:35:40 -07:00
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:39 +00:00
|
|
|
info->index.info = info_idx;
|
|
|
|
/* Try to find a name early so we can log errors with a module name */
|
|
|
|
info->name = get_modinfo(info, "name");
|
2023-03-19 14:35:40 -07:00
|
|
|
|
2024-10-15 23:16:39 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2023-03-19 14:35:40 -07:00
|
|
|
|
2024-10-15 23:16:40 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index_mod() - Validates and caches this_module section
|
|
|
|
* @info: Load info to cache this_module on.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated
|
|
|
|
*
|
|
|
|
* The ".gnu.linkonce.this_module" ELF section is special. It is what modpost
|
|
|
|
* uses to refer to __this_module and let's use rely on THIS_MODULE to point
|
|
|
|
* to &__this_module properly. The kernel's modpost declares it on each
|
|
|
|
* modules's *.mod.c file. If the struct module of the kernel changes a full
|
|
|
|
* kernel rebuild is required.
|
|
|
|
*
|
|
|
|
* We have a few expectations for this special section, this function
|
|
|
|
* validates all this for us:
|
|
|
|
*
|
|
|
|
* * The section has contents
|
|
|
|
* * The section is unique
|
|
|
|
* * We expect the kernel to always have to allocate it: SHF_ALLOC
|
|
|
|
* * The section size must match the kernel's run time's struct module
|
|
|
|
* size
|
|
|
|
*
|
|
|
|
* If all checks pass, the index will be cached in &load_info->index.mod
|
|
|
|
*
|
|
|
|
* Return: %0 on validation success, %-ENOEXEC on failure
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index_mod(struct load_info *info)
|
|
|
|
{
|
|
|
|
Elf_Shdr *shdr;
|
|
|
|
int mod_idx;
|
|
|
|
|
|
|
|
mod_idx = find_any_unique_sec(info, ".gnu.linkonce.this_module");
|
|
|
|
if (mod_idx <= 0) {
|
|
|
|
pr_err("module %s: Exactly one .gnu.linkonce.this_module section must exist.\n",
|
2023-03-19 14:35:40 -07:00
|
|
|
info->name ?: "(missing .modinfo section or name field)");
|
2024-10-15 23:16:40 +00:00
|
|
|
return -ENOEXEC;
|
2023-03-19 14:35:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
shdr = &info->sechdrs[mod_idx];
|
|
|
|
|
|
|
|
if (shdr->sh_type == SHT_NOBITS) {
|
2023-03-19 14:35:40 -07:00
|
|
|
pr_err("module %s: .gnu.linkonce.this_module section must have a size set\n",
|
|
|
|
info->name ?: "(missing .modinfo section or name field)");
|
2024-10-15 23:16:40 +00:00
|
|
|
return -ENOEXEC;
|
2023-03-19 14:35:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!(shdr->sh_flags & SHF_ALLOC)) {
|
2023-03-19 14:35:40 -07:00
|
|
|
pr_err("module %s: .gnu.linkonce.this_module must occupy memory during process execution\n",
|
|
|
|
info->name ?: "(missing .modinfo section or name field)");
|
2024-10-15 23:16:40 +00:00
|
|
|
return -ENOEXEC;
|
2023-03-19 14:35:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (shdr->sh_size != sizeof(struct module)) {
|
2023-03-19 14:35:40 -07:00
|
|
|
pr_err("module %s: .gnu.linkonce.this_module section size must match the kernel's built struct module size at run time\n",
|
|
|
|
info->name ?: "(missing .modinfo section or name field)");
|
2024-10-15 23:16:40 +00:00
|
|
|
return -ENOEXEC;
|
2023-03-19 14:35:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
info->index.mod = mod_idx;
|
|
|
|
|
2024-10-15 23:16:40 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2023-03-19 14:35:38 -07:00
|
|
|
|
2024-10-15 23:16:41 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index_sym() - Validate and cache symtab index
|
|
|
|
* @info: Load info to cache symtab index in.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated.
|
|
|
|
*
|
|
|
|
* Checks that there is exactly one symbol table, then caches its index in
|
|
|
|
* &load_info->index.sym.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, %-ENOEXEC on failure.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index_sym(struct load_info *info)
|
|
|
|
{
|
|
|
|
unsigned int sym_idx;
|
|
|
|
unsigned int num_sym_secs = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
|
|
|
|
num_sym_secs++;
|
|
|
|
sym_idx = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num_sym_secs != 1) {
|
|
|
|
pr_warn("%s: module has no symbols (stripped?)\n",
|
|
|
|
info->name ?: "(missing .modinfo section or name field)");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
info->index.sym = sym_idx;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2024-10-15 23:16:40 +00:00
|
|
|
|
2024-10-15 23:16:42 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index_str() - Validate and cache strtab index
|
|
|
|
* @info: Load info to cache strtab index in.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated.
|
|
|
|
* Must have &load_info->index.sym populated.
|
|
|
|
*
|
|
|
|
* Looks at the symbol table's associated string table, makes sure it is
|
|
|
|
* in-bounds, and caches it.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, %-ENOEXEC on failure.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index_str(struct load_info *info)
|
|
|
|
{
|
|
|
|
unsigned int str_idx = info->sechdrs[info->index.sym].sh_link;
|
|
|
|
|
|
|
|
if (str_idx == SHN_UNDEF || str_idx >= info->hdr->e_shnum) {
|
|
|
|
pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n",
|
|
|
|
str_idx, str_idx, info->hdr->e_shnum);
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
info->index.str = str_idx;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-01-03 17:37:01 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index_versions() - Validate and cache version indices
|
|
|
|
* @info: Load info to cache version indices in.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated.
|
|
|
|
* @flags: Load flags, relevant to suppress version loading, see
|
|
|
|
* uapi/linux/module.h
|
|
|
|
*
|
|
|
|
* If we're ignoring modversions based on @flags, zero all version indices
|
|
|
|
* and return validity. Othewrise check:
|
|
|
|
*
|
|
|
|
* * If "__version_ext_crcs" is present, "__version_ext_names" is present
|
|
|
|
* * There is a name present for every crc
|
|
|
|
*
|
|
|
|
* Then populate:
|
|
|
|
*
|
|
|
|
* * &load_info->index.vers
|
|
|
|
* * &load_info->index.vers_ext_crc
|
|
|
|
* * &load_info->index.vers_ext_names
|
|
|
|
*
|
|
|
|
* if present.
|
|
|
|
*
|
|
|
|
* Return: %0 if valid, %-ENOEXEC on failure.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index_versions(struct load_info *info, int flags)
|
|
|
|
{
|
|
|
|
unsigned int vers_ext_crc;
|
|
|
|
unsigned int vers_ext_name;
|
|
|
|
size_t crc_count;
|
|
|
|
size_t remaining_len;
|
|
|
|
size_t name_size;
|
|
|
|
char *name;
|
|
|
|
|
|
|
|
/* If modversions were suppressed, pretend we didn't find any */
|
|
|
|
if (flags & MODULE_INIT_IGNORE_MODVERSIONS) {
|
|
|
|
info->index.vers = 0;
|
|
|
|
info->index.vers_ext_crc = 0;
|
|
|
|
info->index.vers_ext_name = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
vers_ext_crc = find_sec(info, "__version_ext_crcs");
|
|
|
|
vers_ext_name = find_sec(info, "__version_ext_names");
|
|
|
|
|
|
|
|
/* If we have one field, we must have the other */
|
|
|
|
if (!!vers_ext_crc != !!vers_ext_name) {
|
|
|
|
pr_err("extended version crc+name presence does not match");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have extended version information, we should have the same
|
|
|
|
* number of entries in every section.
|
|
|
|
*/
|
|
|
|
if (vers_ext_crc) {
|
|
|
|
crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32);
|
|
|
|
name = (void *)info->hdr +
|
|
|
|
info->sechdrs[vers_ext_name].sh_offset;
|
|
|
|
remaining_len = info->sechdrs[vers_ext_name].sh_size;
|
|
|
|
|
|
|
|
while (crc_count--) {
|
|
|
|
name_size = strnlen(name, remaining_len) + 1;
|
|
|
|
if (name_size > remaining_len) {
|
|
|
|
pr_err("more extended version crcs than names");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
remaining_len -= name_size;
|
|
|
|
name += name_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
info->index.vers = find_sec(info, "__versions");
|
|
|
|
info->index.vers_ext_crc = vers_ext_crc;
|
|
|
|
info->index.vers_ext_name = vers_ext_name;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-10-15 23:16:43 +00:00
|
|
|
/**
|
|
|
|
* elf_validity_cache_index() - Resolve, validate, cache section indices
|
|
|
|
* @info: Load info to read from and update.
|
|
|
|
* &load_info->sechdrs and &load_info->secstrings must be populated.
|
|
|
|
* @flags: Load flags, relevant to suppress version loading, see
|
|
|
|
* uapi/linux/module.h
|
|
|
|
*
|
|
|
|
* Populates &load_info->index, validating as it goes.
|
|
|
|
* See child functions for per-field validation:
|
|
|
|
*
|
|
|
|
* * elf_validity_cache_index_info()
|
|
|
|
* * elf_validity_cache_index_mod()
|
|
|
|
* * elf_validity_cache_index_sym()
|
|
|
|
* * elf_validity_cache_index_str()
|
2025-01-03 17:37:01 +00:00
|
|
|
* * elf_validity_cache_index_versions()
|
2024-10-15 23:16:43 +00:00
|
|
|
*
|
|
|
|
* If CONFIG_SMP is enabled, load the percpu section by name with no
|
|
|
|
* validation.
|
|
|
|
*
|
|
|
|
* Return: 0 on success, negative error code if an index failed validation.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_index(struct load_info *info, int flags)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = elf_validity_cache_index_info(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
err = elf_validity_cache_index_mod(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
err = elf_validity_cache_index_sym(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
err = elf_validity_cache_index_str(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2025-01-03 17:37:01 +00:00
|
|
|
err = elf_validity_cache_index_versions(info, flags);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2023-03-19 14:35:41 -07:00
|
|
|
|
|
|
|
info->index.pcpu = find_pcpusec(info);
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
return 0;
|
2024-10-15 23:16:43 +00:00
|
|
|
}
|
2021-10-15 14:57:40 -06:00
|
|
|
|
2024-10-15 23:16:44 +00:00
|
|
|
/**
|
2024-10-15 23:16:45 +00:00
|
|
|
* elf_validity_cache_strtab() - Validate and cache symbol string table
|
2024-10-15 23:16:44 +00:00
|
|
|
* @info: Load info to read from and update.
|
|
|
|
* Must have &load_info->sechdrs and &load_info->secstrings populated.
|
|
|
|
* Must have &load_info->index populated.
|
|
|
|
*
|
2024-10-15 23:16:45 +00:00
|
|
|
* Checks:
|
|
|
|
*
|
|
|
|
* * The string table is not empty.
|
|
|
|
* * The string table starts and ends with NUL (required by ELF spec).
|
|
|
|
* * Every &Elf_Sym->st_name offset in the symbol table is inbounds of the
|
|
|
|
* string table.
|
|
|
|
*
|
|
|
|
* And caches the pointer as &load_info->strtab in @info.
|
|
|
|
*
|
2024-10-15 23:16:44 +00:00
|
|
|
* Return: 0 on success, negative error code if a check failed.
|
|
|
|
*/
|
|
|
|
static int elf_validity_cache_strtab(struct load_info *info)
|
|
|
|
{
|
|
|
|
Elf_Shdr *str_shdr = &info->sechdrs[info->index.str];
|
2024-10-15 23:16:45 +00:00
|
|
|
Elf_Shdr *sym_shdr = &info->sechdrs[info->index.sym];
|
2024-10-15 23:16:44 +00:00
|
|
|
char *strtab = (char *)info->hdr + str_shdr->sh_offset;
|
2024-10-15 23:16:45 +00:00
|
|
|
Elf_Sym *syms = (void *)info->hdr + sym_shdr->sh_offset;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (str_shdr->sh_size == 0) {
|
|
|
|
pr_err("empty symbol string table\n");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
if (strtab[0] != '\0') {
|
|
|
|
pr_err("symbol string table missing leading NUL\n");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
if (strtab[str_shdr->sh_size - 1] != '\0') {
|
|
|
|
pr_err("symbol string table isn't NUL terminated\n");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we know strtab is correctly structured, check symbol
|
|
|
|
* starts are inbounds before they're used later.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < sym_shdr->sh_size / sizeof(*syms); i++) {
|
|
|
|
if (syms[i].st_name >= str_shdr->sh_size) {
|
|
|
|
pr_err("symbol name out of bounds in string table");
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
}
|
2024-10-15 23:16:44 +00:00
|
|
|
|
|
|
|
info->strtab = strtab;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
/*
|
2023-03-19 14:35:41 -07:00
|
|
|
* Check userspace passed ELF module against our expectations, and cache
|
|
|
|
* useful variables for further processing as we go.
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
*
|
2023-03-19 14:35:41 -07:00
|
|
|
* This does basic validity checks against section offsets and sizes, the
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
* section name string table, and the indices used for it (sh_name).
|
2023-03-19 14:35:41 -07:00
|
|
|
*
|
|
|
|
* As a last step, since we're already checking the ELF sections we cache
|
|
|
|
* useful variables which will be used later for our convenience:
|
|
|
|
*
|
|
|
|
* o pointers to section headers
|
|
|
|
* o cache the modinfo symbol section
|
|
|
|
* o cache the string symbol section
|
|
|
|
* o cache the module section
|
|
|
|
*
|
|
|
|
* As a last step we set info->mod to the temporary copy of the module in
|
|
|
|
* info->hdr. The final one will be allocated in move_module(). Any
|
|
|
|
* modifications we make to our copy of the module will be carried over
|
|
|
|
* to the final minted module.
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
*/
|
2023-03-19 14:35:41 -07:00
|
|
|
static int elf_validity_cache_copy(struct load_info *info, int flags)
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
2024-10-15 23:16:37 +00:00
|
|
|
err = elf_validity_cache_sechdrs(info);
|
2024-10-15 23:16:36 +00:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2024-10-15 23:16:38 +00:00
|
|
|
err = elf_validity_cache_secstrings(info);
|
2024-10-15 23:16:39 +00:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2024-10-15 23:16:43 +00:00
|
|
|
err = elf_validity_cache_index(info, flags);
|
2024-10-15 23:16:42 +00:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2024-10-15 23:16:44 +00:00
|
|
|
err = elf_validity_cache_strtab(info);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
2023-03-19 14:35:40 -07:00
|
|
|
|
2023-03-19 14:35:38 -07:00
|
|
|
/* This is temporary: point mod into copy of data. */
|
2024-10-15 23:16:40 +00:00
|
|
|
info->mod = (void *)info->hdr + info->sechdrs[info->index.mod].sh_offset;
|
2023-03-19 14:35:38 -07:00
|
|
|
|
2023-03-19 14:35:40 -07:00
|
|
|
/*
|
|
|
|
* If we didn't load the .modinfo 'name' field earlier, fall back to
|
|
|
|
* on-disk struct mod 'name' field.
|
|
|
|
*/
|
|
|
|
if (!info->name)
|
|
|
|
info->name = info->mod->name;
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-07 10:33:49 -07:00
|
|
|
#define COPY_CHUNK_SIZE (16*PAGE_SIZE)
|
|
|
|
|
|
|
|
/*
 * Copy @len bytes from the user buffer @usrc to @dst in pieces of at most
 * COPY_CHUNK_SIZE bytes, calling cond_resched() after every piece.
 *
 * At least one (possibly zero-length) copy is always attempted.
 *
 * Returns 0 on success, or -EFAULT as soon as copy_from_user() reports
 * bytes it could not copy.
 */
static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len)
{
	for (;;) {
		unsigned long chunk = min(len, COPY_CHUNK_SIZE);

		if (copy_from_user(dst, usrc, chunk))
			return -EFAULT;
		cond_resched();

		dst += chunk;
		usrc += chunk;
		len -= chunk;

		/* Exit is tested at the bottom so the first pass always runs. */
		if (!len)
			return 0;
	}
}
|
|
|
|
|
2016-08-25 10:04:45 -05:00
|
|
|
static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
|
2016-03-22 20:03:16 -04:00
|
|
|
{
|
2022-03-22 14:03:34 +00:00
|
|
|
if (!get_modinfo(info, "livepatch"))
|
|
|
|
/* Nothing more to do */
|
|
|
|
return 0;
|
|
|
|
|
2023-03-19 14:27:41 -07:00
|
|
|
if (set_livepatch_module(mod))
|
2022-03-22 14:03:34 +00:00
|
|
|
return 0;
|
2016-03-22 20:03:16 -04:00
|
|
|
|
2022-03-22 14:03:34 +00:00
|
|
|
pr_err("%s: module is marked as livepatch module, but livepatch support is disabled",
|
|
|
|
mod->name);
|
|
|
|
return -ENOEXEC;
|
2016-03-22 20:03:16 -04:00
|
|
|
}
|
|
|
|
|
2018-01-25 15:50:28 -08:00
|
|
|
static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
|
|
|
|
{
|
|
|
|
if (retpoline_module_ok(get_modinfo(info, "retpoline")))
|
|
|
|
return;
|
|
|
|
|
|
|
|
pr_warn("%s: loading module not compiled with retpoline compiler.\n",
|
|
|
|
mod->name);
|
|
|
|
}
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
/* Sets info->hdr and info->len. */
|
|
|
|
static int copy_module_from_user(const void __user *umod, unsigned long len,
|
|
|
|
struct load_info *info)
|
2010-08-05 12:59:03 -06:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
info->len = len;
|
|
|
|
if (info->len < sizeof(*(info->hdr)))
|
2010-08-05 12:59:03 -06:00
|
|
|
return -ENOEXEC;
|
|
|
|
|
2020-10-02 10:38:22 -07:00
|
|
|
err = security_kernel_load_data(LOADING_MODULE, true);
|
2012-10-16 07:32:07 +10:30
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2010-08-05 12:59:03 -06:00
|
|
|
/* Suck in entire file: we'll want most of it. */
|
2020-06-01 21:51:40 -07:00
|
|
|
info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
if (!info->hdr)
|
2010-08-05 12:59:03 -06:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-04-07 10:33:49 -07:00
|
|
|
if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) {
|
2020-10-02 10:38:22 -07:00
|
|
|
err = -EFAULT;
|
|
|
|
goto out;
|
2010-08-05 12:59:03 -06:00
|
|
|
}
|
|
|
|
|
2020-10-02 10:38:22 -07:00
|
|
|
err = security_kernel_post_load_data((char *)info->hdr, info->len,
|
|
|
|
LOADING_MODULE, "init_module");
|
|
|
|
out:
|
|
|
|
if (err)
|
|
|
|
vfree(info->hdr);
|
|
|
|
|
|
|
|
return err;
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
}
|
|
|
|
|
2022-01-05 13:55:12 -08:00
|
|
|
static void free_copy(struct load_info *info, int flags)
|
2010-08-05 12:59:08 -06:00
|
|
|
{
|
2022-01-05 13:55:12 -08:00
|
|
|
if (flags & MODULE_INIT_COMPRESSED_FILE)
|
|
|
|
module_decompress_cleanup(info);
|
|
|
|
else
|
|
|
|
vfree(info->hdr);
|
2010-08-05 12:59:08 -06:00
|
|
|
}
|
|
|
|
|
2012-10-22 18:09:41 +10:30
|
|
|
static int rewrite_section_headers(struct load_info *info, int flags)
|
2010-08-05 12:59:06 -06:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
/* This should always be true, but let's be sure. */
|
|
|
|
info->sechdrs[0].sh_addr = 0;
|
|
|
|
|
|
|
|
for (i = 1; i < info->hdr->e_shnum; i++) {
|
|
|
|
Elf_Shdr *shdr = &info->sechdrs[i];
|
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Mark all sections sh_addr with their address in the
|
|
|
|
* temporary image.
|
|
|
|
*/
|
2010-08-05 12:59:06 -06:00
|
|
|
shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
|
|
|
|
|
|
|
|
}
|
2010-08-05 12:59:07 -06:00
|
|
|
|
|
|
|
/* Track but don't keep modinfo and version sections. */
|
2017-04-21 15:35:27 -07:00
|
|
|
info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
|
2025-01-03 17:37:01 +00:00
|
|
|
info->sechdrs[info->index.vers_ext_crc].sh_flags &=
|
|
|
|
~(unsigned long)SHF_ALLOC;
|
|
|
|
info->sechdrs[info->index.vers_ext_name].sh_flags &=
|
|
|
|
~(unsigned long)SHF_ALLOC;
|
2010-08-05 12:59:07 -06:00
|
|
|
info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
|
2017-04-21 15:35:27 -07:00
|
|
|
|
2010-08-05 12:59:06 -06:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-11-15 19:50:30 +01:00
|
|
|
/*
 * Names of modules whose license declaration is not trusted; each entry
 * carries the reason it was listed.
 */
static const char *const module_license_offenders[] = {
	"driverloader",	/* was caught wrongly pretending to be under GPL */
	"lve",		/* claims to be GPL but upstream won't provide source */
};
|
|
|
|
|
2023-03-19 14:27:42 -07:00
|
|
|
/*
|
|
|
|
* These calls taint the kernel depending certain module circumstances */
|
|
|
|
static void module_augment_kernel_taints(struct module *mod, struct load_info *info)
|
2010-08-05 12:59:03 -06:00
|
|
|
{
|
2023-03-19 14:27:45 -07:00
|
|
|
int prev_taint = test_taint(TAINT_PROPRIETARY_MODULE);
|
2024-11-15 19:50:30 +01:00
|
|
|
size_t i;
|
2023-03-19 14:27:45 -07:00
|
|
|
|
2016-04-13 11:06:12 +09:30
|
|
|
if (!get_modinfo(info, "intree")) {
|
|
|
|
if (!test_taint(TAINT_OOT_MODULE))
|
|
|
|
pr_warn("%s: loading out-of-tree module taints kernel.\n",
|
|
|
|
mod->name);
|
2013-01-21 17:17:39 +10:30
|
|
|
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
|
2016-04-13 11:06:12 +09:30
|
|
|
}
|
2011-10-24 15:12:28 +02:00
|
|
|
|
2018-01-25 15:50:28 -08:00
|
|
|
check_modinfo_retpoline(mod, info);
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
if (get_modinfo(info, "staging")) {
|
2013-01-21 17:17:39 +10:30
|
|
|
add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: module is from the staging directory, the quality "
|
|
|
|
"is unknown, you have been warned.\n", mod->name);
|
2010-08-05 12:59:03 -06:00
|
|
|
}
|
2010-08-05 12:59:05 -06:00
|
|
|
|
2023-03-19 14:27:41 -07:00
|
|
|
if (is_livepatch_module(mod)) {
|
|
|
|
add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
|
|
|
|
pr_notice_once("%s: tainting kernel with TAINT_LIVEPATCH\n",
|
|
|
|
mod->name);
|
|
|
|
}
|
2023-03-19 14:27:42 -07:00
|
|
|
|
2023-03-19 14:27:40 -07:00
|
|
|
module_license_taint_check(mod, get_modinfo(info, "license"));
|
2010-08-05 12:59:05 -06:00
|
|
|
|
2022-07-08 12:48:45 +08:00
|
|
|
if (get_modinfo(info, "test")) {
|
|
|
|
if (!test_taint(TAINT_TEST))
|
|
|
|
pr_warn("%s: loading test module taints kernel.\n",
|
|
|
|
mod->name);
|
|
|
|
add_taint_module(mod, TAINT_TEST, LOCKDEP_STILL_OK);
|
|
|
|
}
|
2023-03-19 14:27:44 -07:00
|
|
|
#ifdef CONFIG_MODULE_SIG
|
|
|
|
mod->sig_ok = info->sig_ok;
|
|
|
|
if (!mod->sig_ok) {
|
|
|
|
pr_notice_once("%s: module verification failed: signature "
|
|
|
|
"and/or required key missing - tainting "
|
|
|
|
"kernel\n", mod->name);
|
|
|
|
add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK);
|
|
|
|
}
|
|
|
|
#endif
|
2023-03-19 14:27:45 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ndiswrapper is under GPL by itself, but loads proprietary modules.
|
|
|
|
* Don't use add_taint_module(), as it would prevent ndiswrapper from
|
|
|
|
* using GPL-only symbols it needs.
|
|
|
|
*/
|
|
|
|
if (strcmp(mod->name, "ndiswrapper") == 0)
|
|
|
|
add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE);
|
|
|
|
|
2024-11-15 19:50:30 +01:00
|
|
|
for (i = 0; i < ARRAY_SIZE(module_license_offenders); ++i) {
|
|
|
|
if (strcmp(mod->name, module_license_offenders[i]) == 0)
|
|
|
|
add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
|
|
|
|
LOCKDEP_NOW_UNRELIABLE);
|
|
|
|
}
|
2023-03-19 14:27:45 -07:00
|
|
|
|
|
|
|
if (!prev_taint && test_taint(TAINT_PROPRIETARY_MODULE))
|
|
|
|
pr_warn("%s: module license taints kernel.\n", mod->name);
|
|
|
|
|
2023-03-19 14:27:42 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
|
|
|
|
{
|
|
|
|
const char *modmagic = get_modinfo(info, "vermagic");
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
|
|
|
|
modmagic = NULL;
|
|
|
|
|
|
|
|
/* This is allowed: modprobe --force will invalidate it. */
|
|
|
|
if (!modmagic) {
|
|
|
|
err = try_to_force_load(mod, "bad vermagic");
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
|
|
|
|
pr_err("%s: version magic '%s' should be '%s'\n",
|
|
|
|
info->name, modmagic, vermagic);
|
|
|
|
return -ENOEXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = check_modinfo_livepatch(mod, info);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2010-08-05 12:59:03 -06:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-10-14 18:08:46 +10:30
|
|
|
static int find_module_sections(struct module *mod, struct load_info *info)
|
2010-08-05 12:59:02 -06:00
|
|
|
{
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->kp = section_objs(info, "__param",
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->kp), &mod->num_kp);
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->syms = section_objs(info, "__ksymtab",
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->syms), &mod->num_syms);
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->crcs = section_addr(info, "__kcrctab");
|
|
|
|
mod->gpl_syms = section_objs(info, "__ksymtab_gpl",
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->gpl_syms),
|
|
|
|
&mod->num_gpl_syms);
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
|
2010-08-05 12:59:02 -06:00
|
|
|
|
|
|
|
#ifdef CONFIG_CONSTRUCTORS
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->ctors = section_objs(info, ".ctors",
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->ctors), &mod->num_ctors);
|
2013-10-14 18:08:46 +10:30
|
|
|
if (!mod->ctors)
|
|
|
|
mod->ctors = section_objs(info, ".init_array",
|
|
|
|
sizeof(*mod->ctors), &mod->num_ctors);
|
|
|
|
else if (find_sec(info, ".init_array")) {
|
|
|
|
/*
|
|
|
|
* This shouldn't happen with same compiler and binutils
|
|
|
|
* building all parts of the module.
|
|
|
|
*/
|
2014-11-10 09:31:29 +10:30
|
|
|
pr_warn("%s: has both .ctors and .init_array.\n",
|
2013-10-14 18:08:46 +10:30
|
|
|
mod->name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2010-08-05 12:59:02 -06:00
|
|
|
#endif
|
|
|
|
|
2020-03-10 14:04:34 +01:00
|
|
|
mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1,
|
|
|
|
&mod->noinstr_text_size);
|
|
|
|
|
2010-08-05 12:59:02 -06:00
|
|
|
#ifdef CONFIG_TRACEPOINTS
|
2011-01-26 17:26:22 -05:00
|
|
|
mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
|
|
|
|
sizeof(*mod->tracepoints_ptrs),
|
|
|
|
&mod->num_tracepoints);
|
2010-08-05 12:59:02 -06:00
|
|
|
#endif
|
2019-04-05 16:15:00 -07:00
|
|
|
#ifdef CONFIG_TREE_SRCU
|
|
|
|
mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs",
|
|
|
|
sizeof(*mod->srcu_struct_ptrs),
|
|
|
|
&mod->num_srcu_structs);
|
|
|
|
#endif
|
2018-12-12 16:42:37 -08:00
|
|
|
#ifdef CONFIG_BPF_EVENTS
|
|
|
|
mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
|
|
|
|
sizeof(*mod->bpf_raw_events),
|
|
|
|
&mod->num_bpf_raw_events);
|
|
|
|
#endif
|
2020-11-09 17:19:31 -08:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
|
2024-06-20 10:17:29 +01:00
|
|
|
mod->btf_base_data = any_section_objs(info, ".BTF.base", 1,
|
|
|
|
&mod->btf_base_data_size);
|
2020-11-09 17:19:31 -08:00
|
|
|
#endif
|
2018-12-31 00:14:15 +09:00
|
|
|
#ifdef CONFIG_JUMP_LABEL
|
2010-09-17 11:09:00 -04:00
|
|
|
mod->jump_entries = section_objs(info, "__jump_table",
|
|
|
|
sizeof(*mod->jump_entries),
|
|
|
|
&mod->num_jump_entries);
|
|
|
|
#endif
|
2010-08-05 12:59:02 -06:00
|
|
|
#ifdef CONFIG_EVENT_TRACING
|
2010-08-05 12:59:10 -06:00
|
|
|
mod->trace_events = section_objs(info, "_ftrace_events",
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->trace_events),
|
|
|
|
&mod->num_trace_events);
|
2017-05-31 16:56:44 -05:00
|
|
|
mod->trace_evals = section_objs(info, "_ftrace_eval_map",
|
|
|
|
sizeof(*mod->trace_evals),
|
|
|
|
&mod->num_trace_evals);
|
2010-08-05 12:59:02 -06:00
|
|
|
#endif
|
tracing: Fix module use of trace_bprintk()
On use of trace_printk() there's a macro that determines if the format
is static or a variable. If it is static, it defaults to __trace_bprintk()
otherwise it uses __trace_printk().
A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we
discussed a way to allow modules to use it. The difference between
__trace_bprintk() and __trace_printk() is that for faster processing,
just the format and args are stored in the trace instead of running
it through a sprintf function. In order to do this, the format used
by the __trace_bprintk() had to be persistent.
See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec
The problem comes with trace_bprintk() where the module is unloaded.
The pointer left in the buffer is still pointing to the format.
To solve this issue, the formats in the module were copied into kernel
core. If the same format was used, they would use the same copy (to prevent
memory leak). This all worked well until we tried to merge everything.
At the time this was written, Lai Jiangshan, Frederic Weisbecker,
Ingo Molnar and myself were all touching the same code. When this was
merged, we lost the part of it that was in module.c. This kept out the
copying of the formats and unloading the module could cause bad pointers
left in the ring buffer.
This patch adds back (with updates required for current kernel) the
module code that sets up the necessary pointers.
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2010-11-10 22:19:24 -05:00
|
|
|
#ifdef CONFIG_TRACING
|
|
|
|
mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
|
|
|
|
sizeof(*mod->trace_bprintk_fmt_start),
|
|
|
|
&mod->num_trace_bprintk_fmt);
|
|
|
|
#endif
|
2010-08-05 12:59:02 -06:00
|
|
|
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
|
|
|
/* sechdrs[0].sh_size is always zero */
|
module/ftrace: handle patchable-function-entry
When using patchable-function-entry, the compiler will record the
callsites into a section named "__patchable_function_entries" rather
than "__mcount_loc". Let's abstract this difference behind a new
FTRACE_CALLSITE_SECTION, so that architectures don't have to handle this
explicitly (e.g. with custom module linker scripts).
As parisc currently handles this explicitly, it is fixed up accordingly,
with its custom linker script removed. Since FTRACE_CALLSITE_SECTION is
only defined when DYNAMIC_FTRACE is selected, the parisc module loading
code is updated to only use the definition in that case. When
DYNAMIC_FTRACE is not selected, modules shouldn't have this section, so
this removes some redundant work in that case.
To make sure that this is keep up-to-date for modules and the main
kernel, a comment is added to vmlinux.lds.h, with the existing ifdeffery
simplified for legibility.
I built parisc generic-{32,64}bit_defconfig with DYNAMIC_FTRACE enabled,
and verified that the section made it into the .ko files for modules.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Helge Deller <deller@gmx.de>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Torsten Duwe <duwe@suse.de>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Sven Schnelle <svens@stackframe.org>
Tested-by: Torsten Duwe <duwe@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: linux-parisc@vger.kernel.org
2019-10-16 18:17:11 +01:00
|
|
|
mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION,
|
2010-08-05 12:59:02 -06:00
|
|
|
sizeof(*mod->ftrace_callsites),
|
|
|
|
&mod->num_ftrace_callsites);
|
|
|
|
#endif
|
2018-01-13 02:55:03 +09:00
|
|
|
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
|
|
|
|
mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
|
|
|
|
sizeof(*mod->ei_funcs),
|
|
|
|
&mod->num_ei_funcs);
|
2020-03-26 23:49:48 +09:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_KPROBES
|
|
|
|
mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1,
|
|
|
|
&mod->kprobes_text_size);
|
2020-03-26 23:50:00 +09:00
|
|
|
mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
|
|
|
|
sizeof(unsigned long),
|
|
|
|
&mod->num_kprobe_blacklist);
|
2020-08-18 15:57:42 +02:00
|
|
|
#endif
|
printk: Userspace format indexing support
We have a number of systems industry-wide that have a subset of their
functionality that works as follows:
1. Receive a message from local kmsg, serial console, or netconsole;
2. Apply a set of rules to classify the message;
3. Do something based on this classification (like scheduling a
remediation for the machine), rinse, and repeat.
As a couple of examples of places we have this implemented just inside
Facebook, although this isn't a Facebook-specific problem, we have this
inside our netconsole processing (for alarm classification), and as part
of our machine health checking. We use these messages to determine
fairly important metrics around production health, and it's important
that we get them right.
While for some kinds of issues we have counters, tracepoints, or metrics
with a stable interface which can reliably indicate the issue, in order
to react to production issues quickly we need to work with the interface
which most kernel developers naturally use when developing: printk.
Most production issues come from unexpected phenomena, and as such
usually the code in question doesn't have easily usable tracepoints or
other counters available for the specific problem being mitigated. We
have a number of lines of monitoring defence against problems in
production (host metrics, process metrics, service metrics, etc), and
where it's not feasible to reliably monitor at another level, this kind
of pragmatic netconsole monitoring is essential.
As one would expect, monitoring using printk is rather brittle for a
number of reasons -- most notably that the message might disappear
entirely in a new version of the kernel, or that the message may change
in some way that the regex or other classification methods start to
silently fail.
One factor that makes this even harder is that, under normal operation,
many of these messages are never expected to be hit. For example, there
may be a rare hardware bug which one wants to detect if it was to ever
happen again, but its recurrence is not likely or anticipated. This
precludes using something like checking whether the printk in question
was printed somewhere fleetwide recently to determine whether the
message in question is still present or not, since we don't anticipate
that it should be printed anywhere, but still need to monitor for its
future presence in the long-term.
This class of issue has happened on a number of occasions, causing
unhealthy machines with hardware issues to remain in production for
longer than ideal. As a recent example, some monitoring around
blk_update_request fell out of date and caused semi-broken machines to
remain in production for longer than would be desirable.
Searching through the codebase to find the message is also extremely
fragile, because many of the messages are further constructed beyond
their callsite (eg. btrfs_printk and other module-specific wrappers,
each with their own functionality). Even if they aren't, guessing the
format and formulation of the underlying message based on the aesthetics
of the message emitted is not a recipe for success at scale, and our
previous issues with fleetwide machine health checking demonstrate as
much.
This provides a solution to the issue of silently changed or deleted
printks: we record pointers to all printk format strings known at
compile time into a new .printk_index section, both in vmlinux and
modules. At runtime, this can then be iterated by looking at
<debugfs>/printk/index/<module>, which emits the following format, both
readable by humans and able to be parsed by machines:
$ head -1 vmlinux; shuf -n 5 vmlinux
# <level[,flags]> filename:line function "format"
<5> block/blk-settings.c:661 disk_stack_limits "%s: Warning: Device %s is misaligned\n"
<4> kernel/trace/trace.c:8296 trace_create_file "Could not create tracefs '%s' entry\n"
<6> arch/x86/kernel/hpet.c:144 _hpet_print_config "hpet: %s(%d):\n"
<6> init/do_mounts.c:605 prepare_namespace "Waiting for root device %s...\n"
<6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
This mitigates the majority of cases where we have a highly-specific
printk which we want to match on, as we can now enumerate and check
whether the format changed or the printk callsite disappeared entirely
in userspace. This allows us to catch changes to printks we monitor
earlier and decide what to do about it before it becomes problematic.
There is no additional runtime cost for printk callers or printk itself,
and the assembly generated is exactly the same.
Signed-off-by: Chris Down <chris@chrisdown.name>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Jessica Yu <jeyu@kernel.org> # for module.{c,h}
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/e42070983637ac5e384f17fbdbe86d19c7b212a5.1623775748.git.chris@chrisdown.name
2021-06-15 17:52:53 +01:00
|
|
|
#ifdef CONFIG_PRINTK_INDEX
|
|
|
|
mod->printk_index_start = section_objs(info, ".printk_index",
|
|
|
|
sizeof(*mod->printk_index_start),
|
|
|
|
&mod->printk_index_size);
|
|
|
|
#endif
|
2020-08-18 15:57:42 +02:00
|
|
|
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
|
|
|
|
mod->static_call_sites = section_objs(info, ".static_call_sites",
|
|
|
|
sizeof(*mod->static_call_sites),
|
|
|
|
&mod->num_static_call_sites);
|
2017-12-11 11:36:46 -05:00
|
|
|
#endif
|
2022-07-13 08:52:20 +08:00
|
|
|
#if IS_ENABLED(CONFIG_KUNIT)
|
2022-07-09 11:19:57 +08:00
|
|
|
mod->kunit_suites = section_objs(info, ".kunit_test_suites",
|
|
|
|
sizeof(*mod->kunit_suites),
|
|
|
|
&mod->num_kunit_suites);
|
2023-12-13 19:44:17 +00:00
|
|
|
mod->kunit_init_suites = section_objs(info, ".kunit_init_test_suites",
|
|
|
|
sizeof(*mod->kunit_init_suites),
|
|
|
|
&mod->num_kunit_init_suites);
|
2022-07-09 11:19:57 +08:00
|
|
|
#endif
|
|
|
|
|
2010-08-05 12:59:12 -06:00
|
|
|
mod->extable = section_objs(info, "__ex_table",
|
|
|
|
sizeof(*mod->extable), &mod->num_exentries);
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
if (section_addr(info, "__obsparm"))
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: Ignoring obsolete parameters\n", mod->name);
|
2010-08-05 12:59:12 -06:00
|
|
|
|
2023-03-03 11:50:56 -05:00
|
|
|
#ifdef CONFIG_DYNAMIC_DEBUG_CORE
|
|
|
|
mod->dyndbg_info.descs = section_objs(info, "__dyndbg",
|
|
|
|
sizeof(*mod->dyndbg_info.descs),
|
|
|
|
&mod->dyndbg_info.num_descs);
|
|
|
|
mod->dyndbg_info.classes = section_objs(info, "__dyndbg_classes",
|
|
|
|
sizeof(*mod->dyndbg_info.classes),
|
|
|
|
&mod->dyndbg_info.num_classes);
|
|
|
|
#endif
|
2013-10-14 18:08:46 +10:30
|
|
|
|
|
|
|
return 0;
|
2010-08-05 12:59:02 -06:00
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
static int move_module(struct module *mod, struct load_info *info)
|
2010-08-05 12:59:02 -06:00
|
|
|
{
|
|
|
|
int i;
|
2023-03-19 14:35:39 -07:00
|
|
|
enum mod_mem_type t = 0;
|
|
|
|
int ret = -ENOMEM;
|
2024-10-23 10:07:56 -07:00
|
|
|
bool codetag_section_found = false;
|
2010-08-05 12:59:02 -06:00
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
if (!mod->mem[type].size) {
|
|
|
|
mod->mem[type].base = NULL;
|
2024-10-23 19:27:07 +03:00
|
|
|
mod->mem[type].rw_copy = NULL;
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
continue;
|
|
|
|
}
|
2024-05-05 19:06:17 +03:00
|
|
|
|
|
|
|
ret = module_memory_alloc(mod, type);
|
|
|
|
if (ret) {
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
t = type;
|
2024-10-23 10:07:56 -07:00
|
|
|
goto out_err;
|
2012-12-11 09:38:33 +10:30
|
|
|
}
|
2022-02-23 13:02:14 +01:00
|
|
|
}
|
|
|
|
|
2010-08-05 12:59:02 -06:00
|
|
|
/* Transfer each section which specifies SHF_ALLOC */
|
2023-03-21 19:36:20 -06:00
|
|
|
pr_debug("Final section addresses for %s:\n", mod->name);
|
2010-08-05 12:59:10 -06:00
|
|
|
for (i = 0; i < info->hdr->e_shnum; i++) {
|
2010-08-05 12:59:02 -06:00
|
|
|
void *dest;
|
2010-08-05 12:59:10 -06:00
|
|
|
Elf_Shdr *shdr = &info->sechdrs[i];
|
2024-10-23 10:07:56 -07:00
|
|
|
const char *sname;
|
2024-10-23 19:27:07 +03:00
|
|
|
unsigned long addr;
|
2010-08-05 12:59:02 -06:00
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
if (!(shdr->sh_flags & SHF_ALLOC))
|
2010-08-05 12:59:02 -06:00
|
|
|
continue;
|
|
|
|
|
2024-10-23 10:07:56 -07:00
|
|
|
sname = info->secstrings + shdr->sh_name;
|
|
|
|
/*
|
|
|
|
* Load codetag sections separately as they might still be used
|
|
|
|
* after module unload.
|
|
|
|
*/
|
|
|
|
if (codetag_needs_module_section(mod, sname, shdr->sh_size)) {
|
|
|
|
dest = codetag_alloc_module_section(mod, sname, shdr->sh_size,
|
|
|
|
arch_mod_section_prepend(mod, i), shdr->sh_addralign);
|
|
|
|
if (WARN_ON(!dest)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out_err;
|
|
|
|
}
|
|
|
|
if (IS_ERR(dest)) {
|
|
|
|
ret = PTR_ERR(dest);
|
|
|
|
goto out_err;
|
|
|
|
}
|
|
|
|
addr = (unsigned long)dest;
|
|
|
|
codetag_section_found = true;
|
|
|
|
} else {
|
|
|
|
enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT;
|
|
|
|
unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;
|
|
|
|
|
|
|
|
addr = (unsigned long)mod->mem[type].base + offset;
|
|
|
|
dest = mod->mem[type].rw_copy + offset;
|
|
|
|
}
|
2010-08-05 12:59:02 -06:00
|
|
|
|
2023-03-19 14:35:39 -07:00
|
|
|
if (shdr->sh_type != SHT_NOBITS) {
|
|
|
|
/*
|
|
|
|
* Our ELF checker already validated this, but let's
|
|
|
|
* be pedantic and make the goal clearer. We actually
|
|
|
|
* end up copying over all modifications made to the
|
|
|
|
* userspace copy of the entire struct module.
|
|
|
|
*/
|
|
|
|
if (i == info->index.mod &&
|
|
|
|
(WARN_ON_ONCE(shdr->sh_size != sizeof(struct module)))) {
|
|
|
|
ret = -ENOEXEC;
|
2024-10-23 10:07:56 -07:00
|
|
|
goto out_err;
|
2023-03-19 14:35:39 -07:00
|
|
|
}
|
2010-08-05 12:59:10 -06:00
|
|
|
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
|
2023-03-19 14:35:39 -07:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Update the userspace copy's ELF section address to point to
|
|
|
|
* our newly allocated memory as a pure convenience so that
|
|
|
|
* users of info can keep taking advantage and using the newly
|
|
|
|
* minted official memory area.
|
|
|
|
*/
|
2024-10-23 19:27:07 +03:00
|
|
|
shdr->sh_addr = addr;
|
2023-03-21 19:36:22 -06:00
|
|
|
pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr,
|
|
|
|
(long)shdr->sh_size, info->secstrings + shdr->sh_name);
|
2010-08-05 12:59:02 -06:00
|
|
|
}
|
2010-08-05 12:59:08 -06:00
|
|
|
|
|
|
|
return 0;
|
2024-10-23 10:07:56 -07:00
|
|
|
out_err:
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for several reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for (t--; t >= 0; t--)
|
2024-10-23 10:07:56 -07:00
|
|
|
module_memory_free(mod, t);
|
|
|
|
if (codetag_section_found)
|
|
|
|
codetag_free_module_sections(mod);
|
|
|
|
|
2023-03-19 14:35:39 -07:00
|
|
|
return ret;
|
2010-08-05 12:59:02 -06:00
|
|
|
}
|
|
|
|
|
2023-03-19 14:27:46 -07:00
|
|
|
/*
 * check_export_symbol_versions - verify exported symbols carry CRCs
 * @mod: module being loaded
 *
 * Only meaningful with CONFIG_MODVERSIONS. When the module exports regular
 * or GPL symbols but has no matching CRC table, the decision is delegated
 * to try_to_force_load() and its result is returned. In every other case
 * (including !CONFIG_MODVERSIONS) the function returns 0.
 */
static int check_export_symbol_versions(struct module *mod)
{
#ifdef CONFIG_MODVERSIONS
	bool syms_lack_crcs = mod->num_syms && !mod->crcs;
	bool gpl_syms_lack_crcs = mod->num_gpl_syms && !mod->gpl_crcs;

	if (syms_lack_crcs || gpl_syms_lack_crcs)
		return try_to_force_load(mod,
					 "no versions for exported symbols");
#endif
	return 0;
}
|
|
|
|
|
|
|
|
static void flush_module_icache(const struct module *mod)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Flush the instruction cache, since we've played with text.
|
|
|
|
* Do it before processing of module parameters, so the module
|
|
|
|
* can provide parameter accessor functions of its own.
|
|
|
|
*/
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
const struct module_memory *mod_mem = &mod->mem[type];
|
|
|
|
|
|
|
|
if (mod_mem->size) {
|
|
|
|
flush_icache_range((unsigned long)mod_mem->base,
|
|
|
|
(unsigned long)mod_mem->base + mod_mem->size);
|
|
|
|
}
|
|
|
|
}
|
2010-08-05 12:59:05 -06:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:15:36 +10:00
|
|
|
/*
 * module_elf_check_arch - weak default for the arch-specific ELF header check
 * @hdr: ELF header of the module image (unused here)
 *
 * This fallback accepts every header. Being __weak, it is replaced by any
 * non-weak definition of the same symbol.
 */
bool __weak module_elf_check_arch(Elf_Ehdr *hdr)
{
	return true;
}
|
|
|
|
|
2011-06-30 21:22:11 +02:00
|
|
|
/*
 * module_frob_arch_sections - weak default for arch section fix-ups
 * @hdr:        ELF header of the module image
 * @sechdrs:    section header table
 * @secstrings: section name string table
 * @mod:        module being loaded
 *
 * This fallback touches nothing and reports success (0). Being __weak, it
 * is replaced by any non-weak definition of the same symbol. The caller in
 * layout_and_allocate() treats a negative return value as an error.
 */
int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
				     Elf_Shdr *sechdrs,
				     char *secstrings,
				     struct module *mod)
{
	return 0;
}
|
|
|
|
|
2016-07-21 15:37:56 +09:30
|
|
|
/* module_blacklist is a comma-separated list of module names */
static char *module_blacklist;

/*
 * blacklisted - test whether a module name appears in module_blacklist
 * @module_name: NUL-terminated name to look up
 *
 * Walks the comma-separated entries of module_blacklist and compares each
 * one, by exact length and content, against @module_name.
 *
 * Return: true when an entry matches, false when there is no match or no
 * blacklist has been set.
 */
static bool blacklisted(const char *module_name)
{
	const char *p;
	size_t name_len;
	size_t len;

	if (!module_blacklist)
		return false;

	/* Loop-invariant: measure the candidate once, not once per entry. */
	name_len = strlen(module_name);

	for (p = module_blacklist; *p; p += len) {
		/* Length of the current entry, up to the next ',' or NUL. */
		len = strcspn(p, ",");
		if (len == name_len && !memcmp(module_name, p, len))
			return true;
		/* Also step over the separator that follows the entry. */
		if (p[len] == ',')
			len++;
	}
	return false;
}
|
|
|
|
/* Register module_blacklist as a core parameter of type charp, mode 0400. */
core_param(module_blacklist, module_blacklist, charp, 0400);
|
|
|
|
|
2012-10-22 18:09:41 +10:30
|
|
|
static struct module *layout_and_allocate(struct load_info *info, int flags)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
struct module *mod;
|
2016-07-27 12:06:21 +09:30
|
|
|
unsigned int ndx;
|
2010-08-05 12:59:08 -06:00
|
|
|
int err;
|
2009-10-01 15:43:54 -07:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Allow arches to frob section contents and sizes. */
|
2010-08-05 12:59:10 -06:00
|
|
|
err = module_frob_arch_sections(info->hdr, info->sechdrs,
|
2018-06-22 13:59:29 +02:00
|
|
|
info->secstrings, info->mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (err < 0)
|
2013-07-03 10:06:28 +09:30
|
|
|
return ERR_PTR(err);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2020-04-03 19:13:03 +02:00
|
|
|
err = module_enforce_rwx_sections(info->hdr, info->sechdrs,
|
|
|
|
info->secstrings, info->mod);
|
|
|
|
if (err < 0)
|
|
|
|
return ERR_PTR(err);
|
|
|
|
|
2013-07-03 10:06:28 +09:30
|
|
|
/* We will do a special allocation for per-cpu sections later. */
|
|
|
|
info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2016-07-27 12:06:21 +09:30
|
|
|
/*
|
|
|
|
* Mark ro_after_init section with SHF_RO_AFTER_INIT so that
|
|
|
|
* layout_sections() can put it in the right place.
|
|
|
|
* Note: ro_after_init sections also have SHF_{WRITE,ALLOC} set.
|
|
|
|
*/
|
|
|
|
ndx = find_sec(info, ".data..ro_after_init");
|
2018-09-18 23:51:43 -07:00
|
|
|
if (ndx)
|
|
|
|
info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT;
|
|
|
|
/*
|
|
|
|
* Mark the __jump_table section as ro_after_init as well: these data
|
|
|
|
* structures are never modified, with the exception of entries that
|
|
|
|
* refer to code in the __init section, which are annotated as such
|
|
|
|
* at module load time.
|
|
|
|
*/
|
|
|
|
ndx = find_sec(info, "__jump_table");
|
2016-07-27 12:06:21 +09:30
|
|
|
if (ndx)
|
|
|
|
info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT;
|
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Determine total sizes, and put offsets in sh_entsize. For now
|
|
|
|
* this is done generically; there doesn't appear to be any
|
|
|
|
* special cases for the architectures.
|
|
|
|
*/
|
2018-06-22 13:59:29 +02:00
|
|
|
layout_sections(info->mod, info);
|
|
|
|
layout_symtab(info->mod, info);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2010-08-05 12:59:02 -06:00
|
|
|
/* Allocate and move to the final place */
|
2018-06-22 13:59:29 +02:00
|
|
|
err = move_module(info->mod, info);
|
2010-08-05 12:59:08 -06:00
|
|
|
if (err)
|
2013-07-03 10:06:28 +09:30
|
|
|
return ERR_PTR(err);
|
2010-08-05 12:59:08 -06:00
|
|
|
|
|
|
|
/* Module has been copied to its final place now: return it. */
|
|
|
|
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
|
2010-08-05 12:59:10 -06:00
|
|
|
kmemleak_load_module(mod, info);
|
2024-10-23 10:07:56 -07:00
|
|
|
codetag_module_replaced(info->mod, mod);
|
|
|
|
|
2010-08-05 12:59:08 -06:00
|
|
|
return mod;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * module_deallocate - release the memory held by a module
 * @mod:  module to tear down
 * @info: load state (not used in this function)
 *
 * Calls percpu_modfree(), then module_arch_freeing_init(), then
 * free_mod_mem() on @mod, in that order.
 *
 * mod is no longer valid after this!
 */
static void module_deallocate(struct module *mod, struct load_info *info)
{
	percpu_modfree(mod);
	module_arch_freeing_init(mod);

	free_mod_mem(mod);
}
|
|
|
|
|
2011-06-30 21:22:11 +02:00
|
|
|
/*
 * module_finalize - weak default for arch-specific module finalizing
 * @hdr:     ELF header of the module image
 * @sechdrs: section header table
 * @me:      module being loaded
 *
 * This fallback does nothing and returns 0. Being __weak, it is replaced
 * by any non-weak definition of the same symbol. post_relocation() stops
 * and returns the value when it is non-zero.
 */
int __weak module_finalize(const Elf_Ehdr *hdr,
			   const Elf_Shdr *sechdrs,
			   struct module *me)
{
	return 0;
}
|
|
|
|
|
2024-10-23 19:27:07 +03:00
|
|
|
/*
 * module_post_finalize - weak default for the final arch hook
 * @hdr:     ELF header of the module image
 * @sechdrs: section header table
 * @me:      module being loaded
 *
 * This fallback does nothing and returns 0. Being __weak, it is replaced
 * by any non-weak definition of the same symbol. post_relocation() calls
 * it last and returns its result.
 */
int __weak module_post_finalize(const Elf_Ehdr *hdr,
				const Elf_Shdr *sechdrs,
				struct module *me)
{
	return 0;
}
|
|
|
|
|
2010-08-05 12:59:12 -06:00
|
|
|
static int post_relocation(struct module *mod, const struct load_info *info)
|
|
|
|
{
|
2024-10-23 19:27:07 +03:00
|
|
|
int ret;
|
|
|
|
|
2010-08-05 12:59:13 -06:00
|
|
|
/* Sort exception table now relocations are done. */
|
2010-08-05 12:59:12 -06:00
|
|
|
sort_extable(mod->extable, mod->extable + mod->num_exentries);
|
|
|
|
|
|
|
|
/* Copy relocated percpu area over. */
|
|
|
|
percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr,
|
|
|
|
info->sechdrs[info->index.pcpu].sh_size);
|
|
|
|
|
2010-08-05 12:59:13 -06:00
|
|
|
/* Setup kallsyms-specific fields. */
|
2010-08-05 12:59:12 -06:00
|
|
|
add_kallsyms(mod, info);
|
|
|
|
|
|
|
|
/* Arch-specific module finalizing. */
|
2024-10-23 19:27:07 +03:00
|
|
|
ret = module_finalize(info->hdr, info->sechdrs, mod);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
struct module_memory *mem = &mod->mem[type];
|
|
|
|
|
|
|
|
if (mem->is_rox) {
|
|
|
|
if (!execmem_update_copy(mem->base, mem->rw_copy,
|
|
|
|
mem->size))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
vfree(mem->rw_copy);
|
|
|
|
mem->rw_copy = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return module_post_finalize(info->hdr, info->sechdrs, mod);
|
2010-08-05 12:59:12 -06:00
|
|
|
}
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
/*
 * Call module constructors: invoke each of the mod->num_ctors entries of
 * mod->ctors in index order. Compiles to nothing without
 * CONFIG_CONSTRUCTORS.
 */
static void do_mod_ctors(struct module *mod)
{
#ifdef CONFIG_CONSTRUCTORS
	unsigned long idx = 0;

	while (idx < mod->num_ctors) {
		mod->ctors[idx]();
		idx++;
	}
#endif
}
|
|
|
|
|
2015-01-20 09:07:05 +10:30
|
|
|
/* For freeing module_init on success, in case kallsyms traversing */
|
|
|
|
struct mod_initfree {
|
2019-04-25 17:11:37 -07:00
|
|
|
struct llist_node node;
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
void *init_text;
|
|
|
|
void *init_data;
|
|
|
|
void *init_rodata;
|
2015-01-20 09:07:05 +10:30
|
|
|
};
|
|
|
|
|
2019-04-25 17:11:37 -07:00
|
|
|
static void do_free_init(struct work_struct *w)
|
2015-01-20 09:07:05 +10:30
|
|
|
{
|
2019-04-25 17:11:37 -07:00
|
|
|
struct llist_node *pos, *n, *list;
|
|
|
|
struct mod_initfree *initfree;
|
|
|
|
|
|
|
|
list = llist_del_all(&init_free_list);
|
|
|
|
|
|
|
|
synchronize_rcu();
|
|
|
|
|
|
|
|
llist_for_each_safe(pos, n, list) {
|
|
|
|
initfree = container_of(pos, struct mod_initfree, node);
|
2024-05-05 19:06:18 +03:00
|
|
|
execmem_free(initfree->init_text);
|
|
|
|
execmem_free(initfree->init_data);
|
|
|
|
execmem_free(initfree->init_rodata);
|
2019-04-25 17:11:37 -07:00
|
|
|
kfree(initfree);
|
|
|
|
}
|
2015-01-20 09:07:05 +10:30
|
|
|
}
|
|
|
|
|
2024-02-27 10:35:46 +08:00
|
|
|
void flush_module_init_free_work(void)
|
|
|
|
{
|
|
|
|
flush_work(&init_free_wq);
|
|
|
|
}
|
|
|
|
|
2022-06-03 18:01:00 -07:00
|
|
|
#undef MODULE_PARAM_PREFIX
|
|
|
|
#define MODULE_PARAM_PREFIX "module."
|
|
|
|
/* Default value for module->async_probe_requested */
|
|
|
|
static bool async_probe;
|
|
|
|
module_param(async_probe, bool, 0644);
|
|
|
|
|
2015-02-17 13:46:50 -08:00
|
|
|
/*
|
|
|
|
* This is where the real work happens.
|
|
|
|
*
|
|
|
|
* Keep it uninlined to provide a reliable breakpoint target, e.g. for the gdb
|
|
|
|
* helper command 'lx-symbols'.
|
|
|
|
*/
|
|
|
|
static noinline int do_init_module(struct module *mod)
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
{
|
|
|
|
int ret = 0;
|
2015-01-20 09:07:05 +10:30
|
|
|
struct mod_initfree *freeinit;
|
2023-03-28 20:03:19 -07:00
|
|
|
#if defined(CONFIG_MODULE_STATS)
|
|
|
|
unsigned int text_size = 0, total_size = 0;
|
|
|
|
|
|
|
|
for_each_mod_mem_type(type) {
|
|
|
|
const struct module_memory *mod_mem = &mod->mem[type];
|
|
|
|
if (mod_mem->size) {
|
|
|
|
total_size += mod_mem->size;
|
|
|
|
if (type == MOD_TEXT || type == MOD_INIT_TEXT)
|
|
|
|
text_size += mod_mem->size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2015-01-20 09:07:05 +10:30
|
|
|
|
|
|
|
freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL);
|
|
|
|
if (!freeinit) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for several reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
freeinit->init_text = mod->mem[MOD_INIT_TEXT].base;
|
|
|
|
freeinit->init_data = mod->mem[MOD_INIT_DATA].base;
|
|
|
|
freeinit->init_rodata = mod->mem[MOD_INIT_RODATA].base;
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
|
|
|
|
do_mod_ctors(mod);
|
|
|
|
/* Start the module */
|
|
|
|
if (mod->init != NULL)
|
|
|
|
ret = do_one_initcall(mod->init);
|
|
|
|
if (ret < 0) {
|
2015-01-20 09:07:05 +10:30
|
|
|
goto fail_free_freeinit;
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
}
|
|
|
|
if (ret > 0) {
|
2013-11-12 15:11:28 -08:00
|
|
|
pr_warn("%s: '%s'->init suspiciously returned %d, it should "
|
|
|
|
"follow 0/-E convention\n"
|
|
|
|
"%s: loading module anyway...\n",
|
|
|
|
__func__, mod->name, ret, __func__);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
dump_stack();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now it's a first class citizen! */
|
|
|
|
mod->state = MODULE_STATE_LIVE;
|
|
|
|
blocking_notifier_call_chain(&module_notify_list,
|
|
|
|
MODULE_STATE_LIVE, mod);
|
|
|
|
|
2020-11-27 10:09:39 +01:00
|
|
|
/* Delay uevent until module has finished its init routine */
|
|
|
|
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
|
|
|
|
|
2013-01-15 18:52:51 -08:00
|
|
|
/*
|
|
|
|
* We need to finish all async code before the module init sequence
|
2022-01-27 15:39:53 -08:00
|
|
|
* is done. This has potential to deadlock if synchronous module
|
|
|
|
* loading is requested from async (which is not allowed!).
|
2013-01-15 18:52:51 -08:00
|
|
|
*
|
2022-01-27 15:39:53 -08:00
|
|
|
* See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous
|
|
|
|
* request_module() from async workers") for more details.
|
2013-01-15 18:52:51 -08:00
|
|
|
*/
|
2022-01-27 15:39:53 -08:00
|
|
|
if (!mod->async_probe_requested)
|
2013-01-15 18:52:51 -08:00
|
|
|
async_synchronize_full();
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
ftrace_free_mem(mod, mod->mem[MOD_INIT_TEXT].base,
|
|
|
|
mod->mem[MOD_INIT_TEXT].base + mod->mem[MOD_INIT_TEXT].size);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
/* Drop initial reference. */
|
|
|
|
module_put(mod);
|
|
|
|
trim_init_extable(mod);
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
modules: fix longstanding /proc/kallsyms vs module insertion race.
For CONFIG_KALLSYMS, we keep two symbol tables and two string tables.
There's one full copy, marked SHF_ALLOC and laid out at the end of the
module's init section. There's also a cut-down version that only
contains core symbols and strings, and lives in the module's core
section.
After module init (and before we free the module memory), we switch
the mod->symtab, mod->num_symtab and mod->strtab to point to the core
versions. We do this under the module_mutex.
However, kallsyms doesn't take the module_mutex: it uses
preempt_disable() and rcu tricks to walk through the modules, because
it's used in the oops path. It's also used in /proc/kallsyms.
There's nothing atomic about the change of these variables, so we can
get the old (larger!) num_symtab and the new symtab pointer; in fact
this is what I saw when trying to reproduce.
By grouping these variables together, we can use a
carefully-dereferenced pointer to ensure we always get one or the
other (the free of the module init section is already done in an RCU
callback, so that's safe). We allocate the init one at the end of the
module init section, and keep the core one inside the struct module
itself (it could also have been allocated at the end of the module
core, but that's probably overkill).
Reported-by: Weilong Chen <chenweilong@huawei.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541
Cc: stable@kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2016-02-03 16:55:26 +10:30
|
|
|
/* Switch to core kallsyms now init is done: kallsyms may be walking! */
|
|
|
|
rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
#endif
|
2024-12-05 20:46:15 +01:00
|
|
|
ret = module_enable_rodata_ro_after_init(mod);
|
2024-02-16 09:14:27 +01:00
|
|
|
if (ret)
|
2024-12-05 20:46:16 +01:00
|
|
|
pr_warn("%s: module_enable_rodata_ro_after_init() returned %d, "
|
|
|
|
"ro_after_init data might still be writable\n",
|
|
|
|
mod->name, ret);
|
|
|
|
|
2015-05-27 11:09:37 +09:30
|
|
|
mod_tree_remove_init(mod);
|
2015-01-20 09:07:04 +10:30
|
|
|
module_arch_freeing_init(mod);
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for_class_mod_mem_type(type, init) {
|
|
|
|
mod->mem[type].base = NULL;
|
|
|
|
mod->mem[type].size = 0;
|
|
|
|
}
|
2023-03-28 20:03:19 -07:00
|
|
|
|
2020-11-20 23:08:29 -08:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
2024-06-20 10:17:29 +01:00
|
|
|
/* .BTF is not SHF_ALLOC and will get removed, so sanitize pointers */
|
2020-11-20 23:08:29 -08:00
|
|
|
mod->btf_data = NULL;
|
2024-06-20 10:17:29 +01:00
|
|
|
mod->btf_base_data = NULL;
|
2020-11-20 23:08:29 -08:00
|
|
|
#endif
|
2015-01-20 09:07:05 +10:30
|
|
|
/*
|
|
|
|
* We want to free module_init, but be aware that kallsyms may be
|
2015-05-27 11:09:35 +09:30
|
|
|
* walking this with preempt disabled. In all the failure paths, we
|
2018-11-06 19:17:01 -08:00
|
|
|
* call synchronize_rcu(), but we don't want to slow down the success
|
2024-05-05 19:06:18 +03:00
|
|
|
* path. execmem_free() cannot be called in an interrupt, so do the
|
2019-04-25 17:11:37 -07:00
|
|
|
* work and call synchronize_rcu() in a work queue.
|
|
|
|
*
|
2024-05-05 19:06:18 +03:00
|
|
|
* Note that execmem_alloc() on most architectures creates W+X page
|
2018-05-11 16:01:42 -07:00
|
|
|
* mappings which won't be cleaned up until do_free_init() runs. Any
|
|
|
|
* code such as mark_rodata_ro() which depends on those mappings to
|
2024-02-27 10:35:46 +08:00
|
|
|
* be cleaned up needs to sync with the queued work by invoking
|
|
|
|
* flush_module_init_free_work().
|
2015-01-20 09:07:05 +10:30
|
|
|
*/
|
2019-04-25 17:11:37 -07:00
|
|
|
if (llist_add(&freeinit->node, &init_free_list))
|
|
|
|
schedule_work(&init_free_wq);
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
wake_up_all(&module_wq);
|
|
|
|
|
2023-03-28 20:03:19 -07:00
|
|
|
mod_stat_add_long(text_size, &total_text_size);
|
|
|
|
mod_stat_add_long(total_size, &total_mod_size);
|
|
|
|
|
|
|
|
mod_stat_inc(&modcount);
|
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
return 0;
|
2015-01-20 09:07:05 +10:30
|
|
|
|
|
|
|
fail_free_freeinit:
|
|
|
|
kfree(freeinit);
|
|
|
|
fail:
|
|
|
|
/* Try to protect us from buggy refcounters. */
|
|
|
|
mod->state = MODULE_STATE_GOING;
|
2018-11-06 19:17:01 -08:00
|
|
|
synchronize_rcu();
|
2015-01-20 09:07:05 +10:30
|
|
|
module_put(mod);
|
|
|
|
blocking_notifier_call_chain(&module_notify_list,
|
|
|
|
MODULE_STATE_GOING, mod);
|
2016-03-16 20:55:39 -04:00
|
|
|
klp_module_going(mod);
|
2016-02-16 17:32:33 -05:00
|
|
|
ftrace_release_mod(mod);
|
2015-01-20 09:07:05 +10:30
|
|
|
free_module(mod);
|
|
|
|
wake_up_all(&module_wq);
|
2023-03-28 20:03:19 -07:00
|
|
|
|
2015-01-20 09:07:05 +10:30
|
|
|
return ret;
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
}
|
|
|
|
|
|
|
|
static int may_init_module(void)
|
|
|
|
{
|
|
|
|
if (!capable(CAP_SYS_MODULE) || modules_disabled)
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-03-10 20:05:52 -08:00
|
|
|
/* Is this module of this name done loading? No locks held. */
|
|
|
|
static bool finished_loading(const char *name)
|
|
|
|
{
|
|
|
|
struct module *mod;
|
|
|
|
bool ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The module_mutex should not be a heavily contended lock;
|
|
|
|
* if we get the occasional sleep here, we'll go an extra iteration
|
|
|
|
* in the wait_event_interruptible(), which is harmless.
|
|
|
|
*/
|
|
|
|
sched_annotate_sleep();
|
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
mod = find_module_all(name, strlen(name), true);
|
|
|
|
ret = !mod || mod->state == MODULE_STATE_LIVE
|
|
|
|
|| mod->state == MODULE_STATE_GOING;
|
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Must be called with module_mutex held */
|
2023-03-28 20:03:19 -07:00
|
|
|
static int module_patient_check_exists(const char *name,
|
|
|
|
enum fail_dup_mod_reason reason)
|
2023-03-10 20:05:52 -08:00
|
|
|
{
|
|
|
|
struct module *old;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
old = find_module_all(name, strlen(name), true);
|
|
|
|
if (old == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (old->state == MODULE_STATE_COMING ||
|
|
|
|
old->state == MODULE_STATE_UNFORMED) {
|
|
|
|
/* Wait in case it fails to load. */
|
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
err = wait_event_interruptible(module_wq,
|
|
|
|
finished_loading(name));
|
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* The module might have gone in the meantime. */
|
|
|
|
old = find_module_all(name, strlen(name), true);
|
|
|
|
}
|
|
|
|
|
2023-03-28 20:03:19 -07:00
|
|
|
if (try_add_failed_module(name, reason))
|
|
|
|
pr_warn("Could not add fail-tracking for module: %s\n", name);
|
|
|
|
|
2023-03-10 20:05:52 -08:00
|
|
|
/*
|
|
|
|
* We are here only when the same module was being loaded. Do
|
|
|
|
* not try to load it again right now. It prevents long delays
|
|
|
|
* caused by serialized module load failures. It might happen
|
|
|
|
* when more devices of the same type trigger load of
|
|
|
|
* a particular module.
|
|
|
|
*/
|
|
|
|
if (old && old->state == MODULE_STATE_LIVE)
|
|
|
|
return -EEXIST;
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
2013-01-21 17:18:59 +10:30
|
|
|
/*
|
|
|
|
* We try to place it in the list now to make sure it's unique before
|
|
|
|
* we dedicate too many resources. In particular, temporary percpu
|
|
|
|
* memory exhaustion.
|
|
|
|
*/
|
|
|
|
static int add_unformed_module(struct module *mod)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
mod->state = MODULE_STATE_UNFORMED;
|
|
|
|
|
|
|
|
mutex_lock(&module_mutex);
|
2023-03-28 20:03:19 -07:00
|
|
|
err = module_patient_check_exists(mod->name, FAIL_DUP_MOD_LOAD);
|
2023-03-10 20:05:52 -08:00
|
|
|
if (err)
|
2013-01-21 17:18:59 +10:30
|
|
|
goto out;
|
2023-03-10 20:05:52 -08:00
|
|
|
|
2015-05-27 11:09:38 +09:30
|
|
|
mod_update_bounds(mod);
|
2013-01-21 17:18:59 +10:30
|
|
|
list_add_rcu(&mod->list, &modules);
|
2015-05-27 11:09:37 +09:30
|
|
|
mod_tree_insert(mod);
|
2013-01-21 17:18:59 +10:30
|
|
|
err = 0;
|
|
|
|
|
|
|
|
out:
|
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int complete_formation(struct module *mod, struct load_info *info)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
|
|
|
|
/* Find duplicate symbols (must be called under lock). */
|
2018-11-19 17:43:58 +01:00
|
|
|
err = verify_exported_symbols(mod);
|
2013-01-21 17:18:59 +10:30
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
|
|
|
|
2022-09-08 14:54:47 -07:00
|
|
|
/* These rely on module_mutex for list integrity. */
|
2013-01-21 17:18:59 +10:30
|
|
|
module_bug_finalize(info->hdr, info->sechdrs, mod);
|
2022-09-08 14:54:47 -07:00
|
|
|
module_cfi_finalize(info->hdr, info->sechdrs, mod);
|
2013-01-21 17:18:59 +10:30
|
|
|
|
2024-12-05 20:46:15 +01:00
|
|
|
err = module_enable_rodata_ro(mod);
|
2024-02-16 09:14:27 +01:00
|
|
|
if (err)
|
|
|
|
goto out_strict_rwx;
|
|
|
|
err = module_enable_data_nx(mod);
|
|
|
|
if (err)
|
|
|
|
goto out_strict_rwx;
|
|
|
|
err = module_enable_text_rox(mod);
|
|
|
|
if (err)
|
|
|
|
goto out_strict_rwx;
|
2014-05-14 10:54:19 +09:30
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Mark state as coming so strong_try_module_get() ignores us,
|
|
|
|
* but kallsyms etc. can see us.
|
|
|
|
*/
|
2013-01-21 17:18:59 +10:30
|
|
|
mod->state = MODULE_STATE_COMING;
|
2014-05-14 10:54:19 +09:30
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
|
|
|
|
return 0;
|
2013-01-21 17:18:59 +10:30
|
|
|
|
2024-02-16 09:14:27 +01:00
|
|
|
out_strict_rwx:
|
|
|
|
module_bug_cleanup(mod);
|
2013-01-21 17:18:59 +10:30
|
|
|
out:
|
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-03-16 20:55:38 -04:00
|
|
|
static int prepare_coming_module(struct module *mod)
|
|
|
|
{
|
2016-03-16 20:55:39 -04:00
|
|
|
int err;
|
|
|
|
|
2016-03-16 20:55:38 -04:00
|
|
|
ftrace_module_enable(mod);
|
2016-03-16 20:55:39 -04:00
|
|
|
err = klp_module_coming(mod);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2020-08-18 15:57:38 +02:00
|
|
|
err = blocking_notifier_call_chain_robust(&module_notify_list,
|
|
|
|
MODULE_STATE_COMING, MODULE_STATE_GOING, mod);
|
|
|
|
err = notifier_to_errno(err);
|
|
|
|
if (err)
|
|
|
|
klp_module_going(mod);
|
|
|
|
|
|
|
|
return err;
|
2016-03-16 20:55:38 -04:00
|
|
|
}
|
|
|
|
|
module: add extra argument for parse_params() callback
This adds an extra argument onto parse_params() to be used
as a way to make the unused callback a bit more useful and
generic by allowing the caller to pass on a data structure
of its choice. An example use case is to allow us to easily
make module parameters for every module which we will do
next.
@ parse @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
extern char *parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
s16 level_min,
s16 level_max,
+ void *arg,
int (*unknown)(char *param, char *val,
const char *doing
+ , void *arg
));
@ parse_mod @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
char *parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
s16 level_min,
s16 level_max,
+ void *arg,
int (*unknown)(char *param, char *val,
const char *doing
+ , void *arg
))
{
...
}
@ parse_args_found @
expression R, E1, E2, E3, E4, E5, E6;
identifier func;
@@
(
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
func);
|
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
&func);
|
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
NULL);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
func);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
&func);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
NULL);
)
@ parse_args_unused depends on parse_args_found @
identifier parse_args_found.func;
@@
int func(char *param, char *val, const char *unused
+ , void *arg
)
{
...
}
@ mod_unused depends on parse_args_found @
identifier parse_args_found.func;
expression A1, A2, A3;
@@
- func(A1, A2, A3);
+ func(A1, A2, A3, NULL);
Generated-by: Coccinelle SmPL
Cc: cocci@systeme.lip6.fr
Cc: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Felipe Contreras <felipe.contreras@gmail.com>
Cc: Ewan Milne <emilne@redhat.com>
Cc: Jean Delvare <jdelvare@suse.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-03-30 16:20:03 -07:00
|
|
|
static int unknown_module_param_cb(char *param, char *val, const char *modname,
|
|
|
|
void *arg)
|
2013-07-02 15:35:12 +09:30
|
|
|
{
|
2015-03-30 16:20:05 -07:00
|
|
|
struct module *mod = arg;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (strcmp(param, "async_probe") == 0) {
|
2022-11-01 22:14:06 +01:00
|
|
|
if (kstrtobool(val, &mod->async_probe_requested))
|
2022-06-03 18:01:00 -07:00
|
|
|
mod->async_probe_requested = true;
|
2015-03-30 16:20:05 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-11-10 09:31:29 +10:30
|
|
|
/* Check for magic 'dyndbg' arg */
|
2015-03-30 16:20:05 -07:00
|
|
|
ret = ddebug_dyndbg_module_param_cb(param, val, modname);
|
2013-11-12 15:11:28 -08:00
|
|
|
if (ret != 0)
|
|
|
|
pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
|
2013-07-02 15:35:12 +09:30
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-03-19 14:27:38 -07:00
|
|
|
/* Module within temporary copy, this doesn't do any allocation */
|
|
|
|
static int early_mod_check(struct load_info *info, int flags)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we know we have the correct module name, check
|
|
|
|
* if it's blacklisted.
|
|
|
|
*/
|
|
|
|
if (blacklisted(info->name)) {
|
|
|
|
pr_err("Module %s is blacklisted\n", info->name);
|
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = rewrite_section_headers(info, flags);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* Check module struct version now, before we try to use module. */
|
|
|
|
if (!check_modstruct_version(info, info->mod))
|
|
|
|
return -ENOEXEC;
|
|
|
|
|
2023-03-19 14:27:39 -07:00
|
|
|
err = check_modinfo(info->mod, info, flags);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2023-03-10 20:48:03 -08:00
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
err = module_patient_check_exists(info->mod->name, FAIL_DUP_MOD_BECOMING);
|
|
|
|
mutex_unlock(&module_mutex);
|
|
|
|
|
|
|
|
return err;
|
2023-03-19 14:27:38 -07:00
|
|
|
}
|
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Allocate and load the module: note that size of section 0 is always
|
|
|
|
* zero, and we rely on this for optional sections.
|
|
|
|
*/
|
2012-10-22 18:09:41 +10:30
|
|
|
static int load_module(struct load_info *info, const char __user *uargs,
|
|
|
|
int flags)
|
2010-08-05 12:59:08 -06:00
|
|
|
{
|
2013-01-21 17:18:59 +10:30
|
|
|
struct module *mod;
|
2023-03-28 20:03:19 -07:00
|
|
|
bool module_allocated = false;
|
2018-06-22 14:00:01 +02:00
|
|
|
long err = 0;
|
2014-04-28 11:34:33 +09:30
|
|
|
char *after_dashes;
|
2010-08-05 12:59:08 -06:00
|
|
|
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
/*
|
|
|
|
* Do the signature check (if any) first. All that
|
|
|
|
* the signature check needs is info->len, it does
|
|
|
|
* not need any of the section info. That can be
|
|
|
|
* set up later. This will minimize the chances
|
|
|
|
* of a corrupt module causing problems before
|
|
|
|
* we even get to the signature check.
|
|
|
|
*
|
|
|
|
* The check will also adjust info->len by stripping
|
|
|
|
* off the sig length at the end of the module, making
|
|
|
|
* checks against info->len more correct.
|
|
|
|
*/
|
|
|
|
err = module_sig_check(info, flags);
|
|
|
|
if (err)
|
|
|
|
goto free_copy;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do basic sanity checks against the ELF header and
|
2023-03-19 14:35:41 -07:00
|
|
|
* sections. Cache useful sections and set the
|
|
|
|
* info->mod to the userspace passed struct module.
|
module: harden ELF info handling
5fdc7db644 ("module: setup load info before module_sig_check()")
moved the ELF setup, so that it was done before the signature
check. This made the module name available to signature error
messages.
However, the checks for ELF correctness in setup_load_info
are not sufficient to prevent bad memory references due to
corrupted offset fields, indices, etc.
So, there's a regression in behavior here: a corrupt and unsigned
(or badly signed) module, which might previously have been rejected
immediately, can now cause an oops/crash.
Harden ELF handling for module loading by doing the following:
- Move the signature check back up so that it comes before ELF
initialization. It's best to do the signature check to see
if we can trust the module, before using the ELF structures
inside it. This also makes checks against info->len
more accurate again, as this field will be reduced by the
length of the signature in mod_check_sig().
The module name is now once again not available for error
messages during the signature check, but that seems like
a fair tradeoff.
- Check if sections have offset / size fields that at least don't
exceed the length of the module.
- Check if sections have section name offsets that don't fall
outside the section name table.
- Add a few other sanity checks against invalid section indices,
etc.
This is not an exhaustive consistency check, but the idea is to
at least get through the signature and blacklist checks without
crashing because of corrupted ELF info, and to error out gracefully
for most issues that would have caused problems later on.
Fixes: 5fdc7db6448a ("module: setup load info before module_sig_check()")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
2021-01-14 22:21:46 +00:00
|
|
|
*/
|
2023-03-19 14:35:41 -07:00
|
|
|
err = elf_validity_cache_copy(info, flags);
|
2018-06-22 14:00:01 +02:00
|
|
|
if (err)
|
|
|
|
goto free_copy;
|
|
|
|
|
2023-03-19 14:27:38 -07:00
|
|
|
err = early_mod_check(info, flags);
|
2010-08-05 12:59:08 -06:00
|
|
|
if (err)
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
goto free_copy;
|
2010-08-05 12:59:08 -06:00
|
|
|
|
|
|
|
/* Figure out module layout, and allocate all the memory. */
|
2012-10-22 18:09:41 +10:30
|
|
|
mod = layout_and_allocate(info, flags);
|
2010-08-05 12:59:02 -06:00
|
|
|
if (IS_ERR(mod)) {
|
|
|
|
err = PTR_ERR(mod);
|
2010-08-05 12:59:08 -06:00
|
|
|
goto free_copy;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2023-03-28 20:03:19 -07:00
|
|
|
module_allocated = true;
|
|
|
|
|
2017-02-04 13:10:38 -05:00
|
|
|
audit_log_kern_module(mod->name);
|
|
|
|
|
2013-01-21 17:18:59 +10:30
|
|
|
/* Reserve our place in the list. */
|
|
|
|
err = add_unformed_module(mod);
|
|
|
|
if (err)
|
2013-01-12 13:27:34 +10:30
|
|
|
goto free_module;
|
|
|
|
|
2023-03-19 14:27:43 -07:00
|
|
|
/*
|
|
|
|
* We are tainting your kernel if your module gets into
|
|
|
|
* the modules linked list somehow.
|
|
|
|
*/
|
|
|
|
module_augment_kernel_taints(mod, info);
|
2012-09-26 10:09:40 +01:00
|
|
|
|
2013-07-03 10:06:28 +09:30
|
|
|
/* To avoid stressing percpu allocator, do this once we're unique. */
|
2013-07-03 10:06:29 +09:30
|
|
|
err = percpu_modalloc(mod, info);
|
2013-07-03 10:06:28 +09:30
|
|
|
if (err)
|
|
|
|
goto unlink_mod;
|
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
/* Now module is in final location, initialize linked lists, etc. */
|
2010-08-05 12:59:04 -06:00
|
|
|
err = module_unload_init(mod);
|
|
|
|
if (err)
|
2013-01-12 13:27:34 +10:30
|
|
|
goto unlink_mod;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2015-06-26 06:44:38 +09:30
|
|
|
init_param_lock(mod);
|
module: add per-module param_lock
Add a "param_lock" mutex to each module, and update params.c to use
the correct built-in or module mutex while locking kernel params.
Remove the kparam_block_sysfs_r/w() macros, replace them with direct
calls to kernel_param_[un]lock(module).
The kernel param code currently uses a single mutex to protect
modification of any and all kernel params. While this generally works,
there is one specific problem with it; a module callback function
cannot safely load another module, i.e. with request_module() or even
with indirect calls such as crypto_has_alg(). If the module to be
loaded has any of its params configured (e.g. with a /etc/modprobe.d/*
config file), then the attempt will result in a deadlock between the
first module param callback waiting for modprobe, and modprobe trying to
lock the single kernel param mutex to set the new module's param.
This fixes that by using per-module mutexes, so that each individual module
is protected against concurrent changes in its own kernel params, but is
not blocked by changes to other module params. All built-in modules
continue to use the built-in mutex, since they will always be loaded at
runtime and references (e.g. request_module(), crypto_has_alg()) to them
will never cause load-time param changing.
This also simplifies the interface used by modules to block sysfs access
to their params; while there are currently functions to block and unblock
sysfs param access which are split up by read and write and expect a single
kernel param to be passed, their actual operation is identical and applies
to all params, not just the one passed to them; they simply lock and unlock
the global param mutex. They are replaced with direct calls to
kernel_param_[un]lock(THIS_MODULE), which locks THIS_MODULE's param_lock, or
if the module is built-in, it locks the built-in mutex.
Suggested-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2015-06-17 06:18:52 +09:30
|
|
|
|
2020-11-07 23:20:52 +03:00
|
|
|
/*
|
|
|
|
* Now we've got everything in the final locations, we can
|
|
|
|
* find optional sections.
|
|
|
|
*/
|
2013-10-14 18:08:46 +10:30
|
|
|
err = find_module_sections(mod, info);
|
|
|
|
if (err)
|
|
|
|
goto free_unload;
|
2008-02-28 17:11:02 -05:00
|
|
|
|
2023-03-19 14:27:46 -07:00
|
|
|
err = check_export_symbol_versions(mod);
|
2010-08-05 12:59:05 -06:00
|
|
|
if (err)
|
|
|
|
goto free_unload;
|
2006-01-08 01:03:41 -08:00
|
|
|
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer version than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
/* Set up MODINFO_ATTR fields */
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
setup_modinfo(mod, info);
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer version than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-23 22:05:15 -07:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Fix up syms, so that st_value is a pointer to location. */
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
err = simplify_symbols(mod, info);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (err < 0)
|
2010-08-05 12:59:08 -06:00
|
|
|
goto free_modinfo;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
err = apply_relocations(mod, info);
|
2010-08-05 12:59:05 -06:00
|
|
|
if (err < 0)
|
2010-08-05 12:59:08 -06:00
|
|
|
goto free_modinfo;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
err = post_relocation(mod, info);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (err < 0)
|
2010-08-05 12:59:08 -06:00
|
|
|
goto free_modinfo;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2010-08-05 12:59:05 -06:00
|
|
|
flush_module_icache(mod);
|
2005-09-06 15:17:11 -07:00
|
|
|
|
2010-08-05 12:59:10 -06:00
|
|
|
/* Now copy in args */
|
|
|
|
mod->args = strndup_user(uargs, ~0UL >> 1);
|
|
|
|
if (IS_ERR(mod->args)) {
|
|
|
|
err = PTR_ERR(mod->args);
|
|
|
|
goto free_arch_cleanup;
|
|
|
|
}
|
2006-03-25 03:07:05 -08:00
|
|
|
|
2021-07-07 18:09:20 -07:00
|
|
|
init_build_id(mod, info);
|
2010-07-03 13:07:35 +10:00
|
|
|
|
2014-04-24 10:40:12 -04:00
|
|
|
/* Ftrace init must be called in the MODULE_STATE_UNFORMED state */
|
|
|
|
ftrace_module_init(mod);
|
|
|
|
|
2013-01-21 17:18:59 +10:30
|
|
|
/* Finally it's fully formed, ready to start executing. */
|
|
|
|
err = complete_formation(mod, info);
|
|
|
|
if (err)
|
2013-01-12 13:27:34 +10:30
|
|
|
goto ddebug_cleanup;
|
2010-06-05 11:17:37 -06:00
|
|
|
|
2016-03-16 20:55:38 -04:00
|
|
|
err = prepare_coming_module(mod);
|
|
|
|
if (err)
|
|
|
|
goto bug_cleanup;
|
|
|
|
|
2022-06-03 18:01:00 -07:00
|
|
|
mod->async_probe_requested = async_probe;
|
|
|
|
|
2010-08-05 12:59:13 -06:00
|
|
|
/* Module is ready to execute: parsing args may do that. */
|
2014-04-28 11:34:33 +09:30
|
|
|
after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
|
2016-02-03 16:55:26 +10:30
|
|
|
-32768, 32767, mod,
|
module: add extra argument for parse_params() callback
This adds an extra argument onto parse_params() to be used
as a way to make the unused callback a bit more useful and
generic by allowing the caller to pass on a data structure
of its choice. An example use case is to allow us to easily
make module parameters for every module which we will do
next.
@ parse @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
extern char *parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
s16 level_min,
s16 level_max,
+ void *arg,
int (*unknown)(char *param, char *val,
const char *doing
+ , void *arg
));
@ parse_mod @
identifier name, args, params, num, level_min, level_max;
identifier unknown, param, val, doing;
type s16;
@@
char *parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
s16 level_min,
s16 level_max,
+ void *arg,
int (*unknown)(char *param, char *val,
const char *doing
+ , void *arg
))
{
...
}
@ parse_args_found @
expression R, E1, E2, E3, E4, E5, E6;
identifier func;
@@
(
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
func);
|
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
&func);
|
R =
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
NULL);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
func);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
&func);
|
parse_args(E1, E2, E3, E4, E5, E6,
+ NULL,
NULL);
)
@ parse_args_unused depends on parse_args_found @
identifier parse_args_found.func;
@@
int func(char *param, char *val, const char *unused
+ , void *arg
)
{
...
}
@ mod_unused depends on parse_args_found @
identifier parse_args_found.func;
expression A1, A2, A3;
@@
- func(A1, A2, A3);
+ func(A1, A2, A3, NULL);
Generated-by: Coccinelle SmPL
Cc: cocci@systeme.lip6.fr
Cc: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Felipe Contreras <felipe.contreras@gmail.com>
Cc: Ewan Milne <emilne@redhat.com>
Cc: Jean Delvare <jdelvare@suse.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2015-03-30 16:20:03 -07:00
|
|
|
unknown_module_param_cb);
|
2014-04-28 11:34:33 +09:30
|
|
|
if (IS_ERR(after_dashes)) {
|
|
|
|
err = PTR_ERR(after_dashes);
|
2016-03-16 20:55:38 -04:00
|
|
|
goto coming_cleanup;
|
2014-04-28 11:34:33 +09:30
|
|
|
} else if (after_dashes) {
|
|
|
|
pr_warn("%s: parameters '%s' after `--' ignored\n",
|
|
|
|
mod->name, after_dashes);
|
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2017-02-04 13:10:38 -05:00
|
|
|
/* Link in to sysfs. */
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (err < 0)
|
2016-03-16 20:55:38 -04:00
|
|
|
goto coming_cleanup;
|
2010-06-05 11:17:36 -06:00
|
|
|
|
2016-03-22 20:03:16 -04:00
|
|
|
if (is_livepatch_module(mod)) {
|
|
|
|
err = copy_module_elf(mod, info);
|
|
|
|
if (err < 0)
|
|
|
|
goto sysfs_cleanup;
|
|
|
|
}
|
|
|
|
|
2012-01-13 09:32:14 +10:30
|
|
|
/* Get rid of temporary copy. */
|
2022-01-05 13:55:12 -08:00
|
|
|
free_copy(info, flags);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2024-03-21 09:36:33 -07:00
|
|
|
codetag_load_module(mod);
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Done! */
|
2010-08-05 12:59:13 -06:00
|
|
|
trace_module_load(mod);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
|
|
|
|
return do_init_module(mod);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2016-03-22 20:03:16 -04:00
|
|
|
sysfs_cleanup:
|
|
|
|
mod_sysfs_teardown(mod);
|
2016-03-16 20:55:38 -04:00
|
|
|
coming_cleanup:
|
2016-10-20 17:18:12 +01:00
|
|
|
mod->state = MODULE_STATE_GOING;
|
2017-02-10 14:06:22 -08:00
|
|
|
destroy_params(mod->kp, mod->num_kp);
|
2016-03-16 20:55:38 -04:00
|
|
|
blocking_notifier_call_chain(&module_notify_list,
|
|
|
|
MODULE_STATE_GOING, mod);
|
2016-03-16 20:55:39 -04:00
|
|
|
klp_module_going(mod);
|
2013-01-12 13:27:34 +10:30
|
|
|
bug_cleanup:
|
2020-10-27 15:03:36 +01:00
|
|
|
mod->state = MODULE_STATE_GOING;
|
2013-01-12 13:27:34 +10:30
|
|
|
/* module_bug_cleanup needs module_mutex protection */
|
2010-06-05 11:17:36 -06:00
|
|
|
mutex_lock(&module_mutex);
|
2010-10-05 11:29:27 -07:00
|
|
|
module_bug_cleanup(mod);
|
2013-01-20 20:22:58 -08:00
|
|
|
mutex_unlock(&module_mutex);
|
2014-08-16 04:13:37 +09:30
|
|
|
|
2013-01-21 17:18:59 +10:30
|
|
|
ddebug_cleanup:
|
2018-01-08 10:41:21 +05:30
|
|
|
ftrace_release_mod(mod);
|
2018-11-06 19:17:01 -08:00
|
|
|
synchronize_rcu();
|
2010-08-05 12:59:10 -06:00
|
|
|
kfree(mod->args);
|
|
|
|
free_arch_cleanup:
|
2005-04-16 15:20:36 -07:00
|
|
|
module_arch_cleanup(mod);
|
2010-08-05 12:59:08 -06:00
|
|
|
free_modinfo:
|
2009-09-25 00:32:58 -06:00
|
|
|
free_modinfo(mod);
|
2010-08-05 12:59:05 -06:00
|
|
|
free_unload:
|
2005-04-16 15:20:36 -07:00
|
|
|
module_unload_free(mod);
|
2013-01-12 13:27:34 +10:30
|
|
|
unlink_mod:
|
|
|
|
mutex_lock(&module_mutex);
|
|
|
|
/* Unlink carefully: kallsyms could be walking list. */
|
|
|
|
list_del_rcu(&mod->list);
|
2015-07-09 06:48:06 +09:30
|
|
|
mod_tree_remove(mod);
|
2013-01-12 13:27:34 +10:30
|
|
|
wake_up_all(&module_wq);
|
2015-05-27 11:09:35 +09:30
|
|
|
/* Wait for RCU-sched synchronizing before releasing mod->list. */
|
2018-11-06 19:17:01 -08:00
|
|
|
synchronize_rcu();
|
2013-01-12 13:27:34 +10:30
|
|
|
mutex_unlock(&module_mutex);
|
2010-08-05 12:59:08 -06:00
|
|
|
free_module:
|
2023-03-28 20:03:19 -07:00
|
|
|
mod_stat_bump_invalid(info, flags);
|
2015-02-26 16:23:11 +01:00
|
|
|
/* Free lock-classes; relies on the preceding sync_rcu() */
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs use module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite a bit of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
for_class_mod_mem_type(type, core_data) {
|
|
|
|
lockdep_free_key_range(mod->mem[type].base,
|
|
|
|
mod->mem[type].size);
|
|
|
|
}
|
2015-02-26 16:23:11 +01:00
|
|
|
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
module_deallocate(mod, info);
|
2010-08-05 12:59:08 -06:00
|
|
|
free_copy:
|
2023-03-28 20:03:19 -07:00
|
|
|
/*
|
|
|
|
* The info->len is always set. We distinguish between
|
|
|
|
* failures once the proper module was allocated and
|
|
|
|
* before that.
|
|
|
|
*/
|
|
|
|
if (!module_allocated)
|
|
|
|
mod_stat_bump_becoming(info, flags);
|
2022-01-05 13:55:12 -08:00
|
|
|
free_copy(info, flags);
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 07:31:07 +10:30
|
|
|
return err;
|
2009-06-17 16:28:03 -07:00
|
|
|
}
|
|
|
|
|
2009-01-14 14:14:10 +01:00
|
|
|
/*
 * init_module() system call: load a module from an image held in user memory.
 *
 * @umod:  user-space pointer to the module image
 * @len:   length of the image in bytes
 * @uargs: user-space pointer to the module argument string
 *
 * Returns the error from may_init_module() or copy_module_from_user() if
 * either of them fails, otherwise the result of load_module().
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct load_info info = { };
	int err = may_init_module();

	if (err)
		return err;

	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
		 umod, len, uargs);

	err = copy_module_from_user(umod, len, &info);
	if (!err)
		return load_module(&info, uargs, 0);

	/* The copy from user space failed: record it in the module stats. */
	mod_stat_inc(&failed_kreads);
	mod_stat_add_long(len, &invalid_kread_bytes);
	return err;
}
|
2010-11-29 13:15:42 -05:00
|
|
|
|
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes
when the user space boot sequence tries to load hundreds (or even
thousands) of redundant duplicate modules in parallel.
See commit 9828ed3f695a ("module: error out early on concurrent load of
the same module file") for background and an earlier failed attempt that
was reverted.
That earlier attempt just said "concurrently loading the same module is
silly, just open the module file exclusively and return -ETXTBSY if
somebody else is already loading it".
While it is true that concurrent module loads of the same module is
silly, the reason that earlier attempt then failed was that the
concurrently loaded module would often be a prerequisite for another
module.
Thus failing to load the prerequisite would then cause cascading
failures of the other modules, rather than just short-circuiting that
one unnecessary module load.
At the same time, we still really don't want to load the contents of the
same module file hundreds of times, only to then wait for an eventually
successful load, and have everybody else return -EEXIST.
As a result, this takes another approach, and treats concurrent module
loads from the same file as "idempotent" in the inode. So if one module
load is ongoing, we don't start a new one, but instead just wait for the
first one to complete and return the same return value as it did.
So unlike the first attempt, this does not return early: the intent is
not to speed up the boot, but to avoid a thundering herd problem in
allocating memory (both physical and virtual) for a module more than
once.
Also note that this does change behavior: it used to be that when you
had concurrent loads, you'd have one "winner" that would return success,
and everybody else would return -EEXIST.
In contrast, this idempotent logic goes all Oprah on the problem, and
says "You are a winner! And you are a winner! We are ALL winners". But
since there's no possible actual real semantic difference between "you
loaded the module" and "somebody else already loaded the module", this
is more of a feel-good change than an actual honest-to-goodness semantic
change.
Of course, any true Johnny-come-latelies that don't get caught in the
concurrency filter will still return -EEXIST. It's no different from
not even getting a seat at an Oprah taping. That's life.
See the long thread on the kernel mailing list about this all, which
includes some numbers for memory use before and after the patch.
Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/
Reviewed-by: Johan Hovold <johan@kernel.org>
Tested-by: Johan Hovold <johan@kernel.org>
Tested-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Rudi Heitbaum <rudi@heitbaum.com>
Tested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-05-29 21:39:51 -04:00
|
|
|
struct idempotent {
|
|
|
|
const void *cookie;
|
|
|
|
struct hlist_node entry;
|
|
|
|
struct completion complete;
|
|
|
|
int ret;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define IDEM_HASH_BITS 8
|
|
|
|
static struct hlist_head idem_hash[1 << IDEM_HASH_BITS];
|
|
|
|
static DEFINE_SPINLOCK(idem_lock);
|
|
|
|
|
|
|
|
static bool idempotent(struct idempotent *u, const void *cookie)
|
|
|
|
{
|
|
|
|
int hash = hash_ptr(cookie, IDEM_HASH_BITS);
|
|
|
|
struct hlist_head *head = idem_hash + hash;
|
|
|
|
struct idempotent *existing;
|
|
|
|
bool first;
|
|
|
|
|
2024-08-09 08:33:28 -07:00
|
|
|
u->ret = -EINTR;
|
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes
when the user space boot sequence tries to load hundreds (or even
thousands) of redundant duplicate modules in parallel.
See commit 9828ed3f695a ("module: error out early on concurrent load of
the same module file") for background and an earlier failed attempt that
was reverted.
That earlier attempt just said "concurrently loading the same module is
silly, just open the module file exclusively and return -ETXTBSY if
somebody else is already loading it".
While it is true that concurrent module loads of the same module is
silly, the reason that earlier attempt then failed was that the
concurrently loaded module would often be a prerequisite for another
module.
Thus failing to load the prerequisite would then cause cascading
failures of the other modules, rather than just short-circuiting that
one unnecessary module load.
At the same time, we still really don't want to load the contents of the
same module file hundreds of times, only to then wait for an eventually
successful load, and have everybody else return -EEXIST.
As a result, this takes another approach, and treats concurrent module
loads from the same file as "idempotent" in the inode. So if one module
load is ongoing, we don't start a new one, but instead just wait for the
first one to complete and return the same return value as it did.
So unlike the first attempt, this does not return early: the intent is
not to speed up the boot, but to avoid a thundering herd problem in
allocating memory (both physical and virtual) for a module more than
once.
Also note that this does change behavior: it used to be that when you
had concurrent loads, you'd have one "winner" that would return success,
and everybody else would return -EEXIST.
In contrast, this idempotent logic goes all Oprah on the problem, and
says "You are a winner! And you are a winner! We are ALL winners". But
since there's no possible actual real semantic difference between "you
loaded the module" and "somebody else already loaded the module", this
is more of a feel-good change than an actual honest-to-goodness semantic
change.
Of course, any true Johnny-come-latelies that don't get caught in the
concurrency filter will still return -EEXIST. It's no different from
not even getting a seat at an Oprah taping. That's life.
See the long thread on the kernel mailing list about this all, which
includes some numbers for memory use before and after the patch.
Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/
Reviewed-by: Johan Hovold <johan@kernel.org>
Tested-by: Johan Hovold <johan@kernel.org>
Tested-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Rudi Heitbaum <rudi@heitbaum..com>
Tested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-05-29 21:39:51 -04:00
|
|
|
u->cookie = cookie;
|
|
|
|
init_completion(&u->complete);
|
|
|
|
|
|
|
|
spin_lock(&idem_lock);
|
|
|
|
first = true;
|
|
|
|
hlist_for_each_entry(existing, head, entry) {
|
|
|
|
if (existing->cookie != cookie)
|
|
|
|
continue;
|
|
|
|
first = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
hlist_add_head(&u->entry, idem_hash + hash);
|
|
|
|
spin_unlock(&idem_lock);
|
|
|
|
|
|
|
|
return !first;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We were the first one with 'cookie' on the list, and we ended
|
|
|
|
* up completing the operation. We now need to walk the list,
|
|
|
|
* remove everybody - which includes ourselves - fill in the return
|
|
|
|
* value, and then complete the operation.
|
|
|
|
*/
|
2023-07-04 06:37:32 -07:00
|
|
|
static int idempotent_complete(struct idempotent *u, int ret)
|
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes
when the user space boot sequence tries to load hundreds (or even
thousands) of redundant duplicate modules in parallel.
See commit 9828ed3f695a ("module: error out early on concurrent load of
the same module file") for background and an earlier failed attempt that
was reverted.
That earlier attempt just said "concurrently loading the same module is
silly, just open the module file exclusively and return -ETXTBSY if
somebody else is already loading it".
While it is true that concurrent module loads of the same module is
silly, the reason that earlier attempt then failed was that the
concurrently loaded module would often be a prerequisite for another
module.
Thus failing to load the prerequisite would then cause cascading
failures of the other modules, rather than just short-circuiting that
one unnecessary module load.
At the same time, we still really don't want to load the contents of the
same module file hundreds of times, only to then wait for an eventually
successful load, and have everybody else return -EEXIST.
As a result, this takes another approach, and treats concurrent module
loads from the same file as "idempotent" in the inode. So if one module
load is ongoing, we don't start a new one, but instead just wait for the
first one to complete and return the same return value as it did.
So unlike the first attempt, this does not return early: the intent is
not to speed up the boot, but to avoid a thundering herd problem in
allocating memory (both physical and virtual) for a module more than
once.
Also note that this does change behavior: it used to be that when you
had concurrent loads, you'd have one "winner" that would return success,
and everybody else would return -EEXIST.
In contrast, this idempotent logic goes all Oprah on the problem, and
says "You are a winner! And you are a winner! We are ALL winners". But
since there's no possible actual real semantic difference between "you
loaded the module" and "somebody else already loaded the module", this
is more of a feel-good change than an actual honest-to-goodness semantic
change.
Of course, any true Johnny-come-latelies that don't get caught in the
concurrency filter will still return -EEXIST. It's no different from
not even getting a seat at an Oprah taping. That's life.
See the long thread on the kernel mailing list about this all, which
includes some numbers for memory use before and after the patch.
Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/
Reviewed-by: Johan Hovold <johan@kernel.org>
Tested-by: Johan Hovold <johan@kernel.org>
Tested-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Rudi Heitbaum <rudi@heitbaum..com>
Tested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-05-29 21:39:51 -04:00
|
|
|
{
|
|
|
|
const void *cookie = u->cookie;
|
|
|
|
int hash = hash_ptr(cookie, IDEM_HASH_BITS);
|
|
|
|
struct hlist_head *head = idem_hash + hash;
|
|
|
|
struct hlist_node *next;
|
|
|
|
struct idempotent *pos;
|
|
|
|
|
|
|
|
spin_lock(&idem_lock);
|
|
|
|
hlist_for_each_entry_safe(pos, next, head, entry) {
|
|
|
|
if (pos->cookie != cookie)
|
|
|
|
continue;
|
2024-08-09 08:33:28 -07:00
|
|
|
hlist_del_init(&pos->entry);
|
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes
when the user space boot sequence tries to load hundreds (or even
thousands) of redundant duplicate modules in parallel.
See commit 9828ed3f695a ("module: error out early on concurrent load of
the same module file") for background and an earlier failed attempt that
was reverted.
That earlier attempt just said "concurrently loading the same module is
silly, just open the module file exclusively and return -ETXTBSY if
somebody else is already loading it".
While it is true that concurrent module loads of the same module is
silly, the reason that earlier attempt then failed was that the
concurrently loaded module would often be a prerequisite for another
module.
Thus failing to load the prerequisite would then cause cascading
failures of the other modules, rather than just short-circuiting that
one unnecessary module load.
At the same time, we still really don't want to load the contents of the
same module file hundreds of times, only to then wait for an eventually
successful load, and have everybody else return -EEXIST.
As a result, this takes another approach, and treats concurrent module
loads from the same file as "idempotent" in the inode. So if one module
load is ongoing, we don't start a new one, but instead just wait for the
first one to complete and return the same return value as it did.
So unlike the first attempt, this does not return early: the intent is
not to speed up the boot, but to avoid a thundering herd problem in
allocating memory (both physical and virtual) for a module more than
once.
Also note that this does change behavior: it used to be that when you
had concurrent loads, you'd have one "winner" that would return success,
and everybody else would return -EEXIST.
In contrast, this idempotent logic goes all Oprah on the problem, and
says "You are a winner! And you are a winner! We are ALL winners". But
since there's no possible actual real semantic difference between "you
loaded the module" and "somebody else already loaded the module", this
is more of a feel-good change than an actual honest-to-goodness semantic
change.
Of course, any true Johnny-come-latelies that don't get caught in the
concurrency filter will still return -EEXIST. It's no different from
not even getting a seat at an Oprah taping. That's life.
See the long thread on the kernel mailing list about this all, which
includes some numbers for memory use before and after the patch.
Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/
Reviewed-by: Johan Hovold <johan@kernel.org>
Tested-by: Johan Hovold <johan@kernel.org>
Tested-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Rudi Heitbaum <rudi@heitbaum..com>
Tested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-05-29 21:39:51 -04:00
|
|
|
pos->ret = ret;
|
|
|
|
complete(&pos->complete);
|
|
|
|
}
|
|
|
|
spin_unlock(&idem_lock);
|
2023-07-04 06:37:32 -07:00
|
|
|
return ret;
|
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes
when the user space boot sequence tries to load hundreds (or even
thousands) of redundant duplicate modules in parallel.
See commit 9828ed3f695a ("module: error out early on concurrent load of
the same module file") for background and an earlier failed attempt that
was reverted.
That earlier attempt just said "concurrently loading the same module is
silly, just open the module file exclusively and return -ETXTBSY if
somebody else is already loading it".
While it is true that concurrent module loads of the same module is
silly, the reason that earlier attempt then failed was that the
concurrently loaded module would often be a prerequisite for another
module.
Thus failing to load the prerequisite would then cause cascading
failures of the other modules, rather than just short-circuiting that
one unnecessary module load.
At the same time, we still really don't want to load the contents of the
same module file hundreds of times, only to then wait for an eventually
successful load, and have everybody else return -EEXIST.
As a result, this takes another approach, and treats concurrent module
loads from the same file as "idempotent" in the inode. So if one module
load is ongoing, we don't start a new one, but instead just wait for the
first one to complete and return the same return value as it did.
So unlike the first attempt, this does not return early: the intent is
not to speed up the boot, but to avoid a thundering herd problem in
allocating memory (both physical and virtual) for a module more than
once.
Also note that this does change behavior: it used to be that when you
had concurrent loads, you'd have one "winner" that would return success,
and everybody else would return -EEXIST.
In contrast, this idempotent logic goes all Oprah on the problem, and
says "You are a winner! And you are a winner! We are ALL winners". But
since there's no possible actual real semantic difference between "you
loaded the module" and "somebody else already loaded the module", this
is more of a feel-good change than an actual honest-to-goodness semantic
change.
Of course, any true Johnny-come-latelies that don't get caught in the
concurrency filter will still return -EEXIST. It's no different from
not even getting a seat at an Oprah taping. That's life.
See the long thread on the kernel mailing list about this all, which
includes some numbers for memory use before and after the patch.
Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/
Reviewed-by: Johan Hovold <johan@kernel.org>
Tested-by: Johan Hovold <johan@kernel.org>
Tested-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Rudi Heitbaum <rudi@heitbaum..com>
Tested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-05-29 21:39:51 -04:00
|
|
|
}
|
|
|
|
|
2024-08-09 08:33:28 -07:00
|
|
|
/*
|
|
|
|
* Wait for the idempotent worker.
|
|
|
|
*
|
|
|
|
* If we get interrupted, we need to remove ourselves from the
|
|
|
|
* the idempotent list, and the completion may still come in.
|
|
|
|
*
|
|
|
|
* The 'idem_lock' protects against the race, and 'idem.ret' was
|
|
|
|
* initialized to -EINTR and is thus always the right return
|
|
|
|
* value even if the idempotent work then completes between
|
|
|
|
* the wait_for_completion and the cleanup.
|
|
|
|
*/
|
|
|
|
static int idempotent_wait_for_completion(struct idempotent *u)
|
|
|
|
{
|
|
|
|
if (wait_for_completion_interruptible(&u->complete)) {
|
|
|
|
spin_lock(&idem_lock);
|
|
|
|
if (!hlist_unhashed(&u->entry))
|
|
|
|
hlist_del(&u->entry);
|
|
|
|
spin_unlock(&idem_lock);
|
|
|
|
}
|
|
|
|
return u->ret;
|
|
|
|
}
|
|
|
|
|
2023-05-29 20:55:13 -04:00
|
|
|
/*
 * Read a module image from @f with kernel_read_file() and pass it on to
 * load_module().
 *
 * If MODULE_INIT_COMPRESSED_FILE is set in @flags, the data that was read is
 * run through module_decompress() and the raw buffer is released right away;
 * otherwise the buffer that was read is used as the image directly.
 *
 * Returns the negative kernel_read_file() result or the non-zero
 * module_decompress() result on failure (bumping the matching statistics
 * counter), and the load_module() result otherwise.
 */
static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
{
	struct load_info info = { };
	void *image = NULL;
	int nread;
	int rc;

	nread = kernel_read_file(f, 0, &image, INT_MAX, NULL, READING_MODULE);
	if (nread < 0) {
		mod_stat_inc(&failed_kreads);
		return nread;
	}

	if (!(flags & MODULE_INIT_COMPRESSED_FILE)) {
		/* Plain file: what we read is the image itself. */
		info.hdr = image;
		info.len = nread;
		return load_module(&info, uargs, flags);
	}

	rc = module_decompress(&info, image, nread);
	vfree(image); /* compressed data is no longer needed */
	if (rc) {
		mod_stat_inc(&failed_decompress);
		mod_stat_add_long(nread, &invalid_decompress_bytes);
		return rc;
	}

	return load_module(&info, uargs, flags);
}
|
|
|
|
|
|
|
|
/*
 * Load a module from @f, funnelling concurrent loads of the same inode
 * through the idempotent helpers.
 *
 * Returns -EBADF if @f was not opened for reading. Otherwise returns what
 * idempotent_complete() or idempotent_wait_for_completion() hands back.
 */
static int idempotent_init_module(struct file *f, const char __user * uargs, int flags)
{
	struct idempotent idem;

	if (!(f->f_mode & FMODE_READ))
		return -EBADF;

	/*
	 * Somebody else won the race and is loading the module, so all
	 * that is left for us is to wait for them.
	 */
	if (idempotent(&idem, file_inode(f)))
		return idempotent_wait_for_completion(&idem);

	/* We are the winners of the race and get to do the load. */
	return idempotent_complete(&idem,
				   init_module_from_file(f, uargs, flags));
}
|
|
|
|
|
2023-05-29 20:55:13 -04:00
|
|
|
/*
 * finit_module() - load a module from an open file descriptor.
 *
 * Fails with the may_init_module() error if that check does not pass,
 * with -EINVAL if @flags carries bits outside the three MODULE_INIT_*
 * flags accepted here, and with -EBADF if @fd does not resolve to a file.
 * Otherwise the result of idempotent_init_module() is returned.
 */
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
{
	const int known_flags = MODULE_INIT_IGNORE_MODVERSIONS |
				MODULE_INIT_IGNORE_VERMAGIC |
				MODULE_INIT_COMPRESSED_FILE;
	int err;

	err = may_init_module();
	if (err)
		return err;

	pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);

	if (flags & ~known_flags)
		return -EINVAL;

	CLASS(fd, f)(fd);
	if (fd_empty(f))
		return -EBADF;

	return idempotent_init_module(fd_file(f), uargs, flags);
}
|
|
|
|
|
2016-09-21 13:47:22 +02:00
|
|
|
/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
|
2022-07-14 16:39:31 +01:00
|
|
|
char *module_flags(struct module *mod, char *buf, bool show_state)
|
2006-10-11 01:21:48 -07:00
|
|
|
{
|
|
|
|
int bx = 0;
|
|
|
|
|
2013-01-12 11:38:44 +10:30
|
|
|
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
|
2022-07-14 16:39:31 +01:00
|
|
|
if (!mod->taints && !show_state)
|
|
|
|
goto out;
|
2008-01-25 21:08:33 +01:00
|
|
|
if (mod->taints ||
|
|
|
|
mod->state == MODULE_STATE_GOING ||
|
|
|
|
mod->state == MODULE_STATE_COMING) {
|
2006-10-11 01:21:48 -07:00
|
|
|
buf[bx++] = '(';
|
2022-05-02 21:51:03 +01:00
|
|
|
bx += module_flags_taint(mod->taints, buf + bx);
|
2008-01-25 21:08:33 +01:00
|
|
|
/* Show a - for module-is-being-unloaded */
|
2022-07-14 16:39:31 +01:00
|
|
|
if (mod->state == MODULE_STATE_GOING && show_state)
|
2008-01-25 21:08:33 +01:00
|
|
|
buf[bx++] = '-';
|
|
|
|
/* Show a + for module-is-being-loaded */
|
2022-07-14 16:39:31 +01:00
|
|
|
if (mod->state == MODULE_STATE_COMING && show_state)
|
2008-01-25 21:08:33 +01:00
|
|
|
buf[bx++] = '+';
|
2006-10-11 01:21:48 -07:00
|
|
|
buf[bx++] = ')';
|
|
|
|
}
|
2022-07-14 16:39:31 +01:00
|
|
|
out:
|
2006-10-11 01:21:48 -07:00
|
|
|
buf[bx] = '\0';
|
|
|
|
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Given an address, look for it in the module exception tables. */
|
|
|
|
const struct exception_table_entry *search_module_extables(unsigned long addr)
|
|
|
|
{
|
|
|
|
const struct exception_table_entry *e = NULL;
|
|
|
|
struct module *mod;
|
|
|
|
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_disable();
|
2017-02-08 15:48:01 +01:00
|
|
|
mod = __module_address(addr);
|
|
|
|
if (!mod)
|
|
|
|
goto out;
|
2007-10-18 03:06:07 -07:00
|
|
|
|
2017-02-08 15:48:01 +01:00
|
|
|
if (!mod->num_exentries)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
e = search_extable(mod->extable,
|
2017-07-10 15:51:58 -07:00
|
|
|
mod->num_exentries,
|
2017-02-08 15:48:01 +01:00
|
|
|
addr);
|
|
|
|
out:
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_enable();
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2017-02-08 15:48:01 +01:00
|
|
|
/*
|
|
|
|
* Now, if we found one, we are running inside it now, hence
|
|
|
|
* we cannot unload the module, hence no refcnt needed.
|
|
|
|
*/
|
2005-04-16 15:20:36 -07:00
|
|
|
return e;
|
|
|
|
}
|
|
|
|
|
2020-11-04 23:35:51 +03:00
|
|
|
/**
|
|
|
|
* is_module_address() - is this address inside a module?
|
2009-03-31 13:05:31 -06:00
|
|
|
* @addr: the address to check.
|
|
|
|
*
|
|
|
|
* See is_module_text_address() if you simply want to see if the address
|
|
|
|
* is code (not data).
|
2006-07-03 00:24:24 -07:00
|
|
|
*/
|
2009-03-31 13:05:31 -06:00
|
|
|
bool is_module_address(unsigned long addr)
|
2006-07-03 00:24:24 -07:00
|
|
|
{
|
2009-03-31 13:05:31 -06:00
|
|
|
bool ret;
|
2006-07-03 00:24:24 -07:00
|
|
|
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_disable();
|
2009-03-31 13:05:31 -06:00
|
|
|
ret = __module_address(addr) != NULL;
|
2007-07-15 23:41:46 -07:00
|
|
|
preempt_enable();
|
2006-07-03 00:24:24 -07:00
|
|
|
|
2009-03-31 13:05:31 -06:00
|
|
|
return ret;
|
2006-07-03 00:24:24 -07:00
|
|
|
}
|
|
|
|
|
2020-11-04 23:35:51 +03:00
|
|
|
/**
|
|
|
|
* __module_address() - get the module which contains an address.
|
2009-03-31 13:05:31 -06:00
|
|
|
* @addr: the address.
|
|
|
|
*
|
|
|
|
* Must be called with preempt disabled or module mutex held so that
|
|
|
|
* module doesn't get freed during this.
|
|
|
|
*/
|
2009-04-05 11:04:19 -07:00
|
|
|
struct module *__module_address(unsigned long addr)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
struct module *mod;
|
|
|
|
|
2022-02-23 13:02:14 +01:00
|
|
|
if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max)
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
goto lookup;
|
|
|
|
|
2022-02-23 13:02:14 +01:00
|
|
|
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
if (addr >= mod_tree.data_addr_min && addr <= mod_tree.data_addr_max)
|
|
|
|
goto lookup;
|
2022-02-23 13:02:14 +01:00
|
|
|
#endif
|
2008-07-22 19:24:28 -05:00
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
lookup:
|
2015-05-27 11:09:35 +09:30
|
|
|
module_assert_mutex_or_preempt();
|
|
|
|
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
mod = mod_find(addr, &mod_tree);
|
2015-05-27 11:09:37 +09:30
|
|
|
if (mod) {
|
|
|
|
BUG_ON(!within_module(addr, mod));
|
2013-01-12 11:38:44 +10:30
|
|
|
if (mod->state == MODULE_STATE_UNFORMED)
|
2015-05-27 11:09:37 +09:30
|
|
|
mod = NULL;
|
2013-01-12 11:38:44 +10:30
|
|
|
}
|
2015-05-27 11:09:37 +09:30
|
|
|
return mod;
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2020-11-04 23:35:51 +03:00
|
|
|
/**
|
|
|
|
* is_module_text_address() - is this address inside module code?
|
2009-03-31 13:05:31 -06:00
|
|
|
* @addr: the address to check.
|
|
|
|
*
|
|
|
|
* See is_module_address() if you simply want to see if the address is
|
|
|
|
* anywhere in a module. See kernel_text_address() for testing if an
|
|
|
|
* address corresponds to kernel or module code.
|
|
|
|
*/
|
|
|
|
bool is_module_text_address(unsigned long addr)
|
|
|
|
{
|
|
|
|
bool ret;
|
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
ret = __module_text_address(addr) != NULL;
|
|
|
|
preempt_enable();
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-11-04 23:35:51 +03:00
|
|
|
/**
|
|
|
|
* __module_text_address() - get the module whose code contains an address.
|
2009-03-31 13:05:31 -06:00
|
|
|
* @addr: the address.
|
|
|
|
*
|
|
|
|
* Must be called with preempt disabled or module mutex held so that
|
|
|
|
* module doesn't get freed during this.
|
|
|
|
*/
|
|
|
|
struct module *__module_text_address(unsigned long addr)
|
|
|
|
{
|
|
|
|
struct module *mod = __module_address(addr);
|
|
|
|
if (mod) {
|
|
|
|
/* Make sure it's within the text section. */
|
module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:
1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
obvious how these data are used (are they RO, RX, or RW?)
Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:
MOD_TEXT,
MOD_DATA,
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_INIT_TEXT,
MOD_INIT_DATA,
MOD_INIT_RODATA,
and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.
Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.
Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-02-06 16:28:02 -08:00
|
|
|
if (!within_module_mem_type(addr, mod, MOD_TEXT) &&
|
|
|
|
!within_module_mem_type(addr, mod, MOD_INIT_TEXT))
|
2009-03-31 13:05:31 -06:00
|
|
|
mod = NULL;
|
|
|
|
}
|
|
|
|
return mod;
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* Don't grab lock, we're oopsing. */
|
|
|
|
void print_modules(void)
|
|
|
|
{
|
|
|
|
struct module *mod;
|
2016-09-21 13:47:22 +02:00
|
|
|
char buf[MODULE_FLAGS_BUF_SIZE];
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2009-06-16 11:07:14 -07:00
|
|
|
printk(KERN_DEFAULT "Modules linked in:");
|
2008-08-30 10:09:00 +02:00
|
|
|
/* Most callers should already have preempt disabled, but make sure */
|
|
|
|
preempt_disable();
|
2013-01-12 11:38:44 +10:30
|
|
|
list_for_each_entry_rcu(mod, &modules, list) {
|
|
|
|
if (mod->state == MODULE_STATE_UNFORMED)
|
|
|
|
continue;
|
2022-07-14 16:39:31 +01:00
|
|
|
pr_cont(" %s%s", mod->name, module_flags(mod, buf, true));
|
2013-01-12 11:38:44 +10:30
|
|
|
}
|
2022-05-02 21:52:52 +01:00
|
|
|
|
|
|
|
print_unloaded_tainted_modules();
|
2008-08-30 10:09:00 +02:00
|
|
|
preempt_enable();
|
2022-07-14 16:39:33 +01:00
|
|
|
if (last_unloaded_module.name[0])
|
|
|
|
pr_cont(" [last unloaded: %s%s]", last_unloaded_module.name,
|
|
|
|
last_unloaded_module.taints);
|
2014-02-03 11:13:13 +10:30
|
|
|
pr_cont("\n");
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
2023-03-28 20:03:19 -07:00
|
|
|
|
|
|
|
#ifdef CONFIG_MODULE_DEBUGFS
/* The "modules" debugfs directory, set up by module_debugfs_init(). */
struct dentry *mod_debugfs_root;

/*
 * Create the top-level "modules" debugfs directory.
 * Always returns 0; the debugfs_create_dir() result is stored unchecked.
 */
static int module_debugfs_init(void)
{
	mod_debugfs_root = debugfs_create_dir("modules", NULL);

	return 0;
}
module_init(module_debugfs_init);
#endif
|