From 239d87327dcd361b0098038995f8908f3296864f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 12 Dec 2024 17:28:06 -0800 Subject: [PATCH 1/3] fortify: Hide run-time copy size from value range tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC performs value range tracking for variables as a way to provide better diagnostics. One place this is regularly seen is with warnings associated with bounds-checking, e.g. -Wstringop-overflow, -Wstringop-overread, -Warray-bounds, etc. In order to keep the signal-to-noise ratio high, warnings aren't emitted when a value range spans the entire value range representable by a given variable. For example: unsigned int len; char dst[8]; ... memcpy(dst, src, len); If len's value is unknown, it has the full "unsigned int" range of [0, UINT_MAX], and GCC's compile-time bounds checks against memcpy() will be ignored. However, when a code path has been able to narrow the range: if (len > 16) return; memcpy(dst, src, len); Then the range will be updated for the execution path. Above, len is now [0, 16] when reading memcpy(), so depending on other optimizations, we might see a -Wstringop-overflow warning like: error: '__builtin_memcpy' writing between 9 and 16 bytes into region of size 8 [-Werror=stringop-overflow] When building with CONFIG_FORTIFY_SOURCE, the fortified run-time bounds checking can appear to narrow value ranges of lengths for memcpy(), depending on how the compiler constructs the execution paths during optimization passes, due to the checks against the field sizes. For example: if (p_size_field != SIZE_MAX && p_size != p_size_field && p_size_field < size) As intentionally designed, these checks only affect the kernel warnings emitted at run-time and do not block the potentially overflowing memcpy(), so GCC thinks it needs to produce a warning about the resulting value range that might be reaching the memcpy(). We have seen this manifest a few times now, with the most recent being with cpumasks: In function ‘bitmap_copy’, inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2, inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2: ./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread] 114 | #define __underlying_memcpy __builtin_memcpy | ^ ./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’ 633 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ ./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’ 678 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ ./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’ 259 | memcpy(dst, src, len); | ^~~~~~ kernel/padata.c: In function ‘__padata_set_cpumasks’: kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256] 713 | cpumask_var_t pcpumask, | ~~~~~~~~~~~~~~^~~~~~~~ This warning is _not_ emitted when CONFIG_FORTIFY_SOURCE is disabled, and with the recent -fdiagnostics-details we can confirm the origin of the warning is due to FORTIFY's bounds checking: ../include/linux/bitmap.h:259:17: note: in expansion of macro 'memcpy' 259 | memcpy(dst, src, len); | ^~~~~~ '__padata_set_cpumasks': events 1-2 ../include/linux/fortify-string.h:613:36: 612 | if (p_size_field != SIZE_MAX && | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 613 | p_size != p_size_field && p_size_field < size) | ~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ | | | (1) when the condition is evaluated to false | (2) when the condition is evaluated to true '__padata_set_cpumasks': event 3 114 | #define __underlying_memcpy __builtin_memcpy | ^ | | | (3) out of array bounds here Note that the cpumask warning started appearing since bitmap functions were recently marked __always_inline in commit ed8cd2b3bd9f ("bitmap: Switch from inline to __always_inline"), which allowed GCC to gain visibility into the variables as they passed through the FORTIFY implementation. In order to silence these false positives but keep otherwise deterministic compile-time warnings intact, hide the length variable from GCC with OPTIMIZE_HIDE_VAR() before calling the builtin memcpy. Additionally add a comment about why all the macro args have copies with const storage. Reported-by: "Thomas Weißschuh" Closes: https://lore.kernel.org/all/db7190c8-d17f-4a0d-bc2f-5903c79f36c2@t-8ch.de/ Reported-by: Nilay Shroff Closes: https://lore.kernel.org/all/20241112124127.1666300-1-nilay@linux.ibm.com/ Tested-by: Nilay Shroff Acked-by: Yury Norov Acked-by: Greg Kroah-Hartman Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 0d99bf11d260..e4ce1cae03bf 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -616,6 +616,12 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, return false; } +/* + * To work around what seems to be an optimizer bug, the macro arguments + * need to have const copies or the values end up changed by the time they + * reach fortify_warn_once(). See commit 6f7630b1b5bc ("fortify: Capture + * __bos() results in const temp vars") for more details. + */ #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ const size_t __fortify_size = (size_t)(size); \ @@ -623,6 +629,8 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t __q_size = (q_size); \ const size_t __p_size_field = (p_size_field); \ const size_t __q_size_field = (q_size_field); \ + /* Keep a mutable version of the size for the final copy. */ \ + size_t __copy_size = __fortify_size; \ fortify_warn_once(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ __q_size_field, FORTIFY_FUNC_ ##op), \ @@ -630,7 +638,11 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, __fortify_size, \ "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ - __underlying_##op(p, q, __fortify_size); \ + /* Hide only the run-time size from value range tracking to */ \ + /* silence compile-time false positive bounds warnings. */ \ + if (!__builtin_constant_p(__copy_size)) \ + OPTIMIZER_HIDE_VAR(__copy_size); \ + __underlying_##op(p, q, __copy_size); \ }) /* From 57a6baf3a3ea61159a149573258a0a92f3a3d6dd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 8 Nov 2024 12:34:24 +0100 Subject: [PATCH 2/3] tracing: Add task_prctl_unknown tracepoint prctl() is a complex syscall which multiplexes its functionality based on a large set of PR_* options. Currently we count 64 such options. The return value of unknown options is -EINVAL, and doesn't distinguish from known options that were passed invalid args that also return -EINVAL. To understand if programs are attempting to use prctl() options not yet available on the running kernel, provide the task_prctl_unknown tracepoint. Note, this tracepoint is in an unlikely cold path, and would therefore be suitable for continuous monitoring (e.g. via perf_event_open). While the above is likely the simplest usecase, additionally this tracepoint can help unlock some testing scenarios (where probing sys_enter or sys_exit causes undesirable performance overheads): a. unprivileged triggering of a test module: test modules may register a probe to be called back on task_prctl_unknown, and pick a very large unknown prctl() option upon which they perform a test function for an unprivileged user; b. unprivileged triggering of an eBPF program function: similar as idea (a). Example trace_pipe output: test-380 [001] ..... 78.142904: task_prctl_unknown: option=1234 arg2=101 arg3=102 arg4=103 arg5=104 Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko Link: https://lore.kernel.org/r/20241108113455.2924361-1-elver@google.com Signed-off-by: Kees Cook --- include/trace/events/task.h | 37 +++++++++++++++++++++++++++++++++++++ kernel/sys.c | 3 +++ 2 files changed, 40 insertions(+) diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 47b527464d1a..209d315852fb 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -56,6 +56,43 @@ TRACE_EVENT(task_rename, __entry->newcomm, __entry->oom_score_adj) ); +/** + * task_prctl_unknown - called on unknown prctl() option + * @option: option passed + * @arg2: arg2 passed + * @arg3: arg3 passed + * @arg4: arg4 passed + * @arg5: arg5 passed + * + * Called on an unknown prctl() option. + */ +TRACE_EVENT(task_prctl_unknown, + + TP_PROTO(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5), + + TP_ARGS(option, arg2, arg3, arg4, arg5), + + TP_STRUCT__entry( + __field( int, option) + __field( unsigned long, arg2) + __field( unsigned long, arg3) + __field( unsigned long, arg4) + __field( unsigned long, arg5) + ), + + TP_fast_assign( + __entry->option = option; + __entry->arg2 = arg2; + __entry->arg3 = arg3; + __entry->arg4 = arg4; + __entry->arg5 = arg5; + ), + + TP_printk("option=%d arg2=%ld arg3=%ld arg4=%ld arg5=%ld", + __entry->option, __entry->arg2, __entry->arg3, __entry->arg4, __entry->arg5) +); + #endif /* This part must be outside protection */ diff --git a/kernel/sys.c b/kernel/sys.c index 4da31f28fda8..b366cef102ec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -75,6 +75,8 @@ #include #include +#include + #include "uid16.h" #ifndef SET_UNALIGN_CTL @@ -2785,6 +2787,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); break; default: + trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; break; } From a6115cceb1dd61974410979e278dd3f369a7f566 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 8 Nov 2024 12:34:25 +0100 Subject: [PATCH 3/3] tracing: Remove pid in task_rename tracing output Remove pid in task_rename tracepoint output, since that tracepoint only deals with the current task, and is printed by default. This also saves some space in the entry and avoids wasted padding. Link: https://lkml.kernel.org/r/20241105120247.596a0dc9@gandalf.local.home Suggested-by: Steven Rostedt Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20241108113455.2924361-2-elver@google.com Signed-off-by: Kees Cook --- include/trace/events/task.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 209d315852fb..af535b053033 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -38,22 +38,19 @@ TRACE_EVENT(task_rename, TP_ARGS(task, comm), TP_STRUCT__entry( - __field( pid_t, pid) __array( char, oldcomm, TASK_COMM_LEN) __array( char, newcomm, TASK_COMM_LEN) __field( short, oom_score_adj) ), TP_fast_assign( - __entry->pid = task->pid; memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); strscpy(entry->newcomm, comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd", - __entry->pid, __entry->oldcomm, - __entry->newcomm, __entry->oom_score_adj) + TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd", + __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj) ); /**