2019-06-03 07:44:50 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
|
* Stack tracing support
|
|
|
|
|
*
|
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
|
*/
|
|
|
|
|
#include <linux/kernel.h>
|
2022-12-09 12:10:13 +01:00
|
|
|
#include <linux/efi.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <linux/export.h>
|
2024-02-01 12:52:24 +00:00
|
|
|
#include <linux/filter.h>
|
2015-12-15 17:33:41 +09:00
|
|
|
#include <linux/ftrace.h>
|
2023-11-24 11:05:10 +00:00
|
|
|
#include <linux/kprobes.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <linux/sched.h>
|
2017-02-08 18:51:35 +01:00
|
|
|
#include <linux/sched/debug.h>
|
2017-02-08 18:51:37 +01:00
|
|
|
#include <linux/sched/task_stack.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <linux/stacktrace.h>
|
|
|
|
|
|
2022-12-09 12:10:13 +01:00
|
|
|
#include <asm/efi.h>
|
2015-12-04 11:02:26 +00:00
|
|
|
#include <asm/irq.h>
|
2016-11-03 20:23:05 +00:00
|
|
|
#include <asm/stack_pointer.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/stacktrace.h>
|
|
|
|
|
|
2024-10-17 10:25:35 +01:00
|
|
|
/*
 * Where the current unwound PC value came from.
 */
enum kunwind_source {
	KUNWIND_SOURCE_UNKNOWN,		/* not yet determined / initial state */
	KUNWIND_SOURCE_FRAME,		/* presumably a regular frame-record step — confirm against unwinder core */
	KUNWIND_SOURCE_CALLER,		/* seeded from the caller via __builtin_{frame,return}_address() */
	KUNWIND_SOURCE_TASK,		/* seeded from a blocked task's saved FP/PC */
	KUNWIND_SOURCE_REGS_PC,		/* taken from pt_regs::pc at an exception boundary */
};
|
|
|
|
|
|
2024-10-17 10:25:36 +01:00
|
|
|
/*
 * Metadata describing how the current PC value was recovered.
 *
 * @all:       all flags as one word, for bulk clearing (see kunwind_init()).
 * @fgraph:    the PC was recovered from the ftrace function-graph
 *             return_to_handler trampoline.
 * @kretprobe: the PC was recovered from the kretprobe return trampoline.
 */
union unwind_flags {
	unsigned long	all;
	struct {
		unsigned long	fgraph : 1,
				kretprobe : 1;
	};
};
|
|
|
|
|
|
2023-11-24 11:05:10 +00:00
|
|
|
/*
 * Kernel unwind state
 *
 * @common:	Common unwind state.
 * @task:	The task being unwound.
 * @graph_idx:	Used by ftrace_graph_ret_addr() for optimized stack unwinding.
 * @kr_cur:	When KRETPROBES is selected, holds the kretprobe instance
 *		associated with the most recently encountered replacement lr
 *		value.
 * @source:	Where the current PC value came from (see enum kunwind_source).
 * @flags:	How the current PC value was recovered (fgraph/kretprobe
 *		trampolines).
 * @regs:	The pt_regs being unwound through, or NULL when not crossing
 *		an exception boundary.
 */
struct kunwind_state {
	struct unwind_state common;
	struct task_struct *task;
	int graph_idx;
#ifdef CONFIG_KRETPROBES
	struct llist_node *kr_cur;
#endif
	enum kunwind_source source;
	union unwind_flags flags;
	struct pt_regs *regs;
};
|
|
|
|
|
|
|
|
|
|
static __always_inline void
|
|
|
|
|
kunwind_init(struct kunwind_state *state,
|
|
|
|
|
struct task_struct *task)
|
|
|
|
|
{
|
|
|
|
|
unwind_init_common(&state->common);
|
|
|
|
|
state->task = task;
|
2024-10-17 10:25:35 +01:00
|
|
|
state->source = KUNWIND_SOURCE_UNKNOWN;
|
2024-10-17 10:25:36 +01:00
|
|
|
state->flags.all = 0;
|
arm64: stacktrace: unwind exception boundaries
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.
The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.
This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.
When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:
| el1h_64_irq+0x6c/0x70
| _raw_spin_unlock_irqrestore+0x10/0x60 (P)
| __aarch64_insn_write+0x6c/0x90 (L)
| aarch64_insn_patch_text_nosync+0x28/0x80
... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs:lr.
Note that the LR may be stale at the point of the exception, for example,
shortly after a return:
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
... where the LR points a few instructions before the current PC.
This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:
| Call trace:
| show_stack+0x20/0x40 (CF)
| dump_stack_lvl+0x60/0x80 (F)
| dump_stack+0x18/0x28
| nmi_cpu_backtrace+0xfc/0x140
| nmi_trigger_cpumask_backtrace+0x1c8/0x200
| arch_trigger_cpumask_backtrace+0x20/0x40
| sysrq_handle_showallcpus+0x24/0x38 (F)
| __handle_sysrq+0xa8/0x1b0 (F)
| handle_sysrq+0x38/0x50 (F)
| pl011_int+0x460/0x5a8 (F)
| __handle_irq_event_percpu+0x60/0x220 (F)
| handle_irq_event+0x54/0xc0 (F)
| handle_fasteoi_irq+0xa8/0x1d0 (F)
| generic_handle_domain_irq+0x34/0x58 (F)
| gic_handle_irq+0x54/0x140 (FK)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0xa0
| el1_interrupt+0x34/0x68 (FK)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
| cpu_startup_entry+0x3c/0x50 (F)
| rest_init+0xe4/0xf0
| start_kernel+0x744/0x750
| __primary_switched+0x88/0x98
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Puranjay Mohan <puranjay12@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241017092538.1859841-11-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-10-17 10:25:38 +01:00
|
|
|
state->regs = NULL;
|
2023-11-24 11:05:10 +00:00
|
|
|
}
|
|
|
|
|
|
2022-06-17 13:02:14 -05:00
|
|
|
/*
|
|
|
|
|
* Start an unwind from a pt_regs.
|
|
|
|
|
*
|
|
|
|
|
* The unwind will begin at the PC within the regs.
|
|
|
|
|
*
|
|
|
|
|
* The regs must be on a stack currently owned by the calling task.
|
|
|
|
|
*/
|
2023-04-11 17:29:43 +01:00
|
|
|
static __always_inline void
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init_from_regs(struct kunwind_state *state,
|
|
|
|
|
struct pt_regs *regs)
|
2022-06-17 13:02:14 -05:00
|
|
|
{
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init(state, current);
|
2022-06-17 13:02:14 -05:00
|
|
|
|
arm64: stacktrace: unwind exception boundaries
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.
The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.
This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.
When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:
| el1h_64_irq+0x6c/0x70
| _raw_spin_unlock_irqrestore+0x10/0x60 (P)
| __aarch64_insn_write+0x6c/0x90 (L)
| aarch64_insn_patch_text_nosync+0x28/0x80
... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs:lr.
Note that the LR may be stale at the point of the exception, for example,
shortly after a return:
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
... where the LR points a few instructions before the current PC.
This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:
| Call trace:
| show_stack+0x20/0x40 (CF)
| dump_stack_lvl+0x60/0x80 (F)
| dump_stack+0x18/0x28
| nmi_cpu_backtrace+0xfc/0x140
| nmi_trigger_cpumask_backtrace+0x1c8/0x200
| arch_trigger_cpumask_backtrace+0x20/0x40
| sysrq_handle_showallcpus+0x24/0x38 (F)
| __handle_sysrq+0xa8/0x1b0 (F)
| handle_sysrq+0x38/0x50 (F)
| pl011_int+0x460/0x5a8 (F)
| __handle_irq_event_percpu+0x60/0x220 (F)
| handle_irq_event+0x54/0xc0 (F)
| handle_fasteoi_irq+0xa8/0x1d0 (F)
| generic_handle_domain_irq+0x34/0x58 (F)
| gic_handle_irq+0x54/0x140 (FK)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0xa0
| el1_interrupt+0x34/0x68 (FK)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
| cpu_startup_entry+0x3c/0x50 (F)
| rest_init+0xe4/0xf0
| start_kernel+0x744/0x750
| __primary_switched+0x88/0x98
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Puranjay Mohan <puranjay12@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241017092538.1859841-11-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-10-17 10:25:38 +01:00
|
|
|
state->regs = regs;
|
2023-11-24 11:05:10 +00:00
|
|
|
state->common.fp = regs->regs[29];
|
|
|
|
|
state->common.pc = regs->pc;
|
2024-10-17 10:25:35 +01:00
|
|
|
state->source = KUNWIND_SOURCE_REGS_PC;
|
2022-06-17 13:02:14 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Start an unwind from a caller.
|
|
|
|
|
*
|
|
|
|
|
* The unwind will begin at the caller of whichever function this is inlined
|
|
|
|
|
* into.
|
|
|
|
|
*
|
|
|
|
|
* The function which invokes this must be noinline.
|
|
|
|
|
*/
|
2023-04-11 17:29:43 +01:00
|
|
|
static __always_inline void
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init_from_caller(struct kunwind_state *state)
|
2022-06-17 13:02:14 -05:00
|
|
|
{
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init(state, current);
|
2022-06-17 13:02:14 -05:00
|
|
|
|
2023-11-24 11:05:10 +00:00
|
|
|
state->common.fp = (unsigned long)__builtin_frame_address(1);
|
|
|
|
|
state->common.pc = (unsigned long)__builtin_return_address(0);
|
2024-10-17 10:25:35 +01:00
|
|
|
state->source = KUNWIND_SOURCE_CALLER;
|
2022-06-17 13:02:14 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Start an unwind from a blocked task.
|
|
|
|
|
*
|
|
|
|
|
* The unwind will begin at the blocked tasks saved PC (i.e. the caller of
|
|
|
|
|
* cpu_switch_to()).
|
|
|
|
|
*
|
|
|
|
|
* The caller should ensure the task is blocked in cpu_switch_to() for the
|
|
|
|
|
* duration of the unwind, or the unwind will be bogus. It is never valid to
|
|
|
|
|
* call this for the current task.
|
|
|
|
|
*/
|
2023-04-11 17:29:43 +01:00
|
|
|
static __always_inline void
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init_from_task(struct kunwind_state *state,
|
|
|
|
|
struct task_struct *task)
|
2022-06-17 13:02:14 -05:00
|
|
|
{
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_init(state, task);
|
2022-06-17 13:02:14 -05:00
|
|
|
|
2023-11-24 11:05:10 +00:00
|
|
|
state->common.fp = thread_saved_fp(task);
|
|
|
|
|
state->common.pc = thread_saved_pc(task);
|
2024-10-17 10:25:35 +01:00
|
|
|
state->source = KUNWIND_SOURCE_TASK;
|
2022-06-17 13:02:14 -05:00
|
|
|
}
|
2021-03-19 17:40:22 +00:00
|
|
|
|
2023-04-11 17:29:41 +01:00
|
|
|
/*
 * If the current PC is a return trampoline planted by the function-graph
 * tracer or by kretprobes, recover the original return address that the
 * trampoline replaced.
 *
 * Returns 0 on success, or -EINVAL when the fgraph return address cannot
 * be recovered.
 */
static __always_inline int
kunwind_recover_return_address(struct kunwind_state *state)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	if (state->task->ret_stack &&
	    (state->common.pc == (unsigned long)return_to_handler)) {
		unsigned long orig_pc;
		orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
						state->common.pc,
						(void *)state->common.fp);
		if (state->common.pc == orig_pc) {
			/*
			 * Recovery failed. Only warn when unwinding the
			 * current task: another task's stack may be modified
			 * concurrently (e.g. via /proc/<pid>/stack), so a
			 * failure there is expected and must simply end the
			 * unwind without scaring the user.
			 */
			WARN_ON_ONCE(state->task == current);
			return -EINVAL;
		}
		state->common.pc = orig_pc;
		/* Record that this PC came from the fgraph trampoline. */
		state->flags.fgraph = 1;
	}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

#ifdef CONFIG_KRETPROBES
	if (is_kretprobe_trampoline(state->common.pc)) {
		unsigned long orig_pc;
		/* kr_cur tracks our position in the task's kretprobe instances. */
		orig_pc = kretprobe_find_ret_addr(state->task,
						  (void *)state->common.fp,
						  &state->kr_cur);
		state->common.pc = orig_pc;
		/* Record that this PC came from the kretprobe trampoline. */
		state->flags.kretprobe = 1;
	}
#endif /* CONFIG_KRETPROBES */

	return 0;
}
|
|
|
|
|
|
arm64: stacktrace: unwind exception boundaries
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.
The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.
This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.
When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:
| el1h_64_irq+0x6c/0x70
| _raw_spin_unlock_irqrestore+0x10/0x60 (P)
| __aarch64_insn_write+0x6c/0x90 (L)
| aarch64_insn_patch_text_nosync+0x28/0x80
... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs::lr.
Note that the LR may be stale at the point of the exception, for example,
shortly after a return:
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
... where the LR points a few instructions before the current PC.
This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:
| Call trace:
| show_stack+0x20/0x40 (CF)
| dump_stack_lvl+0x60/0x80 (F)
| dump_stack+0x18/0x28
| nmi_cpu_backtrace+0xfc/0x140
| nmi_trigger_cpumask_backtrace+0x1c8/0x200
| arch_trigger_cpumask_backtrace+0x20/0x40
| sysrq_handle_showallcpus+0x24/0x38 (F)
| __handle_sysrq+0xa8/0x1b0 (F)
| handle_sysrq+0x38/0x50 (F)
| pl011_int+0x460/0x5a8 (F)
| __handle_irq_event_percpu+0x60/0x220 (F)
| handle_irq_event+0x54/0xc0 (F)
| handle_fasteoi_irq+0xa8/0x1d0 (F)
| generic_handle_domain_irq+0x34/0x58 (F)
| gic_handle_irq+0x54/0x140 (FK)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0xa0
| el1_interrupt+0x34/0x68 (FK)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
| cpu_startup_entry+0x3c/0x50 (F)
| rest_init+0xe4/0xf0
| start_kernel+0x744/0x750
| __primary_switched+0x88/0x98
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Puranjay Mohan <puranjay12@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241017092538.1859841-11-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-10-17 10:25:38 +01:00
|
|
|
/*
 * Unwind across an exception boundary, using the frame record embedded in
 * pt_regs::stackframe (created by the entry code) to locate the saved regs.
 *
 * The unwound PC is taken from pt_regs::pc, and the walk continues from the
 * interrupted context's FP (pt_regs::regs[29]). pt_regs::lr is deliberately
 * not reported: the LR only holds a return address at function call
 * boundaries and may be a stale/scratch value at the point of the exception.
 *
 * Returns 0 on success, or -EINVAL if the pt_regs do not lie entirely on a
 * known stack.
 */
static __always_inline
int kunwind_next_regs_pc(struct kunwind_state *state)
{
	struct stack_info *info;
	unsigned long fp = state->common.fp;
	struct pt_regs *regs;

	/* The current FP points at the frame record inside pt_regs. */
	regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp);

	/* The whole pt_regs must fit on an accessible stack. */
	info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs));
	if (!info)
		return -EINVAL;

	unwind_consume_stack(&state->common, info, (unsigned long)regs,
			     sizeof(*regs));

	state->regs = regs;
	state->common.pc = regs->pc;
	/* Continue from the interrupted context's frame record. */
	state->common.fp = regs->regs[29];
	/*
	 * NOTE(review): state->regs is assigned above and cleared again here
	 * with no intervening reader visible in this function — presumably a
	 * leftover from the scheme that reported pt_regs::lr as a separate
	 * unwind step. Confirm against kunwind_next()/callers before removing
	 * either assignment.
	 */
	state->regs = NULL;
	state->source = KUNWIND_SOURCE_REGS_PC;
	return 0;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Handle a {0, 0} sentinel frame record by inspecting the frame_record_meta
 * structure that embeds it.
 *
 * Returns:
 *  -ENOENT for the task's final (entry) record: the normal, successful end
 *          of a stacktrace;
 *  the result of kunwind_next_regs_pc() for an exception boundary record;
 *  -EINVAL for a corrupt or unexpected record.
 */
static __always_inline int
kunwind_next_frame_record_meta(struct kunwind_state *state)
{
	struct task_struct *tsk = state->task;
	unsigned long fp = state->common.fp;
	struct frame_record_meta *meta;
	struct stack_info *info;

	/* The whole meta structure must fit on an accessible stack. */
	info = unwind_find_stack(&state->common, fp, sizeof(*meta));
	if (!info)
		return -EINVAL;

	meta = (struct frame_record_meta *)fp;
	switch (READ_ONCE(meta->type)) {
	case FRAME_META_TYPE_FINAL:
		if (meta == &task_pt_regs(tsk)->stackframe)
			return -ENOENT;
		/*
		 * A FINAL record anywhere else is malformed. Only warn when
		 * unwinding the current task: another task's stack may be
		 * modified concurrently (unless it is blocked), so hitting
		 * this there is not necessarily a kernel bug.
		 */
		WARN_ON_ONCE(tsk == current);
		return -EINVAL;
	case FRAME_META_TYPE_PT_REGS:
		return kunwind_next_regs_pc(state);
	default:
		/* Unknown meta type; as above, only warn for the current task. */
		WARN_ON_ONCE(tsk == current);
		return -EINVAL;
	}
}
|
|
|
|
|
|
|
|
|
|
/*
 * Unwind one step along the chain of frame records, or hand off to
 * kunwind_next_frame_record_meta() when the {0, 0} sentinel record used by
 * the entry code is encountered.
 *
 * Returns 0 on success, -EINVAL if the record is misaligned or does not lie
 * on a known stack, or the result of the meta handler for sentinel records.
 */
static __always_inline int
kunwind_next_frame_record(struct kunwind_state *state)
{
	unsigned long fp = state->common.fp;
	struct frame_record *record;
	struct stack_info *info;
	unsigned long new_fp, new_pc;

	/* Frame records must be 8-byte aligned. */
	if (fp & 0x7)
		return -EINVAL;

	info = unwind_find_stack(&state->common, fp, sizeof(*record));
	if (!info)
		return -EINVAL;

	/*
	 * Snapshot the record with READ_ONCE(): the stack may be modified
	 * concurrently when unwinding a task that is not blocked.
	 */
	record = (struct frame_record *)fp;
	new_fp = READ_ONCE(record->fp);
	new_pc = READ_ONCE(record->lr);

	/* A {0, 0} record has an embedded frame_record_meta; go handle it. */
	if (!new_fp && !new_pc)
		return kunwind_next_frame_record_meta(state);

	/* Treat this record as consumed before stepping to the next frame. */
	unwind_consume_stack(&state->common, info, fp, sizeof(*record));

	state->common.fp = new_fp;
	state->common.pc = new_pc;
	state->source = KUNWIND_SOURCE_FRAME;

	return 0;
}
|
|
|
|
|
|
2022-07-27 15:29:03 +01:00
|
|
|
/*
 * Unwind from one frame record (A) to the next frame record (B).
 *
 * We terminate early if the location of B indicates a malformed chain of frame
 * records (e.g. a cycle), determined based on the location and fp value of A
 * and the location (but not the fp value) of B.
 */
|
2023-04-11 17:29:43 +01:00
|
|
|
static __always_inline int
|
2023-11-24 11:05:10 +00:00
|
|
|
kunwind_next(struct kunwind_state *state)
|
2022-07-27 15:29:03 +01:00
|
|
|
{
|
|
|
|
|
int err;
|
|
|
|
|
|
2024-10-17 10:25:36 +01:00
|
|
|
state->flags.all = 0;
|
|
|
|
|
|
2024-10-17 10:25:35 +01:00
|
|
|
switch (state->source) {
|
|
|
|
|
case KUNWIND_SOURCE_FRAME:
|
|
|
|
|
case KUNWIND_SOURCE_CALLER:
|
|
|
|
|
case KUNWIND_SOURCE_TASK:
|
|
|
|
|
case KUNWIND_SOURCE_REGS_PC:
|
arm64: stacktrace: Skip reporting LR at exception boundaries
Aishwarya reports that warnings are sometimes seen when running the
ftrace kselftests, e.g.
| WARNING: CPU: 5 PID: 2066 at arch/arm64/kernel/stacktrace.c:141 arch_stack_walk+0x4a0/0x4c0
| Modules linked in:
| CPU: 5 UID: 0 PID: 2066 Comm: ftracetest Not tainted 6.13.0-rc2 #2
| Hardware name: linux,dummy-virt (DT)
| pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_stack_walk+0x4a0/0x4c0
| lr : arch_stack_walk+0x248/0x4c0
| sp : ffff800083643d20
| x29: ffff800083643dd0 x28: ffff00007b891400 x27: ffff00007b891928
| x26: 0000000000000001 x25: 00000000000000c0 x24: ffff800082f39d80
| x23: ffff80008003ee8c x22: ffff80008004baa8 x21: ffff8000800533e0
| x20: ffff800083643e10 x19: ffff80008003eec8 x18: 0000000000000000
| x17: 0000000000000000 x16: ffff800083640000 x15: 0000000000000000
| x14: 02a37a802bbb8a92 x13: 00000000000001a9 x12: 0000000000000001
| x11: ffff800082ffad60 x10: ffff800083643d20 x9 : ffff80008003eed0
| x8 : ffff80008004baa8 x7 : ffff800086f2be80 x6 : ffff0000057cf000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff800086f2b690
| x2 : ffff80008004baa8 x1 : ffff80008004baa8 x0 : ffff80008004baa8
| Call trace:
| arch_stack_walk+0x4a0/0x4c0 (P)
| arch_stack_walk+0x248/0x4c0 (L)
| profile_pc+0x44/0x80
| profile_tick+0x50/0x80 (F)
| tick_nohz_handler+0xcc/0x160 (F)
| __hrtimer_run_queues+0x2ac/0x340 (F)
| hrtimer_interrupt+0xf4/0x268 (F)
| arch_timer_handler_virt+0x34/0x60 (F)
| handle_percpu_devid_irq+0x88/0x220 (F)
| generic_handle_domain_irq+0x34/0x60 (F)
| gic_handle_irq+0x54/0x140 (F)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0x98
| el1_interrupt+0x34/0x68 (F)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| queued_spin_lock_slowpath+0x78/0x460 (P)
The warning in question is:
WARN_ON_ONCE(state->common.pc == orig_pc))
... in kunwind_recover_return_address(), which is triggered when
return_to_handler() is encountered in the trace, but
ftrace_graph_ret_addr() cannot find a corresponding original return
address on the fgraph return stack.
This happens because the stacktrace code encounters an exception
boundary where the LR was not live at the time of the exception, but the
LR happens to contain return_to_handler(); either because the task
recently returned there, or due to unfortunate usage of the LR at a
scratch register. In such cases attempts to recover the return address
via ftrace_graph_ret_addr() may fail, triggering the WARN_ON_ONCE()
above and aborting the unwind (hence the stacktrace terminating after
reporting the PC at the time of the exception).
Handling unreliable LR values in these cases is likely to require some
larger rework, so for the moment avoid this problem by restoring the old
behaviour of skipping the LR at exception boundaries, which the
stacktrace code did prior to commit:
c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries")
This commit is effectively a partial revert, keeping the structures and
logic to explicitly identify exception boundaries while still skipping
reporting of the LR. The logic to explicitly identify exception
boundaries is still useful for general robustness and as a building
block for future support for RELIABLE_STACKTRACE.
Fixes: c2c6b27b5aa1 ("arm64: stacktrace: unwind exception boundaries")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241211140704.2498712-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-12-11 14:07:03 +00:00
|
|
|
err = kunwind_next_frame_record(state);
|
2024-10-17 10:25:35 +01:00
|
|
|
break;
|
|
|
|
|
default:
|
arm64: stacktrace: unwind exception boundaries
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.
The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.
This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.
When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:
| el1h_64_irq+0x6c/0x70
| _raw_spin_unlock_irqrestore+0x10/0x60 (P)
| __aarch64_insn_write+0x6c/0x90 (L)
| aarch64_insn_patch_text_nosync+0x28/0x80
... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs:lr.
Note that the LR may be stale at the point of the exception, for example,
shortly after a return:
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
... where the LR points a few instructions before the current PC.
This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:
| Call trace:
| show_stack+0x20/0x40 (CF)
| dump_stack_lvl+0x60/0x80 (F)
| dump_stack+0x18/0x28
| nmi_cpu_backtrace+0xfc/0x140
| nmi_trigger_cpumask_backtrace+0x1c8/0x200
| arch_trigger_cpumask_backtrace+0x20/0x40
| sysrq_handle_showallcpus+0x24/0x38 (F)
| __handle_sysrq+0xa8/0x1b0 (F)
| handle_sysrq+0x38/0x50 (F)
| pl011_int+0x460/0x5a8 (F)
| __handle_irq_event_percpu+0x60/0x220 (F)
| handle_irq_event+0x54/0xc0 (F)
| handle_fasteoi_irq+0xa8/0x1d0 (F)
| generic_handle_domain_irq+0x34/0x58 (F)
| gic_handle_irq+0x54/0x140 (FK)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0xa0
| el1_interrupt+0x34/0x68 (FK)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
| cpu_startup_entry+0x3c/0x50 (F)
| rest_init+0xe4/0xf0
| start_kernel+0x744/0x750
| __primary_switched+0x88/0x98
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Puranjay Mohan <puranjay12@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241017092538.1859841-11-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-10-17 10:25:38 +01:00
|
|
|
err = -EINVAL;
|
2024-10-17 10:25:35 +01:00
|
|
|
}
|
2022-07-27 15:29:03 +01:00
|
|
|
|
arm64: stacktrace: unwind exception boundaries
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.
The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.
This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.
When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:
| el1h_64_irq+0x6c/0x70
| _raw_spin_unlock_irqrestore+0x10/0x60 (P)
| __aarch64_insn_write+0x6c/0x90 (L)
| aarch64_insn_patch_text_nosync+0x28/0x80
... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs:lr.
Note that the LR may be stale at the point of the exception, for example,
shortly after a return:
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
... where the LR points a few instructions before the current PC.
This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:
| Call trace:
| show_stack+0x20/0x40 (CF)
| dump_stack_lvl+0x60/0x80 (F)
| dump_stack+0x18/0x28
| nmi_cpu_backtrace+0xfc/0x140
| nmi_trigger_cpumask_backtrace+0x1c8/0x200
| arch_trigger_cpumask_backtrace+0x20/0x40
| sysrq_handle_showallcpus+0x24/0x38 (F)
| __handle_sysrq+0xa8/0x1b0 (F)
| handle_sysrq+0x38/0x50 (F)
| pl011_int+0x460/0x5a8 (F)
| __handle_irq_event_percpu+0x60/0x220 (F)
| handle_irq_event+0x54/0xc0 (F)
| handle_fasteoi_irq+0xa8/0x1d0 (F)
| generic_handle_domain_irq+0x34/0x58 (F)
| gic_handle_irq+0x54/0x140 (FK)
| call_on_irq_stack+0x24/0x58 (F)
| do_interrupt_handler+0x88/0xa0
| el1_interrupt+0x34/0x68 (FK)
| el1h_64_irq_handler+0x18/0x28
| el1h_64_irq+0x6c/0x70
| default_idle_call+0x34/0x180 (P)
| default_idle_call+0x28/0x180 (L)
| do_idle+0x204/0x268
| cpu_startup_entry+0x3c/0x50 (F)
| rest_init+0xe4/0xf0
| start_kernel+0x744/0x750
| __primary_switched+0x88/0x98
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Puranjay Mohan <puranjay12@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241017092538.1859841-11-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2024-10-17 10:25:38 +01:00
|
|
|
if (err)
|
|
|
|
|
return err;
|
|
|
|
|
|
2023-11-24 11:05:10 +00:00
|
|
|
state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
|
2022-07-27 15:29:03 +01:00
|
|
|
|
2023-11-24 11:05:10 +00:00
|
|
|
return kunwind_recover_return_address(state);
|
2022-07-27 15:29:03 +01:00
|
|
|
}
|
|
|
|
|
|
2023-11-24 11:05:11 +00:00
|
|
|
typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
|
|
|
|
|
|
2023-04-11 17:29:43 +01:00
|
|
|
/*
 * Core unwind loop: report each step of @state to @consume_state until the
 * consumer declines further entries or the unwind cannot continue.
 */
static __always_inline void
do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
	   void *cookie)
{
	/*
	 * The initial PC may itself be a return trampoline; recover the
	 * real return address before reporting anything.
	 */
	if (kunwind_recover_return_address(state))
		return;

	while (1) {
		int ret;

		/* The consumer returns false to stop the walk. */
		if (!consume_state(state, cookie))
			break;
		ret = kunwind_next(state);
		if (ret < 0)
			break;
	}
}
|
|
|
|
|
|
2022-09-01 14:06:45 +01:00
|
|
|
/*
 * Per-cpu stacks are only accessible when unwinding the current task in a
 * non-preemptible context.
 *
 * Note: this evaluates `task`, which must be in scope at the expansion site
 * (see kunwind_stack_walk()).
 */
#define STACKINFO_CPU(name)					\
	({							\
		((task == current) && !preemptible())		\
			? stackinfo_get_##name()		\
			: stackinfo_get_unknown();		\
	})
|
|
|
|
|
|
|
|
|
|
/*
 * SDEI stacks are only accessible when unwinding the current task in an NMI
 * context.
 *
 * Note: this evaluates `task`, which must be in scope at the expansion site
 * (see kunwind_stack_walk()).
 */
#define STACKINFO_SDEI(name)					\
	({							\
		((task == current) && in_nmi())			\
			? stackinfo_get_sdei_##name()		\
			: stackinfo_get_unknown();		\
	})
|
|
|
|
|
|
2022-12-09 12:10:13 +01:00
|
|
|
/*
 * The EFI runtime-services stack is only accessible while the current task
 * is executing EFI code (current_in_efi()).
 */
#define STACKINFO_EFI						\
	({							\
		((task == current) && current_in_efi())		\
			? stackinfo_get_efi()			\
			: stackinfo_get_unknown();		\
	})
|
|
|
|
|
|
2023-11-24 11:05:11 +00:00
|
|
|
/*
 * Set up and run a kernel stack walk for @task (or from @regs), reporting
 * each step to @consume_state.
 *
 * Unwinding another task from a pt_regs snapshot is not supported and
 * silently reports nothing.
 */
static __always_inline void
kunwind_stack_walk(kunwind_consume_fn consume_state,
		   void *cookie, struct task_struct *task,
		   struct pt_regs *regs)
{
	/*
	 * The stacks this walk may traverse. Stacks that are inaccessible
	 * in the current context degrade to stackinfo_get_unknown() via
	 * the STACKINFO_* macros, which reference `task` by name.
	 */
	struct stack_info stacks[] = {
		stackinfo_get_task(task),
		STACKINFO_CPU(irq),
#if defined(CONFIG_VMAP_STACK)
		STACKINFO_CPU(overflow),
#endif
#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
		STACKINFO_SDEI(normal),
		STACKINFO_SDEI(critical),
#endif
#ifdef CONFIG_EFI
		STACKINFO_EFI,
#endif
	};
	struct kunwind_state state = {
		.common = {
			.stacks = stacks,
			.nr_stacks = ARRAY_SIZE(stacks),
		},
	};

	if (regs) {
		/* Cannot unwind another task's registers. */
		if (task != current)
			return;
		kunwind_init_from_regs(&state, regs);
	} else if (task == current) {
		kunwind_init_from_caller(&state);
	} else {
		kunwind_init_from_task(&state, task);
	}

	do_kunwind(&state, consume_state, cookie);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Adapter state pairing the generic stacktrace consumer with its
 * caller-supplied cookie, for use with kunwind_stack_walk().
 */
struct kunwind_consume_entry_data {
	stack_trace_consume_fn consume_entry;	/* generic per-entry callback */
	void *cookie;				/* opaque data for consume_entry */
};
|
|
|
|
|
|
2024-02-29 23:16:20 +00:00
|
|
|
/* Forward one unwind step's PC to the generic stack_trace_consume_fn. */
static __always_inline bool
arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
{
	struct kunwind_consume_entry_data *data = cookie;

	return data->consume_entry(data->cookie, state->common.pc);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Arch entry point for the generic stacktrace API: walk @task's stack (or
 * the state captured in @regs) and report each return address to
 * @consume_entry.
 */
noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
			      void *cookie, struct task_struct *task,
			      struct pt_regs *regs)
{
	struct kunwind_consume_entry_data data = {
		.consume_entry = consume_entry,
		.cookie = cookie,
	};

	kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
|
2023-04-11 17:29:42 +01:00
|
|
|
|
2024-02-01 12:52:24 +00:00
|
|
|
/*
 * Adapter state for arch_bpf_stack_walk(): pairs the BPF per-entry
 * callback with its cookie.
 */
struct bpf_unwind_consume_entry_data {
	/* BPF callback receiving (cookie, ip, sp, fp) for each entry */
	bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
	void *cookie;
};
|
|
|
|
|
|
|
|
|
|
/* Forward one unwind step to the BPF consumer. */
static bool
arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie)
{
	struct bpf_unwind_consume_entry_data *data = cookie;

	/* The sp argument is not tracked by this unwinder; report 0. */
	return data->consume_entry(data->cookie, state->common.pc, 0,
				   state->common.fp);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Arch entry point for BPF stack walking: unwind the current task from the
 * caller's context, reporting (pc, sp=0, fp) for each entry.
 */
noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp,
							u64 fp), void *cookie)
{
	struct bpf_unwind_consume_entry_data data = {
		.consume_entry = consume_entry,
		.cookie = cookie,
	};

	kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
}
|
|
|
|
|
|
2024-10-17 10:25:35 +01:00
|
|
|
static const char *state_source_string(const struct kunwind_state *state)
|
|
|
|
|
{
|
|
|
|
|
switch (state->source) {
|
|
|
|
|
case KUNWIND_SOURCE_FRAME: return NULL;
|
|
|
|
|
case KUNWIND_SOURCE_CALLER: return "C";
|
|
|
|
|
case KUNWIND_SOURCE_TASK: return "T";
|
|
|
|
|
case KUNWIND_SOURCE_REGS_PC: return "P";
|
|
|
|
|
default: return "U";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-17 10:25:34 +01:00
|
|
|
/*
 * Print one backtrace entry via printk, annotating it with the unwind
 * source marker (from state_source_string()) and the fgraph ("F") /
 * kretprobe ("K") flags, e.g. "  some_func+0x10/0x20 (FK)".
 *
 * @arg is the printk log level string. Always returns true so the walk
 * continues to the end of the stack.
 */
static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg)
{
	const char *source = state_source_string(state);
	union unwind_flags flags = state->flags;
	/* Only emit the "(...)" suffix when there is something to show. */
	bool has_info = source || flags.all;
	char *loglvl = arg;

	printk("%s %pSb%s%s%s%s%s\n", loglvl,
	       (void *)state->common.pc,
	       has_info ? " (" : "",
	       source ? source : "",
	       flags.fgraph ? "F" : "",
	       flags.kretprobe ? "K" : "",
	       has_info ? ")" : "");

	return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Print a "Call trace:" dump for @tsk (or for the state in @regs) at the
 * given printk log level. User-mode regs are ignored; a NULL @tsk means
 * the current task.
 */
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
		    const char *loglvl)
{
	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);

	/* Only kernel-mode state can be unwound here. */
	if (regs && user_mode(regs))
		return;

	if (!tsk)
		tsk = current;

	/* Pin the task's stack so it cannot be freed while we walk it. */
	if (!try_get_task_stack(tsk))
		return;

	printk("%sCall trace:\n", loglvl);
	kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);

	put_task_stack(tsk);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Generic show_stack() implementation; @sp is unused on arm64 (the unwind
 * starts from the task's saved state or the caller's frame).
 */
void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
{
	dump_backtrace(NULL, tsk, loglvl);
	/*
	 * NOTE(review): compiler barrier — presumably to stop the call
	 * above being tail-called so show_stack() itself remains visible
	 * in traces; confirm against commit history.
	 */
	barrier();
}
|
2023-12-19 10:22:29 +08:00
|
|
|
|
|
|
|
|
/*
 * The struct defined for userspace stack frame in AARCH64 mode.
 */
struct frame_tail {
	struct frame_tail __user *fp;	/* caller's frame record */
	unsigned long lr;		/* return address into the caller */
} __attribute__((packed));
|
|
|
|
|
|
|
|
|
|
/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 *
 * Returns NULL to terminate the walk: on an inaccessible or unreadable
 * record, when the consumer declines further entries, or when the chain of
 * frame pointers does not strictly progress towards higher addresses.
 */
static struct frame_tail __user *
unwind_user_frame(struct frame_tail __user *tail, void *cookie,
	       stack_trace_consume_fn consume_entry)
{
	struct frame_tail buftail;
	unsigned long err;
	unsigned long lr;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(tail, sizeof(buftail)))
		return NULL;

	/* Read the user frame record without sleeping on a fault. */
	pagefault_disable();
	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
	pagefault_enable();

	if (err)
		return NULL;

	/* Strip userspace pointer-authentication bits from the LR. */
	lr = ptrauth_strip_user_insn_pac(buftail.lr);

	if (!consume_entry(cookie, lr))
		return NULL;

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail >= buftail.fp)
		return NULL;

	return buftail.fp;
}
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
|
/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct compat_frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct compat_frame_tail {
	compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
	u32 sp;
	u32 lr;
} __attribute__((packed));
|
|
|
|
|
|
|
|
|
|
/*
 * AARCH32 (compat) counterpart of unwind_user_frame(): report one frame's
 * LR and return a pointer to the next compat frame tail, or NULL to stop
 * (unreadable record, consumer declined, or non-monotonic frame pointers).
 */
static struct compat_frame_tail __user *
unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie,
				stack_trace_consume_fn consume_entry)
{
	struct compat_frame_tail buftail;
	unsigned long err;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(tail, sizeof(buftail)))
		return NULL;

	/* Read the user frame record without sleeping on a fault. */
	pagefault_disable();
	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
	pagefault_enable();

	if (err)
		return NULL;

	if (!consume_entry(cookie, buftail.lr))
		return NULL;

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail + 1 >= (struct compat_frame_tail __user *)
			compat_ptr(buftail.fp))
		return NULL;

	/* fp points one past the record; step back to its start. */
	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
}
|
|
|
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Walk a userspace stack from @regs, reporting each return address to
 * @consume_entry. Handles both AARCH64 and (when CONFIG_COMPAT) AARCH32
 * frame layouts; the walk stops on a NULL or misaligned frame pointer.
 */
void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
			  const struct pt_regs *regs)
{
	/* Report the interrupted PC first. */
	if (!consume_entry(cookie, regs->pc))
		return;

	if (!compat_user_mode(regs)) {
		/* AARCH64 mode */
		struct frame_tail __user *tail;

		/* x29 is the AAPCS64 frame pointer. */
		tail = (struct frame_tail __user *)regs->regs[29];
		/* Frame records must be 8-byte aligned. */
		while (tail && !((unsigned long)tail & 0x7))
			tail = unwind_user_frame(tail, cookie, consume_entry);
	} else {
#ifdef CONFIG_COMPAT
		/* AARCH32 compat mode */
		struct compat_frame_tail __user *tail;

		/* compat fp points one past the record; step back to it. */
		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
		/* Compat records only need 4-byte alignment. */
		while (tail && !((unsigned long)tail & 0x3))
			tail = unwind_compat_user_frame(tail, cookie, consume_entry);
#endif
	}
}
|