LoongArch: Add ORC stack unwinder support

The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the format
of the ORC data is much simpler than DWARF, which in turn allows the ORC
unwinder to be much simpler and faster.

The ORC data consists of unwind tables which are generated by objtool.
After analyzing all the code paths of a .o file, it determines information
about the stack state at each instruction address in the file and outputs
that information to the .orc_unwind and .orc_unwind_ip sections.

The per-object ORC sections are combined at link time and are sorted and
post-processed at boot time. The unwinder uses the resulting data to
correlate instruction addresses with their stack states at run time.

Most of the logic are similar with x86, in order to get ra info before ra
is saved into stack, add ra_reg and ra_offset into orc_entry. At the same
time, modify some arch-specific code to silence the objtool warnings.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Co-developed-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
Tiezhu Yang
2024-03-11 22:23:47 +08:00
committed by Huacai Chen
parent e91c5e4c21
commit cb8a2ef084
37 changed files with 875 additions and 42 deletions

View File

@@ -136,6 +136,7 @@ config LOONGARCH
select HAVE_KVM
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -148,6 +149,7 @@ config LOONGARCH
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA if NUMA
select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_TIF_NOHZ

View File

@@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE
Some of the addresses it reports may be incorrect (but better than the
Guess unwinder).
config UNWINDER_ORC
bool "ORC unwinder"
select OBJTOOL
help
This option enables the ORC (Oops Rewind Capability) unwinder for
unwinding kernel stack traces. It uses a custom data format which is
a simplified version of the DWARF Call Frame Information standard.
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.
endchoice

View File

@@ -26,6 +26,18 @@ endif
32bit-emul = elf32loongarch
64bit-emul = elf64loongarch
ifdef CONFIG_UNWINDER_ORC
orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
orc_hash_sh := $(srctree)/scripts/orc_hash.sh
targets += $(orc_hash_h)
quiet_cmd_orc_hash = GEN $@
cmd_orc_hash = mkdir -p $(dir $@); \
$(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE
$(call if_changed,orc_hash)
archprepare: $(orc_hash_h)
endif
ifdef CONFIG_DYNAMIC_FTRACE
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
@@ -72,8 +84,6 @@ KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access)
KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
else
cflags-y += $(call cc-option,-mno-explicit-relocs)
KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
@@ -82,6 +92,15 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs
KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
endif
KBUILD_AFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
KBUILD_CFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
KBUILD_AFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
ifdef CONFIG_OBJTOOL
KBUILD_CFLAGS += -fno-jump-tables
endif
KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic
ifeq ($(CONFIG_RELOCATABLE),y)

View File

@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += orc_hash.h
generic-y += dma-contiguous.h
generic-y += mcs_spinlock.h
generic-y += parport.h

View File

@@ -44,6 +44,7 @@
do { \
instrumentation_begin(); \
__BUG_FLAGS(BUGFLAG_WARNING|(flags)); \
annotate_reachable(); \
instrumentation_end(); \
} while (0)

View File

@@ -6,6 +6,8 @@
#include <asm/ptrace.h>
#include <linux/kprobes.h>
extern void *exception_table[];
void show_registers(struct pt_regs *regs);
asmlinkage void cache_parity_error(void);

View File

@@ -6,6 +6,7 @@
#define _ASM_MODULE_H
#include <asm/inst.h>
#include <asm/orc_types.h>
#include <asm-generic/module.h>
#define RELA_STACK_DEPTH 16
@@ -21,6 +22,12 @@ struct mod_arch_specific {
struct mod_section plt;
struct mod_section plt_idx;
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
/* For CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampolines;
};

View File

@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ORC_HEADER_H
#define _ORC_HEADER_H
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/orc_hash.h>
/*
* The header is currently a 20-byte hash of the ORC entry definition; see
* scripts/orc_hash.sh.
*/
#define ORC_HEADER \
__used __section(".orc_header") __aligned(4) \
static const u8 orc_header[] = { ORC_HASH }
#endif /* _ORC_HEADER_H */

View File

@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ORC_LOOKUP_H
#define _ORC_LOOKUP_H
/*
* This is a lookup table for speeding up access to the .orc_unwind table.
* Given an input address offset, the corresponding lookup table entry
* specifies a subset of the .orc_unwind table to search.
*
* Each block represents the end of the previous range and the start of the
* next range. An extra block is added to give the last range an end.
*
* The block size should be a power of 2 to avoid a costly 'div' instruction.
*
* A block size of 256 was chosen because it roughly doubles unwinder
* performance while only adding ~5% to the ORC data footprint.
*/
#define LOOKUP_BLOCK_ORDER 8
#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER)
#ifndef LINKER_SCRIPT
extern unsigned int orc_lookup[];
extern unsigned int orc_lookup_end[];
#define LOOKUP_START_IP (unsigned long)_stext
#define LOOKUP_STOP_IP (unsigned long)_etext
#endif /* LINKER_SCRIPT */
#endif /* _ORC_LOOKUP_H */

View File

@@ -0,0 +1,58 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ORC_TYPES_H
#define _ORC_TYPES_H
#include <linux/types.h>
/*
* The ORC_REG_* registers are base registers which are used to find other
* registers on the stack.
*
* ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
* address of the previous frame: the caller's SP before it called the current
* function.
*
* ORC_REG_UNDEFINED means the corresponding register's value didn't change in
* the current frame.
*
* The most commonly used base registers are SP and FP -- which the previous SP
* is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
* usually based on.
*
* The rest of the base registers are needed for special cases like entry code
* and GCC realigned stacks.
*/
#define ORC_REG_UNDEFINED 0
#define ORC_REG_PREV_SP 1
#define ORC_REG_SP 2
#define ORC_REG_FP 3
#define ORC_REG_MAX 4
#define ORC_TYPE_UNDEFINED 0
#define ORC_TYPE_END_OF_STACK 1
#define ORC_TYPE_CALL 2
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
* CFI, simplified for ease of access by the in-kernel unwinder. It tells the
* unwinder how to find the previous SP and FP (and sometimes entry regs) on
* the stack for a given code address. Each instance of the struct corresponds
* to one or more code locations.
*/
struct orc_entry {
s16 sp_offset;
s16 fp_offset;
s16 ra_offset;
unsigned int sp_reg:4;
unsigned int fp_reg:4;
unsigned int ra_reg:4;
unsigned int type:3;
unsigned int signal:1;
};
#endif /* __ASSEMBLY__ */
#endif /* _ORC_TYPES_H */

View File

@@ -13,6 +13,7 @@
#include <asm/asm-offsets.h>
#include <asm/loongarch.h>
#include <asm/thread_info.h>
#include <asm/unwind_hints.h>
/* Make the addition of cfi info a little easier. */
.macro cfi_rel_offset reg offset=0 docfi=0
@@ -162,6 +163,7 @@
li.w t0, CSR_CRMD_WE
csrxchg t0, t0, LOONGARCH_CSR_CRMD
#endif
UNWIND_HINT_REGS
.endm
.macro SAVE_ALL docfi=0
@@ -219,6 +221,7 @@
.macro RESTORE_SP_AND_RET docfi=0
cfi_ld sp, PT_R3, \docfi
UNWIND_HINT_FUNC
ertn
.endm

View File

@@ -16,6 +16,7 @@
enum unwinder_type {
UNWINDER_GUESS,
UNWINDER_PROLOGUE,
UNWINDER_ORC,
};
struct unwind_state {
@@ -24,7 +25,7 @@ struct unwind_state {
struct task_struct *task;
bool first, error, reset;
int graph_idx;
unsigned long sp, pc, ra;
unsigned long sp, fp, pc, ra;
};
bool default_next_frame(struct unwind_state *state);
@@ -61,14 +62,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
state->sp = regs->regs[3];
state->pc = regs->csr_era;
state->ra = regs->regs[1];
state->fp = regs->regs[22];
} else if (task && task != current) {
state->sp = thread_saved_fp(task);
state->pc = thread_saved_ra(task);
state->ra = 0;
state->fp = 0;
} else {
state->sp = (unsigned long)__builtin_frame_address(0);
state->pc = (unsigned long)__builtin_return_address(0);
state->ra = 0;
state->fp = 0;
}
state->task = task;
get_stack_info(state->sp, state->task, &state->stack_info);
@@ -77,6 +81,18 @@ static __always_inline void __unwind_start(struct unwind_state *state,
static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
{
return unwind_done(state) ? 0 : state->pc;
if (unwind_done(state))
return 0;
return __kernel_text_address(state->pc) ? state->pc : 0;
}
#ifdef CONFIG_UNWINDER_ORC
void unwind_init(void);
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
#else
static inline void unwind_init(void) {}
static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
#endif
#endif /* _ASM_UNWIND_H */

View File

@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
#define _ASM_LOONGARCH_UNWIND_HINTS_H
#include <linux/objtool.h>
#include <asm/orc_types.h>
#ifdef __ASSEMBLY__
.macro UNWIND_HINT_UNDEFINED
UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
.endm
.macro UNWIND_HINT_END_OF_STACK
UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
.endm
.macro UNWIND_HINT_REGS
UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
.endm
.macro UNWIND_HINT_FUNC
UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
.endm
#endif /* __ASSEMBLY__ */
#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */

View File

@@ -3,6 +3,8 @@
# Makefile for the Linux/LoongArch kernel.
#
OBJECT_FILES_NON_STANDARD_head.o := y
extra-y := vmlinux.lds
obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
@@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
CFLAGS_module.o += $(call cc-option,-Wno-override-init,)
CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,)
CFLAGS_traps.o += $(call cc-option,-Wno-override-init,)
CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,)
ifdef CONFIG_FUNCTION_TRACER
@@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o

View File

@@ -14,11 +14,13 @@
#include <asm/regdef.h>
#include <asm/stackframe.h>
#include <asm/thread_info.h>
#include <asm/unwind_hints.h>
.text
.cfi_sections .debug_frame
.align 5
SYM_CODE_START(handle_syscall)
UNWIND_HINT_UNDEFINED
csrrd t0, PERCPU_BASE_KS
la.pcrel t1, kernelsp
add.d t1, t1, t0
@@ -57,6 +59,7 @@ SYM_CODE_START(handle_syscall)
cfi_st fp, PT_R22
SAVE_STATIC
UNWIND_HINT_REGS
#ifdef CONFIG_KGDB
li.w t1, CSR_CRMD_WE
@@ -75,6 +78,7 @@ SYM_CODE_END(handle_syscall)
_ASM_NOKPROBE(handle_syscall)
SYM_CODE_START(ret_from_fork)
UNWIND_HINT_REGS
bl schedule_tail # a0 = struct task_struct *prev
move a0, sp
bl syscall_exit_to_user_mode
@@ -84,6 +88,7 @@ SYM_CODE_START(ret_from_fork)
SYM_CODE_END(ret_from_fork)
SYM_CODE_START(ret_from_kernel_thread)
UNWIND_HINT_REGS
bl schedule_tail # a0 = struct task_struct *prev
move a0, s1
jirl ra, s0, 0

View File

@@ -15,6 +15,7 @@
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>
#define FPU_REG_WIDTH 8
#define LSX_REG_WIDTH 16
@@ -526,3 +527,9 @@ SYM_FUNC_END(_restore_lasx_context)
.L_fpu_fault:
li.w a0, -EFAULT # failure
jr ra
#ifdef CONFIG_CPU_HAS_LBT
STACK_FRAME_NON_STANDARD _restore_fp
STACK_FRAME_NON_STANDARD _restore_lsx
STACK_FRAME_NON_STANDARD _restore_lasx
#endif

View File

@@ -32,6 +32,7 @@ SYM_FUNC_START(__arch_cpu_idle)
SYM_FUNC_END(__arch_cpu_idle)
SYM_CODE_START(handle_vint)
UNWIND_HINT_UNDEFINED
BACKUP_T0T1
SAVE_ALL
la_abs t1, __arch_cpu_idle
@@ -49,6 +50,7 @@ SYM_CODE_START(handle_vint)
SYM_CODE_END(handle_vint)
SYM_CODE_START(except_vec_cex)
UNWIND_HINT_UNDEFINED
b cache_parity_error
SYM_CODE_END(except_vec_cex)
@@ -67,6 +69,7 @@ SYM_CODE_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
SYM_CODE_START(handle_\exception)
UNWIND_HINT_UNDEFINED
666:
BACKUP_T0T1
SAVE_ALL
@@ -77,7 +80,9 @@ SYM_CODE_END(except_vec_cex)
668:
RESTORE_ALL_AND_RET
SYM_CODE_END(handle_\exception)
.pushsection ".data", "aw", %progbits
SYM_DATA(unwind_hint_\exception, .word 668b - 666b)
.popsection
.endm
BUILD_HANDLER ade ade badv
@@ -94,6 +99,7 @@ SYM_CODE_END(except_vec_cex)
BUILD_HANDLER reserved reserved none /* others */
SYM_CODE_START(handle_sys)
UNWIND_HINT_UNDEFINED
la_abs t0, handle_syscall
jr t0
SYM_CODE_END(handle_sys)

View File

@@ -11,6 +11,7 @@
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>
#define SCR_REG_WIDTH 8
@@ -153,3 +154,5 @@ SYM_FUNC_END(_restore_ftop_context)
.L_lbt_fault:
li.w a0, -EFAULT # failure
jr ra
STACK_FRAME_NON_STANDARD _restore_ftop_context

View File

@@ -73,6 +73,7 @@ SYM_FUNC_START(ftrace_stub)
SYM_FUNC_END(ftrace_stub)
SYM_CODE_START(ftrace_common)
UNWIND_HINT_UNDEFINED
PTR_ADDI a0, ra, -8 /* arg0: ip */
move a1, t0 /* arg1: parent_ip */
la.pcrel t1, function_trace_op
@@ -113,12 +114,14 @@ ftrace_common_return:
SYM_CODE_END(ftrace_common)
SYM_CODE_START(ftrace_caller)
UNWIND_HINT_UNDEFINED
ftrace_regs_entry allregs=0
b ftrace_common
SYM_CODE_END(ftrace_caller)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
SYM_CODE_START(ftrace_regs_caller)
UNWIND_HINT_UNDEFINED
ftrace_regs_entry allregs=1
b ftrace_common
SYM_CODE_END(ftrace_regs_caller)
@@ -126,6 +129,7 @@ SYM_CODE_END(ftrace_regs_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_CODE_START(ftrace_graph_caller)
UNWIND_HINT_UNDEFINED
PTR_L a0, sp, PT_ERA
PTR_ADDI a0, a0, -8 /* arg0: self_addr */
PTR_ADDI a1, sp, PT_R1 /* arg1: parent */
@@ -134,6 +138,7 @@ SYM_CODE_START(ftrace_graph_caller)
SYM_CODE_END(ftrace_graph_caller)
SYM_CODE_START(return_to_handler)
UNWIND_HINT_UNDEFINED
/* Save return value regs */
PTR_ADDI sp, sp, -FGRET_REGS_SIZE
PTR_S a0, sp, FGRET_REGS_A0
@@ -155,6 +160,7 @@ SYM_CODE_END(return_to_handler)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
UNWIND_HINT_UNDEFINED
jr t0
SYM_CODE_END(ftrace_stub_direct_tramp)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

View File

@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <asm/alternative.h>
#include <asm/inst.h>
#include <asm/unwind.h>
static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
{
@@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, struct module *mod)
{
const Elf_Shdr *s, *se;
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL;
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".altinstructions", secstrs + s->sh_name))
apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
alt = s;
if (!strcmp(".orc_unwind", secstrs + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name))
orc_ip = s;
if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
module_init_ftrace_plt(hdr, s, mod);
ftrace = s;
}
if (alt)
apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size);
if (orc && orc_ip)
unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size);
if (ftrace)
module_init_ftrace_plt(hdr, ftrace, mod);
return 0;
}

Some files were not shown because too many files have changed in this diff Show More