mirror of
https://github.com/Dasharo/skiboot.git
synced 2026-03-06 14:50:44 -08:00
Sample output from Cédric:
-------------------------
[ 88.294111649,7] cpu_idle_p9 called on cpu 0x063c with pm disabled
[ 88.289365222,7] cpu_idle_p9 called on cpu 0x025f with pm disabled
[ 88.289900684,7] cpu_idle_p9 called on cpu 0x045f with pm disabled
[ 88.302621295,7] CHIPTOD: Base TFMR=0x2512000000000000
[ 88.289899701,7] cpu_idle_p9 called on cpu 0x0456 with pm disabled
LOCK ERROR: Deadlock detected @0x30402740 (state: 0x0000000400000001)
[ 88.332264757,3] ***********************************************
[ 88.332300051,3] < assert failed at core/lock.c:32 >
[ 88.332328282,3] .
[ 88.332347335,3] .
[ 88.332364894,3] .
[ 88.332377963,3] OO__)
[ 88.332395458,3] <"__/
[ 88.332412628,3] ^ ^
[ 88.332450246,3] Fatal TRAP at 00000000300286a0 .lock_error+0x64 MSR 9000000000021002
[ 88.332501812,3] CFAR : 00000000300414f4 MSR : 9000000000021002
[ 88.332536539,3] SRR0 : 00000000300286a0 SRR1 : 9000000000021002
[ 88.332574644,3] HSRR0: 0000000030020024 HSRR1: 9000000000001000
[ 88.332610635,3] DSISR: 00000000 DAR : 0000000000000000
[ 88.332650628,3] LR : 0000000030028690 CTR : 00000000300f9fa0
[ 88.332684451,3] CR : 20002000 XER : 00000000
[ 88.332712767,3] GPR00: 0000000030028690 GPR16: 0000000032c98000
[ 88.332748046,3] GPR01: 0000000032c9b0a0 GPR17: 0000000000000000
[ 88.332784060,3] GPR02: 0000000030169d00 GPR18: 0000000000000000
[ 88.332822091,3] GPR03: 0000000032c9b310 GPR19: 0000000000000000
[ 88.332861357,3] GPR04: 0000000030041480 GPR20: 0000000000000000
[ 88.332897229,3] GPR05: 0000000000000000 GPR21: 0000000000000000
[ 88.332937051,3] GPR06: 0000000000000010 GPR22: 0000000000000000
[ 88.332968463,3] GPR07: 0000000000000000 GPR23: 0000000000000000
[ 88.333007333,3] GPR08: 000000000002cbb5 GPR24: 0000000000000000
[ 88.333041971,3] GPR09: 0000000000000000 GPR25: 0000000000000000
[ 88.333081073,3] GPR10: 0000000000000000 GPR26: 0000000000000003
[ 88.333114301,3] GPR11: 3839616263646566 GPR27: 0000000000000211
[ 88.333156040,3] GPR12: 0000000020002000 GPR28: 000000003042a134
[ 88.333189222,3] GPR13: 0000000000000000 GPR29: 0000000030402740
[ 88.333225638,3] GPR14: 0000000000000000 GPR30: 0000000000000001
[ 88.333259730,3] GPR15: 0000000000000000 GPR31: 0000000000000000
CPU 0211 Backtrace:
S: 0000000032c9b3b0 R: 0000000030028690 .lock_error+0x54
S: 0000000032c9b440 R: 0000000030028828 .add_lock_request+0xd0
S: 0000000032c9b4f0 R: 0000000030028a9c .lock_caller+0x8c
S: 0000000032c9b5a0 R: 0000000030021b30 .__mcount_stack_check+0x70
S: 0000000032c9b650 R: 00000000300fabb0 .list_check_node+0x1c
S: 0000000032c9b6f0 R: 00000000300fac98 .list_check+0x38
S: 0000000032c9b790 R: 00000000300289bc .try_lock_caller+0xac
S: 0000000032c9b830 R: 0000000030028ad8 .lock_caller+0xc8
S: 0000000032c9b8e0 R: 0000000030028d74 .lock_recursive_caller+0x54
S: 0000000032c9b980 R: 0000000030020cb8 .console_write+0x48
S: 0000000032c9ba30 R: 00000000300445a8 .vprlog+0xc8
S: 0000000032c9bc20 R: 0000000030044630 ._prlog+0x50
S: 0000000032c9bcb0 R: 0000000030029204 .cpu_idle_p9+0x74
S: 0000000032c9bd40 R: 0000000030029628 .cpu_idle_pm+0x4c
S: 0000000032c9bde0 R: 0000000030023fe0 .__secondary_cpu_entry+0xa0
S: 0000000032c9be70 R: 0000000030024034 .secondary_cpu_entry+0x40
S: 0000000032c9bf00 R: 0000000030003290 secondary_wait+0x8c
CPU 0x4:
opal_run_pollers ->
check_stacks -> takes stack_check_lock lock
prlog ->
console_write -> waits for con_lock
CPU 0x211
cpu_idle_p9 ->
prlog ->
console_write -> Takes con_lock lock
list_check_node -> tries to take stack_check_lock and hits deadlock.
We do not need to hold `stack_check_lock` while printing
backtraces. Instead, hold the backtrace lock (bt_lock) while
printing the output.
Reported-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Tested-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
267 lines
6.6 KiB
C
267 lines
6.6 KiB
C
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
|
|
/*
|
|
* Create/Print backtraces, check stack usage etc.
|
|
*
|
|
* Copyright 2013-2019 IBM Corp.
|
|
*/
|
|
|
|
#include <skiboot.h>
|
|
#include <processor.h>
|
|
#include <cpu.h>
|
|
#include <stack.h>
|
|
#include <mem_region.h>
|
|
#include <unistd.h>
|
|
#include <lock.h>
|
|
|
|
#define STACK_BUF_ENTRIES 60
|
|
static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
|
|
|
|
/* Dumps backtrace to buffer */
|
|
/*
 * Walk the stack back-chain starting at @eframe and record up to
 * @max_ents (sp, pc) pairs into @entries. Fills @metadata with the
 * number of entries captured, the caller's r1/r0 (OPAL token) and
 * this CPU's PIR.
 *
 * __nomcount: this runs from the stack-checking and crash paths, so
 * it must not itself be mcount-instrumented.
 */
static void __nomcount __backtrace_create(struct bt_entry *entries,
					  unsigned int max_ents,
					  struct bt_metadata *metadata,
					  struct stack_frame *eframe)
{
	unsigned long *fp = (unsigned long *)eframe;
	unsigned long top_adj = top_of_ram;

	/* Assume one stack for early backtraces */
	if (top_of_ram == SKIBOOT_BASE + SKIBOOT_SIZE)
		top_adj = top_of_ram + STACK_SIZE;

	metadata->ents = 0;
	while (max_ents) {
		/* fp[0] is the saved back-chain pointer to the caller's frame */
		fp = (unsigned long *)fp[0];
		/* End of chain, or frame pointer left valid memory: stop */
		if (!fp || (unsigned long)fp > top_adj)
			break;
		eframe = (struct stack_frame *)fp;
		if (eframe->magic == STACK_INT_MAGIC) {
			/* Frame was pushed by interrupt entry: record which
			 * exception and where it hit */
			entries->exception_type = eframe->type;
			entries->exception_pc = eframe->pc;
		} else {
			entries->exception_type = 0;
		}
		entries->sp = (unsigned long)fp;
		/* fp[2]: saved-LR slot of the frame (PPC64 ABI layout —
		 * NOTE(review): assumed from the [2] offset, confirm against
		 * struct stack_frame) */
		entries->pc = fp[2];
		entries++;
		metadata->ents++;
		max_ents--;
	}

	/* eframe now points at the last frame visited (or the initial one
	 * if the walk never advanced); recover caller state from it */
	metadata->r1_caller = eframe->gpr[1];

	/* r0 holds the OPAL call token when we came in via an OPAL entry;
	 * -1UL marks "no token" (walk terminated on a NULL back-chain) */
	if (fp)
		metadata->token = eframe->gpr[0];
	else
		metadata->token = -1UL;

	metadata->pir = mfspr(SPR_PIR);
}
|
|
|
|
/*
 * Capture a backtrace of the current thread of execution into
 * @entries (up to @max_ents frames), filling @metadata.
 *
 * Thin wrapper around __backtrace_create() that seeds the walk with
 * our own current frame.
 */
void __nomcount backtrace_create(struct bt_entry *entries,
				 unsigned int max_ents,
				 struct bt_metadata *metadata)
{
	struct stack_frame *cur_frame;

	/* Start the walk from this function's own frame */
	cur_frame = (struct stack_frame *)__builtin_frame_address(0);
	__backtrace_create(entries, max_ents, metadata, cur_frame);
}
|
|
|
|
void backtrace_print(struct bt_entry *entries, struct bt_metadata *metadata,
|
|
char *out_buf, unsigned int *len, bool symbols)
|
|
{
|
|
static char bt_text_buf[4096];
|
|
int i, l = 0, max;
|
|
char *buf = out_buf;
|
|
unsigned long bottom, top, normal_top, tbot, ttop;
|
|
char mark;
|
|
|
|
if (!out_buf) {
|
|
buf = bt_text_buf;
|
|
max = sizeof(bt_text_buf) - 16;
|
|
} else
|
|
max = *len - 1;
|
|
|
|
bottom = cpu_stack_bottom(metadata->pir);
|
|
normal_top = cpu_stack_top(metadata->pir);
|
|
top = cpu_emergency_stack_top(metadata->pir);
|
|
tbot = SKIBOOT_BASE;
|
|
ttop = (unsigned long)&_etext;
|
|
|
|
l += snprintf(buf, max, "CPU %04lx Backtrace:\n", metadata->pir);
|
|
for (i = 0; i < metadata->ents && l < max; i++) {
|
|
if (entries->sp < bottom || entries->sp > top)
|
|
mark = '!';
|
|
else if (entries->sp > normal_top)
|
|
mark = 'E';
|
|
else if (entries->pc < tbot || entries->pc > ttop)
|
|
mark = '*';
|
|
else
|
|
mark = ' ';
|
|
l += snprintf(buf + l, max - l,
|
|
" S: %016lx R: %016lx %c ",
|
|
entries->sp, entries->pc, mark);
|
|
if (symbols)
|
|
l += snprintf_symbol(buf + l, max - l, entries->pc);
|
|
l += snprintf(buf + l, max - l, "\n");
|
|
if (entries->exception_type) {
|
|
l += snprintf(buf + l, max - l,
|
|
" --- Interrupt 0x%lx at %016lx ---\n",
|
|
entries->exception_type, entries->exception_pc);
|
|
}
|
|
entries++;
|
|
}
|
|
if (metadata->token <= OPAL_LAST)
|
|
l += snprintf(buf + l, max - l,
|
|
" --- OPAL call token: 0x%lx caller R1: 0x%016lx ---\n",
|
|
metadata->token, metadata->r1_caller);
|
|
else if (metadata->token == -1UL)
|
|
l += snprintf(buf + l, max - l, " --- OPAL boot ---\n");
|
|
if (!out_buf)
|
|
write(stdout->fd, bt_text_buf, l);
|
|
buf[l++] = 0;
|
|
if (len)
|
|
*len = l;
|
|
}
|
|
|
|
/*
|
|
* To ensure that we always get backtrace output we bypass the usual console
|
|
* locking paths. The downside is that when multiple threads need to print
|
|
* a backtrace they garble each other. To prevent this we use a separate
|
|
* lock to serialise printing of the dumps.
|
|
*/
|
|
static struct lock bt_lock = LOCK_UNLOCKED;
|
|
|
|
void backtrace(void)
|
|
{
|
|
struct bt_metadata metadata;
|
|
|
|
lock(&bt_lock);
|
|
|
|
backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata);
|
|
backtrace_print(bt_buf, &metadata, NULL, NULL, true);
|
|
|
|
unlock(&bt_lock);
|
|
}
|
|
|
|
void backtrace_r1(uint64_t r1)
|
|
{
|
|
struct bt_metadata metadata;
|
|
|
|
lock(&bt_lock);
|
|
|
|
__backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata, (struct stack_frame *)r1);
|
|
backtrace_print(bt_buf, &metadata, NULL, NULL, true);
|
|
|
|
unlock(&bt_lock);
|
|
}
|
|
|
|
void __nomcount __stack_chk_fail(void);
/*
 * Compiler stack-protector hook, called when a stack canary has been
 * clobbered. Report once and bring the system down; the failed_once
 * guard stops us from recursing if the reporting path itself trips
 * the checker again.
 */
void __nomcount __stack_chk_fail(void)
{
	static bool failed_once;

	if (!failed_once) {
		failed_once = true;
		prlog(PR_EMERG, "Stack corruption detected !\n");
		abort();
	}
}
|
|
|
|
#ifdef STACK_CHECK_ENABLED
|
|
|
|
static int64_t lowest_stack_mark = LONG_MAX;
|
|
static struct lock stack_check_lock = LOCK_UNLOCKED;
|
|
|
|
void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr);
/*
 * Per-call stack usage check, invoked from the mcount instrumentation
 * hook with the caller's stack pointer @sp and link register @lr.
 *
 * Tracks the lowest stack mark seen on this CPU and aborts on
 * overflow. __nomcount so we don't instrument ourselves.
 */
void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr)
{
	struct cpu_thread *c = this_cpu();
	/* The normal stack sits directly above the cpu_thread struct */
	uint64_t base = (uint64_t)c;
	uint64_t bot = base + sizeof(struct cpu_thread);
	/* Remaining gap between SP and the bottom of the stack */
	int64_t mark = sp - bot;
	uint64_t top = base + NORMAL_STACK_SIZE;

	/*
	 * Don't check the emergency stack just yet.
	 */
	if (c->in_opal_call > 1)
		return;

	/*
	 * Don't re-enter on this CPU or don't enter at all if somebody
	 * has spotted an overflow
	 */
	if (c->in_mcount)
		return;
	/* Set the reentrancy flag before taking any lock below, so the
	 * locking/printing paths (themselves instrumented) bail out via
	 * the check above instead of recursing */
	c->in_mcount = true;

	/* Capture lowest stack for this thread */
	if (mark < c->stack_bot_mark) {
		/* stack_check_lock protects the per-CPU low-water-mark
		 * fields against concurrent readers in check_stacks() */
		lock(&stack_check_lock);
		c->stack_bot_mark = mark;
		c->stack_bot_pc = lr;
		c->stack_bot_tok = c->current_token;
		backtrace_create(c->stack_bot_bt, CPU_BACKTRACE_SIZE,
				 &c->stack_bot_bt_metadata);
		unlock(&stack_check_lock);

		/* Warn (outside the lock) when the gap gets dangerously
		 * small, before it becomes an outright overflow */
		if (mark < STACK_WARNING_GAP) {
			prlog(PR_EMERG, "CPU %04x Stack usage danger !"
			      " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n",
			      c->pir, lr, sp, mark, c->current_token);
		}
	}

	/* Stack is within bounds? */
	if (sp >= (bot + STACK_SAFETY_GAP) && sp < top) {
		c->in_mcount = false;
		return;
	}

	/* SP escaped the [bot + safety gap, top) window: treat as an
	 * overflow. in_mcount is deliberately left set so no other path
	 * on this CPU re-enters while we go down. */
	prlog(PR_EMERG, "CPU %04x Stack overflow detected !"
	      " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n",
	      c->pir, lr, sp, mark, c->current_token);
	abort();
}
|
|
|
|
/*
 * Periodic poller: find the CPU with the lowest stack mark seen so
 * far and, if it beats the global record, log it with a backtrace.
 *
 * Locking: stack_check_lock is held only around the mark comparison
 * and is NOT held while printing. Printing goes through console_write
 * (which takes con_lock); holding stack_check_lock across that path
 * deadlocked against a CPU holding con_lock whose list_check_node
 * wanted stack_check_lock (see the report above). bt_lock is held
 * for the print instead, to serialise dump output between CPUs.
 */
void check_stacks(void)
{
	struct cpu_thread *c, *lowest = NULL;

	/* We should never call that from mcount */
	assert(!this_cpu()->in_mcount);

	/* Mark ourselves "in_mcount" to avoid deadlock on stack
	 * check lock
	 */
	this_cpu()->in_mcount = true;

	for_each_cpu(c) {
		/* Cheap unlocked pre-filter; the real comparison is
		 * redone under the lock below */
		if (!c->stack_bot_mark ||
		    c->stack_bot_mark >= lowest_stack_mark)
			continue;
		lock(&stack_check_lock);
		if (c->stack_bot_mark < lowest_stack_mark) {
			lowest = c;
			lowest_stack_mark = c->stack_bot_mark;
		}
		unlock(&stack_check_lock);
	}
	if (lowest) {
		/* Serialise the dump output only — do not hold
		 * stack_check_lock here (deadlock, see header comment) */
		lock(&bt_lock);
		prlog(PR_NOTICE, "CPU %04x lowest stack mark %lld bytes left"
		      " pc=%08llx token=%lld\n",
		      lowest->pir, lowest->stack_bot_mark, lowest->stack_bot_pc,
		      lowest->stack_bot_tok);
		backtrace_print(lowest->stack_bot_bt,
				&lowest->stack_bot_bt_metadata,
				NULL, NULL, true);
		unlock(&bt_lock);
	}

	this_cpu()->in_mcount = false;
}
|
|
#endif /* STACK_CHECK_ENABLED */
|