mirror of
https://github.com/armbian/linux-cix.git
synced 2026-01-06 12:30:45 -08:00
trace: Add timerlat tracer
The timerlat tracer aims to help the preemptive kernel developers to
find sources of wakeup latencies of real-time threads. Like cyclictest,
the tracer sets a periodic timer that wakes up a thread. The thread then
computes a *wakeup latency* value as the difference between the *current
time* and the *absolute time* that the timer was set to expire. The main
goal of timerlat is tracing in such a way to help kernel developers.
Usage
Write the ASCII text "timerlat" into the current_tracer file of the
tracing system (generally mounted at /sys/kernel/tracing).
For example:
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo timerlat > current_tracer
It is possible to follow the trace by reading the trace file:
[root@f32 tracing]# cat trace
# tracer: timerlat
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# || /
# |||| ACTIVATION
# TASK-PID CPU# |||| TIMESTAMP ID CONTEXT LATENCY
# | | | |||| | | | |
<idle>-0 [000] d.h1 54.029328: #1 context irq timer_latency 932 ns
<...>-867 [000] .... 54.029339: #1 context thread timer_latency 11700 ns
<idle>-0 [001] dNh1 54.029346: #1 context irq timer_latency 2833 ns
<...>-868 [001] .... 54.029353: #1 context thread timer_latency 9820 ns
<idle>-0 [000] d.h1 54.030328: #2 context irq timer_latency 769 ns
<...>-867 [000] .... 54.030330: #2 context thread timer_latency 3070 ns
<idle>-0 [001] d.h1 54.030344: #2 context irq timer_latency 935 ns
<...>-868 [001] .... 54.030347: #2 context thread timer_latency 4351 ns
The tracer creates a per-cpu kernel thread with real-time priority that
prints two lines at every activation. The first is the *timer latency*
observed at the *hardirq* context before the activation of the thread.
The second is the *timer latency* observed by the thread, which is the
same level that cyclictest reports. The ACTIVATION ID field
serves to relate the *irq* execution to its respective *thread* execution.
The irq/thread splitting is important to clarify at which context
the unexpected high value is coming from. The *irq* context can be
delayed by hardware related actions, such as SMIs, NMIs, IRQs
or by a thread masking interrupts. Once the timer happens, the delay
can also be influenced by blocking caused by threads. For example, by
postponing the scheduler execution via preempt_disable(), by the
scheduler execution, or by masking interrupts. Threads can
also be delayed by the interference from other threads and IRQs.
The timerlat can also take advantage of the osnoise: traceevents.
For example:
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo timerlat > current_tracer
[root@f32 tracing]# echo osnoise > set_event
[root@f32 tracing]# echo 25 > osnoise/stop_tracing_total_us
[root@f32 tracing]# tail -10 trace
cc1-87882 [005] d..h... 548.771078: #402268 context irq timer_latency 1585 ns
cc1-87882 [005] dNLh1.. 548.771082: irq_noise: local_timer:236 start 548.771077442 duration 4597 ns
cc1-87882 [005] dNLh2.. 548.771083: irq_noise: reschedule:253 start 548.771083017 duration 56 ns
cc1-87882 [005] dNLh2.. 548.771086: irq_noise: call_function_single:251 start 548.771083811 duration 2048 ns
cc1-87882 [005] dNLh2.. 548.771088: irq_noise: call_function_single:251 start 548.771086814 duration 1495 ns
cc1-87882 [005] dNLh2.. 548.771091: irq_noise: call_function_single:251 start 548.771089194 duration 1558 ns
cc1-87882 [005] dNLh2.. 548.771094: irq_noise: call_function_single:251 start 548.771091719 duration 1932 ns
cc1-87882 [005] dNLh2.. 548.771096: irq_noise: call_function_single:251 start 548.771094696 duration 1050 ns
cc1-87882 [005] d...3.. 548.771101: thread_noise: cc1:87882 start 548.771078243 duration 10909 ns
timerlat/5-1035 [005] ....... 548.771103: #402268 context thread timer_latency 25960 ns
For further information see: Documentation/trace/timerlat-tracer.rst
Link: https://lkml.kernel.org/r/71f18efc013e1194bcaea1e54db957de2b19ba62.1624372313.git.bristot@redhat.com
Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
This commit is contained in:
committed by
Steven Rostedt (VMware)
parent
bce29ac9ce
commit
a955d7eac1
@@ -24,6 +24,7 @@ Linux Tracing Technologies
|
||||
boottime-trace
|
||||
hwlat_detector
|
||||
osnoise-tracer
|
||||
timerlat-tracer
|
||||
intel_th
|
||||
ring-buffer-design
|
||||
stm
|
||||
|
||||
181
Documentation/trace/timerlat-tracer.rst
Normal file
181
Documentation/trace/timerlat-tracer.rst
Normal file
@@ -0,0 +1,181 @@
|
||||
###############
|
||||
Timerlat tracer
|
||||
###############
|
||||
|
||||
The timerlat tracer aims to help the preemptive kernel developers to
|
||||
find sources of wakeup latencies of real-time threads. Like cyclictest,
|
||||
the tracer sets a periodic timer that wakes up a thread. The thread then
|
||||
computes a *wakeup latency* value as the difference between the *current
|
||||
time* and the *absolute time* that the timer was set to expire. The main
|
||||
goal of timerlat is tracing in such a way to help kernel developers.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Write the ASCII text "timerlat" into the current_tracer file of the
|
||||
tracing system (generally mounted at /sys/kernel/tracing).
|
||||
|
||||
For example::
|
||||
|
||||
[root@f32 ~]# cd /sys/kernel/tracing/
|
||||
[root@f32 tracing]# echo timerlat > current_tracer
|
||||
|
||||
It is possible to follow the trace by reading the trace file::
|
||||
|
||||
[root@f32 tracing]# cat trace
|
||||
# tracer: timerlat
|
||||
#
|
||||
# _-----=> irqs-off
|
||||
# / _----=> need-resched
|
||||
# | / _---=> hardirq/softirq
|
||||
# || / _--=> preempt-depth
|
||||
# || /
|
||||
# |||| ACTIVATION
|
||||
# TASK-PID CPU# |||| TIMESTAMP ID CONTEXT LATENCY
|
||||
# | | | |||| | | | |
|
||||
<idle>-0 [000] d.h1 54.029328: #1 context irq timer_latency 932 ns
|
||||
<...>-867 [000] .... 54.029339: #1 context thread timer_latency 11700 ns
|
||||
<idle>-0 [001] dNh1 54.029346: #1 context irq timer_latency 2833 ns
|
||||
<...>-868 [001] .... 54.029353: #1 context thread timer_latency 9820 ns
|
||||
<idle>-0 [000] d.h1 54.030328: #2 context irq timer_latency 769 ns
|
||||
<...>-867 [000] .... 54.030330: #2 context thread timer_latency 3070 ns
|
||||
<idle>-0 [001] d.h1 54.030344: #2 context irq timer_latency 935 ns
|
||||
<...>-868 [001] .... 54.030347: #2 context thread timer_latency 4351 ns
|
||||
|
||||
|
||||
The tracer creates a per-cpu kernel thread with real-time priority that
|
||||
prints two lines at every activation. The first is the *timer latency*
|
||||
observed at the *hardirq* context before the activation of the thread.
|
||||
The second is the *timer latency* observed by the thread. The ACTIVATION
|
||||
ID field serves to relate the *irq* execution to its respective *thread*
|
||||
execution.
|
||||
|
||||
The *irq*/*thread* splitting is important to clarify at which context
|
||||
the unexpected high value is coming from. The *irq* context can be
|
||||
delayed by hardware related actions, such as SMIs, NMIs, IRQs
|
||||
or by a thread masking interrupts. Once the timer happens, the delay
|
||||
can also be influenced by blocking caused by threads. For example, by
|
||||
postponing the scheduler execution via preempt_disable(), by the
|
||||
scheduler execution, or by masking interrupts. Threads can
|
||||
also be delayed by the interference from other threads and IRQs.
|
||||
|
||||
Tracer options
|
||||
---------------------
|
||||
|
||||
The timerlat tracer is built on top of osnoise tracer.
|
||||
So its configuration is also done in the osnoise/ config
|
||||
directory. The timerlat configs are:
|
||||
|
||||
- cpus: CPUs at which a timerlat thread will execute.
|
||||
- timerlat_period_us: the period of the timerlat thread.
|
||||
- stop_tracing_us: stop the system tracing if a
|
||||
timer latency at the *irq* context higher than the configured
|
||||
value happens. Writing 0 disables this option.
|
||||
- stop_tracing_total_us: stop the system tracing if a
|
||||
timer latency at the *thread* context higher than the configured
|
||||
value happens. Writing 0 disables this option.
|
||||
- print_stack: save the stack of the IRQ occurrence, and print
|
||||
it after the *thread context* event.
|
||||
|
||||
timerlat and osnoise
|
||||
----------------------------
|
||||
|
||||
The timerlat tracer can also take advantage of the osnoise: trace events.
|
||||
For example::
|
||||
|
||||
[root@f32 ~]# cd /sys/kernel/tracing/
|
||||
[root@f32 tracing]# echo timerlat > current_tracer
|
||||
[root@f32 tracing]# echo 1 > events/osnoise/enable
|
||||
[root@f32 tracing]# echo 25 > osnoise/stop_tracing_total_us
|
||||
[root@f32 tracing]# tail -10 trace
|
||||
cc1-87882 [005] d..h... 548.771078: #402268 context irq timer_latency 13585 ns
|
||||
cc1-87882 [005] dNLh1.. 548.771082: irq_noise: local_timer:236 start 548.771077442 duration 7597 ns
|
||||
cc1-87882 [005] dNLh2.. 548.771099: irq_noise: qxl:21 start 548.771085017 duration 7139 ns
|
||||
cc1-87882 [005] d...3.. 548.771102: thread_noise: cc1:87882 start 548.771078243 duration 9909 ns
|
||||
timerlat/5-1035 [005] ....... 548.771104: #402268 context thread timer_latency 39960 ns
|
||||
|
||||
In this case, the root cause of the timer latency does not point to a
|
||||
single cause, but to multiple ones. Firstly, the timer IRQ was delayed
|
||||
for 13 us, which may point to a long IRQ disabled section (see IRQ
|
||||
stacktrace section). Then the timer interrupt that wakes up the timerlat
|
||||
thread took 7597 ns, and the qxl:21 device IRQ took 7139 ns. Finally,
|
||||
the cc1 thread noise took 9909 ns of time before the context switch.
|
||||
Such pieces of evidence are useful for the developer to use other
|
||||
tracing methods to figure out how to debug and optimize the system.
|
||||
|
||||
It is worth mentioning that the *duration* values reported
|
||||
by the osnoise: events are *net* values. For example, the
|
||||
thread_noise does not include the duration of the overhead caused
|
||||
by the IRQ execution (which indeed accounted for 14736 ns). But
|
||||
the values reported by the timerlat tracer (timer_latency)
|
||||
are *gross* values.
|
||||
|
||||
The art below illustrates a CPU timeline and how the timerlat tracer
|
||||
observes it at the top and the osnoise: events at the bottom. Each "-"
|
||||
in the timelines means circa 1 us, and the time moves ==>::
|
||||
|
||||
External timer irq thread
|
||||
clock latency latency
|
||||
event 13585 ns 39960 ns
|
||||
| ^ ^
|
||||
v | |
|
||||
|-------------| |
|
||||
|-------------+-------------------------|
|
||||
^ ^
|
||||
========================================================================
|
||||
[tmr irq] [dev irq]
|
||||
[another thread...^ v..^ v.......][timerlat/ thread] <-- CPU timeline
|
||||
=========================================================================
|
||||
|-------| |-------|
|
||||
|--^ v-------|
|
||||
| | |
|
||||
| | + thread_noise: 9909 ns
|
||||
| +-> irq_noise: 7139 ns
|
||||
+-> irq_noise: 7597 ns
|
||||
|
||||
IRQ stacktrace
|
||||
---------------------------
|
||||
|
||||
The osnoise/print_stack option is helpful for the cases in which a thread
|
||||
noise is the major factor for the timer latency, because of preempt or
|
||||
irq disabled. For example::
|
||||
|
||||
[root@f32 tracing]# echo 500 > osnoise/stop_tracing_total_us
|
||||
[root@f32 tracing]# echo 500 > osnoise/print_stack
|
||||
[root@f32 tracing]# echo timerlat > current_tracer
|
||||
[root@f32 tracing]# tail -21 per_cpu/cpu7/trace
|
||||
insmod-1026 [007] dN.h1.. 200.201948: irq_noise: local_timer:236 start 200.201939376 duration 7872 ns
|
||||
insmod-1026 [007] d..h1.. 200.202587: #29800 context irq timer_latency 1616 ns
|
||||
insmod-1026 [007] dN.h2.. 200.202598: irq_noise: local_timer:236 start 200.202586162 duration 11855 ns
|
||||
insmod-1026 [007] dN.h3.. 200.202947: irq_noise: local_timer:236 start 200.202939174 duration 7318 ns
|
||||
insmod-1026 [007] d...3.. 200.203444: thread_noise: insmod:1026 start 200.202586933 duration 838681 ns
|
||||
timerlat/7-1001 [007] ....... 200.203445: #29800 context thread timer_latency 859978 ns
|
||||
timerlat/7-1001 [007] ....1.. 200.203446: <stack trace>
|
||||
=> timerlat_irq
|
||||
=> __hrtimer_run_queues
|
||||
=> hrtimer_interrupt
|
||||
=> __sysvec_apic_timer_interrupt
|
||||
=> asm_call_irq_on_stack
|
||||
=> sysvec_apic_timer_interrupt
|
||||
=> asm_sysvec_apic_timer_interrupt
|
||||
=> delay_tsc
|
||||
=> dummy_load_1ms_pd_init
|
||||
=> do_one_initcall
|
||||
=> do_init_module
|
||||
=> __do_sys_finit_module
|
||||
=> do_syscall_64
|
||||
=> entry_SYSCALL_64_after_hwframe
|
||||
|
||||
In this case, it is possible to see that the thread added the highest
|
||||
contribution to the *timer latency* and the stack trace, saved during
|
||||
the timerlat IRQ handler, points to a function named
|
||||
dummy_load_1ms_pd_init, which had the following code (on purpose)::
|
||||
|
||||
static int __init dummy_load_1ms_pd_init(void)
|
||||
{
|
||||
preempt_disable();
|
||||
mdelay(1);
|
||||
preempt_enable();
|
||||
return 0;
|
||||
|
||||
}
|
||||
@@ -390,6 +390,34 @@ config OSNOISE_TRACER
|
||||
To enable this tracer, echo in "osnoise" into the current_tracer
|
||||
file.
|
||||
|
||||
config TIMERLAT_TRACER
|
||||
bool "Timerlat tracer"
|
||||
select OSNOISE_TRACER
|
||||
select GENERIC_TRACER
|
||||
help
|
||||
The timerlat tracer aims to help the preemptive kernel developers
|
||||
to find sources of wakeup latencies of real-time threads.
|
||||
|
||||
The tracer creates a per-cpu kernel thread with real-time priority.
|
||||
The tracer thread sets a periodic timer to wake itself up, and goes
|
||||
to sleep waiting for the timer to fire. At the wakeup, the thread
|
||||
then computes a wakeup latency value as the difference between
|
||||
the current time and the absolute time that the timer was set
|
||||
to expire.
|
||||
|
||||
The tracer prints two lines at every activation. The first is the
|
||||
timer latency observed at the hardirq context before the
|
||||
activation of the thread. The second is the timer latency observed
|
||||
by the thread, which is the same level that cyclictest reports. The
|
||||
ACTIVATION ID field serves to relate the irq execution to its
|
||||
respective thread execution.
|
||||
|
||||
The tracer is built on top of the osnoise tracer, and the osnoise:
|
||||
events can be used to trace the source of interference from NMI,
|
||||
IRQs and other threads. It also enables the capture of the
|
||||
stacktrace at the IRQ context, which helps to identify the code
|
||||
path that can cause thread delay.
|
||||
|
||||
config MMIOTRACE
|
||||
bool "Memory mapped IO tracing"
|
||||
depends on HAVE_MMIOTRACE_SUPPORT && PCI
|
||||
|
||||
@@ -45,6 +45,7 @@ enum trace_type {
|
||||
TRACE_BPUTS,
|
||||
TRACE_HWLAT,
|
||||
TRACE_OSNOISE,
|
||||
TRACE_TIMERLAT,
|
||||
TRACE_RAW_DATA,
|
||||
TRACE_FUNC_REPEATS,
|
||||
|
||||
@@ -448,6 +449,7 @@ extern void __ftrace_bad_type(void);
|
||||
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
|
||||
IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \
|
||||
IF_ASSIGN(var, ent, struct osnoise_entry, TRACE_OSNOISE);\
|
||||
IF_ASSIGN(var, ent, struct timerlat_entry, TRACE_TIMERLAT);\
|
||||
IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\
|
||||
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
|
||||
TRACE_MMIO_RW); \
|
||||
|
||||
@@ -385,3 +385,19 @@ FTRACE_ENTRY(osnoise, osnoise_entry,
|
||||
__entry->softirq_count,
|
||||
__entry->thread_count)
|
||||
);
|
||||
|
||||
FTRACE_ENTRY(timerlat, timerlat_entry,
|
||||
|
||||
TRACE_TIMERLAT,
|
||||
|
||||
F_STRUCT(
|
||||
__field( unsigned int, seqnum )
|
||||
__field( int, context )
|
||||
__field( u64, timer_latency )
|
||||
),
|
||||
|
||||
F_printk("seq:%u\tcontext:%d\ttimer_latency:%llu\n",
|
||||
__entry->seqnum,
|
||||
__entry->context,
|
||||
__entry->timer_latency)
|
||||
);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1301,6 +1301,52 @@ static struct trace_event trace_osnoise_event = {
|
||||
.funcs = &trace_osnoise_funcs,
|
||||
};
|
||||
|
||||
/* TRACE_TIMERLAT */
|
||||
static enum print_line_t
|
||||
trace_timerlat_print(struct trace_iterator *iter, int flags,
|
||||
struct trace_event *event)
|
||||
{
|
||||
struct trace_entry *entry = iter->ent;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct timerlat_entry *field;
|
||||
|
||||
trace_assign_type(field, entry);
|
||||
|
||||
trace_seq_printf(s, "#%-5u context %6s timer_latency %9llu ns\n",
|
||||
field->seqnum,
|
||||
field->context ? "thread" : "irq",
|
||||
field->timer_latency);
|
||||
|
||||
return trace_handle_return(s);
|
||||
}
|
||||
|
||||
static enum print_line_t
|
||||
trace_timerlat_raw(struct trace_iterator *iter, int flags,
|
||||
struct trace_event *event)
|
||||
{
|
||||
struct timerlat_entry *field;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
|
||||
trace_assign_type(field, iter->ent);
|
||||
|
||||
trace_seq_printf(s, "%u %d %llu\n",
|
||||
field->seqnum,
|
||||
field->context,
|
||||
field->timer_latency);
|
||||
|
||||
return trace_handle_return(s);
|
||||
}
|
||||
|
||||
static struct trace_event_functions trace_timerlat_funcs = {
|
||||
.trace = trace_timerlat_print,
|
||||
.raw = trace_timerlat_raw,
|
||||
};
|
||||
|
||||
static struct trace_event trace_timerlat_event = {
|
||||
.type = TRACE_TIMERLAT,
|
||||
.funcs = &trace_timerlat_funcs,
|
||||
};
|
||||
|
||||
/* TRACE_BPUTS */
|
||||
static enum print_line_t
|
||||
trace_bputs_print(struct trace_iterator *iter, int flags,
|
||||
@@ -1512,6 +1558,7 @@ static struct trace_event *events[] __initdata = {
|
||||
&trace_print_event,
|
||||
&trace_hwlat_event,
|
||||
&trace_osnoise_event,
|
||||
&trace_timerlat_event,
|
||||
&trace_raw_data_event,
|
||||
&trace_func_repeats_event,
|
||||
NULL
|
||||
|
||||
Reference in New Issue
Block a user