Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (47 commits)
  perf report: Add --symbols parameter
  perf report: Add --comms parameter
  perf report: Add --dsos parameter
  perf_counter tools: Adjust only prelinked symbol's addresses
  perf_counter: Provide a way to enable counters on exec
  perf_counter tools: Reduce perf stat measurement overhead/skew
  perf stat: Use percentages for scaling output
  perf_counter, x86: Update x86_pmu after WARN()
  perf stat: Micro-optimize the code: memcpy is only required if no event is selected and !null_run
  perf stat: Improve output
  perf stat: Fix multi-run stats
  perf stat: Add -n/--null option to run without counters
  perf_counter tools: Remove dead code
  perf_counter: Complete counter swap
  perf report: Print sorted callchains per histogram entries
  perf_counter tools: Prepare a small callchain framework
  perf record: Fix unhandled io return value
  perf_counter tools: Add alias for 'l1d' and 'l1i'
  perf-report: Add bare minimum PERF_EVENT_READ parsing
  perf-report: Add modes for inherited stats and no-samples
  ...
This commit is contained in:
Linus Torvalds
2009-06-30 19:02:59 -07:00
32 changed files with 1602 additions and 420 deletions
+2
View File
@@ -61,6 +61,8 @@ struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
#define PERF_COUNTER_INDEX_OFFSET 1
/*
* Only override the default definitions in include/linux/perf_counter.h
* if we have hardware PMU support.
+3
View File
@@ -87,6 +87,9 @@ union cpuid10_edx {
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void);
#define PERF_COUNTER_INDEX_OFFSET 0
#else
static inline void init_hw_perf_counters(void) { }
static inline void perf_counters_lapic_init(void) { }
+10 -12
View File
@@ -401,7 +401,7 @@ static const u64 amd_hw_cache_event_ids
[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
@@ -912,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
err = checking_wrmsrl(hwc->counter_base + idx,
(u64)(-left) & x86_pmu.counter_mask);
perf_counter_update_userpage(counter);
return ret;
}
@@ -969,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
if (!x86_pmu.num_counters_fixed)
return -1;
/*
* Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86_model == 28)
return -1;
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
@@ -1041,6 +1036,8 @@ try_generic:
x86_perf_counter_set_period(counter, hwc, idx);
x86_pmu.enable(hwc, idx);
perf_counter_update_userpage(counter);
return 0;
}
@@ -1133,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
x86_perf_counter_update(counter, hwc, idx);
cpuc->counters[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
perf_counter_update_userpage(counter);
}
/*
@@ -1428,8 +1427,6 @@ static int intel_pmu_init(void)
*/
x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
/*
* Install the hw-cache-events table:
*/
@@ -1499,21 +1496,22 @@ void __init init_hw_perf_counters(void)
pr_cont("%s PMU driver.\n", x86_pmu.name);
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
}
perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
perf_max_counters = x86_pmu.num_counters;
if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
}
perf_counter_mask |=
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
x86_pmu.intel_ctrl = perf_counter_mask;
perf_counters_lapic_init();
register_die_notifier(&perf_counter_nmi_notifier);
+39 -7
View File
@@ -178,8 +178,10 @@ struct perf_counter_attr {
mmap : 1, /* include mmap data */
comm : 1, /* include comm data */
freq : 1, /* use freq, not period */
inherit_stat : 1, /* per task counts */
enable_on_exec : 1, /* next exec enables */
__reserved_1 : 53;
__reserved_1 : 51;
__u32 wakeup_events; /* wakeup every n events */
__u32 __reserved_2;
@@ -232,6 +234,14 @@ struct perf_counter_mmap_page {
__u32 lock; /* seqlock for synchronization */
__u32 index; /* hardware counter identifier */
__s64 offset; /* add to hardware counter value */
__u64 time_enabled; /* time counter active */
__u64 time_running; /* time counter on cpu */
/*
* Hole for extension of the self monitor capabilities
*/
__u64 __reserved[123]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
@@ -253,7 +263,6 @@ struct perf_counter_mmap_page {
#define PERF_EVENT_MISC_KERNEL (1 << 0)
#define PERF_EVENT_MISC_USER (2 << 0)
#define PERF_EVENT_MISC_HYPERVISOR (3 << 0)
#define PERF_EVENT_MISC_OVERFLOW (1 << 2)
struct perf_event_header {
__u32 type;
@@ -327,9 +336,18 @@ enum perf_event_type {
PERF_EVENT_FORK = 7,
/*
* When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
* will be PERF_SAMPLE_*
*
* struct {
* struct perf_event_header header;
* u32 pid, tid;
* u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 parent_id; } && PERF_FORMAT_ID
* };
*/
PERF_EVENT_READ = 8,
/*
* struct {
* struct perf_event_header header;
*
@@ -337,8 +355,9 @@ enum perf_event_type {
* { u32 pid, tid; } && PERF_SAMPLE_TID
* { u64 time; } && PERF_SAMPLE_TIME
* { u64 addr; } && PERF_SAMPLE_ADDR
* { u64 config; } && PERF_SAMPLE_CONFIG
* { u64 id; } && PERF_SAMPLE_ID
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
* { u64 nr;
* { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
@@ -347,6 +366,9 @@ enum perf_event_type {
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
* };
*/
PERF_EVENT_SAMPLE = 9,
PERF_EVENT_MAX, /* non-ABI */
};
enum perf_callchain_context {
@@ -582,6 +604,7 @@ struct perf_counter_context {
int nr_counters;
int nr_active;
int is_active;
int nr_stat;
atomic_t refcount;
struct task_struct *task;
@@ -669,7 +692,16 @@ static inline int is_software_counter(struct perf_counter *counter)
(counter->attr.type != PERF_TYPE_HW_CACHE);
}
extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
static inline void
perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
if (atomic_read(&perf_swcounter_enabled[event]))
__perf_swcounter_event(event, nr, nmi, regs, addr);
}
extern void __perf_counter_mmap(struct vm_area_struct *vma);
+271 -49
View File
File diff suppressed because it is too large Load Diff
+30
View File
@@ -0,0 +1,30 @@
Most of the infrastructure that 'perf' uses here has been reused
from the Git project, as of version:
66996ec: Sync with 1.6.2.4
Here is an (incomplete!) list of main contributors to those files
in util/* and elsewhere:
Alex Riesen
Christian Couder
Dmitry Potapov
Jeff King
Johannes Schindelin
Johannes Sixt
Junio C Hamano
Linus Torvalds
Matthias Kestenholz
Michal Ostrowski
Miklos Vajna
Petr Baudis
Pierre Habouzit
René Scharfe
Samuel Tardieu
Shawn O. Pearce
Steffen Prohaska
Steve Haslam
Thanks guys!
The full history of the files can be found in the upstream Git commits.
+13 -1
View File
@@ -13,13 +13,25 @@ SYNOPSIS
DESCRIPTION
-----------
This command displays the performance counter profile information recorded
via perf report.
via perf record.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries.
SEE ALSO
--------
+3 -3
View File
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
SYNOPSIS
--------
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
DESCRIPTION
-----------
@@ -40,7 +40,7 @@ OPTIONS
-a::
system-wide collection
-l::
-S::
scale counter values
EXAMPLES
+5 -1
View File
@@ -290,7 +290,7 @@ LIB_FILE=libperf.a
LIB_H += ../../include/linux/perf_counter.h
LIB_H += perf.h
LIB_H += types.h
LIB_H += util/types.h
LIB_H += util/list.h
LIB_H += util/rbtree.h
LIB_H += util/levenshtein.h
@@ -301,6 +301,7 @@ LIB_H += util/util.h
LIB_H += util/help.h
LIB_H += util/strbuf.h
LIB_H += util/string.h
LIB_H += util/strlist.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
LIB_H += util/symbol.h
@@ -322,12 +323,15 @@ LIB_OBJS += util/run-command.o
LIB_OBJS += util/quote.o
LIB_OBJS += util/strbuf.o
LIB_OBJS += util/string.o
LIB_OBJS += util/strlist.o
LIB_OBJS += util/usage.o
LIB_OBJS += util/wrapper.o
LIB_OBJS += util/sigchain.o
LIB_OBJS += util/symbol.o
LIB_OBJS += util/color.o
LIB_OBJS += util/pager.o
LIB_OBJS += util/header.o
LIB_OBJS += util/callchain.o
BUILTIN_OBJS += builtin-annotate.o
BUILTIN_OBJS += builtin-help.o
+4 -4
View File
@@ -855,7 +855,7 @@ static unsigned long total = 0,
total_unknown = 0;
static int
process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
char level;
int show = 0;
@@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
return process_overflow_event(event, offset, head);
switch (event->header.type) {
case PERF_EVENT_SAMPLE:
return process_sample_event(event, offset, head);
case PERF_EVENT_MMAP:
return process_mmap_event(event, offset, head);
+87 -40
View File
@@ -14,6 +14,8 @@
#include "util/parse-events.h"
#include "util/string.h"
#include "util/header.h"
#include <unistd.h>
#include <sched.h>
@@ -39,6 +41,8 @@ static int force = 0;
static int append_file = 0;
static int call_graph = 0;
static int verbose = 0;
static int inherit_stat = 0;
static int no_samples = 0;
static long samples;
static struct timeval last_read;
@@ -52,7 +56,8 @@ static int nr_poll;
static int nr_cpu;
static int file_new = 1;
static struct perf_file_header file_header;
struct perf_header *header;
struct mmap_event {
struct perf_event_header header;
@@ -306,12 +311,11 @@ static void pid_synthesize_mmap_samples(pid_t pid)
continue;
pbf += n + 3;
if (*pbf == 'x') { /* vm_exec */
char *execname = strrchr(bf, ' ');
char *execname = strchr(bf, '/');
if (execname == NULL || execname[1] != '/')
if (execname == NULL)
continue;
execname += 1;
size = strlen(execname);
execname[size - 1] = '\0'; /* Remove \n */
memcpy(mmap_ev.filename, execname, size);
@@ -329,7 +333,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
fclose(fp);
}
static void synthesize_samples(void)
static void synthesize_all(void)
{
DIR *proc;
struct dirent dirent, *next;
@@ -353,10 +357,35 @@ static void synthesize_samples(void)
static int group_fd;
static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr)
{
struct perf_header_attr *h_attr;
if (nr < header->attrs) {
h_attr = header->attr[nr];
} else {
h_attr = perf_header_attr__new(a);
perf_header__add_attr(header, h_attr);
}
return h_attr;
}
static void create_counter(int counter, int cpu, pid_t pid)
{
struct perf_counter_attr *attr = attrs + counter;
int track = 1;
struct perf_header_attr *h_attr;
int track = !counter; /* only the first counter needs these */
struct {
u64 count;
u64 time_enabled;
u64 time_running;
u64 id;
} read_data;
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
@@ -366,25 +395,20 @@ static void create_counter(int counter, int cpu, pid_t pid)
attr->sample_freq = freq;
}
if (no_samples)
attr->sample_freq = 0;
if (inherit_stat)
attr->inherit_stat = 1;
if (call_graph)
attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
if (file_new) {
file_header.sample_type = attr->sample_type;
} else {
if (file_header.sample_type != attr->sample_type) {
fprintf(stderr, "incompatible append\n");
exit(-1);
}
}
attr->mmap = track;
attr->comm = track;
attr->inherit = (cpu < 0) && inherit;
attr->disabled = 1;
track = 0; /* only the first counter needs these */
try_again:
fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
@@ -415,6 +439,22 @@ try_again:
exit(-1);
}
h_attr = get_header_attr(attr, counter);
if (!file_new) {
if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
fprintf(stderr, "incompatible append\n");
exit(-1);
}
}
if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) {
perror("Unable to read perf file descriptor\n");
exit(-1);
}
perf_header_attr__add_id(h_attr, read_data.id);
assert(fd[nr_cpu][counter] >= 0);
fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
@@ -445,11 +485,6 @@ static void open_counters(int cpu, pid_t pid)
{
int counter;
if (pid > 0) {
pid_synthesize_comm_event(pid, 0);
pid_synthesize_mmap_samples(pid);
}
group_fd = -1;
for (counter = 0; counter < nr_counters; counter++)
create_counter(counter, cpu, pid);
@@ -459,17 +494,16 @@ static void open_counters(int cpu, pid_t pid)
static void atexit_header(void)
{
file_header.data_size += bytes_written;
header->data_size += bytes_written;
if (pwrite(output, &file_header, sizeof(file_header), 0) == -1)
perror("failed to write on file headers");
perf_header__write(header, output);
}
static int __cmd_record(int argc, const char **argv)
{
int i, counter;
struct stat st;
pid_t pid;
pid_t pid = 0;
int flags;
int ret;
@@ -500,22 +534,31 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
if (!file_new) {
if (read(output, &file_header, sizeof(file_header)) == -1) {
perror("failed to read file headers");
exit(-1);
}
lseek(output, file_header.data_size, SEEK_CUR);
}
if (!file_new)
header = perf_header__read(output);
else
header = perf_header__new();
atexit(atexit_header);
if (!system_wide) {
open_counters(-1, target_pid != -1 ? target_pid : getpid());
pid = target_pid;
if (pid == -1)
pid = getpid();
open_counters(-1, pid);
} else for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid);
if (file_new)
perf_header__write(header, output);
if (!system_wide) {
pid_synthesize_comm_event(pid, 0);
pid_synthesize_mmap_samples(pid);
} else
synthesize_all();
if (target_pid == -1 && argc) {
pid = fork();
if (pid < 0)
@@ -539,10 +582,7 @@ static int __cmd_record(int argc, const char **argv)
}
}
if (system_wide)
synthesize_samples();
while (!done) {
for (;;) {
int hits = samples;
for (i = 0; i < nr_cpu; i++) {
@@ -550,8 +590,11 @@ static int __cmd_record(int argc, const char **argv)
mmap_read(&mmap_array[i][counter]);
}
if (hits == samples)
if (hits == samples) {
if (done)
break;
ret = poll(event_array, nr_poll, 100);
}
}
/*
@@ -600,6 +643,10 @@ static const struct option options[] = {
"do call-graph (stack chain/backtrace) recording"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('s', "stat", &inherit_stat,
"per thread counts"),
OPT_BOOLEAN('n', "no-samples", &no_samples,
"don't sample"),
OPT_END()
};
+195 -37
View File
@@ -15,8 +15,11 @@
#include "util/rbtree.h"
#include "util/symbol.h"
#include "util/string.h"
#include "util/callchain.h"
#include "util/strlist.h"
#include "perf.h"
#include "util/header.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -30,6 +33,8 @@ static char *vmlinux = NULL;
static char default_sort_order[] = "comm,dso";
static char *sort_order = default_sort_order;
static char *dso_list_str, *comm_list_str, *sym_list_str;
static struct strlist *dso_list, *comm_list, *sym_list;
static int input;
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -51,6 +56,9 @@ static char *parent_pattern = default_parent_pattern;
static regex_t parent_regex;
static int exclude_other = 1;
static int callchain;
static u64 sample_type;
struct ip_event {
struct perf_event_header header;
@@ -59,11 +67,6 @@ struct ip_event {
unsigned char __more_data[];
};
struct ip_callchain {
u64 nr;
u64 ips[0];
};
struct mmap_event {
struct perf_event_header header;
u32 pid, tid;
@@ -97,6 +100,13 @@ struct lost_event {
u64 lost;
};
struct read_event {
struct perf_event_header header;
u32 pid,tid;
u64 value;
u64 format[3];
};
typedef union event_union {
struct perf_event_header header;
struct ip_event ip;
@@ -105,6 +115,7 @@ typedef union event_union {
struct fork_event fork;
struct period_event period;
struct lost_event lost;
struct read_event read;
} event_t;
static LIST_HEAD(dsos);
@@ -229,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip)
static inline int is_anon_memory(const char *filename)
{
return strcmp(filename, "//anon") == 0;
return strcmp(filename, "//anon") == 0;
}
static struct map *map__new(struct mmap_event *event)
@@ -400,9 +411,27 @@ static void thread__insert_map(struct thread *self, struct map *map)
list_for_each_entry_safe(pos, tmp, &self->maps, node) {
if (map__overlap(pos, map)) {
list_del_init(&pos->node);
/* XXX leaks dsos */
free(pos);
if (verbose >= 2) {
printf("overlapping maps:\n");
map__fprintf(map, stdout);
map__fprintf(pos, stdout);
}
if (map->start <= pos->start && map->end > pos->start)
pos->start = map->end;
if (map->end >= pos->end && map->start < pos->end)
pos->end = map->start;
if (verbose >= 2) {
printf("after collision:\n");
map__fprintf(pos, stdout);
}
if (pos->start >= pos->end) {
list_del_init(&pos->node);
free(pos);
}
}
}
@@ -464,17 +493,19 @@ static size_t threads__fprintf(FILE *fp)
static struct rb_root hist;
struct hist_entry {
struct rb_node rb_node;
struct rb_node rb_node;
struct thread *thread;
struct map *map;
struct dso *dso;
struct symbol *sym;
struct symbol *parent;
u64 ip;
char level;
struct thread *thread;
struct map *map;
struct dso *dso;
struct symbol *sym;
struct symbol *parent;
u64 ip;
char level;
struct callchain_node callchain;
struct rb_root sorted_chain;
u64 count;
u64 count;
};
/*
@@ -744,6 +775,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
return cmp;
}
static size_t
callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples)
{
struct callchain_list *chain;
size_t ret = 0;
if (!self)
return 0;
ret += callchain__fprintf(fp, self->parent, total_samples);
list_for_each_entry(chain, &self->val, list)
ret += fprintf(fp, " %p\n", (void *)chain->ip);
return ret;
}
static size_t
hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
u64 total_samples)
{
struct rb_node *rb_node;
struct callchain_node *chain;
size_t ret = 0;
rb_node = rb_first(&self->sorted_chain);
while (rb_node) {
double percent;
chain = rb_entry(rb_node, struct callchain_node, rb_node);
percent = chain->hit * 100.0 / total_samples;
ret += fprintf(fp, " %6.2f%%\n", percent);
ret += callchain__fprintf(fp, chain, total_samples);
ret += fprintf(fp, "\n");
rb_node = rb_next(rb_node);
}
return ret;
}
static size_t
hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
{
@@ -784,6 +857,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
ret += fprintf(fp, "\n");
if (callchain)
hist_entry_callchain__fprintf(fp, self, total_samples);
return ret;
}
@@ -797,7 +873,7 @@ resolve_symbol(struct thread *thread, struct map **mapp,
{
struct dso *dso = dsop ? *dsop : NULL;
struct map *map = mapp ? *mapp : NULL;
uint64_t ip = *ipp;
u64 ip = *ipp;
if (!thread)
return NULL;
@@ -814,7 +890,6 @@ resolve_symbol(struct thread *thread, struct map **mapp,
*mapp = map;
got_map:
ip = map->map_ip(map, ip);
*ipp = ip;
dso = map->dso;
} else {
@@ -828,6 +903,8 @@ got_map:
dso = kernel_dso;
}
dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
*ipp = ip;
if (dsop)
*dsop = dso;
@@ -867,6 +944,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
.level = level,
.count = count,
.parent = NULL,
.sorted_chain = RB_ROOT
};
int cmp;
@@ -909,6 +987,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
if (!cmp) {
he->count += count;
if (callchain)
append_chain(&he->callchain, chain);
return 0;
}
@@ -922,6 +1002,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
if (!he)
return -ENOMEM;
*he = entry;
if (callchain) {
callchain_init(&he->callchain);
append_chain(&he->callchain, chain);
}
rb_link_node(&he->rb_node, parent, p);
rb_insert_color(&he->rb_node, &hist);
@@ -998,6 +1082,9 @@ static void output__insert_entry(struct hist_entry *he)
struct rb_node *parent = NULL;
struct hist_entry *iter;
if (callchain)
sort_chain_to_rbtree(&he->sorted_chain, &he->callchain);
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
@@ -1115,7 +1202,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
}
static int
process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
char level;
int show = 0;
@@ -1127,12 +1214,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
void *more_data = event->ip.__more_data;
struct ip_callchain *chain = NULL;
if (event->header.type & PERF_SAMPLE_PERIOD) {
if (sample_type & PERF_SAMPLE_PERIOD) {
period = *(u64 *)more_data;
more_data += sizeof(u64);
}
dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.misc,
@@ -1140,7 +1227,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
(void *)(long)ip,
(long long)period);
if (event->header.type & PERF_SAMPLE_CALLCHAIN) {
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int i;
chain = (void *)more_data;
@@ -1166,6 +1253,9 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
return -1;
}
if (comm_list && !strlist__has_entry(comm_list, thread->comm))
return 0;
if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
show = SHOW_KERNEL;
level = 'k';
@@ -1188,6 +1278,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
if (show & show_mask) {
struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
return 0;
if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
return 0;
if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
eprintf("problem incrementing symbol count, skipping event\n");
return -1;
@@ -1327,15 +1423,28 @@ static void trace_event(event_t *event)
dprintf(".\n");
}
static int
process_read_event(event_t *event, unsigned long offset, unsigned long head)
{
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->read.pid,
event->read.tid,
event->read.value);
return 0;
}
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
trace_event(event);
if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
return process_overflow_event(event, offset, head);
switch (event->header.type) {
case PERF_EVENT_SAMPLE:
return process_sample_event(event, offset, head);
case PERF_EVENT_MMAP:
return process_mmap_event(event, offset, head);
@@ -1351,6 +1460,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
case PERF_EVENT_LOST:
return process_lost_event(event, offset, head);
case PERF_EVENT_READ:
return process_read_event(event, offset, head);
/*
* We dont process them right now but they are fine:
*/
@@ -1366,13 +1478,30 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
static struct perf_file_header file_header;
static struct perf_header *header;
static u64 perf_header__sample_type(void)
{
u64 sample_type = 0;
int i;
for (i = 0; i < header->attrs; i++) {
struct perf_header_attr *attr = header->attr[i];
if (!sample_type)
sample_type = attr->attr.sample_type;
else if (sample_type != attr->attr.sample_type)
die("non matching sample_type");
}
return sample_type;
}
static int __cmd_report(void)
{
int ret, rc = EXIT_FAILURE;
unsigned long offset = 0;
unsigned long head = sizeof(file_header);
unsigned long head, shift;
struct stat stat;
event_t *event;
uint32_t size;
@@ -1400,13 +1529,12 @@ static int __cmd_report(void)
exit(0);
}
if (read(input, &file_header, sizeof(file_header)) == -1) {
perror("failed to read file headers");
exit(-1);
}
header = perf_header__read(input);
head = header->data_offset;
if (sort__has_parent &&
!(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) {
sample_type = perf_header__sample_type();
if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
fprintf(stderr, "selected --sort parent, but no callchain data\n");
exit(-1);
}
@@ -1426,6 +1554,11 @@ static int __cmd_report(void)
cwd = NULL;
cwdlen = 0;
}
shift = page_size * (head / page_size);
offset += shift;
head -= shift;
remap:
buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
MAP_SHARED, input, offset);
@@ -1442,9 +1575,10 @@ more:
size = 8;
if (head + event->header.size >= page_size * mmap_window) {
unsigned long shift = page_size * (head / page_size);
int ret;
shift = page_size * (head / page_size);
ret = munmap(buf, page_size * mmap_window);
assert(ret == 0);
@@ -1482,7 +1616,7 @@ more:
head += size;
if (offset + head >= sizeof(file_header) + file_header.data_size)
if (offset + head >= header->data_offset + header->data_size)
goto done;
if (offset + head < stat.st_size)
@@ -1536,6 +1670,13 @@ static const struct option options[] = {
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &exclude_other,
"Only display entries with parent-match"),
OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
"only consider symbols in these comms"),
OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_END()
};
@@ -1554,6 +1695,19 @@ static void setup_sorting(void)
free(str);
}
static void setup_list(struct strlist **list, const char *list_str,
const char *list_name)
{
if (list_str) {
*list = strlist__new(true, list_str);
if (!*list) {
fprintf(stderr, "problems parsing %s list\n",
list_name);
exit(129);
}
}
}
int cmd_report(int argc, const char **argv, const char *prefix)
{
symbol__init();
@@ -1575,6 +1729,10 @@ int cmd_report(int argc, const char **argv, const char *prefix)
if (argc)
usage_with_options(report_usage, options);
setup_list(&dso_list, dso_list_str, "dso");
setup_list(&comm_list, comm_list_str, "comm");
setup_list(&sym_list, sym_list_str, "symbol");
setup_pager();
return __cmd_report();
+103 -70
View File
@@ -32,6 +32,7 @@
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -45,7 +46,7 @@
#include <sys/prctl.h>
#include <math.h>
static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
static struct perf_counter_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
@@ -59,42 +60,28 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
};
#define MAX_RUN 100
static int system_wide = 0;
static int inherit = 1;
static int verbose = 0;
static int nr_cpus = 0;
static int run_idx = 0;
static int run_count = 1;
static int inherit = 1;
static int scale = 1;
static int target_pid = -1;
static int null_run = 0;
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static int target_pid = -1;
static int nr_cpus = 0;
static unsigned int page_size;
static int scale = 1;
static const unsigned int default_count[] = {
1000000,
1000000,
10000,
10000,
1000000,
10000,
};
#define MAX_RUN 100
static int run_count = 1;
static int run_idx = 0;
static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3];
static u64 runtime_nsecs[MAX_RUN];
static u64 walltime_nsecs[MAX_RUN];
static u64 runtime_cycles[MAX_RUN];
static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
static u64 event_res_avg[MAX_COUNTERS][3];
static u64 event_res_noise[MAX_COUNTERS][3];
@@ -109,7 +96,10 @@ static u64 walltime_nsecs_noise;
static u64 runtime_cycles_avg;
static u64 runtime_cycles_noise;
static void create_perf_stat_counter(int counter)
#define ERR_PERF_OPEN \
"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
static void create_perf_stat_counter(int counter, int pid)
{
struct perf_counter_attr *attr = attrs + counter;
@@ -119,20 +109,21 @@ static void create_perf_stat_counter(int counter)
if (system_wide) {
int cpu;
for (cpu = 0; cpu < nr_cpus; cpu ++) {
for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
if (fd[cpu][counter] < 0 && verbose) {
printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
}
if (fd[cpu][counter] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[cpu][counter], strerror(errno));
}
} else {
attr->inherit = inherit;
attr->disabled = 1;
attr->inherit = inherit;
attr->disabled = 1;
attr->enable_on_exec = 1;
fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
if (fd[0][counter] < 0 && verbose) {
printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
}
fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
if (fd[0][counter] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[0][counter], strerror(errno));
}
}
@@ -168,7 +159,7 @@ static void read_counter(int counter)
count[0] = count[1] = count[2] = 0;
nv = scale ? 3 : 1;
for (cpu = 0; cpu < nr_cpus; cpu ++) {
for (cpu = 0; cpu < nr_cpus; cpu++) {
if (fd[cpu][counter] < 0)
continue;
@@ -215,32 +206,67 @@ static int run_perf_stat(int argc, const char **argv)
int status = 0;
int counter;
int pid;
int child_ready_pipe[2], go_pipe[2];
char buf;
if (!system_wide)
nr_cpus = 1;
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter);
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
prctl(PR_TASK_PERF_COUNTERS_ENABLE);
if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
perror("failed to create pipes");
exit(1);
}
if ((pid = fork()) < 0)
perror("failed to fork");
if (!pid) {
if (execvp(argv[0], (char **)argv)) {
perror(argv[0]);
exit(-1);
}
close(child_ready_pipe[0]);
close(go_pipe[1]);
fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
/*
* Do a dummy execvp to get the PLT entry resolved,
* so we avoid the resolver overhead on the real
* execvp call.
*/
execvp("", (char **)argv);
/*
* Tell the parent we're ready to go
*/
close(child_ready_pipe[1]);
/*
* Wait until the parent tells us to go.
*/
read(go_pipe[0], &buf, 1);
execvp(argv[0], (char **)argv);
perror(argv[0]);
exit(-1);
}
/*
* Wait for the child to be ready to exec.
*/
close(child_ready_pipe[1]);
close(go_pipe[0]);
read(child_ready_pipe[0], &buf, 1);
close(child_ready_pipe[0]);
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter, pid);
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
close(go_pipe[1]);
wait(&status);
prctl(PR_TASK_PERF_COUNTERS_DISABLE);
t1 = rdclock();
walltime_nsecs[run_idx] = t1 - t0;
@@ -262,7 +288,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
{
double msecs = (double)count[0] / 1000000;
fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter));
fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
@@ -276,7 +302,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
static void abs_printout(int counter, u64 *count, u64 *noise)
{
fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));
if (runtime_cycles_avg &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
@@ -306,7 +332,7 @@ static void print_counter(int counter)
scaled = event_scaled_avg[counter];
if (scaled == -1) {
fprintf(stderr, " %14s %-20s\n",
fprintf(stderr, " %14s %-24s\n",
"<not counted>", event_name(counter));
return;
}
@@ -364,8 +390,11 @@ static void calc_avg(void)
event_res_avg[j]+1, event_res[i][j]+1);
update_avg("counter/2", j,
event_res_avg[j]+2, event_res[i][j]+2);
update_avg("scaled", j,
event_scaled_avg + j, event_scaled[i]+j);
if (event_scaled[i][j] != -1)
update_avg("scaled", j,
event_scaled_avg + j, event_scaled[i]+j);
else
event_scaled_avg[j] = -1;
}
}
runtime_nsecs_avg /= run_count;
@@ -429,11 +458,14 @@ static void print_stat(int argc, const char **argv)
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);
fprintf(stderr, "\n");
fprintf(stderr, " %14.9f seconds time elapsed.\n",
fprintf(stderr, " %14.9f seconds time elapsed",
(double)walltime_nsecs_avg/1e9);
fprintf(stderr, "\n");
if (run_count > 1) {
fprintf(stderr, " ( +- %7.3f%% )",
100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg);
}
fprintf(stderr, "\n\n");
}
static volatile int signr = -1;
@@ -466,13 +498,15 @@ static const struct option options[] = {
OPT_INTEGER('p', "pid", &target_pid,
"stat events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
"system-wide collection from all CPUs"),
OPT_BOOLEAN('S', "scale", &scale,
"scale/normalize counters"),
"scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &run_count,
"repeat command and print average + stddev (max: 100)"),
OPT_BOOLEAN('n', "null", &null_run,
"null run - dont start any counters"),
OPT_END()
};
@@ -480,18 +514,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
{
int status;
page_size = sysconf(_SC_PAGE_SIZE);
memcpy(attrs, default_attrs, sizeof(attrs));
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (run_count <= 0 || run_count > MAX_RUN)
usage_with_options(stat_usage, options);
if (!nr_counters)
nr_counters = 8;
/* Set attrs and nr_counters if no event is selected and !null_run */
if (!null_run && !nr_counters) {
memcpy(attrs, default_attrs, sizeof(default_attrs));
nr_counters = ARRAY_SIZE(default_attrs);
}
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
@@ -511,7 +544,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
status = 0;
for (run_idx = 0; run_idx < run_count; run_idx++) {
if (run_count != 1 && verbose)
fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1);
fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
status = run_perf_stat(argc, argv);
}
+6 -5
View File
@@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter)
samples--;
}
static void process_event(u64 ip, int counter)
static void process_event(u64 ip, int counter, int user)
{
samples++;
if (ip < min_ip || ip > max_ip) {
if (user) {
userspace_samples++;
return;
}
@@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md)
old += size;
if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
if (event->header.type & PERF_SAMPLE_IP)
process_event(event->ip.ip, md->counter);
if (event->header.type == PERF_EVENT_SAMPLE) {
int user =
(event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
process_event(event->ip.ip, md->counter, user);
}
}
+4 -5
View File
@@ -25,7 +25,7 @@
#include <sys/syscall.h>
#include "../../include/linux/perf_counter.h"
#include "types.h"
#include "util/types.h"
/*
* prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
@@ -72,10 +72,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr,
#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
struct perf_file_header {
u64 version;
u64 sample_type;
u64 data_size;
struct ip_callchain {
u64 nr;
u64 ips[0];
};
#endif
+174
View File
@@ -0,0 +1,174 @@
/*
* Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
*
* Handle the callchains from the stream in an ad-hoc radix tree and then
* sort them in an rbtree.
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include "callchain.h"
static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
while (*p) {
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
if (rnode->hit < chain->hit)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
rb_link_node(&chain->rb_node, parent, p);
rb_insert_color(&chain->rb_node, root);
}
/*
* Once we get every callchains from the stream, we can now
* sort them by hit
*/
void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
{
struct callchain_node *child;
list_for_each_entry(child, &node->children, brothers)
sort_chain_to_rbtree(rb_root, child);
if (node->hit)
rb_insert_callchain(rb_root, node);
}
static struct callchain_node *create_child(struct callchain_node *parent)
{
struct callchain_node *new;
new = malloc(sizeof(*new));
if (!new) {
perror("not enough memory to create child for code path tree");
return NULL;
}
new->parent = parent;
INIT_LIST_HEAD(&new->children);
INIT_LIST_HEAD(&new->val);
list_add_tail(&new->brothers, &parent->children);
return new;
}
static void
fill_node(struct callchain_node *node, struct ip_callchain *chain, int start)
{
int i;
for (i = start; i < chain->nr; i++) {
struct callchain_list *call;
call = malloc(sizeof(*chain));
if (!call) {
perror("not enough memory for the code path tree");
return;
}
call->ip = chain->ips[i];
list_add_tail(&call->list, &node->val);
}
node->val_nr = i - start;
}
static void add_child(struct callchain_node *parent, struct ip_callchain *chain)
{
struct callchain_node *new;
new = create_child(parent);
fill_node(new, chain, parent->val_nr);
new->hit = 1;
}
static void
split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
struct callchain_list *to_split, int idx)
{
struct callchain_node *new;
/* split */
new = create_child(parent);
list_move_tail(&to_split->list, &new->val);
new->hit = parent->hit;
parent->hit = 0;
parent->val_nr = idx;
/* create the new one */
add_child(parent, chain);
}
static int
__append_chain(struct callchain_node *root, struct ip_callchain *chain,
int start);
static int
__append_chain_children(struct callchain_node *root, struct ip_callchain *chain)
{
struct callchain_node *rnode;
/* lookup in childrens */
list_for_each_entry(rnode, &root->children, brothers) {
int ret = __append_chain(rnode, chain, root->val_nr);
if (!ret)
return 0;
}
return -1;
}
static int
__append_chain(struct callchain_node *root, struct ip_callchain *chain,
int start)
{
struct callchain_list *cnode;
int i = start;
bool found = false;
/* lookup in the current node */
list_for_each_entry(cnode, &root->val, list) {
if (cnode->ip != chain->ips[i++])
break;
if (!found)
found = true;
if (i == chain->nr)
break;
}
/* matches not, relay on the parent */
if (!found)
return -1;
/* we match only a part of the node. Split it and add the new chain */
if (i < root->val_nr) {
split_add_child(root, chain, cnode, i);
return 0;
}
/* we match 100% of the path, increment the hit */
if (i == root->val_nr) {
root->hit++;
return 0;
}
return __append_chain_children(root, chain);
}
void append_chain(struct callchain_node *root, struct ip_callchain *chain)
{
if (__append_chain_children(root, chain) == -1)
add_child(root, chain);
}
+33
View File
@@ -0,0 +1,33 @@
#ifndef __PERF_CALLCHAIN_H
#define __PERF_CALLCHAIN_H
#include "../perf.h"
#include "list.h"
#include "rbtree.h"
struct callchain_node {
struct callchain_node *parent;
struct list_head brothers;
struct list_head children;
struct list_head val;
struct rb_node rb_node;
int val_nr;
int hit;
};
struct callchain_list {
unsigned long ip;
struct list_head list;
};
static inline void callchain_init(struct callchain_node *node)
{
INIT_LIST_HEAD(&node->brothers);
INIT_LIST_HEAD(&node->children);
INIT_LIST_HEAD(&node->val);
}
void append_chain(struct callchain_node *root, struct ip_callchain *chain);
void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node);
#endif
+242
View File
@@ -0,0 +1,242 @@
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include "util.h"
#include "header.h"
/*
*
*/
struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr)
{
struct perf_header_attr *self = malloc(sizeof(*self));
if (!self)
die("nomem");
self->attr = *attr;
self->ids = 0;
self->size = 1;
self->id = malloc(sizeof(u64));
if (!self->id)
die("nomem");
return self;
}
void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
{
int pos = self->ids;
self->ids++;
if (self->ids > self->size) {
self->size *= 2;
self->id = realloc(self->id, self->size * sizeof(u64));
if (!self->id)
die("nomem");
}
self->id[pos] = id;
}
/*
*
*/
struct perf_header *perf_header__new(void)
{
struct perf_header *self = malloc(sizeof(*self));
if (!self)
die("nomem");
self->frozen = 0;
self->attrs = 0;
self->size = 1;
self->attr = malloc(sizeof(void *));
if (!self->attr)
die("nomem");
self->data_offset = 0;
self->data_size = 0;
return self;
}
void perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr)
{
int pos = self->attrs;
if (self->frozen)
die("frozen");
self->attrs++;
if (self->attrs > self->size) {
self->size *= 2;
self->attr = realloc(self->attr, self->size * sizeof(void *));
if (!self->attr)
die("nomem");
}
self->attr[pos] = attr;
}
static const char *__perf_magic = "PERFFILE";
#define PERF_MAGIC (*(u64 *)__perf_magic)
struct perf_file_section {
u64 offset;
u64 size;
};
struct perf_file_attr {
struct perf_counter_attr attr;
struct perf_file_section ids;
};
struct perf_file_header {
u64 magic;
u64 size;
u64 attr_size;
struct perf_file_section attrs;
struct perf_file_section data;
};
static void do_write(int fd, void *buf, size_t size)
{
while (size) {
int ret = write(fd, buf, size);
if (ret < 0)
die("failed to write");
size -= ret;
buf += ret;
}
}
void perf_header__write(struct perf_header *self, int fd)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
struct perf_header_attr *attr;
int i;
lseek(fd, sizeof(f_header), SEEK_SET);
for (i = 0; i < self->attrs; i++) {
attr = self->attr[i];
attr->id_offset = lseek(fd, 0, SEEK_CUR);
do_write(fd, attr->id, attr->ids * sizeof(u64));
}
self->attr_offset = lseek(fd, 0, SEEK_CUR);
for (i = 0; i < self->attrs; i++) {
attr = self->attr[i];
f_attr = (struct perf_file_attr){
.attr = attr->attr,
.ids = {
.offset = attr->id_offset,
.size = attr->ids * sizeof(u64),
}
};
do_write(fd, &f_attr, sizeof(f_attr));
}
self->data_offset = lseek(fd, 0, SEEK_CUR);
f_header = (struct perf_file_header){
.magic = PERF_MAGIC,
.size = sizeof(f_header),
.attr_size = sizeof(f_attr),
.attrs = {
.offset = self->attr_offset,
.size = self->attrs * sizeof(f_attr),
},
.data = {
.offset = self->data_offset,
.size = self->data_size,
},
};
lseek(fd, 0, SEEK_SET);
do_write(fd, &f_header, sizeof(f_header));
lseek(fd, self->data_offset + self->data_size, SEEK_SET);
self->frozen = 1;
}
static void do_read(int fd, void *buf, size_t size)
{
while (size) {
int ret = read(fd, buf, size);
if (ret < 0)
die("failed to read");
size -= ret;
buf += ret;
}
}
struct perf_header *perf_header__read(int fd)
{
struct perf_header *self = perf_header__new();
struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
int nr_attrs, nr_ids, i, j;
lseek(fd, 0, SEEK_SET);
do_read(fd, &f_header, sizeof(f_header));
if (f_header.magic != PERF_MAGIC ||
f_header.size != sizeof(f_header) ||
f_header.attr_size != sizeof(f_attr))
die("incompatible file format");
nr_attrs = f_header.attrs.size / sizeof(f_attr);
lseek(fd, f_header.attrs.offset, SEEK_SET);
for (i = 0; i < nr_attrs; i++) {
struct perf_header_attr *attr;
off_t tmp = lseek(fd, 0, SEEK_CUR);
do_read(fd, &f_attr, sizeof(f_attr));
attr = perf_header_attr__new(&f_attr.attr);
nr_ids = f_attr.ids.size / sizeof(u64);
lseek(fd, f_attr.ids.offset, SEEK_SET);
for (j = 0; j < nr_ids; j++) {
do_read(fd, &f_id, sizeof(f_id));
perf_header_attr__add_id(attr, f_id);
}
perf_header__add_attr(self, attr);
lseek(fd, tmp, SEEK_SET);
}
self->data_offset = f_header.data.offset;
self->data_size = f_header.data.size;
lseek(fd, self->data_offset + self->data_size, SEEK_SET);
self->frozen = 1;
return self;
}
+37
View File
@@ -0,0 +1,37 @@
#ifndef _PERF_HEADER_H
#define _PERF_HEADER_H
#include "../../../include/linux/perf_counter.h"
#include <sys/types.h>
#include "types.h"
struct perf_header_attr {
struct perf_counter_attr attr;
int ids, size;
u64 *id;
off_t id_offset;
};
struct perf_header {
int frozen;
int attrs, size;
struct perf_header_attr **attr;
off_t attr_offset;
u64 data_offset;
u64 data_size;
};
struct perf_header *perf_header__read(int fd);
void perf_header__write(struct perf_header *self, int fd);
void perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr);
struct perf_header_attr *
perf_header_attr__new(struct perf_counter_attr *attr);
void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
struct perf_header *perf_header__new(void);
#endif /* _PERF_HEADER_H */
-15
View File
@@ -126,21 +126,6 @@ static int is_executable(const char *name)
!S_ISREG(st.st_mode))
return 0;
#ifdef __MINGW32__
/* cannot trust the executable bit, peek into the file instead */
char buf[3] = { 0 };
int n;
int fd = open(name, O_RDONLY);
st.st_mode &= ~S_IXUSR;
if (fd >= 0) {
n = read(fd, buf, 2);
if (n == 2)
/* DOS executables start with "MZ" */
if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
st.st_mode |= S_IXUSR;
close(fd);
}
#endif
return st.st_mode & S_IXUSR;
}

Some files were not shown because too many files have changed in this diff Show More