Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes and cleanups from Ingo Molnar:
 "A kernel fix plus mostly tooling fixes, but also some tooling
  restructuring and cleanups"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf: Fix building warning on ARM 32
  perf symbols: Fix use after free in filename__read_build_id
  perf evlist: Use roundup_pow_of_two
  tools: Adopt roundup_pow_of_two
  perf tools: Make the mmap length autotuning more robust
  tools: Adopt rounddown_pow_of_two and deps
  tools: Adopt fls_long and deps
  tools: Move bitops.h from tools/perf/util to tools/
  tools: Introduce asm-generic/bitops.h
  tools lib: Move asm-generic/bitops/find.h code to tools/include and tools/lib
  tools: Whitespace prep patches for moving bitops.h
  tools: Move code originally from asm-generic/atomic.h into tools/include/asm-generic/
  tools: Move code originally from linux/log2.h to tools/include/linux/
  tools: Move __ffs implementation to tools/include/asm-generic/bitops/__ffs.h
  perf evlist: Do not use hard coded value for a mmap_pages default
  perf trace: Let the perf_evlist__mmap autosize the number of pages to use
  perf evlist: Improve the strerror_mmap method
  perf evlist: Clarify sterror_mmap variable names
  perf evlist: Fixup brown paper bag on "hint" for --mmap-pages cmdline arg
  perf trace: Provide a better explanation when mmap fails
  ...
This commit is contained in:
Linus Torvalds
2014-12-19 13:15:24 -08:00
41 changed files with 964 additions and 781 deletions
+27
View File
@@ -0,0 +1,27 @@
#ifndef __TOOLS_ASM_GENERIC_BITOPS_H
#define __TOOLS_ASM_GENERIC_BITOPS_H
/*
* tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed
* some functions.
*
* For the benefit of those who are trying to port Linux to another
* architecture, here are some C-language equivalents. You should
* recode these in the native assembly language, if at all possible.
*
* C language equivalents written by Theodore Ts'o, 9/26/92
*/
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/find.h>
#ifndef _TOOLS_LINUX_BITOPS_H_
#error only <linux/bitops.h> can be included directly
#endif
#include <asm-generic/bitops/atomic.h>
#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
+43
View File
@@ -0,0 +1,43 @@
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
#include <asm/types.h>
/**
* __ffs - find first bit in word.
* @word: The word to search
*
* Undefined if no bit exists, so code should check against 0 first.
*/
static __always_inline unsigned long __ffs(unsigned long word)
{
int num = 0;
#if __BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
num += 32;
word >>= 32;
}
#endif
if ((word & 0xffff) == 0) {
num += 16;
word >>= 16;
}
if ((word & 0xff) == 0) {
num += 8;
word >>= 8;
}
if ((word & 0xf) == 0) {
num += 4;
word >>= 4;
}
if ((word & 0x3) == 0) {
num += 2;
word >>= 2;
}
if ((word & 0x1) == 0)
num += 1;
return num;
}
#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */
+1
View File
@@ -0,0 +1 @@
#include <../../../../include/asm-generic/bitops/__fls.h>
+22
View File
@@ -0,0 +1,22 @@
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
#include <asm/types.h>
static inline void set_bit(int nr, unsigned long *addr)
{
addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
}
static inline void clear_bit(int nr, unsigned long *addr)
{
addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
}
static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
{
return ((1UL << (nr % __BITS_PER_LONG)) &
(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
}
#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
+33
View File
@@ -0,0 +1,33 @@
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
#ifndef find_next_bit
/**
* find_next_bit - find the next set bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The bitmap size in bits
*
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
size, unsigned long offset);
#endif
#ifndef find_first_bit
/**
* find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
* @size: The maximum number of bits to search
*
* Returns the bit number of the first set bit.
* If no bits are set, returns @size.
*/
extern unsigned long find_first_bit(const unsigned long *addr,
unsigned long size);
#endif /* find_first_bit */
#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
+1
View File
@@ -0,0 +1 @@
#include <../../../../include/asm-generic/bitops/fls.h>
+1
View File
@@ -0,0 +1 @@
#include <../../../../include/asm-generic/bitops/fls64.h>
+53
View File
@@ -0,0 +1,53 @@
#ifndef _TOOLS_LINUX_BITOPS_H_
#define _TOOLS_LINUX_BITOPS_H_
#include <linux/kernel.h>
#include <linux/compiler.h>
#include <asm/hweight.h>
#ifndef __WORDSIZE
#define __WORDSIZE (__SIZEOF_LONG__ * 8)
#endif
#define BITS_PER_LONG __WORDSIZE
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BITS_PER_BYTE 8
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
/*
* Include this here because some architectures need generic_ffs/fls in
* scope
*
* XXX: this needs to be asm/bitops.h, when we get to per arch optimizations
*/
#include <asm-generic/bitops.h>
#define for_each_set_bit(bit, addr, size) \
for ((bit) = find_first_bit((addr), (size)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
/* same as for_each_set_bit() but use bit as value to start with */
#define for_each_set_bit_from(bit, addr, size) \
for ((bit) = find_next_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
static inline unsigned long hweight_long(unsigned long w)
{
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}
static inline unsigned fls_long(unsigned long l)
{
if (sizeof(l) == 4)
return fls(l);
return fls64(l);
}
#endif
+185
View File
@@ -0,0 +1,185 @@
/* Integer base 2 logarithm calculation
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _TOOLS_LINUX_LOG2_H
#define _TOOLS_LINUX_LOG2_H
/*
* deal with unrepresentable constant logarithms
*/
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
/*
* non-constant log of base 2 calculators
* - the arch may override these in asm/bitops.h if they can be implemented
* more efficiently than using fls() and fls64()
* - the arch is not required to handle n==0 if implementing the fallback
*/
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
/*
* Determine whether some value is a power of two, where zero is
* *not* considered a power of two.
*/
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return (n != 0 && ((n & (n - 1)) == 0));
}
/*
* round up to nearest power of two
*/
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
return 1UL << fls_long(n - 1);
}
/*
* round down to nearest power of two
*/
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
return 1UL << (fls_long(n) - 1);
}
/**
* ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
* @n - parameter
*
* constant-capable log of base 2 calculation
* - this can be used to initialise global variables from constant data, hence
* the massive ternary operator construction
*
* selects the appropriately-sized optimised version depending on sizeof(n)
*/
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
/**
* roundup_pow_of_two - round the given value up to nearest power of two
* @n - parameter
*
* round the given value up to the nearest power of two
* - the result is undefined when n == 0
* - this can be used to initialise global variables from constant data
*/
#define roundup_pow_of_two(n) \
( \
__builtin_constant_p(n) ? ( \
(n == 1) ? 1 : \
(1UL << (ilog2((n) - 1) + 1)) \
) : \
__roundup_pow_of_two(n) \
)
/**
* rounddown_pow_of_two - round the given value down to nearest power of two
* @n - parameter
*
* round the given value down to the nearest power of two
* - the result is undefined when n == 0
* - this can be used to initialise global variables from constant data
*/
#define rounddown_pow_of_two(n) \
( \
__builtin_constant_p(n) ? ( \
(1UL << ilog2(n))) : \
__rounddown_pow_of_two(n) \
)
#endif /* _TOOLS_LINUX_LOG2_H */
+34
View File
@@ -7,6 +7,10 @@
#include <stdlib.h>
#include <string.h>
#include <sys/vfs.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "debugfs.h"
#include "fs.h"
@@ -163,3 +167,33 @@ const char *name##__mountpoint(void) \
FS__MOUNTPOINT(sysfs, FS__SYSFS);
FS__MOUNTPOINT(procfs, FS__PROCFS);
int filename__read_int(const char *filename, int *value)
{
char line[64];
int fd = open(filename, O_RDONLY), err = -1;
if (fd < 0)
return -1;
if (read(fd, line, sizeof(line)) > 0) {
*value = atoi(line);
err = 0;
}
close(fd);
return err;
}
int sysctl__read_int(const char *sysctl, int *value)
{
char path[PATH_MAX];
const char *procfs = procfs__mountpoint();
if (!procfs)
return -1;
snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);
return filename__read_int(path, value);
}
+3
View File
@@ -11,4 +11,7 @@
const char *sysfs__mountpoint(void);
const char *procfs__mountpoint(void);
int filename__read_int(const char *filename, int *value);
int sysctl__read_int(const char *sysctl, int *value);
#endif /* __API_FS__ */
+89
View File
@@ -0,0 +1,89 @@
/* find_next_bit.c: fallback find next bit implementation
*
* Copied from lib/find_next_bit.c to tools/lib/next_bit.c
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/bitops.h>
#include <asm/types.h>
#include <asm/byteorder.h>
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
#ifndef find_next_bit
/*
* Find the next set bit in a memory region.
*/
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if ((tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
}
#endif
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
*/
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
const unsigned long *p = addr;
unsigned long result = 0;
unsigned long tmp;
while (size & ~(BITS_PER_LONG-1)) {
if ((tmp = *(p++)))
goto found;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found:
return result + __ffs(tmp);
}
#endif
+4
View File
@@ -18,6 +18,10 @@ OPTIONS
--debug verbose # sets verbose = 1
--debug verbose=2 # sets verbose = 2
--buildid-dir::
Setup buildid cache directory. It has higher priority than
buildid.dir config file option.
DESCRIPTION
-----------
Performance counters for Linux are a new kernel-based subsystem
+15 -1
View File
@@ -4,17 +4,31 @@ tools/lib/traceevent
tools/lib/api
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/lib/util/find_next_bit.c
tools/include/asm/bug.h
tools/include/asm-generic/bitops/atomic.h
tools/include/asm-generic/bitops/__ffs.h
tools/include/asm-generic/bitops/__fls.h
tools/include/asm-generic/bitops/find.h
tools/include/asm-generic/bitops/fls64.h
tools/include/asm-generic/bitops/fls.h
tools/include/asm-generic/bitops.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
tools/include/linux/hash.h
tools/include/linux/export.h
tools/include/linux/hash.h
tools/include/linux/log2.h
tools/include/linux/types.h
include/asm-generic/bitops/fls64.h
include/asm-generic/bitops/__fls.h
include/asm-generic/bitops/fls.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
include/linux/list.h
include/linux/hash.h
include/linux/stringify.h
lib/find_next_bit.c
lib/rbtree.c
include/linux/swab.h
arch/*/include/asm/unistd*.h
+13 -2
View File
@@ -231,8 +231,16 @@ LIB_H += ../../include/uapi/linux/const.h
LIB_H += ../include/linux/hash.h
LIB_H += ../../include/linux/stringify.h
LIB_H += util/include/linux/bitmap.h
LIB_H += util/include/linux/bitops.h
LIB_H += ../include/linux/bitops.h
LIB_H += ../include/asm-generic/bitops/atomic.h
LIB_H += ../include/asm-generic/bitops/find.h
LIB_H += ../include/asm-generic/bitops/fls64.h
LIB_H += ../include/asm-generic/bitops/fls.h
LIB_H += ../include/asm-generic/bitops/__ffs.h
LIB_H += ../include/asm-generic/bitops/__fls.h
LIB_H += ../include/asm-generic/bitops.h
LIB_H += ../include/linux/compiler.h
LIB_H += ../include/linux/log2.h
LIB_H += util/include/linux/const.h
LIB_H += util/include/linux/ctype.h
LIB_H += util/include/linux/kernel.h
@@ -335,6 +343,7 @@ LIB_OBJS += $(OUTPUT)util/event.o
LIB_OBJS += $(OUTPUT)util/evlist.o
LIB_OBJS += $(OUTPUT)util/evsel.o
LIB_OBJS += $(OUTPUT)util/exec_cmd.o
LIB_OBJS += $(OUTPUT)util/find_next_bit.o
LIB_OBJS += $(OUTPUT)util/help.o
LIB_OBJS += $(OUTPUT)util/kallsyms.o
LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -458,7 +467,6 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
endif
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
@@ -735,6 +743,9 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
+269 -151
View File
@@ -13,6 +13,7 @@
#include "../util/cloexec.h"
#include "bench.h"
#include "mem-memcpy-arch.h"
#include "mem-memset-arch.h"
#include <stdio.h>
#include <stdlib.h>
@@ -48,20 +49,24 @@ static const struct option options[] = {
};
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);
struct routine {
const char *name;
const char *desc;
memcpy_t fn;
union {
memcpy_t memcpy;
memset_t memset;
} fn;
};
struct routine routines[] = {
{ "default",
"Default memcpy() provided by glibc",
memcpy },
struct routine memcpy_routines[] = {
{ .name = "default",
.desc = "Default memcpy() provided by glibc",
.fn.memcpy = memcpy },
#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
#include "mem-memcpy-x86-64-asm-def.h"
#undef MEMCPY_FN
@@ -69,7 +74,7 @@ struct routine routines[] = {
{ NULL,
NULL,
NULL }
{NULL} }
};
static const char * const bench_mem_memcpy_usage[] = {
@@ -110,7 +115,161 @@ static double timeval2double(struct timeval *ts)
(double)ts->tv_usec / (double)1000000;
}
static void alloc_mem(void **dst, void **src, size_t length)
#define pf (no_prefault ? 0 : 1)
#define print_bps(x) do { \
if (x < K) \
printf(" %14lf B/Sec", x); \
else if (x < K * K) \
printf(" %14lfd KB/Sec", x / K); \
else if (x < K * K * K) \
printf(" %14lf MB/Sec", x / K / K); \
else \
printf(" %14lf GB/Sec", x / K / K / K); \
} while (0)
struct bench_mem_info {
const struct routine *routines;
u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
const char *const *usage;
};
static int bench_mem_common(int argc, const char **argv,
const char *prefix __maybe_unused,
struct bench_mem_info *info)
{
int i;
size_t len;
double totallen;
double result_bps[2];
u64 result_cycle[2];
argc = parse_options(argc, argv, options,
info->usage, 0);
if (no_prefault && only_prefault) {
fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
return 1;
}
if (use_cycle)
init_cycle();
len = (size_t)perf_atoll((char *)length_str);
totallen = (double)len * iterations;
result_cycle[0] = result_cycle[1] = 0ULL;
result_bps[0] = result_bps[1] = 0.0;
if ((s64)len <= 0) {
fprintf(stderr, "Invalid length:%s\n", length_str);
return 1;
}
/* same to without specifying either of prefault and no-prefault */
if (only_prefault && no_prefault)
only_prefault = no_prefault = false;
for (i = 0; info->routines[i].name; i++) {
if (!strcmp(info->routines[i].name, routine))
break;
}
if (!info->routines[i].name) {
printf("Unknown routine:%s\n", routine);
printf("Available routines...\n");
for (i = 0; info->routines[i].name; i++) {
printf("\t%s ... %s\n",
info->routines[i].name, info->routines[i].desc);
}
return 1;
}
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s Bytes ...\n\n", length_str);
if (!only_prefault && !no_prefault) {
/* show both of results */
if (use_cycle) {
result_cycle[0] =
info->do_cycle(&info->routines[i], len, false);
result_cycle[1] =
info->do_cycle(&info->routines[i], len, true);
} else {
result_bps[0] =
info->do_gettimeofday(&info->routines[i],
len, false);
result_bps[1] =
info->do_gettimeofday(&info->routines[i],
len, true);
}
} else {
if (use_cycle) {
result_cycle[pf] =
info->do_cycle(&info->routines[i],
len, only_prefault);
} else {
result_bps[pf] =
info->do_gettimeofday(&info->routines[i],
len, only_prefault);
}
}
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf(" %14lf Cycle/Byte\n",
(double)result_cycle[0]
/ totallen);
printf(" %14lf Cycle/Byte (with prefault)\n",
(double)result_cycle[1]
/ totallen);
} else {
print_bps(result_bps[0]);
printf("\n");
print_bps(result_bps[1]);
printf(" (with prefault)\n");
}
} else {
if (use_cycle) {
printf(" %14lf Cycle/Byte",
(double)result_cycle[pf]
/ totallen);
} else
print_bps(result_bps[pf]);
printf("%s\n", only_prefault ? " (with prefault)" : "");
}
break;
case BENCH_FORMAT_SIMPLE:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf("%lf %lf\n",
(double)result_cycle[0] / totallen,
(double)result_cycle[1] / totallen);
} else {
printf("%lf %lf\n",
result_bps[0], result_bps[1]);
}
} else {
if (use_cycle) {
printf("%lf\n", (double)result_cycle[pf]
/ totallen);
} else
printf("%lf\n", result_bps[pf]);
}
break;
default:
/* reaching this means there's some disaster: */
die("unknown format: %d\n", bench_format);
break;
}
return 0;
}
static void memcpy_alloc_mem(void **dst, void **src, size_t length)
{
*dst = zalloc(length);
if (!*dst)
@@ -123,13 +282,14 @@ static void alloc_mem(void **dst, void **src, size_t length)
memset(*src, 0, length);
}
static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
void *src = NULL, *dst = NULL;
memcpy_t fn = r->fn.memcpy;
int i;
alloc_mem(&src, &dst, len);
memcpy_alloc_mem(&src, &dst, len);
if (prefault)
fn(dst, src, len);
@@ -144,13 +304,15 @@ static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
return cycle_end - cycle_start;
}
static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
bool prefault)
{
struct timeval tv_start, tv_end, tv_diff;
memcpy_t fn = r->fn.memcpy;
void *src = NULL, *dst = NULL;
int i;
alloc_mem(&src, &dst, len);
memcpy_alloc_mem(&src, &dst, len);
if (prefault)
fn(dst, src, len);
@@ -164,149 +326,105 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
free(src);
free(dst);
return (double)((double)len / timeval2double(&tv_diff));
return (double)(((double)len * iterations) / timeval2double(&tv_diff));
}
#define pf (no_prefault ? 0 : 1)
#define print_bps(x) do { \
if (x < K) \
printf(" %14lf B/Sec", x); \
else if (x < K * K) \
printf(" %14lfd KB/Sec", x / K); \
else if (x < K * K * K) \
printf(" %14lf MB/Sec", x / K / K); \
else \
printf(" %14lf GB/Sec", x / K / K / K); \
} while (0)
int bench_mem_memcpy(int argc, const char **argv,
const char *prefix __maybe_unused)
{
int i;
size_t len;
double result_bps[2];
u64 result_cycle[2];
struct bench_mem_info info = {
.routines = memcpy_routines,
.do_cycle = do_memcpy_cycle,
.do_gettimeofday = do_memcpy_gettimeofday,
.usage = bench_mem_memcpy_usage,
};
argc = parse_options(argc, argv, options,
bench_mem_memcpy_usage, 0);
if (no_prefault && only_prefault) {
fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
return 1;
}
if (use_cycle)
init_cycle();
len = (size_t)perf_atoll((char *)length_str);
result_cycle[0] = result_cycle[1] = 0ULL;
result_bps[0] = result_bps[1] = 0.0;
if ((s64)len <= 0) {
fprintf(stderr, "Invalid length:%s\n", length_str);
return 1;
}
/* same to without specifying either of prefault and no-prefault */
if (only_prefault && no_prefault)
only_prefault = no_prefault = false;
for (i = 0; routines[i].name; i++) {
if (!strcmp(routines[i].name, routine))
break;
}
if (!routines[i].name) {
printf("Unknown routine:%s\n", routine);
printf("Available routines...\n");
for (i = 0; routines[i].name; i++) {
printf("\t%s ... %s\n",
routines[i].name, routines[i].desc);
}
return 1;
}
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s Bytes ...\n\n", length_str);
if (!only_prefault && !no_prefault) {
/* show both of results */
if (use_cycle) {
result_cycle[0] =
do_memcpy_cycle(routines[i].fn, len, false);
result_cycle[1] =
do_memcpy_cycle(routines[i].fn, len, true);
} else {
result_bps[0] =
do_memcpy_gettimeofday(routines[i].fn,
len, false);
result_bps[1] =
do_memcpy_gettimeofday(routines[i].fn,
len, true);
}
} else {
if (use_cycle) {
result_cycle[pf] =
do_memcpy_cycle(routines[i].fn,
len, only_prefault);
} else {
result_bps[pf] =
do_memcpy_gettimeofday(routines[i].fn,
len, only_prefault);
}
}
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf(" %14lf Cycle/Byte\n",
(double)result_cycle[0]
/ (double)len);
printf(" %14lf Cycle/Byte (with prefault)\n",
(double)result_cycle[1]
/ (double)len);
} else {
print_bps(result_bps[0]);
printf("\n");
print_bps(result_bps[1]);
printf(" (with prefault)\n");
}
} else {
if (use_cycle) {
printf(" %14lf Cycle/Byte",
(double)result_cycle[pf]
/ (double)len);
} else
print_bps(result_bps[pf]);
printf("%s\n", only_prefault ? " (with prefault)" : "");
}
break;
case BENCH_FORMAT_SIMPLE:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf("%lf %lf\n",
(double)result_cycle[0] / (double)len,
(double)result_cycle[1] / (double)len);
} else {
printf("%lf %lf\n",
result_bps[0], result_bps[1]);
}
} else {
if (use_cycle) {
printf("%lf\n", (double)result_cycle[pf]
/ (double)len);
} else
printf("%lf\n", result_bps[pf]);
}
break;
default:
/* reaching this means there's some disaster: */
die("unknown format: %d\n", bench_format);
break;
}
return 0;
return bench_mem_common(argc, argv, prefix, &info);
}
static void memset_alloc_mem(void **dst, size_t length)
{
*dst = zalloc(length);
if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
}
static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
memset_t fn = r->fn.memset;
void *dst = NULL;
int i;
memset_alloc_mem(&dst, len);
if (prefault)
fn(dst, -1, len);
cycle_start = get_cycle();
for (i = 0; i < iterations; ++i)
fn(dst, i, len);
cycle_end = get_cycle();
free(dst);
return cycle_end - cycle_start;
}
static double do_memset_gettimeofday(const struct routine *r, size_t len,
bool prefault)
{
struct timeval tv_start, tv_end, tv_diff;
memset_t fn = r->fn.memset;
void *dst = NULL;
int i;
memset_alloc_mem(&dst, len);
if (prefault)
fn(dst, -1, len);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < iterations; ++i)
fn(dst, i, len);
BUG_ON(gettimeofday(&tv_end, NULL));
timersub(&tv_end, &tv_start, &tv_diff);
free(dst);
return (double)(((double)len * iterations) / timeval2double(&tv_diff));
}
static const char * const bench_mem_memset_usage[] = {
"perf bench mem memset <options>",
NULL
};
static const struct routine memset_routines[] = {
{ .name ="default",
.desc = "Default memset() provided by glibc",
.fn.memset = memset },
#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
#include "mem-memset-x86-64-asm-def.h"
#undef MEMSET_FN
#endif
{ .name = NULL,
.desc = NULL,
.fn.memset = NULL }
};
int bench_mem_memset(int argc, const char **argv,
const char *prefix __maybe_unused)
{
struct bench_mem_info info = {
.routines = memset_routines,
.do_cycle = do_memset_cycle,
.do_gettimeofday = do_memset_gettimeofday,
.usage = bench_mem_memset_usage,
};
return bench_mem_common(argc, argv, prefix, &info);
}
-304
View File
@@ -1,304 +0,0 @@
/*
* mem-memset.c
*
* memset: Simple memory set in various ways
*
* Trivial clone of mem-memcpy.c.
*/
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "../util/cloexec.h"
#include "bench.h"
#include "mem-memset-arch.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <errno.h>
#define K 1024
static const char *length_str = "1MB";
static const char *routine = "default";
static int iterations = 1;
static bool use_cycle;
static int cycle_fd;
static bool only_prefault;
static bool no_prefault;
static const struct option options[] = {
OPT_STRING('l', "length", &length_str, "1MB",
"Specify length of memory to set. "
"Available units: B, KB, MB, GB and TB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to set"),
OPT_INTEGER('i', "iterations", &iterations,
"repeat memset() invocation this number of times"),
OPT_BOOLEAN('c', "cycle", &use_cycle,
"Use cycles event instead of gettimeofday() for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
"Show only the result with page faults before memset()"),
OPT_BOOLEAN('n', "no-prefault", &no_prefault,
"Show only the result without page faults before memset()"),
OPT_END()
};
typedef void *(*memset_t)(void *, int, size_t);
struct routine {
const char *name;
const char *desc;
memset_t fn;
};
static const struct routine routines[] = {
{ "default",
"Default memset() provided by glibc",
memset },
#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) { name, desc, fn },
#include "mem-memset-x86-64-asm-def.h"
#undef MEMSET_FN
#endif
{ NULL,
NULL,
NULL }
};
static const char * const bench_mem_memset_usage[] = {
"perf bench mem memset <options>",
NULL
};
static struct perf_event_attr cycle_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES
};
static void init_cycle(void)
{
cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
perf_event_open_cloexec_flag());
if (cycle_fd < 0 && errno == ENOSYS)
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
else
BUG_ON(cycle_fd < 0);
}
static u64 get_cycle(void)
{
int ret;
u64 clk;
ret = read(cycle_fd, &clk, sizeof(u64));
BUG_ON(ret != sizeof(u64));
return clk;
}
static double timeval2double(struct timeval *ts)
{
return (double)ts->tv_sec +
(double)ts->tv_usec / (double)1000000;
}
static void alloc_mem(void **dst, size_t length)
{
*dst = zalloc(length);
if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
}
static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
void *dst = NULL;
int i;
alloc_mem(&dst, len);
if (prefault)
fn(dst, -1, len);
cycle_start = get_cycle();
for (i = 0; i < iterations; ++i)
fn(dst, i, len);
cycle_end = get_cycle();
free(dst);
return cycle_end - cycle_start;
}
static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
{
struct timeval tv_start, tv_end, tv_diff;
void *dst = NULL;
int i;
alloc_mem(&dst, len);
if (prefault)
fn(dst, -1, len);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < iterations; ++i)
fn(dst, i, len);
BUG_ON(gettimeofday(&tv_end, NULL));
timersub(&tv_end, &tv_start, &tv_diff);
free(dst);
return (double)((double)len / timeval2double(&tv_diff));
}
#define pf (no_prefault ? 0 : 1)
#define print_bps(x) do { \
if (x < K) \
printf(" %14lf B/Sec", x); \
else if (x < K * K) \
printf(" %14lfd KB/Sec", x / K); \
else if (x < K * K * K) \
printf(" %14lf MB/Sec", x / K / K); \
else \
printf(" %14lf GB/Sec", x / K / K / K); \
} while (0)
int bench_mem_memset(int argc, const char **argv,
const char *prefix __maybe_unused)
{
int i;
size_t len;
double result_bps[2];
u64 result_cycle[2];
argc = parse_options(argc, argv, options,
bench_mem_memset_usage, 0);
if (no_prefault && only_prefault) {
fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
return 1;
}
if (use_cycle)
init_cycle();
len = (size_t)perf_atoll((char *)length_str);
result_cycle[0] = result_cycle[1] = 0ULL;
result_bps[0] = result_bps[1] = 0.0;
if ((s64)len <= 0) {
fprintf(stderr, "Invalid length:%s\n", length_str);
return 1;
}
/* same to without specifying either of prefault and no-prefault */
if (only_prefault && no_prefault)
only_prefault = no_prefault = false;
for (i = 0; routines[i].name; i++) {
if (!strcmp(routines[i].name, routine))
break;
}
if (!routines[i].name) {
printf("Unknown routine:%s\n", routine);
printf("Available routines...\n");
for (i = 0; routines[i].name; i++) {
printf("\t%s ... %s\n",
routines[i].name, routines[i].desc);
}
return 1;
}
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s Bytes ...\n\n", length_str);
if (!only_prefault && !no_prefault) {
/* show both of results */
if (use_cycle) {
result_cycle[0] =
do_memset_cycle(routines[i].fn, len, false);
result_cycle[1] =
do_memset_cycle(routines[i].fn, len, true);
} else {
result_bps[0] =
do_memset_gettimeofday(routines[i].fn,
len, false);
result_bps[1] =
do_memset_gettimeofday(routines[i].fn,
len, true);
}
} else {
if (use_cycle) {
result_cycle[pf] =
do_memset_cycle(routines[i].fn,
len, only_prefault);
} else {
result_bps[pf] =
do_memset_gettimeofday(routines[i].fn,
len, only_prefault);
}
}
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf(" %14lf Cycle/Byte\n",
(double)result_cycle[0]
/ (double)len);
printf(" %14lf Cycle/Byte (with prefault)\n ",
(double)result_cycle[1]
/ (double)len);
} else {
print_bps(result_bps[0]);
printf("\n");
print_bps(result_bps[1]);
printf(" (with prefault)\n");
}
} else {
if (use_cycle) {
printf(" %14lf Cycle/Byte",
(double)result_cycle[pf]
/ (double)len);
} else
print_bps(result_bps[pf]);
printf("%s\n", only_prefault ? " (with prefault)" : "");
}
break;
case BENCH_FORMAT_SIMPLE:
if (!only_prefault && !no_prefault) {
if (use_cycle) {
printf("%lf %lf\n",
(double)result_cycle[0] / (double)len,
(double)result_cycle[1] / (double)len);
} else {
printf("%lf %lf\n",
result_bps[0], result_bps[1]);
}
} else {
if (use_cycle) {
printf("%lf\n", (double)result_cycle[pf]
/ (double)len);
} else
printf("%lf\n", result_bps[pf]);
}
break;
default:
/* reaching this means there's some disaster: */
die("unknown format: %d\n", bench_format);
break;
}
return 0;
}
+5 -8
View File
@@ -285,12 +285,11 @@ int cmd_buildid_cache(int argc, const char **argv,
struct str_node *pos;
int ret = 0;
bool force = false;
char debugdir[PATH_MAX];
char const *add_name_list_str = NULL,
*remove_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
*kcore_filename;
*kcore_filename = NULL;
char sbuf[STRERR_BUFSIZE];
struct perf_data_file file = {
@@ -335,13 +334,11 @@ int cmd_buildid_cache(int argc, const char **argv,
setup_pager();
snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
if (add_name_list_str) {
list = strlist__new(true, add_name_list_str);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__add_file(pos->s, debugdir)) {
if (build_id_cache__add_file(pos->s, buildid_dir)) {
if (errno == EEXIST) {
pr_debug("%s already in the cache\n",
pos->s);
@@ -359,7 +356,7 @@ int cmd_buildid_cache(int argc, const char **argv,
list = strlist__new(true, remove_name_list_str);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__remove_file(pos->s, debugdir)) {
if (build_id_cache__remove_file(pos->s, buildid_dir)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
pos->s);
@@ -380,7 +377,7 @@ int cmd_buildid_cache(int argc, const char **argv,
list = strlist__new(true, update_name_list_str);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__update_file(pos->s, debugdir)) {
if (build_id_cache__update_file(pos->s, buildid_dir)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
pos->s);
@@ -395,7 +392,7 @@ int cmd_buildid_cache(int argc, const char **argv,
}
if (kcore_filename &&
build_id_cache__add_kcore(kcore_filename, debugdir, force))
build_id_cache__add_kcore(kcore_filename, buildid_dir, force))
pr_warning("Couldn't add %s\n", kcore_filename);
out:
+2 -1
View File
@@ -1293,7 +1293,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
OPT_UINTEGER('d', "display", &kvm->display_time,
"time in seconds between display updates"),
OPT_STRING(0, "event", &kvm->report_event, "report event",
"event for reporting: vmexit, mmio, ioport"),
"event for reporting: "
"vmexit, mmio (x86 only), ioport (x86 only)"),
OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
"vcpu id to report"),
OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+7 -7
View File
@@ -2045,7 +2045,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
unsigned long before;
const bool forks = argc > 0;
bool draining = false;
char sbuf[STRERR_BUFSIZE];
trace->live = true;
@@ -2106,11 +2105,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_error_open;
err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
if (err < 0) {
fprintf(trace->output, "Couldn't mmap the events: %s\n",
strerror_r(errno, sbuf, sizeof(sbuf)));
goto out_delete_evlist;
}
if (err < 0)
goto out_error_mmap;
perf_evlist__enable(evlist);
@@ -2210,6 +2206,10 @@ out_error_tp:
perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
out_error_mmap:
perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
out_error_open:
perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
@@ -2485,7 +2485,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
.no_buffering = true,
.mmap_pages = 1024,
.mmap_pages = UINT_MAX,
},
.output = stdout,
.show_comm = true,

Some files were not shown because too many files have changed in this diff Show More