Bug 872496 - Allow early registration of stack tops, to improve native unwind quality. r=bgirard.

Julian Seward 2013-05-28 14:03:38 +02:00
parent b0b966b2cd
commit 004a71f006
18 changed files with 261 additions and 68 deletions
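The change follows one pattern at every call site: the thread declares a char local in (or near) its outermost frame and passes its address to the profiler, which records it as a conservative estimate of that thread's stack top. A minimal sketch of the caller side, assuming the post-commit API; the thread name and body are illustrative, not from this commit:

    static void ExampleThreadMain()
    {
      // A local in the outermost frame approximates the stack top:
      // everything live during later samples sits below its address.
      char aLocal;
      profiler_register_thread("Example Thread", &aLocal);

      // ... thread body runs and may be sampled ...

      profiler_unregister_thread();
    }

Registering at thread start means the recorded limit covers the thread's whole lifetime, which is what lets the native unwinder walk samples all the way down to the base frame.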

View File

@@ -672,7 +672,8 @@ NS_IMETHODIMP
 TransactionThreadPoolListener::OnThreadCreated()
 {
   MOZ_ASSERT(!NS_IsMainThread());
-  profiler_register_thread("IndexedDB Transaction");
+  char aLocal;
+  profiler_register_thread("IndexedDB Transaction", &aLocal);
   return NS_OK;
 }

View File

@@ -514,7 +514,8 @@ public:
   JSRuntime* rt = JS_GetRuntime(cx);
-  profiler_register_thread("WebWorker");
+  char aLocal;
+  profiler_register_thread("WebWorker", &aLocal);
 #ifdef MOZ_ENABLE_PROFILER_SPS
   if (PseudoStack* stack = mozilla_get_pseudo_stack())
     stack->sampleRuntime(rt);

View File

@@ -137,7 +137,8 @@ void Thread::StopSoon() {
 }
 void Thread::ThreadMain() {
-  profiler_register_thread(name_.c_str());
+  char aLocal;
+  profiler_register_thread(name_.c_str(), &aLocal);
   // The message loop for this thread.
   MessageLoop message_loop(startup_data_->options.message_loop_type);

View File

@@ -3885,7 +3885,8 @@ XREMain::XRE_mainRun()
 int
 XREMain::XRE_main(int argc, char* argv[], const nsXREAppData* aAppData)
 {
-  GeckoProfilerInitRAII profilerGuard;
+  char aLocal;
+  GeckoProfilerInitRAII profilerGuard(&aLocal);
   PROFILER_LABEL("Startup", "XRE_Main");
   nsresult rv = NS_OK;
@@ -4080,7 +4081,8 @@ public:
 int
 XRE_mainMetro(int argc, char* argv[], const nsXREAppData* aAppData)
 {
-  GeckoProfilerInitRAII profilerGuard;
+  char aLocal;
+  GeckoProfilerInitRAII profilerGuard(&aLocal);
   PROFILER_LABEL("Startup", "XRE_Main");
   nsresult rv = NS_OK;

View File

@@ -282,7 +282,8 @@ XRE_InitChildProcess(int aArgc,
   NS_ENSURE_ARG_MIN(aArgc, 2);
   NS_ENSURE_ARG_POINTER(aArgv);
   NS_ENSURE_ARG_POINTER(aArgv[0]);
-  profiler_init();
+  char aLocal;
+  profiler_init(&aLocal);
   PROFILER_LABEL("Startup", "XRE_InitChildProcess");
   sChildProcessType = aProcess;

View File

@@ -79,7 +79,7 @@
 // Initialize the profiler TLS, signal handlers on linux. If MOZ_PROFILER_STARTUP
 // is set the profiler will be started. This call must happen before any other
 // sampler calls. Particularly sampler_label/sampler_marker.
-static inline void profiler_init() {};
+static inline void profiler_init(void* stackTop) {};
 // Clean up the profiler module, stopping it if required. This function may
 // also save a shutdown profile if requested. No profiler calls should happen
@@ -135,7 +135,7 @@ static inline void profiler_lock() {}
 // Re-enable the profiler and notify 'profiler-unlocked'.
 static inline void profiler_unlock() {}
-static inline void profiler_register_thread(const char* name) {}
+static inline void profiler_register_thread(const char* name, void* stackTop) {}
 static inline void profiler_unregister_thread() {}
 // Called by the JSRuntime's operation callback. This is used to enable
@@ -152,8 +152,8 @@ static inline double profiler_time() { return 0; }
 class GeckoProfilerInitRAII {
 public:
-  GeckoProfilerInitRAII() {
-    profiler_init();
+  GeckoProfilerInitRAII(void* stackTop) {
+    profiler_init(stackTop);
   }
   ~GeckoProfilerInitRAII() {
     profiler_shutdown();
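A usage sketch for this guard, assuming the new one-argument constructor; it mirrors the XRE_main changes elsewhere in this commit, with the surrounding main() being illustrative:

    int main(int argc, char** argv)
    {
      // aLocal lives in main's frame, so its address bounds every
      // stack this thread will have while the profiler is running.
      char aLocal;
      GeckoProfilerInitRAII profilerGuard(&aLocal); // calls profiler_init(&aLocal)
      // ... application runs; profiler_shutdown() fires when the
      // guard goes out of scope ...
      return 0;
    }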

View File

@@ -42,7 +42,7 @@ JSObject *mozilla_sampler_get_profile_data(JSContext *aCx);
 const char** mozilla_sampler_get_features();
-void mozilla_sampler_init();
+void mozilla_sampler_init(void* stackTop);
 void mozilla_sampler_shutdown();
@@ -58,7 +58,7 @@ void mozilla_sampler_lock();
 void mozilla_sampler_unlock();
 // Register/unregister threads with the profiler
-bool mozilla_sampler_register_thread(const char* name);
+bool mozilla_sampler_register_thread(const char* name, void* stackTop);
 void mozilla_sampler_unregister_thread();
 double mozilla_sampler_time();

View File

@@ -52,9 +52,9 @@ extern bool stack_key_initialized;
 #endif
 static inline
-void profiler_init()
+void profiler_init(void* stackTop)
 {
-  mozilla_sampler_init();
+  mozilla_sampler_init(stackTop);
 }
 static inline
@@ -141,9 +141,9 @@ void profiler_unlock()
 }
 static inline
-void profiler_register_thread(const char* name)
+void profiler_register_thread(const char* name, void* stackTop)
 {
-  mozilla_sampler_register_thread(name);
+  mozilla_sampler_register_thread(name, stackTop);
 }
 static inline

View File

@@ -562,7 +562,7 @@ static void print_callback(const ProfileEntry& entry, const char* tagStringData)
 void mozilla_sampler_print_location1()
 {
   if (!stack_key_initialized)
-    profiler_init();
+    profiler_init(NULL);
   PseudoStack *stack = tlsPseudoStack.get();
   if (!stack) {

View File

@@ -83,6 +83,10 @@ void uwt__register_thread_for_profiling ( void* stackTop )
 {
 }
+void uwt__unregister_thread_for_profiling()
+{
+}
 // RUNS IN SIGHANDLER CONTEXT
 UnwinderThreadBuffer* uwt__acquire_empty_buffer()
 {
@@ -122,6 +126,9 @@ static int unwind_thr_exit_now = 0; // RACED ON
 // registered thread.
 static void thread_register_for_profiling ( void* stackTop );
+// Unregister a thread.
+static void thread_unregister_for_profiling();
 // Frees some memory when the unwinder thread is shut down.
 static void do_breakpad_unwind_Buffer_free_singletons();
@@ -176,6 +183,11 @@ void uwt__register_thread_for_profiling(void* stackTop)
   thread_register_for_profiling(stackTop);
 }
+void uwt__unregister_thread_for_profiling()
+{
+  thread_unregister_for_profiling();
+}
 // RUNS IN SIGHANDLER CONTEXT
 UnwinderThreadBuffer* uwt__acquire_empty_buffer()
 {
@@ -348,21 +360,29 @@ typedef
 /*SL*/ static uint64_t g_seqNo = 0;
 /*SL*/ static SpinLock g_spinLock = { 0 };
-/* Globals -- the thread array */
-#define N_SAMPLING_THREADS 10
-/*SL*/ static StackLimit g_stackLimits[N_SAMPLING_THREADS];
-/*SL*/ static int g_stackLimitsUsed = 0;
+/* Globals -- the thread array.  The array is dynamically expanded on
+   demand.  The spinlock must be held when accessing g_stackLimits,
+   g_stackLimits[some index], g_stackLimitsUsed and g_stackLimitsSize.
+   However, the spinlock must not be held when calling malloc to
+   allocate or expand the array, as that would risk deadlock against a
+   sampling thread that holds the malloc lock and is trying to acquire
+   the spinlock. */
+/*SL*/ static StackLimit* g_stackLimits = NULL;
+/*SL*/ static size_t g_stackLimitsUsed = 0;
+/*SL*/ static size_t g_stackLimitsSize = 0;
 /* Stats -- atomically incremented, no lock needed */
 static uintptr_t g_stats_totalSamples = 0; // total # sample attempts
 static uintptr_t g_stats_noBuffAvail = 0;  // # failed due to no buffer avail
+static uintptr_t g_stats_thrUnregd = 0;    // # failed due to unregistered thr
 /* We must be VERY CAREFUL what we do with the spinlock held.  The
    only thing it is safe to do with it held is modify (viz, read or
    write) g_buffers, g_buffers[], g_seqNo, g_buffers[]->state,
-   g_stackLimits[] and g_stackLimitsUsed.  No arbitrary computations,
-   no syscalls, no printfs, no file IO, and absolutely no dynamic
-   memory allocation (else we WILL eventually deadlock).
+   g_stackLimits, g_stackLimits[], g_stackLimitsUsed and
+   g_stackLimitsSize.  No arbitrary computations, no syscalls, no
+   printfs, no file IO, and absolutely no dynamic memory allocation
+   (else we WILL eventually deadlock).
    This applies both to the signal handler and to the unwinder thread.
 */
@@ -476,44 +496,175 @@ static void atomic_INC(uintptr_t* loc)
   }
 }
-/* Register a thread for profiling.  It must not be allowed to receive
-   signals before this is done, else the signal handler will
-   MOZ_ASSERT. */
+// Registers a thread for profiling.  Detects and ignores duplicate
+// registration.
 static void thread_register_for_profiling(void* stackTop)
 {
-  int i;
-  /* Minimal sanity check on stackTop */
-  MOZ_ASSERT( (void*)&i < stackTop );
+  pthread_t me = pthread_self();
   spinLock_acquire(&g_spinLock);
-  pthread_t me = pthread_self();
-  for (i = 0; i < g_stackLimitsUsed; i++) {
-    /* check for duplicate registration */
-    MOZ_ASSERT(g_stackLimits[i].thrId != me);
-  }
-  if (!(g_stackLimitsUsed < N_SAMPLING_THREADS))
-    MOZ_CRASH(); // Don't continue -- we'll get memory corruption.
+  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
+  int n_used;
+  // Ignore spurious calls which aren't really registering anything.
+  if (stackTop == NULL) {
+    n_used = g_stackLimitsUsed;
+    spinLock_release(&g_spinLock);
+    LOGF("BPUnw: [%d total] thread_register_for_profiling"
+         "(me=%p, stacktop=NULL) (IGNORED)", n_used, (void*)me);
+    return;
+  }
+  /* Minimal sanity check on stackTop */
+  MOZ_ASSERT((void*)&n_used/*any auto var will do*/ < stackTop);
+  bool is_dup = false;
+  for (size_t i = 0; i < g_stackLimitsUsed; i++) {
+    if (g_stackLimits[i].thrId == me) {
+      is_dup = true;
+      break;
+    }
+  }
+  if (is_dup) {
+    /* It's a duplicate registration.  Ignore it: drop the lock and
+       return. */
+    n_used = g_stackLimitsUsed;
+    spinLock_release(&g_spinLock);
+    LOGF("BPUnw: [%d total] thread_register_for_profiling"
+         "(me=%p, stacktop=%p) (DUPLICATE)", n_used, (void*)me, stackTop);
+    return;
+  }
+  /* Make sure the g_stackLimits array is large enough to accommodate
+     this new entry.  This is tricky.  If it isn't large enough, we
+     can malloc a larger version, but we have to do that without
+     holding the spinlock, else we risk deadlock.  The deadlock
+     scenario is:
+
+     Some other thread that is being sampled     This thread
+
+     call malloc                                 call this function
+     acquire malloc lock                         acquire the spinlock
+     (sampling signal)                           discover thread array not big enough,
+     call uwt__acquire_empty_buffer              call malloc to make it larger
+     acquire the spinlock                        acquire malloc lock
+
+     This gives an inconsistent lock acquisition order on the malloc
+     lock and spinlock, hence risk of deadlock.
+
+     Allocating more space for the array without holding the spinlock
+     implies tolerating races against other thread(s) who are also
+     trying to expand the array.  How can we detect if we have been
+     out-raced?  Every successful expansion of g_stackLimits[] results
+     in an increase in g_stackLimitsSize.  Hence we can detect if we
+     got out-raced by remembering g_stackLimitsSize before we dropped
+     the spinlock and checking if it has changed after the spinlock is
+     reacquired. */
+  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
+  if (g_stackLimitsUsed == g_stackLimitsSize) {
+    /* g_stackLimits[] is full; resize it. */
+    size_t old_size = g_stackLimitsSize;
+    size_t new_size = old_size == 0 ? 4 : (2 * old_size);
+    spinLock_release(&g_spinLock);
+    StackLimit* new_arr = (StackLimit*)malloc(new_size * sizeof(StackLimit));
+    if (!new_arr)
+      return;
+    spinLock_acquire(&g_spinLock);
+    if (old_size != g_stackLimitsSize) {
+      /* We've been outraced.  Instead of trying to deal in-line with
+         this extremely rare case, just start all over again by
+         tail-calling this routine. */
+      spinLock_release(&g_spinLock);
+      free(new_arr);
+      thread_register_for_profiling(stackTop);
+      return;
+    }
+    memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
+    if (g_stackLimits)
+      free(g_stackLimits);
+    g_stackLimits = new_arr;
+    MOZ_ASSERT(g_stackLimitsSize < new_size);
+    g_stackLimitsSize = new_size;
+  }
+  MOZ_ASSERT(g_stackLimitsUsed < g_stackLimitsSize);
+  /* Finally, we have a safe place to put the new entry. */
+  // Round |stackTop| up to the end of the containing page.  We may
+  // as well do this -- there's no danger of a fault, and we might
+  // get a few more base-of-the-stack frames as a result.  This
+  // assumes that no target has a page size smaller than 4096.
+  uintptr_t stackTopR = (uintptr_t)stackTop;
+  stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095;
   g_stackLimits[g_stackLimitsUsed].thrId = me;
-  g_stackLimits[g_stackLimitsUsed].stackTop = stackTop;
+  g_stackLimits[g_stackLimitsUsed].stackTop = (void*)stackTopR;
   g_stackLimits[g_stackLimitsUsed].nSamples = 0;
   g_stackLimitsUsed++;
+  n_used = g_stackLimitsUsed;
   spinLock_release(&g_spinLock);
-  LOGF("BPUnw: thread_register_for_profiling(stacktop %p, me %p)",
-       stackTop, (void*)me);
+  LOGF("BPUnw: [%d total] thread_register_for_profiling"
+       "(me=%p, stacktop=%p)", n_used, (void*)me, stackTop);
 }
+
+// Deregisters a thread from profiling.  Detects and ignores attempts
+// to deregister a not-registered thread.
+static void thread_unregister_for_profiling()
+{
+  spinLock_acquire(&g_spinLock);
+  // tmp copy of g_stackLimitsUsed, to avoid racing in message printing
+  size_t n_used;
+  size_t i;
+  bool found = false;
+  pthread_t me = pthread_self();
+  for (i = 0; i < g_stackLimitsUsed; i++) {
+    if (g_stackLimits[i].thrId == me)
+      break;
+  }
+  if (i < g_stackLimitsUsed) {
+    // found this entry.  Slide the remaining ones down one place.
+    for (; i+1 < g_stackLimitsUsed; i++) {
+      g_stackLimits[i] = g_stackLimits[i+1];
+    }
+    g_stackLimitsUsed--;
+    found = true;
+  }
+  n_used = g_stackLimitsUsed;
+  spinLock_release(&g_spinLock);
+  LOGF("BPUnw: [%d total] thread_unregister_for_profiling(me=%p) %s",
+       (int)n_used, (void*)me, found ? "" : " (NOT REGISTERED) ");
+}
+
 __attribute__((unused))
 static void show_registered_threads()
 {
-  int i;
+  size_t i;
   spinLock_acquire(&g_spinLock);
   for (i = 0; i < g_stackLimitsUsed; i++) {
     LOGF("[%d] pthread_t=%p nSamples=%lld",
-         i, (void*)g_stackLimits[i].thrId,
-         (unsigned long long int)g_stackLimits[i].nSamples);
+         (int)i, (void*)g_stackLimits[i].thrId,
+         (unsigned long long int)g_stackLimits[i].nSamples);
   }
   spinLock_release(&g_spinLock);
 }
@@ -529,7 +680,7 @@ static UnwinderThreadBuffer* acquire_empty_buffer()
        fillseqno++; and remember it
      rel lock
   */
-  int i;
+  size_t i;
   atomic_INC( &g_stats_totalSamples );
@@ -549,11 +700,20 @@ static UnwinderThreadBuffer* acquire_empty_buffer()
      is safe to call in a signal handler, which strikes me as highly
      likely. */
   pthread_t me = pthread_self();
-  MOZ_ASSERT(g_stackLimitsUsed >= 0 && g_stackLimitsUsed <= N_SAMPLING_THREADS);
+  MOZ_ASSERT(g_stackLimitsUsed <= g_stackLimitsSize);
   for (i = 0; i < g_stackLimitsUsed; i++) {
     if (g_stackLimits[i].thrId == me)
       break;
   }
+  /* If the thread isn't registered for profiling, just ignore the call
+     and return NULL. */
+  if (i == g_stackLimitsUsed) {
+    spinLock_release(&g_spinLock);
+    atomic_INC( &g_stats_thrUnregd );
+    return NULL;
+  }
   /* "this thread is registered for profiling" */
   MOZ_ASSERT(i < g_stackLimitsUsed);
@@ -574,7 +734,7 @@ static UnwinderThreadBuffer* acquire_empty_buffer()
     if (g_buffers[i]->state == S_EMPTY)
       break;
   }
-  MOZ_ASSERT(i >= 0 && i <= N_UNW_THR_BUFFERS);
+  MOZ_ASSERT(i <= N_UNW_THR_BUFFERS);
   if (i == N_UNW_THR_BUFFERS) {
     /* Again, no free buffers .. give up. */
@@ -1784,9 +1944,11 @@ void do_breakpad_unwind_Buffer(/*OUT*/PCandSP** pairs,
   if (LOGLEVEL >= 2) {
     if (0 == (g_stats_totalSamples % 1000))
-      LOGF("BPUnw: %llu total samples, %llu failed due to buffer unavail",
+      LOGF("BPUnw: %llu total samples, %llu failed (buffer unavail), "
+           "%llu failed (thread unreg'd), ",
            (unsigned long long int)g_stats_totalSamples,
-           (unsigned long long int)g_stats_noBuffAvail);
+           (unsigned long long int)g_stats_noBuffAvail,
+           (unsigned long long int)g_stats_thrUnregd);
   }
   delete stack;
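Stepping back from the diff: the resize logic in thread_register_for_profiling above is the core of the move from a fixed N_SAMPLING_THREADS array to a dynamic one. A condensed sketch of the grow-then-revalidate pattern it implements, assuming the declarations above; names are simplified, and as a conservative reading of the same lock-ordering rule this sketch also keeps free() outside the spinlock:

    // Ensure capacity for one more entry without ever calling
    // malloc/free while g_spinLock is held.  On success returns true
    // with the lock held and g_stackLimitsUsed < g_stackLimitsSize;
    // returns false (lock not held) on OOM.
    static bool ensure_room_for_entry()
    {
      for (;;) {
        spinLock_acquire(&g_spinLock);
        if (g_stackLimitsUsed < g_stackLimitsSize)
          return true;                       // room already; lock still held
        size_t old_size = g_stackLimitsSize;
        size_t new_size = old_size == 0 ? 4 : 2 * old_size;
        spinLock_release(&g_spinLock);       // drop the lock before malloc
        StackLimit* new_arr = (StackLimit*)malloc(new_size * sizeof(StackLimit));
        if (!new_arr)
          return false;                      // OOM; caller bails out
        spinLock_acquire(&g_spinLock);
        if (old_size != g_stackLimitsSize) { // size changed: we were out-raced
          spinLock_release(&g_spinLock);
          free(new_arr);                     // free outside the lock, then retry
          continue;
        }
        memcpy(new_arr, g_stackLimits, old_size * sizeof(StackLimit));
        StackLimit* old_arr = g_stackLimits;
        g_stackLimits = new_arr;
        g_stackLimitsSize = new_size;
        spinLock_release(&g_spinLock);       // drop the lock before freeing
        free(old_arr);                       // free(NULL) is a no-op
        continue;                            // re-check under the lock
      }
    }

The version check on g_stackLimitsSize is what makes the unlocked malloc safe: every successful expansion bumps the size, so a stale old_size proves another thread got there first and the freshly allocated block must be discarded.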

View File

@@ -37,18 +37,26 @@ void uwt__stop();
 // and can safely release any resources.
 void uwt__deinit();
-// Registers a sampler thread for profiling.  Threads must be registered
-// before they are allowed to call utb__acquire_empty_buffer or
-// utb__release_full_buffer.
+// Registers a sampler thread for profiling.  Threads must be
+// registered before calls to utb__acquire_empty_buffer or
+// utb__release_full_buffer have any effect.  If stackTop is
+// NULL, the call is ignored.
 void uwt__register_thread_for_profiling(void* stackTop);
-// RUNS IN SIGHANDLER CONTEXT
+// Deregister a sampler thread for profiling.
+void uwt__unregister_thread_for_profiling();
+// RUNS IN SIGHANDLER CONTEXT
 // Called in the sampled thread (signal) context.  Get an empty buffer
 // into which ProfileEntries can be put.  It may return NULL if no
 // empty buffers can be found, which will be the case if the unwinder
 // thread(s) have fallen behind for some reason.  In this case the
-// sampled thread must simply give up and return from the signal handler
-// immediately, else it risks deadlock.
+// sampled thread must simply give up and return from the signal
+// handler immediately, else it risks deadlock.
+//
+// If the calling thread has not previously registered itself for
+// profiling via uwt__register_thread_for_profiling, this routine
+// returns NULL.
 UnwinderThreadBuffer* uwt__acquire_empty_buffer();
 // RUNS IN SIGHANDLER CONTEXT
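A sketch of how a sampling signal handler is expected to consume this contract; the buffer-filling step is elided and only the give-up behaviour is mandated by the comments above:

    // RUNS IN SIGHANDLER CONTEXT -- illustrative only.
    static void sample_handler_body()
    {
      UnwinderThreadBuffer* buf = uwt__acquire_empty_buffer();
      if (!buf) {
        // Either no empty buffer is available (the unwinder fell
        // behind) or this thread never registered.  Return at once;
        // blocking or retrying here risks deadlock.
        return;
      }
      // ... fill |buf| with ProfileEntries, then hand it back via the
      // release call so the unwinder thread can process it ...
    }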

View File

@@ -65,6 +65,7 @@
 #include "ProfileEntry.h"
 #include "nsThreadUtils.h"
 #include "TableTicker.h"
+#include "UnwinderThread2.h"
 #include <string.h>
 #include <stdio.h>
@@ -358,7 +359,9 @@ void Sampler::Stop() {
   }
 }
-bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
 {
   if (!Sampler::sRegisteredThreadsMutex)
     return false;
@@ -385,6 +388,8 @@ bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack
   }
   sRegisteredThreads->push_back(info);
+  uwt__register_thread_for_profiling(stackTop);
   return true;
 }
@@ -405,6 +410,8 @@ void Sampler::UnregisterCurrentThread()
       break;
     }
   }
+  uwt__unregister_thread_for_profiling();
 }
#ifdef ANDROID

View File

@@ -341,7 +341,9 @@ pid_t gettid()
   return (pid_t) syscall(SYS_thread_selfid);
 }
-bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
 {
   if (!Sampler::sRegisteredThreadsMutex)
     return false;
@@ -368,6 +370,8 @@ bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack
   }
   sRegisteredThreads->push_back(info);
+  uwt__register_thread_for_profiling(stackTop);
   return true;
 }

View File

@@ -32,6 +32,7 @@
 #include "platform.h"
 #include "TableTicker.h"
 #include "ProfileEntry.h"
+#include "UnwinderThread2.h"
 class PlatformData : public Malloced {
  public:
@@ -261,7 +262,9 @@ void OS::Sleep(int milliseconds) {
   ::Sleep(milliseconds);
 }
-bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
 {
   if (!Sampler::sRegisteredThreadsMutex)
     return false;
@@ -288,6 +291,8 @@ bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack
   }
   sRegisteredThreads->push_back(info);
+  uwt__register_thread_for_profiling(stackTop);
   return true;
 }

View File

@@ -252,7 +252,7 @@ void read_profiler_env_vars()
 ////////////////////////////////////////////////////////////////////////
 // BEGIN externally visible functions
-void mozilla_sampler_init()
+void mozilla_sampler_init(void* stackTop)
 {
   sInitCount++;
@@ -271,7 +271,7 @@ void mozilla_sampler_init()
   PseudoStack *stack = new PseudoStack();
   tlsPseudoStack.set(stack);
-  Sampler::RegisterCurrentThread("Gecko", stack, true);
+  Sampler::RegisterCurrentThread("Gecko", stack, true, stackTop);
   // Read mode settings from MOZ_PROFILER_MODE and interval
   // settings from MOZ_PROFILER_INTERVAL and stack-scan threshold
@@ -402,7 +402,7 @@ void mozilla_sampler_start(int aProfileEntries, int aInterval,
                            const char** aFeatures, uint32_t aFeatureCount)
 {
   if (!stack_key_initialized)
-    profiler_init();
+    profiler_init(NULL);
   /* If the sampling interval was set using env vars, use that
      in preference to anything else. */
@@ -423,9 +423,6 @@ void mozilla_sampler_start(int aProfileEntries, int aInterval,
                         aProfileEntries ? aProfileEntries : PROFILE_DEFAULT_ENTRY,
                         aFeatures, aFeatureCount);
   if (t->HasUnwinderThread()) {
-    int aLocal;
-    uwt__register_thread_for_profiling( &aLocal );
     // Create the unwinder thread.  ATM there is only one.
     uwt__init();
   }
@@ -467,7 +464,7 @@ void mozilla_sampler_start(int aProfileEntries, int aInterval,
 void mozilla_sampler_stop()
 {
   if (!stack_key_initialized)
-    profiler_init();
+    profiler_init(NULL);
   TableTicker *t = tlsTicker.get();
   if (!t) {
@@ -559,12 +556,12 @@ void mozilla_sampler_unlock()
   os->NotifyObservers(nullptr, "profiler-unlocked", nullptr);
 }
-bool mozilla_sampler_register_thread(const char* aName)
+bool mozilla_sampler_register_thread(const char* aName, void* stackTop)
 {
   PseudoStack* stack = new PseudoStack();
   tlsPseudoStack.set(stack);
-  return Sampler::RegisterCurrentThread(aName, stack, false);
+  return Sampler::RegisterCurrentThread(aName, stack, false, stackTop);
 }
 void mozilla_sampler_unregister_thread()

View File

@@ -335,7 +335,9 @@ class Sampler {
     return *sRegisteredThreads;
   }
-  static bool RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread);
+  static bool RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop);
   static void UnregisterCurrentThread();
   static void Startup();

View File

@@ -332,7 +332,8 @@ NS_InitXPCOM2(nsIServiceManager* *result,
 {
   mozPoisonValueInit();
-  profiler_init();
+  char aLocal;
+  profiler_init(&aLocal);
   nsresult rv = NS_OK;
   // We are not shutting down

View File

@@ -169,7 +169,8 @@ LazyIdleThread::EnsureThread()
 void
 LazyIdleThread::InitThread()
 {
-  profiler_register_thread(mName.get());
+  char aLocal;
+  profiler_register_thread(mName.get(), &aLocal);
   PR_SetCurrentThreadName(mName.get());