gecko/memory/replace/dmd/DMD.cpp
Nicholas Nethercote b97d32ac43 Bug 820652 (part 1) - DMD: Split BlockKey in two, and fix the ensuing fall-out. r=jlebar.
--HG--
extra : rebase_source : 9fdda42f633b209b0111fe6104e08b5028dda8b5
2012-12-11 19:08:38 -08:00

2192 lines
61 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "DMD.h"
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef XP_WIN
#error "Windows not supported yet, sorry."
// XXX: This will be needed when Windows is supported (bug 819839).
//#include <process.h>
//#define getpid _getpid
#else
#include <unistd.h>
#endif
#ifdef ANDROID
#include <android/log.h>
#endif
#include "nscore.h"
#include "nsStackWalk.h"
#include "js/HashTable.h"
#include "js/Vector.h"
#include "mozilla/Assertions.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/Likely.h"
// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
// replace_{posix_memalign,aligned_alloc,valloc}. It requires defining
// PAGE_SIZE. Nb: sysconf() is expensive, but it's only used for (the obsolete
// and rarely used) valloc.
#define MOZ_REPLACE_ONLY_MEMALIGN 1
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#include "replace_malloc.h"
#undef MOZ_REPLACE_ONLY_MEMALIGN
#undef PAGE_SIZE
namespace mozilla {
namespace dmd {
//---------------------------------------------------------------------------
// Utilities
//---------------------------------------------------------------------------
#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(T) \
T(const T&); \
void operator=(const T&)
#endif
static const malloc_table_t* gMallocTable = nullptr;
// This enables/disables DMD.
static bool gIsDMDRunning = false;
enum Mode {
Normal, // run normally
Test, // do some basic correctness tests
Stress // do some performance stress tests
};
static Mode gMode = Normal;
// This provides infallible allocations (they abort on OOM). We use it for all
// of DMD's own allocations, which fall into the following three cases.
// - Direct allocations (the easy case).
// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
// as their AllocPolicy.
// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
// Thread::mBlockIntercepts and in replace_malloc for how these work.
//
class InfallibleAllocPolicy
{
static void ExitOnFailure(const void* aP);
public:
static void* malloc_(size_t aSize)
{
void* p = gMallocTable->malloc(aSize);
ExitOnFailure(p);
return p;
}
static void* calloc_(size_t aSize)
{
void* p = gMallocTable->calloc(1, aSize);
ExitOnFailure(p);
return p;
}
// This realloc_ is the one we use for direct reallocs within DMD.
static void* realloc_(void* aPtr, size_t aNewSize)
{
void* p = gMallocTable->realloc(aPtr, aNewSize);
ExitOnFailure(p);
return p;
}
// This realloc_ is required for this to be a JS container AllocPolicy.
static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
{
return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
}
static void* memalign_(size_t aAlignment, size_t aSize)
{
void* p = gMallocTable->memalign(aAlignment, aSize);
ExitOnFailure(p);
return p;
}
static void free_(void* aPtr) { gMallocTable->free(aPtr); }
static char* strdup_(const char* aStr)
{
char* s = (char*) gMallocTable->malloc(strlen(aStr));
ExitOnFailure(s);
strcpy(s, aStr);
return s;
}
template <class T>
static T* new_()
{
void* mem = malloc_(sizeof(T));
ExitOnFailure(mem);
return new (mem) T;
}
template <class T, typename P1>
static T* new_(P1 p1)
{
void* mem = malloc_(sizeof(T));
ExitOnFailure(mem);
return new (mem) T(p1);
}
static void reportAllocOverflow() { ExitOnFailure(nullptr); }
};
static void
StatusMsg(const char* aFmt, ...)
{
va_list ap;
va_start(ap, aFmt);
#ifdef ANDROID
__android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
#else
// The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
vfprintf(stderr, fmt, ap);
InfallibleAllocPolicy::free_(fmt);
#endif
va_end(ap);
}
/* static */ void
InfallibleAllocPolicy::ExitOnFailure(const void* aP)
{
if (!aP) {
StatusMsg("out of memory; aborting\n");
MOZ_CRASH();
}
}
void
Writer::Write(const char* aFmt, ...) const
{
va_list ap;
va_start(ap, aFmt);
mWriterFun(mWriteState, aFmt, ap);
va_end(ap);
}
#define W(...) aWriter.Write(__VA_ARGS__);
#define WriteTitle(...) \
W("------------------------------------------------------------------\n"); \
W(__VA_ARGS__); \
W("------------------------------------------------------------------\n\n");
MOZ_EXPORT void
FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
{
FILE* fp = static_cast<FILE*>(aWriteState);
vfprintf(fp, aFmt, aAp);
}
static double
Percent(size_t part, size_t whole)
{
return (whole == 0) ? 0 : 100 * (double)part / whole;
}
// Commifies the number and prepends a '~' if requested. Best used with
// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
// we'll see.
static char*
Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
{
int nc = 0, i = 0, lasti = buflen - 2;
buf[lasti + 1] = '\0';
if (n == 0) {
buf[lasti - i] = '0';
i++;
} else {
while (n > 0) {
if (((i - nc) % 3) == 0 && i != 0) {
buf[lasti - i] = ',';
i++;
nc++;
}
buf[lasti - i] = static_cast<char>((n % 10) + '0');
i++;
n /= 10;
}
}
int firstCharIndex = lasti - i + 1;
if (addTilde) {
firstCharIndex--;
buf[firstCharIndex] = '~';
}
MOZ_ASSERT(firstCharIndex >= 0);
return &buf[firstCharIndex];
}
static const char*
Plural(size_t aN)
{
return aN == 1 ? "" : "s";
}
// Used by calls to Show().
static const size_t kBufLen = 64;
static char gBuf1[kBufLen];
static char gBuf2[kBufLen];
static char gBuf3[kBufLen];
static char gBuf4[kBufLen];
static const size_t kNoSize = size_t(-1);
//---------------------------------------------------------------------------
// The global lock
//---------------------------------------------------------------------------
// MutexBase implements the platform-specific parts of a mutex.
#ifdef XP_WIN
#include <windows.h>
class MutexBase
{
HANDLE mMutex;
DISALLOW_COPY_AND_ASSIGN(MutexBase);
public:
MutexBase()
: mMutex(CreateMutexW(nullptr, false, nullptr))
{
MOZ_ASSERT(mMutex);
}
~MutexBase()
{
if (mMutex) {
CloseHandle(mMutex);
}
}
void Lock()
{
if (mMutex) {
WaitForSingleObject(mMutex, INFINITE);
}
}
void Unlock()
{
if (mMutex) {
ReleaseMutex(mMutex);
}
}
};
#else
#include <pthread.h>
#include <sys/types.h>
class MutexBase
{
pthread_mutex_t mMutex;
DISALLOW_COPY_AND_ASSIGN(MutexBase);
public:
MutexBase()
: mMutex(PTHREAD_MUTEX_INITIALIZER)
{}
void Lock()
{
pthread_mutex_lock(&mMutex);
}
void Unlock()
{
pthread_mutex_unlock(&mMutex);
}
};
#endif
class Mutex : private MutexBase
{
bool mIsLocked;
DISALLOW_COPY_AND_ASSIGN(Mutex);
public:
Mutex()
: mIsLocked(false)
{}
void Lock()
{
MutexBase::Lock();
MOZ_ASSERT(!mIsLocked);
mIsLocked = true;
}
void Unlock()
{
MOZ_ASSERT(mIsLocked);
mIsLocked = false;
MutexBase::Unlock();
}
bool IsLocked()
{
return mIsLocked;
}
};
// This lock must be held while manipulating global state, such as
// gStackTraceTable, gLiveBlockTable, etc.
static Mutex* gStateLock = nullptr;
class AutoLockState
{
DISALLOW_COPY_AND_ASSIGN(AutoLockState);
public:
AutoLockState()
{
gStateLock->Lock();
}
~AutoLockState()
{
gStateLock->Unlock();
}
};
class AutoUnlockState
{
DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);
public:
AutoUnlockState()
{
gStateLock->Unlock();
}
~AutoUnlockState()
{
gStateLock->Lock();
}
};
//---------------------------------------------------------------------------
// Thread-local storage and blocking of intercepts
//---------------------------------------------------------------------------
#ifdef XP_WIN
#error "Windows not supported yet, sorry."
#else
#include <pthread.h>
#define DMD_TLS_INDEX_TYPE pthread_key_t
#define DMD_CREATE_TLS_INDEX(i_) pthread_key_create(&(i_), nullptr)
#define DMD_DESTROY_TLS_INDEX(i_) pthread_key_delete((i_))
#define DMD_GET_TLS_DATA(i_) pthread_getspecific((i_))
#define DMD_SET_TLS_DATA(i_, v_) pthread_setspecific((i_), (v_))
#endif
static DMD_TLS_INDEX_TYPE gTlsIndex;
class Thread
{
// Required for allocation via InfallibleAllocPolicy::new_.
friend class InfallibleAllocPolicy;
// When true, this blocks intercepts, which allows malloc interception
// functions to themselves call malloc. (Nb: for direct calls to malloc we
// can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
// indirectly call vanilla malloc via functions like NS_StackWalk.)
bool mBlockIntercepts;
Thread()
: mBlockIntercepts(false)
{}
DISALLOW_COPY_AND_ASSIGN(Thread);
public:
static Thread* Fetch();
bool blockIntercepts()
{
MOZ_ASSERT(!mBlockIntercepts);
return mBlockIntercepts = true;
}
bool unblockIntercepts()
{
MOZ_ASSERT(mBlockIntercepts);
return mBlockIntercepts = false;
}
bool interceptsAreBlocked() const
{
return mBlockIntercepts;
}
};
/* static */ Thread*
Thread::Fetch()
{
Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));
if (MOZ_UNLIKELY(!t)) {
// This memory is never freed, even if the thread dies. It's a leak, but
// only a tiny one.
t = InfallibleAllocPolicy::new_<Thread>();
DMD_SET_TLS_DATA(gTlsIndex, t);
}
return t;
}
// An object of this class must be created (on the stack) before running any
// code that might allocate.
class AutoBlockIntercepts
{
Thread* const mT;
DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts);
public:
AutoBlockIntercepts(Thread* aT)
: mT(aT)
{
mT->blockIntercepts();
}
~AutoBlockIntercepts()
{
MOZ_ASSERT(mT->interceptsAreBlocked());
mT->unblockIntercepts();
}
};
//---------------------------------------------------------------------------
// Stack traces
//---------------------------------------------------------------------------
static void
PcInfo(const void* aPc, nsCodeAddressDetails* aDetails)
{
// NS_DescribeCodeAddress can (on Linux) acquire a lock inside
// the shared library loader. Another thread might call malloc
// while holding that lock (when loading a shared library). So
// we have to exit gStateLock around this call. For details, see
// https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
{
AutoUnlockState unlock;
(void)NS_DescribeCodeAddress(const_cast<void*>(aPc), aDetails);
}
if (!aDetails->function[0]) {
strcpy(aDetails->function, "???");
}
}
class StackTrace
{
static const uint32_t MaxDepth = 24;
uint32_t mLength; // The number of PCs.
void* mPcs[MaxDepth]; // The PCs themselves.
public:
StackTrace() : mLength(0) {}
uint32_t Length() const { return mLength; }
void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; }
uint32_t Size() const { return mLength * sizeof(mPcs[0]); }
// The stack trace returned by this function is interned in gStackTraceTable,
// and so is immortal and unmovable.
static const StackTrace* Get(Thread* aT);
void Sort()
{
qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp);
}
void Print(const Writer& aWriter) const;
// Hash policy.
typedef StackTrace* Lookup;
static uint32_t hash(const StackTrace* const& aSt)
{
return mozilla::HashBytes(aSt->mPcs, aSt->Size());
}
static bool match(const StackTrace* const& aA,
const StackTrace* const& aB)
{
return aA->mLength == aB->mLength &&
memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
}
private:
static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
{
StackTrace* st = (StackTrace*) aClosure;
// Only fill to MaxDepth.
// XXX: bug 818793 will allow early bailouts.
if (st->mLength < MaxDepth) {
st->mPcs[st->mLength] = aPc;
st->mLength++;
}
}
static int QsortCmp(const void* aA, const void* aB)
{
const void* const a = *static_cast<const void* const*>(aA);
const void* const b = *static_cast<const void* const*>(aB);
if (a < b) return -1;
if (a > b) return 1;
return 0;
}
};
typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
StackTraceTable;
static StackTraceTable* gStackTraceTable = nullptr;
void
StackTrace::Print(const Writer& aWriter) const
{
if (mLength == 0) {
W(" (empty)\n");
return;
}
if (gMode == Test) {
// Don't print anything because there's too much variation.
W(" (stack omitted due to test mode)\n");
return;
}
for (uint32_t i = 0; i < mLength; i++) {
nsCodeAddressDetails details;
void* pc = mPcs[i];
PcInfo(pc, &details);
if (details.function[0]) {
W(" %s[%s +0x%X] %p\n", details.function, details.library,
details.loffset, pc);
}
}
}
/* static */ const StackTrace*
StackTrace::Get(Thread* aT)
{
MOZ_ASSERT(gStateLock->IsLocked());
MOZ_ASSERT(aT->interceptsAreBlocked());
// On Windows, NS_StackWalk can acquire a lock from the shared library
// loader. Another thread might call malloc while holding that lock (when
// loading a shared library). So we can't be in gStateLock during the call
// to NS_StackWalk. For details, see
// https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
StackTrace tmp;
{
#ifdef XP_WIN
AutoUnlockState unlock;
#endif
// In normal operation, skip=3 gets us past various malloc wrappers into
// more interesting stuff. But in test mode we need to skip a bit less to
// sufficiently differentiate some similar stacks.
uint32_t skip = (gMode == Test) ? 2 : 3;
nsresult rv = NS_StackWalk(StackWalkCallback, skip, &tmp, 0, nullptr);
if (NS_FAILED(rv) || tmp.mLength == 0) {
tmp.mLength = 0;
}
}
StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
if (!p) {
StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
(void)gStackTraceTable->add(p, stnew);
}
return *p;
}
//---------------------------------------------------------------------------
// Heap blocks
//---------------------------------------------------------------------------
static const char* gUnreportedName = "unreported";
// This is used by both LiveBlocks and LiveBlockGroups.
class LiveBlockKey
{
public:
const StackTrace* const mAllocStackTrace; // never null
protected:
// Live blocks can be reported in two ways.
// - The most common is via a memory reporter traversal -- the block is
// reported when the reporter runs, causing DMD to mark it as reported,
// and DMD must clear the marking once it has finished its analysis.
// - Less common are ones that are reported immediately on allocation. DMD
// must *not* clear the markings of these blocks once it has finished its
// analysis. The |mReportedOnAlloc| field is set for such blocks.
const StackTrace* mReportStackTrace; // nullptr if unreported
const char* mReporterName; // gUnreportedName if unreported
bool mReportedOnAlloc; // true if block was reported
public:
LiveBlockKey(const StackTrace* aAllocStackTrace)
: mAllocStackTrace(aAllocStackTrace),
mReportStackTrace(nullptr),
mReporterName(gUnreportedName),
mReportedOnAlloc(false)
{
MOZ_ASSERT(IsSane());
}
bool IsSane() const
{
bool hasReporterName = mReporterName != gUnreportedName;
return mAllocStackTrace &&
(( mReportStackTrace && hasReporterName) ||
(!mReportStackTrace && !hasReporterName && !mReportedOnAlloc));
}
bool IsReported() const
{
MOZ_ASSERT(IsSane());
bool isRep = mReporterName != gUnreportedName;
return isRep;
}
// Quasi-hash policy (used by LiveBlockGroup's hash policy).
//
// Hash() and Match() both assume that identical reporter names have
// identical pointers. In practice this always happens because they are
// static strings (as specified in the NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN
// macro). This is true even for multi-reporters. (If it ever became
// untrue, the worst that would happen is that some blocks that should be in
// the same block group would end up in separate block groups.)
static uint32_t Hash(const LiveBlockKey& aKey)
{
return mozilla::HashGeneric(aKey.mAllocStackTrace,
aKey.mReportStackTrace,
aKey.mReporterName);
}
static bool Match(const LiveBlockKey& aA, const LiveBlockKey& aB)
{
return aA.mAllocStackTrace == aB.mAllocStackTrace &&
aA.mReportStackTrace == aB.mReportStackTrace &&
aA.mReporterName == aB.mReporterName;
}
};
// This is used by DoubleReportBlockGroups.
class DoubleReportBlockKey
{
public:
const StackTrace* const mAllocStackTrace; // never null
protected:
// When double-reports occur we record (and later print) the stack trace
// and reporter name of *both* the reporting locations.
const StackTrace* const mReportStackTrace1; // never null
const StackTrace* const mReportStackTrace2; // never null
const char* const mReporterName1; // never gUnreportedName
const char* const mReporterName2; // never gUnreportedName
public:
DoubleReportBlockKey(const StackTrace* aAllocStackTrace,
const StackTrace* aReportStackTrace1,
const StackTrace* aReportStackTrace2,
const char* aReporterName1,
const char* aReporterName2)
: mAllocStackTrace(aAllocStackTrace),
mReportStackTrace1(aReportStackTrace1),
mReportStackTrace2(aReportStackTrace2),
mReporterName1(aReporterName1),
mReporterName2(aReporterName2)
{
MOZ_ASSERT(IsSane());
}
bool IsSane() const
{
return mAllocStackTrace &&
mReportStackTrace1 &&
mReportStackTrace2 &&
mReporterName1 != gUnreportedName &&
mReporterName2 != gUnreportedName;
}
// Quasi-hash policy (used by DoubleReportBlockGroup's hash policy).
//
// Hash() and Match() both assume that identical reporter names have
// identical pointers. See LiveBlockKey for more.
static uint32_t Hash(const DoubleReportBlockKey& aKey)
{
return mozilla::HashGeneric(aKey.mAllocStackTrace,
aKey.mReportStackTrace1,
aKey.mReportStackTrace2,
aKey.mReporterName1,
aKey.mReporterName2);
}
static bool Match(const DoubleReportBlockKey& aA,
const DoubleReportBlockKey& aB)
{
return aA.mAllocStackTrace == aB.mAllocStackTrace &&
aA.mReportStackTrace1 == aB.mReportStackTrace1 &&
aA.mReportStackTrace2 == aB.mReportStackTrace2 &&
aA.mReporterName1 == aB.mReporterName1 &&
aA.mReporterName2 == aB.mReporterName2;
}
};
class BlockSize
{
static const size_t kSlopBits = sizeof(size_t) * 8 - 1; // 31 or 63
public:
size_t mReq; // size requested
size_t mSlop:kSlopBits; // additional bytes allocated due to rounding up
size_t mSampled:1; // were one or more blocks contributing to this
// BlockSize sampled?
BlockSize()
: mReq(0),
mSlop(0),
mSampled(false)
{}
BlockSize(size_t aReq, size_t aSlop, bool aSampled)
: mReq(aReq),
mSlop(aSlop),
mSampled(aSampled)
{}
size_t Usable() const { return mReq + mSlop; }
void Add(const BlockSize& aBlockSize)
{
mReq += aBlockSize.mReq;
mSlop += aBlockSize.mSlop;
mSampled = mSampled || aBlockSize.mSampled;
}
static int Cmp(const BlockSize& aA, const BlockSize& aB)
{
// Primary sort: put bigger usable sizes before smaller usable sizes.
if (aA.Usable() > aB.Usable()) return -1;
if (aA.Usable() < aB.Usable()) return 1;
// Secondary sort: put non-sampled groups before sampled groups.
if (!aA.mSampled && aB.mSampled) return -1;
if ( aA.mSampled && !aB.mSampled) return 1;
return 0;
}
};
// A live heap block.
class LiveBlock : public LiveBlockKey
{
public:
const BlockSize mBlockSize;
public:
LiveBlock(size_t aReqSize, size_t aSlopSize,
const StackTrace* aAllocStackTrace, bool aIsExact)
: LiveBlockKey(aAllocStackTrace),
mBlockSize(aReqSize, aSlopSize, aIsExact)
{}
void Report(Thread* aT, const char* aReporterName, bool aReportedOnAlloc);
void UnreportIfNotReportedOnAlloc();
};
// Nb: js::DefaultHasher<void*> is a high quality hasher.
typedef js::HashMap<const void*, LiveBlock, js::DefaultHasher<const void*>,
InfallibleAllocPolicy> LiveBlockTable;
static LiveBlockTable* gLiveBlockTable = nullptr;
//---------------------------------------------------------------------------
// malloc/free callbacks
//---------------------------------------------------------------------------
static size_t gSampleBelowSize = 0;
static size_t gSmallBlockActualSizeCounter = 0;
static void
AllocCallback(void* aPtr, size_t aReqSize, Thread* aT)
{
MOZ_ASSERT(gIsDMDRunning);
if (!aPtr) {
return;
}
AutoLockState lock;
AutoBlockIntercepts block(aT);
size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
size_t slopSize = actualSize - aReqSize;
if (actualSize < gSampleBelowSize) {
// If this allocation is smaller than the sample-below size, increment the
// cumulative counter. Then, if that counter now exceeds the sample size,
// blame this allocation for gSampleBelowSize bytes. This precludes the
// measurement of slop.
gSmallBlockActualSizeCounter += actualSize;
if (gSmallBlockActualSizeCounter >= gSampleBelowSize) {
gSmallBlockActualSizeCounter -= gSampleBelowSize;
LiveBlock b(gSampleBelowSize, /* slopSize */ 0, StackTrace::Get(aT),
/* sampled */ true);
(void)gLiveBlockTable->putNew(aPtr, b);
}
} else {
// If this block size is larger than the sample size, record it exactly.
LiveBlock b(aReqSize, slopSize, StackTrace::Get(aT), /* sampled */ false);
(void)gLiveBlockTable->putNew(aPtr, b);
}
}
static void
FreeCallback(void* aPtr, Thread* aT)
{
MOZ_ASSERT(gIsDMDRunning);
if (!aPtr) {
return;
}
AutoLockState lock;
AutoBlockIntercepts block(aT);
gLiveBlockTable->remove(aPtr);
}
//---------------------------------------------------------------------------
// malloc/free interception
//---------------------------------------------------------------------------
static void Init(const malloc_table_t* aMallocTable);
} // namespace dmd
} // namespace mozilla
void
replace_init(const malloc_table_t* aMallocTable)
{
mozilla::dmd::Init(aMallocTable);
}
void*
replace_malloc(size_t aSize)
{
using namespace mozilla::dmd;
if (!gIsDMDRunning) {
// DMD hasn't started up, either because it wasn't enabled by the user, or
// we're still in Init() and something has indirectly called malloc. Do a
// vanilla malloc. (In the latter case, if it fails we'll crash. But
// OOM is highly unlikely so early on.)
return gMallocTable->malloc(aSize);
}
Thread* t = Thread::Fetch();
if (t->interceptsAreBlocked()) {
// Intercepts are blocked, which means this must be a call to malloc
// triggered indirectly by DMD (e.g. via NS_StackWalk). Be infallible.
return InfallibleAllocPolicy::malloc_(aSize);
}
// This must be a call to malloc from outside DMD. Intercept it.
void* ptr = gMallocTable->malloc(aSize);
AllocCallback(ptr, aSize, t);
return ptr;
}
void*
replace_calloc(size_t aCount, size_t aSize)
{
using namespace mozilla::dmd;
if (!gIsDMDRunning) {
return gMallocTable->calloc(aCount, aSize);
}
Thread* t = Thread::Fetch();
if (t->interceptsAreBlocked()) {
return InfallibleAllocPolicy::calloc_(aCount * aSize);
}
void* ptr = gMallocTable->calloc(aCount, aSize);
AllocCallback(ptr, aCount * aSize, t);
return ptr;
}
void*
replace_realloc(void* aOldPtr, size_t aSize)
{
using namespace mozilla::dmd;
if (!gIsDMDRunning) {
return gMallocTable->realloc(aOldPtr, aSize);
}
Thread* t = Thread::Fetch();
if (t->interceptsAreBlocked()) {
return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
}
// If |aOldPtr| is NULL, the call is equivalent to |malloc(aSize)|.
if (!aOldPtr) {
return replace_malloc(aSize);
}
// Be very careful here! Must remove the block from the table before doing
// the realloc to avoid races, just like in replace_free().
// Nb: This does an unnecessary hashtable remove+add if the block doesn't
// move, but doing better isn't worth the effort.
FreeCallback(aOldPtr, t);
void* ptr = gMallocTable->realloc(aOldPtr, aSize);
if (ptr) {
AllocCallback(ptr, aSize, t);
} else {
// If realloc fails, we re-insert the old pointer. It will look like it
// was allocated for the first time here, which is untrue, and the slop
// bytes will be zero, which may be untrue. But this case is rare and
// doing better isn't worth the effort.
AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
}
return ptr;
}
void*
replace_memalign(size_t aAlignment, size_t aSize)
{
using namespace mozilla::dmd;
if (!gIsDMDRunning) {
return gMallocTable->memalign(aAlignment, aSize);
}
Thread* t = Thread::Fetch();
if (t->interceptsAreBlocked()) {
return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
}
void* ptr = gMallocTable->memalign(aAlignment, aSize);
AllocCallback(ptr, aSize, t);
return ptr;
}
void
replace_free(void* aPtr)
{
using namespace mozilla::dmd;
if (!gIsDMDRunning) {
gMallocTable->free(aPtr);
return;
}
Thread* t = Thread::Fetch();
if (t->interceptsAreBlocked()) {
return InfallibleAllocPolicy::free_(aPtr);
}
// Do the actual free after updating the table. Otherwise, another thread
// could call malloc and get the freed block and update the table, and then
// our update here would remove the newly-malloc'd block.
FreeCallback(aPtr, t);
gMallocTable->free(aPtr);
}
namespace mozilla {
namespace dmd {
//---------------------------------------------------------------------------
// Live and double-report block groups
//---------------------------------------------------------------------------
class BlockGroup
{
protected:
// {Live,DoubleReport}BlockKey serve as the key in
// {Live,DoubleReport}BlockGroupTable. Thes two fields constitute the value,
// so it's ok for them to be |mutable|.
mutable uint32_t mNumBlocks; // number of blocks with this LiveBlockKey
mutable BlockSize mCombinedSize; // combined size of those blocks
public:
BlockGroup()
: mNumBlocks(0),
mCombinedSize()
{}
const BlockSize& CombinedSize() const { return mCombinedSize; }
// The |const| qualifier is something of a lie, but is necessary so this type
// can be used in js::HashSet, and it fits with the |mutable| fields above.
void Add(const LiveBlock& aB) const
{
mNumBlocks++;
mCombinedSize.Add(aB.mBlockSize);
}
static const char* const kName; // for PrintSortedGroups
};
const char* const BlockGroup::kName = "block";
// A group of one or more live heap blocks with a common LiveBlockKey.
class LiveBlockGroup : public LiveBlockKey, public BlockGroup
{
friend class FrameGroup; // FrameGroups are created from LiveBlockGroups
public:
explicit LiveBlockGroup(const LiveBlockKey& aKey)
: LiveBlockKey(aKey),
BlockGroup()
{}
void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const;
static int QsortCmp(const void* aA, const void* aB)
{
const LiveBlockGroup* const a =
*static_cast<const LiveBlockGroup* const*>(aA);
const LiveBlockGroup* const b =
*static_cast<const LiveBlockGroup* const*>(aB);
return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
}
// Hash policy
typedef LiveBlockKey Lookup;
static uint32_t hash(const LiveBlockKey& aKey)
{
return LiveBlockKey::Hash(aKey);
}
static bool match(const LiveBlockGroup& aBg, const LiveBlockKey& aKey)
{
return LiveBlockKey::Match(aBg, aKey);
}
};
typedef js::HashSet<LiveBlockGroup, LiveBlockGroup, InfallibleAllocPolicy>
LiveBlockGroupTable;
void
LiveBlockGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const
{
bool showTilde = mCombinedSize.mSampled;
W("%s: %s block%s in block group %s of %s\n",
aStr,
Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
Show(aM, gBuf2, kBufLen),
Show(aN, gBuf3, kBufLen));
W(" %s bytes (%s requested / %s slop)\n",
Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
Show(mCombinedSize.mReq, gBuf2, kBufLen, showTilde),
Show(mCombinedSize.mSlop, gBuf3, kBufLen, showTilde));
W(" %4.2f%% of the heap (%4.2f%% cumulative); "
" %4.2f%% of %s (%4.2f%% cumulative)\n",
Percent(mCombinedSize.Usable(), aTotalUsableSize),
Percent(aCumulativeUsableSize, aTotalUsableSize),
Percent(mCombinedSize.Usable(), aCategoryUsableSize),
astr,
Percent(aCumulativeUsableSize, aCategoryUsableSize));
W(" Allocated at\n");
mAllocStackTrace->Print(aWriter);
if (IsReported()) {
W("\n Reported by '%s' at\n", mReporterName);
mReportStackTrace->Print(aWriter);
}
W("\n");
}
// A group of one or more double-reported heap blocks with a common
// DoubleReportBlockKey.
class DoubleReportBlockGroup : public DoubleReportBlockKey, public BlockGroup
{
public:
explicit DoubleReportBlockGroup(const DoubleReportBlockKey& aKey)
: DoubleReportBlockKey(aKey),
BlockGroup()
{}
void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const;
static int QsortCmp(const void* aA, const void* aB)
{
const DoubleReportBlockGroup* const a =
*static_cast<const DoubleReportBlockGroup* const*>(aA);
const DoubleReportBlockGroup* const b =
*static_cast<const DoubleReportBlockGroup* const*>(aB);
return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
}
// Hash policy
typedef DoubleReportBlockKey Lookup;
static uint32_t hash(const DoubleReportBlockKey& aKey)
{
return DoubleReportBlockKey::Hash(aKey);
}
static bool match(const DoubleReportBlockGroup& aBg,
const DoubleReportBlockKey& aKey)
{
return DoubleReportBlockKey::Match(aBg, aKey);
}
};
typedef js::HashSet<DoubleReportBlockGroup, DoubleReportBlockGroup,
InfallibleAllocPolicy> DoubleReportBlockGroupTable;
DoubleReportBlockGroupTable* gDoubleReportBlockGroupTable = nullptr;
void
DoubleReportBlockGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize,
size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const
{
bool showTilde = mCombinedSize.mSampled;
W("%s: %s block%s in block group %s of %s\n",
aStr,
Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
Show(aM, gBuf2, kBufLen),
Show(aN, gBuf3, kBufLen));
W(" %s bytes (%s requested / %s slop)\n",
Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
Show(mCombinedSize.mReq, gBuf2, kBufLen, showTilde),
Show(mCombinedSize.mSlop, gBuf3, kBufLen, showTilde));
W(" Allocated at\n");
mAllocStackTrace->Print(aWriter);
W("\n Previously reported by '%s' at\n", mReporterName1);
mReportStackTrace1->Print(aWriter);
W("\n Now reported by '%s' at\n", mReporterName2);
mReportStackTrace2->Print(aWriter);
W("\n");
}
//---------------------------------------------------------------------------
// Stack frame groups
//---------------------------------------------------------------------------
// A group of one or more stack frames (from heap block allocation stack
// traces) with a common PC.
class FrameGroup
{
// mPc is used as the key in FrameGroupTable, and the other members
// constitute the value, so it's ok for them to be |mutable|.
const void* const mPc;
mutable size_t mNumBlocks;
mutable size_t mNumBlockGroups;
mutable BlockSize mCombinedSize;
public:
explicit FrameGroup(const void* aPc)
: mPc(aPc),
mNumBlocks(0),
mNumBlockGroups(0),
mCombinedSize()
{}
const BlockSize& CombinedSize() const { return mCombinedSize; }
// The |const| qualifier is something of a lie, but is necessary so this type
// can be used in js::HashSet, and it fits with the |mutable| fields above.
void Add(const LiveBlockGroup& aBg) const
{
mNumBlocks += aBg.mNumBlocks;
mNumBlockGroups++;
mCombinedSize.Add(aBg.mCombinedSize);
}
void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const;
static int QsortCmp(const void* aA, const void* aB)
{
const FrameGroup* const a = *static_cast<const FrameGroup* const*>(aA);
const FrameGroup* const b = *static_cast<const FrameGroup* const*>(aB);
return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
}
static const char* const kName; // for PrintSortedGroups
// Hash policy
typedef const void* Lookup;
static uint32_t hash(const void* const& aPc)
{
return mozilla::HashGeneric(aPc);
}
static bool match(const FrameGroup& aFg, const void* const& aPc)
{
return aFg.mPc == aPc;
}
};
const char* const FrameGroup::kName = "frame";
typedef js::HashSet<FrameGroup, FrameGroup, InfallibleAllocPolicy>
FrameGroupTable;
void
FrameGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
const char* aStr, const char* astr,
size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
size_t aTotalUsableSize) const
{
(void)aCumulativeUsableSize;
bool showTilde = mCombinedSize.mSampled;
nsCodeAddressDetails details;
PcInfo(mPc, &details);
W("%s: %s block%s and %s block group%s in frame group %s of %s\n",
aStr,
Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
Show(mNumBlockGroups, gBuf2, kBufLen, showTilde), Plural(mNumBlockGroups),
Show(aM, gBuf3, kBufLen),
Show(aN, gBuf4, kBufLen));
W(" %s bytes (%s requested / %s slop)\n",
Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
Show(mCombinedSize.mReq, gBuf2, kBufLen, showTilde),
Show(mCombinedSize.mSlop, gBuf3, kBufLen, showTilde));
W(" %4.2f%% of the heap; %4.2f%% of %s\n",
Percent(mCombinedSize.Usable(), aTotalUsableSize),
Percent(mCombinedSize.Usable(), aCategoryUsableSize),
astr);
W(" PC is\n");
W(" %s[%s +0x%X] %p\n\n", details.function, details.library,
details.loffset, mPc);
}
//---------------------------------------------------------------------------
// DMD start-up
//---------------------------------------------------------------------------
static void RunTestMode(FILE* fp);
static void RunStressMode(FILE* fp);
static const char* gDMDEnvVar = nullptr;
// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
// (where "blah" is non-empty) and return the pointer to "blah". |aArg| can
// have leading space chars (but not other whitespace).
static const char*
OptionValueIfMatch(const char* aArg, const char* aOptionName)
{
MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain
size_t optionLen = strlen(aOptionName);
if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
aArg[optionLen + 1]) {
return aArg + optionLen + 1;
}
return nullptr;
}
// Extracts a |long| value for an option from an argument. It must be within
// the range |aMin..aMax| (inclusive).
static bool
OptionLong(const char* aArg, const char* aOptionName, long aMin, long aMax,
long* aN)
{
if (const char* optionValue = OptionValueIfMatch(aArg, aOptionName)) {
char* endPtr;
*aN = strtol(optionValue, &endPtr, /* base */ 10);
if (!*endPtr && aMin <= *aN && *aN <= aMax &&
*aN != LONG_MIN && *aN != LONG_MAX) {
return true;
}
}
return false;
}
static const size_t gMaxSampleBelowSize = 100 * 1000 * 1000; // bytes
// Default to sampling with a sample-below size that's a prime number close to
// 4096.
//
// Using a sample-below size ~= 4096 is much faster than using a sample-below
// size of 1, and it's not much less accurate in practice, so it's a reasonable
// default.
//
// Using a prime sample-below size makes our sampling more random. If we used
// instead a sample-below size of 4096, for example, then if all our allocation
// sizes were even (which they likely are, due to how jemalloc rounds up), our
// alloc counter would take on only even values.
//
// In contrast, using a prime sample-below size lets us explore all possible
// values of the alloc counter.
static const size_t gDefaultSampleBelowSize = 4093;
static void
BadArg(const char* aArg)
{
StatusMsg("\n");
StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
StatusMsg("\n");
StatusMsg("Valid values of $DMD are:\n");
StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
StatusMsg("- \"1\", which enables it with the default options, or\n");
StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
StatusMsg(" enables it with non-default options.\n");
StatusMsg("\n");
StatusMsg("The following options are allowed; defaults are shown in [].\n");
StatusMsg(" --sample-below=<1..%d> Sample blocks smaller than this [%d]\n"
" (prime numbers recommended).\n",
int(gMaxSampleBelowSize), int(gDefaultSampleBelowSize));
StatusMsg(" --mode=<normal|test|stress> Which mode to run in? [normal]\n");
StatusMsg("\n");
exit(1);
}
// Note that fopen() can allocate.
static FILE*
OpenTestOrStressFile(const char* aFilename)
{
FILE* fp = fopen(aFilename, "w");
if (!fp) {
StatusMsg("can't create %s file: %s\n", aFilename, strerror(errno));
exit(1);
}
return fp;
}
// WARNING: this function runs *very* early -- before all static initializers
// have run. For this reason, non-scalar globals such as gStateLock and
// gStackTraceTable are allocated dynamically (so we can guarantee their
// construction in this function) rather than statically.
static void
Init(const malloc_table_t* aMallocTable)
{
MOZ_ASSERT(!gIsDMDRunning);
gMallocTable = aMallocTable;
// Set defaults of things that can be affected by the $DMD env var.
gMode = Normal;
gSampleBelowSize = gDefaultSampleBelowSize;
// DMD is controlled by the |DMD| environment variable.
// - If it's unset or empty or "0", DMD doesn't run.
// - Otherwise, the contents dictate DMD's behaviour.
char* e = getenv("DMD");
StatusMsg("$DMD = '%s'\n", e);
if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
StatusMsg("DMD is not enabled\n");
return;
}
// Save it so we can print it in Dump().
gDMDEnvVar = e = InfallibleAllocPolicy::strdup_(e);
if (strcmp(e, "1") != 0) {
bool isEnd = false;
while (!isEnd) {
// Consume leading whitespace.
while (isspace(*e)) {
e++;
}
// Save the start of the arg.
const char* arg = e;
// Find the first char after the arg, and temporarily change it to '\0'
// to isolate the arg.
while (!isspace(*e) && *e != '\0') {
e++;
}
char replacedChar = *e;
isEnd = replacedChar == '\0';
*e = '\0';
// Handle arg
long myLong;
if (OptionLong(arg, "--sample-below", 1, gMaxSampleBelowSize, &myLong)) {
gSampleBelowSize = myLong;
} else if (strcmp(arg, "--mode=normal") == 0) {
gMode = Normal;
} else if (strcmp(arg, "--mode=test") == 0) {
gMode = Test;
} else if (strcmp(arg, "--mode=stress") == 0) {
gMode = Stress;
} else if (strcmp(arg, "") == 0) {
// This can only happen if there is trailing whitespace. Ignore.
MOZ_ASSERT(isEnd);
} else {
BadArg(arg);
}
// Undo the temporary isolation.
*e = replacedChar;
}
}
// Finished parsing $DMD.
StatusMsg("DMD is enabled\n");
gStateLock = InfallibleAllocPolicy::new_<Mutex>();
gSmallBlockActualSizeCounter = 0;
DMD_CREATE_TLS_INDEX(gTlsIndex);
gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
gStackTraceTable->init(8192);
gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>();
gLiveBlockTable->init(8192);
gDoubleReportBlockGroupTable =
InfallibleAllocPolicy::new_<DoubleReportBlockGroupTable>();
gDoubleReportBlockGroupTable->init(0);
if (gMode == Test) {
// OpenTestOrStressFile() can allocate. So do this before setting
// gIsDMDRunning so those allocations don't show up in our results. Once
// gIsDMDRunning is set we are intercepting malloc et al. in earnest.
FILE* fp = OpenTestOrStressFile("test.dmd");
gIsDMDRunning = true;
StatusMsg("running test mode...\n");
RunTestMode(fp);
StatusMsg("finished test mode\n");
fclose(fp);
exit(0);
}
if (gMode == Stress) {
FILE* fp = OpenTestOrStressFile("stress.dmd");
gIsDMDRunning = true;
StatusMsg("running stress mode...\n");
RunStressMode(fp);
StatusMsg("finished stress mode\n");
fclose(fp);
exit(0);
}
gIsDMDRunning = true;
}
//---------------------------------------------------------------------------
// DMD reporting and unreporting
//---------------------------------------------------------------------------
void
LiveBlock::Report(Thread* aT, const char* aReporterName, bool aOnAlloc)
{
if (IsReported()) {
DoubleReportBlockKey doubleReportKey(mAllocStackTrace,
mReportStackTrace, StackTrace::Get(aT),
mReporterName, aReporterName);
DoubleReportBlockGroupTable::AddPtr p =
gDoubleReportBlockGroupTable->lookupForAdd(doubleReportKey);
if (!p) {
DoubleReportBlockGroup bg(doubleReportKey);
(void)gDoubleReportBlockGroupTable->add(p, bg);
}
p->Add(*this);
} else {
mReporterName = aReporterName;
mReportStackTrace = StackTrace::Get(aT);
mReportedOnAlloc = aOnAlloc;
}
}
void
LiveBlock::UnreportIfNotReportedOnAlloc()
{
if (!mReportedOnAlloc) {
mReporterName = gUnreportedName;
mReportStackTrace = nullptr;
}
}
static void
ReportHelper(const void* aPtr, const char* aReporterName, bool aOnAlloc)
{
if (!gIsDMDRunning || !aPtr) {
return;
}
Thread* t = Thread::Fetch();
AutoBlockIntercepts block(t);
AutoLockState lock;
if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) {
p->value.Report(t, aReporterName, aOnAlloc);
} else {
// We have no record of the block. Do nothing. Either:
// - We're sampling and we skipped this block. This is likely.
// - It's a bogus pointer. This is unlikely because Report() is almost
// always called in conjunction with a malloc_size_of-style function.
}
}
MOZ_EXPORT void
Report(const void* aPtr, const char* aReporterName)
{
ReportHelper(aPtr, aReporterName, /* onAlloc */ false);
}
MOZ_EXPORT void
ReportOnAlloc(const void* aPtr, const char* aReporterName)
{
ReportHelper(aPtr, aReporterName, /* onAlloc */ true);
}
//---------------------------------------------------------------------------
// DMD output
//---------------------------------------------------------------------------
// This works for LiveBlockGroups, DoubleReportBlockGroups and FrameGroups.
template <class TGroup>
static void
PrintSortedGroups(const Writer& aWriter, const char* aStr, const char* astr,
const js::HashSet<TGroup, TGroup, InfallibleAllocPolicy>& aTGroupTable,
size_t aCategoryUsableSize, size_t aTotalUsableSize)
{
const char* name = TGroup::kName;
StatusMsg(" creating and sorting %s %s group array...\n", astr, name);
// Convert the table into a sorted array.
js::Vector<const TGroup*, 0, InfallibleAllocPolicy> tgArray;
tgArray.reserve(aTGroupTable.count());
typedef js::HashSet<TGroup, TGroup, InfallibleAllocPolicy> TGroupTable;
for (typename TGroupTable::Range r = aTGroupTable.all();
!r.empty();
r.popFront()) {
tgArray.infallibleAppend(&r.front());
}
qsort(tgArray.begin(), tgArray.length(), sizeof(tgArray[0]),
TGroup::QsortCmp);
WriteTitle("%s %ss\n", aStr, name);
if (tgArray.length() == 0) {
W("(none)\n\n");
return;
}
// Limit the number of block groups printed, because fix-linux-stack.pl is
// too damn slow. Note that we don't break out of this loop because we need
// to keep adding to |cumulativeUsableSize|.
static const uint32_t MaxTGroups = 1000;
uint32_t numTGroups = tgArray.length();
StatusMsg(" printing %s %s group array...\n", astr, name);
size_t cumulativeUsableSize = 0;
for (uint32_t i = 0; i < numTGroups; i++) {
const TGroup* tg = tgArray[i];
cumulativeUsableSize += tg->CombinedSize().Usable();
if (i < MaxTGroups) {
tg->Print(aWriter, i+1, numTGroups, aStr, astr, aCategoryUsableSize,
cumulativeUsableSize, aTotalUsableSize);
} else if (i == MaxTGroups) {
W("%s: stopping after %s %s groups\n\n", aStr,
Show(MaxTGroups, gBuf1, kBufLen), name);
}
}
MOZ_ASSERT(aCategoryUsableSize == kNoSize ||
aCategoryUsableSize == cumulativeUsableSize);
}
static void
PrintSortedBlockAndFrameGroups(const Writer& aWriter,
const char* aStr, const char* astr,
const LiveBlockGroupTable& aLiveBlockGroupTable,
size_t aCategoryUsableSize,
size_t aTotalUsableSize)
{
PrintSortedGroups(aWriter, aStr, astr, aLiveBlockGroupTable,
aCategoryUsableSize, aTotalUsableSize);
// Frame groups are totally dependent on vagaries of stack traces, so we
// can't show them in test mode.
if (gMode == Test) {
return;
}
FrameGroupTable frameGroupTable;
frameGroupTable.init(2048);
for (LiveBlockGroupTable::Range r = aLiveBlockGroupTable.all();
!r.empty();
r.popFront()) {
const LiveBlockGroup& bg = r.front();
const StackTrace* st = bg.mAllocStackTrace;
// A single PC can appear multiple times in a stack trace. We ignore
// duplicates by first sorting and then ignoring adjacent duplicates.
StackTrace sorted(*st);
sorted.Sort(); // sorts the copy, not the original
void* prevPc = (void*)intptr_t(-1);
for (uint32_t i = 0; i < sorted.Length(); i++) {
void* pc = sorted.Pc(i);
if (pc == prevPc) {
continue; // ignore duplicate
}
prevPc = pc;
FrameGroupTable::AddPtr p = frameGroupTable.lookupForAdd(pc);
if (!p) {
FrameGroup fg(pc);
(void)frameGroupTable.add(p, fg);
}
p->Add(bg);
}
}
PrintSortedGroups(aWriter, aStr, astr, frameGroupTable, kNoSize,
aTotalUsableSize);
}
// This is only needed because of the |const void*| vs |void*| arg mismatch.
static size_t
MallocSizeOf(const void* aPtr)
{
return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
}
// Note that, unlike most SizeOf* functions, this function does not take a
// |nsMallocSizeOfFun| argument. That's because those arguments are primarily
// to aid DMD track heap blocks... but DMD deliberately doesn't track heap
// blocks it allocated for itself!
MOZ_EXPORT void
SizeOf(Sizes* aSizes)
{
aSizes->mStackTraces = 0;
for (StackTraceTable::Range r = gStackTraceTable->all();
!r.empty();
r.popFront()) {
StackTrace* const& st = r.front();
aSizes->mStackTraces += MallocSizeOf(st);
}
aSizes->mStackTraceTable =
gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);
aSizes->mLiveBlockTable = gLiveBlockTable->sizeOfIncludingThis(MallocSizeOf);
aSizes->mDoubleReportTable =
gDoubleReportBlockGroupTable->sizeOfIncludingThis(MallocSizeOf);
}
static void
ClearState()
{
// Unreport all blocks, except those that were reported on allocation,
// because they need to keep their reported marking.
for (LiveBlockTable::Range r = gLiveBlockTable->all();
!r.empty();
r.popFront()) {
r.front().value.UnreportIfNotReportedOnAlloc();
}
// Clear errors.
gDoubleReportBlockGroupTable->finish();
gDoubleReportBlockGroupTable->init();
}
MOZ_EXPORT void
Dump(Writer aWriter)
{
if (!gIsDMDRunning) {
const char* msg = "cannot Dump(); DMD was not enabled at startup\n";
StatusMsg("%s", msg);
W("%s", msg);
return;
}
AutoBlockIntercepts block(Thread::Fetch());
AutoLockState lock;
static int dumpCount = 1;
StatusMsg("Dump %d {\n", dumpCount++);
StatusMsg(" gathering live block groups...\n");
LiveBlockGroupTable unreportedLiveBlockGroupTable;
(void)unreportedLiveBlockGroupTable.init(1024);
size_t unreportedUsableSize = 0;
LiveBlockGroupTable reportedLiveBlockGroupTable;
(void)reportedLiveBlockGroupTable.init(1024);
size_t reportedUsableSize = 0;
bool anyBlocksSampled = false;
for (LiveBlockTable::Range r = gLiveBlockTable->all();
!r.empty();
r.popFront()) {
const LiveBlock& b = r.front().value;
size_t& size = !b.IsReported() ? unreportedUsableSize : reportedUsableSize;
size += b.mBlockSize.Usable();
LiveBlockGroupTable& table = !b.IsReported()
? unreportedLiveBlockGroupTable
: reportedLiveBlockGroupTable;
LiveBlockGroupTable::AddPtr p = table.lookupForAdd(b);
if (!p) {
LiveBlockGroup bg(b);
(void)table.add(p, bg);
}
p->Add(b);
anyBlocksSampled = anyBlocksSampled || b.mBlockSize.mSampled;
}
size_t totalUsableSize = unreportedUsableSize + reportedUsableSize;
WriteTitle("Invocation\n");
W("$DMD = '%s'\n", gDMDEnvVar);
W("Sample-below size = %lld\n\n", (long long)(gSampleBelowSize));
PrintSortedGroups(aWriter, "Double-reported", "double-reported",
*gDoubleReportBlockGroupTable, kNoSize, kNoSize);
PrintSortedBlockAndFrameGroups(aWriter, "Unreported", "unreported",
unreportedLiveBlockGroupTable,
unreportedUsableSize, totalUsableSize);
PrintSortedBlockAndFrameGroups(aWriter, "Reported", "reported",
reportedLiveBlockGroupTable,
reportedUsableSize, totalUsableSize);
bool showTilde = anyBlocksSampled;
WriteTitle("Summary\n");
W("Total: %s bytes\n",
Show(totalUsableSize, gBuf1, kBufLen, showTilde));
W("Reported: %s bytes (%5.2f%%)\n",
Show(reportedUsableSize, gBuf1, kBufLen, showTilde),
Percent(reportedUsableSize, totalUsableSize));
W("Unreported: %s bytes (%5.2f%%)\n",
Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
Percent(unreportedUsableSize, totalUsableSize));
W("\n");
// Stats are non-deterministic, so don't show them in test mode.
if (gMode != Test) {
Sizes sizes;
SizeOf(&sizes);
WriteTitle("Execution measurements\n");
W("Data structures that persist after Dump() ends:\n");
W(" Stack traces: %10s bytes\n",
Show(sizes.mStackTraces, gBuf1, kBufLen));
W(" Stack trace table: %10s bytes (%s entries, %s used)\n",
Show(sizes.mStackTraceTable, gBuf1, kBufLen),
Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
Show(gStackTraceTable->count(), gBuf3, kBufLen));
W(" Live block table: %10s bytes (%s entries, %s used)\n",
Show(sizes.mLiveBlockTable, gBuf1, kBufLen),
Show(gLiveBlockTable->capacity(), gBuf2, kBufLen),
Show(gLiveBlockTable->count(), gBuf3, kBufLen));
W("\nData structures that are cleared after Dump() ends:\n");
W(" Double-report table: %10s bytes (%s entries, %s used)\n",
Show(sizes.mDoubleReportTable, gBuf1, kBufLen),
Show(gDoubleReportBlockGroupTable->capacity(), gBuf2, kBufLen),
Show(gDoubleReportBlockGroupTable->count(), gBuf3, kBufLen));
size_t unreportedSize =
unreportedLiveBlockGroupTable.sizeOfIncludingThis(MallocSizeOf);
W(" Unreported table: %10s bytes (%s entries, %s used)\n",
Show(unreportedSize, gBuf1, kBufLen),
Show(unreportedLiveBlockGroupTable.capacity(), gBuf2, kBufLen),
Show(unreportedLiveBlockGroupTable.count(), gBuf3, kBufLen));
size_t reportedSize =
reportedLiveBlockGroupTable.sizeOfIncludingThis(MallocSizeOf);
W(" Reported table: %10s bytes (%s entries, %s used)\n",
Show(reportedSize, gBuf1, kBufLen),
Show(reportedLiveBlockGroupTable.capacity(), gBuf2, kBufLen),
Show(reportedLiveBlockGroupTable.count(), gBuf3, kBufLen));
W("\n");
}
ClearState();
StatusMsg("}\n");
}
//---------------------------------------------------------------------------
// Testing
//---------------------------------------------------------------------------
// This function checks that heap blocks that have the same stack trace but
// different (or no) reporters get aggregated separately.
void foo()
{
char* a[6];
for (int i = 0; i < 6; i++) {
a[i] = (char*) malloc(128 - 16*i);
}
for (int i = 0; i <= 1; i++)
Report(a[i], "a01"); // reported
Report(a[2], "a23"); // reported
Report(a[3], "a23"); // reported
// a[4], a[5] unreported
}
// This stops otherwise-unused variables from being optimized away.
static void
UseItOrLoseIt(void* a)
{
if (a == 0) {
fprintf(stderr, "UseItOrLoseIt: %p\n", a);
}
}
// The output from this should be compared against test-expected.dmd. It's
// been tested on Linux64, and probably will give different results on other
// platforms.
static void
RunTestMode(FILE* fp)
{
Writer writer(FpWrite, fp);
// The first part of this test requires sampling to be disabled.
gSampleBelowSize = 1;
// 0th Dump. Zero for everything.
Dump(writer);
// 1st Dump: 1 freed, 9 out of 10 unreported.
// 2nd Dump: still present and unreported.
int i;
char* a;
for (i = 0; i < 10; i++) {
a = (char*) malloc(100);
UseItOrLoseIt(a);
}
free(a);
// Min-sized block.
// 1st Dump: reported.
// 2nd Dump: re-reported, twice; double-report warning.
char* a2 = (char*) malloc(0);
Report(a2, "a2");
// Operator new[].
// 1st Dump: reported.
// 2nd Dump: reportedness carries over, due to ReportOnAlloc.
char* b = new char[10];
ReportOnAlloc(b, "b");
// ReportOnAlloc, then freed.
// 1st Dump: freed, irrelevant.
// 2nd Dump: freed, irrelevant.
char* b2 = new char;
ReportOnAlloc(b2, "b2");
free(b2);
// 1st Dump: reported, plus 3 double-report warnings.
// 2nd Dump: freed, irrelevant.
char* c = (char*) calloc(10, 3);
Report(c, "c");
for (int i = 0; i < 3; i++) {
Report(c, "c");
}
// 1st Dump: ignored.
// 2nd Dump: irrelevant.
Report((void*)(intptr_t)i, "d");
// jemalloc rounds this up to 8192.
// 1st Dump: reported.
// 2nd Dump: freed.
char* e = (char*) malloc(4096);
e = (char*) realloc(e, 4097);
Report(e, "e");
// First realloc is like malloc; second realloc is shrinking.
// 1st Dump: reported.
// 2nd Dump: re-reported.
char* e2 = (char*) realloc(nullptr, 1024);
e2 = (char*) realloc(e2, 512);
Report(e2, "e2");
// First realloc is like malloc; second realloc creates a min-sized block.
// 1st Dump: reported (re-use "a2" reporter name because the order of this
// report and the "a2" above is non-deterministic).
// 2nd Dump: freed, irrelevant.
char* e3 = (char*) realloc(nullptr, 1024);
e3 = (char*) realloc(e3, 0);
MOZ_ASSERT(e3);
Report(e3, "a2");
// 1st Dump: freed, irrelevant.
// 2nd Dump: freed, irrelevant.
char* f = (char*) malloc(64);
free(f);
// 1st Dump: ignored.
// 2nd Dump: irrelevant.
Report((void*)(intptr_t)0x0, "zero");
// 1st Dump: mixture of reported and unreported.
// 2nd Dump: all unreported.
foo();
foo();
// All the odd-ball ones.
// 1st Dump: all unreported.
// 2nd Dump: all freed, irrelevant.
// XXX: no memalign on Mac
//void* x = memalign(64, 65); // rounds up to 128
//UseItOrLoseIt(x);
// XXX: posix_memalign doesn't work on B2G, apparently
//void* y;
//posix_memalign(&y, 128, 129); // rounds up to 256
//UseItOrLoseIt(y);
void* z = valloc(1); // rounds up to 4096
UseItOrLoseIt(z);
//aligned_alloc(64, 256); // XXX: C11 only
// 1st Dump.
Dump(writer);
//---------
Report(a2, "a2b");
Report(a2, "a2b");
free(c);
free(e);
Report(e2, "e2b");
free(e3);
//free(x);
//free(y);
free(z);
// 2nd Dump.
Dump(writer);
//---------
// Clear all knowledge of existing blocks to give us a clean slate.
gLiveBlockTable->clear();
// Reset the counter just in case |sample-size| was specified in $DMD.
// Otherwise the assertions fail.
gSmallBlockActualSizeCounter = 0;
gSampleBelowSize = 128;
char* s;
// This equals the sample size, and so is recorded exactly. It should be
// listed before groups of the same size that are sampled.
s = (char*) malloc(128);
UseItOrLoseIt(s);
// This exceeds the sample size, and so is recorded exactly.
s = (char*) malloc(144);
UseItOrLoseIt(s);
// These together constitute exactly one sample.
for (int i = 0; i < 16; i++) {
s = (char*) malloc(8);
UseItOrLoseIt(s);
}
MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
// These fall 8 bytes short of a full sample.
for (int i = 0; i < 15; i++) {
s = (char*) malloc(8);
UseItOrLoseIt(s);
}
MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
// This exceeds the sample size, and so is recorded exactly.
s = (char*) malloc(256);
UseItOrLoseIt(s);
MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
// This gets more than to a full sample from the |i < 15| loop above.
s = (char*) malloc(96);
UseItOrLoseIt(s);
MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);
// This gets to another full sample.
for (int i = 0; i < 5; i++) {
s = (char*) malloc(8);
UseItOrLoseIt(s);
}
MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
// This allocates 16, 32, ..., 128 bytes, which results a block group that
// contains a mix of sample and non-sampled blocks, and so should be printed
// with '~' signs.
for (int i = 1; i <= 8; i++) {
s = (char*) malloc(i * 16);
UseItOrLoseIt(s);
}
MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);
// At the end we're 64 bytes into the current sample so we report ~1,424
// bytes of allocation overall, which is 64 less than the real value 1,488.
Dump(writer);
}
//---------------------------------------------------------------------------
// Stress testing microbenchmark
//---------------------------------------------------------------------------
MOZ_NEVER_INLINE static void
stress5()
{
for (int i = 0; i < 10; i++) {
void* x = malloc(64);
UseItOrLoseIt(x);
if (i & 1) {
free(x);
}
}
}
MOZ_NEVER_INLINE static void
stress4()
{
stress5(); stress5(); stress5(); stress5(); stress5();
stress5(); stress5(); stress5(); stress5(); stress5();
}
MOZ_NEVER_INLINE static void
stress3()
{
for (int i = 0; i < 10; i++) {
stress4();
}
}
MOZ_NEVER_INLINE static void
stress2()
{
stress3(); stress3(); stress3(); stress3(); stress3();
stress3(); stress3(); stress3(); stress3(); stress3();
}
MOZ_NEVER_INLINE static void
stress1()
{
for (int i = 0; i < 10; i++) {
stress2();
}
}
// This stress test does lots of allocations and frees, which is where most of
// DMD's overhead occurs. It allocates 1,000,000 64-byte blocks, spread evenly
// across 1,000 distinct stack traces. It frees every second one immediately
// after allocating it.
//
// It's highly artificial, but it's deterministic and easy to run. It can be
// timed under different conditions to glean performance data.
static void
RunStressMode(FILE* fp)
{
Writer writer(FpWrite, fp);
// Disable sampling for maximum stress.
gSampleBelowSize = 1;
stress1(); stress1(); stress1(); stress1(); stress1();
stress1(); stress1(); stress1(); stress1(); stress1();
Dump(writer);
}
} // namespace dmd
} // namespace mozilla