/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef LulMain_h #define LulMain_h #include // pthread_t #include #include "LulPlatformMacros.h" #include "LulRWLock.h" // LUL: A Lightweight Unwind Library. // This file provides the end-user (external) interface for LUL. // Some comments about naming in the implementation. These are safe // to ignore if you are merely using LUL, but are important if you // hack on its internals. // // Debuginfo readers in general have tended to use the word "address" // to mean several different things. This sometimes makes them // difficult to understand and maintain. LUL tries hard to avoid // using the word "address" and instead uses the following more // precise terms: // // * SVMA ("Stated Virtual Memory Address"): this is an address of a // symbol (etc) as it is stated in the symbol table, or other // metadata, of an object. Such values are typically small and // start from zero or thereabouts, unless the object has been // prelinked. // // * AVMA ("Actual Virtual Memory Address"): this is the address of a // symbol (etc) in a running process, that is, once the associated // object has been mapped into a process. Such values are typically // much larger than SVMAs, since objects can get mapped arbitrarily // far along the address space. // // * "Bias": the difference between AVMA and SVMA for a given symbol // (specifically, AVMA - SVMA). The bias is always an integral // number of pages. Once we know the bias for a given object's // text section (for example), we can compute the AVMAs of all of // its text symbols by adding the bias to their SVMAs. // // * "Image address": typically, to read debuginfo from an object we // will temporarily mmap in the file so as to read symbol tables // etc. Addresses in this temporary mapping are called "Image // addresses". Note that the temporary mapping is entirely // unrelated to the mappings of the file that the dynamic linker // must perform merely in order to get the program to run. Hence // image addresses are unrelated to either SVMAs or AVMAs. namespace lul { // A machine word plus validity tag. class TaggedUWord { public: // Construct a valid one. TaggedUWord(uintptr_t w) : mValue(w) , mValid(true) {} // Construct an invalid one. TaggedUWord() : mValue(0) , mValid(false) {} // Add in a second one. void Add(TaggedUWord other) { if (mValid && other.Valid()) { mValue += other.Value(); } else { mValue = 0; mValid = false; } } // Is it word-aligned? bool IsAligned() const { return mValid && (mValue & (sizeof(uintptr_t)-1)) == 0; } uintptr_t Value() const { return mValue; } bool Valid() const { return mValid; } private: uintptr_t mValue; bool mValid; }; // The registers, with validity tags, that will be unwound. struct UnwindRegs { #if defined(LUL_ARCH_arm) TaggedUWord r7; TaggedUWord r11; TaggedUWord r12; TaggedUWord r13; TaggedUWord r14; TaggedUWord r15; #elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) TaggedUWord xbp; TaggedUWord xsp; TaggedUWord xip; #else # error "Unknown plat" #endif }; // The maximum number of bytes in a stack snapshot. This can be // increased if necessary, but larger values cost performance, since a // stack snapshot needs to be copied between sampling and worker // threads for each snapshot. In practice 32k seems to be enough // to get good backtraces. static const size_t N_STACK_BYTES = 32768; // The stack chunk image that will be unwound. struct StackImage { // [start_avma, +len) specify the address range in the buffer. // Obviously we require 0 <= len <= N_STACK_BYTES. uintptr_t mStartAvma; size_t mLen; uint8_t mContents[N_STACK_BYTES]; }; // The core unwinder library class. Just one of these is needed, and // it can be shared by multiple unwinder threads. // // Access to the library is mediated by a single reader-writer lock. // All attempts to change the library's internal shared state -- that // is, loading or unloading unwind info -- are forced single-threaded // by causing the called routine to acquire a write-lock. Unwind // requests do not change the library's internal shared state and // therefore require only a read-lock. Hence multiple threads can // unwind in parallel. // // The library needs to maintain state which is private to each // unwinder thread -- the CFI (Dwarf Call Frame Information) fast // cache. Hence unwinder threads first need to register with the // library, so their identities are known. Also, for maximum // effectiveness of the CFI caching, it is preferable to have a small // number of very-busy unwinder threads rather than a large number of // mostly-idle unwinder threads. // // None of the methods may be safely called from within a signal // handler, since this risks deadlock. In particular this means // a thread may not unwind itself from within a signal handler // frame. It might be safe to call Unwind() on its own stack // from not-inside a signal frame, although even that cannot be // guaranteed deadlock free. class PriMap; class SegArray; class CFICache; class LUL { public: // Create; supply a logging sink. Initialises the rw-lock. LUL(void (*aLog)(const char*)); // Destroy. This acquires mRWlock for writing. By doing that, waits // for all unwinder threads to finish any Unwind() calls they may be // in. All resources are freed and all registered unwinder threads // are deregistered. ~LUL(); // Notify of a new r-x mapping, and load the associated unwind info. // The filename is strdup'd and used for debug printing. If // aMappedImage is NULL, this function will mmap/munmap the file // itself, so as to be able to read the unwind info. If // aMappedImage is non-NULL then it is assumed to point to a // called-supplied and caller-managed mapped image of the file. // // Acquires mRWlock for writing. This must be called only after the // code area in question really has been mapped. void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, const void* aMappedImage); // In rare cases we know an executable area exists but don't know // what the associated file is. This call notifies LUL of such // areas. This is important for correct functioning of stack // scanning and of the x86-{linux,android} special-case // __kernel_syscall function handling. Acquires mRWlock for // writing. This must be called only after the code area in // question really has been mapped. void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); // Notify that a mapped area has been unmapped; discard any // associated unwind info. Acquires mRWlock for writing. Note that // to avoid segfaulting the stack-scan unwinder, which inspects code // areas, this must be called before the code area in question is // really unmapped. Note that, unlike NotifyAfterMap(), this // function takes the start and end addresses of the range to be // unmapped, rather than a start and a length parameter. This is so // as to make it possible to notify an unmap for the entire address // space using a single call. void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); // Apply NotifyBeforeUnmap to the entire address space. This causes // LUL to discard all unwind and executable-area information for the // entire address space. void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); } // Returns the number of mappings currently registered. Acquires // mRWlock for writing. size_t CountMappings(); // Register the calling thread for unwinding. Acquires mRWlock for // writing. void RegisterUnwinderThread(); // Unwind |aStackImg| starting with the context in |aStartRegs|. // Write the number of frames recovered in *aFramesUsed. Put // the PC values in aFramePCs[0 .. *aFramesUsed-1] and // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. // |aFramesAvail| is the size of the two output arrays and hence the // largest possible value of *aFramesUsed. PC values are always // valid, and the unwind will stop when the PC becomes invalid, but // the SP values might be invalid, in which case the value zero will // be written in the relevant frameSPs[] slot. // // Unwinding may optionally use stack scanning. The maximum number // of frames that may be recovered by stack scanning is // |aScannedFramesAllowed| and the actual number recovered is // written into *aScannedFramesAcquired. |aScannedFramesAllowed| // must be less than or equal to |aFramesAvail|. // // This function assumes that the SP values increase as it unwinds // away from the innermost frame -- that is, that the stack grows // down. It monitors SP values as it unwinds to check they // decrease, so as to avoid looping on corrupted stacks. // // Acquires mRWlock for reading. Hence multiple threads may unwind // at once, but no thread may be unwinding whilst the library loads // or discards unwind information. Returns false if the calling // thread is not registered for unwinding. // // Up to aScannedFramesAllowed stack-scanned frames may be recovered. // // The calling thread must previously have registered itself via // RegisterUnwinderThread. void Unwind(/*OUT*/uintptr_t* aFramePCs, /*OUT*/uintptr_t* aFrameSPs, /*OUT*/size_t* aFramesUsed, /*OUT*/size_t* aScannedFramesAcquired, size_t aFramesAvail, size_t aScannedFramesAllowed, UnwindRegs* aStartRegs, StackImage* aStackImg); // The logging sink. Call to send debug strings to the caller- // specified destination. void (*mLog)(const char*); private: // Invalidate the caches. Requires mRWlock to be held for writing; // does not acquire it itself. void InvalidateCFICaches(); // The one-and-only lock, a reader-writer lock, for the library. LulRWLock* mRWlock; // The top level mapping from code address ranges to postprocessed // unwind info. Basically a sorted array of (addr, len, info) // records. Threads wishing to query this field must hold mRWlock // for reading. Threads wishing to modify this field must hold // mRWlock for writing. This field is updated by NotifyAfterMap and // NotifyBeforeUnmap. PriMap* mPriMap; // An auxiliary structure that records which address ranges are // mapped r-x, for the benefit of the stack scanner. Threads // wishing to query this field must hold mRWlock for reading. // Threads wishing to modify this field must hold mRWlock for // writing. SegArray* mSegArray; // The thread-local data: a mapping from threads to CFI-fast-caches. // Threads wishing to query this field must hold mRWlock for // reading. Threads wishing to modify this field must hold mRWlock // for writing. // // The CFICaches themselves are thread-local and can be both read // and written when mRWlock is held for reading. It would probably // be faster to use the pthread_{set,get}specific functions, but // also more difficult. This map is queried once per unwind, in // order to get hold of the CFI cache for a given thread. std::map mCaches; }; // Run unit tests on an initialised, loaded-up LUL instance, and print // summary results on |aLUL|'s logging sink. Also return the number // of tests run in *aNTests and the number that passed in // *aNTestsPassed. void RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL); } // namespace lul #endif // LulMain_h