Files
UnrealEngineUWP/Engine/Source/Developer/Windows/LiveCodingServer/Private/External/LC_SymbolReconstruction.cpp
Ben Marsh e1fe0cc030 Integrating live coding feature (aka Live++) into UE4.
Allows fast iteration of C++ changes without restarting the application. To use, select the "Live Coding (Experimental)" mode from the drop down menu next to the editor's compile button, or type "LiveCoding" into the console for a monolithic build. Press Ctrl+Alt+F11 to find changes and compile.

Changes vs standalone Live++ version:

* UBT is used to execute builds. This allows standard UE4 adaptive unity mode, allows us to reuse object files when we do regular builds, and supports using any build executor allowed by UBT (XGE, SNDBS, etc.).
* Adding new source files is supported.
* Custom visualizer for FNames is supported via a weakly linked symbol in a static library (Engine/Extras/NatvisHelpers).
* Settings are exposed in the editor's project settings dialog.
* Standalone application has been rewritten as a Slate app ("LiveCodingConsole"). There is an additional option to start the program as hidden, where it will not be visible until Ctrl+Alt+F11 is hit. Similarly, closing the window will hide it instead of closing the application.
* Does not require a standalone licensed version of Live++.

Known issues:

* Does not currently support class layout changes / object reinstancing

#rb none
#fyi Marc.Audy, Stefan.Boberg, Nick.Penwarden
#jira

[CL 5304722 by Ben Marsh in 4.22 branch]
2019-03-05 15:54:02 -05:00

766 lines
29 KiB
C++

// Copyright 2011-2019 Molecular Matters GmbH, all rights reserved.
#include "LC_SymbolReconstruction.h"
#include "LC_Allocators.h"
#include "LC_StringUtil.h"
#include "LC_PointerUtil.h"
#include "LC_NameMangling.h"
#include "LC_DiaUtil.h"
namespace
{
	// Ordering predicate handed to std::lower_bound when searching the contribution list:
	// answers whether the given contribution starts below the RVA being searched for.
	static inline bool HasLowerRVA(const symbols::Contribution* lhs, uint32_t rva)
	{
		return (rva > lhs->rva);
	}
}
// Reconstructs the RVAs of symbols belonging to one COFF (.obj) file that are not yet part of symbolDB,
// and registers every symbol it finds in symbolDB (both symbolsByName and symbolsByRva).
//
// The function alternates between two phases until either all unknown symbols have been found or a pass
// makes no further progress:
//   1) relocation walk: starting from symbols whose RVA is known, each relocation of the symbol is read
//      back from the already-linked image in order to backtrack the RVA of the relocation's destination symbol.
//   2) contribution matching: remaining static functions/data are located by matching the COFF section
//      they live in against section contributions of the image (same size, same compiland, matching name).
//      symbols found this way are fed back into phase 1.
//
// Parameters:
//   provider        - its diaSession is used for DIA (PDB) symbol/label lookups by RVA
//   image           - the linked executable image that relocated values are read from
//   imageSections   - section database handed to executable::ReadFromImage alongside the image
//   coffDb          - symbols, sections and relocations of the .obj file being processed
//   strippedSymbols - names of symbols that must be ignored everywhere
//   objPath         - path of the .obj file; compared against the compiland name of contributions
//   compilandDb     - used to resolve the compiland name of a contribution
//   contributionDb  - section contributions of the image
//   thunkDb         - used to follow incremental-linking thunks to their real target
//   imageSectionDb  - lookup of image sections by name
//   symbolDB        - in/out: consulted for known symbols, extended with every reconstructed symbol
//   diaSymbolCache  - in/out: cache of DIA symbol names keyed by RVA (an empty name is stored for failed lookups)
void symbols::ReconstructFromExecutableCoff
(
	const symbols::Provider* provider,
	const executable::Image* image,
	const executable::ImageSectionDB* imageSections,
	const coff::CoffDB* coffDb,
	const types::StringSet& strippedSymbols,
	const symbols::ObjPath& objPath,
	const symbols::CompilandDB* compilandDb,
	const symbols::ContributionDB* contributionDb,
	const symbols::ThunkDB* thunkDb,
	const symbols::ImageSectionDB* imageSectionDb,
	symbols::SymbolDB* symbolDB,
	DiaSymbolCache* diaSymbolCache
)
{
	const executable::PreferredBase imageBase = executable::GetPreferredBase(image);
	const uint32_t imageSize = executable::GetSize(image);

	LC_LOG_DEV("Gathering symbols from COFF file %s", objPath.c_str());
	LC_LOG_INDENT_DEV;

	// container sizes are size_t, which must be printed using %zu
	LC_LOG_DEV("Symbols in COFF: %zu", coffDb->symbols.size());
	LC_LOG_DEV("Symbols stripped: %zu", strippedSymbols.size());

	// gather symbols by following relocation "paths", backtracking from the location in the executable
	// to the symbol's origin RVA. our starting entry paths are the functions and data of which we already
	// know the name and RVA.
	size_t unknownSymbolsToFind = 0u;
	types::vector<const coff::Symbol*> openSymbols;
	openSymbols.reserve(coffDb->symbols.size());
	{
		const size_t count = coff::GetIndexCount(coffDb);
		for (size_t i = 0u; i < count; ++i)
		{
			// do we have a symbol at that index?
			const coff::Symbol* symbol = coff::GetSymbolByIndex(coffDb, i);
			if (symbol)
			{
				// yes, so check whether this symbol is known already
				const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
				const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
				if (srcSymbol)
				{
					LC_LOG_DEV("Known symbol %s at 0x%X", symbolName.c_str(), srcSymbol->rva);
					openSymbols.push_back(symbol);
				}
				else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
				{
					LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
				}
				else
				{
					LC_LOG_DEV("Unknown symbol %s", symbolName.c_str());
					++unknownSymbolsToFind;
				}
			}
			else
			{
				// we do not have a symbol stored in the COFF, because it might be external/unresolved.
				// if so, chances are very high that this symbol is already known publicly.
				const ImmutableString& symbolName = coff::GetUnresolvedSymbolName(coffDb, i);
				if (symbolName.GetLength() == 0u)
				{
					continue;
				}

				if (symbols::FindSymbolByName(symbolDB, symbolName))
				{
					LC_LOG_DEV("Publicly known symbol %s", symbolName.c_str());
				}
				else if (!coff::IsInterestingSymbol(symbolName))
				{
					// relocations to those symbols are not stored in the COFF, hence we
					// can not reconstruct these anyway
					LC_LOG_DEV("Non-interesting symbol %s", symbolName.c_str());
				}
				else if (symbols::IsImageBaseRelatedSymbol(symbolName))
				{
					LC_LOG_DEV("Linker-generated image base symbol %s", symbolName.c_str());
				}
				else if (symbols::IsTlsArrayRelatedSymbol(symbolName))
				{
					LC_LOG_DEV("Compiler-generated symbol %s", symbolName.c_str());
				}
				else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
				{
					LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
				}
				else if (symbolDB->symbolsWithoutRva.find(symbolName) != symbolDB->symbolsWithoutRva.end())
				{
					// ignore symbols without an RVA. those are often generated by the compiler or linker,
					// are being relocated to, but store absolute values encoded in their offset in the PDB.
					LC_LOG_DEV("Compiler- or linker-generated symbol %s without an RVA", symbolName.c_str());
				}
				else
				{
					LC_LOG_DEV("Unknown unresolved symbol %s", symbolName.c_str());
					++unknownSymbolsToFind;
				}
			}
		}
	}

	LC_LOG_DEV("Unknown symbols left to find: %zu", unknownSymbolsToFind);

	// do we already know all symbols?
	if (unknownSymbolsToFind == 0u)
	{
		LC_LOG_DEV("Know all symbols already, nothing to do");
		return;
	}

	// keep walking relocations of all open symbols to determine the RVA of symbols contained in this .obj
	types::unordered_set<const coff::Symbol*> walkedAlready;
	types::unordered_set<const coff::Symbol*> triedReconstructingAlready;
	unsigned int pass = 0u;

	// re-entry point for additional passes: phase 2 below jumps back here whenever it found new symbols
	// whose relocations have not been walked yet.
walkOpenSymbols:
	while (openSymbols.size() > 0u)
	{
		const coff::Symbol* symbol = openSymbols.back();
		openSymbols.pop_back();

		// check whether we walked this symbol already
		const auto it = walkedAlready.find(symbol);
		if (it != walkedAlready.end())
		{
			// handled already, nothing more to do
			continue;
		}

		// check whether the symbol is actually the one that contributed its code.
		// in case of COMDATs available in both executable and static libraries, this might not
		// be true and would lead to completely wrong symbols being reconstructed.
		const ImmutableString& srcSymbolName = coff::GetSymbolName(coffDb, symbol);
		const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, srcSymbolName);
		if (srcSymbol)
		{
			const symbols::Contribution* symbolContribution = symbols::FindContributionByRVA(contributionDb, srcSymbol->rva);
			if (symbolContribution)
			{
				const ImmutableString& contributingCompiland = symbols::GetContributionCompilandName(compilandDb, contributionDb, symbolContribution);
				if (contributingCompiland != objPath)
				{
					LC_LOG_DEV("Not walking symbol %s from contribution in different file %s", srcSymbolName.c_str(), contributingCompiland.c_str());
					continue;
				}
			}
		}

		LC_LOG_DEV("Walking relocations of symbol %s", srcSymbolName.c_str());
		LC_LOG_INDENT_DEV;

		const size_t relocationCount = symbol->relocations.size();
		for (size_t i = 0u; i < relocationCount; ++i)
		{
			const coff::Relocation* relocation = symbol->relocations[i];
			const ImmutableString& dstSymbolName = coff::GetRelocationDstSymbolName(coffDb, relocation);

			// the symbol we are looking for might already be in the database because of the public symbols gathered from the PDB
			if (symbols::FindSymbolByName(symbolDB, dstSymbolName))
			{
				LC_LOG_DEV("Publicly known symbol %s", dstSymbolName.c_str());

				// we know this symbol already, but we might not have walked its relocations yet.
				// add it to the list and continue.
				const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
				if (nextSymbol)
				{
					openSymbols.push_back(nextSymbol);
				}
				continue;
			}
			else if (strippedSymbols.find(dstSymbolName) != strippedSymbols.end())
			{
				// the relocation points to a symbol we should ignore
				LC_LOG_DEV("Ignoring stripped symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}
			else if (symbols::IsImageBaseRelatedSymbol(dstSymbolName))
			{
				// the linker-generated __ImageBase always sits at RVA zero, and relocations should never be patched
				LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}
			else if (symbols::IsTlsArrayRelatedSymbol(dstSymbolName))
			{
				// compiler-generated symbols such as __tls_array don't have any RVA, because they always reside at
				// the same address, e.g. relative to a segment register.
				// one such example would be how thread-local storage variables are accessed:
				// the generated code always fetches the flat address of the thread-local storage array from the TEB (https://en.wikipedia.org/wiki/Win32_Thread_Information_Block).
				// the TEB itself can be accessed using segment register FS on x86, and GS on x64, so one of the first instructions of thread-local storage access is always going to
				// access the member at 0x2C/0x58 relative to FS/GS, e.g.:
				//   mov eax, dword ptr fs:0x2C (x86)
				//   mov rax, qword ptr gs:0x58 (x64)
				// therefore, the "RVA" of __tls_array is 0x2C (x86) or 0x58 (x64).
				// see http://www.nynaeve.net/?p=180 for more in-depth information about thread-local storage on Windows.
				// NOTE: we do need the RVA of __tls_index because that is used to set the data segment register to the
				// table used for accessing TLS variables.
				LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}

			// without a known RVA for the source symbol there is no location in the image to read from
			if (!srcSymbol)
			{
				LC_ERROR_DEV("Cannot find source symbol %s (%s)",
					srcSymbolName.c_str(),
					nameMangling::UndecorateSymbol(srcSymbolName.c_str(), 0u).c_str());
				continue;
			}

			const coff::Relocation::Type::Enum type = relocation->type;

			// the relocation's RVA is relative to the start of the function, and the executable already has all relocations
			// resolved. hence we can backtrack the RVA of the destination symbol by peeking into the executable's code
			// at the address of the relocation.
			const uint32_t relocationRva = srcSymbol->rva + relocation->srcRva;

			// check for invalid RVAs before trying to reconstruct the symbol.
			// these can occur when a COMDAT gets stripped in an .obj, but is needed by an .obj coming from a library.
			// the COMDAT will then be stripped from the executable, so we shouldn't try reconstructing it.
			{
#if LC_64_BIT
				if (type == coff::Relocation::Type::VA_64)
				{
					const uint64_t rvaInCode = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva);
					if (rvaInCode == 0u)
					{
						continue;
					}
				}
				else
#endif
				{
					const uint32_t rvaInCode = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
					if (rvaInCode == 0u)
					{
						continue;
					}
				}
			}

			// even though the final RVA can only be 32-bit because no image can ever be larger than 4GB, intermediate results
			// can point to addresses in the full 64-bit address space.
#if LC_64_BIT
			uint64_t dstRva = 0u;
#else
			uint32_t dstRva = 0u;
#endif

			// backtrack to the real RVA of the destination symbol depending on the type of relocation.
			// 32-BIT NOTE: relative addresses are signed 32-bit offsets, but addressing performed by the CPU
			// works modulo 2^32. this means that it doesn't matter whether we go forward 3GB, or back 1GB -
			// the resulting address will be the same.
			// we therefore carry out all calculations using *unsigned* 32-bit integers, because they have
			// natural overflow/underflow behaviour, and do *not* invoke undefined behaviour like signed integers.
			switch (type)
			{
				case coff::Relocation::Type::RELATIVE:
#if LC_64_BIT
				case coff::Relocation::Type::RELATIVE_OFFSET_1:
				case coff::Relocation::Type::RELATIVE_OFFSET_2:
				case coff::Relocation::Type::RELATIVE_OFFSET_3:
				case coff::Relocation::Type::RELATIVE_OFFSET_4:
				case coff::Relocation::Type::RELATIVE_OFFSET_5:
#endif
				{
					// relative relocations are used for e.g. JMP and CALL instructions and are relative to the address
					// of the next instruction.
					// example:
					//   00015DAA E8 1E B8 FF FF    call _printf(0115CDh)
					// the CALL instruction sits at 0x00015DAA and calls printf at 0x0115CD, but this is *not* the address
					// encoded in the CALL instruction. the encoded relative address is 0xFFFFB81E, which is -18402.
					// adding 0xFFFFB81E to 0x00015DAA + 5 (the address of the next instruction!) yields 0x0115CD.
					// NOTE: the relocation points to the address of the *relocation*, not the beginning of
					// the *instruction* (hence we add 4, not 5).
					const uint32_t rva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
					dstRva = relocationRva + rva + 4ull + coff::Relocation::Type::GetByteDistance(type);
				}
				break;

				case coff::Relocation::Type::SECTION_RELATIVE:
				{
					// section-relative relocations are used for thread-local storage, e.g. accessing __declspec(thread)
					// variables.
					// example:
					//   00016845 A1 14 35 02 00          mov eax, dword ptr[_tls_index(023514h)]
					//   0001684A 64 8B 0D 2C 00 00 00    mov ecx, dword ptr fs:[2Ch]
					//   00016851 8B 14 81                mov edx, dword ptr[ecx + eax*4]
					//   00016854 8B 82 04 01 00 00       mov eax, dword ptr[edx + 104h]
					// the code accesses a global variable in thread-local storage, which happens relative to the
					// .tls section. the section-relative offset of the variable in question is 0x104, and the relocation
					// directly stores this offset (0x00000104 in the last line).

					// grab RVA of the symbol's section
					const ImmutableString& sectionName = coff::GetTlsSectionName();
					const symbols::ImageSection* section = symbols::FindImageSectionByName(imageSectionDb, sectionName);
					if (!section)
					{
						LC_ERROR_DEV("Cannot find section %s in image", sectionName.c_str());
						continue;
					}

					// the relocation itself is 32-bit, always positive
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) + section->rva;
				}
				break;

				case coff::Relocation::Type::VA_32:
				{
#if LC_64_BIT
					// an absolute 32-bit virtual address cannot exist in a 64-bit image, otherwise the .exe/.dll could
					// not be loaded into the upper 32-bits of the address space.
					LC_ERROR_DEV("Ignoring relocation of type %s (%d)", coff::Relocation::Type::ToString(type), type);
					continue;
#else
					// direct virtual addresses are used for accessing e.g. global symbols, string literals.
					// the instruction directly stores the absolute address of the symbol in question.
					// example:
					//   00015DA5 68 9C 11 02 00    push 2119Ch
					// this pushes the absolute address of a string literal to the stack. the address encoded
					// in the opcode is 0x0002119C, which is the direct address of the string literal in memory.
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) - imageBase;
#endif
				}
				break;

				case coff::Relocation::Type::RVA_32:
				{
					// in 32-bit, this type of relocation is only used for .debug and .rsrc (resource) sections.
					// the latter are only needed by the linker in order to know where to place resources in the executable.
					// in 64-bit, this type of relocation is used for addressing exception-relevant functions and data,
					// and seldom for accessing data at an absolute offset to the image base, e.g.
					//   mov rcx,qword ptr [r8+rcx*8+1771060h]
					// r8 stores the image base, 1771060h is the value of the RVA_32 relocation.
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
				}
				break;

#if LC_64_BIT
				case coff::Relocation::Type::VA_64:
				{
					// direct virtual addresses are used for accessing e.g. global symbols, same as on 32-bit
					dstRva = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva) - imageBase;
				}
				break;
#endif

				case coff::Relocation::Type::UNKNOWN:
				default:
					// no RVA could be computed for this relocation. skip it right here rather than running the
					// offset/thunk/registration logic below on a meaningless zero value, which could wrap around
					// when subtracting the destination offset and register a bogus symbol.
					LC_ERROR_DEV("Unknown relocation type %s (%d)", coff::Relocation::Type::ToString(type), type);
					continue;
			}

			// the original relocation might have been applied to the symbol at a certain offset.
			// subtract that offset (if any) to arrive at the symbol's original RVA.
			dstRva -= relocation->dstOffset;
			if (dstRva == 0u)
			{
				// this was reconstructed from a stripped COMDAT symbol that is referenced by an .obj where it
				// wasn't stripped (e.g. an .obj contained in a .lib).
				continue;
			}

			// NOTE(review): an RVA exactly equal to imageSize passes this check - confirm whether that is intended.
			if (dstRva > imageSize)
			{
				// the RVA underflowed somewhere (the unsigned int would then surely be larger than 2 GB),
				// or the RVA lies outside the module.
				// dstRva is 64-bit wide in 64-bit builds, so print it with a matching 64-bit format specifier.
				LC_ERROR_DEV("Detected wrong RVA 0x%llX: Relocation %s (%d) from %s to %s in file %s",
					static_cast<unsigned long long>(dstRva),
					coff::Relocation::Type::ToString(type), type,
					srcSymbolName.c_str(), dstSymbolName.c_str(), objPath.c_str());
				LC_ERROR_DEV("Source symbol at 0x%X", srcSymbol->rva);
				LC_ERROR_DEV("Relocation srcRva: 0x%X, dstOffset: 0x%X", relocation->srcRva, relocation->dstOffset);
				continue;
			}

			// at this point, the RVA itself must fit into 32-bit, even in 64-bit
			uint32_t dstRva32 = static_cast<uint32_t>(dstRva);

			// when incremental linking is enabled, the linker links function calls against "@ILT+offset" thunks rather
			// than the real function address. we can follow these thunks and get the function's real RVA.
			const uint32_t thunkTarget = symbols::FindThunkTargetByRVA(thunkDb, dstRva32);
			if (thunkTarget != 0u)
			{
				// the real destination RVA is at the thunk's target
				dstRva32 = thunkTarget;
			}

			// we found a new symbol, add it to the database
			symbols::Symbol* newSymbol = LC_NEW(&g_symbolAllocator, symbols::Symbol) { dstSymbolName, dstRva32 };
			LC_LOG_DEV("Found new symbol %s at RVA 0x%X", dstSymbolName.c_str(), dstRva32);
			symbolDB->symbolsByName.emplace(dstSymbolName, newSymbol);
			symbolDB->symbolsByRva.emplace(dstRva32, newSymbol);

			// walk the relocations of the new symbol as well
			const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
			if (nextSymbol)
			{
				openSymbols.push_back(nextSymbol);
			}

			--unknownSymbolsToFind;

			// did we already find all symbols?
			if (unknownSymbolsToFind == 0u)
			{
				LC_LOG_DEV("All symbols known, exiting");
				return;
			}
		}

		walkedAlready.insert(symbol);
	}

	// there are no more symbols to walk, but we haven't found all of them yet.
	// we can try finding the remaining symbols by matching their sections to sections in the PE image.
	// sections with the same name across several .obj files get merged into one section in the image, which makes it
	// a bit harder to find the address of an .obj's section in the image.
	// in order to do this, we find the section in question inside the image, and gather all different contributions
	// to this section. for each contribution, we then check whether its size matches the one in the .obj, and whether it
	// originated from the .obj in question.
	// if both match, we can finally check the symbol's names to ensure that we found the correct contribution.
	// from there, we can calculate the symbol's section-relative offset and reconstruct its RVA.

	// start by gathering all static functions and symbols which haven't been found already.
	LC_LOG_DEV("Reconstructing symbol RVAs from executable contributions");
	LC_LOG_INDENT_DEV;

	types::vector<const coff::Symbol*> missingSymbols;
	missingSymbols.reserve(unknownSymbolsToFind);
	{
		const size_t count = coffDb->symbols.size();
		for (size_t i = 0u; i < count; ++i)
		{
			const coff::Symbol* symbol = coffDb->symbols[i];
			const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
			if (strippedSymbols.find(symbolName) != strippedSymbols.end())
			{
				// the missing symbol is one we stripped
				continue;
			}

			// only static symbols can be missing, all others need to be known already
			if ((symbol->type == coff::SymbolType::STATIC_FUNCTION) ||
				(symbol->type == coff::SymbolType::STATIC_DATA))
			{
				const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
				if (srcSymbol)
				{
					// found already, nothing more to do
					LC_LOG_DEV("Ignoring known symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsRuntimeCheckRelatedSymbol(symbolName))
				{
					// code for runtime checks is always compiled into an .obj and doesn't need to be patched, and therefore
					// there's no need to find all the symbols
					LC_LOG_DEV("Ignoring runtime-check-related symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsControlFlowGuardRelatedSymbol(symbolName))
				{
					// control flow guard stores function identifiers in separate symbols in .gfids$y section, which is not
					// an explicit section in the executable, and therefore cannot be found.
					// this is of no interest to us anyway, because we disable CFG.
					LC_LOG_DEV("Ignoring control flow guard-related symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsExceptionRelatedSymbol(symbolName))
				{
					// even though exception-related symbols such as unwind tables and handlers are never patched or relocated
					// by us, catch clauses will refer to function and data symbols, and some of them could be stripped by us.
					// we therefore need to reconstruct these symbols as well.
					// we could also try reconstructing all exception-related symbols, but that has a serious impact on
					// performance!
					if (!symbols::IsExceptionClauseSymbol(symbolName))
					{
						// no exception clause, hence we're really not interested
						continue;
					}
				}

				missingSymbols.push_back(symbol);
			}
			else
			{
				// externally visible COMDAT symbols might not be known at this point, but will be found in one of
				// the OBJ files eventually. this is not an error.
				// ??$__vcrt_va_start_verify_argument_type@QBD@@YAXXZ is probably the most prominent example of where
				// this happens all the time.
			}
		}
	}

	// next try finding the missing symbols
	const size_t missingSymbolCount = missingSymbols.size();
	for (size_t i = 0u; i < missingSymbolCount; ++i)
	{
		const coff::Symbol* symbol = missingSymbols[i];

		// if we are in our second pass (or later), check whether we tried reconstructing this symbol already
		if (pass > 0u)
		{
			const auto it = triedReconstructingAlready.find(symbol);
			if (it != triedReconstructingAlready.end())
			{
				// tried already
				continue;
			}
		}

		triedReconstructingAlready.insert(symbol);

		const ImmutableString& missingSymbolName = coff::GetSymbolName(coffDb, symbol);
		const coff::Section& coffSection = coffDb->sections[symbol->sectionIndex];
		LC_LOG_DEV("Trying to find RVA for static symbol %s in section %s", missingSymbolName.c_str(), coffSection.name.c_str());
		LC_LOG_INDENT_DEV;

		// the address of the symbol relative to the COFF section it's defined in, e.g.:
		//   .bss at COFF RVA 1000
		//     symbol0 at COFF RVA 1000, at section relative addr. 0
		//     symbol1 at COFF RVA 1004, at section relative addr. 4
		//     symbol2 at COFF RVA 1008, at section relative addr. 8
		const uint32_t sectionRelativeAddress = symbol->rva - coffSection.rawDataRva;

		// find this section in the image
		const symbols::ImageSection* imageSection = symbols::FindImageSectionByName(imageSectionDb, coffSection.name);
		if (!imageSection)
		{
			LC_ERROR_DEV("Cannot find image section %s", coffSection.name.c_str());
			continue;
		}

		const uint32_t startOfImageSection = imageSection->rva;
		const uint32_t endOfImageSection = startOfImageSection + imageSection->size;

		// walk all contributions that are part of the image section.
		// fetch all potential contributions, they might be ambiguous at first because we delay
		// costly checks as far as possible.
		types::vector<const symbols::Contribution*> potentialContributions;
		potentialContributions.reserve(1024u);

		// start at the first contribution whose RVA is not below the start of the image section
		auto contributionIt = std::lower_bound(contributionDb->contributions.begin(), contributionDb->contributions.end(), startOfImageSection, &HasLowerRVA);
		while (contributionIt != contributionDb->contributions.end())
		{
			const symbols::Contribution* contribution = *contributionIt;
			++contributionIt;

			if (contribution->rva >= endOfImageSection)
			{
				// no more contributions that belong to this section
				break;
			}

			if (contribution->size != coffSection.rawDataSize)
			{
				// section size does not match
				continue;
			}
			else if (sectionRelativeAddress >= contribution->size)
			{
				// the symbol cannot be part of this contributing section because it is not large enough
				continue;
			}
			else if (symbols::GetContributionCompilandName(compilandDb, contributionDb, contribution) != objPath)
			{
				// the section contribution originated from a different .obj file
				continue;
			}
			else
			{
				// this is a potential contribution, store it for now
				potentialContributions.push_back(contribution);
			}
		}

		const symbols::Contribution* foundContribution = nullptr;
		const size_t potentialContributionsCount = potentialContributions.size();
		bool ambiguous = false;
		if (potentialContributionsCount == 0u)
		{
			// absolutely no contribution found that matches file and size.
			// this symbol/contribution has been stripped out by the linker, so don't report an error.
			continue;
		}
		else
		{
			LC_LOG_DEV("Found %zu candidate(s)", potentialContributionsCount);

			// there are one or more potential contributions, filter them using the symbols' names.
			// filtering is done by checking whether the undecorated name of the contribution's symbol is
			// part of the undecorated name of the COFF symbol.
			// note that we cannot do that with mangled names, because the PDB doesn't hold them for static symbols,
			// and they cannot be generated from undecorated ones in all cases, e.g. for symbols in an anonymous namespace.
			const std::string& coffUndecoratedName = symbols::UndecorateSymbolName(missingSymbolName);

			// the wide version of the name is the same for every candidate, so convert it only once
			const auto coffUndecoratedNameWide = string::ToWideString(coffUndecoratedName);

			for (auto it = potentialContributions.begin(); it != potentialContributions.end(); ++it)
			{
				const symbols::Contribution* contribution = *it;

				// if there already is at least one symbol that spans the potential symbol's range,
				// this contribution cannot be the correct one.
				const Symbol* potentialSymbol = symbols::FindSymbolByRVA(symbolDB, contribution->rva);
				if (potentialSymbol)
				{
					const uint32_t rangeStart = potentialSymbol->rva;
					IDiaSymbol* diaSymbol = dia::FindSymbolByRVA(provider->diaSession, contribution->rva);
					if (diaSymbol)
					{
						const uint32_t potentialSymbolSize = dia::GetSymbolSize(diaSymbol);
						diaSymbol->Release();

						const uint32_t rangeEnd = rangeStart + potentialSymbolSize;
						const uint32_t potentialRva = contribution->rva + sectionRelativeAddress;
						if (potentialRva >= rangeStart && potentialRva < rangeEnd)
						{
							// there already is a symbol that spans the potential RVA's range.
							// however, for certain symbols such as exception clauses, this is OK.
							if (!symbols::IsExceptionClauseSymbol(missingSymbolName))
							{
								continue;
							}
						}
					}
				}

				const uint32_t rva = contribution->rva + sectionRelativeAddress;

				// try to find the symbol in our local cache first
				std::wstring diaName;
				{
					const auto diaSymbolIt = diaSymbolCache->find(rva);
					if (diaSymbolIt == diaSymbolCache->end())
					{
						// the symbol could not be found in our cache, now try the PDB and store
						// the lookup into the cache.
						// exception clauses are labels stored as children of functions.
						IDiaSymbol* diaSymbol = symbols::IsExceptionClauseSymbol(missingSymbolName)
							? dia::FindLabelByRva(provider->diaSession, rva)
							: dia::FindSymbolByRVA(provider->diaSession, rva);
						if (diaSymbol)
						{
							const dia::SymbolName& diaSymbolName = dia::GetSymbolName(diaSymbol);
							diaName = diaSymbolName.GetString();
							diaSymbol->Release();
						}

						// failed lookups are cached as well (with an empty name)
						diaSymbolCache->insert(std::make_pair(rva, diaName));
					}
					else
					{
						// the symbol is in the cache, grab its name
						diaName = diaSymbolIt->second;
					}
				}

				if (diaName.length() == 0u)
				{
					// could not find the correct symbol, skip this contribution
					continue;
				}

				if (!string::Contains(coffUndecoratedNameWide.c_str(), diaName.c_str()))
				{
					// names don't match, skip this contribution
					continue;
				}

				// possible candidate
				if (foundContribution)
				{
					// there is already a candidate, which means that resolution was ambiguous
					ambiguous = true;
					break;
				}

				foundContribution = contribution;
			}
		}

		if (ambiguous)
		{
			LC_ERROR_DEV("Contributions for symbol %s are ambiguous", missingSymbolName.c_str());
			continue;
		}

		// did we find a match?
		if (foundContribution)
		{
			const uint32_t rva = foundContribution->rva + sectionRelativeAddress;
			LC_LOG_DEV("Found symbol %s at 0x%X", missingSymbolName.c_str(), rva);

			symbols::Symbol* newSymbol = LC_NEW(&g_symbolAllocator, symbols::Symbol) { missingSymbolName, rva };
			symbolDB->symbolsByName.emplace(missingSymbolName, newSymbol);
			symbolDB->symbolsByRva.emplace(rva, newSymbol);

			// the relocations of the newly found symbol need to be walked in the next pass
			openSymbols.push_back(symbol);

			--unknownSymbolsToFind;

			// did we already find all symbols?
			if (unknownSymbolsToFind == 0u)
			{
				LC_LOG_DEV("All symbols known, exiting");
				return;
			}
		}
		else
		{
			// if we had potential candidates but could not find a symbol, there is still a possibility that the
			// symbol has been stripped by the linker due to the /Gw option that puts data symbols into separate
			// sections. this happens in ComplexClassGlobal.cpp in our test cases as well.
			LC_WARNING_DEV("Could not find symbol %s in compiland %s, possibly stripped by linker",
				coff::GetSymbolName(coffDb, symbol).c_str(),
				objPath.c_str());
		}
	}

	if (openSymbols.size() != 0u)
	{
		// we found new symbols to walk, so do another pass
		LC_LOG_DEV("Doing another pass");
		++pass;
		goto walkOpenSymbols;
	}
}