Files
UnrealEngineUWP/Engine/Source/Developer/Windows/LiveCodingServer/Private/External/LC_SymbolReconstruction.cpp
Chris Gagnon 00f8b72a0b Merging //UE4/Dev-Main to Dev-Editor (//UE4/Dev-Editor)
#rb none

[CL 5602170 by Chris Gagnon in Dev-Editor branch]
2019-03-27 15:03:08 -04:00

766 lines
29 KiB
C++

// Copyright 2011-2019 Molecular Matters GmbH, all rights reserved.
#include "LC_SymbolReconstruction.h"
#include "LC_Allocators.h"
#include "LC_StringUtil.h"
#include "LC_PointerUtil.h"
#include "LC_NameMangling.h"
#include "LC_DiaUtil.h"
namespace
{
	// Comparison predicate handed to std::lower_bound when searching the contribution list
	// for the first contribution that does not lie below a given RVA.
	// Returns true if the contribution's RVA is strictly smaller than 'rva'.
	inline bool HasLowerRVA(const symbols::Contribution* lhs, uint32_t rva)
	{
		return rva > lhs->rva;
	}
}
// Reconstructs the RVAs of symbols belonging to one COFF (.obj) file that are not yet part of
// the symbol database, and adds them to 'symbolDB'.
//
// The function works in (possibly repeated) passes consisting of two phases:
//   1) Relocation walking: starting from COFF symbols whose RVA is already known, every relocation of
//      such a symbol is read back from the linked image in order to compute the RVA of the relocation's
//      destination symbol. Newly found symbols are walked as well.
//   2) Contribution matching: static functions/data that are still missing are matched against the
//      section contributions of the image (by section, size, originating compiland and undecorated name).
//      Symbols found this way are fed back into phase 1 via another pass.
//
// The function returns as soon as the number of unknown symbols drops to zero, or when no phase is able
// to find any new symbol.
//
// provider:         supplies the DIA session used for looking up symbols and labels by RVA.
// image:            the linked executable image that relocation values are read from.
// imageSections:    section information of 'image', needed for reading from the image at an RVA.
// coffDb:           symbols, sections and relocations of the .obj file.
// strippedSymbols:  names of symbols that must be ignored.
// objPath:          path of the .obj file; compared against the compiland name of contributions.
// compilandDb:      used for resolving the compiland name of a contribution.
// contributionDb:   section contributions of the image.
// thunkDb:          incremental linking thunks, used for resolving a thunk RVA to its target RVA.
// imageSectionDb:   used for finding image sections by name.
// symbolDB:         in/out, receives all reconstructed symbols (by name and by RVA).
// diaSymbolCache:   in/out, caches DIA symbol names by RVA (including failed lookups as empty names).
void symbols::ReconstructFromExecutableCoff
(
	const symbols::Provider* provider,
	const executable::Image* image,
	const executable::ImageSectionDB* imageSections,
	const coff::CoffDB* coffDb,
	const types::StringSet& strippedSymbols,
	const symbols::ObjPath& objPath,
	const symbols::CompilandDB* compilandDb,
	const symbols::ContributionDB* contributionDb,
	const symbols::ThunkDB* thunkDb,
	const symbols::ImageSectionDB* imageSectionDb,
	symbols::SymbolDB* symbolDB,
	DiaSymbolCache* diaSymbolCache
)
{
	const executable::PreferredBase imageBase = executable::GetPreferredBase(image);
	const uint32_t imageSize = executable::GetSize(image);
	LC_LOG_DEV("Gathering symbols from COFF file %s", objPath.c_str());
	LC_LOG_INDENT_DEV;
	// NOTE: container sizes are size_t, hence they must be logged using %zu
	LC_LOG_DEV("Symbols in COFF: %zu", coffDb->symbols.size());
	LC_LOG_DEV("Symbols stripped: %zu", strippedSymbols.size());
	// gather symbols by following relocation "paths", backtracking from the location in the executable
	// to the symbol's origin RVA. our starting entry paths are the functions and data of which we already
	// know the name and RVA.
	size_t unknownSymbolsToFind = 0u;
	types::vector<const coff::Symbol*> openSymbols;
	openSymbols.reserve(coffDb->symbols.size());
	{
		const size_t count = coff::GetIndexCount(coffDb);
		for (size_t i = 0u; i < count; ++i)
		{
			// do we have a symbol at that index?
			const coff::Symbol* symbol = coff::GetSymbolByIndex(coffDb, i);
			if (symbol)
			{
				// yes, so check whether this symbol is known already
				const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
				const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
				if (srcSymbol)
				{
					LC_LOG_DEV("Known symbol %s at 0x%X", symbolName.c_str(), srcSymbol->rva);
					openSymbols.push_back(symbol);
				}
				else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
				{
					LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
				}
				else
				{
					LC_LOG_DEV("Unknown symbol %s", symbolName.c_str());
					++unknownSymbolsToFind;
				}
			}
			else
			{
				// we do not have a symbol stored in the COFF, because it might be external/unresolved.
				// if so, chances are very high that this symbol is already known publicly.
				const ImmutableString& symbolName = coff::GetUnresolvedSymbolName(coffDb, i);
				if (symbolName.GetLength() == 0u)
				{
					continue;
				}
				if (symbols::FindSymbolByName(symbolDB, symbolName))
				{
					LC_LOG_DEV("Publicly known symbol %s", symbolName.c_str());
				}
				else if (!coff::IsInterestingSymbol(symbolName))
				{
					// relocations to those symbols are not stored in the COFF, hence we
					// can not reconstruct these anyway
					LC_LOG_DEV("Non-interesting symbol %s", symbolName.c_str());
				}
				else if (symbols::IsImageBaseRelatedSymbol(symbolName))
				{
					LC_LOG_DEV("Linker-generated image base symbol %s", symbolName.c_str());
				}
				else if (symbols::IsTlsArrayRelatedSymbol(symbolName))
				{
					LC_LOG_DEV("Compiler-generated symbol %s", symbolName.c_str());
				}
				else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
				{
					LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
				}
				else if (symbolDB->symbolsWithoutRva.find(symbolName) != symbolDB->symbolsWithoutRva.end())
				{
					// ignore symbols without an RVA. those are often generated by the compiler or linker,
					// are being relocated to, but store absolute values encoded in their offset in the PDB.
					LC_LOG_DEV("Compiler- or linker-generated symbol %s without an RVA", symbolName.c_str());
				}
				else
				{
					LC_LOG_DEV("Unknown unresolved symbol %s", symbolName.c_str());
					++unknownSymbolsToFind;
				}
			}
		}
	}
	LC_LOG_DEV("Unknown symbols left to find: %zu", unknownSymbolsToFind);
	// do we already know all symbols?
	if (unknownSymbolsToFind == 0u)
	{
		LC_LOG_DEV("Know all symbols already, nothing to do");
		return;
	}
	// keep walking relocations of all open symbols to determine the RVA of symbols contained in this .obj
	types::unordered_set<const coff::Symbol*> walkedAlready;
	types::unordered_set<const coff::Symbol*> triedReconstructingAlready;
	unsigned int pass = 0u;
	// entry point of each pass. we jump back here whenever contribution matching found new symbols
	// whose relocations have not been walked yet.
walkOpenSymbols:
	while (openSymbols.size() > 0u)
	{
		const coff::Symbol* symbol = openSymbols.back();
		openSymbols.pop_back();
		// check whether we walked this symbol already
		const auto it = walkedAlready.find(symbol);
		if (it != walkedAlready.end())
		{
			// handled already, nothing more to do
			continue;
		}
		// check whether the symbol is actually the one that contributed its code.
		// in case of COMDATs available in both executable and static libraries, this might not
		// be true and would lead to completely wrong symbols being reconstructed.
		const ImmutableString& srcSymbolName = coff::GetSymbolName(coffDb, symbol);
		const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, srcSymbolName);
		if (srcSymbol)
		{
			const symbols::Contribution* symbolContribution = symbols::FindContributionByRVA(contributionDb, srcSymbol->rva);
			if (symbolContribution)
			{
				const ImmutableString& contributingCompiland = symbols::GetContributionCompilandName(compilandDb, contributionDb, symbolContribution);
				if (contributingCompiland != objPath)
				{
					LC_LOG_DEV("Not walking symbol %s from contribution in different file %s", srcSymbolName.c_str(), contributingCompiland.c_str());
					continue;
				}
			}
		}
		LC_LOG_DEV("Walking relocations of symbol %s", srcSymbolName.c_str());
		LC_LOG_INDENT_DEV;
		const size_t relocationCount = symbol->relocations.size();
		for (size_t i = 0u; i < relocationCount; ++i)
		{
			const coff::Relocation* relocation = symbol->relocations[i];
			const ImmutableString& dstSymbolName = coff::GetRelocationDstSymbolName(coffDb, relocation);
			// the symbol we are looking for might already be in the database because of the public symbols gathered from the PDB
			if (symbols::FindSymbolByName(symbolDB, dstSymbolName))
			{
				LC_LOG_DEV("Publicly known symbol %s", dstSymbolName.c_str());
				// we know this symbol already, but we might not have walked its relocations yet.
				// add it to the list and continue.
				const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
				if (nextSymbol)
				{
					openSymbols.push_back(nextSymbol);
				}
				continue;
			}
			else if (strippedSymbols.find(dstSymbolName) != strippedSymbols.end())
			{
				// the relocation points to a symbol we should ignore
				LC_LOG_DEV("Ignoring stripped symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}
			else if (symbols::IsImageBaseRelatedSymbol(dstSymbolName))
			{
				// the linker-generated __ImageBase always sits at RVA zero, and relocations should never be patched
				LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}
			else if (symbols::IsTlsArrayRelatedSymbol(dstSymbolName))
			{
				// compiler-generated symbols such as __tls_array don't have any RVA, because they always reside at
				// the same address, e.g. relative to a segment register.
				// one such example would be how thread-local storage variables are accessed:
				// the generated code always fetches the flat address of the thread-local storage array from the TEB (https://en.wikipedia.org/wiki/Win32_Thread_Information_Block).
				// the TEB itself can be accessed using segment register FS on x86, and GS on x64, so one of the first instructions of thread-local storage access is always going to
				// access the member at 0x2C/0x58 relative to FS/GS, e.g.:
				// mov eax, dword ptr fs:0x2C (x86)
				// mov rax, qword ptr gs:0x58 (x64)
				// therefore, the "RVA" of __tls_array is 0x2C (x86) or 0x58 (x64).
				// see http://www.nynaeve.net/?p=180 for more in-depth information about thread-local storage on Windows.
				// NOTE: we do need the RVA of __tls_index because that is used to set the data segment register to the
				// table used for accessing TLS variables.
				LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
				continue;
			}
			// from here on we need the RVA of the source symbol in order to locate the relocation in the image
			if (!srcSymbol)
			{
				LC_ERROR_DEV("Cannot find source symbol %s (%s)",
					srcSymbolName.c_str(),
					nameMangling::UndecorateSymbol(srcSymbolName.c_str(), 0u).c_str());
				continue;
			}
			const coff::Relocation::Type::Enum type = relocation->type;
			// the relocation's RVA is relative to the start of the function, and the executable already has all relocations
			// resolved. hence we can backtrack the RVA of the destination symbol by peeking into the executable's code
			// at the address of the relocation.
			const uint32_t relocationRva = srcSymbol->rva + relocation->srcRva;
			// check for invalid RVAs before trying to reconstruct the symbol.
			// these can occur when a COMDAT gets stripped in an .obj, but is needed by an .obj coming from a library.
			// the COMDAT will then be stripped from the executable, so we shouldn't try reconstructing it.
			{
#if LC_64_BIT
				if (type == coff::Relocation::Type::VA_64)
				{
					const uint64_t rvaInCode = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva);
					if (rvaInCode == 0u)
					{
						continue;
					}
				}
				else
#endif
				{
					const uint32_t rvaInCode = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
					if (rvaInCode == 0u)
					{
						continue;
					}
				}
			}
			// even though the final RVA can only be 32-bit because no image can ever be larger than 4GB, intermediate results
			// can point to addresses in the full 64-bit address space.
#if LC_64_BIT
			uint64_t dstRva = 0u;
#else
			uint32_t dstRva = 0u;
#endif
			// backtrack to the real RVA of the destination symbol depending on the type of relocation.
			// 32-BIT NOTE: relative addresses are signed 32-bit offsets, but addressing performed by the CPU
			// works modulo 2^32. this means that it doesn't matter whether we go forward 3GB, or back 1GB -
			// the resulting address will be the same.
			// we therefore carry out all calculations using *unsigned* 32-bit integers, because they have
			// natural overflow/underflow behaviour, and do *not* invoke undefined behaviour like signed integers.
			switch (type)
			{
				case coff::Relocation::Type::RELATIVE:
#if LC_64_BIT
				case coff::Relocation::Type::RELATIVE_OFFSET_1:
				case coff::Relocation::Type::RELATIVE_OFFSET_2:
				case coff::Relocation::Type::RELATIVE_OFFSET_3:
				case coff::Relocation::Type::RELATIVE_OFFSET_4:
				case coff::Relocation::Type::RELATIVE_OFFSET_5:
#endif
				{
					// relative relocations are used for e.g. JMP and CALL instructions and are relative to the address
					// of the next instruction.
					// example:
					// 00015DAA E8 1E B8 FF FF call _printf(0115CDh)
					// the CALL instruction sits at 0x00015DAA and calls printf at 0x0115CD, but this is *not* the address
					// encoded in the CALL instruction. the encoded relative address is 0xFFFFB81E, which is -18402.
					// adding 0xFFFFB81E to 0x00015DAA + 5 (the address of the next instruction!) yields 0x0115CD.
					// NOTE: the relocation points to the address of the *relocation*, not the beginning of
					// the *instruction* (hence we add 4, not 5).
					const uint32_t rva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
					dstRva = relocationRva + rva + 4ull + coff::Relocation::Type::GetByteDistance(type);
				}
				break;
				case coff::Relocation::Type::SECTION_RELATIVE:
				{
					// section-relative relocations are used for thread-local storage, e.g. accessing __declspec(thread)
					// variables.
					// example:
					// 00016845 A1 14 35 02 00 mov eax, dword ptr[_tls_index(023514h)]
					// 0001684A 64 8B 0D 2C 00 00 00 mov ecx, dword ptr fs:[2Ch]
					// 00016851 8B 14 81 mov edx, dword ptr[ecx + eax*4]
					// 00016854 8B 82 04 01 00 00 mov eax, dword ptr[edx + 104h]
					// the code accesses a global variable in thread-local storage, which happens relative to the
					// .tls section. the section-relative offset of the variable in question is 0x104, and the relocation
					// directly stores this offset (0x00000104 in the last line).
					// grab RVA of the symbol's section
					const ImmutableString& sectionName = coff::GetTlsSectionName();
					const symbols::ImageSection* section = symbols::FindImageSectionByName(imageSectionDb, sectionName);
					if (!section)
					{
						LC_ERROR_DEV("Cannot find section %s in image", sectionName.c_str());
						continue;
					}
					// the relocation itself is 32-bit, always positive
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) + section->rva;
				}
				break;
				case coff::Relocation::Type::VA_32:
				{
#if LC_64_BIT
					// an absolute 32-bit virtual address cannot exist in a 64-bit image, otherwise the .exe/.dll could
					// not be loaded into the upper 32-bits of the address space.
					LC_ERROR_DEV("Ignoring relocation of type %s (%d)", coff::Relocation::Type::ToString(type), type);
					continue;
#else
					// direct virtual addresses are used for accessing e.g. global symbols, string literals.
					// the instruction directly stores the absolute address of the symbol in question.
					// example:
					// 00015DA5 68 9C 11 02 00 push 2119Ch
					// this pushes the absolute address of a string literal to the stack. the address encoded
					// in the opcode is 0x0002119C, which is the direct address of the string literal in memory.
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) - imageBase;
#endif
				}
				break;
				case coff::Relocation::Type::RVA_32:
				{
					// in 32-bit, this type of relocation is only used for .debug and .rsrc (resource) sections.
					// the latter are only needed by the linker in order to know where to place resources in the executable.
					// in 64-bit, this type of relocation is used for addressing exception-relevant functions and data,
					// and seldomly for accessing data at an absolute offset to the image base, e.g.
					// mov rcx,qword ptr [r8+rcx*8+1771060h]
					// r8 stores the image base, 1771060h is the value of the RVA_32 relocation.
					dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
				}
				break;
#if LC_64_BIT
				case coff::Relocation::Type::VA_64:
				{
					// direct virtual addresses are used for accessing e.g. global symbols, same as on 32-bit
					dstRva = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva) - imageBase;
				}
				break;
#endif
				case coff::Relocation::Type::UNKNOWN:
				default:
					// we cannot compute a destination RVA for a relocation we don't understand.
					// skip the relocation entirely rather than carrying on with a zero RVA, which would either
					// trigger a second, misleading "wrong RVA" error below or yield a bogus symbol.
					LC_ERROR_DEV("Unknown relocation type %s (%d)", coff::Relocation::Type::ToString(type), type);
					continue;
			}
			// the original relocation might have been applied to the symbol at a certain offset.
			// subtract that offset (if any) to arrive at the symbol's original RVA.
			dstRva -= relocation->dstOffset;
			if (dstRva == 0u)
			{
				// this was reconstructed from a stripped COMDAT symbol that is referenced by an .obj where it
				// wasn't stripped (e.g. an .obj contained in a .lib).
				continue;
			}
			if (dstRva > imageSize)
			{
				// the RVA underflowed somewhere (the unsigned int would then surely be larger than 2 GB),
				// or the RVA lies outside the module.
				// NOTE: dstRva is 64-bit in 64-bit builds, hence it is logged as unsigned long long.
				LC_ERROR_DEV("Detected wrong RVA 0x%llX: Relocation %s (%d) from %s to %s in file %s",
					static_cast<unsigned long long>(dstRva),
					coff::Relocation::Type::ToString(type), type,
					srcSymbolName.c_str(), dstSymbolName.c_str(), objPath.c_str());
				LC_ERROR_DEV("Source symbol at 0x%X", srcSymbol->rva);
				LC_ERROR_DEV("Relocation srcRva: 0x%X, dstOffset: 0x%X", relocation->srcRva, relocation->dstOffset);
				continue;
			}
			// at this point, the RVA itself must fit into 32-bit, even in 64-bit
			uint32_t dstRva32 = static_cast<uint32_t>(dstRva);
			// when incremental linking is enabled, the linker links function calls against "@ILT+offset" thunks rather
			// than the real function address. we can follow these thunks and get the function's real RVA.
			const uint32_t thunkTarget = symbols::FindThunkTargetByRVA(thunkDb, dstRva32);
			if (thunkTarget != 0u)
			{
				// the real destination RVA is at the thunk's target
				dstRva32 = thunkTarget;
			}
			// we found a new symbol, add it to the database
			symbols::Symbol* newSymbol = LC_NEW(&g_symbolAllocator, symbols::Symbol) { dstSymbolName, dstRva32 };
			LC_LOG_DEV("Found new symbol %s at RVA 0x%X", dstSymbolName.c_str(), dstRva32);
			symbolDB->symbolsByName.emplace(dstSymbolName, newSymbol);
			symbolDB->symbolsByRva.emplace(dstRva32, newSymbol);
			// walk the relocations of the new symbol as well
			const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
			if (nextSymbol)
			{
				openSymbols.push_back(nextSymbol);
			}
			--unknownSymbolsToFind;
			// did we already find all symbols?
			if (unknownSymbolsToFind == 0u)
			{
				LC_LOG_DEV("All symbols known, exiting");
				return;
			}
		}
		// all relocations of this symbol have been looked at, never walk it again
		walkedAlready.insert(symbol);
	}
	// there are no more symbols to walk, but we haven't found all of them yet.
	// we can try finding the remaining symbols by matching their sections to sections in the PE image.
	// sections with the same name across several .obj files get merged into one section in the image, which makes it
	// a bit harder to find the address of an .obj's section in the image.
	// in order to do this, we find the section in question inside the image, and gather all different contributions
	// to this section. for each contribution, we then check whether its size matches the one in the .obj, and whether it
	// originated from the .obj in question.
	// if both match, we can finally check the symbol's names to ensure that we found the correct contribution.
	// from there, we can calculate the symbol's section-relative offset and reconstruct its RVA.
	// start by gathering all static functions and symbols which haven't been found already.
	LC_LOG_DEV("Reconstructing symbol RVAs from executable contributions");
	LC_LOG_INDENT_DEV;
	types::vector<const coff::Symbol*> missingSymbols;
	missingSymbols.reserve(unknownSymbolsToFind);
	{
		const size_t count = coffDb->symbols.size();
		for (size_t i = 0u; i < count; ++i)
		{
			const coff::Symbol* symbol = coffDb->symbols[i];
			const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
			if (strippedSymbols.find(symbolName) != strippedSymbols.end())
			{
				// the missing symbol is one we stripped
				continue;
			}
			// only static symbols can be missing, all others need to be known already
			if ((symbol->type == coff::SymbolType::STATIC_FUNCTION) ||
				(symbol->type == coff::SymbolType::STATIC_DATA))
			{
				const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
				if (srcSymbol)
				{
					// found already, nothing more to do
					LC_LOG_DEV("Ignoring known symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsRuntimeCheckRelatedSymbol(symbolName))
				{
					// code for runtime checks is always compiled into an .obj and doesn't need to be patched, and therefore
					// there's no need to find all the symbols
					LC_LOG_DEV("Ignoring runtime-check-related symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsControlFlowGuardRelatedSymbol(symbolName))
				{
					// control flow guard stores function identifiers in separate symbols in .gfids$y section, which is not
					// an explicit section in the executable, and therefore cannot be found.
					// this is of no interest to us anyway, because we disable CFG.
					LC_LOG_DEV("Ignoring control flow guard-related symbol \"%s\"", symbolName.c_str());
					continue;
				}
				else if (symbols::IsExceptionRelatedSymbol(symbolName))
				{
					// even though exception-related symbols such as unwind tables and handlers are never patched or relocated
					// by us, catch clauses will refer to function and data symbols, and some of them could be stripped by us.
					// we therefore need to reconstruct these symbols as well.
					// we could also try reconstructing all exception-related symbols, but that has a serious impact on
					// performance!
					if (!symbols::IsExceptionClauseSymbol(symbolName))
					{
						// no exception clause, hence we're really not interested
						continue;
					}
				}
				missingSymbols.push_back(symbol);
			}
			else
			{
				// externally visible COMDAT symbols might not be known at this point, but will be found in one of
				// the OBJ files eventually. this is not an error.
				// ??$__vcrt_va_start_verify_argument_type@QBD@@YAXXZ is probably the most prominent example of where
				// this happens all the time.
			}
		}
	}
	// next try finding the missing symbols
	const size_t missingSymbolCount = missingSymbols.size();
	for (size_t i = 0u; i < missingSymbolCount; ++i)
	{
		const coff::Symbol* symbol = missingSymbols[i];
		// if we are in our second pass (or later), check whether we tried reconstructing this symbol already
		if (pass > 0u)
		{
			const auto it = triedReconstructingAlready.find(symbol);
			if (it != triedReconstructingAlready.end())
			{
				// tried already
				continue;
			}
		}
		triedReconstructingAlready.insert(symbol);
		const ImmutableString& missingSymbolName = coff::GetSymbolName(coffDb, symbol);
		const coff::Section& coffSection = coffDb->sections[symbol->sectionIndex];
		LC_LOG_DEV("Trying to find RVA for static symbol %s in section %s", missingSymbolName.c_str(), coffSection.name.c_str());
		LC_LOG_INDENT_DEV;
		// the address of the symbol relative to the COFF section it's defined in, e.g.:
		// .bss at COFF RVA 1000
		// symbol0 at COFF RVA 1000, at section relative addr. 0
		// symbol1 at COFF RVA 1004, at section relative addr. 4
		// symbol2 at COFF RVA 1008, at section relative addr. 8
		const uint32_t sectionRelativeAddress = symbol->rva - coffSection.rawDataRva;
		// find this section in the image
		const symbols::ImageSection* imageSection = symbols::FindImageSectionByName(imageSectionDb, coffSection.name);
		if (!imageSection)
		{
			LC_ERROR_DEV("Cannot find image section %s", coffSection.name.c_str());
			continue;
		}
		const uint32_t startOfImageSection = imageSection->rva;
		const uint32_t endOfImageSection = startOfImageSection + imageSection->size;
		// walk all contributions that are part of the image section.
		// fetch all potential contributions, they might be ambiguous at first because we delay
		// costly checks as far as possible.
		types::vector<const symbols::Contribution*> potentialContributions;
		potentialContributions.reserve(1024u);
		// start at the first contribution whose RVA is not below the start of the image section
		auto contributionIt = std::lower_bound(contributionDb->contributions.begin(), contributionDb->contributions.end(), startOfImageSection, &HasLowerRVA);
		while (contributionIt != contributionDb->contributions.end())
		{
			const symbols::Contribution* contribution = *contributionIt;
			++contributionIt;
			if (contribution->rva >= endOfImageSection)
			{
				// no more contributions that belong to this section
				break;
			}
			if (contribution->size != coffSection.rawDataSize)
			{
				// section size does not match
				continue;
			}
			else if (sectionRelativeAddress >= contribution->size)
			{
				// the symbol cannot be part of this contributing section because it is not large enough
				continue;
			}
			else if (symbols::GetContributionCompilandName(compilandDb, contributionDb, contribution) != objPath)
			{
				// the section contribution originated from a different .obj file
				continue;
			}
			else
			{
				// this is a potential contribution, store it for now
				potentialContributions.push_back(contribution);
			}
		}
		const symbols::Contribution* foundContribution = nullptr;
		const size_t potentialContributionsCount = potentialContributions.size();
		bool ambiguous = false;
		if (potentialContributionsCount == 0u)
		{
			// absolutely no contribution found that matches file and size.
			// this symbol/contribution has been stripped out by the linker, so don't report an error.
			continue;
		}
		else
		{
			LC_LOG_DEV("Found %zu candidate(s)", potentialContributionsCount);
			// there are one or more potential contributions, filter them using the symbols' names.
			// filtering is done by checking whether the undecorated name of the contribution's symbol is
			// part of the undecorated name of the COFF symbol.
			// note that we cannot do that with mangled names, because the PDB doesn't hold them for static symbols,
			// and they cannot be generated from undecorated ones in all cases, e.g. for symbols in an anonymous namespace.
			const std::string& coffUndecoratedName = symbols::UndecorateSymbolName(missingSymbolName);
			for (auto it = potentialContributions.begin(); it != potentialContributions.end(); ++it)
			{
				const symbols::Contribution* contribution = *it;
				// if there already is at least one symbol that spans the potential symbol's range,
				// this contribution cannot be the correct one.
				const Symbol* potentialSymbol = symbols::FindSymbolByRVA(symbolDB, contribution->rva);
				if (potentialSymbol)
				{
					const uint32_t rangeStart = potentialSymbol->rva;
					IDiaSymbol* diaSymbol = dia::FindSymbolByRVA(provider->diaSession, contribution->rva);
					if (diaSymbol)
					{
						const uint32_t potentialSymbolSize = dia::GetSymbolSize(diaSymbol);
						diaSymbol->Release();
						const uint32_t rangeEnd = rangeStart + potentialSymbolSize;
						const uint32_t potentialRva = contribution->rva + sectionRelativeAddress;
						if (potentialRva >= rangeStart && potentialRva < rangeEnd)
						{
							// there already is a symbol that spans the potential RVA's range.
							// however, for certain symbols such as exception clauses, this is OK.
							if (!symbols::IsExceptionClauseSymbol(missingSymbolName))
							{
								continue;
							}
						}
					}
				}
				const uint32_t rva = contribution->rva + sectionRelativeAddress;
				// try to find the symbol in our local cache first
				std::wstring diaName;
				{
					const auto diaSymbolIt = diaSymbolCache->find(rva);
					if (diaSymbolIt == diaSymbolCache->end())
					{
						// the symbol could not be found in our cache, now try the PDB and store
						// the lookup into the cache.
						// exception clauses are labels stored as children of functions.
						IDiaSymbol* diaSymbol = symbols::IsExceptionClauseSymbol(missingSymbolName)
							? dia::FindLabelByRva(provider->diaSession, rva)
							: dia::FindSymbolByRVA(provider->diaSession, rva);
						if (diaSymbol)
						{
							const dia::SymbolName& diaSymbolName = dia::GetSymbolName(diaSymbol);
							diaName = diaSymbolName.GetString();
							diaSymbol->Release();
						}
						// failed lookups are cached as well (using an empty name)
						diaSymbolCache->insert(std::make_pair(rva, diaName));
					}
					else
					{
						// the symbol is in the cache, grab its name
						diaName = diaSymbolIt->second;
					}
				}
				if (diaName.length() == 0u)
				{
					// could not find the correct symbol, skip this contribution
					continue;
				}
				if (!string::Contains(string::ToWideString(coffUndecoratedName).c_str(), diaName.c_str()))
				{
					// names don't match, skip this contribution
					continue;
				}
				// possible candidate
				if (foundContribution)
				{
					// there is already a candidate, which means that resolution was ambiguous
					ambiguous = true;
					break;
				}
				foundContribution = contribution;
			}
		}
		if (ambiguous)
		{
			LC_ERROR_DEV("Contributions for symbol %s are ambiguous", missingSymbolName.c_str());
			continue;
		}
		// did we find a match?
		if (foundContribution)
		{
			const uint32_t rva = foundContribution->rva + sectionRelativeAddress;
			LC_LOG_DEV("Found symbol %s at 0x%X", missingSymbolName.c_str(), rva);
			symbols::Symbol* newSymbol = LC_NEW(&g_symbolAllocator, symbols::Symbol) { missingSymbolName, rva };
			symbolDB->symbolsByName.emplace(missingSymbolName, newSymbol);
			symbolDB->symbolsByRva.emplace(rva, newSymbol);
			// the relocations of this symbol have not been walked yet, do so in the next pass
			openSymbols.push_back(symbol);
			--unknownSymbolsToFind;
			// did we already find all symbols?
			if (unknownSymbolsToFind == 0u)
			{
				LC_LOG_DEV("All symbols known, exiting");
				return;
			}
		}
		else
		{
			// if we had potential candidates but could not find a symbol, there is still a possibility that the
			// symbol has been stripped by the linker due to the /Gw option that puts data symbols into separate
			// sections. this happens in ComplexClassGlobal.cpp in our test cases as well.
			LC_WARNING_DEV("Could not find symbol %s in compiland %s, possibly stripped by linker",
				coff::GetSymbolName(coffDb, symbol).c_str(),
				objPath.c_str());
		}
	}
	if (openSymbols.size() != 0u)
	{
		// we found new symbols to walk, so do another pass
		LC_LOG_DEV("Doing another pass");
		++pass;
		goto walkOpenSymbols;
	}
}