Imported Upstream version 6.0.0.172

Former-commit-id: f3cc9b82f3e5bd8f0fd3ebc098f789556b44e9cd
Xamarin Public Jenkins (auto-signing)
2019-04-12 14:10:50 +00:00
parent 8016999e4d
commit 64ac736ec5
32155 changed files with 3981439 additions and 75368 deletions

ASanStackFrameLayout.cpp

@@ -0,0 +1,153 @@
//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
namespace llvm {
// We sort the stack variables by alignment (largest first) to minimize
// unnecessary large gaps due to alignment.
// It is tempting to also sort variables by size so that larger variables
// have larger redzones at both ends. But reordering will make report analysis
// harder, especially when temporary unnamed variables are present.
// So, until we can provide more information (type, line number, etc)
// for the stack variables we avoid reordering them too much.
static inline bool CompareVars(const ASanStackVariableDescription &a,
const ASanStackVariableDescription &b) {
return a.Alignment > b.Alignment;
}
// We also force minimal alignment for all vars to kMinAlignment so that vars
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
static const size_t kMinAlignment = 16;
// We want to add a full redzone after every variable.
// The larger the variable Size, the larger the redzone.
// The resulting frame size is a multiple of Alignment.
static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
size_t Alignment) {
size_t Res = 0;
if (Size <= 4) Res = 16;
else if (Size <= 16) Res = 32;
else if (Size <= 128) Res = Size + 32;
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
return alignTo(std::max(Res, 2 * Granularity), Alignment);
}
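// A minimal worked example (illustrative values): with Size = 20,
// Granularity = 8, and Alignment = 16, the table above gives
// Res = 20 + 32 = 52; std::max(52, 2 * 8) = 52; alignTo(52, 16) = 64.
// The variable plus its trailing redzone therefore occupies 64 bytes.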
ASanStackFrameLayout
ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
size_t Granularity, size_t MinHeaderSize) {
assert(Granularity >= 8 && Granularity <= 64 &&
(Granularity & (Granularity - 1)) == 0);
assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
MinHeaderSize >= Granularity);
const size_t NumVars = Vars.size();
assert(NumVars > 0);
for (size_t i = 0; i < NumVars; i++)
Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
ASanStackFrameLayout Layout;
Layout.Granularity = Granularity;
Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
Vars[0].Alignment);
assert((Offset % Granularity) == 0);
for (size_t i = 0; i < NumVars; i++) {
bool IsLast = i == NumVars - 1;
size_t Alignment = std::max(Granularity, Vars[i].Alignment);
(void)Alignment; // Used only in asserts.
size_t Size = Vars[i].Size;
assert((Alignment & (Alignment - 1)) == 0);
assert(Layout.FrameAlignment >= Alignment);
assert((Offset % Alignment) == 0);
assert(Size > 0);
size_t NextAlignment = IsLast ? Granularity
: std::max(Granularity, Vars[i + 1].Alignment);
size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
NextAlignment);
Vars[i].Offset = Offset;
Offset += SizeWithRedzone;
}
if (Offset % MinHeaderSize) {
Offset += MinHeaderSize - (Offset % MinHeaderSize);
}
Layout.FrameSize = Offset;
assert((Layout.FrameSize % MinHeaderSize) == 0);
return Layout;
}
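// A worked example (illustrative, assuming Granularity = 8 and
// MinHeaderSize = 16): take two variables A{Size = 4, Alignment = 8} and
// B{Size = 20, Alignment = 16}. A's alignment is first raised to
// kMinAlignment (16), so the stable sort keeps the order A, B. The header
// takes Offset = 16; A lands at offset 16 with a 16-byte var-plus-redzone
// (VarAndRedzoneSize(4, 8, 16) == 16); B lands at offset 32 with
// VarAndRedzoneSize(20, 8, 8) == 56. That gives Offset = 88, which is then
// rounded up to FrameSize = 96.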
SmallString<64> ComputeASanStackFrameDescription(
const SmallVectorImpl<ASanStackVariableDescription> &Vars) {
SmallString<2048> StackDescriptionStorage;
raw_svector_ostream StackDescription(StackDescriptionStorage);
StackDescription << Vars.size();
for (const auto &Var : Vars) {
std::string Name = Var.Name;
if (Var.Line) {
Name += ":";
Name += to_string(Var.Line);
}
StackDescription << " " << Var.Offset << " " << Var.Size << " "
<< Name.size() << " " << Name;
}
return StackDescription.str();
}
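// For the layout example above (illustrative), two variables "a" (Offset 16,
// Size 4) and "b" (Offset 32, Size 20, declared on Line 7) would produce the
// description string "2 16 4 1 a 32 20 3 b:7".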
SmallVector<uint8_t, 64>
GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
const ASanStackFrameLayout &Layout) {
assert(Vars.size() > 0);
SmallVector<uint8_t, 64> SB;
SB.clear();
const size_t Granularity = Layout.Granularity;
SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
for (const auto &Var : Vars) {
SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
SB.resize(SB.size() + Var.Size / Granularity, 0);
if (Var.Size % Granularity)
SB.push_back(Var.Size % Granularity);
}
SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic);
return SB;
}
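// Continuing the example (illustrative, Granularity = 8, FrameSize = 96):
// the shadow starts with 16 / 8 = 2 left-redzone bytes; "a" contributes a
// single partial byte (4 % 8 == 4); "b" at offset 32 contributes one byte of
// mid-redzone padding, two zero bytes (20 / 8 == 2), and a partial byte
// (20 % 8 == 4); the rest, up to 96 / 8 = 12 shadow bytes, is filled with
// the right-redzone magic.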
SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
const SmallVectorImpl<ASanStackVariableDescription> &Vars,
const ASanStackFrameLayout &Layout) {
SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
const size_t Granularity = Layout.Granularity;
for (const auto &Var : Vars) {
assert(Var.LifetimeSize <= Var.Size);
const size_t LifetimeShadowSize =
(Var.LifetimeSize + Granularity - 1) / Granularity;
const size_t Offset = Var.Offset / Granularity;
std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
kAsanStackUseAfterScopeMagic);
}
return SB;
}
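// In the same example (illustrative), GetShadowBytesAfterScope would also
// overwrite the shadow bytes covering each variable's lifetime with
// kAsanStackUseAfterScopeMagic, so the whole frame reads as poisoned outside
// the variables' lifetimes.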
} // llvm namespace

AddDiscriminators.cpp

@@ -0,0 +1,261 @@
//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file adds DWARF discriminators to the IR. Path discriminators are
// used to decide what CFG path was taken inside sub-graphs whose instructions
// share the same line and column number information.
//
// The main user of this is the sample profiler. Instruction samples are
// mapped to line number information. Since a single line may be spread
// out over several basic blocks, discriminators add more precise location
// for the samples.
//
// For example,
//
// 1 #define ASSERT(P)
// 2 if (!(P))
// 3 abort()
// ...
// 100 while (true) {
// 101 ASSERT (sum < 0);
// 102 ...
// 130 }
//
// when converted to IR, this snippet looks something like:
//
// while.body: ; preds = %entry, %if.end
// %0 = load i32* %sum, align 4, !dbg !15
// %cmp = icmp slt i32 %0, 0, !dbg !15
// br i1 %cmp, label %if.end, label %if.then, !dbg !15
//
// if.then: ; preds = %while.body
// call void @abort(), !dbg !15
// br label %if.end, !dbg !15
//
// Notice that all the instructions in blocks 'while.body' and 'if.then'
// have exactly the same debug information. When this program is sampled
// at runtime, the profiler will assume that all these instructions are
// equally frequent. This, in turn, will lead it to consider the edge
// while.body->if.then to be frequently taken (which is incorrect).
//
// By adding a discriminator value to the instructions in block 'if.then',
// we can distinguish instructions at line 101 with discriminator 0 from
// the instructions at line 101 with discriminator 1.
//
// For more details about DWARF discriminators, please visit
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "add-discriminators"
// Command line option to disable discriminator generation even in the
// presence of debug information. This is only needed when debugging
// debug info generation issues.
static cl::opt<bool> NoDiscriminators(
"no-discriminators", cl::init(false),
cl::desc("Disable generation of discriminator information."));
namespace {
// The legacy pass implementation of AddDiscriminators.
struct AddDiscriminatorsLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
};
} // end anonymous namespace
char AddDiscriminatorsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
// Create the legacy AddDiscriminatorsPass.
FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminatorsLegacyPass();
}
static bool shouldHaveDiscriminator(const Instruction *I) {
return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
}
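// For example (illustrative): a call to llvm.dbg.value never gets a
// discriminator, while a call to llvm.memcpy may, since SROA can expand the
// latter into plain loads and stores that need a valid debug location.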
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
/// if (i < 10) x = i;
///
/// entry:
/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
/// if.then:
/// %1 = load i32* %i.addr, align 4, !dbg !10
/// store i32 %1, i32* %x, align 4, !dbg !10
/// br label %if.end, !dbg !10
/// if.end:
/// ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instructions in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (x < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instruction at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instructions in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
static bool addDiscriminators(Function &F) {
// If the function has debug information, but the user has disabled
// discriminators, do nothing.
// Similarly, if the function has no debug info, do nothing.
if (NoDiscriminators || !F.getSubprogram())
return false;
bool Changed = false;
using Location = std::pair<StringRef, unsigned>;
using BBSet = DenseSet<const BasicBlock *>;
using LocationBBMap = DenseMap<Location, BBSet>;
using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
using LocationSet = DenseSet<Location>;
LocationBBMap LBM;
LocationDiscriminatorMap LDM;
// Traverse all instructions in the function. If the source line location
// of an instruction appears in another basic block, assign a new
// discriminator to that instruction.
for (BasicBlock &B : F) {
for (auto &I : B.getInstList()) {
// Not all intrinsic calls should have a discriminator.
// We want to avoid a non-deterministic assignment of discriminators at
// different debug levels. We still allow discriminators on memory
// intrinsic calls because those can be early expanded by SROA into
// pairs of loads and stores, and the expanded load/store instructions
// should have a valid discriminator.
if (!shouldHaveDiscriminator(&I))
continue;
const DILocation *DIL = I.getDebugLoc();
if (!DIL)
continue;
Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
auto &BBMap = LBM[L];
auto R = BBMap.insert(&B);
if (BBMap.size() == 1)
continue;
// If more than one block shares this line+file location, a
// discriminator is needed to distinguish their instructions.
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " " << I
<< "\n");
Changed = true;
}
}
// Traverse all instructions and assign new discriminators to call
// instructions with the same line number that are in the same basic block.
// The sample-based profiler needs to distinguish different function calls
// within the same source line for correct profile annotation.
for (BasicBlock &B : F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
CallInst *Current = dyn_cast<CallInst>(&I);
// We bypass intrinsic calls for the following two reasons:
// 1) We want to avoid a non-deterministic assignment of
// discriminators.
// 2) We want to minimize the number of base discriminators used.
if (!Current || isa<IntrinsicInst>(&I))
continue;
DILocation *CurrentDIL = Current->getDebugLoc();
if (!CurrentDIL)
continue;
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
Changed = true;
}
}
}
return Changed;
}
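// As an illustration of the second loop above: for a source line such as
//   x = foo() + bar();
// both calls share the same file and line. The first call keeps its
// discriminator, while the second call in the same block receives a new base
// discriminator, so a sample-based profiler can attribute counts to each
// call separately.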
bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
return addDiscriminators(F);
}
PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!addDiscriminators(F))
return PreservedAnalyses::all();
// FIXME: should be all()
return PreservedAnalyses::none();
}

File diff suppressed because it is too large.

BreakCriticalEdges.cpp

@@ -0,0 +1,451 @@
//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
// inserting a dummy basic block. This pass may be "required" by passes that
// cannot deal with critical edges. For this usage, the structure type is
// forward declared. This pass obviously invalidates the CFG, but can update
// dominator trees.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
#define DEBUG_TYPE "break-crit-edges"
STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
BreakCriticalEdges() : FunctionPass(ID) {
initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
unsigned N =
SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
NumBroken += N;
return N > 0;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
}
};
} // end anonymous namespace
char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
"Break critical edges in CFG", false, false)
// Publicly exposed interface to pass...
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
NumBroken += N;
if (N == 0)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
return PA;
}
//===----------------------------------------------------------------------===//
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
/// exit block. This function inserts the new PHIs, as needed. Preds is a list
/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
/// the old loop exit, now the successor of SplitBB.
static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
BasicBlock *SplitBB,
BasicBlock *DestBB) {
// SplitBB shouldn't have anything non-trivial in it yet.
assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
// For each PHI in the destination block.
for (PHINode &PN : DestBB->phis()) {
unsigned Idx = PN.getBasicBlockIndex(SplitBB);
Value *V = PN.getIncomingValue(Idx);
// If the input is a PHI which already satisfies LCSSA, don't create
// a new one.
if (const PHINode *VP = dyn_cast<PHINode>(V))
if (VP->getParent() == SplitBB)
continue;
// Otherwise a new PHI is needed. Create one and populate it.
PHINode *NewPN = PHINode::Create(
PN.getType(), Preds.size(), "split",
SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
NewPN->addIncoming(V, Preds[i]);
// Update the original PHI.
PN.setIncomingValue(Idx, NewPN);
}
}
BasicBlock *
llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
// Splitting the critical edge to a pad block is non-trivial. Don't do
// it in this generic function.
if (DestBB->isEHPad()) return nullptr;
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
// Branch to the new block, breaking the edge.
TI->setSuccessor(SuccNum, NewBB);
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
Function::iterator FBBI = TIBB->getIterator();
F.getBasicBlockList().insert(++FBBI, NewBB);
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
{
unsigned BBIdx = 0;
for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
// We no longer enter through TIBB, now we come in through NewBB.
// Revector exactly one entry in the PHI node that used to come from
// TIBB to come from NewBB.
PHINode *PN = cast<PHINode>(I);
// Reuse the previous value of BBIdx if it lines up. In cases where we
// have multiple phi nodes with *lots* of predecessors, this is a speed
// win because we don't have to scan the PHI looking for TIBB. This
// works because the predecessor block lists of PHI nodes are usually
// in the same order.
if (PN->getIncomingBlock(BBIdx) != TIBB)
BBIdx = PN->getBasicBlockIndex(TIBB);
PN->setIncomingBlock(BBIdx, NewBB);
}
}
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
if (Options.MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
// If we have nothing to update, just return.
auto *DT = Options.DT;
auto *LI = Options.LI;
if (!DT && !LI)
return NewBB;
if (DT) {
// Update the DominatorTree.
// ---> NewBB -----\
// / V
// TIBB -------\\------> DestBB
//
// First, inform the DT about the new path from TIBB to DestBB via NewBB,
// then delete the old edge from TIBB to DestBB. By doing this in that order
// DestBB stays reachable in the DT the whole time and its subtree doesn't
// get disconnected.
SmallVector<DominatorTree::UpdateType, 3> Updates;
Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
DT->applyUpdates(Updates);
}
// Update LoopInfo if it is around.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
// If one of the two blocks is not in a loop, the new block is not in a
// loop either, and thus LI doesn't need to be updated.
if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
if (TIL == DestLoop) {
// Both blocks are in the same loop; NewBB joins that loop.
DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
TIL->addBasicBlockToLoop(NewBB, *LI);
} else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
// Edge from two loops with no containment relation. Because these
// are natural loops, we know that the destination block must be the
// header of its loop (adding a branch into a loop elsewhere would
// create an irreducible loop).
assert(DestLoop->getHeader() == DestBB &&
"Should not create irreducible loops!");
if (Loop *P = DestLoop->getParentLoop())
P->addBasicBlockToLoop(NewBB, *LI);
}
}
// If TIBB is in a loop and DestBB is outside of that loop, we may need
// to update LoopSimplify form and LCSSA form.
if (!TIL->contains(DestBB)) {
assert(!TIL->contains(NewBB) &&
"Split point for loop exit is contained in loop!");
// Update LCSSA form in the newly created exit block.
if (Options.PreserveLCSSA) {
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
}
// The only way that we can break LoopSimplify form by splitting a critical
// edge is if after the split there exists some edge from TIL to DestBB
// *and* the only edge into DestBB from outside of TIL is that of
// NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
// is the new exit block and it has no non-loop predecessors. If the
// second isn't true, then DestBB was not in LoopSimplify form prior to
// the split as it had a non-loop predecessor. In both of these cases,
// the predecessor must be directly in TIL, not in a subloop, or again
// LoopSimplify doesn't hold.
SmallVector<BasicBlock *, 4> LoopPreds;
for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
++I) {
BasicBlock *P = *I;
if (P == NewBB)
continue; // The new block is known.
if (LI->getLoopFor(P) != TIL) {
// No need to re-simplify, it wasn't to start with.
LoopPreds.clear();
break;
}
LoopPreds.push_back(P);
}
if (!LoopPreds.empty()) {
assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
}
}
return NewBB;
}
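// A minimal usage sketch (illustrative; DT and LI may be null when the
// caller has no analyses to preserve):
//
//   if (BasicBlock *NewBB = SplitCriticalEdge(
//           BB->getTerminator(), /*SuccNum=*/0,
//           CriticalEdgeSplittingOptions(DT, LI)))
//     ; // NewBB now holds a single unconditional branch to the old successor.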
// Return the unique indirectbr predecessor of a block. This may return null
// even if such a predecessor exists, if it's not useful for splitting.
// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
// predecessors of BB.
static BasicBlock *
findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
// If the block doesn't have any PHIs, we don't care about it, since there's
// no point in splitting it.
PHINode *PN = dyn_cast<PHINode>(BB->begin());
if (!PN)
return nullptr;
// Verify we have exactly one IBR predecessor.
// Conservatively bail out if one of the other predecessors is not a "regular"
// terminator (that is, not a switch or a br).
BasicBlock *IBB = nullptr;
for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
BasicBlock *PredBB = PN->getIncomingBlock(Pred);
TerminatorInst *PredTerm = PredBB->getTerminator();
switch (PredTerm->getOpcode()) {
case Instruction::IndirectBr:
if (IBB)
return nullptr;
IBB = PredBB;
break;
case Instruction::Br:
case Instruction::Switch:
OtherPreds.push_back(PredBB);
continue;
default:
return nullptr;
}
}
return IBB;
}
bool llvm::SplitIndirectBrCriticalEdges(Function &F,
BranchProbabilityInfo *BPI,
BlockFrequencyInfo *BFI) {
// Check whether the function has any indirectbrs, and collect which blocks
// they may jump to. Since most functions don't have indirect branches,
// this lowers the common case's overhead to O(Blocks) instead of O(Edges).
SmallSetVector<BasicBlock *, 16> Targets;
for (auto &BB : F) {
auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
if (!IBI)
continue;
for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
Targets.insert(IBI->getSuccessor(Succ));
}
if (Targets.empty())
return false;
bool ShouldUpdateAnalysis = BPI && BFI;
bool Changed = false;
for (BasicBlock *Target : Targets) {
SmallVector<BasicBlock *, 16> OtherPreds;
BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
// If we did not find an indirectbr, or the indirectbr is the only
// incoming edge, this isn't the kind of edge we're looking for.
if (!IBRPred || OtherPreds.empty())
continue;
// Don't even think about ehpads/landingpads.
Instruction *FirstNonPHI = Target->getFirstNonPHI();
if (FirstNonPHI->isEHPad() || Target->isLandingPad())
continue;
BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
if (ShouldUpdateAnalysis) {
// Copy the BFI/BPI from Target to BodyBlock.
for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
I < E; ++I)
BPI->setEdgeProbability(BodyBlock, I,
BPI->getEdgeProbability(Target, I));
BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
}
// It's possible Target was its own successor through an indirectbr.
// In this case, the indirectbr now comes from BodyBlock.
if (IBRPred == Target)
IBRPred = BodyBlock;
// At this point Target only has PHIs, and BodyBlock has the rest of the
// block's body. Create a copy of Target that will be used by the "direct"
// preds.
ValueToValueMapTy VMap;
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
BlockFrequency BlockFreqForDirectSucc;
for (BasicBlock *Pred : OtherPreds) {
// If the target branches to itself, then the terminator of the split
// block (BodyBlock) is the one that needs to be updated.
BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
if (ShouldUpdateAnalysis)
BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
BPI->getEdgeProbability(Src, DirectSucc);
}
if (ShouldUpdateAnalysis) {
BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
BlockFrequency NewBlockFreqForTarget =
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
BPI->eraseBlock(Target);
}
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
// they are clones, so the number of PHIs is the same.
// (a) Remove the edge coming from IBRPred from the "Direct" PHI
// (b) Leave that as the only edge in the "Indirect" PHI.
// (c) Merge the two in the body block.
BasicBlock::iterator Indirect = Target->begin(),
End = Target->getFirstNonPHI()->getIterator();
BasicBlock::iterator Direct = DirectSucc->begin();
BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
assert(&*End == Target->getTerminator() &&
"Block was expected to only contain PHIs");
while (Indirect != End) {
PHINode *DirPHI = cast<PHINode>(Direct);
PHINode *IndPHI = cast<PHINode>(Indirect);
// Now, clean up - the direct block shouldn't get the indirect value,
// and vice versa.
DirPHI->removeIncomingValue(IBRPred);
Direct++;
// Advance the pointer here, to avoid invalidation issues when the old
// PHI is erased.
Indirect++;
PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
IBRPred);
// Create a PHI in the body block, to merge the direct and indirect
// predecessors.
PHINode *MergePHI =
PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
MergePHI->addIncoming(NewIndPHI, Target);
MergePHI->addIncoming(DirPHI, DirectSucc);
IndPHI->replaceAllUsesWith(MergePHI);
IndPHI->eraseFromParent();
}
Changed = true;
}
return Changed;
}
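// As an illustration of the PHI fix-up above (hypothetical values): a node
//   %p = phi i32 [ %x, %ibr_pred ], [ %y, %direct_pred ]
// in Target is reduced to %ind = phi i32 [ %x, %ibr_pred ]; its clone in
// DirectSucc keeps only [ %y, %direct_pred ]; and BodyBlock merges the two:
//   %merge = phi i32 [ %ind, %target ], [ %p.clone, %direct_succ ]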

File diff suppressed because it is too large.

BypassSlowDivision.cpp

@@ -0,0 +1,466 @@
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains an optimization for div and rem on architectures that
// execute short instructions significantly faster than longer instructions.
// For example, on Intel Atom 32-bit divides are slow enough that during
// runtime it is profitable to check the value of the operands, and if they are
// positive and less than 256 use an unsigned 8-bit divide.
//
//===----------------------------------------------------------------------===//
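// As a sketch of the transformation (illustrative IR, assuming a 32-bit
// division bypassed with an 8-bit one):
//
//   %q = udiv i32 %a, %b
//
// becomes, roughly:
//
//   %v  = or i32 %a, %b
//   %hi = and i32 %v, -256           ; bits above the bypass width
//   %ok = icmp eq i32 %hi, 0
//   br i1 %ok, label %fast, label %slow
//
// where %fast truncates both operands to i8, divides, and zero-extends the
// quotient, and %slow performs the original i32 division.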
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "bypass-slow-division"
namespace {
struct QuotRemPair {
Value *Quotient;
Value *Remainder;
QuotRemPair(Value *InQuotient, Value *InRemainder)
: Quotient(InQuotient), Remainder(InRemainder) {}
};
/// A quotient and remainder, plus a BB from which they logically "originate".
/// If you use Quotient or Remainder in a Phi node, you should use BB as its
/// corresponding predecessor.
struct QuotRemWithBB {
BasicBlock *BB = nullptr;
Value *Quotient = nullptr;
Value *Remainder = nullptr;
};
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
enum ValueRange {
/// Operand definitely fits into BypassType. No runtime checks are needed.
VALRNG_KNOWN_SHORT,
/// A runtime check is required, as value range is unknown.
VALRNG_UNKNOWN,
/// Operand is unlikely to fit into BypassType. The bypassing should be
/// disabled.
VALRNG_LIKELY_LONG
};
class FastDivInsertionTask {
bool IsValidTask = false;
Instruction *SlowDivOrRem = nullptr;
IntegerType *BypassType = nullptr;
BasicBlock *MainBB = nullptr;
bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
QuotRemWithBB createSlowBB(BasicBlock *Successor);
QuotRemWithBB createFastBB(BasicBlock *Successor);
QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
BasicBlock *PhiBB);
Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
Optional<QuotRemPair> insertFastDivAndRem();
bool isSignedOp() {
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
SlowDivOrRem->getOpcode() == Instruction::SRem;
}
bool isDivisionOp() {
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
SlowDivOrRem->getOpcode() == Instruction::UDiv;
}
Type *getSlowType() { return SlowDivOrRem->getType(); }
public:
FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
Value *getReplacement(DivCacheTy &Cache);
};
} // end anonymous namespace
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
const BypassWidthsTy &BypassWidths) {
switch (I->getOpcode()) {
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
SlowDivOrRem = I;
break;
default:
// I is not a div/rem operation.
return;
}
// Skip division on vector types. Only optimize integer instructions.
IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
if (!SlowType)
return;
// Skip if this bitwidth is not bypassed.
auto BI = BypassWidths.find(SlowType->getBitWidth());
if (BI == BypassWidths.end())
return;
// Get type for div/rem instruction with bypass bitwidth.
IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
BypassType = BT;
// The original basic block.
MainBB = I->getParent();
// The instruction is indeed a slow div or rem operation.
IsValidTask = true;
}
/// Reuses a previously computed quotient or remainder from the current BB if
/// operands and operation are identical. Otherwise calls insertFastDivAndRem
/// to perform the optimization and caches the resulting quotient and remainder.
/// If no replacement can be generated, nullptr is returned.
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
// First, make sure that the task is valid.
if (!IsValidTask)
return nullptr;
// Then, look for a value in Cache.
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
auto CacheI = Cache.find(Key);
if (CacheI == Cache.end()) {
// If previous instance does not exist, try to insert fast div.
Optional<QuotRemPair> OptResult = insertFastDivAndRem();
// Bail out if insertFastDivAndRem has failed.
if (!OptResult)
return nullptr;
CacheI = Cache.insert({Key, *OptResult}).first;
}
QuotRemPair &Value = CacheI->second;
return isDivisionOp() ? Value.Quotient : Value.Remainder;
}
/// \brief Check if a value looks like a hash.
///
/// The routine is expected to detect values computed using the most common hash
/// algorithms. Typically, hash computations end with one of the following
/// instructions:
///
/// 1) MUL with a constant wider than BypassType
/// 2) XOR instruction
///
/// And even if we are wrong and the value is not a hash, it is still quite
/// unlikely that such values will fit into BypassType.
///
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
/// It is implemented as a depth-first search for values that look neither long
/// nor hash-like.
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return false;
switch (I->getOpcode()) {
case Instruction::Xor:
return true;
case Instruction::Mul: {
// After Constant Hoisting pass, long constants may be represented as
// bitcast instructions. As a result, some constants may look like an
// instruction at first, and an additional check is necessary to find out if
// an operand is actually a constant.
Value *Op1 = I->getOperand(1);
ConstantInt *C = dyn_cast<ConstantInt>(Op1);
if (!C && isa<BitCastInst>(Op1))
C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
}
case Instruction::PHI:
// Stop IR traversal on pathological input code. This limits recursion
// depth.
if (Visited.size() >= 16)
return false;
// Do not visit nodes that have been visited already. We return true because
// it means that we couldn't find any value that doesn't look hash-like.
if (Visited.find(I) != Visited.end())
return true;
Visited.insert(I);
return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
// Ignore undef values as they probably don't affect the division
// operands.
return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
isa<UndefValue>(V);
});
default:
return false;
}
}
/// Check if an integer value fits into our bypass type.
ValueRange FastDivInsertionTask::getValueRange(Value *V,
VisitedSetTy &Visited) {
unsigned ShortLen = BypassType->getBitWidth();
unsigned LongLen = V->getType()->getIntegerBitWidth();
assert(LongLen > ShortLen && "Value type must be wider than BypassType");
unsigned HiBits = LongLen - ShortLen;
const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
KnownBits Known(LongLen);
computeKnownBits(V, Known, DL);
if (Known.countMinLeadingZeros() >= HiBits)
return VALRNG_KNOWN_SHORT;
if (Known.countMaxLeadingZeros() < HiBits)
return VALRNG_LIKELY_LONG;
// Long integer divisions are often used in hashtable implementations. It's
// not worth bypassing such divisions because hash values are extremely
// unlikely to have enough leading zeros. The call below tries to detect
// values that are unlikely to fit BypassType (including hashes).
if (isHashLikeValue(V, Visited))
return VALRNG_LIKELY_LONG;
return VALRNG_UNKNOWN;
}
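// For instance (illustrative): with a 32-bit value and an 8-bit BypassType,
// HiBits is 24. A value known to have at least 24 leading zero bits is
// classified VALRNG_KNOWN_SHORT, while one with a known-set bit among the
// top 24 is VALRNG_LIKELY_LONG.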
/// Add new basic block for slow div and rem operations and put it before
/// SuccessorBB.
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
QuotRemWithBB DivRemPair;
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
MainBB->getParent(), SuccessorBB);
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
if (isSignedOp()) {
DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
} else {
DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
}
Builder.CreateBr(SuccessorBB);
return DivRemPair;
}
/// Add new basic block for fast div and rem operations and put it before
/// SuccessorBB.
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
QuotRemWithBB DivRemPair;
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
MainBB->getParent(), SuccessorBB);
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
Value *ShortDivisorV =
Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
Value *ShortDividendV =
Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
// udiv/urem because this optimization only handles positive numbers.
Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
DivRemPair.Quotient =
Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
DivRemPair.Remainder =
Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
Builder.CreateBr(SuccessorBB);
return DivRemPair;
}
/// Creates Phi nodes for result of Div and Rem.
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
QuotRemWithBB &RHS,
BasicBlock *PhiBB) {
IRBuilder<> Builder(PhiBB, PhiBB->begin());
PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
RemPhi->addIncoming(LHS.Remainder, LHS.BB);
RemPhi->addIncoming(RHS.Remainder, RHS.BB);
return QuotRemPair(QuoPhi, RemPhi);
}
/// Creates a runtime check to test whether both the divisor and dividend fit
/// into BypassType. The check is inserted at the end of MainBB. True return
/// value means that the operands fit. Either of the operands may be NULL if it
/// doesn't need a runtime check.
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
assert((Op1 || Op2) && "Nothing to check");
IRBuilder<> Builder(MainBB, MainBB->end());
Value *OrV;
if (Op1 && Op2)
OrV = Builder.CreateOr(Op1, Op2);
else
OrV = Op1 ? Op1 : Op2;
// The bit mask is inverted so that the AND isolates the bits above the
// bypass width; a zero result means both operands fit into the bypass type.
uint64_t BitMask = ~BypassType->getBitMask();
Value *AndV = Builder.CreateAnd(OrV, BitMask);
// Compare operand values
Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
return Builder.CreateICmpEQ(AndV, ZeroV);
}
/// Substitutes the div/rem instruction with code that checks the value of the
/// operands and uses a shorter, faster div/rem instruction when possible.
Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
VisitedSetTy SetL;
ValueRange DividendRange = getValueRange(Dividend, SetL);
if (DividendRange == VALRNG_LIKELY_LONG)
return None;
VisitedSetTy SetR;
ValueRange DivisorRange = getValueRange(Divisor, SetR);
if (DivisorRange == VALRNG_LIKELY_LONG)
return None;
bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
if (DividendShort && DivisorShort) {
// If both operands are known to be short then just replace the long
// division with a short one in-place. Since we're not introducing control
// flow in this case, narrowing the division is always a win, even if the
// divisor is a constant (and will later get replaced by a multiplication).
IRBuilder<> Builder(SlowDivOrRem);
Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
return QuotRemPair(ExtDiv, ExtRem);
}
if (isa<ConstantInt>(Divisor)) {
// If the divisor is a constant, DAGCombiner will convert the division into
// a multiplication by a magic constant. It isn't clear if it is worth
// introducing control flow to get a narrower multiply.
return None;
}
if (DividendShort && !isSignedOp()) {
// If the division is unsigned and Dividend is known to be short, then
// either
// 1) Divisor is less or equal to Dividend, and the result can be computed
// with a short division.
// 2) Divisor is greater than Dividend. In this case, no division is needed
// at all: The quotient is 0 and the remainder is equal to Dividend.
//
// So instead of checking at runtime whether Divisor fits into BypassType,
// we emit a runtime check to differentiate between these two cases. This
// lets us entirely avoid a long div.
// Split the basic block before the div/rem.
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
// Remove the unconditional branch from MainBB to SuccessorBB.
MainBB->getInstList().back().eraseFromParent();
QuotRemWithBB Long;
Long.BB = MainBB;
Long.Quotient = ConstantInt::get(getSlowType(), 0);
Long.Remainder = Dividend;
QuotRemWithBB Fast = createFastBB(SuccessorBB);
QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
IRBuilder<> Builder(MainBB, MainBB->end());
Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
return Result;
} else {
// General case. Create both slow and fast div/rem pairs and choose one of
// them at runtime.
// Split the basic block before the div/rem.
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
// Remove the unconditional branch from MainBB to SuccessorBB.
MainBB->getInstList().back().eraseFromParent();
QuotRemWithBB Fast = createFastBB(SuccessorBB);
QuotRemWithBB Slow = createSlowBB(SuccessorBB);
QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
DivisorShort ? nullptr : Divisor);
IRBuilder<> Builder(MainBB, MainBB->end());
Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
return Result;
}
}
/// This optimization identifies DIV/REM instructions in a BB that can be
/// profitably bypassed and carried out with a shorter, faster divide.
bool llvm::bypassSlowDivision(BasicBlock *BB,
const BypassWidthsTy &BypassWidths) {
DivCacheTy PerBBDivCache;
bool MadeChange = false;
Instruction *Next = &*BB->begin();
while (Next != nullptr) {
// We may add instructions immediately after I, but we want to skip over
// them.
Instruction *I = Next;
Next = Next->getNextNode();
FastDivInsertionTask Task(I, BypassWidths);
if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
I->replaceAllUsesWith(Replacement);
I->eraseFromParent();
MadeChange = true;
}
}
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
// create divrem machine instructions. Now erase any unused divs / rems so we
// don't leave extra instructions sitting around.
for (auto &KV : PerBBDivCache)
for (Value *V : {KV.second.Quotient, KV.second.Remainder})
RecursivelyDeleteTriviallyDeadInstructions(V);
return MadeChange;
}

CMakeLists.txt

@@ -0,0 +1,65 @@
add_llvm_library(LLVMTransformUtils
ASanStackFrameLayout.cpp
AddDiscriminators.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
CallPromotionUtils.cpp
CloneFunction.cpp
CloneModule.cpp
CodeExtractor.cpp
CtorUtils.cpp
DemoteRegToStack.cpp
EntryExitInstrumenter.cpp
EscapeEnumerator.cpp
Evaluator.cpp
FlattenCFG.cpp
FunctionComparator.cpp
FunctionImportUtils.cpp
GlobalStatus.cpp
InlineFunction.cpp
ImportedFunctionsInliningStatistics.cpp
InstructionNamer.cpp
IntegerDivision.cpp
LCSSA.cpp
LibCallsShrinkWrap.cpp
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
LoopUnrollPeel.cpp
LoopUnrollRuntime.cpp
LoopUtils.cpp
LoopVersioning.cpp
LowerInvoke.cpp
LowerMemIntrinsics.cpp
LowerSwitch.cpp
Mem2Reg.cpp
MetaRenamer.cpp
ModuleUtils.cpp
NameAnonGlobals.cpp
OrderedInstructions.cpp
PredicateInfo.cpp
PromoteMemoryToRegister.cpp
StripGCRelocates.cpp
SSAUpdater.cpp
SanitizerStats.cpp
SimplifyCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
SplitModule.cpp
StripNonLineTableDebugInfo.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
VNCoercion.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Utils
DEPENDS
intrinsics_gen
)

CallPromotionUtils.cpp

@@ -0,0 +1,423 @@
//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities useful for promoting indirect call sites to
// direct call sites.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
#define DEBUG_TYPE "call-promotion-utils"
/// Fix-up phi nodes in an invoke instruction's normal destination.
///
/// After versioning an invoke instruction, values coming from the original
/// block will now be coming from the "merge" block. For example, in the code
/// below:
///
/// then_bb:
/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// else_bb:
/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// merge_bb:
/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
/// br %normal_dst
///
/// normal_dst:
/// %t3 = phi i32 [ %x, %orig_bb ], ...
///
/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
/// "normal_dst" must be fixed to refer to "merge_bb":
///
/// normal_dst:
/// %t3 = phi i32 [ %x, %merge_bb ], ...
///
static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
BasicBlock *MergeBlock) {
for (PHINode &Phi : Invoke->getNormalDest()->phis()) {
int Idx = Phi.getBasicBlockIndex(OrigBlock);
if (Idx == -1)
continue;
Phi.setIncomingBlock(Idx, MergeBlock);
}
}
/// Fix-up phi nodes in an invoke instruction's unwind destination.
///
/// After versioning an invoke instruction, values coming from the original
/// block will now be coming from either the "then" block or the "else" block.
/// For example, in the code below:
///
/// then_bb:
/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// else_bb:
/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// unwind_dst:
/// %t3 = phi i32 [ %x, %orig_bb ], ...
///
/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
///
/// unwind_dst:
/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
///
static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
BasicBlock *ThenBlock,
BasicBlock *ElseBlock) {
for (PHINode &Phi : Invoke->getUnwindDest()->phis()) {
int Idx = Phi.getBasicBlockIndex(OrigBlock);
if (Idx == -1)
continue;
auto *V = Phi.getIncomingValue(Idx);
Phi.setIncomingBlock(Idx, ThenBlock);
Phi.addIncoming(V, ElseBlock);
}
}
/// Create a phi node for the returned value of a call or invoke instruction.
///
/// After versioning a call or invoke instruction that returns a value, we have
/// to merge the value of the original and new instructions. We do this by
/// creating a phi node and replacing uses of the original instruction with this
/// phi node.
///
/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
/// defined in "then_bb", we create the following phi node:
///
/// ; Uses of the original instruction are replaced by uses of the phi node.
/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
///
static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
BasicBlock *MergeBlock, IRBuilder<> &Builder) {
if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
return;
Builder.SetInsertPoint(&MergeBlock->front());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
SmallVector<User *, 16> UsersToUpdate;
for (User *U : OrigInst->users())
UsersToUpdate.push_back(U);
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(OrigInst, Phi);
Phi->addIncoming(OrigInst, OrigInst->getParent());
Phi->addIncoming(NewInst, NewInst->getParent());
}
/// Cast a call or invoke instruction to the given type.
///
/// When promoting a call site, the return type of the call site might not match
/// that of the callee. If this is the case, we have to cast the returned value
/// to the correct type. The location of the cast depends on whether we have
/// a call or an invoke instruction.
///
/// For example, if the call instruction below requires a bitcast after
/// promotion:
///
/// orig_bb:
/// %t0 = call i32 @func()
/// ...
///
/// The bitcast is placed after the call instruction:
///
/// orig_bb:
/// ; Uses of the original return value are replaced by uses of the bitcast.
/// %t0 = call i32 @func()
/// %t1 = bitcast i32 %t0 to ...
/// ...
///
/// A similar transformation is performed for invoke instructions. However,
/// since invokes are terminating, a new block is created for the bitcast. For
/// example, if the invoke instruction below requires a bitcast after promotion:
///
/// orig_bb:
/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
///
/// The edge between the original block and the invoke's normal destination is
/// split, and the bitcast is placed there:
///
/// orig_bb:
/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
///
/// split_bb:
/// ; Uses of the original return value are replaced by uses of the bitcast.
/// %t1 = bitcast i32 %t0 to ...
/// br label %normal_dst
///
static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
// Save the users of the calling instruction. These uses will be changed to
// use the bitcast after we create it.
SmallVector<User *, 16> UsersToUpdate;
for (User *U : CS.getInstruction()->users())
UsersToUpdate.push_back(U);
// Determine an appropriate location to create the bitcast for the return
// value. The location depends on whether we have a call or an invoke
// instruction.
Instruction *InsertBefore = nullptr;
if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
InsertBefore =
&SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
else
InsertBefore = &*std::next(CS.getInstruction()->getIterator());
// Bitcast the return value to the correct type.
auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
RetTy, "", InsertBefore);
if (RetBitCast)
*RetBitCast = Cast;
// Replace all the original uses of the calling instruction with the bitcast.
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(CS.getInstruction(), Cast);
}
/// Predicate and clone the given call site.
///
/// This function creates an if-then-else structure at the location of the call
/// site. The "if" condition compares the call site's called value to the given
/// callee. The original call site is moved into the "else" block, and a clone
/// of the call site is placed in the "then" block. The cloned instruction is
/// returned.
///
/// For example, the call instruction below:
///
/// orig_bb:
/// %t0 = call i32 %ptr()
/// ...
///
/// Is replaced by the following:
///
/// orig_bb:
/// %cond = icmp eq i32 ()* %ptr, @func
/// br i1 %cond, %then_bb, %else_bb
///
/// then_bb:
/// ; The clone of the original call instruction is placed in the "then"
/// ; block. It is not yet promoted.
/// %t1 = call i32 %ptr()
/// br merge_bb
///
/// else_bb:
/// ; The original call instruction is moved to the "else" block.
/// %t0 = call i32 %ptr()
/// br merge_bb
///
/// merge_bb:
/// ; Uses of the original call instruction are replaced by uses of the phi
/// ; node.
/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
/// ...
///
/// A similar transformation is performed for invoke instructions. However,
/// since invokes are terminating, more work is required. For example, the
/// invoke instruction below:
///
/// orig_bb:
/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
///
/// Is replaced by the following:
///
/// orig_bb:
/// %cond = icmp eq i32 ()* %ptr, @func
/// br i1 %cond, %then_bb, %else_bb
///
/// then_bb:
/// ; The clone of the original invoke instruction is placed in the "then"
/// ; block, and its normal destination is set to the "merge" block. It is
/// ; not yet promoted.
/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// else_bb:
/// ; The original invoke instruction is moved into the "else" block, and
/// ; its normal destination is set to the "merge" block.
/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
///
/// merge_bb:
/// ; Uses of the original invoke instruction are replaced by uses of the
/// ; phi node, and the merge block branches to the normal destination.
/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
/// br label %normal_dst
///
static Instruction *versionCallSite(CallSite CS, Value *Callee,
MDNode *BranchWeights) {
IRBuilder<> Builder(CS.getInstruction());
Instruction *OrigInst = CS.getInstruction();
BasicBlock *OrigBlock = OrigInst->getParent();
// Create the compare. The called value and callee must have the same type to
// be compared.
if (CS.getCalledValue()->getType() != Callee->getType())
Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType());
auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee);
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
TerminatorInst *ThenTerm = nullptr;
TerminatorInst *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
BasicBlock *ElseBlock = ElseTerm->getParent();
BasicBlock *MergeBlock = OrigInst->getParent();
ThenBlock->setName("if.true.direct_targ");
ElseBlock->setName("if.false.orig_indirect");
MergeBlock->setName("if.end.icp");
Instruction *NewInst = OrigInst->clone();
OrigInst->moveBefore(ElseTerm);
NewInst->insertBefore(ThenTerm);
// If the original call site is an invoke instruction, we have extra work to
// do since invoke instructions are terminating. We have to fix-up phi nodes
// in the invoke's normal and unwind destinations.
if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
auto *NewInvoke = cast<InvokeInst>(NewInst);
// Invoke instructions are terminating, so we don't need the terminator
// instructions that were just created.
ThenTerm->eraseFromParent();
ElseTerm->eraseFromParent();
// Branch from the "merge" block to the original normal destination.
Builder.SetInsertPoint(MergeBlock);
Builder.CreateBr(OrigInvoke->getNormalDest());
// Fix-up phi nodes in the original invoke's normal and unwind destinations.
fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
// Now set the normal destinations of the invoke instructions to be the
// "merge" block.
OrigInvoke->setNormalDest(MergeBlock);
NewInvoke->setNormalDest(MergeBlock);
}
// Create a phi node for the returned value of the call site.
createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
return NewInst;
}
bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
const char **FailureReason) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
// Check the return type. The callee's return value type must be bitcast
// compatible with the call site's type.
Type *CallRetTy = CS.getInstruction()->getType();
Type *FuncRetTy = Callee->getReturnType();
if (CallRetTy != FuncRetTy)
if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
if (FailureReason)
*FailureReason = "Return type mismatch";
return false;
}
// The number of formal arguments of the callee.
unsigned NumParams = Callee->getFunctionType()->getNumParams();
// Check the number of arguments. The callee and call site must agree on the
// number of arguments.
if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
if (FailureReason)
*FailureReason = "The number of arguments mismatch";
return false;
}
// Check the argument types. The callee's formal argument types must be
// bitcast compatible with the corresponding actual argument types of the call
// site.
for (unsigned I = 0; I < NumParams; ++I) {
Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
Type *ActualTy = CS.getArgument(I)->getType();
if (FormalTy == ActualTy)
continue;
if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
if (FailureReason)
*FailureReason = "Argument type mismatch";
return false;
}
}
return true;
}
Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
CastInst **RetBitCast) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
// Set the called function of the call site to be the given callee.
CS.setCalledFunction(Callee);
  // Since the call site will no longer be indirect, we must clear metadata
  // that is only appropriate for indirect calls. This includes !prof and
  // !callees metadata.
CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
// If the function type of the call site matches that of the callee, no
// additional work is required.
if (CS.getFunctionType() == Callee->getFunctionType())
return CS.getInstruction();
// Save the return types of the call site and callee.
Type *CallSiteRetTy = CS.getInstruction()->getType();
Type *CalleeRetTy = Callee->getReturnType();
  // Change the function type of the call site to match that of the callee.
CS.mutateFunctionType(Callee->getFunctionType());
// Inspect the arguments of the call site. If an argument's type doesn't
// match the corresponding formal argument's type in the callee, bitcast it
// to the correct type.
for (Use &U : CS.args()) {
unsigned ArgNo = CS.getArgumentNo(&U);
Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
Type *ActualTy = U.get()->getType();
if (FormalTy != ActualTy) {
auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
CS.getInstruction());
CS.setArgument(ArgNo, Cast);
}
}
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
createRetBitCast(CS, CallSiteRetTy, RetBitCast);
return CS.getInstruction();
}
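// A minimal usage sketch (illustrative only): promoting an indirect call once
// a likely target is known. `CS` and `DirectCallee` are assumed to be
// supplied by the surrounding pass, e.g. from indirect-call profiling.
//
//   const char *Reason = nullptr;
//   if (isLegalToPromote(CS, DirectCallee, &Reason))
//     promoteCall(CS, DirectCallee);
//   else
//     DEBUG(dbgs() << "Not promoting: " << Reason << "\n");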
Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
MDNode *BranchWeights) {
// Version the indirect call site. If the called value is equal to the given
// callee, 'NewInst' will be executed, otherwise the original call site will
// be executed.
Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights);
// Promote 'NewInst' so that it directly calls the desired function.
return promoteCall(CallSite(NewInst), Callee);
}
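// A sketch of the guarded variant (illustrative only). `CS`, `DirectCallee`,
// `HotCount`, and `ColdCount` are assumed to come from profile data; the
// MDBuilder-created branch weights mark the direct ("then") path as likely.
//
//   MDBuilder MDB(CS.getInstruction()->getContext());
//   MDNode *Weights = MDB.createBranchWeights(HotCount, ColdCount);
//   if (isLegalToPromote(CS, DirectCallee, nullptr))
//     promoteCallWithIfThenElse(CS, DirectCallee, Weights);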
#undef DEBUG_TYPE

File diff suppressed because it is too large


@ -0,0 +1,200 @@
//===- CloneModule.cpp - Clone an entire module ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the CloneModule interface which makes a copy of an
// entire module.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
const Comdat *SC = Src->getComdat();
if (!SC)
return;
Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
DC->setSelectionKind(SC->getSelectionKind());
Dst->setComdat(DC);
}
/// Cloning a module is not as easy as it might seem: we must make copies of
/// all global variables and functions, and then fix up their initializers and
/// references so that they refer to the cloned globals.
///
std::unique_ptr<Module> llvm::CloneModule(const Module *M) {
// Create the value map that maps things from the old module over to the new
// module.
ValueToValueMapTy VMap;
return CloneModule(M, VMap);
}
std::unique_ptr<Module> llvm::CloneModule(const Module *M,
ValueToValueMapTy &VMap) {
return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
}
std::unique_ptr<Module> llvm::CloneModule(
const Module *M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
std::unique_ptr<Module> New =
llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
// Loop over all of the global variables, making corresponding globals in the
// new module. Here we add them to the VMap and to the new Module. We
// don't worry about attributes or initializers, they will come later.
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getValueType(),
I->isConstant(), I->getLinkage(),
(Constant*) nullptr, I->getName(),
(GlobalVariable*) nullptr,
I->getThreadLocalMode(),
I->getType()->getAddressSpace());
GV->copyAttributesFrom(&*I);
VMap[&*I] = GV;
}
// Loop over the functions in the module, making external functions as before
for (const Function &I : *M) {
Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
I.getLinkage(), I.getName(), New.get());
NF->copyAttributesFrom(&I);
VMap[&I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
if (!ShouldCloneDefinition(&*I)) {
// An alias cannot act as an external reference, so we need to create
// either a function or a global variable depending on the value type.
// FIXME: Once pointee types are gone we can probably pick one or the
// other.
GlobalValue *GV;
if (I->getValueType()->isFunctionTy())
GV = Function::Create(cast<FunctionType>(I->getValueType()),
GlobalValue::ExternalLinkage, I->getName(),
New.get());
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
nullptr, I->getName(), nullptr,
I->getThreadLocalMode(), I->getType()->getAddressSpace());
VMap[&*I] = GV;
// We do not copy attributes (mainly because copying between different
// kinds of globals is forbidden), but this is generally not required for
// correctness.
continue;
}
auto *GA = GlobalAlias::create(I->getValueType(),
I->getType()->getPointerAddressSpace(),
I->getLinkage(), I->getName(), New.get());
GA->copyAttributesFrom(&*I);
VMap[&*I] = GA;
}
// Now that all of the things that a global variable initializer can refer to
// have been created, loop through and copy the global variable initializers
// over...  We also set the attributes on the globals now.
//
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
if (I->isDeclaration())
continue;
GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
if (!ShouldCloneDefinition(&*I)) {
// Skip after setting the correct linkage for an external reference.
GV->setLinkage(GlobalValue::ExternalLinkage);
continue;
}
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
I->getAllMetadata(MDs);
for (auto MD : MDs)
GV->addMetadata(MD.first,
*MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
copyComdat(GV, &*I);
}
// Similarly, copy over function bodies now...
//
for (const Function &I : *M) {
if (I.isDeclaration())
continue;
Function *F = cast<Function>(VMap[&I]);
if (!ShouldCloneDefinition(&I)) {
// Skip after setting the correct linkage for an external reference.
F->setLinkage(GlobalValue::ExternalLinkage);
// Personality function is not valid on a declaration.
F->setPersonalityFn(nullptr);
continue;
}
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end();
++J) {
DestI->setName(J->getName());
VMap[&*J] = &*DestI++;
}
SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns);
if (I.hasPersonalityFn())
F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
copyComdat(F, &I);
}
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
// We already dealt with undefined aliases above.
if (!ShouldCloneDefinition(&*I))
continue;
GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]);
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));
}
// And named metadata....
for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
E = M->named_metadata_end(); I != E; ++I) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
}
return New;
}
extern "C" {
LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
return wrap(CloneModule(unwrap(M)).release());
}
}
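// A minimal usage sketch (illustrative only): cloning a module while turning
// every global variable definition into an external declaration, keeping only
// function bodies. `M` is assumed to be an existing Module.
//
//   ValueToValueMapTy VMap;
//   std::unique_ptr<Module> Clone =
//       CloneModule(&M, VMap, [](const GlobalValue *GV) {
//         return isa<Function>(GV); // Clone function definitions only.
//       });
//
// Globals for which the predicate returns false are emitted as external
// declarations in the clone rather than as definitions.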

File diff suppressed because it is too large


@ -0,0 +1,164 @@
//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines functions that are used to process llvm.global_ctors.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "ctor_utils"
namespace llvm {
namespace {
/// Given a specified llvm.global_ctors list, remove the listed elements.
void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
// Filter out the initializer elements to remove.
ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
SmallVector<Constant *, 10> CAList;
for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
if (!CtorsToRemove.test(I))
CAList.push_back(OldCA->getOperand(I));
// Create the new array initializer.
ArrayType *ATy =
ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
Constant *CA = ConstantArray::get(ATy, CAList);
// If we didn't change the number of elements, don't create a new GV.
if (CA->getType() == OldCA->getType()) {
GCL->setInitializer(CA);
return;
}
// Create the new global and insert it next to the existing list.
GlobalVariable *NGV =
new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
CA, "", GCL->getThreadLocalMode());
GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
if (!GCL->use_empty()) {
Constant *V = NGV;
if (V->getType() != GCL->getType())
V = ConstantExpr::getBitCast(V, GCL->getType());
GCL->replaceAllUsesWith(V);
}
GCL->eraseFromParent();
}
/// Given an llvm.global_ctors list that we can understand, return the listed
/// functions (including any null entries) as a vector.
std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
if (GV->getInitializer()->isNullValue())
return std::vector<Function *>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
std::vector<Function *> Result;
Result.reserve(CA->getNumOperands());
for (auto &V : CA->operands()) {
ConstantStruct *CS = cast<ConstantStruct>(V);
Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
}
return Result;
}
/// Find the llvm.global_ctors list, verifying that all initializers have an
/// init priority of 65535.
GlobalVariable *findGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
return nullptr;
// Verify that the initializer is simple enough for us to handle. We are
// only allowed to optimize the initializer if it is unique.
if (!GV->hasUniqueInitializer())
return nullptr;
if (isa<ConstantAggregateZero>(GV->getInitializer()))
return GV;
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
for (auto &V : CA->operands()) {
if (isa<ConstantAggregateZero>(V))
continue;
ConstantStruct *CS = cast<ConstantStruct>(V);
if (isa<ConstantPointerNull>(CS->getOperand(1)))
continue;
// Must have a function or null ptr.
if (!isa<Function>(CS->getOperand(1)))
return nullptr;
// Init priority must be standard.
ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
if (CI->getZExtValue() != 65535)
return nullptr;
}
return GV;
}
} // namespace
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
bool optimizeGlobalCtorsList(Module &M,
function_ref<bool(Function *)> ShouldRemove) {
GlobalVariable *GlobalCtors = findGlobalCtors(M);
if (!GlobalCtors)
return false;
std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
if (Ctors.empty())
return false;
bool MadeChange = false;
// Loop over global ctors, optimizing them when we can.
unsigned NumCtors = Ctors.size();
BitVector CtorsToRemove(NumCtors);
for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
Function *F = Ctors[i];
    // A null entry (e.g. the list's null terminator) has nothing to optimize;
    // skip it.
if (!F)
continue;
DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
// We cannot simplify external ctor functions.
if (F->empty())
continue;
// If we can evaluate the ctor at compile time, do.
if (ShouldRemove(F)) {
Ctors[i] = nullptr;
CtorsToRemove.set(i);
NumCtors--;
MadeChange = true;
continue;
}
}
if (!MadeChange)
return false;
removeGlobalCtors(GlobalCtors, CtorsToRemove);
return true;
}
} // End llvm namespace
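// A minimal usage sketch (illustrative only). `isRemovableCtor` is a
// hypothetical predicate supplied by the caller; entries for which it returns
// true are removed from llvm.global_ctors.
//
//   bool Changed = llvm::optimizeGlobalCtorsList(M, [](Function *F) {
//     return isRemovableCtor(F);
//   });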


@ -0,0 +1,151 @@
//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
/// Instruction and replaces it with a slot in the stack frame, allocated via
/// alloca. This allows the CFG to be changed around without fear of
/// invalidating the SSA information for the value. It returns the pointer to
/// the alloca inserted to create a stack slot for I.
AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Instruction *AllocaPoint) {
if (I.use_empty()) {
I.eraseFromParent();
return nullptr;
}
Function *F = I.getParent()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
I.getName()+".reg2mem", AllocaPoint);
} else {
Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
I.getName() + ".reg2mem", &F->getEntryBlock().front());
}
// We cannot demote invoke instructions to the stack if their normal edge
// is critical. Therefore, split the critical edge and create a basic block
// into which the store can be inserted.
if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
if (!II->getNormalDest()->getSinglePredecessor()) {
unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
assert(BB && "Unable to split critical edge.");
(void)BB;
}
}
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.user_back());
if (PHINode *PN = dyn_cast<PHINode>(U)) {
// If this is a PHI node, we can't insert a load of the value before the
// use. Instead insert the load in the predecessor block corresponding
// to the incoming value.
//
      // Note that if there are multiple edges from a basic block to this PHI
      // node, we cannot insert one load per edge: the resulting PHI node would
      // have multiple values (one from each load) coming in from the same
      // block, which is illegal SSA form. For this reason, we keep track of
      // and reuse the loads we insert.
DenseMap<BasicBlock*, Value*> Loads;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I) {
Value *&V = Loads[PN->getIncomingBlock(i)];
if (!V) {
// Insert the load into the predecessor block
V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
}
PN->setIncomingValue(i, V);
}
} else {
// If this is a normal instruction, just insert a load.
Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
U->replaceUsesOfWith(&I, V);
}
}
// Insert stores of the computed value into the stack slot. We have to be
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
if (!isa<TerminatorInst>(I)) {
InsertPt = ++I.getIterator();
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
new StoreInst(&I, Slot, &*InsertPt);
return Slot;
}
/// DemotePHIToStack - This function takes a virtual register computed by a PHI
/// node and replaces it with a slot in the stack frame allocated via alloca.
/// The PHI node is deleted. It returns the pointer to the alloca inserted.
AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
P->eraseFromParent();
return nullptr;
}
const DataLayout &DL = P->getModule()->getDataLayout();
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
P->getName() + ".reg2mem",
&F->getEntryBlock().front());
}
// Iterate over each operand inserting a store in each predecessor.
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
assert(II->getParent() != P->getIncomingBlock(i) &&
"Invoke edge not supported yet"); (void)II;
}
new StoreInst(P->getIncomingValue(i), Slot,
P->getIncomingBlock(i)->getTerminator());
}
// Insert a load in place of the PHI and replace all uses.
BasicBlock::iterator InsertPt = P->getIterator();
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
P->eraseFromParent();
return Slot;
}
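// A minimal usage sketch (illustrative only): a reg2mem-style pass that
// demotes every PHI node in function `F` to a stack slot. The PHIs are
// collected first so the IR is not mutated while iterating over it.
//
//   SmallVector<PHINode *, 8> Phis;
//   for (BasicBlock &BB : F)
//     for (Instruction &I : BB)
//       if (auto *PN = dyn_cast<PHINode>(&I))
//         Phis.push_back(PN);
//   for (PHINode *PN : Phis)
//     DemotePHIToStack(PN, /*AllocaPoint=*/nullptr);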


@ -0,0 +1,163 @@
//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static void insertCall(Function &CurFn, StringRef Func,
Instruction *InsertionPt, DebugLoc DL) {
Module &M = *InsertionPt->getParent()->getParent()->getParent();
LLVMContext &C = InsertionPt->getParent()->getContext();
if (Func == "mcount" ||
Func == ".mcount" ||
Func == "\01__gnu_mcount_nc" ||
Func == "\01_mcount" ||
Func == "\01mcount" ||
Func == "__mcount" ||
Func == "_mcount" ||
Func == "__cyg_profile_func_enter_bare") {
Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
Call->setDebugLoc(DL);
return;
}
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
Constant *Fn = M.getOrInsertFunction(
Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
Instruction *RetAddr = CallInst::Create(
Intrinsic::getDeclaration(&M, Intrinsic::returnaddress),
ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "",
InsertionPt);
RetAddr->setDebugLoc(DL);
Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
RetAddr};
CallInst *Call =
CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
Call->setDebugLoc(DL);
return;
}
// We only know how to call a fixed set of instrumentation functions, because
// they all expect different arguments, etc.
report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'");
}
static bool runOnFunction(Function &F, bool PostInlining) {
StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined"
: "instrument-function-entry";
StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined"
: "instrument-function-exit";
StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString();
StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString();
bool Changed = false;
// If the attribute is specified, insert instrumentation and then "consume"
// the attribute so that it's not inserted again if the pass should happen to
// run later for some reason.
if (!EntryFunc.empty()) {
DebugLoc DL;
if (auto SP = F.getSubprogram())
DL = DebugLoc::get(SP->getScopeLine(), 0, SP);
insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
Changed = true;
F.removeAttribute(AttributeList::FunctionIndex, EntryAttr);
}
if (!ExitFunc.empty()) {
for (BasicBlock &BB : F) {
TerminatorInst *T = BB.getTerminator();
DebugLoc DL;
if (DebugLoc TerminatorDL = T->getDebugLoc())
DL = TerminatorDL;
else if (auto SP = F.getSubprogram())
DL = DebugLoc::get(0, 0, SP);
if (isa<ReturnInst>(T)) {
insertCall(F, ExitFunc, T, DL);
Changed = true;
}
}
F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
}
return Changed;
}
namespace {
struct EntryExitInstrumenter : public FunctionPass {
static char ID;
EntryExitInstrumenter() : FunctionPass(ID) {
initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<GlobalsAAWrapperPass>();
}
bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); }
};
char EntryExitInstrumenter::ID = 0;
struct PostInlineEntryExitInstrumenter : public FunctionPass {
static char ID;
PostInlineEntryExitInstrumenter() : FunctionPass(ID) {
initializePostInlineEntryExitInstrumenterPass(
*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<GlobalsAAWrapperPass>();
}
bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); }
};
char PostInlineEntryExitInstrumenter::ID = 0;
}
INITIALIZE_PASS(
EntryExitInstrumenter, "ee-instrument",
"Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
false, false)
INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
"Instrument function entry/exit with calls to e.g. mcount() "
"(post inlining)",
false, false)
FunctionPass *llvm::createEntryExitInstrumenterPass() {
return new EntryExitInstrumenter();
}
FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() {
return new PostInlineEntryExitInstrumenter();
}
PreservedAnalyses
llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
runOnFunction(F, PostInlining);
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
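// A minimal sketch of how a frontend can request instrumentation (illustrative
// only): the pass is driven entirely by the string attributes consumed above,
// whose values name the function to call.
//
//   F.addFnAttr("instrument-function-entry-inlined", "mcount");
//   F.addFnAttr("instrument-function-exit-inlined", "__cyg_profile_func_exit");
//
// After the pass runs, it removes these attributes so a second run does not
// instrument the function twice.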


@ -0,0 +1,95 @@
//===- EscapeEnumerator.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines a helper class that enumerates all possible exits from a function,
// including exception handling.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
static Constant *getDefaultPersonalityFn(Module *M) {
LLVMContext &C = M->getContext();
Triple T(M->getTargetTriple());
EHPersonality Pers = getDefaultEHPersonality(T);
return M->getOrInsertFunction(getEHPersonalityName(Pers),
FunctionType::get(Type::getInt32Ty(C), true));
}
IRBuilder<> *EscapeEnumerator::Next() {
if (Done)
return nullptr;
  // Find all 'return' and 'resume' instructions.
while (StateBB != StateE) {
BasicBlock *CurBB = &*StateBB++;
    // Branches and invokes do not escape; only return and resume do.
TerminatorInst *TI = CurBB->getTerminator();
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
Builder.SetInsertPoint(TI);
return &Builder;
}
Done = true;
if (!HandleExceptions)
return nullptr;
if (F.doesNotThrow())
return nullptr;
// Find all 'call' instructions that may throw.
SmallVector<Instruction *, 16> Calls;
for (BasicBlock &BB : F)
for (Instruction &II : BB)
if (CallInst *CI = dyn_cast<CallInst>(&II))
if (!CI->doesNotThrow())
Calls.push_back(CI);
if (Calls.empty())
return nullptr;
// Create a cleanup block.
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
Constant *PersFn = getDefaultPersonalityFn(F.getParent());
F.setPersonalityFn(PersFn);
}
if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
report_fatal_error("Funclet EH not supported");
}
LandingPadInst *LPad =
LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
LPad->setCleanup(true);
ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
// Transform the 'call' instructions into 'invoke's branching to the
// cleanup block. Go in reverse order to make prettier BB names.
for (unsigned I = Calls.size(); I != 0;) {
CallInst *CI = cast<CallInst>(Calls[--I]);
changeToInvokeAndSplitBasicBlock(CI, CleanupBB);
}
Builder.SetInsertPoint(RI);
return &Builder;
}
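// A minimal usage sketch, assuming the EscapeEnumerator constructor takes the
// function, a cleanup-block name, and an exception-handling flag (illustrative
// only; `F` and `FinalizeFn` are supplied by the surrounding pass):
//
//   EscapeEnumerator EE(F, "my_cleanup", /*HandleExceptions=*/true);
//   while (IRBuilder<> *AtExit = EE.Next())
//     AtExit->CreateCall(FinalizeFn);
//
// Each returned builder is positioned immediately before a return, a resume,
// or the resume of the synthesized cleanup landing pad.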

File diff suppressed because it is too large


@ -0,0 +1,492 @@
//===- FlattenCFG.cpp - Code to perform CFG flattening --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Reduce conditional branches in CFG.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "flattencfg"
namespace {
class FlattenCFGOpt {
AliasAnalysis *AA;
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
/// instructions, and have no memory reference alias with \p Head2.
/// This is used as a legality check for merging if-regions.
bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1, BasicBlock *Block2);
public:
FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
bool run(BasicBlock *BB);
};
} // end anonymous namespace
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
/// ......
/// %cmp10 = fcmp une float %tmp1, %tmp2
/// br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
/// ......
/// %cmp11 = fcmp une float %tmp3, %tmp4
/// br i1 %cmp11, label %if.then, label %if.end
///
/// if.end: // the merge block
/// ......
///
/// if.then: // has two predecessors, both of which contain a conditional branch.
/// ......
/// br label %if.end;
///
/// After:
/// ......
/// %cmp10 = fcmp une float %tmp1, %tmp2
/// ......
/// %cmp11 = fcmp une float %tmp3, %tmp4
/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
/// br i1 %cmp12, label %if.then, label %if.end
///
/// if.end:
/// ......
///
/// if.then:
/// ......
/// br label %if.end;
///
/// Current implementation handles two cases.
/// Case 1: \param BB is on the else-path.
///
/// BB1
/// / |
/// BB2 |
/// / \ |
/// BB3 \ | where BB1 and BB2 contain conditional branches.
/// \ | / BB3 contains unconditional branch.
/// \ | / BB4 corresponds to \param BB which is also the merge.
/// BB => BB4
///
///
/// Corresponding source code:
///
/// if (a == b && c == d)
/// statement; // BB3
///
/// Case 2: \param BB is on the then-path.
///
/// BB1
/// / |
/// | BB2
/// \ / | where BB1 and BB2 contain conditional branches.
/// BB => BB3 | BB3 contains an unconditional branch and corresponds
/// \ / to \param BB. BB4 is the merge.
/// BB4
///
/// Corresponding source code:
///
/// if (a == b || c == d)
/// statement; // BB3
///
/// In both cases, \param BB is the common successor of conditional branches.
/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
/// as its predecessors.
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
PHINode *PHI = dyn_cast<PHINode>(BB->begin());
if (PHI)
return false; // For simplicity, avoid cases containing PHI nodes.
BasicBlock *LastCondBlock = nullptr;
BasicBlock *FirstCondBlock = nullptr;
BasicBlock *UnCondBlock = nullptr;
int Idx = -1;
// Check predecessors of \param BB.
SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
BasicBlock *Pred = *PI;
BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
// All predecessors should terminate with a branch.
if (!PBI)
return false;
BasicBlock *PP = Pred->getSinglePredecessor();
if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block. It should have a single
      // predecessor (BB2) that is also a predecessor of \param BB (BB4), and
      // its address should not be taken. There should exist only one such
      // unconditional branch among the predecessors.
if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
Pred->hasAddressTaken())
return false;
UnCondBlock = Pred;
continue;
}
// Only conditional branches are allowed beyond this point.
assert(PBI->isConditional());
// Condition's unique use should be the branch instruction.
Value *PC = PBI->getCondition();
if (!PC || !PC->hasOneUse())
return false;
if (PP && Preds.count(PP)) {
// These are internal condition blocks to be merged from, e.g.,
// BB2 in both cases.
// Should not be address-taken.
if (Pred->hasAddressTaken())
return false;
// Instructions in the internal condition blocks should be safe
// to hoist up.
for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
BI != BE;) {
Instruction *CI = &*BI++;
if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
return false;
}
} else {
// This is the condition block to be merged into, e.g. BB1 in
// both cases.
if (FirstCondBlock)
return false;
FirstCondBlock = Pred;
}
// Find whether BB is uniformly on the true (or false) path
// for all of its predecessors.
BasicBlock *PS1 = PBI->getSuccessor(0);
BasicBlock *PS2 = PBI->getSuccessor(1);
BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
int CIdx = (PS1 == BB) ? 0 : 1;
if (Idx == -1)
Idx = CIdx;
else if (CIdx != Idx)
return false;
// PS is the successor which is not BB. Check successors to identify
// the last conditional branch.
if (Preds.count(PS) == 0) {
// Case 2.
LastCondBlock = Pred;
} else {
// Case 1
BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
if (BPS && BPS->isUnconditional()) {
// Case 1: PS(BB3) should be an unconditional branch.
LastCondBlock = Pred;
}
}
}
if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
return false;
TerminatorInst *TBB = LastCondBlock->getTerminator();
BasicBlock *PS1 = TBB->getSuccessor(0);
BasicBlock *PS2 = TBB->getSuccessor(1);
BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
// If PS1 does not jump into PS2, but PS2 jumps into PS1,
// attempt branch inversion.
if (!PBI1 || !PBI1->isUnconditional() ||
(PS1->getTerminator()->getSuccessor(0) != PS2)) {
// Check whether PS2 jumps into PS1.
if (!PBI2 || !PBI2->isUnconditional() ||
(PS2->getTerminator()->getSuccessor(0) != PS1))
return false;
// Do branch inversion.
BasicBlock *CurrBlock = LastCondBlock;
bool EverChanged = false;
for (; CurrBlock != FirstCondBlock;
CurrBlock = CurrBlock->getSinglePredecessor()) {
BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
if (!CI)
continue;
CmpInst::Predicate Predicate = CI->getPredicate();
// Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
BI->swapSuccessors();
EverChanged = true;
}
}
return EverChanged;
}
  // PS1 must end with an unconditional branch.
if (!PBI1 || !PBI1->isUnconditional())
return false;
  // PS2 should not contain a PHI node.
PHI = dyn_cast<PHINode>(PS2->begin());
if (PHI)
return false;
// Do the transformation.
BasicBlock *CB;
BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
bool Iteration = true;
IRBuilder<>::InsertPointGuard Guard(Builder);
Value *PC = PBI->getCondition();
do {
CB = PBI->getSuccessor(1 - Idx);
// Delete the conditional branch.
FirstCondBlock->getInstList().pop_back();
FirstCondBlock->getInstList()
.splice(FirstCondBlock->end(), CB->getInstList());
PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
Value *CC = PBI->getCondition();
// Merge conditions.
Builder.SetInsertPoint(PBI);
Value *NC;
if (Idx == 0)
// Case 2, use parallel or.
NC = Builder.CreateOr(PC, CC);
else
// Case 1, use parallel and.
NC = Builder.CreateAnd(PC, CC);
PBI->replaceUsesOfWith(CC, NC);
PC = NC;
if (CB == LastCondBlock)
Iteration = false;
// Remove internal conditional branches.
CB->dropAllReferences();
    // Make CB unreachable and let a downstream pass delete the block.
new UnreachableInst(CB->getContext(), CB);
} while (Iteration);
DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
return true;
}
/// Compare blocks from two if-regions, where \param Head1 is the entry of the
/// 1st if-region, \param Head2 is the entry of the 2nd if-region, \param
/// Block1 is a block in the 1st if-region to compare, and \param Block2 is a
/// block in the 2nd if-region to compare. \returns true if \param Block1 and
/// \param Block2 contain identical instructions and do not have memory
/// references that alias with \param Head2.
bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
TerminatorInst *PTI2 = Head2->getTerminator();
Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
bool eq2 = (Block2 == Head2);
if (eq1 || eq2) {
// An empty then-path or else-path.
return (eq1 == eq2);
}
// Check whether instructions in Block1 and Block2 are identical
// and do not alias with instructions in Head2.
BasicBlock::iterator iter1 = Block1->begin();
BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
BasicBlock::iterator iter2 = Block2->begin();
BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
while (true) {
if (iter1 == end1) {
if (iter2 != end2)
return false;
break;
}
if (!iter1->isIdenticalTo(&*iter2))
return false;
// Illegal to remove instructions with side effects except
// non-volatile stores.
if (iter1->mayHaveSideEffects()) {
Instruction *CurI = &*iter1;
StoreInst *SI = dyn_cast<StoreInst>(CurI);
if (!SI || SI->isVolatile())
return false;
}
    // For simplicity and speed, we avoid a full data dependency check by
    // rejecting any block that reads from memory.
if (iter1->mayReadFromMemory())
return false;
if (iter1->mayWriteToMemory()) {
for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
// Check alias with Head2.
if (!AA || AA->alias(&*iter1, &*BI))
return false;
}
}
}
++iter1;
++iter2;
}
return true;
}
/// Check whether \param BB is the merge block of an if-region. If it is,
/// check whether an adjacent if-region exists upstream such that the two
/// if-regions contain identical instructions and can be legally merged.
/// \returns true if the two if-regions are merged.
///
/// From:
/// if (a)
/// statement;
/// if (b)
/// statement;
///
/// To:
/// if (a || b)
/// statement;
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
if (!CInst2)
return false;
BasicBlock *SecondEntryBlock = CInst2->getParent();
if (SecondEntryBlock->hasAddressTaken())
return false;
BasicBlock *IfTrue1, *IfFalse1;
Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
if (!CInst1)
return false;
BasicBlock *FirstEntryBlock = CInst1->getParent();
// Either then-path or else-path should be empty.
if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
return false;
if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
return false;
TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
IfTrue2))
return false;
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
IfFalse2))
return false;
  // Check that \param SecondEntryBlock has no side effects and is safe to
  // speculate.
for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
Instruction *CI = &*BI;
if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
!isSafeToSpeculativelyExecute(CI))
return false;
}
// Merge \param SecondEntryBlock into \param FirstEntryBlock.
FirstEntryBlock->getInstList().pop_back();
FirstEntryBlock->getInstList()
.splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
Value *CC = PBI->getCondition();
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Builder.SetInsertPoint(PBI);
Value *NC = Builder.CreateOr(CInst1, CC);
PBI->replaceUsesOfWith(CC, NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
// Remove IfTrue1
if (IfTrue1 != FirstEntryBlock) {
IfTrue1->dropAllReferences();
IfTrue1->eraseFromParent();
}
// Remove IfFalse1
if (IfFalse1 != FirstEntryBlock) {
IfFalse1->dropAllReferences();
IfFalse1->eraseFromParent();
}
// Remove \param SecondEntryBlock
SecondEntryBlock->dropAllReferences();
SecondEntryBlock->eraseFromParent();
DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
return true;
}
bool FlattenCFGOpt::run(BasicBlock *BB) {
assert(BB && BB->getParent() && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
IRBuilder<> Builder(BB);
if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
return true;
return false;
}
/// FlattenCFG - This function is used to flatten a CFG. For
/// example, it uses parallel-and and parallel-or mode to collapse
/// if-conditions and merge if-regions with identical statements.
bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
return FlattenCFGOpt(AA).run(BB);
}
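// A minimal usage sketch (illustrative only), mirroring how a CFG-flattening
// pass typically drives this helper: iterate to a fixed point, advancing the
// block iterator before the call because FlattenCFG may erase blocks.
//
//   bool LocalChange = true;
//   while (LocalChange) {
//     LocalChange = false;
//     for (auto BBIt = F.begin(); BBIt != F.end();)
//       if (FlattenCFG(&*BBIt++, AA))
//         LocalChange = true;
//   }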

File diff suppressed because it is too large


@ -0,0 +1,267 @@
//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionImportGlobalProcessing class, used
// to perform the necessary global value handling for function importing.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/IR/InstIterator.h"
using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
/// declaration.
bool FunctionImportGlobalProcessing::doImportAsDefinition(
const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
// Only import the globals requested for importing.
if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
return false;
assert(!isa<GlobalAlias>(SGV) &&
"Unexpected global alias in the import list.");
// Otherwise yes.
return true;
}
bool FunctionImportGlobalProcessing::doImportAsDefinition(
const GlobalValue *SGV) {
if (!isPerformingImport())
return false;
return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
GlobalsToImport);
}
bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
const GlobalValue *SGV) {
assert(SGV->hasLocalLinkage());
// Both the imported references and the original local variable must
// be promoted.
if (!isPerformingImport() && !isModuleExporting())
return false;
if (isPerformingImport()) {
assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
!isNonRenamableLocal(*SGV)) &&
"Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
// module. But by necessity if we end up importing it and it is local,
// it must be promoted, so unconditionally promote all values in the
// importing module.
return true;
}
// When exporting, consult the index. We can have more than one local
// with the same GUID, in the case of same-named locals in different but
// same-named source files that were compiled in their respective directories
// (so the source file name and resulting GUID is the same). Find the one
// in this module.
auto Summary = ImportIndex.findSummaryInModule(
SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
assert(Summary && "Missing summary for global value when exporting");
auto Linkage = Summary->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
assert(!isNonRenamableLocal(*SGV) &&
"Attempting to promote non-renamable local");
return true;
}
return false;
}
#ifndef NDEBUG
bool FunctionImportGlobalProcessing::isNonRenamableLocal(
const GlobalValue &GV) const {
if (!GV.hasLocalLinkage())
return false;
// This needs to stay in sync with the logic in buildModuleSummaryIndex.
if (GV.hasSection())
return true;
if (Used.count(const_cast<GlobalValue *>(&GV)))
return true;
return false;
}
#endif
std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
bool DoPromote) {
// For locals that must be promoted to global scope, ensure that
// the promoted name uniquely identifies the copy in the original module,
// using the ID assigned during combined index creation. When importing,
// we rename all locals (not just those that are promoted) in order to
// avoid naming conflicts between locals imported from different modules.
if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
return ModuleSummaryIndex::getGlobalNameForLocal(
SGV->getName(),
ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
return SGV->getName();
}
GlobalValue::LinkageTypes
FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
bool DoPromote) {
// Any local variable that is referenced by an exported function needs
// to be promoted to global scope. Since we don't currently know which
// functions reference which local variables/functions, we must treat
// all as potentially exported if this module is exporting anything.
if (isModuleExporting()) {
if (SGV->hasLocalLinkage() && DoPromote)
return GlobalValue::ExternalLinkage;
return SGV->getLinkage();
}
// Otherwise, if we aren't importing, no linkage change is needed.
if (!isPerformingImport())
return SGV->getLinkage();
switch (SGV->getLinkage()) {
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::ExternalLinkage:
// External and linkonce definitions are converted to available_externally
// definitions upon import, so that they are available for inlining
// and/or optimization, but are turned into declarations later
// during the EliminateAvailableExternally pass.
    if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
// An imported external declaration stays external.
return SGV->getLinkage();
case GlobalValue::AvailableExternallyLinkage:
// An imported available_externally definition converts
// to external if imported as a declaration.
if (!doImportAsDefinition(SGV))
return GlobalValue::ExternalLinkage;
// An imported available_externally declaration stays that way.
return SGV->getLinkage();
case GlobalValue::WeakAnyLinkage:
// Can't import weak_any definitions correctly, or we might change the
// program semantics, since the linker will pick the first weak_any
// definition and importing would change the order they are seen by the
// linker. The module linking caller needs to enforce this.
assert(!doImportAsDefinition(SGV));
// If imported as a declaration, it becomes external_weak.
return SGV->getLinkage();
case GlobalValue::WeakODRLinkage:
// For weak_odr linkage, there is a guarantee that all copies will be
// equivalent, so the issue described above for weak_any does not exist,
// and the definition can be imported. It can be treated similarly
// to an imported externally visible global value.
    if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
case GlobalValue::AppendingLinkage:
// It would be incorrect to import an appending linkage variable,
// since it would cause global constructors/destructors to be
// executed multiple times. This should have already been handled
// by linkIfNeeded, and we will assert in shouldLinkFromSource
// if we try to import, so we simply return AppendingLinkage.
return GlobalValue::AppendingLinkage;
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
// If we are promoting the local to global scope, it is handled
// similarly to a normal externally visible global.
if (DoPromote) {
      if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
}
// A non-promoted imported local definition stays local.
// The ThinLTO pass will eventually force-import their definitions.
return SGV->getLinkage();
case GlobalValue::ExternalWeakLinkage:
// External weak doesn't apply to definitions, must be a declaration.
assert(!doImportAsDefinition(SGV));
// Linkage stays external_weak.
return SGV->getLinkage();
case GlobalValue::CommonLinkage:
// Linkage stays common on definitions.
// The ThinLTO pass will eventually force-import their definitions.
return SGV->getLinkage();
}
llvm_unreachable("unknown linkage type");
}
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// Check the summaries to see if the symbol gets resolved to a known local
// definition.
if (GV.hasName()) {
ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
if (VI) {
// Need to check all summaries are local in case of hash collisions.
bool IsLocal = VI.getSummaryList().size() &&
llvm::all_of(VI.getSummaryList(),
[](const std::unique_ptr<GlobalValueSummary> &Summary) {
return Summary->isDSOLocal();
});
if (IsLocal)
GV.setDSOLocal(true);
}
}
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
// Once we change the name or linkage it is difficult to determine
// again whether we should promote since shouldPromoteLocalToGlobal needs
// to locate the summary (based on GUID from name and linkage). Therefore,
// use DoPromote result saved above.
GV.setName(getName(&GV, DoPromote));
GV.setLinkage(getLinkage(&GV, DoPromote));
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
} else
GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
// The IRMover should not have placed any imported declarations in
// a comdat, so the only declaration that should be in a comdat
// at this point would be a definition imported as available_externally.
assert(GO->hasAvailableExternallyLinkage() &&
"Expected comdat on definition (possibly available external)");
GO->setComdat(nullptr);
}
}
void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
for (GlobalVariable &GV : M.globals())
processGlobalForThinLTO(GV);
for (Function &SF : M)
processGlobalForThinLTO(SF);
for (GlobalAlias &GA : M.aliases())
processGlobalForThinLTO(GA);
}
bool FunctionImportGlobalProcessing::run() {
processGlobalsForThinLTO();
return false;
}
bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport) {
FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
return ThinLTOProcessing.run();
}
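// A minimal usage sketch (illustrative only): preparing a module for ThinLTO
// by promoting/renaming its locals against the combined summary `Index`,
// without an explicit import list.
//
//   if (renameModuleForThinLTO(M, Index, /*GlobalsToImport=*/nullptr))
//     report_fatal_error("renameModuleForThinLTO failed");
//
// Note that the current implementation always returns false (success).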


@ -0,0 +1,196 @@
//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
using namespace llvm;
/// Return the stronger of the two orderings. If the two orderings are acquire
/// and release, then return AcquireRelease.
///
static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
(Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
return AtomicOrdering::AcquireRelease;
return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
}
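
// A minimal, illustrative sanity check of the merge rule above (editor's
// sketch, not part of the upstream file; it is never called). Acquire and
// Release combine to AcquireRelease; any other pair keeps the numerically
// stronger ordering.
static inline void strongerOrderingExample() {
  assert(strongerOrdering(AtomicOrdering::Acquire, AtomicOrdering::Release) ==
         AtomicOrdering::AcquireRelease);
  assert(strongerOrdering(AtomicOrdering::Monotonic,
                          AtomicOrdering::SequentiallyConsistent) ==
         AtomicOrdering::SequentiallyConsistent);
}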
/// It is safe to destroy a constant iff it is used only by other constants.
/// Note that constants cannot be cyclic, so this test is easy to implement
/// recursively.
///
bool llvm::isSafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C))
return false;
if (isa<ConstantData>(C))
return false;
for (const User *U : C->users())
if (const Constant *CU = dyn_cast<Constant>(U)) {
if (!isSafeToDestroyConstant(CU))
return false;
} else
return false;
return true;
}
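
// For example (editor's note): a dangling ConstantExpr, such as a
// 'getelementptr' computed from a global, may be destroyed once its only
// remaining users are other dead constants. A GlobalValue never qualifies
// (removing it would change the module), and ConstantData is excluded above
// because destroying uniqued simple constants is never useful.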
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
SmallPtrSetImpl<const PHINode *> &PhiUsers) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
if (GV->isExternallyInitialized())
GS.StoredType = GlobalStatus::StoredOnce;
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
GS.HasNonInstructionUser = true;
      // If the result of the constant expression isn't a pointer type, then
      // we won't know to expect it in various places. Just reject early.
if (!isa<PointerType>(CE->getType()))
return true;
if (analyzeGlobalAux(CE, GS, PhiUsers))
return true;
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
if (!GS.AccessingFunction)
GS.AccessingFunction = F;
else if (GS.AccessingFunction != F)
GS.HasMultipleAccessingFunctions = true;
}
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.IsLoaded = true;
// Don't hack on volatile loads.
if (LI->isVolatile())
return true;
GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
} else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V)
return true;
// Don't hack on volatile stores.
if (SI->isVolatile())
return true;
GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
// value, not an aggregate), keep more specific information about
// stores.
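        // (Editor's note) StoredType climbs a small lattice:
        //   NotStored < InitializerStored < StoredOnce < Stored.
        // Storing the initializer back, or a value just loaded from the
        // global itself, leaves the observable contents unchanged and only
        // raises the state to InitializerStored; the first genuinely new
        // value raises it to StoredOnce; anything further collapses it to
        // Stored.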
if (GS.StoredType != GlobalStatus::Stored) {
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(SI->getOperand(1))) {
Value *StoredVal = SI->getOperand(0);
if (Constant *C = dyn_cast<Constant>(StoredVal)) {
if (C->isThreadDependent()) {
// The stored value changes between threads; don't track it.
return true;
}
}
if (GV->hasInitializer() && StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::InitializerStored)
GS.StoredType = GlobalStatus::InitializerStored;
} else if (isa<LoadInst>(StoredVal) &&
cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
if (GS.StoredType < GlobalStatus::InitializerStored)
GS.StoredType = GlobalStatus::InitializerStored;
} else if (GS.StoredType < GlobalStatus::StoredOnce) {
GS.StoredType = GlobalStatus::StoredOnce;
GS.StoredOnceValue = StoredVal;
} else if (GS.StoredType == GlobalStatus::StoredOnce &&
GS.StoredOnceValue == StoredVal) {
            // No-op: the same value is stored again.
} else {
GS.StoredType = GlobalStatus::Stored;
}
} else {
GS.StoredType = GlobalStatus::Stored;
}
}
} else if (isa<BitCastInst>(I)) {
if (analyzeGlobalAux(I, GS, PhiUsers))
return true;
} else if (isa<GetElementPtrInst>(I)) {
if (analyzeGlobalAux(I, GS, PhiUsers))
return true;
} else if (isa<SelectInst>(I)) {
if (analyzeGlobalAux(I, GS, PhiUsers))
return true;
} else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes can be checked just like select or GEP instructions, but
        // we have to be careful about infinite recursion.
if (PhiUsers.insert(PN).second) // Not already visited.
if (analyzeGlobalAux(I, GS, PhiUsers))
return true;
} else if (isa<CmpInst>(I)) {
GS.IsCompared = true;
} else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
if (MTI->isVolatile())
return true;
if (MTI->getArgOperand(0) == V)
GS.StoredType = GlobalStatus::Stored;
if (MTI->getArgOperand(1) == V)
GS.IsLoaded = true;
} else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
} else if (auto C = ImmutableCallSite(I)) {
if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
} else {
        return true; // Any other non-load instruction might take the address!
}
} else if (const Constant *C = dyn_cast<Constant>(UR)) {
GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off here.
if (!isSafeToDestroyConstant(C))
return true;
} else {
GS.HasNonInstructionUser = true;
      // Otherwise this must be some other kind of user.
return true;
}
}
return false;
}
GlobalStatus::GlobalStatus() = default;
bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
SmallPtrSet<const PHINode *, 16> PhiUsers;
return analyzeGlobalAux(V, GS, PhiUsers);
}
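
// Example usage (editor's sketch; 'GV' stands for a hypothetical
// 'const GlobalVariable *' the caller already holds):
//
//   GlobalStatus GS;
//   if (!GlobalStatus::analyzeGlobal(GV, GS) &&
//       GS.StoredType == GlobalStatus::InitializerStored && !GS.IsCompared)
//     ; // Every store writes the initializer value back, so the global's
//       // contents never change observably.
//
// A 'true' return from analyzeGlobal means some use could not be classified
// and the analysis gave up, so callers must treat GS as incomplete.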

Some files were not shown because too many files have changed in this diff.