Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,163 @@
//===--------- PolyhedralInfo.cpp - Create Scops from LLVM IR-------------===//
///
/// The LLVM Compiler Infrastructure
///
/// This file is distributed under the University of Illinois Open Source
/// License. See LICENSE.TXT for details.
///
//===----------------------------------------------------------------------===//
///
/// An interface to the Polyhedral analysis engine(Polly) of LLVM.
///
/// This pass provides an interface to the polyhedral analysis performed by
/// Polly.
///
/// This interface provides basic queries, such as isParallel and
/// isVectorizable, that can be used in LLVM transformation passes.
///
/// Work in progress, this file is subject to change.
//===----------------------------------------------------------------------===//
#include "polly/PolyhedralInfo.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/Debug.h"
#include <isl/map.h>
#include <isl/union_map.h>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polyhedral-info"
// -polly-check-parallel: when set, PolyhedralInfo::print() reports for every
// loop whether it was found to be parallel.
static cl::opt<bool> CheckParallel("polly-check-parallel", cl::Hidden,
                                   cl::ZeroOrMore, cl::init(false),
                                   cl::desc("Check for parallel loops"),
                                   cl::cat(PollyCategory));

// -polly-check-vectorizable: registered here; no code in the portion of the
// file shown reads it yet.
static cl::opt<bool>
    CheckVectorizable("polly-check-vectorizable", cl::Hidden, cl::ZeroOrMore,
                      cl::init(false),
                      cl::desc("Check for vectorizable loops"),
                      cl::cat(PollyCategory));
/// Declare the analyses PolyhedralInfo builds upon. All analyses are marked
/// as preserved.
void PolyhedralInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();

  // Dependence and SCoP information are requested transitively; loop
  // information is only needed directly (see print()).
  AU.addRequiredTransitive<DependenceInfoWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScopInfoWrapperPass>();
}
/// Cache handles to the SCoP and dependence analyses for later queries.
///
/// \returns false always; this function only stores analysis handles.
bool PolyhedralInfo::runOnFunction(Function &F) {
  SI = getAnalysis<ScopInfoWrapperPass>().getSI();
  DI = &getAnalysis<DependenceInfoWrapperPass>();
  return false;
}
/// Print one entry per loop of the function, visiting every top-level loop
/// nest depth-first.
///
/// Each entry starts with the loop header's name. The parallelism verdict is
/// appended only when -polly-check-parallel is set.
void PolyhedralInfo::print(raw_ostream &OS, const Module *) const {
  auto &Loops = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto *Outermost : Loops)
    for (auto *Current : depth_first(Outermost)) {
      OS.indent(2) << Current->getHeader()->getName() << ":\t";
      if (!CheckParallel)
        continue;
      OS << (isParallel(Current) ? "Loop is parallel.\n"
                                 : "Loop is not parallel.\n");
    }
}
/// Decide whether loop \p L is parallel with respect to the RAW, WAW, WAR and
/// reduction dependences of the SCoP that contains it.
///
/// \param L              The loop to check.
/// \param MinDepDistPtr  Forwarded unchanged to Dependences::isParallel.
/// \returns false if no SCoP contains \p L or the SCoP has no valid
///          dependences; otherwise the answer of Dependences::isParallel.
bool PolyhedralInfo::checkParallel(Loop *L, isl_pw_aff **MinDepDistPtr) const {
  const Scop *ContainingScop = getScopContainingLoop(L);
  if (!ContainingScop)
    return false;

  const Dependences &D = DI->getDependences(
      const_cast<Scop *>(ContainingScop), Dependences::AL_Access);
  if (!D.hasValidDependences())
    return false;

  DEBUG(dbgs() << "Loop :\t" << L->getHeader()->getName() << ":\n");

  isl_union_map *Deps =
      D.getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW |
                       Dependences::TYPE_WAR | Dependences::TYPE_RED);
  DEBUG(dbgs() << "Dependences :\t" << stringFromIslObj(Deps) << "\n");

  isl_union_map *LoopSchedule = getScheduleForLoop(ContainingScop, L);
  DEBUG(dbgs() << "Schedule: \t" << stringFromIslObj(LoopSchedule) << "\n");

  // The schedule is released here; Deps is handed to isParallel and is not
  // freed by this function.
  const bool Result = D.isParallel(LoopSchedule, Deps, MinDepDistPtr);
  isl_union_map_free(LoopSchedule);
  return Result;
}
/// Convenience wrapper around checkParallel() that passes only the loop.
bool PolyhedralInfo::isParallel(Loop *L) const {
  return checkParallel(L);
}
/// Find the first SCoP known to ScopInfo whose region contains loop \p L.
///
/// \returns the matching Scop, or nullptr if no region contains \p L.
const Scop *PolyhedralInfo::getScopContainingLoop(Loop *L) const {
  assert((SI) && "ScopInfoWrapperPass is required by PolyhedralInfo pass!\n");

  for (auto &RegionAndScop : *SI) {
    Region *Candidate = RegionAndScop.first;
    if (!Candidate->contains(L))
      continue;
    return RegionAndScop.second.get();
  }
  return nullptr;
}
// Compute the partial schedule of loop L inside the SCoP S.
//
// The schedules of all statements whose surrounding loop is contained in L
// are united, and the schedule (output) dimensions that belong to loops
// nested deeper than L are projected out.
//
// Example:
//   for (i = 0; i < n; i++)
//     for (j = 0; j < n; j++)
//       A[j] = 1; // Stmt
//
//   original schedule:            Stmt[i0, i1] -> [i0, i1]
//   schedule for the outer loop:  Stmt[i0, i1] -> [i0]
//   schedule for the inner loop:  Stmt[i0, i1] -> [i0, i1]
//
// The result is returned as __isl_give, so the caller has to free it.
__isl_give isl_union_map *PolyhedralInfo::getScheduleForLoop(const Scop *S,
                                                             Loop *L) const {
  isl_union_map *Result = isl_union_map_empty(S->getParamSpace().release());

  int CurrDim = S->getRelativeLoopDepth(L);
  DEBUG(dbgs() << "Relative loop depth:\t" << CurrDim << "\n");
  assert(CurrDim >= 0 && "Loop in region should have at least depth one");

  for (auto &Stmt : *S) {
    // Only statements nested in L take part in its schedule.
    if (!L->contains(Stmt.getSurroundingLoop()))
      continue;

    unsigned int MaxDim = Stmt.getNumIterators();
    DEBUG(dbgs() << "Maximum depth of Stmt:\t" << MaxDim << "\n");

    isl_map *StmtSchedule = Stmt.getSchedule().release();
    assert(
        StmtSchedule &&
        "Schedules that contain extension nodes require special handling.");

    // Drop the output dimensions following position CurrDim.
    StmtSchedule = isl_map_project_out(StmtSchedule, isl_dim_out, CurrDim + 1,
                                       MaxDim - CurrDim - 1);
    StmtSchedule = isl_map_set_tuple_id(StmtSchedule, isl_dim_in,
                                        Stmt.getDomainId().release());
    Result = isl_union_map_union(Result, isl_union_map_from_map(StmtSchedule));
  }

  return isl_union_map_coalesce(Result);
}
// Definition of the static pass identifier member.
char PolyhedralInfo::ID = 0;
// Factory returning a newly allocated PolyhedralInfo pass.
Pass *polly::createPolyhedralInfoPass() { return new PolyhedralInfo(); }
// Register the pass under the name "polyhedral-info" and list the passes it
// depends on; the list mirrors the requirements in getAnalysisUsage().
INITIALIZE_PASS_BEGIN(PolyhedralInfo, "polyhedral-info",
"Polly - Interface to polyhedral analysis engine", false,
false);
INITIALIZE_PASS_DEPENDENCY(DependenceInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass);
INITIALIZE_PASS_END(PolyhedralInfo, "polyhedral-info",
"Polly - Interface to polyhedral analysis engine", false,
false)

View File

@@ -0,0 +1,105 @@
//===- PruneUnprofitable.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Mark a SCoP as unfeasible if not deemed profitable to optimize.
//
//===----------------------------------------------------------------------===//
#include "polly/PruneUnprofitable.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-prune-unprofitable"
namespace {
// Overall pruning counters.
STATISTIC(ScopsProcessed, "Number of SCoPs considered for "
                          "unprofitability pruning");
STATISTIC(ScopsPruned,
          "Number of pruned SCoPs because it they cannot be optimized in a "
          "significant way");
STATISTIC(ScopsSurvived, "Number of SCoPs after pruning");

// Loop counters for SCoPs that were pruned.
STATISTIC(NumPrunedLoops, "Number of pruned loops");
STATISTIC(NumPrunedBoxedLoops, "Number of pruned boxed loops");
STATISTIC(NumPrunedAffineLoops, "Number of pruned affine loops");

// Loop counters for SCoPs that were kept.
STATISTIC(NumLoopsInScop, "Number of loops in scops after pruning");
STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after pruning");
STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after pruning");
/// Scop pass that invalidates SCoPs which Scop::isProfitable() rejects, and
/// keeps statistics about pruned and surviving SCoPs.
class PruneUnprofitable : public ScopPass {
private:
  /// Add the loop counts of \p S to either the "pruned" or the "survived"
  /// family of statistics, depending on \p Pruned.
  void updateStatistics(Scop &S, bool Pruned) {
    auto LoopStats = S.getStatistics();

    if (!Pruned) {
      ScopsSurvived++;
      NumLoopsInScop += LoopStats.NumAffineLoops + LoopStats.NumBoxedLoops;
      NumBoxedLoops += LoopStats.NumBoxedLoops;
      NumAffineLoops += LoopStats.NumAffineLoops;
      return;
    }

    ScopsPruned++;
    NumPrunedLoops += LoopStats.NumAffineLoops + LoopStats.NumBoxedLoops;
    NumPrunedBoxedLoops += LoopStats.NumBoxedLoops;
    NumPrunedAffineLoops += LoopStats.NumAffineLoops;
  }

public:
  static char ID;

  explicit PruneUnprofitable() : ScopPass(ID) {}
  PruneUnprofitable(const PruneUnprofitable &) = delete;
  PruneUnprofitable &operator=(const PruneUnprofitable &) = delete;

  /// Requires ScopInfoRegionPass and declares all analyses preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ScopInfoRegionPass>();
    AU.setPreservesAll();
  }

  /// Invalidate \p S if it is not considered profitable.
  ///
  /// Does nothing when PollyProcessUnprofitable is set.
  /// \returns false always.
  bool runOnScop(Scop &S) override {
    if (PollyProcessUnprofitable) {
      DEBUG(dbgs() << "NOTE: -polly-process-unprofitable active, won't prune "
                      "anything\n");
      return false;
    }

    ScopsProcessed++;

    const bool Prune = !S.isProfitable(true);
    if (Prune) {
      DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in "
                      "a significant way\n");
      S.invalidate(PROFITABLE, DebugLoc());
    }
    updateStatistics(S, Prune);

    return false;
  }
};
} // namespace
// Definition of the static pass identifier member (zero-initialized).
char PruneUnprofitable::ID;
// Factory returning a newly allocated PruneUnprofitable pass.
Pass *polly::createPruneUnprofitablePass() { return new PruneUnprofitable(); }
// Register the pass under the name "polly-prune-unprofitable".
INITIALIZE_PASS_BEGIN(PruneUnprofitable, "polly-prune-unprofitable",
"Polly - Prune unprofitable SCoPs", false, false)
INITIALIZE_PASS_END(PruneUnprofitable, "polly-prune-unprofitable",
"Polly - Prune unprofitable SCoPs", false, false)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,266 @@
//===- GraphPrinter.cpp - Create a DOT output describing the Scop. --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a DOT output describing the Scop.
//
// For each function a dot file is created that shows the control flow graph of
// the function and highlights the detected Scops.
//
//===----------------------------------------------------------------------===//
#include "polly/LinkAllPasses.h"
#include "polly/ScopDetection.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/CommandLine.h"
using namespace polly;
using namespace llvm;
// -polly-view-only=<pattern>: restrict the viewer to functions whose name
// contains the pattern (see ScopViewer::processFunction).
static cl::opt<std::string>
    ViewFilter("polly-view-only", cl::Hidden, cl::ZeroOrMore, cl::init(""),
               cl::desc("Only view functions that match this pattern"));

// -polly-view-all: let the viewer also show functions in which no SCoP was
// detected.
static cl::opt<bool>
    ViewAll("polly-view-all", cl::Hidden, cl::ZeroOrMore, cl::init(false),
            cl::desc("Also show functions without any scops"));
namespace llvm {
/// Graph traits for a ScopDetection result: the graph is the region graph of
/// the RegionInfo that the detection was run on.
template <>
struct GraphTraits<ScopDetection *> : public GraphTraits<RegionInfo *> {
  static NodeRef getEntryNode(ScopDetection *SD) {
    RegionInfo *RI = SD->getRI();
    return GraphTraits<RegionInfo *>::getEntryNode(RI);
  }

  static nodes_iterator nodes_begin(ScopDetection *SD) {
    NodeRef Entry = getEntryNode(SD);
    return nodes_iterator::begin(Entry);
  }

  static nodes_iterator nodes_end(ScopDetection *SD) {
    NodeRef Entry = getEntryNode(SD);
    return nodes_iterator::end(Entry);
  }
};
/// Graph traits for the wrapper pass: everything is forwarded to the traits
/// of the ScopDetection object the pass holds.
template <>
struct GraphTraits<ScopDetectionWrapperPass *>
    : public GraphTraits<ScopDetection *> {
  static NodeRef getEntryNode(ScopDetectionWrapperPass *P) {
    ScopDetection *Detection = &P->getSD();
    return GraphTraits<ScopDetection *>::getEntryNode(Detection);
  }

  static nodes_iterator nodes_begin(ScopDetectionWrapperPass *P) {
    NodeRef Entry = getEntryNode(P);
    return nodes_iterator::begin(Entry);
  }

  static nodes_iterator nodes_end(ScopDetectionWrapperPass *P) {
    NodeRef Entry = getEntryNode(P);
    return nodes_iterator::end(Entry);
  }
};
template <> struct DOTGraphTraits<RegionNode *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
if (!Node->isSubRegion()) {
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
if (isSimple())
return DOTGraphTraits<const Function *>::getSimpleNodeLabel(
BB, BB->getParent());
else
return DOTGraphTraits<const Function *>::getCompleteNodeLabel(
BB, BB->getParent());
}
return "Not implemented";
}
};
/// DOT traits used to render the region graph of a function with the
/// detected SCoPs highlighted.
template <>
struct DOTGraphTraits<ScopDetectionWrapperPass *>
    : public DOTGraphTraits<RegionNode *> {
  DOTGraphTraits(bool isSimple = false)
      : DOTGraphTraits<RegionNode *>(isSimple) {}

  static std::string getGraphName(ScopDetectionWrapperPass *SD) {
    return "Scop Graph";
  }

  /// Return the DOT attributes for the edge from \p srcNode to the node
  /// referenced by \p CI.
  ///
  /// Back edges are marked "constraint=false" so that they are not used to
  /// define the layout of the nodes; all other edges get no attributes.
  std::string getEdgeAttributes(RegionNode *srcNode,
                                GraphTraits<RegionInfo *>::ChildIteratorType CI,
                                ScopDetectionWrapperPass *P) {
    RegionNode *destNode = *CI;
    auto *SD = &P->getSD();

    if (srcNode->isSubRegion() || destNode->isSubRegion())
      return "";

    // In case of a backedge, do not use it to define the layout of the nodes.
    BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
    BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();

    RegionInfo *RI = SD->getRI();
    Region *R = RI->getRegionFor(destBB);

    // Climb to the outermost region that still has destBB as its entry.
    while (R && R->getParent())
      if (R->getParent()->getEntry() == destBB)
        R = R->getParent();
      else
        break;

    if (R && R->getEntry() == destBB && R->contains(srcBB))
      return "constraint=false";

    return "";
  }

  /// Label a node by delegating to the RegionNode traits.
  std::string getNodeLabel(RegionNode *Node, ScopDetectionWrapperPass *P) {
    return DOTGraphTraits<RegionNode *>::getNodeLabel(
        Node, reinterpret_cast<RegionNode *>(
                  P->getSD().getRI()->getTopLevelRegion()));
  }

  /// Return a copy of \p String in which every double quote is preceded by a
  /// backslash, so the text can be placed inside a quoted DOT string.
  static std::string escapeString(std::string String) {
    std::string Escaped;

    for (const auto &C : String) {
      if (C == '"')
        Escaped += '\\';

      Escaped += C;
    }
    return Escaped;
  }

  /// Print the cluster of the subregions. This groups the single basic blocks
  /// and adds a different background color for each group.
  ///
  /// \param SD     Detection result used to decide whether \p R is a SCoP and
  ///               to obtain the rejection message otherwise.
  /// \param R      Region to print; its subregions are printed recursively.
  /// \param O      Stream receiving the DOT text.
  /// \param depth  Nesting level, used only for indentation of the output.
  static void printRegionCluster(const ScopDetection *SD, const Region *R,
                                 raw_ostream &O, unsigned depth = 0) {
    O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void *>(R)
                        << " {\n";
    unsigned LineBegin, LineEnd;
    std::string FileName;

    getDebugLocation(R, LineBegin, LineEnd, FileName);

    std::string Location;
    if (LineBegin != (unsigned)-1) {
      Location = escapeString(FileName + ":" + std::to_string(LineBegin) + "-" +
                              std::to_string(LineEnd) + "\n");
    }

    std::string ErrorMessage = SD->regionIsInvalidBecause(R);
    ErrorMessage = escapeString(ErrorMessage);
    O.indent(2 * (depth + 1))
        << "label = \"" << Location << ErrorMessage << "\";\n";

    if (SD->isMaxRegionInScop(*R)) {
      O.indent(2 * (depth + 1)) << "style = filled;\n";

      // Set color to green. The statement is terminated with a newline, like
      // the color statement of the other branch, so that the following
      // subgraph/node statement starts on its own line.
      O.indent(2 * (depth + 1)) << "color = 3\n";
    } else {
      O.indent(2 * (depth + 1)) << "style = solid;\n";

      int color = (R->getDepth() * 2 % 12) + 1;

      // We do not want green again.
      if (color == 3)
        color = 6;

      O.indent(2 * (depth + 1)) << "color = " << color << "\n";
    }

    for (const auto &SubRegion : *R)
      printRegionCluster(SD, SubRegion.get(), O, depth + 1);

    RegionInfo *RI = R->getRegionInfo();

    // List only the blocks that belong directly to R; blocks of subregions
    // were emitted by the recursive calls above.
    for (const auto &BB : R->blocks())
      if (RI->getRegionFor(BB) == R)
        O.indent(2 * (depth + 1))
            << "Node"
            << static_cast<void *>(RI->getTopLevelRegion()->getBBNode(BB))
            << ";\n";

    O.indent(2 * depth) << "}\n";
  }

  /// Emit the color scheme and the region clusters into the graph being
  /// written by \p GW.
  static void
  addCustomGraphFeatures(const ScopDetectionWrapperPass *SD,
                         GraphWriter<ScopDetectionWrapperPass *> &GW) {
    raw_ostream &O = GW.getOStream();
    O << "\tcolorscheme = \"paired12\"\n";
    printRegionCluster(&SD->getSD(), SD->getSD().getRI()->getTopLevelRegion(),
                       O, 4);
  }
};
} // end namespace llvm
/// Viewer pass showing the SCoP graph of a function including the function
/// bodies (registered below under "view-scops").
struct ScopViewer
    : public DOTGraphTraitsViewer<ScopDetectionWrapperPass, false> {
  using ViewerBase = DOTGraphTraitsViewer<ScopDetectionWrapperPass, false>;

  static char ID;

  ScopViewer() : ViewerBase("scops", ID) {}

  /// Decide whether \p F should be displayed.
  ///
  /// A non-empty -polly-view-only pattern must occur in the function name.
  /// Unless -polly-view-all is given, at least one SCoP must have been
  /// detected in the function.
  bool processFunction(Function &F, ScopDetectionWrapperPass &SD) override {
    const bool FilterActive = ViewFilter != "";
    if (FilterActive && !F.getName().count(ViewFilter))
      return false;

    if (ViewAll)
      return true;

    // Check that at least one scop was detected.
    const auto NumScops = std::distance(SD.getSD().begin(), SD.getSD().end());
    return NumScops > 0;
  }
};

char ScopViewer::ID = 0;
/// Viewer pass showing the SCoP graph without function bodies (registered
/// below under "view-scops-only").
struct ScopOnlyViewer
    : public DOTGraphTraitsViewer<ScopDetectionWrapperPass, true> {
  using ViewerBase = DOTGraphTraitsViewer<ScopDetectionWrapperPass, true>;

  static char ID;

  ScopOnlyViewer() : ViewerBase("scopsonly", ID) {}
};

char ScopOnlyViewer::ID = 0;
/// Printer pass writing the SCoP graph including function bodies (registered
/// below under "dot-scops").
struct ScopPrinter
    : public DOTGraphTraitsPrinter<ScopDetectionWrapperPass, false> {
  using PrinterBase = DOTGraphTraitsPrinter<ScopDetectionWrapperPass, false>;

  static char ID;

  ScopPrinter() : PrinterBase("scops", ID) {}
};

char ScopPrinter::ID = 0;
/// Printer pass writing the SCoP graph without function bodies (registered
/// below under "dot-scops-only").
struct ScopOnlyPrinter
    : public DOTGraphTraitsPrinter<ScopDetectionWrapperPass, true> {
  using PrinterBase = DOTGraphTraitsPrinter<ScopDetectionWrapperPass, true>;

  static char ID;

  ScopOnlyPrinter() : PrinterBase("scopsonly", ID) {}
};

char ScopOnlyPrinter::ID = 0;
// Register the four viewer/printer passes together with the command-line
// names under which they can be requested.

// Viewer including function bodies.
static RegisterPass<ScopViewer> X("view-scops",
"Polly - View Scops of function");
// Viewer without function bodies.
static RegisterPass<ScopOnlyViewer>
Y("view-scops-only",
"Polly - View Scops of function (with no function bodies)");
// Printer including function bodies.
static RegisterPass<ScopPrinter> M("dot-scops",
"Polly - Print Scops of function");
// Printer without function bodies.
static RegisterPass<ScopOnlyPrinter>
N("dot-scops-only",
"Polly - Print Scops of function (with no function bodies)");
// Factory functions; each returns a newly allocated pass object.

/// Create the SCoP viewer that includes function bodies.
Pass *polly::createDOTViewerPass() {
  return new ScopViewer();
}

/// Create the SCoP viewer that omits function bodies.
Pass *polly::createDOTOnlyViewerPass() {
  return new ScopOnlyViewer();
}

/// Create the SCoP printer that includes function bodies.
Pass *polly::createDOTPrinterPass() {
  return new ScopPrinter();
}

/// Create the SCoP printer that omits function bodies.
Pass *polly::createDOTOnlyPrinterPass() {
  return new ScopOnlyPrinter();
}

View File

@@ -0,0 +1 @@
d2bc71fb6d527df34b7d2041bf88e426996a13c6

View File

@@ -0,0 +1,168 @@
//===- ScopPass.cpp - The base class of Passes that operate on Polly IR ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the definitions of the ScopPass members.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopPass.h"
#include "polly/ScopInfo.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
using namespace polly;
/// Run the pass on region \p R if ScopInfoRegionPass built a Scop for it.
///
/// The cached Scop pointer is reset first, so it stays null when the region
/// is skipped or has no Scop.
/// \returns the result of runOnScop(), or false if it was not called.
bool ScopPass::runOnRegion(Region *R, RGPassManager &RGM) {
  S = nullptr;

  if (skipRegion(*R))
    return false;

  S = getAnalysis<ScopInfoRegionPass>().getScop();
  return S ? runOnScop(*S) : false;
}
/// Print the Scop remembered by the last runOnRegion() call, if there is one.
void ScopPass::print(raw_ostream &OS, const Module *M) const {
  if (!S)
    return;
  printScop(OS, *S);
}
/// Default analysis usage of Scop passes: ScopInfoRegionPass is required, and
/// the analyses listed below are declared preserved.
void ScopPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<ScopInfoRegionPass>();

  // Polly's own analyses.
  AU.addPreserved<ScopInfoRegionPass>();
  AU.addPreserved<ScopDetectionWrapperPass>();

  // Alias analyses.
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();

  // Structural and remaining function-level analyses.
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<RegionInfoPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
  AU.addPreserved<TargetTransformInfoWrapperPass>();
}
namespace polly {
// Explicit instantiation of the owning Scop-analysis-manager proxy for
// functions.
template class OwningInnerAnalysisManagerProxy<ScopAnalysisManager, Function>;
}
namespace llvm {
// Explicit instantiations of the pass-manager and analysis-proxy templates
// for Scops.
template class PassManager<Scop, ScopAnalysisManager,
ScopStandardAnalysisResults &, SPMUpdater &>;
template class InnerAnalysisManagerProxy<ScopAnalysisManager, Function>;
template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Scop,
ScopStandardAnalysisResults &>;
/// Run all registered Scop passes on \p S in order.
///
/// After each pass the Scop's analyses are invalidated according to what that
/// pass preserved, and the preserved sets of all passes are intersected.
template <>
PreservedAnalyses
PassManager<Scop, ScopAnalysisManager, ScopStandardAnalysisResults &,
            SPMUpdater &>::run(Scop &S, ScopAnalysisManager &AM,
                               ScopStandardAnalysisResults &AR, SPMUpdater &U) {
  auto Accumulated = PreservedAnalyses::all();

  for (auto &CurrentPass : Passes) {
    auto Preserved = CurrentPass->run(S, AM, AR, U);
    AM.invalidate(S, Preserved);
    Accumulated.intersect(std::move(Preserved));
  }

  // All analyses for 'this' Scop have been invalidated above.
  // If Scop passes affect or break other Scops, they have to propagate this
  // information through the updater.
  Accumulated.preserveSet<AllAnalysesOn<Scop>>();
  return Accumulated;
}
/// Decide how a function-level change affects the cached Scop analyses.
///
/// \param F    The function whose analyses are being invalidated.
/// \param PA   The set of analyses preserved by the transformation.
/// \param Inv  Invalidator used to query whether other function analyses
///             are invalidated.
/// \returns true if the proxy itself is invalidated (the inner results of all
///          Scops are cleared first and InnerAM is reset), false if the proxy
///          stays valid after selectively invalidating inner analyses.
bool ScopAnalysisManagerFunctionProxy::Result::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// First, check whether our ScopInfo is about to be invalidated
auto PAC = PA.getChecker<ScopAnalysisManagerFunctionProxy>();
// The proxy is dropped if it is not preserved itself, or if any of the
// function analyses queried below is invalidated.
if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<ScopInfoAnalysis>(F, PA) ||
Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA)) {
// As everything depends on ScopInfo, we must drop all existing results
for (auto &S : *SI)
if (auto *scop = S.second.get())
if (InnerAM)
InnerAM->clear(*scop, scop->getName());
// Forget the inner analysis manager; the null check above skips clearing
// when it is already unset.
InnerAM = nullptr;
return true; // Invalidate the proxy result as well.
}
bool allPreserved = PA.allAnalysesInSetPreserved<AllAnalysesOn<Scop>>();
// Invalidate all non-preserved analyses
// Even if all analyses were preserved, we still need to run deferred
// invalidation
for (auto &S : *SI) {
// Built lazily: copy of PA with the inner analyses abandoned that depend on
// an invalidated outer analysis.
Optional<PreservedAnalyses> InnerPA;
auto *scop = S.second.get();
if (!scop)
continue;
// NOTE(review): InnerAM is dereferenced here without the null check used in
// the branch above - confirm it cannot be null on this path.
if (auto *OuterProxy =
InnerAM->getCachedResult<FunctionAnalysisManagerScopProxy>(*scop)) {
// Each entry pairs an outer analysis ID with the inner analysis IDs that
// were registered as depending on it.
for (const auto &InvPair : OuterProxy->getOuterInvalidations()) {
auto *OuterAnalysisID = InvPair.first;
const auto &InnerAnalysisIDs = InvPair.second;
if (Inv.invalidate(OuterAnalysisID, F, PA)) {
if (!InnerPA)
InnerPA = PA;
for (auto *InnerAnalysisID : InnerAnalysisIDs)
InnerPA->abandon(InnerAnalysisID);
}
}
// Deferred invalidation happened: invalidate with the adjusted set and skip
// the generic invalidation below.
if (InnerPA) {
InnerAM->invalidate(*scop, *InnerPA);
continue;
}
}
if (!allPreserved)
InnerAM->invalidate(*scop, PA);
}
return false; // This proxy is still valid
}
/// Build the proxy result for \p F from the inner analysis manager and the
/// function's ScopInfoAnalysis result.
template <>
ScopAnalysisManagerFunctionProxy::Result
ScopAnalysisManagerFunctionProxy::run(Function &F,
                                      FunctionAnalysisManager &FAM) {
  auto &ScopInfoResult = FAM.getResult<ScopInfoAnalysis>(F);
  return Result(*InnerAM, ScopInfoResult);
}
} // namespace llvm
namespace polly {
/// Build the owning proxy's result for \p F from its own inner analysis
/// manager and the function's ScopInfoAnalysis result.
template <>
OwningScopAnalysisManagerFunctionProxy::Result
OwningScopAnalysisManagerFunctionProxy::run(Function &F,
                                            FunctionAnalysisManager &FAM) {
  auto &ScopInfoResult = FAM.getResult<ScopInfoAnalysis>(F);
  return Result(InnerAM, ScopInfoResult);
}
} // namespace polly

View File

@@ -0,0 +1,163 @@
# NOTE(review): presumably requests building these sources without RTTI -
# confirm against the LLVM CMake modules that consume LLVM_NO_RTTI.
set(LLVM_NO_RTTI 1)
# Sources of the isl-based code generator; added to PollyCore below.
set(ISL_CODEGEN_FILES
CodeGen/IslAst.cpp
CodeGen/IslExprBuilder.cpp
CodeGen/IslNodeBuilder.cpp
CodeGen/CodeGeneration.cpp)
# GPU code generation sources are only collected when GPU_CODEGEN is set.
if (GPU_CODEGEN)
set (GPGPU_CODEGEN_FILES
CodeGen/PPCGCodeGeneration.cpp
CodeGen/ManagedMemoryRewrite.cpp
)
endif (GPU_CODEGEN)
# Compile ISL into a separate library.
add_subdirectory(External)
# Header files are only collected for the MSVC IDE and Xcode generators; for
# all other generators the list stays empty.
set(POLLY_HEADER_FILES)
if (MSVC_IDE OR XCODE)
file(GLOB_RECURSE POLLY_HEADER_FILES "${POLLY_SOURCE_DIR}/include/polly/*.h")
endif ()
# Use an object library to add the same files to multiple libraries without
# requiring the sources to be recompiled for each of them.
add_library(PollyCore OBJECT
Analysis/DependenceInfo.cpp
Analysis/PolyhedralInfo.cpp
Analysis/ScopDetection.cpp
Analysis/ScopDetectionDiagnostic.cpp
Analysis/ScopInfo.cpp
Analysis/ScopBuilder.cpp
Analysis/ScopGraphPrinter.cpp
Analysis/ScopPass.cpp
Analysis/PruneUnprofitable.cpp
CodeGen/BlockGenerators.cpp
${ISL_CODEGEN_FILES}
CodeGen/LoopGenerators.cpp
CodeGen/IRBuilder.cpp
CodeGen/Utils.cpp
CodeGen/RuntimeDebugBuilder.cpp
CodeGen/CodegenCleanup.cpp
CodeGen/PerfMonitor.cpp
${GPGPU_CODEGEN_FILES}
Exchange/JSONExporter.cpp
Support/GICHelper.cpp
Support/SCEVAffinator.cpp
Support/SCEVValidator.cpp
Support/RegisterPasses.cpp
Support/ScopHelper.cpp
Support/ScopLocation.cpp
Support/ISLTools.cpp
Support/DumpModulePass.cpp
Support/VirtualInstruction.cpp
${POLLY_JSON_FILES}
Transform/Canonicalization.cpp
Transform/CodePreparation.cpp
Transform/DeadCodeElimination.cpp
Transform/ScheduleOptimizer.cpp
Transform/FlattenSchedule.cpp
Transform/FlattenAlgo.cpp
Transform/ForwardOpTree.cpp
Transform/DeLICM.cpp
Transform/ZoneAlgo.cpp
Transform/Simplify.cpp
Transform/MaximalStaticExpansion.cpp
Transform/RewriteByReferenceParameters.cpp
Transform/ScopInliner.cpp
${POLLY_HEADER_FILES}
)
# Group the target under the "Polly" folder in IDE project views.
set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
# Create the library that can be linked into LLVM's tools and Polly's unit
# tests. It depends on all libraries it needs, such that with
# LLVM_POLLY_LINK_INTO_TOOLS=ON, its dependencies like PollyISL are linked as
# well.
add_polly_library(Polly $<TARGET_OBJECTS:PollyCore>)
target_link_libraries(Polly
${ISL_TARGET}
${JSONCPP_LIBRARIES}
)
# Additional dependencies for Polly-ACC.
if (GPU_CODEGEN)
target_link_libraries(Polly PollyPPCG)
endif ()
# Polly-ACC requires the NVPTX backend to work. Ask LLVM about its libraries.
set(nvptx_libs)
if (GPU_CODEGEN)
# This call emits an error if the NVPTX backend is not enabled.
llvm_map_components_to_libnames(nvptx_libs NVPTX)
endif ()
if (LLVM_LINK_LLVM_DYLIB)
# The shlib/dylib already contains all the LLVM components
# (including NVPTX, if it is enabled). Adding them to target_link_libraries
# would cause them to be in the address space twice
# (their LLVM*.a/so and their copies in libLLVM.so),
# which results in errors when the two instances try to register the same
# command-line switches.
target_link_libraries(Polly LLVM)
else ()
target_link_libraries(Polly
LLVMSupport
LLVMCore
LLVMScalarOpts
LLVMInstCombine
LLVMTransformUtils
LLVMAnalysis
LLVMipo
LLVMMC
LLVMPasses
LLVMLinker
LLVMIRReader
${nvptx_libs}
# The libraries below are required for darwin: http://PR26392
LLVMBitReader
LLVMMCParser
LLVMObject
LLVMProfileData
LLVMTarget
LLVMVectorize
)
endif ()
# Create a loadable module Polly.so that can be loaded using
# LLVM's/clang's "-load" option.
if (MSVC)
# Add a dummy target, because loadable modules are not supported on Windows.
add_custom_target(LLVMPolly)
set_target_properties(LLVMPolly PROPERTIES FOLDER "Polly")
else ()
add_polly_loadable_module(LLVMPolly
Polly.cpp
$<TARGET_OBJECTS:PollyCore>
)
# Only add the dependencies that are not part of LLVM. The latter are assumed
# to be already available in the address space the module is loaded into.
# Adding them once more would have the effect that both copies try to register
# the same command-line options, to which LLVM reacts with an error.
# If Polly-ACC is enabled, the NVPTX target is also expected to reside in the
# host. This is not the case for bugpoint. Use LLVM_POLLY_LINK_INTO_TOOLS=ON
# instead, which will automatically resolve the additional dependencies of
# Polly.
target_link_libraries(LLVMPolly ${ISL_TARGET} ${JSONCPP_LIBRARIES})
if (GPU_CODEGEN)
target_link_libraries(LLVMPolly PollyPPCG)
endif ()
# Use the C++ linker driver and drop the default library name prefix.
set_target_properties(LLVMPolly
PROPERTIES
LINKER_LANGUAGE CXX
PREFIX "")
endif ()
# The intrinsics_gen target only exists when building as part of an LLVM
# build; in that case PollyCore is made to depend on it.
if (TARGET intrinsics_gen)
# Check if we are building as part of an LLVM build
add_dependencies(PollyCore intrinsics_gen)
endif()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,400 @@
//===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
// back to LLVM-IR using the ISL code generator.
//
// The Scop describes the high level memory behavior of a control flow region.
// Transformation passes can update the schedule (execution order) of statements
// in the Scop. ISL is used to generate an abstract syntax tree that reflects
// the updated execution order. This clast is used to create new LLVM-IR that is
// computationally equivalent to the original control flow region, but executes
// its code in the new execution order defined by the changed schedule.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IRBuilder.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/Utils.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/ast.h"
#include <cassert>
#include <utility>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-codegen"
// -polly-codegen-verify: run the function verifier on the function that code
// generation produced (see verifyGeneratedFunction).
static cl::opt<bool> Verify("polly-codegen-verify", cl::Hidden, cl::ZeroOrMore,
                            cl::init(false),
                            cl::desc("Verify the function generated by Polly"),
                            cl::cat(PollyCategory));

// Storage for -polly-codegen-perf-monitoring; the option below writes its
// value into this variable.
bool polly::PerfMonitoring;

// Note: cl::location is listed before cl::init, as in the original
// declaration, so the initial value is stored to an already-known location.
static cl::opt<bool, true>
    XPerfMonitoring("polly-codegen-perf-monitoring", cl::Hidden,
                    cl::desc("Add run-time performance monitoring"),
                    cl::location(polly::PerfMonitoring), cl::init(false),
                    cl::ZeroOrMore, cl::cat(PollyCategory));
// Counters describing the SCoPs handled by code generation.
STATISTIC(ScopsProcessed, "Number of SCoP processed");
STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
STATISTIC(CodegenedAffineLoops, "Number of original affine loops in SCoPs "
                                "that have been generated");
STATISTIC(CodegenedBoxedLoops, "Number of original boxed loops in SCoPs "
                               "that have been generated");
namespace polly {
/// Mark a basic block unreachable.
///
/// An UnreachableInst is created in front of the current terminator of
/// @p Block using @p Builder, and the old terminator is erased afterwards.
void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
  auto *OldTerminator = Block.getTerminator();

  // Insert the replacement first; the old terminator serves as insert point.
  Builder.SetInsertPoint(OldTerminator);
  Builder.CreateUnreachable();

  OldTerminator->eraseFromParent();
}
} // namespace polly
/// Verify @p F if -polly-codegen-verify is set, and abort if it is broken.
///
/// In debug output mode the SCoP @p S, the isl AST held by @p AI and the
/// invalid function are dumped to errs() before aborting.
static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
  if (!Verify)
    return;

  // verifyFunction() returns true if the function is broken.
  const bool Broken = verifyFunction(F, &errs());
  if (!Broken)
    return;

  DEBUG({
    errs() << "== ISL Codegen created an invalid function ==\n\n== The "
              "SCoP ==\n";
    errs() << S;
    errs() << "\n== The isl AST ==\n";
    AI.print(errs());
    errs() << "\n== The invalid function ==\n";
    F.print(errs());
  });

  llvm_unreachable("Polly generated function could not be verified. Add "
                   "-polly-codegen-verify=false to disable this assertion.");
}
// CodeGeneration adds many basic blocks without updating the RegionInfo.
// Every block of F that has no region yet is assigned to ParentRegion, without
// any nested structure, to keep the RegionInfo verifier happy.
static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) {
  for (BasicBlock &Block : F)
    if (!RI.getRegionFor(&Block))
      RI.setRegionFor(&Block, &ParentRegion);
}
/// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from
/// @p R.
///
/// CodeGeneration does not copy lifetime markers into the optimized SCoP,
/// which would leave them only in the original path. This can transform
/// code such as
///
///     llvm.lifetime.start(%p)
///     llvm.lifetime.end(%p)
///
/// into
///
///     if (RTC) {
///       // generated code
///     } else {
///       // original code
///       llvm.lifetime.start(%p)
///     }
///     llvm.lifetime.end(%p)
///
/// The current StackColoring algorithm cannot handle the case that some, but
/// not all, paths from the end marker to the entry block cross the start
/// marker. The same holds for start markers that do not always cross the end
/// markers. We avoid any issues by removing all lifetime markers, even from
/// the original code.
///
/// A better solution could be to hoist all llvm.lifetime.start to the split
/// node and all llvm.lifetime.end to the merge node, which should be
/// conservatively correct.
static void removeLifetimeMarkers(Region *R) {
  for (auto *Block : R->blocks()) {
    auto End = Block->end();
    for (auto It = Block->begin(); It != End;) {
      // Step past the current instruction before it may be erased.
      auto Current = It;
      ++It;

      auto *Intrinsic = dyn_cast<IntrinsicInst>(&*Current);
      if (!Intrinsic)
        continue;

      const auto ID = Intrinsic->getIntrinsicID();
      if (ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end)
        Block->getInstList().erase(Current);
    }
  }
}
/// Generate LLVM-IR for the SCoP @p S from the isl AST held by @p AI.
///
/// The original region is kept; a conditional branch is introduced in front
/// of it and the newly generated code is emitted on the other side of that
/// branch. On success the branch condition is the run-time check, otherwise
/// it is patched to always execute the original code.
///
/// @param S  The SCoP to generate code for.
/// @param AI The isl AST information; must share its isl_ctx with @p S.
/// @param LI The LoopInfo, handed to the helpers that modify the CFG.
/// @param DT The dominator tree, handed to the helpers and patched directly
///           when the generated path is discarded.
/// @param SE The ScalarEvolution handed to the IslNodeBuilder.
/// @param RI The RegionInfo; blocks without a region are assigned to the
///           parent region of the SCoP after code generation.
///
/// @return False if @p AI uses a different isl_ctx than @p S or holds no AST
///         root (both checks happen before any of the IR-modifying calls
///         below). True otherwise, including the case in which preloading
///         the invariant loads fails and the original code is kept.
static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
ScalarEvolution &SE, RegionInfo &RI) {
// Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen
// reports itself to preserve DependenceInfo and IslAstInfo, we might get
// those analyses that were computed by a different ScopInfo for a different
// Scop structure. When the ScopInfo/Scop object is freed, there is a high
// probability that the new ScopInfo/Scop object will be created at the same
// heap position with the same address. Comparing whether the Scop or ScopInfo
// address is the expected one is therefore unreliable.
// Instead, we compare the address of the isl_ctx object. Both DependenceInfo
// and IslAstInfo must hold a reference to the isl_ctx object to ensure it is
// not freed before the destruction of those analyses, which might happen
// after the destruction of the Scop/ScopInfo they refer to. Hence, the
// isl_ctx will not be freed and its space not reused as long as there is a
// DependenceInfo or IslAstInfo around.
IslAst &Ast = AI.getIslAst();
if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) {
DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n");
return false;
}
// Check if we created an isl_ast root node, otherwise exit.
isl_ast_node *AstRoot = Ast.getAst();
if (!AstRoot)
return false;
// Collect statistics. Do it before we modify the IR to avoid having it any
// influence on the result.
auto ScopStats = S.getStatistics();
ScopsProcessed++;
auto &DL = S.getFunction().getParent()->getDataLayout();
Region *R = &S.getRegion();
assert(!R->isTopLevelRegion() && "Top level regions are not supported");
ScopAnnotator Annotator;
simplifyRegion(R, &DT, &LI, &RI);
assert(R->isSimple());
BasicBlock *EnteringBB = S.getEnteringBlock();
assert(EnteringBB);
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
// Only build the run-time condition and parameters _after_ having
// introduced the conditional branch. This is important as the conditional
// branch will guard the original scop from new induction variables that
// the SCEVExpander may introduce while code generating the parameters and
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
// The branch is created with a constant 'true' condition here; operand 0 of
// the terminator is replaced further down (by 'false' or by the RTC).
BBPair StartExitBlocks =
std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
removeLifetimeMarkers(R);
// The block whose terminator is the conditional branch patched below.
auto *SplitBlock = StartBlock->getSinglePredecessor();
IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
// All arrays must have their base pointers known before
// ScopAnnotator::buildAliasScopes.
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
P.insertRegionStart(SplitBlock->getTerminator());
BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
P.insertRegionEnd(MergeBlock->getTerminator());
}
// First generate code for the hoisted invariant loads and transitively the
// parameters they reference. Afterwards, for the remaining parameters that
// might reference the hoisted loads. Finally, build the runtime check
// that might reference both hoisted loads as well as parameters.
// If the hoisting fails we have to bail and execute the original code.
Builder.SetInsertPoint(SplitBlock->getTerminator());
if (!NodeBuilder.preloadInvariantLoads()) {
// Patch the introduced branch condition to ensure that we always execute
// the original SCoP.
auto *FalseI1 = Builder.getFalse();
auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
SplitBBTerm->setOperand(0, FalseI1);
// Since the other branch is hence ignored we mark it as unreachable and
// adjust the dominator tree accordingly.
auto *ExitingBlock = StartBlock->getUniqueSuccessor();
assert(ExitingBlock);
auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
assert(MergeBlock);
markBlockUnreachable(*StartBlock, Builder);
markBlockUnreachable(*ExitingBlock, Builder);
auto *ExitingBB = S.getExitingBlock();
assert(ExitingBB);
DT.changeImmediateDominator(MergeBlock, ExitingBB);
DT.eraseNode(ExitingBlock);
// AstRoot is not passed to NodeBuilder.create() on this path, so it is
// released here.
isl_ast_node_free(AstRoot);
} else {
NodeBuilder.addParameters(S.getContext().release());
Value *RTC = NodeBuilder.createRTC(AI.getRunCondition());
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
// Explicitly set the insert point to the end of the block to avoid that a
// split at the builder's current
// insert position would move the malloc calls to the wrong BasicBlock.
// Ideally we would just split the block during allocation of the new
// arrays, but this would break the assumption that there are no blocks
// between polly.start and polly.exiting (at this point).
Builder.SetInsertPoint(StartBlock->getTerminator());
NodeBuilder.create(AstRoot);
NodeBuilder.finalize();
fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
// Statistics are only updated when code was actually generated.
CodegenedScops++;
CodegenedAffineLoops += ScopStats.NumAffineLoops;
CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
}
// Verify the modified function as well as every parallel subfunction the
// node builder reports.
Function *F = EnteringBB->getParent();
verifyGeneratedFunction(S, *F, AI);
for (auto *SubF : NodeBuilder.getParallelSubfunctions())
verifyGeneratedFunction(S, *SubF, AI);
// Mark the function such that we run additional cleanup passes on this
// function (e.g. mem2reg to rediscover phi nodes).
F->addFnAttr("polly-optimized");
return true;
}
namespace {
class CodeGeneration : public ScopPass {
public:
static char ID;
/// The data layout used.
const DataLayout *DL;
/// @name The analysis passes we need to generate code.
///
///{
LoopInfo *LI;
IslAstInfo *AI;
DominatorTree *DT;
ScalarEvolution *SE;
RegionInfo *RI;
///}
CodeGeneration() : ScopPass(ID) {}
/// Generate LLVM-IR for the SCoP @p S.
bool runOnScop(Scop &S) override {
// Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
if (S.isToBeSkipped())
return false;
AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DL = &S.getFunction().getParent()->getDataLayout();
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
return CodeGen(S, *AI, *LI, *DT, *SE, *RI);
}
/// Register all analyses and transformation required.
void getAnalysisUsage(AnalysisUsage &AU) const override {
ScopPass::getAnalysisUsage(AU);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<IslAstInfoWrapperPass>();
AU.addRequired<RegionInfoPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<ScopDetectionWrapperPass>();
AU.addRequired<ScopInfoRegionPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DependenceInfo>();
AU.addPreserved<IslAstInfoWrapperPass>();
// FIXME: We do not yet add regions for the newly generated code to the
// region tree.
}
};
} // namespace
/// Run code generation for @p S using the analyses in @p AR.
///
/// When CodeGen() reports success the SCoP is invalidated through @p U and no
/// analysis is reported as preserved; otherwise all analyses are preserved.
PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM,
                                          ScopStandardAnalysisResults &AR,
                                          SPMUpdater &U) {
  auto &AstInfo = SAM.getResult<IslAstAnalysis>(S, AR);

  const bool Generated = CodeGen(S, AstInfo, AR.LI, AR.DT, AR.SE, AR.RI);
  if (!Generated)
    return PreservedAnalyses::all();

  U.invalidateScop(S);
  return PreservedAnalyses::none();
}
// Definition of the static pass ID member declared in class CodeGeneration.
char CodeGeneration::ID = 1;
// Factory function: returns a newly heap-allocated CodeGeneration pass.
Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); }
// Register the pass under the command line name "polly-codegen" and list the
// passes it depends on.
INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
"Polly - Create LLVM-IR from SCoPs", false, false);
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
"Polly - Create LLVM-IR from SCoPs", false, false)

View File

@@ -0,0 +1,139 @@
//===- CodegenCleanup.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/CodegenCleanup.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/PassInfo.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#define DEBUG_TYPE "polly-cleanup"
using namespace llvm;
using namespace polly;
namespace {
/// Function pass that runs a fixed sequence of cleanup passes on functions
/// that carry the "polly-optimized" function attribute.
///
/// The cleanup passes live in a nested legacy::FunctionPassManager that is
/// created in doInitialization() and destroyed in doFinalization().
class CodegenCleanup : public FunctionPass {
private:
// The pass is not copyable.
CodegenCleanup(const CodegenCleanup &) = delete;
const CodegenCleanup &operator=(const CodegenCleanup &) = delete;
// Nested pass manager holding the cleanup pipeline; nullptr outside of the
// doInitialization()/doFinalization() window.
llvm::legacy::FunctionPassManager *FPM;
public:
static char ID;
explicit CodegenCleanup() : FunctionPass(ID), FPM(nullptr) {}
/// @name FunctionPass interface
//@{
// No analyses are requested or preserved through the outer pass manager.
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {}
/// Create the nested pass manager for @p M and populate it with the cleanup
/// pipeline. Returns the result of the nested doInitialization().
virtual bool doInitialization(Module &M) override {
assert(!FPM);
FPM = new llvm::legacy::FunctionPassManager(&M);
// TODO: How to make parent passes discoverable?
// TODO: Should be sensitive to compiler options in PassManagerBuilder, to
// which we do not have access here.
FPM->add(createScopedNoAliasAAWrapperPass());
FPM->add(createTypeBasedAAWrapperPass());
FPM->add(createAAResultsWrapperPass());
// TODO: These are non-conditional passes that run between
// EP_ModuleOptimizerEarly and EP_VectorizerStart just to ensure we do not
// miss any optimization that would have run after Polly with
// -polly-position=early. This can probably be reduced to a more compact set
// of passes.
FPM->add(createCFGSimplificationPass());
FPM->add(createSROAPass());
FPM->add(createEarlyCSEPass());
FPM->add(createPromoteMemoryToRegisterPass());
FPM->add(createInstructionCombiningPass(true));
FPM->add(createCFGSimplificationPass());
FPM->add(createSROAPass());
FPM->add(createEarlyCSEPass(true));
FPM->add(createSpeculativeExecutionIfHasBranchDivergencePass());
FPM->add(createJumpThreadingPass());
FPM->add(createCorrelatedValuePropagationPass());
FPM->add(createCFGSimplificationPass());
FPM->add(createInstructionCombiningPass(true));
FPM->add(createLibCallsShrinkWrapPass());
FPM->add(createTailCallEliminationPass());
FPM->add(createCFGSimplificationPass());
FPM->add(createReassociatePass());
FPM->add(createLoopRotatePass(-1));
FPM->add(createGVNPass());
FPM->add(createLICMPass());
FPM->add(createLoopUnswitchPass());
FPM->add(createCFGSimplificationPass());
FPM->add(createInstructionCombiningPass(true));
FPM->add(createIndVarSimplifyPass());
FPM->add(createLoopIdiomPass());
FPM->add(createLoopDeletionPass());
FPM->add(createCFGSimplificationPass());
FPM->add(createSimpleLoopUnrollPass(3));
FPM->add(createMergedLoadStoreMotionPass());
FPM->add(createGVNPass());
FPM->add(createMemCpyOptPass());
FPM->add(createSCCPPass());
FPM->add(createBitTrackingDCEPass());
FPM->add(createInstructionCombiningPass(true));
FPM->add(createJumpThreadingPass());
FPM->add(createCorrelatedValuePropagationPass());
FPM->add(createDeadStoreEliminationPass());
FPM->add(createLICMPass());
FPM->add(createAggressiveDCEPass());
FPM->add(createCFGSimplificationPass());
FPM->add(createInstructionCombiningPass(true));
FPM->add(createFloat2IntPass());
return FPM->doInitialization();
}
/// Finalize and destroy the nested pass manager. Returns the result of the
/// nested doFinalization().
virtual bool doFinalization(Module &M) override {
bool Result = FPM->doFinalization();
delete FPM;
// Reset so that the assert in doInitialization() holds on the next module.
FPM = nullptr;
return Result;
}
/// Run the cleanup pipeline on @p F if it has the "polly-optimized"
/// attribute; otherwise leave @p F untouched and return false.
virtual bool runOnFunction(llvm::Function &F) override {
if (!F.hasFnAttribute("polly-optimized")) {
// NOTE(review): neither debug message below ends with a newline.
DEBUG(dbgs() << F.getName()
<< ": Skipping cleanup because Polly did not optimize it.");
return false;
}
DEBUG(dbgs() << F.getName() << ": Running codegen cleanup...");
return FPM->run(F);
}
//@}
};
// Definition of the static pass ID member.
char CodegenCleanup::ID;
} // namespace
// Factory function: returns a newly heap-allocated CodegenCleanup pass.
FunctionPass *polly::createCodegenCleanupPass() { return new CodegenCleanup(); }
// Register the pass under the command line name "polly-cleanup"; no pass
// dependencies are listed.
INITIALIZE_PASS_BEGIN(CodegenCleanup, "polly-cleanup",
"Polly - Cleanup after code generation", false, false)
INITIALIZE_PASS_END(CodegenCleanup, "polly-cleanup",
"Polly - Cleanup after code generation", false, false)

View File

@@ -0,0 +1,256 @@
//===------ PollyIRBuilder.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The Polly IRBuilder file contains Polly specific extensions for the IRBuilder
// that are used e.g. to emit the llvm.loop.parallel metadata.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/IRBuilder.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
using namespace polly;
// Upper limit on the number of arrays for which
// ScopAnnotator::buildAliasScopes() still constructs alias scope metadata.
static const int MaxArraysInAliasScops = 10;
/// Create a metadata node whose first operand refers to the node itself.
///
/// The resulting MDNode has the form (arg0/arg1 are only present when they
/// are not null):
///
///    '!n = metadata !{metadata !n, arg0, arg1}'
///
/// @param Ctx  The context the node is created in.
/// @param arg0 Optional second operand.
/// @param arg1 Optional third operand.
///
/// @return The self referencing id metadata node.
static MDNode *getID(LLVMContext &Ctx, Metadata *arg0 = nullptr,
                     Metadata *arg1 = nullptr) {
  // A temporary node serves as a unique placeholder for operand 0 until the
  // real node exists and can be patched in.
  auto Placeholder = MDNode::getTemporary(Ctx, None);

  SmallVector<Metadata *, 3> Operands;
  Operands.push_back(Placeholder.get());
  if (arg0)
    Operands.push_back(arg0);
  if (arg1)
    Operands.push_back(arg1);

  MDNode *SelfRef = MDNode::get(Ctx, Operands);
  SelfRef->replaceOperandWith(0, SelfRef);
  return SelfRef;
}
// Start without ScalarEvolution and without an alias scope domain; both are
// set by buildAliasScopes().
ScopAnnotator::ScopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {}
/// (Re)build the alias scope metadata for the arrays of @p S.
///
/// Stores the ScalarEvolution of @p S, creates a fresh alias scope domain and
/// clears the previously built maps. Afterwards each array-kind
/// ScopArrayInfo gets its own alias scope (AliasScopeMap) and a list with the
/// scopes of all other arrays (OtherAliasScopeListMap), unless there are more
/// than MaxArraysInAliasScops arrays.
void ScopAnnotator::buildAliasScopes(Scop &S) {
  SE = S.getSE();

  LLVMContext &Ctx = SE->getContext();
  AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain"));

  AliasScopeMap.clear();
  OtherAliasScopeListMap.clear();

  // Only arrays are of interest here; scalar references should be handled
  // easily by basicaa.
  SmallVector<ScopArrayInfo *, 10> Arrays;
  for (ScopArrayInfo *Candidate : S.arrays()) {
    if (!Candidate->isArrayKind())
      continue;
    Arrays.push_back(Candidate);
  }

  // Building the alias scopes is quadratic in the number of arrays. With too
  // many arrays the construction is skipped to avoid a quadratic growth of
  // compile time and code size.
  if (Arrays.size() > MaxArraysInAliasScops)
    return;

  // First pass: one alias scope per array, keyed by its base pointer.
  std::string AliasScopeStr = "polly.alias.scope.";
  for (const ScopArrayInfo *Array : Arrays) {
    assert(Array->getBasePtr() && "Base pointer must be present");
    auto *ScopeName =
        MDString::get(Ctx, (AliasScopeStr + Array->getName()).c_str());
    AliasScopeMap[Array->getBasePtr()] =
        getID(Ctx, AliasScopeDomain, ScopeName);
  }

  // Second pass: for each array, collect the scopes of all *other* arrays.
  for (const ScopArrayInfo *Array : Arrays) {
    MDNode *OtherScopes = MDNode::get(Ctx, {});

    for (const auto &Entry : AliasScopeMap) {
      if (Array->getBasePtr() == Entry.first)
        continue;

      Metadata *ScopeOperand = Entry.second;
      OtherScopes =
          MDNode::concatenate(OtherScopes, MDNode::get(Ctx, ScopeOperand));
    }

    OtherAliasScopeListMap[Array->getBasePtr()] = OtherScopes;
  }
}
/// Register @p L as the innermost active loop.
///
/// For a parallel loop a new self-referencing id is created and, combined
/// with the ids of the enclosing parallel loops (if any), pushed onto
/// ParallelLoops.
void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
  ActiveLoops.push_back(L);

  if (IsParallel) {
    LLVMContext &Ctx = L->getHeader()->getContext();
    MDNode *Id = getID(Ctx);
    assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
    assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");

    // Nested parallel loops accumulate the ids of their parents.
    MDNode *Ids = Id;
    if (!ParallelLoops.empty())
      Ids = MDNode::concatenate(ParallelLoops.back(), Id);

    ParallelLoops.push_back(Ids);
  }
}
/// Remove the innermost active loop; if it was parallel, also drop its entry
/// from ParallelLoops.
void ScopAnnotator::popLoop(bool IsParallel) {
  ActiveLoops.pop_back();

  if (IsParallel) {
    assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
    ParallelLoops.pop_back();
  }
}
/// Attach "llvm.loop" metadata to the loop latch branch @p B.
///
/// @param B                        The latch branch to annotate.
/// @param L                        The loop the latch belongs to (unused here).
/// @param IsParallel               If set, the id of the innermost parallel
///                                 loop is added to the metadata.
/// @param IsLoopVectorizerDisabled If set, a node that sets
///                                 "llvm.loop.vectorize.enable" to false is
///                                 added to the metadata.
///
/// If neither flag is set, the "llvm.loop" metadata of @p B is set to null.
void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
                                      bool IsLoopVectorizerDisabled) const {
  MDNode *MData = nullptr;

  if (IsLoopVectorizerDisabled) {
    // Take the context from the branch itself. SE is only set once
    // buildAliasScopes() has run, so going through SE here would dereference
    // a null pointer if a latch is annotated before that.
    LLVMContext &Ctx = B->getContext();

    SmallVector<Metadata *, 3> Args;
    Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable"));
    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
    Args.push_back(ValueAsMetadata::get(FalseValue));
    MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args)));
  }

  if (IsParallel) {
    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
    // The last operand of the accumulated id list is the id of this loop.
    MDNode *Ids = ParallelLoops.back();
    MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
    MData = MDNode::concatenate(MData, Id);
  }

  B->setMetadata("llvm.loop", MData);
}
/// Return the pointer operand of a memory access instruction.
///
/// @param Inst The instruction to be analyzed.
/// @return The pointer operand if @p Inst is recognized by MemAccInst as a
///         memory access instruction, nullptr otherwise.
static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
  auto Access = MemAccInst::dyn_cast(Inst);
  return !Access ? nullptr : Access.getPointerOperand();
}
/// Attach second-level "alias.scope" / "noalias" metadata to @p Inst.
///
/// A dedicated alias scope is created per accessed pointer SCEV and cached in
/// SecondLevelAliasScopeMap; it references the alias scope registered for
/// @p BasePtr. The matching "noalias" list is cached in
/// SecondLevelOtherAliasScopeListMap.
///
/// @param Inst    The memory access instruction to annotate. Nothing is done
///                if it has no pointer operand.
/// @param BasePtr The base pointer of the accessed array. Nothing is done if
///                no alias scope is known for it and none was cached for the
///                pointer SCEV.
void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
                                        llvm::Value *BasePtr) {
  // Validate the pointer operand before any SCEV query: getSCEV() must not be
  // called with a null value.
  llvm::Value *Ptr = getMemAccInstPointerOperand(Inst);
  if (!Ptr)
    return;

  auto *PtrSCEV = SE->getSCEV(Ptr);
  if (!PtrSCEV)
    return;
  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);

  auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(PtrSCEV);
  auto SecondLevelOtherAliasScopeList =
      SecondLevelOtherAliasScopeListMap.lookup(PtrSCEV);

  if (!SecondLevelAliasScope) {
    auto AliasScope = AliasScopeMap.lookup(BasePtr);
    if (!AliasScope)
      return;

    // Create and cache the scope for this pointer.
    LLVMContext &Ctx = SE->getContext();
    SecondLevelAliasScope = getID(
        Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
    SecondLevelAliasScopeMap[PtrSCEV] = SecondLevelAliasScope;

    // Append the new scope to the list kept for the base pointer SCEV. The
    // list as it was *before* appending is what goes into the noalias list
    // below.
    Metadata *Args = {SecondLevelAliasScope};
    auto SecondLevelBasePtrAliasScopeList =
        SecondLevelAliasScopeMap.lookup(BasePtrSCEV);
    SecondLevelAliasScopeMap[BasePtrSCEV] = MDNode::concatenate(
        SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));

    auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
    SecondLevelOtherAliasScopeList = MDNode::concatenate(
        OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
    SecondLevelOtherAliasScopeListMap[PtrSCEV] = SecondLevelOtherAliasScopeList;
  }

  Inst->setMetadata("alias.scope", SecondLevelAliasScope);
  Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
}
/// Annotate @p Inst with parallel-loop and alias metadata.
///
/// Instructions that neither read nor write memory are ignored. Inside a
/// parallel loop "llvm.mem.parallel_loop_access" is attached. If alias scopes
/// were built and the base pointer of the access (or its registered
/// alternative base) has an alias scope, "alias.scope"/"noalias" metadata is
/// attached as well, using second-level scopes for base pointers registered
/// as inter-iteration alias-free.
void ScopAnnotator::annotate(Instruction *Inst) {
  if (!Inst->mayReadOrWriteMemory())
    return;

  if (!ParallelLoops.empty())
    Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());

  // TODO: Use the ScopArrayInfo once available here.
  if (!AliasScopeDomain)
    return;

  // Memory operations taking more than one pointer are not annotated, since
  // it would be ambiguous which pointer the annotation applies to.
  // FIXME: How can we specify annotations for all pointer arguments?
  const bool IsNonMemSetCall = isa<CallInst>(Inst) && !isa<MemSetInst>(Inst);
  if (IsNonMemSetCall)
    return;

  auto *Ptr = getMemAccInstPointerOperand(Inst);
  if (!Ptr)
    return;

  // Only accesses whose pointer base is a plain value can be looked up.
  auto *SU = dyn_cast<SCEVUnknown>(SE->getPointerBase(SE->getSCEV(Ptr)));
  if (!SU)
    return;

  auto *BasePtr = SU->getValue();
  if (!BasePtr)
    return;

  auto AliasScope = AliasScopeMap.lookup(BasePtr);
  if (!AliasScope) {
    // Retry with the alternative base registered for this pointer, if any.
    BasePtr = AlternativeAliasBases.lookup(BasePtr);
    if (!BasePtr)
      return;

    AliasScope = AliasScopeMap.lookup(BasePtr);
    if (!AliasScope)
      return;
  }

  assert(OtherAliasScopeListMap.count(BasePtr) &&
         "BasePtr either expected in AliasScopeMap and OtherAlias...Map");
  auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];

  if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
    annotateSecondLevel(Inst, BasePtr);
  } else {
    Inst->setMetadata("alias.scope", AliasScope);
    Inst->setMetadata("noalias", OtherAliasScopeList);
  }
}
/// Record @p BasePtr in InterIterationAliasFreeBasePtrs; null pointers are
/// ignored.
void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
  if (BasePtr)
    InterIterationAliasFreeBasePtrs.insert(BasePtr);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,381 @@
//===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar and parallel loops as LLVM-IR.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/ScopDetection.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace polly;
// Hidden command line option -polly-num-threads (default 0). Its value is
// passed as the thread count argument of the
// GOMP_parallel_loop_runtime_start call emitted by createCallSpawnThreads().
static cl::opt<int>
PollyNumThreads("polly-num-threads",
cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
cl::init(0));
// We generate a loop of either of the following structures:
//
// BeforeBB BeforeBB
// | |
// v v
// GuardBB PreHeaderBB
// / | | _____
// __ PreHeaderBB | v \/ |
// / \ / | HeaderBB latch
// latch HeaderBB | |\ |
// \ / \ / | \------/
// < \ / |
// \ / v
// ExitBB ExitBB
//
// depending on whether or not we know that it is executed at least once. If
// not, GuardBB checks if the loop is executed at least once. If this is the
// case we branch to PreHeaderBB and subsequently to the HeaderBB, which
// contains the loop iv 'polly.indvar', the incremented loop iv
// 'polly.indvar_next' as well as the condition to check if we execute another
// iteration of the loop. After the loop has finished, we branch to ExitBB.
// We expect the type of UB, LB, UB+Stride to be large enough for values that
// UB may take throughout the execution of the loop, including the computation
// of indvar + Stride before the final abort.
//
// Parameters:
//   LB, UB      Lower and upper bound of the loop; both must have the same
//               integer type, which also becomes the type of the induction
//               variable.
//   Stride      The increment per iteration; zero-extended or bitcast to the
//               induction variable type.
//   Builder     Its current insert point determines where the loop is placed.
//               On return it points to the first non-PHI instruction of the
//               loop header, where the loop body should be added.
//   LI, DT      LoopInfo and dominator tree; both are updated for the new
//               blocks.
//   ExitBB      Output: the block executed after the loop
//               ('polly.loop_exit').
//   Predicate   Comparison used for the guard (LB against UB) and for the
//               loop condition ('polly.indvar_next' against UB).
//   Annotator   If not null, it is notified about the new loop and annotates
//               the loop latch.
//   Parallel, LoopVectDisabled
//               Forwarded to the annotator.
//   UseGuard    Whether to create the 'polly.loop_if' guard block.
//
// Returns the induction variable PHI node 'polly.indvar'.
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, BasicBlock *&ExitBB,
ICmpInst::Predicate Predicate,
ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
bool LoopVectDisabled) {
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
assert(LoopIVType && "UB is not integer?");
BasicBlock *BeforeBB = Builder.GetInsertBlock();
// GuardBB stays null when no guard is requested; the code below checks it to
// select between the two loop structures.
BasicBlock *GuardBB =
UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
BasicBlock *PreHeaderBB =
BasicBlock::Create(Context, "polly.loop_preheader", F);
// Update LoopInfo
Loop *OuterLoop = LI.getLoopFor(BeforeBB);
Loop *NewLoop = LI.AllocateLoop();
if (OuterLoop)
OuterLoop->addChildLoop(NewLoop);
else
LI.addTopLevelLoop(NewLoop);
// The guard and the preheader are added to the surrounding loop (if there
// is one); only the header is added to the new loop.
if (OuterLoop) {
if (GuardBB)
OuterLoop->addBasicBlockToLoop(GuardBB, LI);
OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
}
NewLoop->addBasicBlockToLoop(HeaderBB, LI);
// Notify the annotator (if present) that we have a new loop, but only
// after the header block is set.
if (Annotator)
Annotator->pushLoop(NewLoop, Parallel);
// ExitBB
// Everything from the current insert point on is split off into ExitBB.
ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
ExitBB->setName("polly.loop_exit");
// BeforeBB
if (GuardBB) {
BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
DT.addNewBlock(GuardBB, BeforeBB);
// GuardBB
Builder.SetInsertPoint(GuardBB);
Value *LoopGuard;
LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
LoopGuard->setName("polly.loop_guard");
Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
DT.addNewBlock(PreHeaderBB, GuardBB);
} else {
BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
DT.addNewBlock(PreHeaderBB, BeforeBB);
}
// PreHeaderBB
Builder.SetInsertPoint(PreHeaderBB);
Builder.CreateBr(HeaderBB);
// HeaderBB
DT.addNewBlock(HeaderBB, PreHeaderBB);
Builder.SetInsertPoint(HeaderBB);
PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
IV->addIncoming(LB, PreHeaderBB);
Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
Value *LoopCondition =
Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
if (Annotator)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
IV->addIncoming(IncrementedIV, HeaderBB);
// With a guard, ExitBB is reachable both from the guard and from the header,
// so the guard becomes its immediate dominator; otherwise the header does.
if (GuardBB)
DT.changeImmediateDominator(ExitBB, GuardBB);
else
DT.changeImmediateDominator(ExitBB, HeaderBB);
// The loop body should be added here.
Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
return IV;
}
/// Create a parallel loop that is executed in a separate subfunction.
///
/// @param LB         The lower bound passed to the spawn call.
/// @param UB         The upper bound; one is added before it is passed to
///                   the spawn call.
/// @param Stride     The stride passed to createSubFn() and the spawn call.
/// @param UsedValues Values stored into the context struct that is handed to
///                   the subfunction.
/// @param Map        Filled by createSubFn() with the mapping from the
///                   original values to the values loaded in the subfunction.
/// @param LoopBody   Output: the insert point inside the subfunction at which
///                   the loop body is to be generated.
///
/// @return The induction variable returned by createSubFn(). On return the
///         builder emits code behind the instruction it pointed to on entry.
Value *ParallelLoopGenerator::createParallelLoop(
Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
ValueMapT &Map, BasicBlock::iterator *LoopBody) {
Function *SubFn;
AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
// Remember where to continue in the calling function; createSubFn() moves
// the builder into the subfunction.
BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
*LoopBody = Builder.GetInsertPoint();
Builder.SetInsertPoint(&*BeforeLoop);
Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
"polly.par.userContext");
// Add one as the upper bound provided by OpenMP is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
// Tell the runtime we start a parallel loop
createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
// In addition to the spawn call, the subfunction is also called directly
// here, before the join call.
Builder.CreateCall(SubFn, SubFnParam);
createCallJoinThreads();
return IV;
}
/// Emit a call to GOMP_parallel_loop_runtime_start, declaring the function in
/// the module first if it does not exist yet.
///
/// @param SubFn      The subfunction the runtime is asked to execute.
/// @param SubFnParam The i8* argument handed to @p SubFn.
/// @param LB         The lower bound of the loop.
/// @param UB         The upper bound of the loop.
/// @param Stride     The stride of the loop.
void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
                                                   Value *SubFnParam, Value *LB,
                                                   Value *UB, Value *Stride) {
  const std::string Name = "GOMP_parallel_loop_runtime_start";

  Function *Callee = M->getFunction(Name);
  if (!Callee) {
    // Declaration: void (void (i8*)*, i8*, i32, long, long, long)
    Type *SubFnPtrTy = PointerType::getUnqual(FunctionType::get(
        Builder.getVoidTy(), Builder.getInt8PtrTy(), false));
    Type *Params[] = {SubFnPtrTy,           Builder.getInt8PtrTy(),
                      Builder.getInt32Ty(), LongType,
                      LongType,             LongType};

    FunctionType *CalleeTy =
        FunctionType::get(Builder.getVoidTy(), Params, false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
  }

  // The thread count comes from the -polly-num-threads option.
  Value *ThreadCount = Builder.getInt32(PollyNumThreads);
  Value *Args[] = {SubFn, SubFnParam, ThreadCount, LB, UB, Stride};
  Builder.CreateCall(Callee, Args);
}
/// Emit a call to GOMP_loop_runtime_next, declaring the function in the
/// module first if it does not exist yet.
///
/// @param LBPtr Pointer passed as first argument of the call.
/// @param UBPtr Pointer passed as second argument of the call.
///
/// @return A value that compares the i8 result of the call for inequality
///         with zero.
Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
                                                    Value *UBPtr) {
  const std::string Name = "GOMP_loop_runtime_next";

  Function *Callee = M->getFunction(Name);
  if (!Callee) {
    // Declaration: i8 (long*, long*)
    Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getInt8Ty(), Params, false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
  }

  Value *Args[] = {LBPtr, UBPtr};
  Value *CallResult = Builder.CreateCall(Callee, Args);

  // Compare against a zero of the call's return type.
  Value *Zero = Builder.CreateZExt(Builder.getFalse(), CallResult->getType());
  return Builder.CreateICmpNE(CallResult, Zero);
}
void ParallelLoopGenerator::createCallJoinThreads() {
const std::string Name = "GOMP_parallel_end";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
F = Function::Create(Ty, Linkage, Name, M);
}
Builder.CreateCall(F, {});
}
void ParallelLoopGenerator::createCallCleanupThread() {
const std::string Name = "GOMP_loop_end_nowait";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
F = Function::Create(Ty, Linkage, Name, M);
}
Builder.CreateCall(F, {});
}
/// Create the (still empty) subfunction for a parallel loop.
///
/// The function has internal linkage, returns void and takes a single i8*
/// argument named "polly.par.userContext". Its name is derived from the
/// function the builder currently inserts into, with "_polly_subfn" appended,
/// and it is tagged with PollySkipFnAttr.
///
/// @return The newly created function.
Function *ParallelLoopGenerator::createSubFnDefinition() {
  Function *Parent = Builder.GetInsertBlock()->getParent();

  std::vector<Type *> ParamTypes(1, Builder.getInt8PtrTy());
  FunctionType *SubFnTy =
      FunctionType::get(Builder.getVoidTy(), ParamTypes, false);
  Function *SubFn = Function::Create(SubFnTy, Function::InternalLinkage,
                                     Parent->getName() + "_polly_subfn", M);

  // Certain backends (e.g., NVPTX) do not support '.'s in function names.
  // Hence, every '.' in the name is replaced by '_'.
  std::string SanitizedName = SubFn->getName();
  for (char &C : SanitizedName)
    if (C == '.')
      C = '_';
  SubFn->setName(SanitizedName);

  // Do not run any polly pass on the new function.
  SubFn->addFnAttr(PollySkipFnAttr);

  SubFn->arg_begin()->setName("polly.par.userContext");

  return SubFn;
}
/// Store @p Values into a freshly allocated struct.
///
/// The struct has one member per value, in the order of @p Values. The alloca
/// is placed at the first insertion point of the function's entry block; the
/// stores are emitted at the builder's current insert point.
///
/// @return The alloca holding the struct.
AllocaInst *
ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
  BasicBlock *InsertBB = Builder.GetInsertBlock();

  SmallVector<Type *, 8> MemberTypes;
  for (Value *Val : Values)
    MemberTypes.push_back(Val->getType());

  const DataLayout &DL = InsertBB->getModule()->getDataLayout();

  // We do not want to allocate the alloca inside any loop, thus we allocate it
  // in the entry block of the function and use annotations to denote the
  // actual live span (similar to clang).
  BasicBlock &EntryBB = InsertBB->getParent()->getEntryBlock();
  Instruction *AllocaPos = &*EntryBB.getFirstInsertionPt();

  StructType *StructTy = StructType::get(Builder.getContext(), MemberTypes);
  AllocaInst *Struct = new AllocaInst(StructTy, DL.getAllocaAddrSpace(),
                                      nullptr, "polly.par.userContext",
                                      AllocaPos);

  for (unsigned Idx = 0, NumValues = Values.size(); Idx != NumValues; ++Idx) {
    Value *Val = Values[Idx];
    Value *Slot = Builder.CreateStructGEP(StructTy, Struct, Idx);
    Slot->setName("polly.subfn.storeaddr." + Val->getName());
    Builder.CreateStore(Val, Slot);
  }

  return Struct;
}
/// Load the members of @p Struct and record them as replacements for
/// @p OldValues.
///
/// @param OldValues The original values; the i-th value corresponds to the
///                  i-th member of the struct.
/// @param Ty        The struct type used to address the members.
/// @param Struct    Pointer to the struct to load from.
/// @param Map       Receives the mapping from each old value to the value
///                  loaded for it.
void ParallelLoopGenerator::extractValuesFromStruct(
    SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
  for (unsigned Idx = 0, NumValues = OldValues.size(); Idx != NumValues;
       ++Idx) {
    Value *Original = OldValues[Idx];
    Value *Slot = Builder.CreateStructGEP(Ty, Struct, Idx);
    Value *Loaded = Builder.CreateLoad(Slot);
    Loaded->setName("polly.subfunc.arg." + Original->getName());
    Map[Original] = Loaded;
  }
}
/// Create the subfunction that executes the parallel loop.
///
/// The subfunction consists of the blocks 'polly.par.setup' (allocate the
/// bound variables and load the values from the context struct),
/// 'polly.par.checkNext' (ask for the next set of iterations),
/// 'polly.par.loadIVBounds' (load the bounds and run the loop created by
/// createLoop()) and 'polly.par.exit' (clean up and return).
///
/// @param Stride     The stride of the loop.
/// @param StructData The alloca of the context struct; used for its type and
///                   its allocated type only.
/// @param Data       The values stored in the context struct.
/// @param Map        Receives the mapping from the values in @p Data to the
///                   values loaded inside the subfunction.
/// @param SubFnPtr   Output: the newly created subfunction.
///
/// @return The induction variable of the loop inside the subfunction. On
///         return the builder points to the place where the loop body is to
///         be generated.
Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
SetVector<Value *> Data,
ValueMapT &Map, Function **SubFnPtr) {
BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
Function *SubFn = createSubFnDefinition();
LLVMContext &Context = SubFn->getContext();
// Store the previous basic block.
PrevBB = Builder.GetInsertBlock();
// Create basic blocks.
HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
// Register the new blocks in the dominator tree; the setup block is attached
// below the block the builder was in when this function was called.
DT.addNewBlock(HeaderBB, PrevBB);
DT.addNewBlock(ExitBB, HeaderBB);
DT.addNewBlock(CheckNextBB, HeaderBB);
DT.addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
// Cast the subfunction's argument back to the type of the context struct
// pointer.
UserContext = Builder.CreateBitCast(
&*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
Map);
Builder.CreateBr(CheckNextBB);
// Add code to check if another set of iterations will be executed.
Builder.SetInsertPoint(CheckNextBB);
Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
"polly.par.hasNextScheduleBlock");
Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
// Add code to load the iv bounds for this set of iterations.
Builder.SetInsertPoint(PreHeaderBB);
LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
// Subtract one as the upper bound provided by OpenMP is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
"polly.par.UBAdjusted");
Builder.CreateBr(CheckNextBB);
// Move the insert point in front of the branch just created, so that the
// loop is placed before the jump back to CheckNextBB.
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
nullptr, true, /* UseGuard */ false);
// Remember the loop body position; the builder is moved to ExitBB next.
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
// Add code to terminate this subfunction.
Builder.SetInsertPoint(ExitBB);
createCallCleanupThread();
Builder.CreateRetVoid();
Builder.SetInsertPoint(&*LoopBody);
*SubFnPtr = SubFn;
return IV;
}

View File

@@ -0,0 +1,442 @@
//===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Take a module and rewrite:
// 1. `malloc` -> `polly_mallocManaged`
// 2. `free` -> `polly_freeManaged`
// 3. global arrays with initializers -> global arrays that are initialized
// with a constructor call to
// `polly_mallocManaged`.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/PPCGCodeGeneration.h"
#include "polly/CodeGen/Utils.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
// Hidden command-line flag -polly-acc-rewrite-allocas (defaults to false).
// It is checked in ManagedMemoryRewritePass::runOnModule: when set, allocas
// selected by getAllocasToBeManaged are rewritten as well.
static cl::opt<bool> RewriteAllocas(
"polly-acc-rewrite-allocas",
cl::desc(
"Ask the managed memory rewriter to also rewrite alloca instructions"),
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
// Hidden command-line flag -polly-acc-rewrite-ignore-linkage-for-globals
// (defaults to false). It is checked in replaceGlobalArray: when set, global
// arrays are considered for rewriting even if their linkage is neither
// private nor internal.
static cl::opt<bool> IgnoreLinkageForGlobals(
"polly-acc-rewrite-ignore-linkage-for-globals",
cl::desc(
"By default, we only rewrite globals with internal linkage. This flag "
"enables rewriting of globals regardless of linkage"),
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
#define DEBUG_TYPE "polly-acc-rewrite-managed-memory"
namespace {
// Return the function named `polly_mallocManaged` of module `M`.
//
// If the module does not contain such a function yet, a declaration with
// external linkage and the signature `i8* (i64)` is added and returned.
static llvm::Function *getOrCreatePollyMallocManaged(Module &M) {
  const char *Name = "polly_mallocManaged";
  if (Function *Existing = M.getFunction(Name))
    return Existing;

  // Not available in the module yet: declare it.
  PollyIRBuilder Builder(M.getContext());
  // TODO: The size parameter is hard-coded to i64. `size_t` should probably
  // be derived from the DataLayout instead.
  FunctionType *Signature = FunctionType::get(
      Builder.getInt8PtrTy(), {Builder.getInt64Ty()}, /* isVarArg */ false);
  return Function::Create(Signature, Function::ExternalLinkage, Name, &M);
}
// Return the function named `polly_freeManaged` of module `M`.
//
// If the module does not contain such a function yet, a declaration with
// external linkage and the signature `void (i8*)` is added and returned.
static llvm::Function *getOrCreatePollyFreeManaged(Module &M) {
  const char *Name = "polly_freeManaged";
  if (Function *Existing = M.getFunction(Name))
    return Existing;

  // Not available in the module yet: declare it.
  PollyIRBuilder Builder(M.getContext());
  FunctionType *Signature = FunctionType::get(
      Builder.getVoidTy(), {Builder.getInt8PtrTy()}, /* isVarArg */ false);
  return Function::Create(Signature, Function::ExternalLinkage, Name, &M);
}
// Turn the constant expression `Cur` into real instructions.
//
// `Cur` is operand number `index` of the instruction `Parent`. It is
// converted into an instruction that is inserted directly in front of
// `Parent` and that takes the place of that operand. Operands of the new
// instruction that are `ConstantExpr`s themselves are expanded recursively
// in the same way, each in front of the instruction that uses it. Every
// instruction created on the way is added to `Expands`.
//
// The traversal handles an expression first and then its operands from left
// to right. For
//     A * ((B * D) + C)
// the product is expanded first, followed by A, then (B * D) + C, then
// B * D, then B, then D and finally C (each only if it is a `ConstantExpr`).
//
// ConstantExprs form DAGs rather than trees: the same sub-expression D can be
// both operands of a product. Such a shared sub-expression is expanded once
// per occurrence, i.e. the DAG is unfolded into the tree D * D with two
// separate copies of D.
// TODO: We do not have to do this, but it is the simplest solution. A more
// elaborate version could keep track of the constants that were already
// expanded.
static void expandConstantExpr(ConstantExpr *Cur, PollyIRBuilder &Builder,
                               Instruction *Parent, int index,
                               SmallPtrSet<Instruction *, 4> &Expands) {
  assert(Cur && "invalid constant expression passed");

  Instruction *Expanded = Cur->getAsInstruction();
  assert(Expanded && "unable to convert ConstantExpr to Instruction");

  DEBUG(dbgs() << "Expanding ConstantExpression: (" << *Cur
               << ") in Instruction: (" << *Expanded << ")\n";);

  // From here on only `Expanded` is supposed to be touched, so drop the
  // handle to the constant expression.
  Cur = nullptr;

  Expands.insert(Expanded);
  Parent->setOperand(index, Expanded);

  // Operands have to be defined ahead of their user, hence the new
  // instruction goes right in front of `Parent`.
  Builder.SetInsertPoint(Parent);
  Builder.Insert(Expanded);

  const unsigned NumOps = Expanded->getNumOperands();
  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
    Value *Op = Expanded->getOperand(OpIdx);
    assert(isa<Constant>(Op) && "constant must have a constant operand");
    if (auto *Nested = dyn_cast<ConstantExpr>(Op))
      expandConstantExpr(Nested, Builder, Expanded, OpIdx, Expands);
  }
}
// Replace every use of `OldVal` by `NewVal` in `Inst`, including uses that
// are buried inside `ConstantExpr` operands of `Inst`.
//
// `replaceAllUsesWith` alone is not sufficient here because it does not
// rewrite values inside `ConstantExpr`s. Therefore all `ConstantExpr`
// operands of `Inst` are expanded into instructions first (using `Builder`),
// and the replacement is then performed on `Inst` and on every instruction
// created by the expansion.
static void rewriteOldValToNew(Instruction *Inst, Value *OldVal, Value *NewVal,
                               PollyIRBuilder &Builder) {
  // Instructions in which `OldVal` has to be replaced: `Inst` itself plus
  // whatever the expansion of its constant operands produces.
  SmallPtrSet<Instruction *, 4> Worklist = {Inst};

  for (unsigned OpIdx = 0; OpIdx < Inst->getNumOperands(); ++OpIdx) {
    auto *ConstOperand = dyn_cast<ConstantExpr>(Inst->getOperand(OpIdx));
    if (!ConstOperand)
      continue;
    expandConstantExpr(ConstOperand, Builder, Inst, OpIdx, Worklist);
  }

  // None of the collected instructions has a `ConstantExpr` operand any
  // more, so a plain `replaceUsesOfWith` reaches every use.
  for (Instruction *Target : Worklist)
    Target->replaceUsesOfWith(OldVal, NewVal);
}
// Collect the instructions that (transitively) contain the value `V`.
//
// If `V` is an instruction it is appended to `Owners`. Otherwise `V` has to
// be a `Constant`; in that case the users of that constant are visited
// recursively until the instructions at the root are found. An instruction
// is appended once for every path that leads to it, so `Owners` may contain
// duplicates.
static void getInstructionUsersOfValue(Value *V,
                                       SmallVector<Instruction *, 4> &Owners) {
  if (auto *Inst = dyn_cast<Instruction>(V)) {
    Owners.push_back(Inst);
    return;
  }

  // Anything that is a `User` must be a constant or an instruction, so this
  // has to be a constant.
  auto *Const = cast<Constant>(V);
  for (User *ConstUser : Const->users())
    getInstructionUsersOfValue(ConstUser, Owners);
}
// Replace the zero-initialized global array `Array` by a pointer to managed
// memory.
//
// Nothing is modified unless all of the following hold:
//   - the value type of `Array` is an array type,
//   - `Array` has private or internal linkage, or the flag
//     -polly-acc-rewrite-ignore-linkage-for-globals is set,
//   - `Array` has an initializer and that initializer is a zeroinitializer.
//
// For a global that passes these checks this function
//   1. records it in `ReplacedGlobals` (it is not erased here),
//   2. creates the global `<name>.toptr` of type "pointer to array element",
//      initialized to null,
//   3. creates the function `<name>.constructor` which calls
//      `polly_mallocManaged` with the alloc size of the array type and
//      stores the result into `<name>.toptr`, and appends that function to
//      the global constructors with priority 0,
//   4. rewrites every instruction that uses `Array`, directly or through
//      constant expressions, to use a load of `<name>.toptr` (bitcasted to
//      a pointer to the array type) instead.
static void
replaceGlobalArray(Module &M, const DataLayout &DL, GlobalVariable &Array,
                   SmallPtrSet<GlobalVariable *, 4> &ReplacedGlobals) {
  // Only arrays are of interest.
  auto *ArrayTy = dyn_cast<ArrayType>(Array.getType()->getElementType());
  if (!ArrayTy)
    return;

  PointerType *ElemPtrTy = ArrayTy->getElementType()->getPointerTo();

  // Changing the type from [T] to T* would be illegal across modules, so
  // only arrays that cannot be seen from outside of this module are
  // rewritten, unless the user explicitly asked to ignore the linkage.
  const bool MayBeVisibleExternally = !Array.hasPrivateLinkage() &&
                                      !Array.hasInternalLinkage() &&
                                      !IgnoreLinkageForGlobals;
  if (MayBeVisibleExternally) {
    DEBUG(dbgs() << "Not rewriting (" << Array
                 << ") to managed memory "
                    "because it could be visible externally. To force rewrite, "
                    "use -polly-acc-rewrite-ignore-linkage-for-globals.\n");
    return;
  }

  const bool IsZeroInitialized =
      Array.hasInitializer() &&
      isa<ConstantAggregateZero>(Array.getInitializer());
  if (!IsZeroInitialized) {
    DEBUG(dbgs() << "Not rewriting (" << Array
                 << ") to managed memory "
                    "because it has an initializer which is "
                    "not a zeroinitializer.\n");
    return;
  }

  // From this point on the array is going to be replaced.
  ReplacedGlobals.insert(&Array);

  // The pointer that takes the place of the array.
  const std::string PtrName = (Array.getName() + ".toptr").str();
  auto *ReplacementToArr =
      cast<GlobalVariable>(M.getOrInsertGlobal(PtrName, ElemPtrTy));
  ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy));

  Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M);

  // The constructor that allocates the managed memory and publishes it
  // through the pointer.
  PollyIRBuilder Builder(M.getContext());
  Function *Ctor = Function::Create(
      FunctionType::get(Builder.getVoidTy(), false), Function::ExternalLinkage,
      Array.getName() + ".constructor", &M);
  Builder.SetInsertPoint(BasicBlock::Create(M.getContext(), "entry", Ctor));

  Value *ArraySize = Builder.getInt64(DL.getTypeAllocSize(ArrayTy));
  ArraySize->setName("array.size");
  Value *RawMem =
      Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw");
  Value *TypedMem = Builder.CreatePointerCast(RawMem, ElemPtrTy, "mem.typed");
  Builder.CreateStore(TypedMem, ReplacementToArr);
  Builder.CreateRetVoid();

  appendToGlobalCtors(M, Ctor, /* Priority */ 0, ReplacementToArr);

  // Gather all instructions that use the array. `Constant`s that contain the
  // array cannot be edited easily; they are expanded into instructions later
  // on so that all uses of the array can be handled uniformly. Hence the
  // instructions owning such constants are collected as well.
  SmallVector<Instruction *, 4> ArrayUsers;
  for (User *DirectUser : Array.users())
    getInstructionUsersOfValue(DirectUser, ArrayUsers);

  for (Instruction *UserInst : ArrayUsers) {
    Builder.SetInsertPoint(UserInst);
    // <ty>** -> <ty>*
    Value *LoadedPtr = Builder.CreateLoad(ReplacementToArr, "arrptr.load");
    // <ty>* -> [ty]*
    Value *AsArrayPtr = Builder.CreateBitCast(
        LoadedPtr, ArrayTy->getPointerTo(), "arrptr.bitcast");
    rewriteOldValToNew(UserInst, &Array, AsArrayPtr, Builder);
  }
}
// Collect the allocas of `F` that may need to be converted into a managed
// memory allocation.
//
// An alloca is added to `Allocas` if `PointerMayBeCaptured` reports that its
// pointer may be captured. Captures through stores are taken into account,
// captures through returns are not.
static void getAllocasToBeManaged(Function &F,
                                  SmallSet<AllocaInst *, 4> &Allocas) {
  for (BasicBlock &Block : F) {
    for (Instruction &Inst : Block) {
      if (!isa<AllocaInst>(&Inst))
        continue;
      auto *Alloca = cast<AllocaInst>(&Inst);

      DEBUG(dbgs() << "Checking if (" << *Alloca << ") may be captured: ");

      const bool Captured =
          PointerMayBeCaptured(Alloca, /* ReturnCaptures */ false,
                               /* StoreCaptures */ true);
      if (!Captured) {
        DEBUG(dbgs() << "NO (not captured).\n");
        continue;
      }

      Allocas.insert(Alloca);
      DEBUG(dbgs() << "YES (captured).\n");
    }
  }
}
// Replace `Alloca` by a call to `polly_mallocManaged` and release the memory
// again with `polly_freeManaged` in front of every `ret` of the function.
//
// The number of bytes requested is the alloc size of the allocated type
// (taken from `DL`) multiplied by the number of elements of the alloca. The
// element count matters: `alloca T, N` reserves room for N objects of type
// T, so requesting only the size of a single T would under-allocate.
//
// The result of the allocation is bitcasted to the type of the alloca, takes
// over its name and replaces all of its uses; the alloca itself is erased.
//
// Only blocks terminated by a return instruction receive a call to
// `polly_freeManaged`; other ways of leaving the function are not handled.
static void rewriteAllocaAsManagedMemory(AllocaInst *Alloca,
                                         const DataLayout &DL) {
  DEBUG(dbgs() << "rewriting: (" << *Alloca << ") to managed mem.\n");
  Module *M = Alloca->getModule();
  assert(M && "Alloca does not have a module");
  // Look up the function before the alloca is erased.
  Function *F = Alloca->getFunction();
  assert(F && "Alloca has invalid function");

  PollyIRBuilder Builder(M->getContext());
  Builder.SetInsertPoint(Alloca);

  Value *MallocManagedFn = getOrCreatePollyMallocManaged(*M);

  // Size of a single element ...
  const uint64_t ElemSize = DL.getTypeAllocSize(Alloca->getAllocatedType());
  Value *SizeVal = Builder.getInt64(ElemSize);
  // ... scaled by the element count for `alloca T, N`. The count operand is
  // defined before the alloca, so it is available at the insert point. It
  // can be of any integer type and is brought to i64 first.
  if (Alloca->isArrayAllocation()) {
    Value *NumElems = Builder.CreateZExtOrTrunc(Alloca->getArraySize(),
                                                Builder.getInt64Ty());
    SizeVal = Builder.CreateMul(SizeVal, NumElems);
  }

  Value *RawManagedMem = Builder.CreateCall(MallocManagedFn, {SizeVal});
  Value *Bitcasted = Builder.CreateBitCast(RawManagedMem, Alloca->getType());

  Bitcasted->takeName(Alloca);
  Alloca->replaceAllUsesWith(Bitcasted);
  Alloca->eraseFromParent();

  // Declared on demand so that functions without a return instruction do not
  // pull a declaration of `polly_freeManaged` into the module.
  Value *FreeManagedFn = nullptr;
  for (BasicBlock &BB : *F) {
    ReturnInst *Return = dyn_cast<ReturnInst>(BB.getTerminator());
    if (!Return)
      continue;
    if (!FreeManagedFn)
      FreeManagedFn = getOrCreatePollyFreeManaged(*M);
    Builder.SetInsertPoint(Return);
    Builder.CreateCall(FreeManagedFn, {RawManagedMem});
  }
}
// Replace all uses of `Old` with `New`, even inside `ConstantExpr`s.
//
// `replaceAllUsesWith` does not replace values inside `ConstantExpr`s; this
// function does. The caveat is that a use *outside* of a function (say, in a
// global declaration) is not supported. This is therefore meant for values
// that are known to be used within functions only.
//
// The uses of `Old` are walked; whenever a user is a constant, the
// instructions owning that constant are looked up recursively. For each of
// the instructions found, the `ConstantExpr` operands are expanded into
// instructions and `Old` is replaced by `New` in the instruction and in the
// expanded instructions.
static void replaceAllUsesAndConstantUses(Value *Old, Value *New,
                                          PollyIRBuilder &Builder) {
  // `Constant`s cannot be edited easily, so every use is traced back to the
  // instructions that own it; the constants are turned into instructions
  // when those owners are rewritten below.
  SmallVector<Instruction *, 4> Owners;
  for (User *DirectUser : Old->users())
    getInstructionUsersOfValue(DirectUser, Owners);

  for (Instruction *Owner : Owners)
    rewriteOldValToNew(Owner, Old, New, Builder);
}
class ManagedMemoryRewritePass : public ModulePass {
public:
static char ID;
GPUArch Architecture;
GPURuntime Runtime;
ManagedMemoryRewritePass() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
const DataLayout &DL = M.getDataLayout();
Function *Malloc = M.getFunction("malloc");
if (Malloc) {
PollyIRBuilder Builder(M.getContext());
Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M);
assert(PollyMallocManaged && "unable to create polly_mallocManaged");
replaceAllUsesAndConstantUses(Malloc, PollyMallocManaged, Builder);
Malloc->eraseFromParent();
}
Function *Free = M.getFunction("free");
if (Free) {
PollyIRBuilder Builder(M.getContext());
Function *PollyFreeManaged = getOrCreatePollyFreeManaged(M);
assert(PollyFreeManaged && "unable to create polly_freeManaged");
replaceAllUsesAndConstantUses(Free, PollyFreeManaged, Builder);
Free->eraseFromParent();
}
SmallPtrSet<GlobalVariable *, 4> GlobalsToErase;
for (GlobalVariable &Global : M.globals())
replaceGlobalArray(M, DL, Global, GlobalsToErase);
for (GlobalVariable *G : GlobalsToErase)
G->eraseFromParent();
// Rewrite allocas to cudaMallocs if we are asked to do so.
if (RewriteAllocas) {
SmallSet<AllocaInst *, 4> AllocasToBeManaged;
for (Function &F : M.functions())
getAllocasToBeManaged(F, AllocasToBeManaged);
for (AllocaInst *Alloca : AllocasToBeManaged)
rewriteAllocaAsManagedMemory(Alloca, DL);
}
return true;
}
};
} // namespace
// Definition of the static pass identifier that is handed to the ModulePass
// constructor.
// NOTE(review): LLVM passes are normally identified by the address of ID, so
// the value 42 is presumably arbitrary - confirm.
char ManagedMemoryRewritePass::ID = 42;
// Create a ManagedMemoryRewritePass and store the target architecture `Arch`
// and the GPU runtime `Runtime` in it. The caller receives the raw pointer
// to the newly allocated pass.
Pass *polly::createManagedMemoryRewritePassPass(GPUArch Arch,
                                                GPURuntime Runtime) {
  auto *Rewriter = new ManagedMemoryRewritePass();
  Rewriter->Architecture = Arch;
  Rewriter->Runtime = Runtime;
  return Rewriter;
}
// Register the pass under the command-line name
// "polly-acc-rewrite-managed-memory" together with the passes listed as its
// dependencies.
// NOTE(review): ManagedMemoryRewritePass does not override getAnalysisUsage
// in this file, so the dependencies listed here may not actually be needed -
// confirm before relying on them.
INITIALIZE_PASS_BEGIN(
ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
"Polly - Rewrite all allocations in heap & data section to managed memory",
false, false)
INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration);
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(
ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
"Polly - Rewrite all allocations in heap & data section to managed memory",
false, false)

View File

@@ -0,0 +1 @@
d6652146483f6426c13e661b939c1fc78abc4008

Some files were not shown because too many files have changed in this diff Show More