Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions
--- a/external/llvm-project/polly/lib/Analysis/DependenceInfo.cpp
+++ b/external/llvm-project/polly/lib/Analysis/DependenceInfo.cpp
--- a/external/llvm-project/polly/lib/Analysis/PolyhedralInfo.cpp
+++ b/external/llvm-project/polly/lib/Analysis/PolyhedralInfo.cpp
@@ -0,0 +1,163 @@
+//===--------- PolyhedralInfo.cpp  - Create Scops from LLVM IR-------------===//
+///
+///                     The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+//===----------------------------------------------------------------------===//
+///
+/// An interface to the Polyhedral analysis engine(Polly) of LLVM.
+///
+/// This pass provides an interface to the polyhedral analysis performed by
+/// Polly.
+///
+/// This interface provides basic queries such as isParallel and isVectorizable
+/// that can be used in LLVM transformation passes.
+///
+/// Work in progress, this file is subject to change.
+//===----------------------------------------------------------------------===//
+
+#include "polly/PolyhedralInfo.h"
+#include "polly/DependenceInfo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/GICHelper.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/Debug.h"
+#include <isl/map.h>
+#include <isl/union_map.h>
+
+using namespace llvm;
+using namespace polly;
+
+#define DEBUG_TYPE "polyhedral-info"
+
+// -polly-check-parallel: when set, print() reports for every loop whether it
+// is parallel.
+static cl::opt<bool> CheckParallel("polly-check-parallel",
+                                   cl::desc("Check for parallel loops"),
+                                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
+                                   cl::cat(PollyCategory));
+
+// -polly-check-vectorizable: declared here, but not read anywhere in this
+// file.
+static cl::opt<bool> CheckVectorizable("polly-check-vectorizable",
+                                       cl::desc("Check for vectorizable loops"),
+                                       cl::Hidden, cl::init(false),
+                                       cl::ZeroOrMore, cl::cat(PollyCategory));
+
+/// Declare the analyses this pass needs and state that it preserves all
+/// analyses.
+///
+/// DependenceInfo and ScopInfo are requested with addRequiredTransitive,
+/// presumably because the pointers cached in runOnFunction() are used by later
+/// queries such as isParallel() (assumption, confirm against the callers).
+void PolyhedralInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<DependenceInfoWrapperPass>();
+  AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequiredTransitive<ScopInfoWrapperPass>();
+  AU.setPreservesAll();
+}
+
+/// Cache the dependence analysis pass (DI) and the ScopInfo result (SI) for
+/// the later queries of this pass. Always returns false.
+bool PolyhedralInfo::runOnFunction(Function &F) {
+  DI = &getAnalysis<DependenceInfoWrapperPass>();
+  SI = getAnalysis<ScopInfoWrapperPass>().getSI();
+  return false;
+}
+
+/// Print one line per loop of the function: the name of the loop header,
+/// followed by the parallelism verdict when -polly-check-parallel is set.
+///
+/// Loops are visited depth-first below each top-level loop. Every loop gets
+/// its own, newline-terminated output line, also when no verdict is printed.
+void PolyhedralInfo::print(raw_ostream &OS, const Module *) const {
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  for (auto *TopLevelLoop : LI) {
+    for (auto *L : depth_first(TopLevelLoop)) {
+      OS.indent(2) << L->getHeader()->getName() << ":\t";
+
+      if (!CheckParallel) {
+        // No verdict requested: still terminate the line, otherwise all loop
+        // names end up concatenated on a single line.
+        OS << "\n";
+        continue;
+      }
+
+      if (isParallel(L))
+        OS << "Loop is parallel.\n";
+      else
+        OS << "Loop is not parallel.\n";
+    }
+  }
+}
+
+/// Decide whether the iterations of loop L can run in parallel.
+///
+/// Returns false when L is not inside any known SCoP or when that SCoP has no
+/// valid dependences. Otherwise the RAW, WAW, WAR and reduction dependences
+/// are tested against the partial schedule of L via Dependences::isParallel,
+/// to which MinDepDistPtr is forwarded unchanged.
+bool PolyhedralInfo::checkParallel(Loop *L, isl_pw_aff **MinDepDistPtr) const {
+  const Scop *EnclosingScop = getScopContainingLoop(L);
+  if (EnclosingScop == nullptr)
+    return false;
+
+  const Dependences &ScopDeps = DI->getDependences(
+      const_cast<Scop *>(EnclosingScop), Dependences::AL_Access);
+  if (!ScopDeps.hasValidDependences())
+    return false;
+
+  DEBUG(dbgs() << "Loop :\t" << L->getHeader()->getName() << ":\n");
+
+  isl_union_map *Deps = ScopDeps.getDependences(
+      Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR |
+      Dependences::TYPE_RED);
+  DEBUG(dbgs() << "Dependences :\t" << stringFromIslObj(Deps) << "\n");
+
+  isl_union_map *LoopSchedule = getScheduleForLoop(EnclosingScop, L);
+  DEBUG(dbgs() << "Schedule: \t" << stringFromIslObj(LoopSchedule) << "\n");
+
+  // The schedule is released here; Deps is not, so isParallel() presumably
+  // takes ownership of it (confirm against Dependences::isParallel).
+  const bool Result = ScopDeps.isParallel(LoopSchedule, Deps, MinDepDistPtr);
+  isl_union_map_free(LoopSchedule);
+  return Result;
+}
+
+/// Forward to checkParallel(), passing only the loop.
+bool PolyhedralInfo::isParallel(Loop *L) const {
+  return checkParallel(L);
+}
+
+/// Return the Scop of the first region recorded in SI that contains L, or
+/// nullptr when no such region exists.
+const Scop *PolyhedralInfo::getScopContainingLoop(Loop *L) const {
+  assert((SI) && "ScopInfoWrapperPass is required by PolyhedralInfo pass!\n");
+  for (auto &RegionAndScop : *SI) {
+    if (!RegionAndScop.first->contains(L))
+      continue;
+    return RegionAndScop.second.get();
+  }
+  return nullptr;
+}
+
+//  Given a Loop and the containing SCoP, we compute the partial schedule
+//  by taking the union of the individual schedules of each ScopStmt within the
+//  loop and projecting out the inner dimensions from the range of the schedule.
+//   for (i = 0; i < n; i++)
+//      for (j = 0; j < n; j++)
+//        A[j] = 1;  //Stmt
+//
+//  The original schedule will be
+//    Stmt[i0, i1] -> [i0, i1]
+//  The schedule for the outer loop will be
+//    Stmt[i0, i1] -> [i0]
+//  The schedule for the inner loop will be
+//    Stmt[i0, i1] -> [i0, i1]
+//
+//  The returned union map is handed to the caller (__isl_give), who has to
+//  free it.
+__isl_give isl_union_map *PolyhedralInfo::getScheduleForLoop(const Scop *S,
+                                                             Loop *L) const {
+  // Start from an empty union map in the parameter space of the SCoP.
+  isl_union_map *Schedule = isl_union_map_empty(S->getParamSpace().release());
+  int CurrDim = S->getRelativeLoopDepth(L);
+  DEBUG(dbgs() << "Relative loop depth:\t" << CurrDim << "\n");
+  assert(CurrDim >= 0 && "Loop in region should have at least depth one");
+
+  for (auto &SS : *S) {
+    // Only statements whose surrounding loop is contained in L contribute.
+    if (L->contains(SS.getSurroundingLoop())) {
+
+      unsigned int MaxDim = SS.getNumIterators();
+      DEBUG(dbgs() << "Maximum depth of Stmt:\t" << MaxDim << "\n");
+      isl_map *ScheduleMap = SS.getSchedule().release();
+      assert(
+          ScheduleMap &&
+          "Schedules that contain extension nodes require special handling.");
+
+      // Drop the output dimensions after position CurrDim: the first removed
+      // dimension is CurrDim + 1 and MaxDim - CurrDim - 1 dimensions go away.
+      ScheduleMap = isl_map_project_out(ScheduleMap, isl_dim_out, CurrDim + 1,
+                                        MaxDim - CurrDim - 1);
+      // Put the statement's domain id on the input tuple of the map.
+      ScheduleMap = isl_map_set_tuple_id(ScheduleMap, isl_dim_in,
+                                         SS.getDomainId().release());
+      Schedule =
+          isl_union_map_union(Schedule, isl_union_map_from_map(ScheduleMap));
+    }
+  }
+  Schedule = isl_union_map_coalesce(Schedule);
+  return Schedule;
+}
+
+// Pass identification.
+char PolyhedralInfo::ID = 0;
+
+/// Create a new PolyhedralInfo pass; the caller receives the raw pointer.
+Pass *polly::createPolyhedralInfoPass() { return new PolyhedralInfo(); }
+
+// Register the pass as "polyhedral-info" together with the three analyses it
+// depends on.
+INITIALIZE_PASS_BEGIN(PolyhedralInfo, "polyhedral-info",
+                      "Polly - Interface to polyhedral analysis engine", false,
+                      false);
+INITIALIZE_PASS_DEPENDENCY(DependenceInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass);
+INITIALIZE_PASS_END(PolyhedralInfo, "polyhedral-info",
+                    "Polly - Interface to polyhedral analysis engine", false,
+                    false)
--- a/external/llvm-project/polly/lib/Analysis/PruneUnprofitable.cpp
+++ b/external/llvm-project/polly/lib/Analysis/PruneUnprofitable.cpp
@@ -0,0 +1,105 @@
+//===- PruneUnprofitable.cpp ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mark a SCoP as unfeasible if not deemed profitable to optimize.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/PruneUnprofitable.h"
+#include "polly/ScopDetection.h"
+#include "polly/ScopInfo.h"
+#include "polly/ScopPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace polly;
+
+#define DEBUG_TYPE "polly-prune-unprofitable"
+
+namespace {
+
+STATISTIC(ScopsProcessed,
+          "Number of SCoPs considered for unprofitability pruning");
+STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be "
+                       "optimized in a significant way");
+STATISTIC(ScopsSurvived, "Number of SCoPs after pruning");
+
+STATISTIC(NumPrunedLoops, "Number of pruned loops");
+STATISTIC(NumPrunedBoxedLoops, "Number of pruned boxed loops");
+STATISTIC(NumPrunedAffineLoops, "Number of pruned affine loops");
+
+STATISTIC(NumLoopsInScop, "Number of loops in scops after pruning");
+STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after pruning");
+STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after pruning");
+
+/// ScopPass that invalidates SCoPs which Scop::isProfitable() rejects and
+/// keeps statistics about the pruned and the surviving SCoPs.
+class PruneUnprofitable : public ScopPass {
+private:
+  /// Add the loop counts of S to the "pruned" counters when Pruned is true,
+  /// otherwise to the "survived" counters.
+  void updateStatistics(Scop &S, bool Pruned) {
+    auto ScopStats = S.getStatistics();
+    const auto AffineLoops = ScopStats.NumAffineLoops;
+    const auto BoxedLoops = ScopStats.NumBoxedLoops;
+
+    if (!Pruned) {
+      ScopsSurvived++;
+      NumLoopsInScop += AffineLoops + BoxedLoops;
+      NumBoxedLoops += BoxedLoops;
+      NumAffineLoops += AffineLoops;
+      return;
+    }
+
+    ScopsPruned++;
+    NumPrunedLoops += AffineLoops + BoxedLoops;
+    NumPrunedBoxedLoops += BoxedLoops;
+    NumPrunedAffineLoops += AffineLoops;
+  }
+
+public:
+  static char ID;
+
+  explicit PruneUnprofitable() : ScopPass(ID) {}
+  PruneUnprofitable(const PruneUnprofitable &) = delete;
+  PruneUnprofitable &operator=(const PruneUnprofitable &) = delete;
+
+  /// Require the Scop of the region; all analyses are preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScopInfoRegionPass>();
+    AU.setPreservesAll();
+  }
+
+  /// Invalidate S when it is not profitable. Does nothing at all when
+  /// PollyProcessUnprofitable is set. Always returns false.
+  bool runOnScop(Scop &S) override {
+    if (PollyProcessUnprofitable) {
+      DEBUG(dbgs() << "NOTE: -polly-process-unprofitable active, won't prune "
+                      "anything\n");
+      return false;
+    }
+
+    ScopsProcessed++;
+
+    const bool Prune = !S.isProfitable(true);
+    if (Prune) {
+      DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in "
+                      "a significant way\n");
+      S.invalidate(PROFITABLE, DebugLoc());
+    }
+    // The statistics are read after the (possible) invalidation.
+    updateStatistics(S, Prune);
+
+    return false;
+  }
+};
+
+} // namespace
+
+// Pass identification; no explicit initializer is given.
+char PruneUnprofitable::ID;
+
+/// Create a new PruneUnprofitable pass; the caller receives the raw pointer.
+Pass *polly::createPruneUnprofitablePass() { return new PruneUnprofitable(); }
+
+// Register the pass as "polly-prune-unprofitable"; no dependencies are listed.
+INITIALIZE_PASS_BEGIN(PruneUnprofitable, "polly-prune-unprofitable",
+                      "Polly - Prune unprofitable SCoPs", false, false)
+INITIALIZE_PASS_END(PruneUnprofitable, "polly-prune-unprofitable",
+                    "Polly - Prune unprofitable SCoPs", false, false)
--- a/external/llvm-project/polly/lib/Analysis/ScopBuilder.cpp
+++ b/external/llvm-project/polly/lib/Analysis/ScopBuilder.cpp
--- a/external/llvm-project/polly/lib/Analysis/ScopDetection.cpp
+++ b/external/llvm-project/polly/lib/Analysis/ScopDetection.cpp
--- a/external/llvm-project/polly/lib/Analysis/ScopDetectionDiagnostic.cpp
+++ b/external/llvm-project/polly/lib/Analysis/ScopDetectionDiagnostic.cpp
--- a/external/llvm-project/polly/lib/Analysis/ScopGraphPrinter.cpp
+++ b/external/llvm-project/polly/lib/Analysis/ScopGraphPrinter.cpp
@@ -0,0 +1,266 @@
+//===- ScopGraphPrinter.cpp - Create a DOT output describing the Scop. ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Create a DOT output describing the Scop.
+//
+// For each function a dot file is created that shows the control flow graph of
+// the function and highlights the detected Scops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/LinkAllPasses.h"
+#include "polly/ScopDetection.h"
+#include "polly/Support/ScopLocation.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace polly;
+using namespace llvm;
+// -polly-view-only: ScopViewer skips functions whose name does not contain
+// this string; the empty default disables the filter.
+static cl::opt<std::string>
+    ViewFilter("polly-view-only",
+               cl::desc("Only view functions that match this pattern"),
+               cl::Hidden, cl::init(""), cl::ZeroOrMore);
+
+// -polly-view-all: ScopViewer also shows functions in which no scop was
+// detected.
+static cl::opt<bool> ViewAll("polly-view-all",
+                             cl::desc("Also show functions without any scops"),
+                             cl::Hidden, cl::init(false), cl::ZeroOrMore);
+
+namespace llvm {
+/// Graph traits for ScopDetection: the graph is the region graph of the
+/// RegionInfo returned by ScopDetection::getRI().
+template <>
+struct GraphTraits<ScopDetection *> : public GraphTraits<RegionInfo *> {
+  /// Entry node of the underlying RegionInfo graph.
+  static NodeRef getEntryNode(ScopDetection *SD) {
+    return GraphTraits<RegionInfo *>::getEntryNode(SD->getRI());
+  }
+  /// Node iteration starts at, and is bounded by, the entry node.
+  static nodes_iterator nodes_begin(ScopDetection *SD) {
+    return nodes_iterator::begin(getEntryNode(SD));
+  }
+  static nodes_iterator nodes_end(ScopDetection *SD) {
+    return nodes_iterator::end(getEntryNode(SD));
+  }
+};
+
+/// Graph traits for the legacy wrapper pass: everything is forwarded to the
+/// ScopDetection object returned by ScopDetectionWrapperPass::getSD().
+template <>
+struct GraphTraits<ScopDetectionWrapperPass *>
+    : public GraphTraits<ScopDetection *> {
+  /// Entry node of the wrapped ScopDetection graph.
+  static NodeRef getEntryNode(ScopDetectionWrapperPass *P) {
+    return GraphTraits<ScopDetection *>::getEntryNode(&P->getSD());
+  }
+  /// Node iteration starts at, and is bounded by, the entry node.
+  static nodes_iterator nodes_begin(ScopDetectionWrapperPass *P) {
+    return nodes_iterator::begin(getEntryNode(P));
+  }
+  static nodes_iterator nodes_end(ScopDetectionWrapperPass *P) {
+    return nodes_iterator::end(getEntryNode(P));
+  }
+};
+
+/// DOT traits for single region nodes: basic-block nodes get the same label
+/// the CFG printer uses, sub-region nodes get a placeholder label.
+template <> struct DOTGraphTraits<RegionNode *> : public DefaultDOTGraphTraits {
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+  /// Label of Node; the Graph argument is not used.
+  std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+    // Sub-regions have no label of their own.
+    if (Node->isSubRegion())
+      return "Not implemented";
+
+    BasicBlock *Block = Node->getNodeAs<BasicBlock>();
+    using CFGTraits = DOTGraphTraits<const Function *>;
+
+    if (isSimple())
+      return CFGTraits::getSimpleNodeLabel(Block, Block->getParent());
+    return CFGTraits::getCompleteNodeLabel(Block, Block->getParent());
+  }
+};
+
+/// DOT rendering rules for the scop graph of a function: nodes are labelled
+/// like region nodes, and every region is drawn as a cluster labelled with its
+/// source location and the message from ScopDetection::regionIsInvalidBecause.
+template <>
+struct DOTGraphTraits<ScopDetectionWrapperPass *>
+    : public DOTGraphTraits<RegionNode *> {
+  DOTGraphTraits(bool isSimple = false)
+      : DOTGraphTraits<RegionNode *>(isSimple) {}
+
+  /// Title of the graph; independent of the pass instance.
+  static std::string getGraphName(ScopDetectionWrapperPass *SD) {
+    return "Scop Graph";
+  }
+
+  /// Return "constraint=false" for edges that are treated as backedges, and
+  /// the empty string for all other edges (including every edge that touches
+  /// a sub-region node).
+  std::string getEdgeAttributes(RegionNode *srcNode,
+                                GraphTraits<RegionInfo *>::ChildIteratorType CI,
+                                ScopDetectionWrapperPass *P) {
+    RegionNode *destNode = *CI;
+    auto *SD = &P->getSD();
+
+    if (srcNode->isSubRegion() || destNode->isSubRegion())
+      return "";
+
+    // In case of a backedge, do not use it to define the layout of the nodes.
+    BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+    BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+    RegionInfo *RI = SD->getRI();
+    Region *R = RI->getRegionFor(destBB);
+
+    // Climb to the outermost region that still has destBB as its entry.
+    while (R && R->getParent())
+      if (R->getParent()->getEntry() == destBB)
+        R = R->getParent();
+      else
+        break;
+
+    // An edge into the entry of a region from inside that region is a
+    // backedge.
+    if (R && R->getEntry() == destBB && R->contains(srcBB))
+      return "constraint=false";
+
+    return "";
+  }
+
+  /// Label of Node, computed by the RegionNode traits.
+  std::string getNodeLabel(RegionNode *Node, ScopDetectionWrapperPass *P) {
+    return DOTGraphTraits<RegionNode *>::getNodeLabel(
+        Node, reinterpret_cast<RegionNode *>(
+                  P->getSD().getRI()->getTopLevelRegion()));
+  }
+
+  /// Return a copy of String in which every double quote is preceded by a
+  /// backslash, so the text can be placed inside a quoted DOT string.
+  static std::string escapeString(std::string String) {
+    std::string Escaped;
+
+    for (const auto &C : String) {
+      if (C == '"')
+        Escaped += '\\';
+
+      Escaped += C;
+    }
+    return Escaped;
+  }
+
+  // Print the cluster of the subregions. This groups the single basic blocks
+  // and adds a different background color for each group.
+  //
+  // SD    - scop detection result used for the label and the scop test.
+  // R     - region to print; its sub-regions are printed recursively.
+  // O     - stream the DOT text is written to.
+  // depth - nesting level, only used for the indentation of the output.
+  static void printRegionCluster(const ScopDetection *SD, const Region *R,
+                                 raw_ostream &O, unsigned depth = 0) {
+    O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void *>(R)
+                        << " {\n";
+    unsigned LineBegin, LineEnd;
+    std::string FileName;
+
+    getDebugLocation(R, LineBegin, LineEnd, FileName);
+
+    // The location is only added when getDebugLocation() set LineBegin to
+    // something other than (unsigned)-1.
+    std::string Location;
+    if (LineBegin != (unsigned)-1) {
+      Location = escapeString(FileName + ":" + std::to_string(LineBegin) + "-" +
+                              std::to_string(LineEnd) + "\n");
+    }
+
+    std::string ErrorMessage = SD->regionIsInvalidBecause(R);
+    ErrorMessage = escapeString(ErrorMessage);
+    O.indent(2 * (depth + 1))
+        << "label = \"" << Location << ErrorMessage << "\";\n";
+
+    if (SD->isMaxRegionInScop(*R)) {
+      O.indent(2 * (depth + 1)) << "style = filled;\n";
+
+      // Set color to green. The statement is newline-terminated like the one
+      // in the other branch, so the next statement starts on its own line.
+      O.indent(2 * (depth + 1)) << "color = 3\n";
+    } else {
+      O.indent(2 * (depth + 1)) << "style = solid;\n";
+
+      int color = (R->getDepth() * 2 % 12) + 1;
+
+      // We do not want green again.
+      if (color == 3)
+        color = 6;
+
+      O.indent(2 * (depth + 1)) << "color = " << color << "\n";
+    }
+
+    for (const auto &SubRegion : *R)
+      printRegionCluster(SD, SubRegion.get(), O, depth + 1);
+
+    RegionInfo *RI = R->getRegionInfo();
+
+    // List only the blocks for which R is the region returned by
+    // getRegionFor(); the others are listed by the recursive calls above.
+    for (const auto &BB : R->blocks())
+      if (RI->getRegionFor(BB) == R)
+        O.indent(2 * (depth + 1))
+            << "Node"
+            << static_cast<void *>(RI->getTopLevelRegion()->getBBNode(BB))
+            << ";\n";
+
+    O.indent(2 * depth) << "}\n";
+  }
+
+  /// Select the "paired12" color scheme and print the region clusters,
+  /// starting at the top-level region.
+  static void
+  addCustomGraphFeatures(const ScopDetectionWrapperPass *SD,
+                         GraphWriter<ScopDetectionWrapperPass *> &GW) {
+    raw_ostream &O = GW.getOStream();
+    O << "\tcolorscheme = \"paired12\"\n";
+    printRegionCluster(&SD->getSD(), SD->getSD().getRI()->getTopLevelRegion(),
+                       O, 4);
+  }
+};
+
+} // end namespace llvm
+
+/// Viewer pass "scops": shows the scop graph including function bodies.
+struct ScopViewer
+    : public DOTGraphTraitsViewer<ScopDetectionWrapperPass, false> {
+  static char ID;
+  ScopViewer()
+      : DOTGraphTraitsViewer<ScopDetectionWrapperPass, false>("scops", ID) {}
+
+  /// Decide whether F is shown: it has to pass the -polly-view-only name
+  /// filter, and either -polly-view-all is set or a scop was detected.
+  bool processFunction(Function &F, ScopDetectionWrapperPass &SD) override {
+    if (ViewFilter != "" && F.getName().count(ViewFilter) == 0)
+      return false;
+
+    // With -polly-view-all every remaining function is shown; otherwise at
+    // least one scop must have been detected.
+    return ViewAll ||
+           std::distance(SD.getSD().begin(), SD.getSD().end()) > 0;
+  }
+};
+char ScopViewer::ID = 0;
+
+/// Viewer pass "scopsonly": same base as ScopViewer, instantiated with the
+/// boolean template argument set to true.
+struct ScopOnlyViewer
+    : public DOTGraphTraitsViewer<ScopDetectionWrapperPass, true> {
+  static char ID;
+  ScopOnlyViewer()
+      : DOTGraphTraitsViewer<ScopDetectionWrapperPass, true>("scopsonly", ID) {}
+};
+char ScopOnlyViewer::ID = 0;
+
+/// Printer pass "scops", built on DOTGraphTraitsPrinter instead of the viewer.
+struct ScopPrinter
+    : public DOTGraphTraitsPrinter<ScopDetectionWrapperPass, false> {
+  static char ID;
+  ScopPrinter()
+      : DOTGraphTraitsPrinter<ScopDetectionWrapperPass, false>("scops", ID) {}
+};
+char ScopPrinter::ID = 0;
+
+/// Printer pass "scopsonly": ScopPrinter's base with the boolean template
+/// argument set to true.
+struct ScopOnlyPrinter
+    : public DOTGraphTraitsPrinter<ScopDetectionWrapperPass, true> {
+  static char ID;
+  ScopOnlyPrinter()
+      : DOTGraphTraitsPrinter<ScopDetectionWrapperPass, true>("scopsonly", ID) {
+  }
+};
+char ScopOnlyPrinter::ID = 0;
+
+// Register the four passes under their command-line names.
+static RegisterPass<ScopViewer> X("view-scops",
+                                  "Polly - View Scops of function");
+
+static RegisterPass<ScopOnlyViewer>
+    Y("view-scops-only",
+      "Polly - View Scops of function (with no function bodies)");
+
+static RegisterPass<ScopPrinter> M("dot-scops",
+                                   "Polly - Print Scops of function");
+
+static RegisterPass<ScopOnlyPrinter>
+    N("dot-scops-only",
+      "Polly - Print Scops of function (with no function bodies)");
+
+// Factory functions; each returns a newly allocated pass as a raw pointer.
+Pass *polly::createDOTViewerPass() { return new ScopViewer(); }
+
+Pass *polly::createDOTOnlyViewerPass() { return new ScopOnlyViewer(); }
+
+Pass *polly::createDOTPrinterPass() { return new ScopPrinter(); }
+
+Pass *polly::createDOTOnlyPrinterPass() { return new ScopOnlyPrinter(); }
--- a/external/llvm-project/polly/lib/Analysis/ScopInfo.cpp.REMOVED.git-id
+++ b/external/llvm-project/polly/lib/Analysis/ScopInfo.cpp.REMOVED.git-id
@@ -0,0 +1 @@
+d2bc71fb6d527df34b7d2041bf88e426996a13c6
--- a/external/llvm-project/polly/lib/Analysis/ScopPass.cpp
+++ b/external/llvm-project/polly/lib/Analysis/ScopPass.cpp
@@ -0,0 +1,168 @@
+//===- ScopPass.cpp - The base class of Passes that operate on Polly IR ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the ScopPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/ScopPass.h"
+#include "polly/ScopInfo.h"
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+
+using namespace llvm;
+using namespace polly;
+
+/// Region-pass entry point: look up the Scop built for R and hand it to
+/// runOnScop().
+///
+/// The cached Scop pointer is reset first, so it stays null when the region is
+/// skipped or has no Scop; in both cases false is returned.
+bool ScopPass::runOnRegion(Region *R, RGPassManager &RGM) {
+  S = nullptr;
+
+  if (skipRegion(*R))
+    return false;
+
+  S = getAnalysis<ScopInfoRegionPass>().getScop();
+  if (!S)
+    return false;
+
+  return runOnScop(*S);
+}
+
+/// Print the Scop cached by the last runOnRegion() call via printScop();
+/// prints nothing when no Scop is cached.
+void ScopPass::print(raw_ostream &OS, const Module *M) const {
+  if (!S)
+    return;
+
+  printScop(OS, *S);
+}
+
+/// Default analysis usage of a ScopPass: the Scop of the region is required,
+/// and the analyses listed below are declared as preserved.
+void ScopPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<ScopInfoRegionPass>();
+
+  AU.addPreserved<AAResultsWrapperPass>();
+  AU.addPreserved<BasicAAWrapperPass>();
+  AU.addPreserved<LoopInfoWrapperPass>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+  AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addPreserved<ScopDetectionWrapperPass>();
+  AU.addPreserved<ScalarEvolutionWrapperPass>();
+  AU.addPreserved<SCEVAAWrapperPass>();
+  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+  AU.addPreserved<RegionInfoPass>();
+  AU.addPreserved<ScopInfoRegionPass>();
+  AU.addPreserved<TargetTransformInfoWrapperPass>();
+}
+
+namespace polly {
+template class OwningInnerAnalysisManagerProxy<ScopAnalysisManager, Function>;
+}
+
+namespace llvm {
+
+template class PassManager<Scop, ScopAnalysisManager,
+                           ScopStandardAnalysisResults &, SPMUpdater &>;
+template class InnerAnalysisManagerProxy<ScopAnalysisManager, Function>;
+template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Scop,
+                                         ScopStandardAnalysisResults &>;
+
+/// Run all passes of this Scop pass manager on S, in order.
+///
+/// After each pass the Scop analyses it did not preserve are invalidated, and
+/// its preserved set is intersected into the overall result. The returned set
+/// additionally marks all analyses on Scops as preserved.
+template <>
+PreservedAnalyses
+PassManager<Scop, ScopAnalysisManager, ScopStandardAnalysisResults &,
+            SPMUpdater &>::run(Scop &S, ScopAnalysisManager &AM,
+                               ScopStandardAnalysisResults &AR, SPMUpdater &U) {
+  auto PA = PreservedAnalyses::all();
+  for (auto &Pass : Passes) {
+    auto PassPA = Pass->run(S, AM, AR, U);
+
+    AM.invalidate(S, PassPA);
+    PA.intersect(std::move(PassPA));
+  }
+
+  // All analyses for 'this' Scop have been invalidated above.
+  // If ScopPasses affect or break other scops, they have to propagate this
+  // information through the updater.
+  PA.preserveSet<AllAnalysesOn<Scop>>();
+  return PA;
+}
+
+/// Invalidation hook of the function-level proxy to the Scop analysis manager.
+///
+/// Returns true (the proxy result itself is invalid) when the proxy is not
+/// preserved or when ScopInfo, ScalarEvolution, LoopInfo or the dominator tree
+/// is invalidated; in that case the cached results of every Scop are cleared.
+/// Otherwise inner analyses are invalidated per Scop and false is returned.
+bool ScopAnalysisManagerFunctionProxy::Result::invalidate(
+    Function &F, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &Inv) {
+
+  // First, check whether our ScopInfo is about to be invalidated
+  auto PAC = PA.getChecker<ScopAnalysisManagerFunctionProxy>();
+  if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+      Inv.invalidate<ScopInfoAnalysis>(F, PA) ||
+      Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+      Inv.invalidate<LoopAnalysis>(F, PA) ||
+      Inv.invalidate<DominatorTreeAnalysis>(F, PA)) {
+
+    // As everything depends on ScopInfo, we must drop all existing results
+    for (auto &S : *SI)
+      if (auto *scop = S.second.get())
+        if (InnerAM)
+          InnerAM->clear(*scop, scop->getName());
+
+    // Detach from the inner analysis manager; this result is dead from here.
+    InnerAM = nullptr;
+    return true; // Invalidate the proxy result as well.
+  }
+
+  bool allPreserved = PA.allAnalysesInSetPreserved<AllAnalysesOn<Scop>>();
+
+  // Invalidate all non-preserved analyses
+  // Even if all analyses were preserved, we still need to run deferred
+  // invalidation
+  // NOTE(review): InnerAM is dereferenced below without the null check used
+  // in the branch above - confirm it cannot be null on this path.
+  for (auto &S : *SI) {
+    // Lazily created copy of PA from which inner analyses are abandoned.
+    Optional<PreservedAnalyses> InnerPA;
+    auto *scop = S.second.get();
+    if (!scop)
+      continue;
+
+    if (auto *OuterProxy =
+            InnerAM->getCachedResult<FunctionAnalysisManagerScopProxy>(*scop)) {
+      // Each entry pairs an outer analysis with the inner analyses that were
+      // registered to be invalidated together with it.
+      for (const auto &InvPair : OuterProxy->getOuterInvalidations()) {
+        auto *OuterAnalysisID = InvPair.first;
+        const auto &InnerAnalysisIDs = InvPair.second;
+
+        if (Inv.invalidate(OuterAnalysisID, F, PA)) {
+          if (!InnerPA)
+            InnerPA = PA;
+          for (auto *InnerAnalysisID : InnerAnalysisIDs)
+            InnerPA->abandon(InnerAnalysisID);
+        }
+      }
+
+      // The adjusted set replaces the plain invalidation with PA below.
+      if (InnerPA) {
+        InnerAM->invalidate(*scop, *InnerPA);
+        continue;
+      }
+    }
+
+    if (!allPreserved)
+      InnerAM->invalidate(*scop, PA);
+  }
+
+  return false; // This proxy is still valid
+}
+
+/// Build the proxy result for F from the inner analysis manager this proxy
+/// refers to and the ScopInfo result of F.
+template <>
+ScopAnalysisManagerFunctionProxy::Result
+ScopAnalysisManagerFunctionProxy::run(Function &F,
+                                      FunctionAnalysisManager &FAM) {
+  return Result(*InnerAM, FAM.getResult<ScopInfoAnalysis>(F));
+}
+} // namespace llvm
+
+namespace polly {
+/// Build the proxy result for F from this proxy's InnerAM member (passed
+/// without dereferencing) and the ScopInfo result of F.
+template <>
+OwningScopAnalysisManagerFunctionProxy::Result
+OwningScopAnalysisManagerFunctionProxy::run(Function &F,
+                                            FunctionAnalysisManager &FAM) {
+  return Result(InnerAM, FAM.getResult<ScopInfoAnalysis>(F));
+}
+} // namespace polly
--- a/external/llvm-project/polly/lib/CMakeLists.txt
+++ b/external/llvm-project/polly/lib/CMakeLists.txt
@@ -0,0 +1,163 @@
+set(LLVM_NO_RTTI 1)
+
+set(ISL_CODEGEN_FILES
+    CodeGen/IslAst.cpp
+    CodeGen/IslExprBuilder.cpp
+    CodeGen/IslNodeBuilder.cpp
+    CodeGen/CodeGeneration.cpp)
+
+if (GPU_CODEGEN)
+  set (GPGPU_CODEGEN_FILES
+       CodeGen/PPCGCodeGeneration.cpp
+       CodeGen/ManagedMemoryRewrite.cpp
+       )
+endif (GPU_CODEGEN)
+
+# Compile ISL into a separate library.
+add_subdirectory(External)
+
+set(POLLY_HEADER_FILES)
+if (MSVC_IDE OR XCODE)
+  file(GLOB_RECURSE POLLY_HEADER_FILES "${POLLY_SOURCE_DIR}/include/polly/*.h")
+endif ()
+
+# Use an object-library to add the same files to multiple libs without requiring
+# the sources to be recompiled for each of them.
+add_library(PollyCore OBJECT
+  Analysis/DependenceInfo.cpp
+  Analysis/PolyhedralInfo.cpp
+  Analysis/ScopDetection.cpp
+  Analysis/ScopDetectionDiagnostic.cpp
+  Analysis/ScopInfo.cpp
+  Analysis/ScopBuilder.cpp
+  Analysis/ScopGraphPrinter.cpp
+  Analysis/ScopPass.cpp
+  Analysis/PruneUnprofitable.cpp
+  CodeGen/BlockGenerators.cpp
+  ${ISL_CODEGEN_FILES}
+  CodeGen/LoopGenerators.cpp
+  CodeGen/IRBuilder.cpp
+  CodeGen/Utils.cpp
+  CodeGen/RuntimeDebugBuilder.cpp
+  CodeGen/CodegenCleanup.cpp
+  CodeGen/PerfMonitor.cpp
+  ${GPGPU_CODEGEN_FILES}
+  Exchange/JSONExporter.cpp
+  Support/GICHelper.cpp
+  Support/SCEVAffinator.cpp
+  Support/SCEVValidator.cpp
+  Support/RegisterPasses.cpp
+  Support/ScopHelper.cpp
+  Support/ScopLocation.cpp
+  Support/ISLTools.cpp
+  Support/DumpModulePass.cpp
+  Support/VirtualInstruction.cpp
+  ${POLLY_JSON_FILES}
+  Transform/Canonicalization.cpp
+  Transform/CodePreparation.cpp
+  Transform/DeadCodeElimination.cpp
+  Transform/ScheduleOptimizer.cpp
+  Transform/FlattenSchedule.cpp
+  Transform/FlattenAlgo.cpp
+  Transform/ForwardOpTree.cpp
+  Transform/DeLICM.cpp
+  Transform/ZoneAlgo.cpp
+  Transform/Simplify.cpp
+  Transform/MaximalStaticExpansion.cpp
+  Transform/RewriteByReferenceParameters.cpp
+  Transform/ScopInliner.cpp
+  ${POLLY_HEADER_FILES}
+  )
+set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
+
+# Create the library that can be linked into LLVM's tools and Polly's unittests.
+# It depends on all libraries it needs, such that with
+# LLVM_POLLY_LINK_INTO_TOOLS=ON, its dependencies like PollyISL are linked as
+# well.
+add_polly_library(Polly $<TARGET_OBJECTS:PollyCore>)
+target_link_libraries(Polly
+  ${ISL_TARGET}
+  ${JSONCPP_LIBRARIES}
+)
+
+# Additional dependencies for Polly-ACC.
+if (GPU_CODEGEN)
+  target_link_libraries(Polly PollyPPCG)
+endif ()
+
+
+# Polly-ACC requires the NVPTX backend to work. Ask LLVM about its libraries.
+set(nvptx_libs)
+if (GPU_CODEGEN)
+  # This call emits an error if the NVPTX backend is not enabled.
+  llvm_map_components_to_libnames(nvptx_libs NVPTX)
+endif ()
+
+if (LLVM_LINK_LLVM_DYLIB)
+  # The shlib/dylib contains all the LLVM components
+  # (including NVPTX if enabled) already. Adding them to target_link_libraries
+  # would cause them to be present twice in the address space
+  # (their LLVM*.a/so and their copies in libLLVM.so)
+  # which results in errors when the two instances try to register the same
+  # command-line switches.
+  target_link_libraries(Polly LLVM)
+else ()
+  target_link_libraries(Polly
+    LLVMSupport
+    LLVMCore
+    LLVMScalarOpts
+    LLVMInstCombine
+    LLVMTransformUtils
+    LLVMAnalysis
+    LLVMipo
+    LLVMMC
+    LLVMPasses
+    LLVMLinker
+    LLVMIRReader
+    ${nvptx_libs}
+    # The libraries below are required for darwin: http://PR26392
+    LLVMBitReader
+    LLVMMCParser
+    LLVMObject
+    LLVMProfileData
+    LLVMTarget
+    LLVMVectorize
+    )
+endif ()
+
+# Create a loadable module Polly.so that can be loaded using
+# LLVM's/clang's "-load" option.
+if (MSVC)
+  # Add dummy target, because loadable modules are not supported on Windows
+  add_custom_target(LLVMPolly)
+  set_target_properties(LLVMPolly PROPERTIES FOLDER "Polly")
+else ()
+  add_polly_loadable_module(LLVMPolly
+    Polly.cpp
+    $<TARGET_OBJECTS:PollyCore>
+  )
+
+  # Only add the dependencies that are not part of LLVM. The latter are assumed
+  # to be already available in the address space the module is loaded into.
+  # Adding them once more would have the effect that both copies try to register
+  # the same command line options, to which LLVM reacts with an error.
+  # If Polly-ACC is enabled, the NVPTX target is also expected to reside in the
+  # hosts. This is not the case for bugpoint. Use LLVM_POLLY_LINK_INTO_TOOLS=ON
+  # instead which will automatically resolve the additional dependencies by
+  # Polly.
+  target_link_libraries(LLVMPolly ${ISL_TARGET} ${JSONCPP_LIBRARIES})
+  if (GPU_CODEGEN)
+    target_link_libraries(LLVMPolly PollyPPCG)
+  endif ()
+
+  set_target_properties(LLVMPolly
+    PROPERTIES
+    LINKER_LANGUAGE CXX
+    PREFIX "")
+endif ()
+
+if (TARGET intrinsics_gen)
+  # Check if we are building as part of an LLVM build
+  add_dependencies(PollyCore intrinsics_gen)
+endif()
+
--- a/external/llvm-project/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/BlockGenerators.cpp
--- a/external/llvm-project/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/CodeGeneration.cpp
@@ -0,0 +1,400 @@
+//===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
+// back to LLVM-IR using the ISL code generator.
+//
+// The Scop describes the high level memory behavior of a control flow region.
+// Transformation passes can update the schedule (execution order) of statements
+// in the Scop. ISL is used to generate an abstract syntax tree that reflects
+// the updated execution order. This clast is used to create new LLVM-IR that is
+// computationally equivalent to the original control flow region, but executes
+// its code in the new execution order defined by the changed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/CodeGeneration.h"
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/CodeGen/IslAst.h"
+#include "polly/CodeGen/IslNodeBuilder.h"
+#include "polly/CodeGen/PerfMonitor.h"
+#include "polly/CodeGen/Utils.h"
+#include "polly/DependenceInfo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopDetectionDiagnostic.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "isl/ast.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+using namespace polly;
+
+#define DEBUG_TYPE "polly-codegen"
+
+static cl::opt<bool> Verify("polly-codegen-verify",
+                            cl::desc("Verify the function generated by Polly"),
+                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
+                            cl::cat(PollyCategory));
+
+bool polly::PerfMonitoring;
+
+static cl::opt<bool, true>
+    XPerfMonitoring("polly-codegen-perf-monitoring",
+                    cl::desc("Add run-time performance monitoring"), cl::Hidden,
+                    cl::location(polly::PerfMonitoring), cl::init(false),
+                    cl::ZeroOrMore, cl::cat(PollyCategory));
+
+STATISTIC(ScopsProcessed, "Number of SCoP processed");
+STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
+STATISTIC(CodegenedAffineLoops,
+          "Number of original affine loops in SCoPs that have been generated");
+STATISTIC(CodegenedBoxedLoops,
+          "Number of original boxed loops in SCoPs that have been generated");
+
+namespace polly {
+
+/// Turn @p Block into an unreachable block.
+///
+/// An UnreachableInst is created in front of the current terminator of
+/// @p Block using @p Builder, and the previous terminator is erased
+/// afterwards, so the UnreachableInst becomes the new terminator.
+void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
+  auto *OldTerm = Block.getTerminator();
+
+  // Insert the replacement first so the builder has a valid insert position.
+  Builder.SetInsertPoint(OldTerm);
+  Builder.CreateUnreachable();
+
+  OldTerm->eraseFromParent();
+}
+
+} // namespace polly
+
+/// Verify the function @p F if -polly-codegen-verify is enabled.
+///
+/// If the verifier reports a problem, the SCoP @p S, the isl AST held by
+/// @p AI and the function itself are dumped (in debug mode only) before the
+/// failure is reported through llvm_unreachable.
+static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
+  // Verification is optional; do not even run the verifier when disabled.
+  if (!Verify)
+    return;
+
+  // verifyFunction returns true if the function is broken.
+  const bool IsBroken = verifyFunction(F, &errs());
+  if (!IsBroken)
+    return;
+
+  DEBUG({
+    errs() << "== ISL Codegen created an invalid function ==\n\n"
+           << "== The SCoP ==\n"
+           << S << "\n== The isl AST ==\n";
+    AI.print(errs());
+    errs() << "\n== The invalid function ==\n";
+    F.print(errs());
+  });
+
+  llvm_unreachable("Polly generated function could not be verified. Add "
+                   "-polly-codegen-verify=false to disable this assertion.");
+}
+
+// Code generation creates many basic blocks without registering them in the
+// RegionInfo. To keep the RegionInfo verifier happy, every block of @p F that
+// has no region yet is assigned directly to @p ParentRegion (the parent region
+// of the scop), without building any nested region structure.
+static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) {
+  for (BasicBlock &BB : F)
+    if (!RI.getRegionFor(&BB))
+      RI.setRegionFor(&BB, &ParentRegion);
+}
+
+/// Erase every llvm.lifetime.start and llvm.lifetime.end call found in the
+/// blocks of @p R.
+///
+/// CodeGeneration does not copy lifetime markers into the optimized SCoP, so
+/// they would remain only on the original path. Code such as
+///
+///     llvm.lifetime.start(%p)
+///     llvm.lifetime.end(%p)
+///
+/// could thereby be turned into
+///
+///     if (RTC) {
+///       // generated code
+///     } else {
+///       // original code
+///       llvm.lifetime.start(%p)
+///     }
+///     llvm.lifetime.end(%p)
+///
+/// The current StackColoring algorithm cannot cope with an end marker that is
+/// reached from the entry block on some paths that cross the start marker and
+/// on others that do not, and likewise with start markers that do not always
+/// reach the end marker. To avoid any such issue all lifetime markers are
+/// removed, including those in the original code.
+///
+/// A better solution could be to hoist all llvm.lifetime.start to the split
+/// node and all llvm.lifetime.end to the merge node, which should be
+/// conservatively correct.
+static void removeLifetimeMarkers(Region *R) {
+  for (auto *BB : R->blocks()) {
+    for (auto It = BB->begin(), End = BB->end(); It != End;) {
+      // Step past the current instruction before it is possibly erased, so
+      // the loop iterator stays valid.
+      auto Cur = It;
+      ++It;
+
+      auto *Intr = dyn_cast<IntrinsicInst>(&*Cur);
+      if (!Intr)
+        continue;
+
+      const auto IntrID = Intr->getIntrinsicID();
+      if (IntrID == Intrinsic::lifetime_start ||
+          IntrID == Intrinsic::lifetime_end)
+        BB->getInstList().erase(Cur);
+    }
+  }
+}
+
+/// Generate LLVM-IR for the SCoP @p S from the isl AST provided by @p AI.
+///
+/// The region of @p S is simplified and a conditional branch is introduced in
+/// front of it that selects between the original code and the newly generated
+/// code. If preloading the invariant loads fails, the branch condition is
+/// patched to false so that the original SCoP is always executed.
+///
+/// @param S  The SCoP to generate code for.
+/// @param AI The isl AST information; must use the same isl_ctx as @p S.
+/// @param LI, DT, SE, RI Analyses that are used and updated while generating.
+///
+/// @returns False if nothing was changed because @p AI belongs to a different
+///          isl_ctx or holds no AST root node; true otherwise (also when the
+///          preloading failed and only the fallback was set up).
+static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
+                    ScalarEvolution &SE, RegionInfo &RI) {
+  // Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen
+  // reports itself to preserve DependenceInfo and IslAstInfo, we might get
+  // those analysis that were computed by a different ScopInfo for a different
+  // Scop structure. When the ScopInfo/Scop object is freed, there is a high
+  // probability that the new ScopInfo/Scop object will be created at the same
+  // heap position with the same address. Comparing whether the Scop or ScopInfo
+  // address is the expected therefore is unreliable.
+  // Instead, we compare the address of the isl_ctx object. Both, DependenceInfo
+  // and IslAstInfo must hold a reference to the isl_ctx object to ensure it is
+  // not freed before the destruction of those analyses which might happen after
+  // the destruction of the Scop/ScopInfo they refer to.  Hence, the isl_ctx
+  // will not be freed and its space not reused as long there is a
+  // DependenceInfo or IslAstInfo around.
+  IslAst &Ast = AI.getIslAst();
+  if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) {
+    DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n");
+    return false;
+  }
+
+  // Check if we created an isl_ast root node, otherwise exit.
+  isl_ast_node *AstRoot = Ast.getAst();
+  if (!AstRoot)
+    return false;
+
+  // Collect statistics. Do it before we modify the IR to avoid having it any
+  // influence on the result.
+  auto ScopStats = S.getStatistics();
+  ScopsProcessed++;
+
+  auto &DL = S.getFunction().getParent()->getDataLayout();
+  Region *R = &S.getRegion();
+  assert(!R->isTopLevelRegion() && "Top level regions are not supported");
+
+  ScopAnnotator Annotator;
+
+  // The code below relies on the region being simple and on the SCoP having
+  // an entering block; both are asserted right after the simplification.
+  simplifyRegion(R, &DT, &LI, &RI);
+  assert(R->isSimple());
+  BasicBlock *EnteringBB = S.getEnteringBlock();
+  assert(EnteringBB);
+  PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
+
+  // Only build the run-time condition and parameters _after_ having
+  // introduced the conditional branch. This is important as the conditional
+  // branch will guard the original scop from new induction variables that
+  // the SCEVExpander may introduce while code generating the parameters and
+  // which may introduce scalar dependences that prevent us from correctly
+  // code generating this scop.
+  // The branch is created with a constant true condition as a placeholder;
+  // its operand 0 is replaced further down.
+  BBPair StartExitBlocks =
+      std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
+  BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
+  BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
+
+  removeLifetimeMarkers(R);
+  // The single predecessor of the start block is the block that ends in the
+  // introduced conditional branch.
+  auto *SplitBlock = StartBlock->getSinglePredecessor();
+
+  IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
+
+  // All arrays must have their base pointers known before
+  // ScopAnnotator::buildAliasScopes.
+  NodeBuilder.allocateNewArrays(StartExitBlocks);
+  Annotator.buildAliasScopes(S);
+
+  // Optionally instrument the region: the start is recorded in the split
+  // block and the end in the unique successor of the exit block.
+  if (PerfMonitoring) {
+    PerfMonitor P(S, EnteringBB->getParent()->getParent());
+    P.initialize();
+    P.insertRegionStart(SplitBlock->getTerminator());
+
+    BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
+    P.insertRegionEnd(MergeBlock->getTerminator());
+  }
+
+  // First generate code for the hoisted invariant loads and transitively the
+  // parameters they reference. Afterwards, for the remaining parameters that
+  // might reference the hoisted loads. Finally, build the runtime check
+  // that might reference both hoisted loads as well as parameters.
+  // If the hoisting fails we have to bail and execute the original code.
+  Builder.SetInsertPoint(SplitBlock->getTerminator());
+  if (!NodeBuilder.preloadInvariantLoads()) {
+    // Patch the introduced branch condition to ensure that we always execute
+    // the original SCoP.
+    auto *FalseI1 = Builder.getFalse();
+    auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
+    SplitBBTerm->setOperand(0, FalseI1);
+
+    // Since the other branch is hence ignored we mark it as unreachable and
+    // adjust the dominator tree accordingly.
+    auto *ExitingBlock = StartBlock->getUniqueSuccessor();
+    assert(ExitingBlock);
+    auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
+    assert(MergeBlock);
+    markBlockUnreachable(*StartBlock, Builder);
+    markBlockUnreachable(*ExitingBlock, Builder);
+    auto *ExitingBB = S.getExitingBlock();
+    assert(ExitingBB);
+    DT.changeImmediateDominator(MergeBlock, ExitingBB);
+    DT.eraseNode(ExitingBlock);
+
+    // The AST is not handed to the node builder on this path, so it is
+    // released here.
+    isl_ast_node_free(AstRoot);
+  } else {
+    NodeBuilder.addParameters(S.getContext().release());
+    Value *RTC = NodeBuilder.createRTC(AI.getRunCondition());
+
+    // Replace the placeholder condition of the guarding branch with the
+    // actual run-time check.
+    Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
+
+    // Explicitly set the insert point to the end of the block to avoid that a
+    // split at the builder's current
+    // insert position would move the malloc calls to the wrong BasicBlock.
+    // Ideally we would just split the block during allocation of the new
+    // arrays, but this would break the assumption that there are no blocks
+    // between polly.start and polly.exiting (at this point).
+    Builder.SetInsertPoint(StartBlock->getTerminator());
+
+    NodeBuilder.create(AstRoot);
+    NodeBuilder.finalize();
+    fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
+
+    // Only SCoPs that went through the node builder count as generated.
+    CodegenedScops++;
+    CodegenedAffineLoops += ScopStats.NumAffineLoops;
+    CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
+  }
+
+  // Verify the modified function as well as all parallel subfunctions the
+  // node builder reports (a no-op unless -polly-codegen-verify is set).
+  Function *F = EnteringBB->getParent();
+  verifyGeneratedFunction(S, *F, AI);
+  for (auto *SubF : NodeBuilder.getParallelSubfunctions())
+    verifyGeneratedFunction(S, *SubF, AI);
+
+  // Mark the function such that we run additional cleanup passes on this
+  // function (e.g. mem2reg to rediscover phi nodes).
+  F->addFnAttr("polly-optimized");
+  return true;
+}
+
+namespace {
+
+/// Legacy pass manager wrapper around CodeGen().
+///
+/// The pass fetches the required analyses for the current SCoP and forwards
+/// them to CodeGen().
+class CodeGeneration : public ScopPass {
+public:
+  /// Pass identification.
+  static char ID;
+
+  /// The data layout used.
+  ///
+  /// It is set in runOnScop() but not read anywhere else in this class.
+  const DataLayout *DL;
+
+  /// @name The analysis passes we need to generate code.
+  ///
+  /// All of them are (re)assigned at the beginning of every runOnScop() call.
+  ///
+  ///{
+  LoopInfo *LI;
+  IslAstInfo *AI;
+  DominatorTree *DT;
+  ScalarEvolution *SE;
+  RegionInfo *RI;
+  ///}
+
+  CodeGeneration() : ScopPass(ID) {}
+
+  /// Generate LLVM-IR for the SCoP @p S.
+  ///
+  /// @returns The result of CodeGen(), or false if @p S is marked to be
+  ///          skipped.
+  bool runOnScop(Scop &S) override {
+    // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
+    if (S.isToBeSkipped())
+      return false;
+
+    AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
+    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    DL = &S.getFunction().getParent()->getDataLayout();
+    RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
+    return CodeGen(S, *AI, *LI, *DT, *SE, *RI);
+  }
+
+  /// Register all analyses and transformation required.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    ScopPass::getAnalysisUsage(AU);
+
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<IslAstInfoWrapperPass>();
+    AU.addRequired<RegionInfoPass>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<ScopDetectionWrapperPass>();
+    AU.addRequired<ScopInfoRegionPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+
+    // DependenceInfo and IslAstInfo are reported as preserved. CodeGen()
+    // therefore checks the isl_ctx of the IslAstInfo it receives, because a
+    // preserved analysis might stem from a different Scop.
+    AU.addPreserved<DependenceInfo>();
+    AU.addPreserved<IslAstInfoWrapperPass>();
+
+    // FIXME: We do not yet add regions for the newly generated code to the
+    //        region tree.
+  }
+};
+
+} // namespace
+
+/// New pass manager entry point: run code generation on the SCoP @p S.
+///
+/// If CodeGen() reports that it did not touch anything, all analyses are
+/// preserved. Otherwise the SCoP is invalidated through @p U and no analysis
+/// is reported as preserved.
+PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM,
+                                          ScopStandardAnalysisResults &AR,
+                                          SPMUpdater &U) {
+  auto &AstInfo = SAM.getResult<IslAstAnalysis>(S, AR);
+
+  const bool Changed = CodeGen(S, AstInfo, AR.LI, AR.DT, AR.SE, AR.RI);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  U.invalidateScop(S);
+  return PreservedAnalyses::none();
+}
+
+// Pass identification of the legacy CodeGeneration pass.
+// NOTE(review): LLVM passes conventionally initialize ID to 0; presumably only
+// the address of ID is used as identifier, so the value should not matter --
+// confirm.
+char CodeGeneration::ID = 1;
+
+/// Create a new instance of the legacy CodeGeneration pass.
+Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); }
+
+// Register the legacy pass under the name "polly-codegen" together with the
+// passes it depends on.
+// NOTE(review): DependenceInfo is listed as a dependency here although
+// getAnalysisUsage() only marks it as preserved, while IslAstInfoWrapperPass
+// and ScopInfoRegionPass are required there but not listed here -- confirm
+// whether this is intended.
+INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
+                      "Polly - Create LLVM-IR from SCoPs", false, false);
+INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
+INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
+                    "Polly - Create LLVM-IR from SCoPs", false, false)
--- a/external/llvm-project/polly/lib/CodeGen/CodegenCleanup.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/CodegenCleanup.cpp
@@ -0,0 +1,139 @@
+//===- CodegenCleanup.cpp -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/CodegenCleanup.h"
+
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/PassInfo.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+
+#define DEBUG_TYPE "polly-cleanup"
+
+using namespace llvm;
+using namespace polly;
+
+namespace {
+
+/// Function pass that runs a fixed pipeline of cleanup passes on functions
+/// that carry the "polly-optimized" function attribute.
+///
+/// The pipeline lives in a private FunctionPassManager that is created in
+/// doInitialization() and destroyed in doFinalization(). Functions without the
+/// attribute are left untouched.
+class CodegenCleanup : public FunctionPass {
+private:
+  CodegenCleanup(const CodegenCleanup &) = delete;
+  const CodegenCleanup &operator=(const CodegenCleanup &) = delete;
+
+  /// The nested pass manager holding the cleanup pipeline; nullptr outside of
+  /// the doInitialization()/doFinalization() bracket.
+  llvm::legacy::FunctionPassManager *FPM;
+
+public:
+  static char ID;
+  explicit CodegenCleanup() : FunctionPass(ID), FPM(nullptr) {}
+
+  /// @name FunctionPass interface
+  //@{
+  /// This pass does not declare any analysis requirements itself.
+  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {}
+
+  /// Build the nested pass manager for @p M and initialize it.
+  ///
+  /// @returns The result of the nested manager's doInitialization().
+  bool doInitialization(Module &M) override {
+    assert(!FPM);
+
+    FPM = new llvm::legacy::FunctionPassManager(&M);
+
+    // TODO: How to make parent passes discoverable?
+    // TODO: Should be sensitive to compiler options in PassManagerBuilder, to
+    // which we do not have access here.
+    FPM->add(createScopedNoAliasAAWrapperPass());
+    FPM->add(createTypeBasedAAWrapperPass());
+    FPM->add(createAAResultsWrapperPass());
+
+    // TODO: These are non-conditional passes that run between
+    // EP_ModuleOptimizerEarly and EP_VectorizerStart just to ensure we do not
+    // miss any optimization that would have run after Polly with
+    // -polly-position=early. This can probably be reduced to a more compact set
+    // of passes.
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createSROAPass());
+    FPM->add(createEarlyCSEPass());
+
+    FPM->add(createPromoteMemoryToRegisterPass());
+    FPM->add(createInstructionCombiningPass(true));
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createSROAPass());
+    FPM->add(createEarlyCSEPass(true));
+    FPM->add(createSpeculativeExecutionIfHasBranchDivergencePass());
+    FPM->add(createJumpThreadingPass());
+    FPM->add(createCorrelatedValuePropagationPass());
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createInstructionCombiningPass(true));
+    FPM->add(createLibCallsShrinkWrapPass());
+    FPM->add(createTailCallEliminationPass());
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createReassociatePass());
+    FPM->add(createLoopRotatePass(-1));
+    FPM->add(createGVNPass());
+    FPM->add(createLICMPass());
+    FPM->add(createLoopUnswitchPass());
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createInstructionCombiningPass(true));
+    FPM->add(createIndVarSimplifyPass());
+    FPM->add(createLoopIdiomPass());
+    FPM->add(createLoopDeletionPass());
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createSimpleLoopUnrollPass(3));
+    FPM->add(createMergedLoadStoreMotionPass());
+    FPM->add(createGVNPass());
+    FPM->add(createMemCpyOptPass());
+    FPM->add(createSCCPPass());
+    FPM->add(createBitTrackingDCEPass());
+    FPM->add(createInstructionCombiningPass(true));
+    FPM->add(createJumpThreadingPass());
+    FPM->add(createCorrelatedValuePropagationPass());
+    FPM->add(createDeadStoreEliminationPass());
+    FPM->add(createLICMPass());
+    FPM->add(createAggressiveDCEPass());
+    FPM->add(createCFGSimplificationPass());
+    FPM->add(createInstructionCombiningPass(true));
+    FPM->add(createFloat2IntPass());
+
+    return FPM->doInitialization();
+  }
+
+  /// Finalize and destroy the nested pass manager.
+  ///
+  /// @returns The result of the nested manager's doFinalization().
+  bool doFinalization(Module &M) override {
+    bool Result = FPM->doFinalization();
+
+    // Reset to nullptr so that a later doInitialization() passes its assert.
+    delete FPM;
+    FPM = nullptr;
+
+    return Result;
+  }
+
+  /// Run the cleanup pipeline on @p F if it is marked "polly-optimized".
+  ///
+  /// @returns False for unmarked functions, otherwise the result of running
+  ///          the nested pass manager on @p F.
+  bool runOnFunction(llvm::Function &F) override {
+    if (!F.hasFnAttribute("polly-optimized")) {
+      // Terminate the message with a newline so that consecutive debug
+      // messages do not run into each other.
+      DEBUG(dbgs() << F.getName()
+                   << ": Skipping cleanup because Polly did not optimize it.\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << F.getName() << ": Running codegen cleanup...\n");
+    return FPM->run(F);
+  }
+  //@}
+};
+
+char CodegenCleanup::ID;
+} // namespace
+
+/// Create a new instance of the CodegenCleanup pass.
+FunctionPass *polly::createCodegenCleanupPass() { return new CodegenCleanup(); }
+
+// Register the pass under the name "polly-cleanup"; no pass dependencies are
+// declared between BEGIN and END.
+INITIALIZE_PASS_BEGIN(CodegenCleanup, "polly-cleanup",
+                      "Polly - Cleanup after code generation", false, false)
+INITIALIZE_PASS_END(CodegenCleanup, "polly-cleanup",
+                    "Polly - Cleanup after code generation", false, false)
--- a/external/llvm-project/polly/lib/CodeGen/IRBuilder.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/IRBuilder.cpp
@@ -0,0 +1,256 @@
+//===------ PollyIRBuilder.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Polly IRBuilder file contains Polly specific extensions for the IRBuilder
+// that are used e.g. to emit the llvm.loop.parallel metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace polly;
+
+/// Maximal number of arrays for which ScopAnnotator::buildAliasScopes still
+/// constructs alias scopes; with more arrays the construction is skipped.
+static const int MaxArraysInAliasScops = 10;
+
+/// Create a metadata node whose first operand refers to the node itself.
+///
+/// With both optional arguments given, the resulting node has the form
+///
+///    '!n = metadata !{metadata !n, arg0, arg1}'
+///
+/// Arguments that are null are simply left out.
+///
+/// @return The self referencing id metadata node.
+static MDNode *getID(LLVMContext &Ctx, Metadata *arg0 = nullptr,
+                     Metadata *arg1 = nullptr) {
+  // A temporary node provides a unique pointer that occupies operand 0 until
+  // the real node exists and can be referenced.
+  auto Placeholder = MDNode::getTemporary(Ctx, None);
+
+  SmallVector<Metadata *, 3> Operands;
+  Operands.push_back(Placeholder.get());
+  if (arg0)
+    Operands.push_back(arg0);
+  if (arg1)
+    Operands.push_back(arg1);
+
+  MDNode *SelfRef = MDNode::get(Ctx, Operands);
+
+  // Close the cycle: operand 0 now points at the node itself.
+  SelfRef->replaceOperandWith(0, SelfRef);
+  return SelfRef;
+}
+
+/// Start without ScalarEvolution and without an alias scope domain; both are
+/// assigned by buildAliasScopes().
+ScopAnnotator::ScopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {}
+
+/// Build the alias scope metadata for the arrays of @p S.
+///
+/// A new alias scope domain is created and both scope maps are reset. Then,
+/// unless there are too many arrays, every array gets its own alias scope as
+/// well as a list containing the scopes of all other arrays.
+void ScopAnnotator::buildAliasScopes(Scop &S) {
+  SE = S.getSE();
+
+  LLVMContext &Ctx = SE->getContext();
+  AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain"));
+
+  AliasScopeMap.clear();
+  OtherAliasScopeListMap.clear();
+
+  // Only arrays are of interest here. Scalar references are not considered;
+  // basicaa should handle them easily.
+  SmallVector<ScopArrayInfo *, 10> ArrayInfos;
+  for (ScopArrayInfo *SAI : S.arrays()) {
+    if (!SAI->isArrayKind())
+      continue;
+    ArrayInfos.push_back(SAI);
+  }
+
+  // Constructing the alias scopes is quadratic in the number of arrays. Give
+  // up on alias information if there are too many of them, to avoid quadratic
+  // growth of compile time and code size.
+  if (ArrayInfos.size() > MaxArraysInAliasScops)
+    return;
+
+  // First step: one alias scope per array, keyed by the array's base pointer.
+  std::string ScopePrefix = "polly.alias.scope.";
+  for (const ScopArrayInfo *SAI : ArrayInfos) {
+    assert(SAI->getBasePtr() && "Base pointer must be present");
+    MDNode *Scope =
+        getID(Ctx, AliasScopeDomain,
+              MDString::get(Ctx, (ScopePrefix + SAI->getName()).c_str()));
+    AliasScopeMap[SAI->getBasePtr()] = Scope;
+  }
+
+  // Second step: for each array, collect the scopes of all the other arrays.
+  for (const ScopArrayInfo *SAI : ArrayInfos) {
+    MDNode *OtherScopes = MDNode::get(Ctx, {});
+
+    for (const auto &Entry : AliasScopeMap) {
+      // Skip the scope that belongs to the array itself.
+      if (SAI->getBasePtr() == Entry.first)
+        continue;
+
+      Metadata *Scope = {Entry.second};
+      OtherScopes = MDNode::concatenate(OtherScopes, MDNode::get(Ctx, Scope));
+    }
+
+    OtherAliasScopeListMap[SAI->getBasePtr()] = OtherScopes;
+  }
+}
+
+/// Register @p L as the innermost active loop.
+///
+/// For a parallel loop a fresh loop id is created in addition and pushed,
+/// together with the ids of the enclosing parallel loops, onto the stack of
+/// parallel loops.
+void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
+  ActiveLoops.push_back(L);
+
+  if (!IsParallel)
+    return;
+
+  MDNode *Id = getID(L->getHeader()->getContext());
+  assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
+  assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
+
+  // The outermost parallel loop starts the list; nested ones extend the list
+  // of their parent.
+  MDNode *Ids;
+  if (ParallelLoops.empty())
+    Ids = Id;
+  else
+    Ids = MDNode::concatenate(ParallelLoops.back(), Id);
+
+  ParallelLoops.push_back(Ids);
+}
+
+/// Remove the innermost active loop again; if it was a parallel loop, its
+/// entry on the stack of parallel loops is dropped as well.
+void ScopAnnotator::popLoop(bool IsParallel) {
+  ActiveLoops.pop_back();
+
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
+    ParallelLoops.pop_back();
+  }
+}
+
+/// Attach "llvm.loop" metadata to the latch branch @p B.
+///
+/// The metadata is composed of a "llvm.loop.vectorize.enable" = false entry
+/// if @p IsLoopVectorizerDisabled is set, and of the id of the innermost
+/// parallel loop if @p IsParallel is set. If neither applies, null metadata
+/// is set.
+void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
+                                      bool IsLoopVectorizerDisabled) const {
+  MDNode *LoopMD = nullptr;
+
+  if (IsLoopVectorizerDisabled) {
+    LLVMContext &Ctx = SE->getContext();
+    Metadata *Name = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
+    Metadata *Ops[] = {Name, ValueAsMetadata::get(FalseValue)};
+    MDNode *DisableVectorize = getID(Ctx, MDNode::get(Ctx, Ops));
+    LoopMD = MDNode::concatenate(LoopMD, DisableVectorize);
+  }
+
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
+    // The id of the innermost parallel loop is the last operand of the list
+    // on top of the stack.
+    MDNode *Ids = ParallelLoops.back();
+    const unsigned LastIdx = Ids->getNumOperands() - 1;
+    MDNode *Id = cast<MDNode>(Ids->getOperand(LastIdx));
+    LoopMD = MDNode::concatenate(LoopMD, Id);
+  }
+
+  B->setMetadata("llvm.loop", LoopMD);
+}
+
+/// Return the pointer operand of a memory access instruction.
+///
+/// @param Inst The instruction to be analyzed.
+/// @return The pointer operand if @p Inst is a memory access instruction,
+///         nullptr otherwise.
+static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
+  auto Access = MemAccInst::dyn_cast(Inst);
+  return !Access ? nullptr : Access.getPointerOperand();
+}
+
+/// Annotate @p Inst with second level alias metadata.
+///
+/// Each distinct pointer SCEV accessed through @p BasePtr gets its own alias
+/// scope (a child of the alias scope of @p BasePtr) and a matching noalias
+/// list. Both are cached per pointer SCEV and reused on later calls.
+///
+/// Nothing is annotated if @p Inst has no pointer operand, or if no scope has
+/// been created for its pointer yet and @p BasePtr has no alias scope.
+///
+/// @param Inst    The memory access instruction to annotate.
+/// @param BasePtr The base pointer of the array accessed by @p Inst.
+void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
+                                        llvm::Value *BasePtr) {
+  // getMemAccInstPointerOperand returns nullptr for instructions that are not
+  // memory accesses. Check this before handing the pointer to ScalarEvolution
+  // instead of testing the result only after it has already been used.
+  Value *Ptr = getMemAccInstPointerOperand(Inst);
+  if (!Ptr)
+    return;
+
+  auto *PtrSCEV = SE->getSCEV(Ptr);
+  if (!PtrSCEV)
+    return;
+  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
+
+  auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(PtrSCEV);
+  auto SecondLevelOtherAliasScopeList =
+      SecondLevelOtherAliasScopeListMap.lookup(PtrSCEV);
+  if (!SecondLevelAliasScope) {
+    // First access through this pointer: create its scope below the alias
+    // scope of the base pointer.
+    auto AliasScope = AliasScopeMap.lookup(BasePtr);
+    if (!AliasScope)
+      return;
+    LLVMContext &Ctx = SE->getContext();
+    SecondLevelAliasScope = getID(
+        Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
+    SecondLevelAliasScopeMap[PtrSCEV] = SecondLevelAliasScope;
+    Metadata *Args = {SecondLevelAliasScope};
+    // The entry stored under the base pointer's SCEV accumulates the second
+    // level scopes created so far. The list read here does not yet contain
+    // the scope just created, so the noalias list built from it only names
+    // the previously created scopes.
+    auto SecondLevelBasePtrAliasScopeList =
+        SecondLevelAliasScopeMap.lookup(BasePtrSCEV);
+    SecondLevelAliasScopeMap[BasePtrSCEV] = MDNode::concatenate(
+        SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
+    auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
+    SecondLevelOtherAliasScopeList = MDNode::concatenate(
+        OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
+    SecondLevelOtherAliasScopeListMap[PtrSCEV] = SecondLevelOtherAliasScopeList;
+  }
+  Inst->setMetadata("alias.scope", SecondLevelAliasScope);
+  Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
+}
+
+/// Annotate the memory instruction @p Inst.
+///
+/// Inside a parallel loop the instruction is tagged with
+/// "llvm.mem.parallel_loop_access". If alias scopes have been built and the
+/// base pointer of the access is known to them (directly or through an
+/// alternative alias base), "alias.scope" and "noalias" metadata is attached
+/// as well.
+void ScopAnnotator::annotate(Instruction *Inst) {
+  if (!Inst->mayReadOrWriteMemory())
+    return;
+
+  const bool InParallelLoop = !ParallelLoops.empty();
+  if (InParallelLoop)
+    Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
+
+  // TODO: Use the ScopArrayInfo once available here.
+  if (!AliasScopeDomain)
+    return;
+
+  // Memory operations taking more than one pointer are not annotated, as it
+  // would be ambiguous which pointer the annotation refers to.
+  // FIXME: How can we specify annotations for all pointer arguments?
+  const bool IsNonMemSetCall = isa<CallInst>(Inst) && !isa<MemSetInst>(Inst);
+  if (IsNonMemSetCall)
+    return;
+
+  auto *Ptr = getMemAccInstPointerOperand(Inst);
+  if (!Ptr)
+    return;
+
+  // Only base pointers that are plain values (SCEVUnknown) can be looked up.
+  auto *SU = dyn_cast<SCEVUnknown>(SE->getPointerBase(SE->getSCEV(Ptr)));
+  if (!SU)
+    return;
+
+  auto *BasePtr = SU->getValue();
+  if (!BasePtr)
+    return;
+
+  auto AliasScope = AliasScopeMap.lookup(BasePtr);
+  if (!AliasScope) {
+    // No scope for this base pointer; retry with its alternative alias base.
+    BasePtr = AlternativeAliasBases.lookup(BasePtr);
+    if (!BasePtr)
+      return;
+
+    AliasScope = AliasScopeMap.lookup(BasePtr);
+    if (!AliasScope)
+      return;
+  }
+
+  assert(OtherAliasScopeListMap.count(BasePtr) &&
+         "BasePtr either expected in AliasScopeMap and OtherAlias...Map");
+  auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
+
+  if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
+    // Such base pointers get the finer-grained second level annotation.
+    annotateSecondLevel(Inst, BasePtr);
+  } else {
+    Inst->setMetadata("alias.scope", AliasScope);
+    Inst->setMetadata("noalias", OtherAliasScopeList);
+  }
+}
+
+/// Record @p BasePtr as a base pointer whose accesses receive second level
+/// alias annotations in annotate(). Null pointers are ignored.
+void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
+  if (BasePtr)
+    InterIterationAliasFreeBasePtrs.insert(BasePtr);
+}
--- a/external/llvm-project/polly/lib/CodeGen/IslAst.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/IslAst.cpp
--- a/external/llvm-project/polly/lib/CodeGen/IslExprBuilder.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/IslExprBuilder.cpp
--- a/external/llvm-project/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/IslNodeBuilder.cpp
--- a/external/llvm-project/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/LoopGenerators.cpp
@@ -0,0 +1,381 @@
+//===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and parallel loops as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/LoopGenerators.h"
+#include "polly/ScopDetection.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+using namespace polly;
+
+// Hidden command line option '-polly-num-threads'. Its value is handed to the
+// GOMP_parallel_loop_runtime_start call emitted by createCallSpawnThreads as
+// the thread-count argument; the default of 0 is described as "auto".
+static cl::opt<int>
+    PollyNumThreads("polly-num-threads",
+                    cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
+                    cl::init(0));
+
+// We generate a loop of either of the following structures:
+//
+//              BeforeBB                      BeforeBB
+//                 |                             |
+//                 v                             v
+//              GuardBB                      PreHeaderBB
+//              /      |                         |   _____
+//     __  PreHeaderBB  |                        v  \/    |
+//    /  \    /         |                     HeaderBB  latch
+// latch  HeaderBB      |                        |\       |
+//    \  /    \         /                        | \------/
+//     <       \       /                         |
+//              \     /                          v
+//              ExitBB                         ExitBB
+//
+// depending on whether or not we know that it is executed at least once. If
+// not, GuardBB checks if the loop is executed at least once. If this is the
+// case we branch to PreHeaderBB and subsequently to the HeaderBB, which
+// contains the loop iv 'polly.indvar', the incremented loop iv
+// 'polly.indvar_next' as well as the condition to check if we execute another
+// iteration of the loop. After the loop has finished, we branch to ExitBB.
+// We expect the type of UB, LB, UB+Stride to be large enough for values that
+// UB may take throughout the execution of the loop, including the computation
+// of indvar + Stride before the final abort.
+//
+// Parameters:
+//   LB, UB     Lower and upper bound; both must have the same integer type,
+//              which also becomes the type of the induction variable.
+//   Stride     Increment of the induction variable; zero-extended or bitcast
+//              to the induction variable type.
+//   Builder    Used to emit all instructions. On return its insert point is
+//              the first non-PHI instruction of the loop header.
+//   LI, DT     Updated for the newly created blocks and the new loop.
+//   ExitBB     Out parameter; set to the block that is split off the current
+//              block at the builder's insert point ('polly.loop_exit').
+//   Predicate  Comparison used for both the guard (LB vs. UB) and the loop
+//              condition (incremented induction variable vs. UB).
+//   Annotator  If non-null, it is notified of the new loop and of its latch
+//              branch.
+//   Parallel, LoopVectDisabled
+//              Only forwarded to the annotator.
+//   UseGuard   If true, the variant with GuardBB shown above is generated.
+//
+// Returns the PHI node 'polly.indvar'.
+Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
+                         PollyIRBuilder &Builder, LoopInfo &LI,
+                         DominatorTree &DT, BasicBlock *&ExitBB,
+                         ICmpInst::Predicate Predicate,
+                         ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
+                         bool LoopVectDisabled) {
+  Function *F = Builder.GetInsertBlock()->getParent();
+  LLVMContext &Context = F->getContext();
+
+  assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
+  // NOTE(review): with assertions compiled out, a non-integer UB type leaves
+  // LoopIVType null for the uses below.
+  IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
+  assert(LoopIVType && "UB is not integer?");
+
+  // The guard block is only created when requested; a null GuardBB selects
+  // the unguarded variant everywhere below.
+  BasicBlock *BeforeBB = Builder.GetInsertBlock();
+  BasicBlock *GuardBB =
+      UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
+  BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
+  BasicBlock *PreHeaderBB =
+      BasicBlock::Create(Context, "polly.loop_preheader", F);
+
+  // Update LoopInfo
+  Loop *OuterLoop = LI.getLoopFor(BeforeBB);
+  Loop *NewLoop = LI.AllocateLoop();
+
+  // The new loop becomes a child of the loop containing BeforeBB, or a
+  // top-level loop if BeforeBB is not inside any loop.
+  if (OuterLoop)
+    OuterLoop->addChildLoop(NewLoop);
+  else
+    LI.addTopLevelLoop(NewLoop);
+
+  // GuardBB and PreHeaderBB are not part of the new loop; they are only
+  // registered with the surrounding loop, if there is one.
+  if (OuterLoop) {
+    if (GuardBB)
+      OuterLoop->addBasicBlockToLoop(GuardBB, LI);
+    OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
+  }
+
+  NewLoop->addBasicBlockToLoop(HeaderBB, LI);
+
+  // Notify the annotator (if present) that we have a new loop, but only
+  // after the header block is set.
+  if (Annotator)
+    Annotator->pushLoop(NewLoop, Parallel);
+
+  // ExitBB
+  // Everything from the current insert point onwards moves into ExitBB.
+  ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
+  ExitBB->setName("polly.loop_exit");
+
+  // BeforeBB
+  // Redirect the terminator of BeforeBB (which led to ExitBB after the split)
+  // to the guard or, without a guard, directly to the preheader.
+  if (GuardBB) {
+    BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
+    DT.addNewBlock(GuardBB, BeforeBB);
+
+    // GuardBB
+    Builder.SetInsertPoint(GuardBB);
+    Value *LoopGuard;
+    LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
+    LoopGuard->setName("polly.loop_guard");
+    Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
+    DT.addNewBlock(PreHeaderBB, GuardBB);
+  } else {
+    BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
+    DT.addNewBlock(PreHeaderBB, BeforeBB);
+  }
+
+  // PreHeaderBB
+  Builder.SetInsertPoint(PreHeaderBB);
+  Builder.CreateBr(HeaderBB);
+
+  // HeaderBB
+  DT.addNewBlock(HeaderBB, PreHeaderBB);
+  Builder.SetInsertPoint(HeaderBB);
+  // The induction variable starts at LB when entered from the preheader; its
+  // second incoming value (from the back edge) is added further down.
+  PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
+  IV->addIncoming(LB, PreHeaderBB);
+  Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
+  Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
+  Value *LoopCondition =
+      Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
+
+  // Create the loop latch and annotate it as such.
+  BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
+  if (Annotator)
+    Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
+
+  IV->addIncoming(IncrementedIV, HeaderBB);
+  // ExitBB is branched to from HeaderBB and, if present, from GuardBB; with a
+  // guard the immediate dominator is therefore GuardBB, otherwise HeaderBB.
+  if (GuardBB)
+    DT.changeImmediateDominator(ExitBB, GuardBB);
+  else
+    DT.changeImmediateDominator(ExitBB, HeaderBB);
+
+  // The loop body should be added here.
+  Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
+  return IV;
+}
+
+// Emit a loop that is executed in parallel through the GOMP runtime calls.
+//
+// The values in UsedValues are stored into a context struct, the loop itself
+// is created inside a new sub-function (see createSubFn), and the calls that
+// start the parallel loop, run the sub-function on the calling thread and end
+// the parallel region are emitted at the position the builder pointed to
+// after the context struct was filled.
+//
+// LoopBody receives the builder position inside the sub-function at which the
+// loop body is to be generated. The induction variable returned by createSubFn
+// is returned.
+Value *ParallelLoopGenerator::createParallelLoop(
+    Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
+    ValueMapT &Map, BasicBlock::iterator *LoopBody) {
+  AllocaInst *Context = storeValuesIntoStruct(UsedValues);
+
+  // createSubFn moves the builder into the sub-function; remember where to
+  // continue in the calling function afterwards.
+  BasicBlock::iterator ResumeIP = Builder.GetInsertPoint();
+  Function *OutlinedFn = nullptr;
+  Value *IndVar = createSubFn(Stride, Context, UsedValues, Map, &OutlinedFn);
+  *LoopBody = Builder.GetInsertPoint();
+  Builder.SetInsertPoint(&*ResumeIP);
+
+  Value *OpaqueContext = Builder.CreateBitCast(
+      Context, Builder.getInt8PtrTy(), "polly.par.userContext");
+
+  // The OpenMP runtime treats the upper bound as exclusive (<) while the
+  // sequential code generation uses an inclusive one (<=), hence the + 1.
+  Value *ExclusiveUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
+
+  // Start the parallel loop, take part in it on this thread, then join.
+  createCallSpawnThreads(OutlinedFn, OpaqueContext, LB, ExclusiveUB, Stride);
+  Builder.CreateCall(OutlinedFn, OpaqueContext);
+  createCallJoinThreads();
+
+  return IndVar;
+}
+
+// Emit a call to GOMP_parallel_loop_runtime_start, declaring the function in
+// the module first if no function of that name exists yet.
+//
+// The call receives SubFn, SubFnParam, the value of -polly-num-threads as an
+// i32, and LB, UB and Stride.
+void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
+                                                   Value *SubFnParam, Value *LB,
+                                                   Value *UB, Value *Stride) {
+  const char *const RuntimeFnName = "GOMP_parallel_loop_runtime_start";
+  Function *RuntimeFn = M->getFunction(RuntimeFnName);
+
+  if (!RuntimeFn) {
+    // Type of the function passed as first argument: void (i8 *).
+    FunctionType *BodyFnTy = FunctionType::get(
+        Builder.getVoidTy(), Builder.getInt8PtrTy(), false);
+
+    Type *ParamTys[] = {PointerType::getUnqual(BodyFnTy),
+                        Builder.getInt8PtrTy(),
+                        Builder.getInt32Ty(),
+                        LongType,
+                        LongType,
+                        LongType};
+
+    FunctionType *RuntimeFnTy =
+        FunctionType::get(Builder.getVoidTy(), ParamTys, false);
+    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
+                                 RuntimeFnName, M);
+  }
+
+  Value *CallArgs[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
+                       LB,    UB,         Stride};
+  Builder.CreateCall(RuntimeFn, CallArgs);
+}
+
+// Emit a call to GOMP_loop_runtime_next(LBPtr, UBPtr), declaring the function
+// (i8 return type, two pointers to LongType) if the module does not have it.
+//
+// Returns the result of comparing the call's return value for inequality
+// with zero.
+Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
+                                                    Value *UBPtr) {
+  const char *const RuntimeFnName = "GOMP_loop_runtime_next";
+  Function *RuntimeFn = M->getFunction(RuntimeFnName);
+
+  if (!RuntimeFn) {
+    Type *ParamTys[] = {LongType->getPointerTo(), LongType->getPointerTo()};
+    FunctionType *RuntimeFnTy =
+        FunctionType::get(Builder.getInt8Ty(), ParamTys, false);
+    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
+                                 RuntimeFnName, M);
+  }
+
+  Value *CallArgs[] = {LBPtr, UBPtr};
+  Value *RawResult = Builder.CreateCall(RuntimeFn, CallArgs);
+
+  // Widen 'false' to the type of the call result and compare against it.
+  Value *Zero = Builder.CreateZExt(Builder.getFalse(), RawResult->getType());
+  return Builder.CreateICmpNE(RawResult, Zero);
+}
+
+// Emit a call to GOMP_parallel_end, which takes no arguments and returns
+// void. The function is declared in the module if it does not exist yet.
+void ParallelLoopGenerator::createCallJoinThreads() {
+  const char *const RuntimeFnName = "GOMP_parallel_end";
+  Function *RuntimeFn = M->getFunction(RuntimeFnName);
+
+  if (!RuntimeFn) {
+    FunctionType *RuntimeFnTy = FunctionType::get(Builder.getVoidTy(), false);
+    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
+                                 RuntimeFnName, M);
+  }
+
+  Builder.CreateCall(RuntimeFn, {});
+}
+
+// Emit a call to GOMP_loop_end_nowait, which takes no arguments and returns
+// void. The function is declared in the module if it does not exist yet.
+void ParallelLoopGenerator::createCallCleanupThread() {
+  const char *const RuntimeFnName = "GOMP_loop_end_nowait";
+  Function *RuntimeFn = M->getFunction(RuntimeFnName);
+
+  if (!RuntimeFn) {
+    FunctionType *RuntimeFnTy = FunctionType::get(Builder.getVoidTy(), false);
+    RuntimeFn = Function::Create(RuntimeFnTy, Function::ExternalLinkage,
+                                 RuntimeFnName, M);
+  }
+
+  Builder.CreateCall(RuntimeFn, {});
+}
+
+// Create the (still empty) function that will hold the parallel loop.
+//
+// The function has internal linkage, returns void and takes one i8 pointer
+// named 'polly.par.userContext'. Its name is derived from the function the
+// builder currently inserts into, with the suffix "_polly_subfn", and it is
+// marked with PollySkipFnAttr.
+Function *ParallelLoopGenerator::createSubFnDefinition() {
+  Function *Caller = Builder.GetInsertBlock()->getParent();
+
+  Type *ParamTys[] = {Builder.getInt8PtrTy()};
+  FunctionType *SubFnTy =
+      FunctionType::get(Builder.getVoidTy(), ParamTys, false);
+  Function *SubFn = Function::Create(SubFnTy, Function::InternalLinkage,
+                                     Caller->getName() + "_polly_subfn", M);
+
+  // Certain backends (e.g., NVPTX) do not support '.'s in function names.
+  // The name is read back from the created function (rather than recomputed)
+  // and every '.' in it is replaced by '_'.
+  std::string Sanitized = SubFn->getName().str();
+  std::replace(Sanitized.begin(), Sanitized.end(), '.', '_');
+  SubFn->setName(Sanitized);
+
+  // Do not run any polly pass on the new function.
+  SubFn->addFnAttr(PollySkipFnAttr);
+
+  SubFn->arg_begin()->setName("polly.par.userContext");
+  return SubFn;
+}
+
+// Pack all values of Values into a freshly allocated struct.
+//
+// The struct has one member per value, in the iteration order of Values. The
+// alloca is placed at the first insertion point of the function's entry
+// block, whereas the stores that fill it are emitted at the builder's current
+// position. Returns the alloca.
+AllocaInst *
+ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
+  SmallVector<Type *, 8> MemberTys;
+  for (Value *Val : Values)
+    MemberTys.push_back(Val->getType());
+  StructType *ContextTy = StructType::get(Builder.getContext(), MemberTys);
+
+  // We do not want to allocate the alloca inside any loop, thus we allocate it
+  // in the entry block of the function.
+  BasicBlock *CurBB = Builder.GetInsertBlock();
+  const DataLayout &DL = CurBB->getModule()->getDataLayout();
+  BasicBlock &EntryBB = CurBB->getParent()->getEntryBlock();
+  AllocaInst *Context =
+      new AllocaInst(ContextTy, DL.getAllocaAddrSpace(), nullptr,
+                     "polly.par.userContext", &*EntryBB.getFirstInsertionPt());
+
+  // Store every value into the member with the matching index.
+  unsigned Slot = 0;
+  for (Value *Val : Values) {
+    Value *SlotAddr = Builder.CreateStructGEP(ContextTy, Context, Slot++);
+    SlotAddr->setName("polly.subfn.storeaddr." + Val->getName());
+    Builder.CreateStore(Val, SlotAddr);
+  }
+
+  return Context;
+}
+
+// Load every member of the struct pointed to by Struct (of type Ty) and
+// record in Map that the i-th value of OldValues is replaced by the i-th
+// loaded value.
+void ParallelLoopGenerator::extractValuesFromStruct(
+    SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
+  unsigned Slot = 0;
+  for (Value *Original : OldValues) {
+    Value *SlotAddr = Builder.CreateStructGEP(Ty, Struct, Slot++);
+    Value *Reloaded = Builder.CreateLoad(SlotAddr);
+    Reloaded->setName("polly.subfunc.arg." + Original->getName());
+    Map[Original] = Reloaded;
+  }
+}
+
+// Create the sub-function that contains the parallel loop and fill in its
+// control flow:
+//
+//   polly.par.setup        allocates the LB/UB slots and reloads the values
+//                          of Data from the context struct
+//   polly.par.checkNext    asks the runtime for another chunk of iterations
+//   polly.par.loadIVBounds loads the bounds of that chunk and runs the loop
+//                          created by createLoop, then jumps back to
+//                          polly.par.checkNext
+//   polly.par.exit         calls the cleanup function and returns
+//
+// Stride is the loop stride, StructData the alloca holding the values of
+// Data, and Map receives the mapping from the values in Data to their
+// reloaded counterparts. The created function is stored to *SubFnPtr. On
+// return the builder is positioned where createLoop left it, i.e. where the
+// loop body is to be generated. Returns the induction variable of the loop.
+Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
+                                          SetVector<Value *> Data,
+                                          ValueMapT &Map, Function **SubFnPtr) {
+  BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
+  Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
+  Function *SubFn = createSubFnDefinition();
+  LLVMContext &Context = SubFn->getContext();
+
+  // Store the previous basic block.
+  PrevBB = Builder.GetInsertBlock();
+
+  // Create basic blocks.
+  HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
+  ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
+  CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
+  PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
+
+  // NOTE(review): PrevBB is the block the builder was in before, i.e. a block
+  // of the calling function, while HeaderBB belongs to SubFn.
+  DT.addNewBlock(HeaderBB, PrevBB);
+  DT.addNewBlock(ExitBB, HeaderBB);
+  DT.addNewBlock(CheckNextBB, HeaderBB);
+  DT.addNewBlock(PreHeaderBB, HeaderBB);
+
+  // Fill up basic block HeaderBB.
+  Builder.SetInsertPoint(HeaderBB);
+  LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
+  UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
+  // The sub-function receives the context as i8 *; cast it back to the
+  // pointer type of the struct alloca.
+  UserContext = Builder.CreateBitCast(
+      &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
+
+  extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
+                          Map);
+  Builder.CreateBr(CheckNextBB);
+
+  // Add code to check if another set of iterations will be executed.
+  Builder.SetInsertPoint(CheckNextBB);
+  Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
+  // createCallGetWorkItem returns the result of an integer comparison; it is
+  // truncated to i1 here before being used as branch condition.
+  HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
+                                        "polly.par.hasNextScheduleBlock");
+  Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
+
+  // Add code to load the iv bounds for this set of iterations.
+  Builder.SetInsertPoint(PreHeaderBB);
+  LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
+  UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
+
+  // Subtract one as the upper bound provided by OpenMP is a < comparison
+  // whereas the codegenForSequential function creates a <= comparison.
+  UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
+                         "polly.par.UBAdjusted");
+
+  // Terminate the block with the jump back to CheckNextBB, then step the
+  // insert point back in front of that branch so that createLoop splits the
+  // block there and the branch ends up in the loop's exit block.
+  Builder.CreateBr(CheckNextBB);
+  Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
+  // No annotator (nullptr), Parallel = true, no guard block.
+  IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
+                  nullptr, true, /* UseGuard */ false);
+
+  // Remember where the loop body goes; the builder is moved away temporarily
+  // to fill ExitBB.
+  BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
+
+  // Add code to terminate this subfunction.
+  Builder.SetInsertPoint(ExitBB);
+  createCallCleanupThread();
+  Builder.CreateRetVoid();
+
+  Builder.SetInsertPoint(&*LoopBody);
+  *SubFnPtr = SubFn;
+
+  return IV;
+}
--- a/external/llvm-project/polly/lib/CodeGen/ManagedMemoryRewrite.cpp
+++ b/external/llvm-project/polly/lib/CodeGen/ManagedMemoryRewrite.cpp
@@ -0,0 +1,442 @@
+//===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a module and rewrite:
+// 1. `malloc` -> `polly_mallocManaged`
+// 2. `free` -> `polly_freeManaged`
+// 3. global arrays with initializers -> global arrays that are initialized
+//                                       with a constructor call to
+//                                       `polly_mallocManaged`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/CodeGeneration.h"
+#include "polly/CodeGen/IslAst.h"
+#include "polly/CodeGen/IslNodeBuilder.h"
+#include "polly/CodeGen/PPCGCodeGeneration.h"
+#include "polly/CodeGen/Utils.h"
+#include "polly/DependenceInfo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopDetection.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/SCEVValidator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+// Hidden option '-polly-acc-rewrite-allocas' (default: false). When set,
+// runOnModule additionally rewrites the allocas collected by
+// getAllocasToBeManaged.
+static cl::opt<bool> RewriteAllocas(
+    "polly-acc-rewrite-allocas",
+    cl::desc(
+        "Ask the managed memory rewriter to also rewrite alloca instructions"),
+    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+// Hidden option '-polly-acc-rewrite-ignore-linkage-for-globals' (default:
+// false). When set, replaceGlobalArray no longer requires a global to have
+// private or internal linkage.
+static cl::opt<bool> IgnoreLinkageForGlobals(
+    "polly-acc-rewrite-ignore-linkage-for-globals",
+    cl::desc(
+        "By default, we only rewrite globals with internal linkage. This flag "
+        "enables rewriting of globals regardless of linkage"),
+    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+#define DEBUG_TYPE "polly-acc-rewrite-managed-memory"
+namespace {
+
+// Return the function `polly_mallocManaged` of module `M`. If the module has
+// no function of that name, an external declaration `i8 *(i64)` is created
+// and returned.
+static llvm::Function *getOrCreatePollyMallocManaged(Module &M) {
+  const char *Name = "polly_mallocManaged";
+  if (Function *Existing = M.getFunction(Name))
+    return Existing;
+
+  // Not available yet, declare it.
+  // TODO: How do I get `size_t`? I assume from DataLayout?
+  PollyIRBuilder Builder(M.getContext());
+  FunctionType *Ty =
+      FunctionType::get(Builder.getInt8PtrTy(), {Builder.getInt64Ty()}, false);
+  return Function::Create(Ty, Function::ExternalLinkage, Name, &M);
+}
+
+// Return the function `polly_freeManaged` of module `M`. If the module has
+// no function of that name, an external declaration `void(i8 *)` is created
+// and returned.
+static llvm::Function *getOrCreatePollyFreeManaged(Module &M) {
+  const char *Name = "polly_freeManaged";
+  if (Function *Existing = M.getFunction(Name))
+    return Existing;
+
+  // Not available yet, declare it.
+  PollyIRBuilder Builder(M.getContext());
+  FunctionType *Ty =
+      FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false);
+  return Function::Create(Ty, Function::ExternalLinkage, Name, &M);
+}
+
+// Expand a constant expression `Cur`, which is used at instruction `Parent`
+// at index `index`.
+// Since a constant expression can expand to multiple instructions, store all
+// the expands into a set called `Expands`.
+// Note that this goes inorder on the constant expression tree.
+// A * ((B * D) + C)
+// will be processed with first A, then B * D, then B, then D, and then C.
+// Though ConstantExprs are not treated as "trees" but as DAGs, since you can
+// have something like this:
+//    *
+//   /  \
+//   \  /
+//    (D)
+//
+// For the purposes of this expansion, we expand the two occurrences of D
+// separately. Therefore, we expand the DAG into the tree:
+//  *
+// / \
+// D  D
+// TODO: We don't _have_ to do this, but this is the simplest solution.
+// We can write a solution that keeps track of which constants have been
+// already expanded.
+//
+// `Builder` is used to insert the new instructions; its insert point is
+// changed by this function. Operand `index` of `Parent` is overwritten with
+// the instruction created for `Cur`.
+static void expandConstantExpr(ConstantExpr *Cur, PollyIRBuilder &Builder,
+                               Instruction *Parent, int index,
+                               SmallPtrSet<Instruction *, 4> &Expands) {
+  assert(Cur && "invalid constant expression passed");
+  Instruction *I = Cur->getAsInstruction();
+  assert(I && "unable to convert ConstantExpr to Instruction");
+
+  DEBUG(dbgs() << "Expanding ConstantExpression: (" << *Cur
+               << ") in Instruction: (" << *I << ")\n";);
+
+  // Invalidate `Cur` so that no one after this point uses `Cur`. Rather,
+  // they should mutate `I`.
+  Cur = nullptr;
+
+  Expands.insert(I);
+  Parent->setOperand(index, I);
+
+  // The things that `Parent` uses (its operands) should be created
+  // before `Parent`.
+  Builder.SetInsertPoint(Parent);
+  Builder.Insert(I);
+
+  // Recurse into the operands of the new instruction: every operand that is
+  // itself a constant expression is expanded in front of `I`.
+  for (unsigned i = 0; i < I->getNumOperands(); i++) {
+    Value *Op = I->getOperand(i);
+    assert(isa<Constant>(Op) && "constant must have a constant operand");
+
+    if (ConstantExpr *CExprOp = dyn_cast<ConstantExpr>(Op))
+      expandConstantExpr(CExprOp, Builder, I, i, Expands);
+  }
+}
+
+// Edit all uses of `OldVal` to `NewVal` in `Inst`. This will rewrite
+// `ConstantExpr`s that are used in the `Inst`.
+// Note that `replaceAllUsesWith` is insufficient for this purpose because it
+// does not rewrite values in `ConstantExpr`s.
+static void rewriteOldValToNew(Instruction *Inst, Value *OldVal, Value *NewVal,
+                               PollyIRBuilder &Builder) {
+  // Instructions in which `OldVal` has to be replaced: `Inst` itself plus
+  // every instruction created while expanding the `ConstantExpr` operands of
+  // `Inst`.
+  SmallPtrSet<Instruction *, 4> Worklist;
+  Worklist.insert(Inst);
+
+  // Turn each `ConstantExpr` operand into instructions; the expansion adds
+  // the instructions it creates to `Worklist`.
+  for (unsigned OpIdx = 0; OpIdx < Inst->getNumOperands(); ++OpIdx)
+    if (auto *CExpr = dyn_cast<ConstantExpr>(Inst->getOperand(OpIdx)))
+      expandConstantExpr(CExpr, Builder, Inst, OpIdx, Worklist);
+
+  // After the expansion the collected instructions no longer have
+  // `ConstantExpr` operands, so `replaceUsesOfWith` is sufficient.
+  for (Instruction *Expanded : Worklist)
+    Expanded->replaceUsesOfWith(OldVal, NewVal);
+}
+
+// Given a user `V` of some value, append to `Owners` the instructions that
+// (possibly indirectly) contain that use.
+// If `V` is an instruction, it is appended itself. Otherwise `V` is treated
+// as a `Constant`, and we recurse into the users of that constant until the
+// root instructions are reached.
+static void getInstructionUsersOfValue(Value *V,
+                                       SmallVector<Instruction *, 4> &Owners) {
+  if (auto *Inst = dyn_cast<Instruction>(V)) {
+    Owners.push_back(Inst);
+    return;
+  }
+
+  // Anything that is a `User` must be a constant or an instruction.
+  for (Use &U : cast<Constant>(V)->uses())
+    getInstructionUsersOfValue(U.getUser(), Owners);
+}
+
+// Replace the global array `Array` by a global pointer to memory obtained
+// from `polly_mallocManaged`.
+//
+// Nothing is done unless `Array` is of array type, has private or internal
+// linkage (or -polly-acc-rewrite-ignore-linkage-for-globals is given), and
+// has a zeroinitializer. Otherwise:
+//  - `Array` is added to `ReplacedGlobals`; it is not erased here.
+//  - A global `<name>.toptr` of element-pointer type is created (or reused)
+//    and initialized to null.
+//  - A function `<name>.constructor` is created that allocates the array's
+//    alloc size (taken from `DL`) via `polly_mallocManaged` and stores the
+//    result into `<name>.toptr`; it is appended to the global constructors
+//    of `M`.
+//  - In front of every instruction that uses `Array`, directly or through
+//    constants, the pointer is loaded and cast to a pointer to the array
+//    type, and the instruction is rewritten to use it instead of `Array`.
+static void
+replaceGlobalArray(Module &M, const DataLayout &DL, GlobalVariable &Array,
+                   SmallPtrSet<GlobalVariable *, 4> &ReplacedGlobals) {
+  // We only want arrays.
+  ArrayType *ArrayTy = dyn_cast<ArrayType>(Array.getType()->getElementType());
+  if (!ArrayTy)
+    return;
+  Type *ElemTy = ArrayTy->getElementType();
+  PointerType *ElemPtrTy = ElemTy->getPointerTo();
+
+  // We only wish to replace arrays that are visible in the module they
+  // inhabit. Otherwise, our type edit from [T] to T* would be illegal across
+  // modules.
+  const bool OnlyVisibleInsideModule = Array.hasPrivateLinkage() ||
+                                       Array.hasInternalLinkage() ||
+                                       IgnoreLinkageForGlobals;
+  if (!OnlyVisibleInsideModule) {
+    DEBUG(dbgs() << "Not rewriting (" << Array
+                 << ") to managed memory "
+                    "because it could be visible externally. To force rewrite, "
+                    "use -polly-acc-rewrite-ignore-linkage-for-globals.\n");
+    return;
+  }
+
+  // NOTE(review): this branch is also taken for globals without any
+  // initializer, although the debug message only mentions a non-zero one.
+  if (!Array.hasInitializer() ||
+      !isa<ConstantAggregateZero>(Array.getInitializer())) {
+    DEBUG(dbgs() << "Not rewriting (" << Array
+                 << ") to managed memory "
+                    "because it has an initializer which is "
+                    "not a zeroinitializer.\n");
+    return;
+  }
+
+  // At this point, we have committed to replacing this array.
+  ReplacedGlobals.insert(&Array);
+
+  // The pointer that replaces the array; it starts out as null and is set by
+  // the constructor function created below.
+  std::string NewName = Array.getName();
+  NewName += ".toptr";
+  GlobalVariable *ReplacementToArr =
+      cast<GlobalVariable>(M.getOrInsertGlobal(NewName, ElemPtrTy));
+  ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy));
+
+  // Build `void <name>.constructor()`, which allocates the memory and stores
+  // its address into the replacement pointer.
+  Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M);
+  std::string FnName = Array.getName();
+  FnName += ".constructor";
+  PollyIRBuilder Builder(M.getContext());
+  FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
+  const GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+  Function *F = Function::Create(Ty, Linkage, FnName, &M);
+  BasicBlock *Start = BasicBlock::Create(M.getContext(), "entry", F);
+  Builder.SetInsertPoint(Start);
+
+  const uint64_t ArraySizeInt = DL.getTypeAllocSize(ArrayTy);
+  Value *ArraySize = Builder.getInt64(ArraySizeInt);
+  ArraySize->setName("array.size");
+
+  Value *AllocatedMemRaw =
+      Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw");
+  Value *AllocatedMemTyped =
+      Builder.CreatePointerCast(AllocatedMemRaw, ElemPtrTy, "mem.typed");
+  Builder.CreateStore(AllocatedMemTyped, ReplacementToArr);
+  Builder.CreateRetVoid();
+
+  const int Priority = 0;
+  appendToGlobalCtors(M, F, Priority, ReplacementToArr);
+
+  SmallVector<Instruction *, 4> ArrayUserInstructions;
+  // Get all instructions that use array. We need to do this weird thing
+  // because `Constant`s that contain this array need to be expanded into
+  // instructions so that we can replace their parameters. `Constant`s cannot
+  // be edited easily, so we choose to convert all `Constant`s to
+  // `Instruction`s and handle all of the uses of `Array` uniformly.
+  for (Use &ArrayUse : Array.uses())
+    getInstructionUsersOfValue(ArrayUse.getUser(), ArrayUserInstructions);
+
+  // The users were collected up front; the loop below changes the use list
+  // of `Array`.
+  for (Instruction *UserOfArrayInst : ArrayUserInstructions) {
+
+    Builder.SetInsertPoint(UserOfArrayInst);
+    // <ty>** -> <ty>*
+    Value *ArrPtrLoaded = Builder.CreateLoad(ReplacementToArr, "arrptr.load");
+    // <ty>* -> [ty]*
+    Value *ArrPtrLoadedBitcasted = Builder.CreateBitCast(
+        ArrPtrLoaded, ArrayTy->getPointerTo(), "arrptr.bitcast");
+    rewriteOldValToNew(UserOfArrayInst, &Array, ArrPtrLoadedBitcasted, Builder);
+  }
+}
+
+// Collect into `Allocas` every alloca instruction of `F` for which
+// `PointerMayBeCaptured` (ignoring captures by return, counting captures by
+// store) reports that the pointer may be captured. These are the allocas
+// that may need to be converted to managed memory.
+static void getAllocasToBeManaged(Function &F,
+                                  SmallSet<AllocaInst *, 4> &Allocas) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &Inst : BB) {
+      if (auto *Candidate = dyn_cast<AllocaInst>(&Inst)) {
+        DEBUG(dbgs() << "Checking if (" << *Candidate
+                     << ") may be captured: ");
+
+        const bool MayBeCaptured =
+            PointerMayBeCaptured(Candidate, /* ReturnCaptures */ false,
+                                 /* StoreCaptures */ true);
+        if (MayBeCaptured)
+          Allocas.insert(Candidate);
+
+        DEBUG(dbgs() << (MayBeCaptured ? "YES (captured).\n"
+                                       : "NO (not captured).\n"));
+      }
+    }
+  }
+}
+
+// Replace `Alloca` by memory obtained from `polly_mallocManaged`.
+//
+// A call to `polly_mallocManaged` is inserted in front of `Alloca`, its
+// result is cast to the type of `Alloca`, takes over its name and replaces
+// all of its uses; `Alloca` is then erased. In addition, a call to
+// `polly_freeManaged` on the allocated memory is inserted in front of every
+// return instruction of the function that contained `Alloca`.
+// `DL` is used to compute the allocation size.
+static void rewriteAllocaAsManagedMemory(AllocaInst *Alloca,
+                                         const DataLayout &DL) {
+  DEBUG(dbgs() << "rewriting: (" << *Alloca << ") to managed mem.\n");
+  Module *M = Alloca->getModule();
+  assert(M && "Alloca does not have a module");
+
+  PollyIRBuilder Builder(M->getContext());
+  Builder.SetInsertPoint(Alloca);
+
+  Value *MallocManagedFn = getOrCreatePollyMallocManaged(*Alloca->getModule());
+  // NOTE(review): the size only covers one object of the allocated type; the
+  // array-size operand of the alloca is not consulted - confirm that array
+  // allocas are not expected here.
+  const uint64_t Size =
+      DL.getTypeAllocSize(Alloca->getType()->getElementType());
+  Value *SizeVal = Builder.getInt64(Size);
+  Value *RawManagedMem = Builder.CreateCall(MallocManagedFn, {SizeVal});
+  Value *Bitcasted = Builder.CreateBitCast(RawManagedMem, Alloca->getType());
+
+  // Fetch the function now; `Alloca` is erased below and cannot be queried
+  // afterwards.
+  Function *F = Alloca->getFunction();
+  assert(F && "Alloca has invalid function");
+
+  Bitcasted->takeName(Alloca);
+  Alloca->replaceAllUsesWith(Bitcasted);
+  Alloca->eraseFromParent();
+
+  // Release the memory in front of every return of the function. Blocks
+  // whose terminator is not a return are skipped.
+  for (BasicBlock &BB : *F) {
+    ReturnInst *Return = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!Return)
+      continue;
+    Builder.SetInsertPoint(Return);
+
+    Value *FreeManagedFn = getOrCreatePollyFreeManaged(*M);
+    Builder.CreateCall(FreeManagedFn, {RawManagedMem});
+  }
+}
+
+// Replace all uses of `Old` with `New`, even inside `ConstantExpr`.
+//
+// `replaceAllUsesWith` does not replace values in `ConstantExpr`. This
+// function actually does replace them in `ConstantExpr`. The caveat is that
+// if there is a use that is *outside* a function (say, at global
+// declarations), we fail. So, this is meant to be used on values which we
+// know will only be used within functions.
+//
+// This process works by looking through the uses of `Old`. If it finds a
+// `ConstantExpr`, it recursively looks for the owning instruction.
+// Then, it expands all the `ConstantExpr` to instructions and replaces
+// `Old` with `New` in the expanded instructions.
+static void replaceAllUsesAndConstantUses(Value *Old, Value *New,
+                                          PollyIRBuilder &Builder) {
+  // First gather every instruction that uses `Old`, directly or through
+  // constants. `Constant`s cannot be edited easily, so uses inside them are
+  // handled by converting the constants to instructions; gathering the
+  // owning instructions lets all uses of `Old` be treated uniformly.
+  SmallVector<Instruction *, 4> Owners;
+  for (Use &OldUse : Old->uses())
+    getInstructionUsersOfValue(OldUse.getUser(), Owners);
+
+  // Only then rewrite them, one instruction at a time.
+  for (Instruction *Owner : Owners)
+    rewriteOldValToNew(Owner, Old, New, Builder);
+}
+
+// Module pass that redirects memory allocations to the managed-memory
+// helpers `polly_mallocManaged` / `polly_freeManaged`.
+class ManagedMemoryRewritePass : public ModulePass {
+public:
+  static char ID;
+  // Set by the factory function after construction; not read inside this
+  // class.
+  GPUArch Architecture;
+  GPURuntime Runtime;
+
+  ManagedMemoryRewritePass() : ModulePass(ID) {}
+
+  // Rewrite module `M` in four steps:
+  //  1. uses of `malloc` are redirected to `polly_mallocManaged`,
+  //  2. uses of `free` are redirected to `polly_freeManaged`,
+  //  3. global arrays accepted by replaceGlobalArray are replaced and erased,
+  //  4. if -polly-acc-rewrite-allocas is set, the allocas selected by
+  //     getAllocasToBeManaged are rewritten as well.
+  // Always returns true.
+  virtual bool runOnModule(Module &M) {
+    const DataLayout &DL = M.getDataLayout();
+
+    if (Function *Malloc = M.getFunction("malloc")) {
+      PollyIRBuilder Builder(M.getContext());
+      Function *ManagedMalloc = getOrCreatePollyMallocManaged(M);
+      assert(ManagedMalloc && "unable to create polly_mallocManaged");
+
+      replaceAllUsesAndConstantUses(Malloc, ManagedMalloc, Builder);
+      Malloc->eraseFromParent();
+    }
+
+    if (Function *Free = M.getFunction("free")) {
+      PollyIRBuilder Builder(M.getContext());
+      Function *ManagedFree = getOrCreatePollyFreeManaged(M);
+      assert(ManagedFree && "unable to create polly_freeManaged");
+
+      replaceAllUsesAndConstantUses(Free, ManagedFree, Builder);
+      Free->eraseFromParent();
+    }
+
+    // Replaced globals are only recorded while iterating and erased
+    // afterwards.
+    SmallPtrSet<GlobalVariable *, 4> ReplacedGlobals;
+    for (GlobalVariable &Global : M.globals())
+      replaceGlobalArray(M, DL, Global, ReplacedGlobals);
+    for (GlobalVariable *Replaced : ReplacedGlobals)
+      Replaced->eraseFromParent();
+
+    // Allocas are only rewritten if we are asked to do so.
+    if (!RewriteAllocas)
+      return true;
+
+    SmallSet<AllocaInst *, 4> ManagedAllocas;
+    for (Function &F : M.functions())
+      getAllocasToBeManaged(F, ManagedAllocas);
+
+    for (AllocaInst *Alloca : ManagedAllocas)
+      rewriteAllocaAsManagedMemory(Alloca, DL);
+
+    return true;
+  }
+};
+
+} // namespace
+char ManagedMemoryRewritePass::ID = 42;
+
+// Create a new ManagedMemoryRewritePass and store the given architecture and
+// runtime in it. The pass is allocated with `new` and returned as a raw
+// pointer.
+Pass *polly::createManagedMemoryRewritePassPass(GPUArch Arch,
+                                                GPURuntime Runtime) {
+  auto *RewritePass = new ManagedMemoryRewritePass();
+  RewritePass->Architecture = Arch;
+  RewritePass->Runtime = Runtime;
+  return RewritePass;
+}
+
+// Pass registration: ManagedMemoryRewritePass is made available under the
+// name "polly-acc-rewrite-managed-memory"; the passes listed in between are
+// declared as its dependencies.
+INITIALIZE_PASS_BEGIN(
+    ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
+    "Polly - Rewrite all allocations in heap & data section to managed memory",
+    false, false)
+INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration);
+INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
+INITIALIZE_PASS_END(
+    ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
+    "Polly - Rewrite all allocations in heap & data section to managed memory",
+    false, false)
--- a/external/llvm-project/polly/lib/CodeGen/PPCGCodeGeneration.cpp.REMOVED.git-id
+++ b/external/llvm-project/polly/lib/CodeGen/PPCGCodeGeneration.cpp.REMOVED.git-id
@@ -0,0 +1 @@
+d6652146483f6426c13e661b939c1fc78abc4008
--- a/Show More
+++ b/Show More