Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions
--- a/external/llvm-project/polly/lib/Transform/Canonicalization.cpp
+++ b/external/llvm-project/polly/lib/Transform/Canonicalization.cpp
@@ -0,0 +1,97 @@
+//===---- Canonicalization.cpp - Run canonicalization passes --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Run the set of default canonicalization passes.
+//
+// This pass is mainly used for debugging.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/Canonicalization.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace polly;
+
+// Hidden boolean flag, off by default. registerCanonicalicationPasses()
+// consults it to decide whether the inliner stage is scheduled.
+static cl::opt<bool>
+    PollyInliner("polly-run-inliner",
+                 cl::desc("Run an early inliner pass before Polly"), cl::Hidden,
+                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+/// Append the default canonicalization pass sequence to @p PM.
+///
+/// The passes are added in the fixed order shown below. The inliner stage in
+/// the middle is only scheduled when the -polly-run-inliner flag is set.
+void polly::registerCanonicalicationPasses(llvm::legacy::PassManagerBase &PM) {
+  // Forwarded unchanged to createEarlyCSEPass() below.
+  bool UseMemSSA = true;
+  PM.add(polly::createRewriteByrefParamsPass());
+  PM.add(llvm::createPromoteMemoryToRegisterPass());
+  PM.add(llvm::createEarlyCSEPass(UseMemSSA));
+  PM.add(llvm::createInstructionCombiningPass());
+  PM.add(llvm::createCFGSimplificationPass());
+  PM.add(llvm::createTailCallEliminationPass());
+  PM.add(llvm::createCFGSimplificationPass());
+  PM.add(llvm::createReassociatePass());
+  PM.add(llvm::createLoopRotatePass());
+  if (PollyInliner) {
+    // Optional stage: inline (argument 200, presumably the inlining
+    // threshold -- confirm against the LLVM API), then re-run a few clean-up
+    // passes and end the stage with a barrier no-op pass.
+    PM.add(llvm::createFunctionInliningPass(200));
+    PM.add(llvm::createPromoteMemoryToRegisterPass());
+    PM.add(llvm::createCFGSimplificationPass());
+    PM.add(llvm::createInstructionCombiningPass());
+    PM.add(createBarrierNoopPass());
+  }
+  PM.add(llvm::createInstructionCombiningPass());
+  PM.add(llvm::createIndVarSimplifyPass());
+  // Polly's own code preparation pass is always added last.
+  PM.add(polly::createCodePreparationPass());
+}
+
+namespace {
+/// Module pass wrapping the canonicalization sequence.
+///
+/// The pass carries no data members of its own; copying is disabled.
+class PollyCanonicalize : public ModulePass {
+  PollyCanonicalize(const PollyCanonicalize &) = delete;
+  const PollyCanonicalize &operator=(const PollyCanonicalize &) = delete;
+
+public:
+  /// Pass identifier handed to the ModulePass constructor.
+  static char ID;
+
+  explicit PollyCanonicalize() : ModulePass(ID) {}
+  ~PollyCanonicalize() override;
+
+  /// @name ModulePass interface.
+  //@{
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  bool runOnModule(Module &M) override;
+  void print(raw_ostream &OS, const Module *) const override;
+  //@}
+};
+} // namespace
+
+// Nothing to tear down: the class declares no data members.
+PollyCanonicalize::~PollyCanonicalize() {}
+
+// No analysis is requested and nothing is marked as preserved.
+void PollyCanonicalize::getAnalysisUsage(AnalysisUsage &AU) const {}
+
+// Intentionally empty; there is no cached state to release.
+void PollyCanonicalize::releaseMemory() {}
+
+/// Run the canonicalization sequence over @p M.
+///
+/// A fresh legacy pass manager is populated through
+/// registerCanonicalicationPasses() and executed once. The module is always
+/// reported as changed; the result of the nested run is not inspected.
+bool PollyCanonicalize::runOnModule(Module &M) {
+  legacy::PassManager Pipeline;
+  registerCanonicalicationPasses(Pipeline);
+  Pipeline.run(M);
+  return true;
+}
+
+// Intentionally prints nothing.
+void PollyCanonicalize::print(raw_ostream &OS, const Module *) const {}
+
+// Storage for the pass identifier passed to the ModulePass constructor.
+char PollyCanonicalize::ID = 0;
+
+// Factory: returns a newly allocated PollyCanonicalize instance.
+Pass *polly::createPollyCanonicalizePass() { return new PollyCanonicalize(); }
+
+// Registers the pass under the command-line name "polly-canonicalize".
+// No pass dependencies are listed between BEGIN and END.
+INITIALIZE_PASS_BEGIN(PollyCanonicalize, "polly-canonicalize",
+                      "Polly - Run canonicalization passes", false, false)
+INITIALIZE_PASS_END(PollyCanonicalize, "polly-canonicalize",
+                    "Polly - Run canonicalization passes", false, false)
--- a/external/llvm-project/polly/lib/Transform/CodePreparation.cpp
+++ b/external/llvm-project/polly/lib/Transform/CodePreparation.cpp
@@ -0,0 +1,122 @@
+//===---- CodePreparation.cpp - Code preparation for Scop Detection -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Polly code preparation pass is executed before SCoP detection. It
+// currently only splits the entry block of the SCoP to make room for alloca
+// instructions as they are generated during code generation.
+//
+// XXX: In the future, we should remove the need for this pass entirely and
+// instead add this splitting to the code generation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodePreparation.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/ScopDetection.h"
+#include "polly/Support/ScopHelper.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace polly;
+
+namespace {
+
+/// Prepare the IR for the scop detection.
+///
+/// Legacy function pass; copying is disabled.
+class CodePreparation : public FunctionPass {
+  CodePreparation(const CodePreparation &) = delete;
+  const CodePreparation &operator=(const CodePreparation &) = delete;
+
+  // Analysis results fetched in runOnFunction(). They start out null so the
+  // members never hold an indeterminate value before the first run.
+  LoopInfo *LI = nullptr;
+  ScalarEvolution *SE = nullptr;
+
+  void clear();
+
+public:
+  /// Pass identifier handed to the FunctionPass constructor.
+  static char ID;
+
+  explicit CodePreparation() : FunctionPass(ID) {}
+  ~CodePreparation() override;
+
+  /// @name FunctionPass interface.
+  //@{
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  bool runOnFunction(Function &F) override;
+  void print(raw_ostream &OS, const Module *) const override;
+  //@}
+};
+} // namespace
+
+/// New-pass-manager entry point: split the entry block of @p F.
+///
+/// The dominator tree and loop info are fetched from @p FAM, handed to
+/// splitEntryBlockForAlloca() and reported as preserved afterwards.
+PreservedAnalyses CodePreparationPass::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  // Only the block itself is passed to the helper, so no split position is
+  // computed here.
+  auto &EntryBlock = F.getEntryBlock();
+
+  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+  auto &LI = FAM.getResult<LoopAnalysis>(F);
+
+  // The helper receives DT and LI so it can update them; the last argument
+  // (presumably RegionInfo) is not supplied.
+  splitEntryBlockForAlloca(&EntryBlock, &DT, &LI, nullptr);
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<LoopAnalysis>();
+  return PA;
+}
+
+// Currently a no-op; called from the destructor and from releaseMemory().
+void CodePreparation::clear() {}
+
+CodePreparation::~CodePreparation() { clear(); }
+
+/// Declare the analyses this pass needs and the ones it keeps valid.
+void CodePreparation::getAnalysisUsage(AnalysisUsage &AU) const {
+  // Both are fetched in runOnFunction().
+  AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<ScalarEvolutionWrapperPass>();
+
+  // Analyses declared as still valid after the entry block has been split.
+  AU.addPreserved<LoopInfoWrapperPass>();
+  AU.addPreserved<RegionInfoPass>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+  AU.addPreserved<DominanceFrontierWrapperPass>();
+}
+
+/// Legacy-pass-manager entry point: split the entry block of @p F.
+///
+/// Returns false without touching @p F when skipFunction() asks to skip it;
+/// otherwise always returns true.
+bool CodePreparation::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  // Stored in the pass members; not read again inside this function.
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+  // The pass itself is handed to the helper as its second argument.
+  splitEntryBlockForAlloca(&F.getEntryBlock(), this);
+
+  return true;
+}
+
+// Delegates to clear(), which is currently empty.
+void CodePreparation::releaseMemory() { clear(); }
+
+// Intentionally prints nothing.
+void CodePreparation::print(raw_ostream &OS, const Module *) const {}
+
+// Storage for the pass identifier, also exported as polly::CodePreparationID.
+char CodePreparation::ID = 0;
+char &polly::CodePreparationID = CodePreparation::ID;
+
+// Factory: returns a newly allocated CodePreparation instance.
+Pass *polly::createCodePreparationPass() { return new CodePreparation(); }
+
+// Registers the pass under the command-line name "polly-prepare".
+// NOTE(review): getAnalysisUsage() also requires ScalarEvolutionWrapperPass,
+// which is not listed as a dependency here -- confirm whether that is
+// intentional.
+INITIALIZE_PASS_BEGIN(CodePreparation, "polly-prepare",
+                      "Polly - Prepare code for polly", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(CodePreparation, "polly-prepare",
+                    "Polly - Prepare code for polly", false, false)
--- a/external/llvm-project/polly/lib/Transform/DeLICM.cpp
+++ b/external/llvm-project/polly/lib/Transform/DeLICM.cpp
--- a/external/llvm-project/polly/lib/Transform/DeadCodeElimination.cpp
+++ b/external/llvm-project/polly/lib/Transform/DeadCodeElimination.cpp
@@ -0,0 +1,181 @@
+//===- DeadCodeElimination.cpp - Eliminate dead iteration  ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The polyhedral dead code elimination pass analyses a SCoP to eliminate
+// statement instances that can be proven dead.
+// As a consequence, the code generated for this SCoP may execute a statement
+// less often. This means, a statement may be executed only in certain loop
+// iterations or it may not even be part of the generated code at all.
+//
+// This code:
+//
+//    for (i = 0; i < N; i++)
+//        arr[i] = 0;
+//    for (i = 0; i < N; i++)
+//        arr[i] = 10;
+//    for (i = 0; i < N; i++)
+//        arr[i] = i;
+//
+// is e.g. simplified to:
+//
+//    for (i = 0; i < N; i++)
+//        arr[i] = i;
+//
+// The idea and the algorithm used was first implemented by Sven Verdoolaege in
+// the 'ppcg' tool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/DependenceInfo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "isl/flow.h"
+#include "isl/isl-noexceptions.h"
+#include "isl/map.h"
+#include "isl/set.h"
+#include "isl/union_map.h"
+#include "isl/union_set.h"
+
+using namespace llvm;
+using namespace polly;
+
+namespace {
+
+// Integer option (default -1) forwarded to eliminateDeadCode() as its
+// PreciseSteps argument.
+cl::opt<int> DCEPreciseSteps(
+    "polly-dce-precise-steps",
+    cl::desc("The number of precise steps between two approximating "
+             "iterations. (A value of -1 schedules another approximation stage "
+             "before the actual dead code elimination.)"),
+    cl::ZeroOrMore, cl::init(-1), cl::cat(PollyCategory));
+
+/// Scop pass that restricts statement domains to the iterations found live.
+class DeadCodeElim : public ScopPass {
+public:
+  /// Pass identifier handed to the ScopPass constructor.
+  static char ID;
+  explicit DeadCodeElim() : ScopPass(ID) {}
+
+  /// Remove dead iterations from the schedule of @p S.
+  bool runOnScop(Scop &S) override;
+
+  /// Register all analyses and transformation required.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  /// Return the set of live iterations.
+  ///
+  /// The set of live iterations are all iterations that write to memory and for
+  /// which we can not prove that there will be a later write that _must_
+  /// overwrite the same memory location and is consequently the only one that
+  /// is visible after the execution of the SCoP.
+  ///
+  isl::union_set getLiveOut(Scop &S);
+
+  /// Restrict the domains of @p S to the iterations computed as live.
+  ///
+  /// @param PreciseSteps Number of precise propagation steps between two
+  ///                     approximating ones; -1 approximates in every step.
+  /// @returns Whether any domain of @p S was changed.
+  bool eliminateDeadCode(Scop &S, int PreciseSteps);
+};
+} // namespace
+
+// Storage for the pass identifier passed to the ScopPass constructor.
+char DeadCodeElim::ID = 0;
+
+// Live-out computation: for every data location that is must-written, keep
+// the statement instance that writes it last according to the schedule. All
+// statement instances performing may-writes are then added on top.
+//
+// More precision would be possible by dropping may-writes that are known to
+// be overwritten by a later must-write. At present, however, the only
+// may-writes that get introduced touch the full (unbounded) array, so a
+// bounded write can never cover all of their locations. May-writes are thus
+// always live in the current situation and filtering them would gain nothing.
+isl::union_set DeadCodeElim::getLiveOut(Scop &S) {
+  const isl::union_map Sched = S.getSchedule();
+
+  // location -> schedule times of all must-writes to that location
+  const isl::union_map TimesPerLocation =
+      S.getMustWrites().reverse().apply_range(Sched);
+
+  // location -> statement instance scheduled at the last such time
+  const isl::union_map LastWriters =
+      TimesPerLocation.lexmax().apply_range(Sched.reverse());
+
+  const isl::union_set MustLive = LastWriters.range();
+  const isl::union_set MayLive = S.getMayWrites().domain();
+  return MustLive.unite(MayLive).coalesce();
+}
+
+/// Performs polyhedral dead iteration elimination by:
+/// o Assuming that the last write to each location is live.
+/// o Following each RAW dependency from a live iteration backwards and adding
+///   that iteration to the live set.
+///
+/// To ensure the set of live iterations does not get too complex we always
+/// combine a certain number of precise steps with one approximating step that
+/// simplifies the live set with an affine hull.
+///
+/// @param S            The SCoP whose domains are restricted.
+/// @param PreciseSteps Number of precise steps between two approximations.
+/// @returns false if no valid dependences are available; otherwise whether
+///          restricting the domains changed @p S.
+bool DeadCodeElim::eliminateDeadCode(Scop &S, int PreciseSteps) {
+  DependenceInfo &DI = getAnalysis<DependenceInfo>();
+  const Dependences &D = DI.getDependences(Dependences::AL_Statement);
+
+  if (!D.hasValidDependences())
+    return false;
+
+  isl::union_set Live = getLiveOut(S);
+  isl::union_map Dep = isl::manage(
+      D.getDependences(Dependences::TYPE_RAW | Dependences::TYPE_RED));
+  // Reversed so that applying Dep to the live set walks dependences backwards.
+  Dep = Dep.reverse();
+
+  // With -1 the initial live-out set is approximated as well.
+  if (PreciseSteps == -1)
+    Live = Live.affine_hull();
+
+  isl::union_set OriginalDomain = S.getDomains();
+  int Steps = 0;
+  // Grow Live until one more propagation step adds nothing new.
+  while (true) {
+    Steps++;
+
+    isl::union_set Extra = Live.apply(Dep);
+
+    if (Extra.is_subset(Live))
+      break;
+
+    Live = Live.unite(Extra);
+
+    // Steps is at least 1 here, so with PreciseSteps == -1 every round takes
+    // this approximating branch.
+    if (Steps > PreciseSteps) {
+      Steps = 0;
+      Live = Live.affine_hull();
+    }
+
+    // Keep the live set within the original statement domains.
+    Live = Live.intersect(OriginalDomain);
+  }
+
+  Live = Live.coalesce();
+
+  bool Changed = S.restrictDomains(Live);
+
+  // FIXME: We can probably avoid the recomputation of all dependences by
+  // updating them explicitly.
+  if (Changed)
+    DI.recomputeDependences(Dependences::AL_Statement);
+  return Changed;
+}
+
+/// ScopPass entry point: run the elimination on @p S with the step count
+/// taken from the DCEPreciseSteps option.
+bool DeadCodeElim::runOnScop(Scop &S) {
+  return eliminateDeadCode(S, DCEPreciseSteps);
+}
+
+/// Request DependenceInfo in addition to whatever ScopPass itself declares.
+void DeadCodeElim::getAnalysisUsage(AnalysisUsage &AU) const {
+  ScopPass::getAnalysisUsage(AU);
+  AU.addRequired<DependenceInfo>();
+}
+
+// Factory: returns a newly allocated DeadCodeElim instance.
+Pass *polly::createDeadCodeElimPass() { return new DeadCodeElim(); }
+
+// Registers the pass under the command-line name "polly-dce" and lists
+// DependenceInfo and ScopInfoRegionPass as its dependencies.
+INITIALIZE_PASS_BEGIN(DeadCodeElim, "polly-dce",
+                      "Polly - Remove dead iterations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DependenceInfo)
+INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass)
+INITIALIZE_PASS_END(DeadCodeElim, "polly-dce", "Polly - Remove dead iterations",
+                    false, false)
--- a/external/llvm-project/polly/lib/Transform/FlattenAlgo.cpp
+++ b/external/llvm-project/polly/lib/Transform/FlattenAlgo.cpp
@@ -0,0 +1,342 @@
+//===------ FlattenAlgo.cpp ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Main algorithm of the FlattenSchedulePass. This is a separate file to avoid
+// the unittest for this requiring linking against LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/FlattenAlgo.h"
+#include "polly/Support/ISLOStream.h"
+#include "polly/Support/ISLTools.h"
+#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "polly-flatten-algo"
+
+using namespace polly;
+using namespace llvm;
+
+namespace {
+
+/// Check whether set dimension @p dim of @p Set lies between two constants,
+/// i.e. whether constants Min and Max exist with Min <= x <= Max for every
+/// value x the dimension takes.
+///
+/// All parameters and every other set dimension are projected out first, so
+/// the boundedness test only sees the chosen dimension.
+bool isDimBoundedByConstant(isl::set Set, unsigned dim) {
+  const auto NumParams = Set.dim(isl::dim::param);
+  isl::set WithoutParams = Set.project_out(isl::dim::param, 0, NumParams);
+
+  // Drop the leading dimensions, then everything after the one of interest.
+  isl::set Tail = WithoutParams.project_out(isl::dim::set, 0, dim);
+  const auto NumTailDims = Tail.dim(isl::dim::set);
+  isl::set Single = Tail.project_out(isl::dim::set, 1, NumTailDims - 1);
+
+  return bool(Single.is_bounded());
+}
+
+/// Check whether set dimension @p dim of @p Set is bounded from below and
+/// above by constants or by expressions of the parameters p, i.e. whether
+/// Min_p and Max_p exist with Min_p <= x <= Max_p for every value x the
+/// dimension takes.
+///
+/// Unlike isDimBoundedByConstant(), the parameters are kept.
+bool isDimBoundedByParameter(isl::set Set, unsigned dim) {
+  // Drop the leading dimensions, then everything after the one of interest.
+  isl::set Tail = Set.project_out(isl::dim::set, 0, dim);
+  const auto NumTailDims = Tail.dim(isl::dim::set);
+  isl::set Single = Tail.project_out(isl::dim::set, 1, NumTailDims - 1);
+
+  return bool(Single.is_bounded());
+}
+
+/// Whether BMap's first out-dimension is not a constant.
+///
+/// Returns true when plain_get_val_if_fixed() yields either a null value or
+/// NaN for output dimension 0.
+bool isVariableDim(const isl::basic_map &BMap) {
+  auto FixedVal = BMap.plain_get_val_if_fixed(isl::dim::out, 0);
+  return !FixedVal || FixedVal.is_nan();
+}
+
+/// Whether Map's first out dimension is no constant nor piecewise constant.
+///
+/// The result is whether foreach_basic_map() returned isl::stat::ok. The
+/// callback returns isl::stat::error as soon as the basic_map overload
+/// reports a variable dimension.
+/// NOTE(review): as written this yields true when no visited basic map is
+/// variable, which reads as the opposite of the summary line above -- confirm
+/// the intended polarity before changing callers.
+bool isVariableDim(const isl::map &Map) {
+  return Map.foreach_basic_map([](isl::basic_map BMap) -> isl::stat {
+    if (isVariableDim(BMap))
+      return isl::stat::error;
+    return isl::stat::ok;
+  }) == isl::stat::ok;
+}
+
+/// Whether UMap's first out dimension is no (piecewise) constant.
+///
+/// The result is whether foreach_map() returned isl::stat::ok. The callback
+/// returns isl::stat::error as soon as the isl::map overload returns true for
+/// one of the maps.
+/// NOTE(review): the result therefore depends on the polarity of the
+/// isl::map overload; verify both together.
+bool isVariableDim(const isl::union_map &UMap) {
+  return UMap.foreach_map([](isl::map Map) -> isl::stat {
+    if (isVariableDim(Map))
+      return isl::stat::error;
+    return isl::stat::ok;
+  }) == isl::stat::ok;
+}
+
+/// Compute @p UPwAff - @p Val.
+///
+/// A zero @p Val hands @p UPwAff back untouched. Otherwise @p Val is
+/// subtracted from every piecewise affine function in the union and the
+/// results are collected into a new union.
+isl::union_pw_aff subtract(isl::union_pw_aff UPwAff, isl::val Val) {
+  if (Val.is_zero())
+    return UPwAff;
+
+  auto Difference = isl::union_pw_aff::empty(UPwAff.get_space());
+  auto SubtractFromPiece = [=, &Difference](isl::pw_aff Piece) -> isl::stat {
+    // Constant function with value Val over the piece's whole domain space.
+    auto WholeDomain = isl::set::universe(Piece.get_space().domain());
+    auto Shifted = Piece.sub(isl::pw_aff(WholeDomain, Val));
+    Difference = Difference.union_add(isl::union_pw_aff(Shifted));
+    return isl::stat::ok;
+  };
+  UPwAff.foreach_pw_aff(SubtractFromPiece);
+  return Difference;
+}
+
+/// Compute @p UPwAff * @p Val.
+///
+/// A @p Val of one hands @p UPwAff back untouched. Otherwise every piecewise
+/// affine function in the union is multiplied by @p Val and the results are
+/// collected into a new union.
+isl::union_pw_aff multiply(isl::union_pw_aff UPwAff, isl::val Val) {
+  if (Val.is_one())
+    return UPwAff;
+
+  auto Product = isl::union_pw_aff::empty(UPwAff.get_space());
+  auto ScalePiece = [=, &Product](isl::pw_aff Piece) -> isl::stat {
+    // Constant function with value Val over the piece's whole domain space.
+    auto WholeDomain = isl::set::universe(Piece.get_space().domain());
+    auto Scaled = Piece.mul(isl::pw_aff(WholeDomain, Val));
+    Product = Product.union_add(isl::union_pw_aff(Scaled));
+    return isl::stat::ok;
+  };
+  UPwAff.foreach_pw_aff(ScalePiece);
+  return Product;
+}
+
+/// Project @p n output dimensions, beginning at index @p first, out of every
+/// map contained in @p UMap.
+///
+/// Every map in the union is assumed to have enough out dimensions for this.
+isl::union_map scheduleProjectOut(const isl::union_map &UMap, unsigned first,
+                                  unsigned n) {
+  /* isl_map_project_out would also reset the tuple, which should have no
+     effect on schedule ranges; with nothing to remove the input is returned
+     as-is. */
+  if (n == 0)
+    return UMap;
+
+  auto Projected = isl::union_map::empty(UMap.get_space());
+  auto ProjectOne = [=, &Projected](isl::map Single) -> isl::stat {
+    Projected =
+        Projected.add_map(Single.project_out(isl::dim::out, first, n));
+    return isl::stat::ok;
+  };
+  UMap.foreach_map(ProjectOne);
+  return Projected;
+}
+
+/// Number of range (scatter) dimensions of @p Schedule.
+///
+/// The maps inside an isl_union_map may disagree on their number of out
+/// dimensions; the largest count is reported then. Note that the rest of this
+/// file does not support such mixed schedules.
+size_t scheduleScatterDims(const isl::union_map &Schedule) {
+  unsigned Widest = 0;
+  auto TrackWidest = [&Widest](isl::map Single) -> isl::stat {
+    const unsigned OutDims = Single.dim(isl::dim::out);
+    if (OutDims > Widest)
+      Widest = OutDims;
+    return isl::stat::ok;
+  };
+  Schedule.foreach_map(TrackWidest);
+  return Widest;
+}
+
+/// Extract range dimension @p pos of @p UMap as an isl_union_pw_aff.
+///
+/// Each map is reduced to that single out dimension; the reduced union is
+/// then converted via union_pw_multi_aff and multi_union_pw_aff, whose only
+/// component is returned.
+isl::union_pw_aff scheduleExtractDimAff(isl::union_map UMap, unsigned pos) {
+  auto OnlyPos = isl::union_map::empty(UMap.get_space());
+  auto KeepPos = [=, &OnlyPos](isl::map Single) -> isl::stat {
+    // Taken before any projection so the trailing count is correct.
+    const auto NumOut = Single.dim(isl::dim::out);
+    auto Reduced = Single.project_out(isl::dim::out, 0, pos)
+                       .project_out(isl::dim::out, 1, NumOut - pos - 1);
+    OnlyPos = OnlyPos.add_map(Reduced);
+    return isl::stat::ok;
+  };
+  UMap.foreach_map(KeepPos);
+
+  auto AsMultiAff = isl::multi_union_pw_aff(isl::union_pw_multi_aff(OnlyPos));
+  return AsMultiAff.get_union_pw_aff(0);
+}
+
+/// Flatten a sequence-like first dimension.
+///
+/// A sequence-like scatter dimension is constant, or at least only small
+/// variation, typically the result of ordering a sequence of different
+/// statements. An example would be:
+///   { Stmt_A[] -> [0, X, ...]; Stmt_B[] -> [1, Y, ...] }
+/// to schedule all instances of Stmt_A before any instance of Stmt_B.
+///
+/// To flatten, first begin with an offset of zero. Then determine the lowest
+/// possible value of the dimension, call it "i" [In the example we start at 0].
+/// Considering only schedules with that value, consider only instances with
+/// that value and determine the extent of the next dimension. Let l_X(i) and
+/// u_X(i) its minimum (lower bound) and maximum (upper bound) value. Add them
+/// as "Offset + X - l_X(i)" to the new schedule, then add "u_X(i) - l_X(i) + 1"
+/// to Offset and remove all i-instances from the old schedule. Repeat with the
+/// remaining lowest value i' until there are no instances in the old schedule
+/// left.
+/// The example schedule would be transformed to:
+///   { Stmt_A[] -> [X - l_X, ...]; Stmt_B[] -> [u_X - l_X + 1 + Y - l_Y, ...] }
+///
+/// @returns The flattened schedule, or a null union_map when the first
+///          dimension is not bounded by a constant or a sequence step is not
+///          bounded.
+isl::union_map tryFlattenSequence(isl::union_map Schedule) {
+  auto IslCtx = Schedule.get_ctx();
+  auto ScatterSet = isl::set(Schedule.range());
+
+  auto ParamSpace = Schedule.get_space().params();
+  auto Dims = ScatterSet.dim(isl::dim::set);
+  assert(Dims >= 2);
+
+  // Would cause an infinite loop.
+  if (!isDimBoundedByConstant(ScatterSet, 0)) {
+    DEBUG(dbgs() << "Abort; dimension is not of fixed size\n");
+    return nullptr;
+  }
+
+  auto AllDomains = Schedule.domain();
+  auto AllDomainsToNull = isl::union_pw_multi_aff(AllDomains);
+
+  // Accumulated result and the running offset ("Offset" in the description).
+  auto NewSchedule = isl::union_map::empty(ParamSpace);
+  auto Counter = isl::pw_aff(isl::local_space(ParamSpace.set_from_params()));
+
+  // Each round handles the lexicographically smallest remaining value of the
+  // first scatter dimension and removes it from ScatterSet.
+  while (!ScatterSet.is_empty()) {
+    DEBUG(dbgs() << "Next counter:\n  " << Counter << "\n");
+    DEBUG(dbgs() << "Remaining scatter set:\n  " << ScatterSet << "\n");
+    auto ThisSet = ScatterSet.project_out(isl::dim::set, 1, Dims - 1);
+    auto ThisFirst = ThisSet.lexmin();
+    auto ScatterFirst = ThisFirst.add_dims(isl::dim::set, Dims - 1);
+
+    // Part of the schedule with that first value, with the first dimension
+    // removed and the remainder flattened recursively.
+    auto SubSchedule = Schedule.intersect_range(ScatterFirst);
+    SubSchedule = scheduleProjectOut(SubSchedule, 0, 1);
+    SubSchedule = flattenSchedule(SubSchedule);
+
+    auto SubDims = scheduleScatterDims(SubSchedule);
+    auto FirstSubSchedule = scheduleProjectOut(SubSchedule, 1, SubDims - 1);
+    auto FirstScheduleAff = scheduleExtractDimAff(FirstSubSchedule, 0);
+    auto RemainingSubSchedule = scheduleProjectOut(SubSchedule, 0, 1);
+
+    auto FirstSubScatter = isl::set(FirstSubSchedule.range());
+    DEBUG(dbgs() << "Next step in sequence is:\n  " << FirstSubScatter << "\n");
+
+    if (!isDimBoundedByParameter(FirstSubScatter, 0)) {
+      DEBUG(dbgs() << "Abort; sequence step is not bounded\n");
+      return nullptr;
+    }
+
+    auto FirstSubScatterMap = isl::map::from_range(FirstSubScatter);
+
+    // isl_set_dim_max returns a strange isl_pw_aff with domain tuple_id of
+    // 'none'. It doesn't match with any space including a 0-dimensional
+    // anonymous tuple.
+    // Interesting, one can create such a set using
+    // isl_set_universe(ParamSpace). Bug?
+    auto PartMin = FirstSubScatterMap.dim_min(0);
+    auto PartMax = FirstSubScatterMap.dim_max(0);
+    auto One = isl::pw_aff(isl::set::universe(ParamSpace.set_from_params()),
+                           isl::val::one(IslCtx));
+    // Extent of this step: max - min + 1.
+    auto PartLen = PartMax.add(PartMin.neg()).add(One);
+
+    // New first dimension: Counter + (old value - PartMin).
+    auto AllPartMin = isl::union_pw_aff(PartMin).pullback(AllDomainsToNull);
+    auto FirstScheduleAffNormalized = FirstScheduleAff.sub(AllPartMin);
+    auto AllCounter = isl::union_pw_aff(Counter).pullback(AllDomainsToNull);
+    auto FirstScheduleAffWithOffset =
+        FirstScheduleAffNormalized.add(AllCounter);
+
+    auto ScheduleWithOffset = isl::union_map(FirstScheduleAffWithOffset)
+                                  .flat_range_product(RemainingSubSchedule);
+    NewSchedule = NewSchedule.unite(ScheduleWithOffset);
+
+    // Mark this value as handled and advance the offset by the step's extent.
+    ScatterSet = ScatterSet.subtract(ScatterFirst);
+    Counter = Counter.add(PartLen);
+  }
+
+  DEBUG(dbgs() << "Sequence-flatten result is:\n  " << NewSchedule << "\n");
+  return NewSchedule;
+}
+
+/// Flatten a loop-like first dimension.
+///
+/// A loop-like dimension is one that depends on a variable (usually a loop's
+/// induction variable). Let the input schedule look like this:
+///   { Stmt[i] -> [i, X, ...] }
+///
+/// To flatten, we determine the largest extent of X which may not depend on the
+/// actual value of i. Let l_X() the smallest possible value of X and u_X() its
+/// largest value. Then, construct a new schedule
+///   { Stmt[i] -> [i * (u_X() - l_X() + 1) + X - l_X(), ...] }
+///
+/// @returns The flattened schedule, or a null union_map when the extent of
+///          the second dimension is not bounded by constants or its bounds
+///          cannot be determined.
+isl::union_map tryFlattenLoop(isl::union_map Schedule) {
+  assert(scheduleScatterDims(Schedule) >= 2);
+
+  // Flatten everything behind the first dimension recursively.
+  auto Remaining = scheduleProjectOut(Schedule, 0, 1);
+  auto SubSchedule = flattenSchedule(Remaining);
+  auto SubDims = scheduleScatterDims(SubSchedule);
+
+  // Extent of the first dimension of the flattened remainder, without
+  // parameters.
+  auto SubExtent = isl::set(SubSchedule.range());
+  auto SubExtentDims = SubExtent.dim(isl::dim::param);
+  SubExtent = SubExtent.project_out(isl::dim::param, 0, SubExtentDims);
+  SubExtent = SubExtent.project_out(isl::dim::set, 1, SubDims - 1);
+
+  if (!isDimBoundedByConstant(SubExtent, 0)) {
+    DEBUG(dbgs() << "Abort; dimension not bounded by constant\n");
+    return nullptr;
+  }
+
+  auto Min = SubExtent.dim_min(0);
+  DEBUG(dbgs() << "Min bound:\n  " << Min << "\n");
+  auto MinVal = getConstant(Min, false, true);
+  auto Max = SubExtent.dim_max(0);
+  DEBUG(dbgs() << "Max bound:\n  " << Max << "\n");
+  auto MaxVal = getConstant(Max, true, false);
+
+  if (!MinVal || !MaxVal || MinVal.is_nan() || MaxVal.is_nan()) {
+    DEBUG(dbgs() << "Abort; dimension bounds could not be determined\n");
+    return nullptr;
+  }
+
+  auto FirstSubScheduleAff = scheduleExtractDimAff(SubSchedule, 0);
+  auto RemainingSubSchedule = scheduleProjectOut(std::move(SubSchedule), 0, 1);
+
+  // LenVal = u_X - l_X + 1; the inner dimension is shifted to start at zero.
+  auto LenVal = MaxVal.sub(MinVal).add_ui(1);
+  auto FirstSubScheduleNormalized = subtract(FirstSubScheduleAff, MinVal);
+
+  // TODO: Normalize FirstAff to zero (convert to isl_map, determine minimum,
+  // subtract it)
+  auto FirstAff = scheduleExtractDimAff(Schedule, 0);
+  // Combined index: i * LenVal + (X - l_X).
+  auto Offset = multiply(FirstAff, LenVal);
+  auto Index = FirstSubScheduleNormalized.add(Offset);
+  auto IndexMap = isl::union_map(Index);
+
+  auto Result = IndexMap.flat_range_product(RemainingSubSchedule);
+  DEBUG(dbgs() << "Loop-flatten result is:\n  " << Result << "\n");
+  return Result;
+}
+} // anonymous namespace
+
+/// Try to reduce the number of scatter dimensions of @p Schedule.
+///
+/// Schedules with at most one scatter dimension are returned as they are.
+/// Otherwise the strategies are tried in this order and the first non-null
+/// result wins: sequence flattening (only if isVariableDim() is false), loop
+/// flattening, sequence flattening once more. If all fail, the input is
+/// returned unchanged.
+isl::union_map polly::flattenSchedule(isl::union_map Schedule) {
+  const auto NumScatterDims = scheduleScatterDims(Schedule);
+  DEBUG(dbgs() << "Recursive schedule to process:\n  " << Schedule << "\n");
+
+  // Base cases: no dimensions left (TODO: Add one dimension?) or already
+  // one-dimensional.
+  if (NumScatterDims <= 1)
+    return Schedule;
+
+  // Fixed dimension; no need to preserve variabledness.
+  if (!isVariableDim(Schedule)) {
+    DEBUG(dbgs() << "Fixed dimension; try sequence flattening\n");
+    if (auto FlattenedAsSequence = tryFlattenSequence(Schedule))
+      return FlattenedAsSequence;
+  }
+
+  // Constant stride
+  DEBUG(dbgs() << "Try loop flattening\n");
+  if (auto FlattenedAsLoop = tryFlattenLoop(Schedule))
+    return FlattenedAsLoop;
+
+  // Try again without loop condition (may blow up the number of pieces!!)
+  DEBUG(dbgs() << "Try sequence flattening again\n");
+  if (auto FlattenedAsSequence = tryFlattenSequence(Schedule))
+    return FlattenedAsSequence;
+
+  // Cannot flatten
+  return Schedule;
+}
--- a/external/llvm-project/polly/lib/Transform/FlattenSchedule.cpp
+++ b/external/llvm-project/polly/lib/Transform/FlattenSchedule.cpp
@@ -0,0 +1,108 @@
+//===------ FlattenSchedule.cpp --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Try to reduce the number of scatter dimension. Useful to make isl_union_map
+// schedules more understandable. This is only intended for debugging and
+// unittests, not for production use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/FlattenSchedule.h"
+#include "polly/FlattenAlgo.h"
+#include "polly/ScopInfo.h"
+#include "polly/ScopPass.h"
+#include "polly/Support/ISLOStream.h"
+#define DEBUG_TYPE "polly-flatten-schedule"
+
+using namespace polly;
+using namespace llvm;
+
+namespace {
+
+/// Write @p Schedule to @p OS.
+///
+/// Every map of the union goes on a line of its own, indented by @p indent
+/// columns.
+void printSchedule(raw_ostream &OS, const isl::union_map &Schedule,
+                   int indent) {
+  auto PrintOne = [&OS, indent](isl::map StmtSchedule) -> isl::stat {
+    OS.indent(indent) << StmtSchedule << "\n";
+    return isl::stat::ok;
+  };
+  Schedule.foreach_map(PrintOne);
+}
+
+/// Scop pass that replaces the schedule of a polly::Scop by its flattened
+/// form and remembers the previous schedule for printing.
+class FlattenSchedule : public ScopPass {
+private:
+  FlattenSchedule(const FlattenSchedule &) = delete;
+  const FlattenSchedule &operator=(const FlattenSchedule &) = delete;
+
+  // Declared before OldSchedule so that it is destroyed after it.
+  std::shared_ptr<isl_ctx> IslCtx;
+  isl::union_map OldSchedule;
+
+public:
+  static char ID;
+  explicit FlattenSchedule() : ScopPass(ID) {}
+
+  /// Requires ScopInfoRegionPass transitively and preserves everything.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequiredTransitive<ScopInfoRegionPass>();
+    AU.setPreservesAll();
+  }
+
+  /// Flatten the schedule of @p S and install the result; always returns
+  /// false.
+  bool runOnScop(Scop &S) override {
+    // Hold on to the isl_ctx so it cannot be freed before OldSchedule is.
+    IslCtx = S.getSharedIslCtx();
+
+    DEBUG(dbgs() << "Going to flatten old schedule:\n");
+    OldSchedule = S.getSchedule();
+    DEBUG(printSchedule(dbgs(), OldSchedule, 2));
+
+    auto StmtDomains = S.getDomains();
+    auto BoundedOld = OldSchedule.intersect_domain(StmtDomains);
+    DEBUG(dbgs() << "Old schedule with domains:\n");
+    DEBUG(printSchedule(dbgs(), BoundedOld, 2));
+
+    auto Flattened = flattenSchedule(BoundedOld);
+
+    DEBUG(dbgs() << "Flattened new schedule:\n");
+    DEBUG(printSchedule(dbgs(), Flattened, 2));
+
+    Flattened = Flattened.gist_domain(StmtDomains);
+    DEBUG(dbgs() << "Gisted, flattened new schedule:\n");
+    DEBUG(printSchedule(dbgs(), Flattened, 2));
+
+    S.setSchedule(Flattened);
+    return false;
+  }
+
+  /// Print the remembered old schedule followed by the current one of @p S.
+  void printScop(raw_ostream &OS, Scop &S) const override {
+    OS << "Schedule before flattening {\n";
+    printSchedule(OS, OldSchedule, 4);
+    OS << "}\n\n";
+
+    OS << "Schedule after flattening {\n";
+    printSchedule(OS, S.getSchedule(), 4);
+    OS << "}\n";
+  }
+
+  /// Drop the remembered schedule first, then the context reference.
+  void releaseMemory() override {
+    OldSchedule = nullptr;
+    IslCtx.reset();
+  }
+};
+
+char FlattenSchedule::ID;
+} // anonymous namespace
+
+// Factory: returns a newly allocated FlattenSchedule instance.
+Pass *polly::createFlattenSchedulePass() { return new FlattenSchedule(); }
+
+// Registers the pass under the command-line name "polly-flatten-schedule".
+// No pass dependencies are listed between BEGIN and END.
+INITIALIZE_PASS_BEGIN(FlattenSchedule, "polly-flatten-schedule",
+                      "Polly - Flatten schedule", false, false)
+INITIALIZE_PASS_END(FlattenSchedule, "polly-flatten-schedule",
+                    "Polly - Flatten schedule", false, false)
--- a/external/llvm-project/polly/lib/Transform/ForwardOpTree.cpp
+++ b/external/llvm-project/polly/lib/Transform/ForwardOpTree.cpp
--- a/external/llvm-project/polly/lib/Transform/MaximalStaticExpansion.cpp
+++ b/external/llvm-project/polly/lib/Transform/MaximalStaticExpansion.cpp
--- a/external/llvm-project/polly/lib/Transform/RewriteByReferenceParameters.cpp
+++ b/external/llvm-project/polly/lib/Transform/RewriteByReferenceParameters.cpp
@@ -0,0 +1,99 @@
+//===------ RewriteByReferenceParameters.cpp --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass introduces separate 'alloca' instructions for read-only
+// by-reference function parameters to indicate that these parameters are
+// read-only. After this transformation -mem2reg has more freedom to promote
+// variables to registers, which allows SCEV to work in more cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/LinkAllPasses.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+
+#define DEBUG_TYPE "polly-rewrite-byref-params"
+
+using namespace llvm;
+
+namespace {
+
+/// Function pass that gives each matching call to
+/// _gfortran_transfer_integer_write a private copy of its argument-1 alloca.
+class RewriteByrefParams : public FunctionPass {
+private:
+  RewriteByrefParams(const RewriteByrefParams &) = delete;
+  const RewriteByrefParams &operator=(const RewriteByrefParams &) = delete;
+
+public:
+  static char ID;
+  explicit RewriteByrefParams() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+
+  /// If \p Inst is a direct call to _gfortran_transfer_integer_write whose
+  /// argument 1 is a bitcast of an alloca, make the call use a bitcast of a
+  /// fresh alloca holding a copy of the value. \returns true on a rewrite.
+  bool tryRewriteInstruction(Instruction &Inst) {
+    auto *Call = dyn_cast<CallInst>(&Inst);
+    if (!Call)
+      return false;
+
+    // We currently match for a very specific function. In case this proves
+    // useful, we can make this code dependent on readonly metadata.
+    llvm::Function *F = Call->getCalledFunction();
+    if (!F || F->getName() != "_gfortran_transfer_integer_write")
+      return false;
+
+    // Argument 1 has to exist before it can be inspected.
+    if (Call->getNumArgOperands() < 2)
+      return false;
+    auto *BitCast = dyn_cast<BitCastInst>(Call->getArgOperand(1));
+    if (!BitCast)
+      return false;
+    auto *Alloca = dyn_cast<AllocaInst>(BitCast->getOperand(0));
+    if (!Alloca)
+      return false;
+
+    std::string InstName = Alloca->getName();
+    BasicBlock *Entry = &Inst.getParent()->getParent()->getEntryBlock();
+    // Create the copy at the start of the entry block, in the address space of
+    // the original alloca so that the bitcast below remains a valid cast.
+    auto *NewAlloca = new AllocaInst(
+        Alloca->getAllocatedType(), Alloca->getType()->getAddressSpace(),
+        "polly_byref_alloca_" + InstName, &*Entry->begin());
+    // Copy the value right before the call and redirect the call to the copy.
+    auto *Val = new LoadInst(Alloca, "polly_byref_load_" + InstName, &Inst);
+    new StoreInst(Val, NewAlloca, &Inst);
+    auto *NewBitCast = new BitCastInst(NewAlloca, BitCast->getType(),
+                                       "polly_byref_cast_" + InstName, &Inst);
+    Call->setArgOperand(1, NewBitCast);
+    return true;
+  }
+
+  /// Rewrite all matching calls in \p F. \returns true iff \p F was modified.
+  bool runOnFunction(Function &F) override {
+    bool Changed = false;
+    for (BasicBlock &BB : F)
+      for (Instruction &Inst : BB)
+        Changed |= tryRewriteInstruction(Inst);
+    return Changed;
+  }
+};
+
+char RewriteByrefParams::ID;
+} // anonymous namespace
+
+/// Return a newly allocated RewriteByrefParams pass as a generic Pass pointer.
+Pass *polly::createRewriteByrefParamsPass() { return new RewriteByrefParams(); }
+INITIALIZE_PASS_BEGIN(RewriteByrefParams, "polly-rewrite-byref-params",
+                      "Polly - Rewrite by reference parameters", false, false)
+INITIALIZE_PASS_END(RewriteByrefParams, "polly-rewrite-byref-params",
+                    "Polly - Rewrite by reference parameters", false, false)
--- a/external/llvm-project/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/external/llvm-project/polly/lib/Transform/ScheduleOptimizer.cpp
--- a/external/llvm-project/polly/lib/Transform/ScopInliner.cpp
+++ b/external/llvm-project/polly/lib/Transform/ScopInliner.cpp
@@ -0,0 +1,121 @@
+//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a SCC and:
+// 1. If it has more than one component, bail out (contains cycles)
+// 2. If it has just one component, and if the function is entirely a scop,
+//    inline it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/LinkAllPasses.h"
+#include "polly/RegisterPasses.h"
+#include "polly/ScopDetection.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+
+#define DEBUG_TYPE "polly-scop-inliner"
+
+using namespace polly;
+extern bool polly::PollyAllowFullFunction;
+
+namespace {
+/// Call graph SCC pass that marks a function always-inline and runs the
+/// always-inliner over its module when its top-level region is a valid Scop.
+class ScopInliner : public CallGraphSCCPass {
+  using llvm::Pass::doInitialization;
+
+public:
+  static char ID;
+  ScopInliner() : CallGraphSCCPass(ID) {}
+
+  /// Abort unless -polly-allow-full-function is set; modifies nothing itself.
+  bool doInitialization(CallGraph &CG) override {
+    if (!polly::PollyAllowFullFunction)
+      report_fatal_error(
+          "Aborting from ScopInliner because it only makes sense to run with "
+          "-polly-allow-full-function. "
+          "The heuristic for ScopInliner checks that the full function is a "
+          "Scop, which happens if and only if polly-allow-full-function is "
+          "enabled. If not, the entry block is not included in the Scop");
+    return false;
+  }
+
+  /// Inline the single function of \p SCC if its top-level region is a valid
+  /// Scop. \returns true if the module was modified.
+  bool runOnSCC(CallGraphSCC &SCC) override {
+    // We do not try to inline non-trivial SCCs because this would lead to
+    // "infinite" inlining if we are not careful.
+    if (SCC.size() > 1)
+      return false;
+    assert(SCC.size() == 1 && "found empty SCC");
+    Function *F = (*SCC.begin())->getFunction();
+
+    // If the function is a nullptr, or the function is a declaration.
+    if (!F)
+      return false;
+    if (F->isDeclaration()) {
+      DEBUG(dbgs() << "Skipping " << F->getName()
+                   << " because it is a declaration.\n");
+      return false;
+    }
+
+    // Run scop detection on F with analysis managers local to this call.
+    PassBuilder PB;
+    FunctionAnalysisManager FAM;
+    FAM.registerPass([] { return ScopAnalysis(); });
+    PB.registerFunctionAnalyses(FAM);
+    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
+    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
+
+    const bool HasScopAsTopLevelRegion =
+        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
+    if (!HasScopAsTopLevelRegion) {
+      DEBUG(dbgs() << F->getName()
+                   << " does NOT have scop as top level region\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << F->getName() << " has scop as top level region\n");
+    F->addFnAttr(llvm::Attribute::AlwaysInline);
+    ModuleAnalysisManager MAM;
+    PB.registerModuleAnalyses(MAM);
+    ModulePassManager MPM;
+    MPM.addPass(AlwaysInlinerPass());
+    Module *M = F->getParent();
+    assert(M && "Function has illegal module");
+    MPM.run(*M, MAM);
+    // F gained an attribute and the inliner ran on the module: report a change.
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+};
+
+} // namespace
+char ScopInliner::ID; // Out-of-line definition of the static ID member.
+
+/// Allocate a ScopInliner with `new` and return it as a generic Pass pointer.
+Pass *polly::createScopInlinerPass() {
+  return new ScopInliner();
+}
+
+INITIALIZE_PASS_BEGIN(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.", false,
+    false) // No pass dependencies are declared between BEGIN and END.
+INITIALIZE_PASS_END(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.", false,
+    false)
--- a/external/llvm-project/polly/lib/Transform/Simplify.cpp
+++ b/external/llvm-project/polly/lib/Transform/Simplify.cpp
--- a/external/llvm-project/polly/lib/Transform/ZoneAlgo.cpp
+++ b/external/llvm-project/polly/lib/Transform/ZoneAlgo.cpp