Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/alias-metadata.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/alias-metadata.ll
@@ -0,0 +1,52 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel -S < %s | FileCheck %s
+;
+;    void foo(float *A, float *B) {
+;      for (long i = 0; i < 1000; i++)
+;        for (long j = 0; j < 1000; j++)
+;          A[i] = B[i];
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK: define internal void @foo_polly_subfn
+
+define void @foo(float* %A, float* %B) {   ; two nested 1000-trip loops whose body copies B[i] to A[i]
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb11, %bb
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]   ; outer counter i: 0 on entry, %tmp12 from the latch
+  %exitcond1 = icmp ne i64 %i.0, 1000   ; stay in the outer loop while i != 1000
+  br i1 %exitcond1, label %bb3, label %bb13
+
+bb3:                                              ; preds = %bb2
+  br label %bb4
+
+bb4:                                              ; preds = %bb8, %bb3
+  %j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ]   ; inner counter j: 0 on entry, %tmp9 from the latch
+  %exitcond = icmp ne i64 %j.0, 1000   ; stay in the inner loop while j != 1000
+  br i1 %exitcond, label %bb5, label %bb10
+
+bb5:                                              ; preds = %bb4
+  %tmp = getelementptr inbounds float, float* %B, i64 %i.0   ; &B[i]
+  %tmp7 = getelementptr inbounds float, float* %A, i64 %i.0   ; &A[i]
+  %tmp6 = load float, float* %tmp, align 4   ; B[i]
+  store float %tmp6, float* %tmp7, align 4   ; A[i] = B[i]; neither address depends on j
+; CHECK: %tmp6_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
+; CHECK: store float %tmp6_p_scalar_, float* %scevgep8, align 4, !alias.scope !3, !noalias !4
+  br label %bb8
+
+bb8:                                              ; preds = %bb5
+  %tmp9 = add nsw i64 %j.0, 1   ; j + 1
+  br label %bb4
+
+bb10:                                             ; preds = %bb4
+  br label %bb11
+
+bb11:                                             ; preds = %bb10
+  %tmp12 = add nsw i64 %i.0, 1   ; i + 1
+  br label %bb2
+
+bb13:                                             ; preds = %bb2
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
@@ -0,0 +1,33 @@
+; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+;    void f(float *A) {
+;      for (int i = 1; i < 1000; i++)
+;        A[i] += A[0];
+;    }
+;
+; CHECK:  %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float, float* }, { float, float* }* %polly.par.userContext, i32 0
+; CHECK:  store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {   ; single loop: A[i] = A[0] + A[i] for i = 1 .. 999
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]   ; i, starting at 1
+  %tmp = load float, float* %A, align 4   ; A[0]; same address on every iteration
+  %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv   ; &A[i]
+  %tmp1 = load float, float* %arrayidx1, align 4   ; A[i]
+  %add = fadd float %tmp, %tmp1   ; A[0] + A[i]
+  store float %add, float* %arrayidx1, align 4   ; write the sum back to A[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1   ; i + 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000   ; leave once i + 1 reaches 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+;    void f(float *A) {
+;      for (int i = 1; i < 1000; i++)
+;        A[i] += /* split bb */ A[0];
+;    }
+;                                           A[0]  tmp (unused)      A
+; CHECK: %polly.par.userContext = alloca { float,    float*,     float* }
+;
+; CHECK:  %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
+; CHECK:  store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {   ; A[i] = A[0] + A[i] for i = 1 .. 999, with the A[0] load in its own block
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body.split ]   ; i, starting at 1
+  %tmp = load float, float* %A, align 4   ; A[0]; loaded here, consumed in %for.body.split
+  br label %for.body.split
+
+for.body.split:
+  %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv   ; &A[i]
+  %tmp1 = load float, float* %arrayidx1, align 4   ; A[i]
+  %add = fadd float %tmp, %tmp1   ; A[0] + A[i], using %tmp from the previous block
+  store float %add, float* %arrayidx1, align 4   ; write the sum back to A[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1   ; i + 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000   ; leave once i + 1 reaches 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
@@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction but
+; not B[0], as it is not needed.
+;
+;    void f(float *A, float *B) {
+;      // Not parallel
+;      for (int i = 1; i < 1000; i++) {
+;        B[i] = B[i+1] + B[0];
+;        // Parallel
+;        for (int j = 1; j < 1000; j++)
+;          A[j] += A[0];
+;      }
+;    }
+;
+;                                           i    A[0]    A
+; CHECK: %polly.par.userContext = alloca { i64, float, float* }
+;
+; CHECK:  %polly.access.B.load =
+; CHECK:  %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
+; CHECK:  store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+; CHECK-NOT:  store float %polly.access.B.load, float* %polly.subfn.storeaddr.polly.access.B.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* %A, float* %B) {   ; outer loop updates B[i] from B[i+1] and B[0]; inner loop adds A[0] to A[j]
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc.9, %entry
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc.9 ], [ 1, %entry ]   ; outer counter i, starting at 1
+  %exitcond3 = icmp ne i64 %indvars.iv1, 1000   ; stay in the outer loop while i != 1000
+  br i1 %exitcond3, label %for.body, label %for.end.11
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load float, float* %B, align 4   ; B[0]
+  %arrayidx1 = getelementptr inbounds float, float* %B, i64 %indvars.iv1   ; &B[i]
+  %iv.add = add nsw i64 %indvars.iv1, 1   ; i + 1
+  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %iv.add   ; &B[i + 1]
+  %tmp4 = load float, float* %arrayidx2, align 4   ; B[i + 1]
+  %add = fadd float %tmp4, %tmp   ; B[i + 1] + B[0]
+  store float %add, float* %arrayidx1, align 4   ; B[i] = B[i + 1] + B[0]
+  br label %for.cond.2
+
+for.cond.2:                                       ; preds = %for.inc, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 1, %for.body ]   ; inner counter j, starting at 1
+  %exitcond = icmp ne i64 %indvars.iv, 1000   ; stay in the inner loop while j != 1000
+  br i1 %exitcond, label %for.body.4, label %for.end
+
+for.body.4:                                       ; preds = %for.cond.2
+  %tmp5 = load float, float* %A, align 4   ; A[0]
+  %arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv   ; &A[j]
+  %tmp6 = load float, float* %arrayidx7, align 4   ; A[j]
+  %add8 = fadd float %tmp6, %tmp5   ; A[j] + A[0]
+  store float %add8, float* %arrayidx7, align 4   ; write the sum back to A[j]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1   ; j + 1
+  br label %for.cond.2
+
+for.end:                                          ; preds = %for.cond.2
+  br label %for.inc.9
+
+for.inc.9:                                        ; preds = %for.end
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1   ; i + 1
+  br label %for.cond
+
+for.end.11:                                       ; preds = %for.cond
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
@@ -0,0 +1,35 @@
+; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+;    void f(float *A) {
+;      for (int i = 1; i < 1000; i++)
+;        A[i] += A[0] + A[0];
+;    }
+;
+; CHECK:  %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds 
+; CHECK:  store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {   ; loop over i = 1 .. 999 that loads A[0] twice per iteration and updates A[i]
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]   ; i, starting at 1
+  %tmp = load float, float* %A, align 4   ; first load of A[0]
+  %tmp2 = load float, float* %A, align 4   ; second load of A[0], same address
+  %tmpadd = fadd float %tmp, %tmp2   ; A[0] + A[0]; no later instruction in this function uses %tmpadd
+  %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv   ; &A[i]
+  %tmp1 = load float, float* %arrayidx1, align 4   ; A[i]
+  %add = fadd float %tmp2, %tmp1   ; A[0] + A[i]; NOTE(review): the C sketch adds A[0] + A[0], confirm this is intended
+  store float %add, float* %arrayidx1, align 4   ; write the sum back to A[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1   ; i + 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000   ; leave once i + 1 reaches 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll
@@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; This code has failed the SCEV-based code generation, as the SCEV in the SCoP
+; contains an AddRecExpr of an outer loop. When generating code, we did not
+; properly forward the value of this expression to the subfunction.
+
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
+; AST:  Stmt_for_j(c0);
+
+; IR: @single_parallel_loop_polly_subfn
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+
+define void @single_parallel_loop() nounwind {   ; two nested loops storing 0.0 to @A at index i + j
+entry:
+  br label %for.i
+
+for.i:
+  %indvar.i = phi i64 [ %indvar.i.next, %for.i.inc], [ 0, %entry ]   ; outer counter i, starting at 0
+  br label %for.j
+
+for.j:
+  %indvar.j = phi i64 [ %indvar.j.next, %for.j], [ 0, %for.i ]   ; inner counter j, restarted at 0 for every i
+  %sum = add i64 %indvar.j, %indvar.i   ; index i + j; depends on the outer counter
+  %scevgep = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %sum   ; &A[i + j]
+  store float 0.0, float *%scevgep   ; A[i + j] = 0.0
+  %indvar.j.next = add i64 %indvar.j, 1   ; j + 1
+  %exitcond.j = icmp slt i64 %indvar.j.next, 1024   ; repeat while j + 1 < 1024 (signed)
+  br i1 %exitcond.j, label %for.j, label %for.i.inc
+
+for.i.inc:
+  fence seq_cst   ; fence in the outer latch; presumably keeps the outer loop out of the optimized region - TODO confirm
+  %indvar.i.next = add i64 %indvar.i, 1   ; i + 1
+  %exitcond.i = icmp ne i64 %indvar.i.next, 1024   ; repeat while i + 1 != 1024
+  br i1 %exitcond.i, label %for.i, label %exit
+
+exit:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll
@@ -0,0 +1,31 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; AST: #pragma simd
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
+; AST:   Stmt_for_i(c0);
+
+; IR: getelementptr inbounds { [1024 x double]* }, { [1024 x double]* }* %polly.par.userContext, i32 0, i32 0
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @kernel_trmm([1024 x double]* %B) {   ; zeroes the 1024 elements B[%extern][0 .. 1023]
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %extern = add i64 1, 0   ; evaluates to 1; defined before the loop and used inside it
+  br label %for.i
+
+for.i:
+  %indvar.i = phi i64 [ %indvar.i.next, %for.i ], [ 0, %for.cond1.preheader ]   ; counter i, starting at 0
+  %getelementptr = getelementptr [1024 x double], [1024 x double]* %B, i64 %extern, i64 %indvar.i   ; &B[%extern][i]
+  store double 0.000000e+00, double* %getelementptr   ; B[%extern][i] = 0.0
+  %indvar.i.next = add i64 %indvar.i, 1   ; i + 1
+  %exitcond.i = icmp ne i64 %indvar.i.next, 1024   ; repeat while i + 1 != 1024
+  br i1 %exitcond.i, label %for.i, label %end
+
+end:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
@@ -0,0 +1,67 @@
+; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; The interesting part of this test case is the instruction:
+;   %tmp = bitcast i8* %call to i64**
+; which is not part of the scop. In the SCEV based code generation not '%tmp',
+; but %call is a parameter of the SCoP and we need to make sure its value is
+; properly forwarded to the subfunction.
+
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 < cols; c0 += 1)
+; AST:   Stmt_for_body(c0);
+; AST: if (cols <= 0)
+; AST:   Stmt_for_body(0);
+
+; IR: @foo_polly_subfn
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i64 %cols, i8* noalias %call) {   ; stores 1 through the pointer read from %call, at indices 0 .. cols-1
+entry:
+  %tmp = bitcast i8* %call to i64**   ; reinterpret %call as a pointer to i64*; done before the loop
+  br label %for.body
+
+for.body:
+  %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]   ; counter, starting at 0
+  %arrayidx = getelementptr inbounds i64*, i64** %tmp, i64 0   ; &tmp[0]; index is the constant 0
+  %tmp1 = load i64*, i64** %arrayidx, align 8   ; pointer stored at tmp[0], reloaded on every iteration
+  %arrayidx.2 = getelementptr inbounds i64, i64* %tmp1, i64 %indvar   ; &tmp1[indvar]
+  store i64 1, i64* %arrayidx.2, align 4   ; tmp1[indvar] = 1
+  %indvar.next = add nsw i64 %indvar, 1   ; indvar + 1
+  %cmp = icmp slt i64 %indvar.next, %cols   ; bound is tested after the body, so the body runs at least once
+  br i1 %cmp, label %for.body, label %end
+
+end:
+  ret void
+}
+
+; Another variation of this test case, now with even more of the index
+; expression defined outside of the scop.
+
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 < cols; c0 += 1)
+; AST:   Stmt_for_body(c0);
+; AST: if (cols <= 0)
+; AST:   Stmt_for_body(0);
+
+; IR: @bar_polly_subfn
+
+define void @bar(i64 %cols, i8* noalias %call) {   ; same loop as @foo, but the address &tmp[0] is also computed in %entry
+entry:
+  %tmp = bitcast i8* %call to i64**   ; reinterpret %call as a pointer to i64*
+  %arrayidx = getelementptr inbounds i64*, i64** %tmp, i64 0   ; &tmp[0], computed once before the loop
+  br label %for.body
+
+for.body:
+  %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]   ; counter, starting at 0
+  %tmp1 = load i64*, i64** %arrayidx, align 8   ; pointer stored at tmp[0], reloaded on every iteration
+  %arrayidx.2 = getelementptr inbounds i64, i64* %tmp1, i64 %indvar   ; &tmp1[indvar]
+  store i64 1, i64* %arrayidx.2, align 4   ; tmp1[indvar] = 1
+  %indvar.next = add nsw i64 %indvar, 1   ; indvar + 1
+  %cmp = icmp slt i64 %indvar.next, %cols   ; bound is tested after the body, so the body runs at least once
+  br i1 %cmp, label %for.body, label %end
+
+end:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S < %s | FileCheck %s -check-prefix=IR
+
+; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction.
+;
+; void loop_references_outer_ids(float *A) {
+;   for (long i = 0; i < 100; i++)
+;     A[i] = i;
+; }
+
+
+; AST: #pragma simd
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 <= 99; c0 += 1)
+; AST:   Stmt_for_body(c0);
+
+; IR-LABEL: polly.parallel.for:
+; IR-NEXT:  %polly.subfn.storeaddr.A = getelementptr inbounds { float* }, { float* }* %polly.par.userContext, i32 0, i32 0
+; IR-NEXT:  store float* %A, float** %polly.subfn.storeaddr.A
+; IR-NEXT:  %polly.par.userContext1 = bitcast { float* }* %polly.par.userContext to i8*
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @loop_references_outer_ids(float* %A) {   ; A[i] = (float) i for i = 0 .. 99
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]   ; counter i, starting at 0
+  %exitcond = icmp ne i64 %i.0, 100   ; stay in the loop while i != 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %conv = sitofp i64 %i.0 to float   ; i converted to float (signed)
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0   ; &A[i]; %A is the function argument
+  store float %conv, float* %arrayidx, align 4   ; A[i] = (float) i
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i64 %i.0, 1   ; i + 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll
@@ -0,0 +1,99 @@
+; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-codegen -S < %s | FileCheck %s -check-prefix=IR
+;
+; float A[100];
+;
+; void loop_references_outer_ids(long n) {
+;   for (long i = 0; i < 100; i++)
+;     for (long j = 0; j < 100; j++)
+;       for (long k = 0; k < n + i; k++)
+;         A[j] += i + j + k;
+; }
+
+; In this test case we verify that the j-loop is generated as an OpenMP
+; parallel loop and that the values of 'i' and 'n', needed in the loop bounds
+; of the k-loop, are correctly passed to the subfunction.
+
+; AST: #pragma minimal dependence distance: 1
+; AST: for (int c0 = max(0, -n + 1); c0 <= 99; c0 += 1)
+; AST:   #pragma omp parallel for
+; AST:   for (int c1 = 0; c1 <= 99; c1 += 1)
+; AST:     #pragma minimal dependence distance: 1
+; AST:     for (int c2 = 0; c2 < n + c0; c2 += 1)
+; AST:       Stmt_for_body6(c0, c1, c2);
+
+; IR:      %polly.par.userContext = alloca { i64, i64 }
+; IR: %[[R1:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext, i32 0, i32 0
+; IR-NEXT: store i64 %n, i64* %[[R1]]
+; IR-NEXT: %[[R2:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext, i32 0, i32 1
+; IR-NEXT: store i64 %polly.indvar, i64* %[[R2]]
+; IR-NEXT: %polly.par.userContext1 = bitcast { i64, i64 }* %polly.par.userContext to i8*
+
+; IR-LABEL: @loop_references_outer_ids_polly_subfn(i8* %polly.par.userContext)
+; IR:       %polly.par.userContext1 = bitcast i8* %polly.par.userContext to { i64, i64 }*
+; IR-NEXT:  %[[R3:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext1, i32 0, i32 0
+; IR-NEXT:  %[[R4:[0-9a-z.]+]] = load i64, i64* %[[R3]]
+; IR-NEXT:  %[[R5:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext1, i32 0, i32 1
+; IR-NEXT:  %[[R6:[0-9a-z.]+]] = load i64, i64* %[[R5]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = common global [100 x float] zeroinitializer, align 16
+
+define void @loop_references_outer_ids(i64 %n) {   ; i/j/k loop nest accumulating i + j + k into @A[j]; k runs up to n + i
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc03, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc04, %for.inc03 ]   ; outermost counter i, starting at 0
+  %exitcond1 = icmp ne i64 %i.0, 100   ; stay in the i-loop while i != 100
+  br i1 %exitcond1, label %for.body, label %for.end15
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc00, %for.body
+  %j.0 = phi i64 [ 0, %for.body ], [ %inc01, %for.inc00 ]   ; middle counter j, starting at 0
+  %exitcond = icmp ne i64 %j.0, 100   ; stay in the j-loop while j != 100
+  br i1 %exitcond, label %for.body3, label %for.end12
+
+for.body3:                                        ; preds = %for.cond1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]   ; innermost counter k, starting at 0
+  %add = add nsw i64 %i.0, %n   ; k-loop bound i + n; uses the outer counter and the argument
+  %cmp5 = icmp slt i64 %k.0, %add   ; stay in the k-loop while k < i + n (signed)
+  br i1 %cmp5, label %for.body6, label %for.end
+
+for.body6:                                        ; preds = %for.cond4
+  %add7 = add nsw i64 %i.0, %j.0   ; i + j
+  %add8 = add nsw i64 %add7, %k.0   ; i + j + k
+  %conv = sitofp i64 %add8 to float   ; (float)(i + j + k)
+  %arrayidx = getelementptr inbounds [100 x float], [100 x float]* @A, i64 0, i64 %j.0   ; &A[j]
+  %tmp = load float, float* %arrayidx, align 4   ; A[j]
+  %add9 = fadd float %tmp, %conv   ; A[j] + (float)(i + j + k)
+  store float %add9, float* %arrayidx, align 4   ; write the sum back to A[j]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body6
+  %inc = add nsw i64 %k.0, 1   ; k + 1
+  br label %for.cond4
+
+for.end:                                          ; preds = %for.cond4
+  br label %for.inc00
+
+for.inc00:                                        ; preds = %for.end
+  %inc01 = add nsw i64 %j.0, 1   ; j + 1
+  br label %for.cond1
+
+for.end12:                                        ; preds = %for.cond1
+  br label %for.inc03
+
+for.inc03:                                        ; preds = %for.end12
+  %inc04 = add nsw i64 %i.0, 1   ; i + 1
+  br label %for.cond
+
+for.end15:                                        ; preds = %for.cond
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/mapped-phi-access.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/mapped-phi-access.ll
@@ -0,0 +1,58 @@
+; RUN: opt %loadPolly -polly-parallel -polly-delicm -polly-codegen -S < %s | FileCheck %s
+;
+; Verify that -polly-parallel can handle mapped scalar MemoryAccesses.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @main() local_unnamed_addr #0 {   ; 4000 x 4000 loop nest with undef operands; inner body loads a 2-D element and stores to a 1-D element
+entry:
+  %0 = load i8*, i8** undef, align 8, !tbaa !1   ; pointer loaded from an undef address; cast to [4000 x double]* below
+  %1 = load i8*, i8** undef, align 8, !tbaa !1   ; pointer loaded from an undef address; cast to double* below
+  %arraydecay16 = bitcast i8* %1 to double*   ; 1-D base pointer used by the store
+  %arraydecay20 = bitcast i8* %0 to [4000 x double]*   ; 2-D base pointer used by the load
+  br label %for.body65.i226
+
+for.body65.i226:                                  ; preds = %for.inc85.i238, %entry
+  %indvars.iv8.i223 = phi i64 [ 0, %entry ], [ %indvars.iv.next9.i236, %for.inc85.i238 ]   ; outer counter, starting at 0
+  %arrayidx70.i224 = getelementptr inbounds double, double* %arraydecay16, i64 %indvars.iv8.i223   ; &arraydecay16[outer]; computed once per outer iteration
+  br label %for.body68.i235
+
+for.body68.i235:                                  ; preds = %for.body68.i235, %for.body65.i226
+  %2 = phi double [ undef, %for.body65.i226 ], [ undef, %for.body68.i235 ]   ; both incoming values are undef; %2 has no users in this function
+  %indvars.iv.i227 = phi i64 [ 0, %for.body65.i226 ], [ %indvars.iv.next.i233, %for.body68.i235 ]   ; inner counter, starting at 0
+  %arrayidx74.i228 = getelementptr inbounds [4000 x double], [4000 x double]* %arraydecay20, i64 %indvars.iv8.i223, i64 %indvars.iv.i227   ; &arraydecay20[outer][inner]
+  %3 = load double, double* %arrayidx74.i228, align 8, !tbaa !5   ; loaded value has no users in this function
+  store double undef, double* %arrayidx70.i224, align 8, !tbaa !5   ; undef stored to arraydecay16[outer] on every inner iteration
+  %indvars.iv.next.i233 = add nuw nsw i64 %indvars.iv.i227, 1   ; inner + 1
+  %exitcond.i234 = icmp eq i64 %indvars.iv.next.i233, 4000   ; leave the inner loop once inner + 1 reaches 4000
+  br i1 %exitcond.i234, label %for.inc85.i238, label %for.body68.i235
+
+for.inc85.i238:                                   ; preds = %for.body68.i235
+  %indvars.iv.next9.i236 = add nuw nsw i64 %indvars.iv8.i223, 1   ; outer + 1
+  %exitcond10.i237 = icmp eq i64 %indvars.iv.next9.i236, 4000   ; leave the outer loop once outer + 1 reaches 4000
+  br i1 %exitcond10.i237, label %kernel_gemver_StrictFP.exit, label %for.body65.i226
+
+kernel_gemver_StrictFP.exit:                      ; preds = %for.inc85.i238
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 6.0.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"double", !3, i64 0}
+
+
+; CHECK-LABEL: define internal void @main_polly_subfn(i8* %polly.par.userContext)
+;
+; CHECK:       polly.stmt.for.body65.i226:
+; CHECK-NEXT:    %polly.access.cast.polly.subfunc.arg.[[R0:[0-9]*]] = bitcast i8* %polly.subfunc.arg.{{[0-9]*}} to double*
+; CHECK-NEXT:    %polly.access.polly.subfunc.arg.[[R1:[0-9]*]] = getelementptr double, double* %polly.access.cast.polly.subfunc.arg.[[R0]], i64 %polly.indvar
+; CHECK-NEXT:    store double undef, double* %polly.access.polly.subfunc.arg.[[R1]]
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
@@ -0,0 +1,77 @@
+; RUN: opt %loadPolly -polly-import-jscop \
+; RUN:                -analyze < %s | FileCheck %s
+
+; RUN: opt %loadPolly -polly-import-jscop \
+; RUN:                -polly-codegen -S < %s \
+; RUN:                -polly-parallel \
+; RUN:                | FileCheck %s -check-prefix=IR
+
+;    void new_multidim_access(long n, long m, float A[][m]) {
+;      for (long i = 0; i < n; i++)
+;        for (long j = 0; j < 100; j++)
+;          A[i][2 * j] += i + j;
+;    }
+
+; CHECK:  ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK:         [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] };
+; CHECK:    new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 13 + i1] };
+; CHECK:  MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK:         [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] };
+; CHECK:    new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 43 + i1] };
+
+; IR: %polly.access.mul.polly.subfunc.arg.A = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
+; IR: %6 = add nsw i64 %polly.indvar5, 13
+; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6
+; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
+; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+
+; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
+; IR: %7 = add nsw i64 %polly.indvar5, 43
+; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7
+; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
+; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @new_multidim_access(i64 %n, i64 %m, float* %A) {   ; A[i*m + 2*j] += (float)(i + j) for i in [0, n), j in [0, 100)
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb15, %bb
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp16, %bb15 ]   ; outer counter i, starting at 0
+  %tmp = icmp slt i64 %i.0, %n   ; stay in the outer loop while i < n (signed)
+  br i1 %tmp, label %bb2, label %bb17
+
+bb2:                                              ; preds = %bb1
+  br label %bb3
+
+bb3:                                              ; preds = %bb12, %bb2
+  %j.0 = phi i64 [ 0, %bb2 ], [ %tmp13, %bb12 ]   ; inner counter j, starting at 0
+  %exitcond = icmp ne i64 %j.0, 100   ; stay in the inner loop while j != 100
+  br i1 %exitcond, label %bb4, label %bb14
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = add nsw i64 %i.0, %j.0   ; i + j
+  %tmp6 = sitofp i64 %tmp5 to float   ; (float)(i + j)
+  %tmp7 = shl nsw i64 %j.0, 1   ; 2 * j
+  %tmp8 = mul nsw i64 %i.0, %m   ; i * m, the row offset in the flattened array
+  %.sum = add i64 %tmp8, %tmp7   ; linear index i*m + 2*j
+  %tmp9 = getelementptr inbounds float, float* %A, i64 %.sum   ; &A[i*m + 2*j]
+  %tmp10 = load float, float* %tmp9, align 4   ; current element value
+  %tmp11 = fadd float %tmp10, %tmp6   ; element + (float)(i + j)
+  store float %tmp11, float* %tmp9, align 4   ; write the sum back to the same element
+  br label %bb12
+
+bb12:                                             ; preds = %bb4
+  %tmp13 = add nsw i64 %j.0, 1   ; j + 1
+  br label %bb3
+
+bb14:                                             ; preds = %bb3
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  %tmp16 = add nsw i64 %i.0, 1   ; i + 1
+  br label %bb1
+
+bb17:                                             ; preds = %bb1
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/new_multidim_access___%bb1---%bb17.jscop
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/new_multidim_access___%bb1---%bb17.jscop
@@ -0,0 +1,21 @@
+{
+   "context" : "[n, m] -> {  : n <= 9223372036854775807 and n >= -9223372036854775808 and m <= 9223372036854775807 and m >= -9223372036854775808 }",
+   "name" : "bb1 => bb17",
+   "statements" : [
+      {
+         "accesses" : [
+            {
+               "kind" : "read",
+               "relation" : "[n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, i1 + 13] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "[n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, i1 + 43] }"
+            }
+         ],
+         "domain" : "[n, m] -> { Stmt_bb4[i0, i1] : i0 >= 0 and n >= 1 and i0 <= -1 + n and i1 >= 0 and i1 <= 99 }",
+         "name" : "Stmt_bb4",
+         "schedule" : "[n, m] -> { Stmt_bb4[i0, i1] -> [i0, i1] }"
+      }
+   ]
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll
@@ -0,0 +1,44 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we pass %rem96 to the parallel subfunction.
+;
+; CHECK:       %[[R:[0-9]*]] = getelementptr inbounds { i32, i32, i64, float*, float*, i32 }, { i32, i32, i64, float*, float*, i32 }* %polly.par.userContext1, i32 0, i32 5
+; CHECK-NEXT:  %polly.subfunc.arg.rem96 = load i32, i32* %[[R]]
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @dmxpy(i32 %n1, float* %y, i32 %n2, float* %x) #0 {   ; loop nest whose x index is derived from %rem96, computed before the loops
+entry:
+  %rem96 = srem i32 %n2, 16   ; signed remainder n2 % 16
+  %0 = sext i32 %rem96 to i64   ; widen the remainder to 64 bits
+  %1 = add i64 %0, 15   ; rem96 + 15; initial value of %indvars.iv262
+  br label %for.cond195.preheader
+
+for.cond195.preheader:                            ; preds = %for.inc363, %entry
+  %indvars.iv262 = phi i64 [ %1, %entry ], [ %indvars.iv.next263, %for.inc363 ]   ; starts at rem96 + 15, advances by 16 per outer iteration
+  %j.0236 = phi i32 [ 0, %entry ], [ %add364, %for.inc363 ]   ; outer counter j, starts at 0, advances by 16
+  br label %for.body197
+
+for.body197:                                      ; preds = %for.body197, %for.cond195.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body197 ], [ 0, %for.cond195.preheader ]   ; inner counter, starting at 0
+  %arrayidx199 = getelementptr inbounds float, float* %y, i64 %indvars.iv   ; &y[inner]
+  %2 = add nsw i64 %indvars.iv262, -6   ; x index: %indvars.iv262 - 6
+  %arrayidx292 = getelementptr inbounds float, float* %x, i64 %2   ; &x[%indvars.iv262 - 6]
+  %3 = load float, float* %arrayidx292, align 4   ; loaded value has no users in this function
+  store float undef, float* %arrayidx199, align 4   ; undef stored to y[inner]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1   ; inner + 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32   ; narrow to i32 for the comparison with %n1
+  %exitcond = icmp ne i32 %lftr.wideiv, %n1   ; repeat while the truncated inner + 1 != n1
+  br i1 %exitcond, label %for.body197, label %for.inc363
+
+for.inc363:                                       ; preds = %for.body197
+  %add364 = add nsw i32 %j.0236, 16   ; j + 16
+  %cmp193 = icmp slt i32 %add364, %n2   ; repeat the outer loop while j + 16 < n2 (signed)
+  %indvars.iv.next263 = add i64 %indvars.iv262, 16   ; advance the x offset base by 16
+  br i1 %cmp193, label %for.cond195.preheader, label %for.end365
+
+for.end365:                                       ; preds = %for.inc363
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-parallel \
+; RUN: -polly-parallel-force -polly-codegen -S -verify-dom-info < %s \
+; RUN: | FileCheck %s -check-prefix=IR
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; IR: @GOMP_parallel_loop_runtime_start
+
+@longLimit = external global [9 x [23 x i32]], align 16
+@shortLimit = external global [9 x [14 x i32]], align 16
+
+define void @init_layer3(i32 %down_sample_sblimit) #0 { ; test input: 9 x 14 loop nest over @longLimit/@shortLimit with a branch on loaded data
+entry:
+  br label %for.cond.463.preheader
+
+for.cond.463.preheader:                           ; preds = %entry
+  br label %for.cond.499.preheader
+
+for.cond.533.preheader:                           ; preds = %for.inc.530
+  ret void
+
+for.cond.499.preheader:                           ; preds = %for.inc.530, %for.cond.463.preheader
+  %indvars.iv140 = phi i64 [ 0, %for.cond.463.preheader ], [ %indvars.iv.next141, %for.inc.530 ]
+  %arrayidx483 = getelementptr inbounds [9 x [23 x i32]], [9 x [23 x i32]]* @longLimit, i64 0, i64 %indvars.iv140, i64 0
+  store i32 undef, i32* %arrayidx483, align 4, !tbaa !1 ; write to longLimit[%indvars.iv140][0]; the stored value is undef
+  %arrayidx487 = getelementptr inbounds [9 x [23 x i32]], [9 x [23 x i32]]* @longLimit, i64 0, i64 %indvars.iv140, i64 0
+  %tmp = load i32, i32* %arrayidx487, align 4, !tbaa !1 ; reload of the same element; %tmp is never used
+  %indvars.iv.next135 = add nuw nsw i64 0, 1      ; both operands are constants; the result is never used
+  br label %for.body.502
+
+for.body.502:                                     ; preds = %for.inc.527, %for.cond.499.preheader
+  %indvars.iv137 = phi i64 [ 0, %for.cond.499.preheader ], [ %indvars.iv.next138, %for.inc.527 ]
+  %arrayidx518 = getelementptr inbounds [9 x [14 x i32]], [9 x [14 x i32]]* @shortLimit, i64 0, i64 %indvars.iv140, i64 %indvars.iv137
+  %tmp1 = load i32, i32* %arrayidx518, align 4, !tbaa !1 ; read of shortLimit[%indvars.iv140][%indvars.iv137]
+  %cmp519 = icmp sgt i32 %tmp1, %down_sample_sblimit ; compares the loaded value with the function argument
+  br i1 %cmp519, label %if.then.521, label %for.inc.527 ; data-dependent branch; per the test file name, presumably the non-affine region that references the argument
+
+if.then.521:                                      ; preds = %for.body.502
+  br label %for.inc.527
+
+for.inc.527:                                      ; preds = %if.then.521, %for.body.502
+  %indvars.iv.next138 = add nuw nsw i64 %indvars.iv137, 1
+  %exitcond139 = icmp ne i64 %indvars.iv.next138, 14 ; inner loop runs 14 times
+  br i1 %exitcond139, label %for.body.502, label %for.inc.530
+
+for.inc.530:                                      ; preds = %for.inc.527
+  %indvars.iv.next141 = add nuw nsw i64 %indvars.iv140, 1
+  %exitcond142 = icmp ne i64 %indvars.iv.next141, 9 ; outer loop runs 9 times
+  br i1 %exitcond142, label %for.cond.499.preheader, label %for.cond.533.preheader
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.8.0 (trunk 246359)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll
@@ -0,0 +1,26 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; IR: @foo_polly_subfn
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %sendcount, i8* %recvbuf) { ; test input: the loop uses a pointer value that is defined in an earlier basic block
+entry:
+  br label %sw.bb3
+
+sw.bb3:
+  %tmp = bitcast i8* %recvbuf to double*          ; defined here, used inside %for.body
+  %cmp75 = icmp sgt i32 %sendcount, 0             ; the loop is entered only when %sendcount > 0
+  br i1 %cmp75, label %for.body, label %end
+
+for.body:
+  %i.16 = phi i32 [ %inc04, %for.body ], [ 0, %sw.bb3 ]
+  %idxprom11 = sext i32 %i.16 to i64
+  %arrayidx12 = getelementptr inbounds double, double* %tmp, i64 %idxprom11
+  store double 1.0, double* %arrayidx12, align 8  ; tmp[i] = 1.0
+  %inc04 = add nsw i32 %i.16, 1
+  %cmp7 = icmp slt i32 %inc04, %sendcount         ; keep looping while i + 1 < %sendcount
+  br i1 %cmp7, label %for.body, label %end
+
+end:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll
@@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+
+; - Test the case where scalar evolution references a loop that is outside
+;   of the scop, but does not contain the scop.
+
+; AST:         {
+; AST-NEXT:    #pragma simd
+; AST-NEXT:    #pragma omp parallel for
+; AST-NEXT:    for (int c0 = 0; c0 < -p_0 + symbol; c0 += 1)
+; AST-NEXT:      Stmt_while_body(c0);
+; AST-NEXT:    if (p_0 >= symbol)
+; AST-NEXT:      Stmt_while_body(0);
+; AST-NEXT:    }
+
+; IR: @update_model_polly_subfn
+; IR-NOT: @update_model_polly_subfn_1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@cum_freq = external global [258 x i64], align 16
+
+define void @update_model(i64 %symbol) {         ; test input: %while.body starts from a value produced by the preceding loop %for.one
+entry:
+  br label %for.one
+
+for.one:
+  %i.1 = phi i64 [ %dec07, %for.one ], [ %symbol, %entry ]
+  %dec07 = add nsw i64 %i.1, -1
+  br i1 undef, label %for.one, label %while.body  ; the exit condition of this first loop is undef
+
+while.body:
+  %indvar = phi i64 [ %sub42, %while.body ], [ %i.1, %for.one ] ; start value %i.1 is the counter of %for.one
+  %sub42 = add nsw i64 %indvar, -1
+  %arrayidx44 = getelementptr inbounds [258 x i64], [258 x i64]* @cum_freq, i64 0, i64 %sub42
+  store i64 1, i64* %arrayidx44, align 4          ; cum_freq[%indvar - 1] = 1
+  %cmp40 = icmp sgt i64 %sub42, 0                 ; continue while the decremented index is still positive
+  br i1 %cmp40, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference_latest.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/reference_latest.ll
@@ -0,0 +1,44 @@
+; RUN: opt %loadPolly -polly-delicm -polly-simplify -polly-parallel -polly-codegen -S < %s | FileCheck %s
+;
+; Test that parallel codegen handles scalars mapped to other arrays.
+; After mapping, "store double %add10" references the array "MemRef2".
+; Its base pointer therefore needs to be made available in the subfunction.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @reference_latest(float* nocapture readonly %data, i32 %n, i32 %m) { ; test input: a value computed in the inner loop is stored to an alloca after that loop
+entry:
+  %0 = alloca double, i64 undef, align 16         ; stack array whose element count is undef
+  %conv1 = sext i32 %m to i64
+  br label %while.body
+
+while.body:
+  %indvars.iv211 = phi i64 [ %conv1, %entry ], [ %indvars.iv.next212, %for.end ] ; outer counter, starts at sext(%m) and is decremented in %for.end
+  br label %for.body
+
+for.body:
+  %indvars.iv207 = phi i64 [ %indvars.iv211, %while.body ], [ %indvars.iv.next208, %for.body ] ; inner counter, starts at the current outer counter
+  %arrayidx7 = getelementptr inbounds float, float* %data, i64 0
+  %1 = load float, float* %arrayidx7, align 4     ; reads data[0]; %1 is never used
+  %add10 = fadd double undef, undef               ; the scalar that the store in %for.end writes out
+  %indvars.iv.next208 = add nsw i64 %indvars.iv207, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next208 to i32
+  %exitcond210 = icmp eq i32 %lftr.wideiv, %n     ; inner loop ends once the truncated counter equals %n
+  br i1 %exitcond210, label %for.end, label %for.body
+
+for.end:
+  %arrayidx12 = getelementptr inbounds double, double* %0, i64 %indvars.iv211
+  store double %add10, double* %arrayidx12, align 8 ; the "store double %add10" named in the comment at the top of this test
+  %indvars.iv.next212 = add nsw i64 %indvars.iv211, -1
+  %2 = trunc i64 %indvars.iv211 to i32
+  %tobool = icmp eq i32 %2, 0                     ; outer loop ends when the pre-decrement counter truncates to 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+; CHECK-LABEL: define internal void @reference_latest_polly_subfn(i8* %polly.par.userContext)
+
+; CHECK:      %polly.access.polly.subfunc.arg. = getelementptr double, double* %polly.subfunc.arg., i64 %{{[0-9]+}}
+; CHECK-NEXT: store double %p_add{{[0-9]*}}, double* %polly.access.polly.subfunc.arg.
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/single_loop.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/single_loop.ll
@@ -0,0 +1,110 @@
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4
+; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -S < %s | FileCheck %s -check-prefix=IR-STRIDE4
+
+; This extensive test case checks the creation of the full set of OpenMP calls
+; as well as the creation of the subfunction, using a trivial loop as an example.
+
+; #define N 1024
+; float A[N];
+;
+; void single_parallel_loop(void) {
+;   for (long i = 0; i < N; i++)
+;     A[i] = 1;
+; }
+
+; AST: #pragma simd
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
+; AST:   Stmt_S(c0);
+
+; AST-STRIDE4: #pragma omp parallel for
+; AST-STRIDE4: for (int c0 = 0; c0 <= 1023; c0 += 4)
+; AST-STRIDE4:   #pragma simd
+; AST-STRIDE4:   for (int c1 = c0; c1 <= c0 + 3; c1 += 1)
+; AST-STRIDE4:     Stmt_S(c1);
+
+; IR-LABEL: single_parallel_loop()
+; IR-NEXT: entry
+; IR-NEXT:   %polly.par.userContext = alloca
+
+; IR-LABEL: polly.parallel.for:
+; IR-NEXT:   %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
+; IR-NEXT:   call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1)
+; IR-NEXT:   call void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext1)
+; IR-NEXT:   call void @GOMP_parallel_end()
+; IR-NEXT:   br label %polly.exiting
+
+; IR: define internal void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext) #1
+; IR-LABEL: polly.par.setup:
+; IR-NEXT:   %polly.par.LBPtr = alloca i64
+; IR-NEXT:   %polly.par.UBPtr = alloca i64
+; IR-NEXT:   %polly.par.userContext1 =
+; IR:   br label %polly.par.checkNext
+
+; IR-LABEL: polly.par.exit:
+; IR-NEXT:   call void @GOMP_loop_end_nowait()
+; IR-NEXT:   ret void
+
+; IR-LABEL: polly.par.checkNext:
+; IR-NEXT:   %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr)
+; IR-NEXT:   %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0
+; IR-NEXT:   br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit
+
+; IR-LABEL: polly.par.loadIVBounds:
+; IR-NEXT:   %polly.par.LB = load i64, i64* %polly.par.LBPtr
+; IR-NEXT:   %polly.par.UB = load i64, i64* %polly.par.UBPtr
+; IR-NEXT:   %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1
+; IR-NEXT:   br label %polly.loop_preheader
+
+; IR-LABEL: polly.loop_exit:
+; IR-NEXT:   br label %polly.par.checkNext
+
+; IR-LABEL: polly.loop_header:
+; IR-NEXT:   %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
+; IR-NEXT:   br label %polly.stmt.S
+
+; IR-LABEL: polly.stmt.S:
+; IR-NEXT:   %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
+; IR-NEXT:   store float 1.000000e+00, float* %[[gep]]
+; IR-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
+; IR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
+; IR-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
+
+; IR-LABEL: polly.loop_preheader:
+; IR-NEXT:   br label %polly.loop_header
+
+; IR: attributes #1 = { "polly.skip.fn" }
+
+; IR-STRIDE4:   call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4)
+; IR-STRIDE4:  add nsw i64 %polly.indvar, 3
+; IR-STRIDE4:  %polly.indvar_next = add nsw i64 %polly.indvar, 4
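+; NOTE(review): inactive directive (no ':' after the prefix, so FileCheck skips it); possibly stale, confirm before enabling - IR-STRIDE4   %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4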
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+
+define void @single_parallel_loop() nounwind {   ; test input: stores 1.0 to A[i] for i = 0..1023
+entry:
+  br label %for.i
+
+for.i:
+  %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
+  %scevgep = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar ; address of A[%indvar], computed before the exit test
+  %exitcond = icmp ne i64 %indvar, 1024           ; leave the loop once %indvar reaches 1024
+  br i1 %exitcond, label %S, label %exit
+
+S:                                                ; presumably the statement printed as Stmt_S in the expected loop nest
+  store float 1.0, float* %scevgep
+  br label %for.inc
+
+for.inc:
+  %indvar.next = add i64 %indvar, 1
+  br label %for.i
+
+exit:
+  ret void
+}
--- a/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
+++ b/external/llvm-project/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
@@ -0,0 +1,48 @@
+; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
+; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
+
+; #define N 1024
+; /* A points to the base pointer; it is dereferenced in every iteration. */
+;
+; void single_parallel_loop(float **A) {
+;   for (long i = 0; i < N; i++)
+;     (*A)[i] += 1;
+; }
+
+; AST: #pragma simd
+; AST: #pragma omp parallel for
+; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
+; AST:   Stmt_S(c0);
+
+; IR: @single_parallel_loop_polly_subfn
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @single_parallel_loop(float** %A) nounwind { ; test input: adds 1.0 to 1024 floats reached through the pointer stored at %A
+entry:
+  br label %for.i
+
+for.i:
+  %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvar, 1024           ; leave the loop once %indvar reaches 1024
+  br i1 %exitcond, label %S, label %exit
+
+S:
+  %ptr = load float*, float** %A,  !tbaa !2       ; base pointer is reloaded from %A in every iteration; NOTE(review): !2 is labelled "float" and !6 "float *ptr" below, which looks swapped relative to their uses - confirm
+  %scevgep = getelementptr float, float* %ptr, i64 %indvar
+  %val = load float, float* %scevgep, !tbaa !6
+  %sum = fadd float %val, 1.0
+  store float %sum, float* %scevgep, !tbaa !6     ; element = element + 1.0, written back to the address it was read from
+  br label %for.inc
+
+for.inc:
+  %indvar.next = add i64 %indvar, 1
+  br label %for.i
+
+exit:
+  ret void
+}
+
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!6 = !{!"float *ptr", !3, i64 0}
--- a/Show More
+++ b/Show More