Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions

View File

@@ -0,0 +1,52 @@
; RUN: opt %loadPolly -polly-codegen -polly-parallel -S < %s | FileCheck %s
;
; void foo(float *A, float *B) {
; for (long i = 0; i < 1000; i++)
; for (long j = 0; j < 1000; j++)
; A[i] = B[i];
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: define internal void @foo_polly_subfn
; Nested-loop kernel matching the C sketch in this file's header: for every
; (i, j) in [0, 1000) x [0, 1000) it copies B[i] into A[i]. Both addresses
; depend on the outer counter only.
define void @foo(float* %A, float* %B) {
bb:
br label %bb2
; Outer loop header: %i.0 starts at 0; the loop is left once %i.0 == 1000.
bb2: ; preds = %bb11, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%exitcond1 = icmp ne i64 %i.0, 1000
br i1 %exitcond1, label %bb3, label %bb13
bb3: ; preds = %bb2
br label %bb4
; Inner loop header: %j.0 starts at 0; the loop is left once %j.0 == 1000.
bb4: ; preds = %bb8, %bb3
%j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ]
%exitcond = icmp ne i64 %j.0, 1000
br i1 %exitcond, label %bb5, label %bb10
; Loop body: load B[%i.0] and store it to A[%i.0]; %j.0 is not used here.
bb5: ; preds = %bb4
%tmp = getelementptr inbounds float, float* %B, i64 %i.0
%tmp7 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp6 = load float, float* %tmp, align 4
store float %tmp6, float* %tmp7, align 4
; CHECK: %tmp6_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
; CHECK: store float %tmp6_p_scalar_, float* %scevgep8, align 4, !alias.scope !3, !noalias !4
br label %bb8
; Inner latch: j += 1.
bb8: ; preds = %bb5
%tmp9 = add nsw i64 %j.0, 1
br label %bb4
bb10: ; preds = %bb4
br label %bb11
; Outer latch: i += 1.
bb11: ; preds = %bb10
%tmp12 = add nsw i64 %i.0, 1
br label %bb2
bb13: ; preds = %bb2
ret void
}

View File

@@ -0,0 +1,33 @@
; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
;
; void f(float *A) {
; for (int i = 1; i < 1000; i++)
; A[i] += A[0];
; }
;
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float, float* }, { float, float* }* %polly.par.userContext, i32 0
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Single loop over i starting at 1 that adds A[0] to A[i]; it exits once the
; incremented counter equals 1000. A[0] is re-loaded from %A on every
; iteration inside the loop body.
define void @f(float* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
; %tmp is A[0]: a load through the unmodified base pointer.
%tmp = load float, float* %A, align 4
; %tmp1 is A[i]; the sum is written back to the same element.
%arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp, %tmp1
store float %add, float* %arrayidx1, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

View File

@@ -0,0 +1,38 @@
; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
;
; void f(float *A) {
; for (int i = 1; i < 1000; i++)
; A[i] += /* split bb */ A[0];
; }
; Fields of the user context matched below, in order: A[0], tmp (unused), A
; CHECK: %polly.par.userContext = alloca { float, float*, float* }
;
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Same computation as A[i] += A[0] for i starting at 1, but the loop body is
; split into two basic blocks: the load of A[0] lives in %for.body and its
; use lives in %for.body.split.
define void @f(float* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
; First half of the body: loop counter phi and the load of A[0].
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body.split ]
%tmp = load float, float* %A, align 4
br label %for.body.split
; Second half of the body (only reached from %for.body): A[i] += %tmp, then
; the latch. The back edge targets %for.body, not this block.
for.body.split:
%arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp, %tmp1
store float %add, float* %arrayidx1, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

View File

@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction but
; not B[0] as it is not needed
;
; void f(float *A, float *B) {
; // Not parallel
; for (int i = 1; i < 1000; i++) {
; B[i] = B[i+1] + B[0];
; // Parallel
; for (int j = 1; j < 1000; j++)
; A[j] += A[0];
; }
; }
;
; Fields of the user context matched below, in order: i, A[0], A
; CHECK: %polly.par.userContext = alloca { i64, float, float* }
;
; CHECK: %polly.access.B.load =
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
; CHECK-NOT: store float %polly.access.B.load, float* %polly.subfn.storeaddr.polly.access.B.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Two-level loop nest from the C sketch in this file's header. The outer
; loop updates B[i] from B[i+1] and B[0]; the inner loop adds A[0] to A[j].
; Both counters start at 1 and the loops are left when the counter is 1000.
define void @f(float* %A, float* %B) {
entry:
br label %for.cond
; Outer loop header (counter %indvars.iv1).
for.cond: ; preds = %for.inc.9, %entry
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc.9 ], [ 1, %entry ]
%exitcond3 = icmp ne i64 %indvars.iv1, 1000
br i1 %exitcond3, label %for.body, label %for.end.11
; Outer body: B[i] = B[i+1] + B[0]. %tmp is B[0], %tmp4 is B[i+1].
for.body: ; preds = %for.cond
%tmp = load float, float* %B, align 4
%arrayidx1 = getelementptr inbounds float, float* %B, i64 %indvars.iv1
%iv.add = add nsw i64 %indvars.iv1, 1
%arrayidx2 = getelementptr inbounds float, float* %B, i64 %iv.add
%tmp4 = load float, float* %arrayidx2, align 4
%add = fadd float %tmp4, %tmp
store float %add, float* %arrayidx1, align 4
br label %for.cond.2
; Inner loop header (counter %indvars.iv).
for.cond.2: ; preds = %for.inc, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 1, %for.body ]
%exitcond = icmp ne i64 %indvars.iv, 1000
br i1 %exitcond, label %for.body.4, label %for.end
; Inner body: A[j] += A[0]. %tmp5 is A[0], %tmp6 is A[j].
for.body.4: ; preds = %for.cond.2
%tmp5 = load float, float* %A, align 4
%arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp6 = load float, float* %arrayidx7, align 4
%add8 = fadd float %tmp6, %tmp5
store float %add8, float* %arrayidx7, align 4
br label %for.inc
; Inner latch: j += 1.
for.inc: ; preds = %for.body.4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond.2
for.end: ; preds = %for.cond.2
br label %for.inc.9
; Outer latch: i += 1.
for.inc.9: ; preds = %for.end
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond
for.end.11: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,35 @@
; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
;
; void f(float *A) {
; for (int i = 1; i < 1000; i++)
; A[i] += A[0] + A[0];
; }
;
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Single loop implementing the C sketch in this file's header:
;   A[i] += A[0] + A[0]   for i starting at 1, left once i + 1 == 1000.
; A[0] is loaded twice per iteration (%tmp and %tmp2) and the sum of both
; loads is what gets added to A[i], so both loads of A[0] are live.
define void @f(float* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
  ; Two separate loads of the same element A[0].
  %tmp = load float, float* %A, align 4
  %tmp2 = load float, float* %A, align 4
  ; %tmpadd = A[0] + A[0]
  %tmpadd = fadd float %tmp, %tmp2
  %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %tmp1 = load float, float* %arrayidx1, align 4
  ; Add the combined value to A[i]. Using %tmpadd (rather than %tmp2 alone)
  ; keeps the IR in line with the C sketch and leaves no dead instruction.
  %add = fadd float %tmpadd, %tmp1
  store float %add, float* %arrayidx1, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

View File

@@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; This code has failed the scev based code generation as the scev in the scop
; contains an AddRecExpr of an outer loop. When generating code, we did not
; properly forward the value of this expression to the subfunction.
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
; AST: Stmt_for_j(c0);
; IR: @single_parallel_loop_polly_subfn
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@A = common global [1024 x float] zeroinitializer, align 16
; Two nested loops over the global array @A. The inner loop stores 0.0 to
; A[j + i], so the store address depends on the outer counter %indvar.i as
; well as on the inner counter %indvar.j.
define void @single_parallel_loop() nounwind {
entry:
br label %for.i
; Outer loop header: %indvar.i starts at 0.
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc], [ 0, %entry ]
br label %for.j
; Inner loop: single-block loop that runs while the incremented %indvar.j is
; (signed) less than 1024.
for.j:
%indvar.j = phi i64 [ %indvar.j.next, %for.j], [ 0, %for.i ]
%sum = add i64 %indvar.j, %indvar.i
%scevgep = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %sum
store float 0.0, float *%scevgep
%indvar.j.next = add i64 %indvar.j, 1
%exitcond.j = icmp slt i64 %indvar.j.next, 1024
br i1 %exitcond.j, label %for.j, label %for.i.inc
; Outer latch: i += 1, repeated until the incremented counter equals 1024.
; NOTE(review): the fence presumably keeps the outer loop out of the modelled
; region so that only the inner loop is handled -- confirm against Polly.
for.i.inc:
fence seq_cst
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, 1024
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}

View File

@@ -0,0 +1,31 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
; AST: Stmt_for_i(c0);
; IR: getelementptr inbounds { [1024 x double]* }, { [1024 x double]* }* %polly.par.userContext, i32 0, i32 0
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Single loop storing 0.0 to B[%extern][i] for i in [0, 1024). The row index
; %extern is computed in the preheader block, i.e. before the loop.
define void @kernel_trmm([1024 x double]* %B) {
entry:
br label %for.cond1.preheader
for.cond1.preheader:
; %extern = 1 + 0; defined outside the loop and used inside it.
%extern = add i64 1, 0
br label %for.i
; Loop body and latch in one block; left once the incremented counter is 1024.
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i ], [ 0, %for.cond1.preheader ]
%getelementptr = getelementptr [1024 x double], [1024 x double]* %B, i64 %extern, i64 %indvar.i
store double 0.000000e+00, double* %getelementptr
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, 1024
br i1 %exitcond.i, label %for.i, label %end
end:
ret void
}

View File

@@ -0,0 +1,67 @@
; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; The interesting part of this test case is the instruction:
; %tmp = bitcast i8* %call to i64**
; which is not part of the scop. In the SCEV based code generation not '%tmp',
; but %call is a parameter of the SCoP and we need to make sure its value is
; properly forwarded to the subfunction.
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 < cols; c0 += 1)
; AST: Stmt_for_body(c0);
; AST: if (cols <= 0)
; AST: Stmt_for_body(0);
; IR: @foo_polly_subfn
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop that stores 1 to (*(i64**)call)[i] while the incremented counter is
; (signed) less than %cols. The bitcast of %call happens in the entry block,
; before the loop; the pointer load through it happens inside the loop.
define void @foo(i64 %cols, i8* noalias %call) {
entry:
%tmp = bitcast i8* %call to i64**
br label %for.body
for.body:
%indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; %arrayidx is %tmp with a zero offset; %tmp1 is the i64* loaded through it.
%arrayidx = getelementptr inbounds i64*, i64** %tmp, i64 0
%tmp1 = load i64*, i64** %arrayidx, align 8
%arrayidx.2 = getelementptr inbounds i64, i64* %tmp1, i64 %indvar
store i64 1, i64* %arrayidx.2, align 4
%indvar.next = add nsw i64 %indvar, 1
%cmp = icmp slt i64 %indvar.next, %cols
br i1 %cmp, label %for.body, label %end
end:
ret void
}
; Another variation of this test case, now with even more of the index
; expression defined outside of the scop.
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 < cols; c0 += 1)
; AST: Stmt_for_body(c0);
; AST: if (cols <= 0)
; AST: Stmt_for_body(0);
; IR: @bar_polly_subfn
; Variant of @foo in which the zero-offset getelementptr on the bitcast
; pointer is also placed in the entry block, so only the pointer load and the
; indexed store remain inside the loop.
define void @bar(i64 %cols, i8* noalias %call) {
entry:
%tmp = bitcast i8* %call to i64**
%arrayidx = getelementptr inbounds i64*, i64** %tmp, i64 0
br label %for.body
; Stores 1 to the i-th element behind the loaded pointer; the loop continues
; while the incremented counter is (signed) less than %cols.
for.body:
%indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
%tmp1 = load i64*, i64** %arrayidx, align 8
%arrayidx.2 = getelementptr inbounds i64, i64* %tmp1, i64 %indvar
store i64 1, i64* %arrayidx.2, align 4
%indvar.next = add nsw i64 %indvar, 1
%cmp = icmp slt i64 %indvar.next, %cols
br i1 %cmp, label %for.body, label %end
end:
ret void
}

View File

@@ -0,0 +1,45 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S < %s | FileCheck %s -check-prefix=IR
; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction.
;
; void loop_references_outer_ids(float *A) {
; for (long i = 0; i < 100; i++)
; A[i] = i;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 99; c0 += 1)
; AST: Stmt_for_body(c0);
; IR-LABEL: polly.parallel.for:
; IR-NEXT: %polly.subfn.storeaddr.A = getelementptr inbounds { float* }, { float* }* %polly.par.userContext, i32 0, i32 0
; IR-NEXT: store float* %A, float** %polly.subfn.storeaddr.A
; IR-NEXT: %polly.par.userContext1 = bitcast { float* }* %polly.par.userContext to i8*
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop from the C sketch in this file's header: A[i] = (float)i for i in
; [0, 100). The only value the loop needs from outside is the pointer %A.
define void @loop_references_outer_ids(float* %A) {
entry:
br label %for.cond
; Loop header: left once %i.0 == 100.
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i64 %i.0, 100
br i1 %exitcond, label %for.body, label %for.end
; Body: convert the counter to float and store it to A[i].
for.body: ; preds = %for.cond
%conv = sitofp i64 %i.0 to float
%arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
store float %conv, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i64 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,99 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen -S < %s | FileCheck %s -check-prefix=IR
;
; float A[100];
;
; void loop_references_outer_ids(long n) {
; for (long i = 0; i < 100; i++)
; for (long j = 0; j < 100; j++)
; for (long k = 0; k < n + i; k++)
; A[j] += i + j + k;
; }
; In this test case we verify that the j-loop is generated as OpenMP parallel
; loop and that the values of 'i' and 'n', needed in the loop bounds of the
; k-loop, are correctly passed to the subfunction.
; AST: #pragma minimal dependence distance: 1
; AST: for (int c0 = max(0, -n + 1); c0 <= 99; c0 += 1)
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 99; c1 += 1)
; AST: #pragma minimal dependence distance: 1
; AST: for (int c2 = 0; c2 < n + c0; c2 += 1)
; AST: Stmt_for_body6(c0, c1, c2);
; IR: %polly.par.userContext = alloca { i64, i64 }
; IR: %[[R1:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext, i32 0, i32 0
; IR-NEXT: store i64 %n, i64* %[[R1]]
; IR-NEXT: %[[R2:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext, i32 0, i32 1
; IR-NEXT: store i64 %polly.indvar, i64* %[[R2]]
; IR-NEXT: %polly.par.userContext1 = bitcast { i64, i64 }* %polly.par.userContext to i8*
; IR-LABEL: @loop_references_outer_ids_polly_subfn(i8* %polly.par.userContext)
; IR: %polly.par.userContext1 = bitcast i8* %polly.par.userContext to { i64, i64 }*
; IR-NEXT: %[[R3:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext1, i32 0, i32 0
; IR-NEXT: %[[R4:[0-9a-z.]+]] = load i64, i64* %[[R3]]
; IR-NEXT: %[[R5:[0-9a-z.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %polly.par.userContext1, i32 0, i32 1
; IR-NEXT: %[[R6:[0-9a-z.]+]] = load i64, i64* %[[R5]]
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@A = common global [100 x float] zeroinitializer, align 16
; Triple loop nest from the C sketch in this file's header:
;   i in [0, 100), j in [0, 100), k in [0, n + i):  A[j] += i + j + k
; The k-loop bound depends on the parameter %n and on the outermost counter.
define void @loop_references_outer_ids(i64 %n) {
entry:
br label %for.cond
; i-loop header: left once %i.0 == 100.
for.cond: ; preds = %for.inc03, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc04, %for.inc03 ]
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.end15
for.body: ; preds = %for.cond
br label %for.cond1
; j-loop header: left once %j.0 == 100.
for.cond1: ; preds = %for.inc00, %for.body
%j.0 = phi i64 [ 0, %for.body ], [ %inc01, %for.inc00 ]
%exitcond = icmp ne i64 %j.0, 100
br i1 %exitcond, label %for.body3, label %for.end12
for.body3: ; preds = %for.cond1
br label %for.cond4
; k-loop header: runs while %k.0 < %i.0 + %n (signed compare).
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]
%add = add nsw i64 %i.0, %n
%cmp5 = icmp slt i64 %k.0, %add
br i1 %cmp5, label %for.body6, label %for.end
; Innermost body: A[j] += (float)(i + j + k).
for.body6: ; preds = %for.cond4
%add7 = add nsw i64 %i.0, %j.0
%add8 = add nsw i64 %add7, %k.0
%conv = sitofp i64 %add8 to float
%arrayidx = getelementptr inbounds [100 x float], [100 x float]* @A, i64 0, i64 %j.0
%tmp = load float, float* %arrayidx, align 4
%add9 = fadd float %tmp, %conv
store float %add9, float* %arrayidx, align 4
br label %for.inc
; k latch.
for.inc: ; preds = %for.body6
%inc = add nsw i64 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc00
; j latch.
for.inc00: ; preds = %for.end
%inc01 = add nsw i64 %j.0, 1
br label %for.cond1
for.end12: ; preds = %for.cond1
br label %for.inc03
; i latch.
for.inc03: ; preds = %for.end12
%inc04 = add nsw i64 %i.0, 1
br label %for.cond
for.end15: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,58 @@
; RUN: opt %loadPolly -polly-parallel -polly-delicm -polly-codegen -S < %s | FileCheck %s
;
; Verify that -polly-parallel can handle mapped scalar MemoryAccesses.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
; Reduced two-level loop nest (both counters run from 0 until the incremented
; value equals 4000). Many operands are undef: the base pointers are loaded
; from undef addresses and the stored value is undef.
define void @main() local_unnamed_addr #0 {
entry:
%0 = load i8*, i8** undef, align 8, !tbaa !1
%1 = load i8*, i8** undef, align 8, !tbaa !1
; %1 is viewed as a flat double array, %0 as rows of 4000 doubles.
%arraydecay16 = bitcast i8* %1 to double*
%arraydecay20 = bitcast i8* %0 to [4000 x double]*
br label %for.body65.i226
; Outer loop header: computes the address of element i of the flat array,
; which the inner loop stores to.
for.body65.i226: ; preds = %for.inc85.i238, %entry
%indvars.iv8.i223 = phi i64 [ 0, %entry ], [ %indvars.iv.next9.i236, %for.inc85.i238 ]
%arrayidx70.i224 = getelementptr inbounds double, double* %arraydecay16, i64 %indvars.iv8.i223
br label %for.body68.i235
; Inner loop: %2 (a phi of undef values) and %3 (the load of element [i][j])
; have no uses in this function; the store writes undef to the address
; computed in the outer loop header.
for.body68.i235: ; preds = %for.body68.i235, %for.body65.i226
%2 = phi double [ undef, %for.body65.i226 ], [ undef, %for.body68.i235 ]
%indvars.iv.i227 = phi i64 [ 0, %for.body65.i226 ], [ %indvars.iv.next.i233, %for.body68.i235 ]
%arrayidx74.i228 = getelementptr inbounds [4000 x double], [4000 x double]* %arraydecay20, i64 %indvars.iv8.i223, i64 %indvars.iv.i227
%3 = load double, double* %arrayidx74.i228, align 8, !tbaa !5
store double undef, double* %arrayidx70.i224, align 8, !tbaa !5
%indvars.iv.next.i233 = add nuw nsw i64 %indvars.iv.i227, 1
%exitcond.i234 = icmp eq i64 %indvars.iv.next.i233, 4000
br i1 %exitcond.i234, label %for.inc85.i238, label %for.body68.i235
; Outer latch.
for.inc85.i238: ; preds = %for.body68.i235
%indvars.iv.next9.i236 = add nuw nsw i64 %indvars.iv8.i223, 1
%exitcond10.i237 = icmp eq i64 %indvars.iv.next9.i236, 4000
br i1 %exitcond10.i237, label %kernel_gemver_StrictFP.exit, label %for.body65.i226
kernel_gemver_StrictFP.exit: ; preds = %for.inc85.i238
ret void
}
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 6.0.0 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"any pointer", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"double", !3, i64 0}
; CHECK-LABEL: define internal void @main_polly_subfn(i8* %polly.par.userContext)
;
; CHECK: polly.stmt.for.body65.i226:
; CHECK-NEXT: %polly.access.cast.polly.subfunc.arg.[[R0:[0-9]*]] = bitcast i8* %polly.subfunc.arg.{{[0-9]*}} to double*
; CHECK-NEXT: %polly.access.polly.subfunc.arg.[[R1:[0-9]*]] = getelementptr double, double* %polly.access.cast.polly.subfunc.arg.[[R0]], i64 %polly.indvar
; CHECK-NEXT: store double undef, double* %polly.access.polly.subfunc.arg.[[R1]]

View File

@@ -0,0 +1,77 @@
; RUN: opt %loadPolly -polly-import-jscop \
; RUN: -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop \
; RUN: -polly-codegen -S < %s \
; RUN: -polly-parallel \
; RUN: | FileCheck %s -check-prefix=IR
; void new_multidim_access(long n, long m, float A[][m]) {
; for (long i = 0; i < n; i++)
; for (long j = 0; j < 100; j++)
; A[i][2 * j] += i + j;
; }
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] };
; CHECK: new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 13 + i1] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] };
; CHECK: new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 43 + i1] };
; IR: %polly.access.mul.polly.subfunc.arg.A = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
; IR: %6 = add nsw i64 %polly.indvar5, 13
; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6
; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
; IR: %7 = add nsw i64 %polly.indvar5, 43
; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7
; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop nest from the C sketch in this file's header: for i in [0, n) and
; j in [0, 100), A[i][2 * j] += i + j, with A addressed as a flat float
; array using row stride %m.
define void @new_multidim_access(i64 %n, i64 %m, float* %A) {
bb:
br label %bb1
; i-loop header: runs while %i.0 < %n (signed compare).
bb1: ; preds = %bb15, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp16, %bb15 ]
%tmp = icmp slt i64 %i.0, %n
br i1 %tmp, label %bb2, label %bb17
bb2: ; preds = %bb1
br label %bb3
; j-loop header: left once %j.0 == 100.
bb3: ; preds = %bb12, %bb2
%j.0 = phi i64 [ 0, %bb2 ], [ %tmp13, %bb12 ]
%exitcond = icmp ne i64 %j.0, 100
br i1 %exitcond, label %bb4, label %bb14
; Body: %tmp6 = (float)(i + j); the element offset is i * m + (j << 1).
bb4: ; preds = %bb3
%tmp5 = add nsw i64 %i.0, %j.0
%tmp6 = sitofp i64 %tmp5 to float
%tmp7 = shl nsw i64 %j.0, 1
%tmp8 = mul nsw i64 %i.0, %m
%.sum = add i64 %tmp8, %tmp7
%tmp9 = getelementptr inbounds float, float* %A, i64 %.sum
%tmp10 = load float, float* %tmp9, align 4
%tmp11 = fadd float %tmp10, %tmp6
store float %tmp11, float* %tmp9, align 4
br label %bb12
; j latch.
bb12: ; preds = %bb4
%tmp13 = add nsw i64 %j.0, 1
br label %bb3
bb14: ; preds = %bb3
br label %bb15
; i latch.
bb15: ; preds = %bb14
%tmp16 = add nsw i64 %i.0, 1
br label %bb1
bb17: ; preds = %bb1
ret void
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n, m] -> { : n <= 9223372036854775807 and n >= -9223372036854775808 and m <= 9223372036854775807 and m >= -9223372036854775808 }",
"name" : "bb1 => bb17",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, i1 + 13] }"
},
{
"kind" : "write",
"relation" : "[n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, i1 + 43] }"
}
],
"domain" : "[n, m] -> { Stmt_bb4[i0, i1] : i0 >= 0 and n >= 1 and i0 <= -1 + n and i1 >= 0 and i1 <= 99 }",
"name" : "Stmt_bb4",
"schedule" : "[n, m] -> { Stmt_bb4[i0, i1] -> [i0, i1] }"
}
]
}

View File

@@ -0,0 +1,44 @@
; RUN: opt %loadPolly -polly-codegen -polly-parallel \
; RUN: -polly-parallel-force -S < %s | FileCheck %s
;
; Test to verify that we pass %rem96 to the parallel subfunction.
;
; CHECK: %[[R:[0-9]*]] = getelementptr inbounds { i32, i32, i64, float*, float*, i32 }, { i32, i32, i64, float*, float*, i32 }* %polly.par.userContext1, i32 0, i32 5
; CHECK-NEXT: %polly.subfunc.arg.rem96 = load i32, i32* %[[R]]
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
; Reduced two-level loop nest. %rem96 (= %n2 srem 16) is computed before the
; loops and only reaches the inner loop indirectly, as the start value of the
; outer induction variable %indvars.iv262 (sign-extended %rem96 plus 15).
define void @dmxpy(i32 %n1, float* %y, i32 %n2, float* %x) #0 {
entry:
%rem96 = srem i32 %n2, 16
%0 = sext i32 %rem96 to i64
%1 = add i64 %0, 15
br label %for.cond195.preheader
; Outer loop header: %indvars.iv262 advances by 16 per iteration starting at
; %1; %j.0236 advances by 16 starting at 0 and controls the exit.
for.cond195.preheader: ; preds = %for.inc363, %entry
%indvars.iv262 = phi i64 [ %1, %entry ], [ %indvars.iv.next263, %for.inc363 ]
%j.0236 = phi i32 [ 0, %entry ], [ %add364, %for.inc363 ]
br label %for.body197
; Inner loop: stores undef to y[i]; the load %3 from x[%indvars.iv262 - 6]
; has no uses in this function. Left once the truncated incremented counter
; equals %n1.
for.body197: ; preds = %for.body197, %for.cond195.preheader
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body197 ], [ 0, %for.cond195.preheader ]
%arrayidx199 = getelementptr inbounds float, float* %y, i64 %indvars.iv
%2 = add nsw i64 %indvars.iv262, -6
%arrayidx292 = getelementptr inbounds float, float* %x, i64 %2
%3 = load float, float* %arrayidx292, align 4
store float undef, float* %arrayidx199, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, %n1
br i1 %exitcond, label %for.body197, label %for.inc363
; Outer latch: continue while %j.0236 + 16 < %n2 (signed compare).
for.inc363: ; preds = %for.body197
%add364 = add nsw i32 %j.0236, 16
%cmp193 = icmp slt i32 %add364, %n2
%indvars.iv.next263 = add i64 %indvars.iv262, 16
br i1 %cmp193, label %for.cond195.preheader, label %for.end365
for.end365: ; preds = %for.inc363
ret void
}

View File

@@ -0,0 +1,59 @@
; RUN: opt %loadPolly -polly-parallel \
; RUN: -polly-parallel-force -polly-codegen -S -verify-dom-info < %s \
; RUN: | FileCheck %s -check-prefix=IR
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; IR: @GOMP_parallel_loop_runtime_start
@longLimit = external global [9 x [23 x i32]], align 16
@shortLimit = external global [9 x [14 x i32]], align 16
; Reduced two-level loop nest over the external tables @longLimit and
; @shortLimit. The outer counter runs over [0, 9), the inner one over [0, 14).
define void @init_layer3(i32 %down_sample_sblimit) #0 {
entry:
br label %for.cond.463.preheader
for.cond.463.preheader: ; preds = %entry
br label %for.cond.499.preheader
; Function exit, reached from the outer latch.
for.cond.533.preheader: ; preds = %for.inc.530
ret void
; Outer loop header: stores undef to longLimit[i][0] and loads the same
; element back. Neither the loaded value %tmp nor %indvars.iv.next135 has a
; use in this function.
for.cond.499.preheader: ; preds = %for.inc.530, %for.cond.463.preheader
%indvars.iv140 = phi i64 [ 0, %for.cond.463.preheader ], [ %indvars.iv.next141, %for.inc.530 ]
%arrayidx483 = getelementptr inbounds [9 x [23 x i32]], [9 x [23 x i32]]* @longLimit, i64 0, i64 %indvars.iv140, i64 0
store i32 undef, i32* %arrayidx483, align 4, !tbaa !1
%arrayidx487 = getelementptr inbounds [9 x [23 x i32]], [9 x [23 x i32]]* @longLimit, i64 0, i64 %indvars.iv140, i64 0
%tmp = load i32, i32* %arrayidx487, align 4, !tbaa !1
%indvars.iv.next135 = add nuw nsw i64 0, 1
br label %for.body.502
; Inner loop body: compares shortLimit[i][j] against the function argument
; (signed greater-than) and branches on the result.
for.body.502: ; preds = %for.inc.527, %for.cond.499.preheader
%indvars.iv137 = phi i64 [ 0, %for.cond.499.preheader ], [ %indvars.iv.next138, %for.inc.527 ]
%arrayidx518 = getelementptr inbounds [9 x [14 x i32]], [9 x [14 x i32]]* @shortLimit, i64 0, i64 %indvars.iv140, i64 %indvars.iv137
%tmp1 = load i32, i32* %arrayidx518, align 4, !tbaa !1
%cmp519 = icmp sgt i32 %tmp1, %down_sample_sblimit
br i1 %cmp519, label %if.then.521, label %for.inc.527
; The taken branch is empty and falls straight through to the latch.
if.then.521: ; preds = %for.body.502
br label %for.inc.527
; Inner latch.
for.inc.527: ; preds = %if.then.521, %for.body.502
%indvars.iv.next138 = add nuw nsw i64 %indvars.iv137, 1
%exitcond139 = icmp ne i64 %indvars.iv.next138, 14
br i1 %exitcond139, label %for.body.502, label %for.inc.530
; Outer latch.
for.inc.530: ; preds = %for.inc.527
%indvars.iv.next141 = add nuw nsw i64 %indvars.iv140, 1
%exitcond142 = icmp ne i64 %indvars.iv.next141, 9
br i1 %exitcond142, label %for.cond.499.preheader, label %for.cond.533.preheader
}
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0 (trunk 246359)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}

View File

@@ -0,0 +1,26 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; IR: @foo_polly_subfn
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Guarded loop that stores 1.0 to ((double*)recvbuf)[i] for i in
; [0, sendcount). The loop counter is i32 and is sign-extended to i64 before
; it is used as an index.
define void @foo(i32 %sendcount, i8* %recvbuf) {
entry:
br label %sw.bb3
; Guard block: the bitcast is done here, outside the loop; the loop is
; skipped entirely unless %sendcount > 0.
sw.bb3:
%tmp = bitcast i8* %recvbuf to double*
%cmp75 = icmp sgt i32 %sendcount, 0
br i1 %cmp75, label %for.body, label %end
; Loop body and latch; continues while the incremented counter is (signed)
; less than %sendcount.
for.body:
%i.16 = phi i32 [ %inc04, %for.body ], [ 0, %sw.bb3 ]
%idxprom11 = sext i32 %i.16 to i64
%arrayidx12 = getelementptr inbounds double, double* %tmp, i64 %idxprom11
store double 1.0, double* %arrayidx12, align 8
%inc04 = add nsw i32 %i.16, 1
%cmp7 = icmp slt i32 %inc04, %sendcount
br i1 %cmp7, label %for.body, label %end
end:
ret void
}

View File

@@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; - Test the case where scalar evolution references a loop that is outside
; of the scop, but does not contain the scop.
; AST: {
; AST-NEXT: #pragma simd
; AST-NEXT: #pragma omp parallel for
; AST-NEXT: for (int c0 = 0; c0 < -p_0 + symbol; c0 += 1)
; AST-NEXT: Stmt_while_body(c0);
; AST-NEXT: if (p_0 >= symbol)
; AST-NEXT: Stmt_while_body(0);
; AST-NEXT: }
; IR: @update_model_polly_subfn
; IR-NOT: @update_model_polly_subfn_1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@cum_freq = external global [258 x i64], align 16
; Two consecutive (not nested) loops. The first counts %i.1 down from %symbol
; under an undef exit condition; the second starts its counter at the value
; %i.1 had when the first loop was left.
define void @update_model(i64 %symbol) {
entry:
br label %for.one
; First loop: decrements %i.1 by 1 per iteration; the branch condition is
; undef.
for.one:
%i.1 = phi i64 [ %dec07, %for.one ], [ %symbol, %entry ]
%dec07 = add nsw i64 %i.1, -1
br i1 undef, label %for.one, label %while.body
; Second loop: decrements the counter, stores 1 to cum_freq[counter - 1] and
; continues while the decremented value is (signed) greater than 0.
while.body:
%indvar = phi i64 [ %sub42, %while.body ], [ %i.1, %for.one ]
%sub42 = add nsw i64 %indvar, -1
%arrayidx44 = getelementptr inbounds [258 x i64], [258 x i64]* @cum_freq, i64 0, i64 %sub42
store i64 1, i64* %arrayidx44, align 4
%cmp40 = icmp sgt i64 %sub42, 0
br i1 %cmp40, label %while.body, label %while.end
while.end:
ret void
}

View File

@@ -0,0 +1,44 @@
; RUN: opt %loadPolly -polly-delicm -polly-simplify -polly-parallel -polly-codegen -S < %s | FileCheck %s
;
; Test that parallel codegen handles scalars mapped to other arrays.
; After mapping "store double %add10" references the array "MemRef2".
; Its base pointer therefore needs to be made available in the subfunction.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Reduced two-level loop nest. The inner loop defines the scalar %add10; the
; outer loop's tail block stores that scalar into a stack array allocated in
; the entry block.
define void @reference_latest(float* nocapture readonly %data, i32 %n, i32 %m) {
entry:
; Stack array of doubles with an undef element count.
%0 = alloca double, i64 undef, align 16
%conv1 = sext i32 %m to i64
br label %while.body
; Outer loop header: the counter starts at sext(%m) and is decremented in
; %for.end.
while.body:
%indvars.iv211 = phi i64 [ %conv1, %entry ], [ %indvars.iv.next212, %for.end ]
br label %for.body
; Inner loop: counter starts at the outer counter and is incremented until
; its truncated next value equals %n. The load %1 of data[0] has no uses in
; this function; %add10 is an fadd of two undef operands.
for.body:
%indvars.iv207 = phi i64 [ %indvars.iv211, %while.body ], [ %indvars.iv.next208, %for.body ]
%arrayidx7 = getelementptr inbounds float, float* %data, i64 0
%1 = load float, float* %arrayidx7, align 4
%add10 = fadd double undef, undef
%indvars.iv.next208 = add nsw i64 %indvars.iv207, 1
%lftr.wideiv = trunc i64 %indvars.iv.next208 to i32
%exitcond210 = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond210, label %for.end, label %for.body
; Outer tail and latch: store %add10 to element %indvars.iv211 of the stack
; array, decrement the counter, and leave once the truncated pre-decrement
; counter is 0.
for.end:
%arrayidx12 = getelementptr inbounds double, double* %0, i64 %indvars.iv211
store double %add10, double* %arrayidx12, align 8
%indvars.iv.next212 = add nsw i64 %indvars.iv211, -1
%2 = trunc i64 %indvars.iv211 to i32
%tobool = icmp eq i32 %2, 0
br i1 %tobool, label %while.end, label %while.body
while.end:
ret void
}
; CHECK-LABEL: define internal void @reference_latest_polly_subfn(i8* %polly.par.userContext)
; CHECK: %polly.access.polly.subfunc.arg. = getelementptr double, double* %polly.subfunc.arg., i64 %{{[0-9]+}}
; CHECK-NEXT: store double %p_add{{[0-9]*}}, double* %polly.access.polly.subfunc.arg.

View File

@@ -0,0 +1,110 @@
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -S < %s | FileCheck %s -check-prefix=IR-STRIDE4
; This extensive test case tests the creation of the full set of OpenMP calls
; as well as the subfunction creation using a trivial loop as example.
; #define N 1024
; float A[N];
;
; void single_parallel_loop(void) {
; for (long i = 0; i < N; i++)
; A[i] = 1;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
; AST: Stmt_S(c0);
; AST-STRIDE4: #pragma omp parallel for
; AST-STRIDE4: for (int c0 = 0; c0 <= 1023; c0 += 4)
; AST-STRIDE4: #pragma simd
; AST-STRIDE4: for (int c1 = c0; c1 <= c0 + 3; c1 += 1)
; AST-STRIDE4: Stmt_S(c1);
; IR-LABEL: single_parallel_loop()
; IR-NEXT: entry
; IR-NEXT: %polly.par.userContext = alloca
; IR-LABEL: polly.parallel.for:
; IR-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
; IR-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1)
; IR-NEXT: call void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext1)
; IR-NEXT: call void @GOMP_parallel_end()
; IR-NEXT: br label %polly.exiting
; IR: define internal void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext) #1
; IR-LABEL: polly.par.setup:
; IR-NEXT: %polly.par.LBPtr = alloca i64
; IR-NEXT: %polly.par.UBPtr = alloca i64
; IR-NEXT: %polly.par.userContext1 =
; IR: br label %polly.par.checkNext
; IR-LABEL: polly.par.exit:
; IR-NEXT: call void @GOMP_loop_end_nowait()
; IR-NEXT: ret void
; IR-LABEL: polly.par.checkNext:
; IR-NEXT: %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr)
; IR-NEXT: %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0
; IR-NEXT: br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit
; IR-LABEL: polly.par.loadIVBounds:
; IR-NEXT: %polly.par.LB = load i64, i64* %polly.par.LBPtr
; IR-NEXT: %polly.par.UB = load i64, i64* %polly.par.UBPtr
; IR-NEXT: %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1
; IR-NEXT: br label %polly.loop_preheader
; IR-LABEL: polly.loop_exit:
; IR-NEXT: br label %polly.par.checkNext
; IR-LABEL: polly.loop_header:
; IR-NEXT: %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
; IR-NEXT: br label %polly.stmt.S
; IR-LABEL: polly.stmt.S:
; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; IR-NEXT: store float 1.000000e+00, float* %[[gep]]
; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; IR-LABEL: polly.loop_preheader:
; IR-NEXT: br label %polly.loop_header
; IR: attributes #1 = { "polly.skip.fn" }
; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4)
; IR-STRIDE4: add nsw i64 %polly.indvar, 3
; IR-STRIDE4: %polly.indvar_next = add nsw i64 %polly.indvar, 4
; NOTE(review): the following line has no ':' after its prefix, so FileCheck
; does not treat it as a directive and the %polly.adjust_ub expectation is
; never checked. The stride-1 expectations in this file compare
; %polly.indvar_next directly against %polly.par.UBAdjusted without any
; %polly.adjust_ub, so this line may be stale -- confirm whether it should be
; enabled (by adding the colon) or dropped.
; IR-STRIDE4 %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Global array of 1024 floats written by the loop below.
@A = common global [1024 x float] zeroinitializer, align 16
; Test input: stores 1.0 to @A[i] for every i from 0 up to (but not including) 1024.
define void @single_parallel_loop() nounwind {
entry:
br label %for.i
for.i:
; Loop header: %indvar starts at 0 and receives %indvar.next from for.inc.
%indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
; Address of @A[%indvar]; computed in the header, used by the store in block S.
%scevgep = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
; Run the body while %indvar != 1024, otherwise leave the loop.
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %S, label %exit
S:
; Loop body: the single statement of the loop (A[i] = 1 in the C sketch).
store float 1.0, float* %scevgep
br label %for.inc
for.inc:
; Latch: advance the induction variable by one and branch back to the header.
%indvar.next = add i64 %indvar, 1
br label %for.i
exit:
ret void
}

View File

@@ -0,0 +1,48 @@
; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-invariant-load-hoisting=true -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
;
; Parallel code generation for a single loop whose array base pointer is
; loaded from memory inside the loop body. Both runs enable invariant load
; hoisting; the codegen run expects an OpenMP subfunction in the output.
;
; #define N 1024
;
; void single_parallel_loop(float **A) {
;   for (long i = 0; i < N; i++)
;     (*A)[i] += 1;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
; AST: Stmt_S(c0);
; IR: @single_parallel_loop_polly_subfn
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Test input: for every i from 0 up to (but not including) 1024, loads the
; array base pointer from %A and adds 1.0 to element i of that array.
define void @single_parallel_loop(float** %A) nounwind {
entry:
br label %for.i
for.i:
; Loop header: %indvar starts at 0 and receives %indvar.next from for.inc.
%indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
; Run the body while %indvar != 1024, otherwise leave the loop.
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %S, label %exit
S:
; The base pointer is reloaded from %A in every iteration of the loop.
%ptr = load float*, float** %A, !tbaa !2
; Read-modify-write of element %indvar: load, add 1.0, store back to the same address.
%scevgep = getelementptr float, float* %ptr, i64 %indvar
%val = load float, float* %scevgep, !tbaa !6
%sum = fadd float %val, 1.0
store float %sum, float* %scevgep, !tbaa !6
br label %for.inc
for.inc:
; Latch: advance the induction variable by one and branch back to the header.
%indvar.next = add i64 %indvar, 1
br label %for.i
exit:
ret void
}
; TBAA metadata: !2 tags the load of the base pointer and !6 tags the float
; load and store. Both descriptors name !3 as their parent, whose parent is !4.
!2 = !{!"float", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!6 = !{!"float *ptr", !3, i64 0}

Some files were not shown because too many files have changed in this diff Show More