Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions

View File

@@ -0,0 +1,16 @@
; RUN: opt %loadPolly -polly-opt-isl -S < %s
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
; Minimal single-loop test input with no memory accesses.
; The i32 counter starts at 0 and is incremented by 1 on every iteration; the
; loop is left as soon as the incremented value equals 7, after which the
; function returns.
define void @sdbout_label() nounwind {
entry:
br label %for.cond
; Loop header and latch in one block: %0 is the counter, %1 its successor.
for.cond: ; preds = %for.cond, %entry
%0 = phi i32 [ 0, %entry ], [ %1, %for.cond ]
%1 = add nsw i32 %0, 1
%exitcond72 = icmp eq i32 %1, 7
br i1 %exitcond72, label %sw.epilog66, label %for.cond
sw.epilog66: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,204 @@
; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-vectorizer=polly < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Struct types needed to declare @stdout as a pointer to %struct._IO_FILE.
; The two types reference each other through pointers.
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; @A and @B are the 1536 x 1536 float matrices written by @init_array and read
; by @main.
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
; Output stream handle loaded by @print_array; defined outside this module.
@stdout = external global %struct._IO_FILE*
; Format string used by @print_array for each matrix element.
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
; @C is the 1536 x 1536 float matrix written by @main and read by @print_array.
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
; A single newline, used by @print_array as a line break.
@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; Fills the global matrices @A and @B.
; For every i and j in [0, 1536) the same value is computed twice and stored to
; A[i][j] and B[i][j]:
;   (float)((double)(1 + (i * j) srem 1024) / 2.0)
; Takes no arguments and returns nothing.
define void @init_array() nounwind uwtable {
entry:
br label %for.cond
; Outer loop over i.
for.cond: ; preds = %for.inc17, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc18, %for.inc17 ]
%cmp = icmp slt i32 %i.0, 1536
br i1 %cmp, label %for.body, label %for.end19
for.body: ; preds = %for.cond
br label %for.cond1
; Inner loop over j.
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%cmp2 = icmp slt i32 %j.0, 1536
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
; Compute the value and store it to A[i][j] (row index %idxprom5 = i, column
; index %idxprom = j).
%mul = mul nsw i32 %i.0, %j.0
%rem = srem i32 %mul, 1024
%add = add nsw i32 1, %rem
%conv = sitofp i32 %add to double
%div = fdiv double %conv, 2.000000e+00
%conv4 = fptrunc double %div to float
%idxprom = sext i32 %j.0 to i64
%idxprom5 = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom5
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i32 0, i64 %idxprom
store float %conv4, float* %arrayidx6, align 4
; Recompute the same expression from scratch and store it to B[i][j].
%mul7 = mul nsw i32 %i.0, %j.0
%rem8 = srem i32 %mul7, 1024
%add9 = add nsw i32 1, %rem8
%conv10 = sitofp i32 %add9 to double
%div11 = fdiv double %conv10, 2.000000e+00
%conv12 = fptrunc double %div11 to float
%idxprom13 = sext i32 %j.0 to i64
%idxprom14 = sext i32 %i.0 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
store float %conv12, float* %arrayidx16, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc17
for.inc17: ; preds = %for.end
%inc18 = add nsw i32 %i.0, 1
br label %for.cond
for.end19: ; preds = %for.cond
ret void
}
; Prints the global matrix @C through fprintf on the stream loaded from @stdout.
; For every i and j in [0, 1536), C[i][j] is extended to double and printed with
; the "%lf " format. A newline is printed whenever j srem 80 equals 79, and once
; more after each complete row. The fprintf return values are ignored.
define void @print_array() nounwind uwtable {
entry:
br label %for.cond
; Outer loop over rows i.
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%cmp = icmp slt i32 %i.0, 1536
br i1 %cmp, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
; Inner loop over columns j.
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%cmp2 = icmp slt i32 %j.0, 1536
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
; Print C[i][j]; @stdout is reloaded before every call.
%0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%idxprom = sext i32 %j.0 to i64
%idxprom4 = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i32 0, i64 %idxprom
%1 = load float, float* %arrayidx5, align 4
%conv = fpext float %1 to double
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
; Line break after every 80th element of the row.
%rem = srem i32 %j.0, 80
%cmp6 = icmp eq i32 %rem, 79
br i1 %cmp6, label %if.then, label %if.end
if.then: ; preds = %for.body3
%2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i32 0, i32 0))
br label %if.end
if.end: ; preds = %if.then, %for.body3
br label %for.inc
for.inc: ; preds = %if.end
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
; Line break at the end of each row.
%3 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i32 0, i32 0))
br label %for.inc10
for.inc10: ; preds = %for.end
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}
; External variadic output routine called by @print_array; only declared here.
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
; Matrix-multiplication kernel over the 1536 x 1536 float globals.
; Calls @init_array, then for every i and j in [0, 1536) sets C[i][j] to 0.0 and
; accumulates A[i][k] * B[k][j] into it for every k in [0, 1536).
; The accumulator is loaded from and stored back to C[i][j] on every k
; iteration. Always returns 0. @print_array is not called from here.
define i32 @main() nounwind uwtable {
entry:
call void @init_array()
br label %for.cond
; Loop over i.
for.cond: ; preds = %for.inc28, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc29, %for.inc28 ]
%cmp = icmp slt i32 %i.0, 1536
br i1 %cmp, label %for.body, label %for.end30
for.body: ; preds = %for.cond
br label %for.cond1
; Loop over j.
for.cond1: ; preds = %for.inc25, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc26, %for.inc25 ]
%cmp2 = icmp slt i32 %j.0, 1536
br i1 %cmp2, label %for.body3, label %for.end27
for.body3: ; preds = %for.cond1
; C[i][j] = 0.0
%idxprom = sext i32 %j.0 to i64
%idxprom4 = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i32 0, i64 %idxprom
store float 0.000000e+00, float* %arrayidx5, align 4
br label %for.cond6
; Loop over k.
for.cond6: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%cmp7 = icmp slt i32 %k.0, 1536
br i1 %cmp7, label %for.body8, label %for.end
for.body8: ; preds = %for.cond6
; %0 = C[i][j]
%idxprom9 = sext i32 %j.0 to i64
%idxprom10 = sext i32 %i.0 to i64
%arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom10
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i32 0, i64 %idxprom9
%0 = load float, float* %arrayidx12, align 4
; %1 = A[i][k]
%idxprom13 = sext i32 %k.0 to i64
%idxprom14 = sext i32 %i.0 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
%1 = load float, float* %arrayidx16, align 4
; %2 = B[k][j]
%idxprom17 = sext i32 %j.0 to i64
%idxprom18 = sext i32 %k.0 to i64
%arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom18
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i32 0, i64 %idxprom17
%2 = load float, float* %arrayidx20, align 4
; C[i][j] = %0 + %1 * %2
%mul = fmul float %1, %2
%add = fadd float %0, %mul
%idxprom21 = sext i32 %j.0 to i64
%idxprom22 = sext i32 %i.0 to i64
%arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom22
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i32 0, i64 %idxprom21
store float %add, float* %arrayidx24, align 4
br label %for.inc
for.inc: ; preds = %for.body8
%inc = add nsw i32 %k.0, 1
br label %for.cond6
for.end: ; preds = %for.cond6
br label %for.inc25
for.inc25: ; preds = %for.end
%inc26 = add nsw i32 %j.0, 1
br label %for.cond1
for.end27: ; preds = %for.cond1
br label %for.inc28
for.inc28: ; preds = %for.end27
%inc29 = add nsw i32 %i.0, 1
br label %for.cond
for.end30: ; preds = %for.cond
ret i32 0
}

View File

@@ -0,0 +1,23 @@
; RUN: opt %loadPolly -polly-opt-isl -S < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Check that we handle statements with an empty iteration domain correctly.
; Single loop around a store to a stack slot.
; The i32 counter starts at 0 and the loop body runs while the counter is not
; equal to -1. Each iteration increments the counter (plain add, no nsw/nuw
; flags) and stores 1.0 to the local double %A.
define void @f() {
entry:
%A = alloca double
br label %for
; Loop header: leave the loop once %indvar equals -1.
for:
%indvar = phi i32 [ %indvar.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i32 %indvar, -1
br i1 %exitcond, label %for.inc, label %return
; Loop body and latch: this block holds the only memory access.
for.inc:
%indvar.next = add i32 %indvar, 1
store double 1.0, double* %A
br label %for
return:
ret void
}

View File

@@ -0,0 +1,69 @@
; RUN: opt -S %loadPolly -basicaa -polly-opt-isl -polly-opt-fusion=max -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -basicaa -polly-opt-isl -polly-opt-fusion=max -polly-ast -analyze -polly-dependences-computeout=1 < %s | FileCheck %s -check-prefix=TIMEOUT
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; for(i = 0; i < 100; i++ )
; S1: A[i] = 2;
;
; for (i = 0; i < 10; i++ )
; S2: A[i] = 5;
;
; for (i = 0; i < 200; i++ )
; S3: A[i] = 7;
; Three consecutive loops that write constants to the same local array of
; 200 i32 elements:
;   S1 stores 2 to A[0..99],
;   S2 stores 5 to A[0..9],
;   S3 stores 7 to A[0..199].
; Each loop is a single block that tests its exit condition after the store.
; The block labels S1, S2 and S3 give the statement names (Stmt_S1, ...) that
; the CHECK and TIMEOUT lines match.
define void @sequential_writes() {
entry:
%A = alloca [200 x i32]
br label %S1
; S1: 100 iterations, stores 2.
S1:
%indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %S1 ]
%arrayidx.1 = getelementptr [200 x i32], [200 x i32]* %A, i64 0, i64 %indvar.1
store i32 2, i32* %arrayidx.1
%indvar.next.1 = add i64 %indvar.1, 1
%exitcond.1 = icmp ne i64 %indvar.next.1, 100
br i1 %exitcond.1, label %S1, label %exit.1
exit.1:
br label %S2
; S2: 10 iterations, stores 5.
S2:
%indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %S2 ]
%arrayidx.2 = getelementptr [200 x i32], [200 x i32]* %A, i64 0, i64 %indvar.2
store i32 5, i32* %arrayidx.2
%indvar.next.2 = add i64 %indvar.2, 1
%exitcond.2 = icmp ne i64 %indvar.next.2, 10
br i1 %exitcond.2, label %S2, label %exit.2
exit.2:
br label %S3
; S3: 200 iterations, stores 7.
S3:
%indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %S3 ]
%arrayidx.3 = getelementptr [200 x i32], [200 x i32]* %A, i64 0, i64 %indvar.3
store i32 7, i32* %arrayidx.3
%indvar.next.3 = add i64 %indvar.3, 1
%exitcond.3 = icmp ne i64 %indvar.next.3, 200
br i1 %exitcond.3, label %S3 , label %exit.3
exit.3:
ret void
}
; CHECK: for (int c0 = 0; c0 <= 199; c0 += 1) {
; CHECK: if (c0 <= 99) {
; CHECK: Stmt_S1(c0);
; CHECK: if (c0 <= 9)
; CHECK: Stmt_S2(c0);
; CHECK: }
; CHECK: Stmt_S3(c0);
; CHECK: }
; TIMEOUT: for (int c0 = 0; c0 <= 99; c0 += 1)
; TIMEOUT: Stmt_S1(c0);
; TIMEOUT: for (int c0 = 0; c0 <= 9; c0 += 1)
; TIMEOUT: Stmt_S2(c0);
; TIMEOUT: for (int c0 = 0; c0 <= 199; c0 += 1)
; TIMEOUT: Stmt_S3(c0);

View File

@@ -0,0 +1,234 @@
; RUN: opt %loadPolly -analyze -polly-process-unprofitable -polly-remarks-minimal \
; RUN: -polly-opt-isl -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=1 \
; RUN: -polly-ast -polly-target-vector-register-bitwidth=4096 \
; RUN: -polly-target-1st-cache-level-associativity=3 < %s | FileCheck %s
;
; /* Test that Polly does not crash due to configurations that can lead to
; incorrect tile size computations.
; The parameters are set up such that Car in `getMacroKernelParams`
; is evaluated to 0. */
;
; static const int N = 3000;
;
; void f(int A[N][N], int B[N][N], int C[N][N]) {
; for (int i = 0; i < N; i++) {
; for (int j = 0; j < N; j++) {
; A[i][j] = 0;
; for (int k = 0; k < N; k++) {
; A[i][j] += B[i][k] * C[k][j];
; }
; }
; }
; }
;
; CHECK: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= 93; c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= 93; c1 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c2 = 0; c2 <= min(31, -32 * c0 + 2999); c2 += 1)
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, -32 * c1 + 2999); c3 += 1)
; CHECK-NEXT: Stmt_for_body3(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // Register tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= 23; c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= 2999; c1 += 1)
; CHECK-NEXT: for (int c2 = 0; c2 <= 2999; c2 += 1) {
; CHECK-NEXT: // Register tiling - Points
; CHECK-NEXT: {
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 1, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 2, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 3, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 4, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 5, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 6, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 7, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 8, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 9, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 10, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 11, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 12, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 13, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 14, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 15, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 16, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 17, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 18, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 19, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 20, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 21, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 22, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 23, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 24, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 25, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 26, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 27, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 28, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 29, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 30, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 31, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 32, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 33, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 34, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 35, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 36, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 37, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 38, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 39, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 40, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 41, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 42, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 43, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 44, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 45, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 46, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 47, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 48, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 49, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 50, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 51, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 52, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 53, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 54, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 55, c2);
; CHECK-NEXT: if (c0 <= 22) {
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 56, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 57, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 58, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 59, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 60, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 61, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 62, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 63, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 64, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 65, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 66, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 67, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 68, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 69, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 70, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 71, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 72, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 73, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 74, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 75, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 76, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 77, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 78, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 79, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 80, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 81, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 82, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 83, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 84, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 85, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 86, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 87, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 88, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 89, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 90, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 91, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 92, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 93, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 94, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 95, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 96, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 97, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 98, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 99, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 100, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 101, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 102, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 103, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 104, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 105, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 106, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 107, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 108, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 109, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 110, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 111, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 112, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 113, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 114, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 115, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 116, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 117, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 118, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 119, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 120, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 121, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 122, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 123, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 124, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 125, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 126, c2);
; CHECK-NEXT: Stmt_for_body8(c1, 128 * c0 + 127, c2);
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Integer matrix multiplication over rows of 3000 i32 elements.
; For every i and j in [0, 3000): A[i][j] = 0, then for every k in [0, 3000):
; A[i][j] += B[i][k] * C[k][j].
; Parameters: %A is the output matrix, %B and %C are the inputs; each is a
; pointer to rows of 3000 x i32.
; The block labels for.body3 and for.body8 give the statement names that the
; CHECK lines match (Stmt_for_body3, Stmt_for_body8).
define void @f([3000 x i32]* %A, [3000 x i32]* %B, [3000 x i32]* %C) {
entry:
br label %for.cond
; Loop over i (%indvars.iv4).
for.cond: ; preds = %for.inc24, %entry
%indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc24 ], [ 0, %entry ]
%exitcond6 = icmp ne i64 %indvars.iv4, 3000
br i1 %exitcond6, label %for.body, label %for.end26
for.body: ; preds = %for.cond
br label %for.cond1
; Loop over j (%indvars.iv1).
for.cond1: ; preds = %for.inc21, %for.body
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc21 ], [ 0, %for.body ]
%exitcond3 = icmp ne i64 %indvars.iv1, 3000
br i1 %exitcond3, label %for.body3, label %for.end23
for.body3: ; preds = %for.cond1
; A[i][j] = 0
%arrayidx5 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
store i32 0, i32* %arrayidx5, align 4
br label %for.cond6
; Loop over k (%indvars.iv).
for.cond6: ; preds = %for.inc, %for.body3
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body3 ]
%exitcond = icmp ne i64 %indvars.iv, 3000
br i1 %exitcond, label %for.body8, label %for.end
for.body8: ; preds = %for.cond6
; A[i][j] += B[i][k] * C[k][j]
%arrayidx12 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %indvars.iv4, i64 %indvars.iv
%tmp = load i32, i32* %arrayidx12, align 4
%arrayidx16 = getelementptr inbounds [3000 x i32], [3000 x i32]* %C, i64 %indvars.iv, i64 %indvars.iv1
%tmp7 = load i32, i32* %arrayidx16, align 4
%mul = mul nsw i32 %tmp, %tmp7
%arrayidx20 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
%tmp8 = load i32, i32* %arrayidx20, align 4
%add = add nsw i32 %tmp8, %mul
store i32 %add, i32* %arrayidx20, align 4
br label %for.inc
for.inc: ; preds = %for.body8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond6
for.end: ; preds = %for.cond6
br label %for.inc21
for.inc21: ; preds = %for.end
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
br label %for.cond1
for.end23: ; preds = %for.cond1
br label %for.inc24
for.inc24: ; preds = %for.end23
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
br label %for.cond
for.end26: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,91 @@
; RUN: opt -S %loadPolly -polly-pattern-matching-based-opts=false \
; RUN: -polly-vectorizer=stripmine -polly-opt-isl -polly-ast -analyze \
; RUN: < %s | FileCheck %s
; CHECK: // 1st level tiling - Tiles
; CHECK-NEXT: #pragma known-parallel
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
; CHECK-NEXT: }
; CHECK-NEXT: if (32 * c1 + 31 >= nj)
; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
; Parametric matrix-multiplication kernel over rows of 1024 doubles.
; For i in [0, ni), j in [0, nj), k in [0, nk): C[i][j] += A[i][k] * B[k][j].
; Parameters:
;   %ni, %nj, %nk   i32 trip counts of the i, j and k loops; each loop nest
;                   level is skipped when its bound is not greater than 0.
;   %alpha, %beta   accepted but not used anywhere in this body.
;   %C              matrix that is read and updated.
;   %A, %B          matrices that are only read.
; The loops are bottom-tested: each latch truncates the incremented 64-bit
; induction variable to i32 and compares it against the bound.
; The block label for.body.6 gives the statement name matched by the CHECK
; lines (Stmt_for_body_6).
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this excerpt - confirm it exists in the full test file.
; Function Attrs: nounwind uwtable
define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, [1024 x double]* %C, [1024 x double]* %A, [1024 x double]* %B) #0 {
entry:
; Skip everything when ni <= 0.
%cmp.27 = icmp sgt i32 %ni, 0
br i1 %cmp.27, label %for.cond.1.preheader.lr.ph, label %for.end.22
for.cond.1.preheader.lr.ph: ; preds = %entry
br label %for.cond.1.preheader
; i loop (%indvars.iv33); the j loop is skipped when nj <= 0.
for.cond.1.preheader: ; preds = %for.cond.1.preheader.lr.ph, %for.inc.20
%indvars.iv33 = phi i64 [ 0, %for.cond.1.preheader.lr.ph ], [ %indvars.iv.next34, %for.inc.20 ]
%cmp2.25 = icmp sgt i32 %nj, 0
br i1 %cmp2.25, label %for.cond.4.preheader.lr.ph, label %for.inc.20
for.cond.4.preheader.lr.ph: ; preds = %for.cond.1.preheader
br label %for.cond.4.preheader
; j loop (%indvars.iv29); the k loop is skipped when nk <= 0.
for.cond.4.preheader: ; preds = %for.cond.4.preheader.lr.ph, %for.inc.17
%indvars.iv29 = phi i64 [ 0, %for.cond.4.preheader.lr.ph ], [ %indvars.iv.next30, %for.inc.17 ]
%cmp5.23 = icmp sgt i32 %nk, 0
br i1 %cmp5.23, label %for.body.6.lr.ph, label %for.inc.17
for.body.6.lr.ph: ; preds = %for.cond.4.preheader
br label %for.body.6
; k loop (%indvars.iv): C[i][j] += A[i][k] * B[k][j].
for.body.6: ; preds = %for.body.6.lr.ph, %for.body.6
%indvars.iv = phi i64 [ 0, %for.body.6.lr.ph ], [ %indvars.iv.next, %for.body.6 ]
%arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv33, i64 %indvars.iv
%0 = load double, double* %arrayidx8, align 8
%arrayidx12 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv29
%1 = load double, double* %arrayidx12, align 8
%mul = fmul double %0, %1
%arrayidx16 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv33, i64 %indvars.iv29
%2 = load double, double* %arrayidx16, align 8
%add = fadd double %2, %mul
store double %add, double* %arrayidx16, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, %nk
br i1 %exitcond, label %for.body.6, label %for.cond.4.for.inc.17_crit_edge
for.cond.4.for.inc.17_crit_edge: ; preds = %for.body.6
br label %for.inc.17
; j loop latch.
for.inc.17: ; preds = %for.cond.4.for.inc.17_crit_edge, %for.cond.4.preheader
%indvars.iv.next30 = add nuw nsw i64 %indvars.iv29, 1
%lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
%exitcond32 = icmp ne i32 %lftr.wideiv31, %nj
br i1 %exitcond32, label %for.cond.4.preheader, label %for.cond.1.for.inc.20_crit_edge
for.cond.1.for.inc.20_crit_edge: ; preds = %for.inc.17
br label %for.inc.20
; i loop latch.
for.inc.20: ; preds = %for.cond.1.for.inc.20_crit_edge, %for.cond.1.preheader
%indvars.iv.next34 = add nuw nsw i64 %indvars.iv33, 1
%lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
%exitcond36 = icmp ne i32 %lftr.wideiv35, %ni
br i1 %exitcond36, label %for.cond.1.preheader, label %for.cond.for.end.22_crit_edge
for.cond.for.end.22_crit_edge: ; preds = %for.inc.20
br label %for.end.22
for.end.22: ; preds = %for.cond.for.end.22_crit_edge, %entry
ret void
}

View File

@@ -0,0 +1,55 @@
{
"arrays" : [
{
"name" : "MemRef_C1",
"sizes" : [ "*" ],
"type" : "double"
},
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_C",
"sizes" : [ "*", "1024" ],
"type" : "double"
}
],
"context" : "{ : }",
"name" : "%for.body---%for.end24",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}

View File

@@ -0,0 +1,55 @@
{
"arrays" : [
{
"name" : "MemRef_C1",
"sizes" : [ "*" ],
"type" : "double"
},
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_C",
"sizes" : [ "*", "1024" ],
"type" : "double"
}
],
"context" : "{ : }",
"name" : "%for.body---%for.end24",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}

View File

@@ -0,0 +1,46 @@
{
"arrays" : [
{
"name" : "MemRef_B",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_C",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "New_MemRef_A",
"sizes" : [ "1024", "1024" ],
"type" : "double"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end18",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> New_MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}

View File

@@ -0,0 +1,40 @@
; RUN: opt %loadPolly -polly-opt-isl -analyze \
; RUN: -polly-ast -polly-tile-sizes=1,64 < %s | FileCheck %s
; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 7; c1 += 1)
; CHECK: for (int c3 = 0; c3 <= 63; c3 += 1)
; CHECK: Stmt_for_body3(c0, 64 * c1 + c3);
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; Two-deep loop nest that fills a matrix with rows of 512 i32 elements:
; for i in [0, 1024) and j in [0, 512): A[i][j] = (j * i) srem 42.
; %A points to rows of 512 x i32. Both loops are bottom-tested, so each
; body executes before its bound is checked.
; The block label for.body3 gives the statement name matched by the CHECK lines
; (Stmt_for_body3).
; Function Attrs: nounwind
define void @line([512 x i32]* %A) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body3.lr.ph
; Outer loop header over i.
for.body3.lr.ph: ; preds = %for.inc5, %entry.split
%i.0 = phi i32 [ 0, %entry.split ], [ %inc6, %for.inc5 ]
br label %for.body3
; Inner loop over j: body and latch in one block.
for.body3: ; preds = %for.body3.lr.ph, %for.body3
%j.0 = phi i32 [ 0, %for.body3.lr.ph ], [ %inc, %for.body3 ]
%mul = mul nsw i32 %j.0, %i.0
%rem = srem i32 %mul, 42
%arrayidx4 = getelementptr inbounds [512 x i32], [512 x i32]* %A, i32 %i.0, i32 %j.0
store i32 %rem, i32* %arrayidx4, align 4
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, 512
br i1 %cmp2, label %for.body3, label %for.inc5
; Outer loop latch.
for.inc5: ; preds = %for.body3
%inc6 = add nsw i32 %i.0, 1
%cmp = icmp slt i32 %inc6, 1024
br i1 %cmp, label %for.body3.lr.ph, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}

View File

@@ -0,0 +1,39 @@
; RUN: opt %loadPolly -polly-opt-isl -analyze -polly-ast -polly-tile-sizes=64,1 < %s | FileCheck %s
; CHECK: for (int c0 = 0; c0 <= 15; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 511; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 63; c2 += 1)
; CHECK: Stmt_for_body3(64 * c0 + c2, c1);
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; Two-deep loop nest that fills a matrix with rows of 512 i32 elements:
; for i in [0, 1024) and j in [0, 512): A[i][j] = (j * i) srem 42.
; %A points to rows of 512 x i32. Both loops are bottom-tested, so each
; body executes before its bound is checked.
; The block label for.body3 gives the statement name matched by the CHECK lines
; (Stmt_for_body3).
; Function Attrs: nounwind
define void @line([512 x i32]* %A) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body3.lr.ph
; Outer loop header over i.
for.body3.lr.ph: ; preds = %for.inc5, %entry.split
%i.0 = phi i32 [ 0, %entry.split ], [ %inc6, %for.inc5 ]
br label %for.body3
; Inner loop over j: body and latch in one block.
for.body3: ; preds = %for.body3.lr.ph, %for.body3
%j.0 = phi i32 [ 0, %for.body3.lr.ph ], [ %inc, %for.body3 ]
%mul = mul nsw i32 %j.0, %i.0
%rem = srem i32 %mul, 42
%arrayidx4 = getelementptr inbounds [512 x i32], [512 x i32]* %A, i32 %i.0, i32 %j.0
store i32 %rem, i32* %arrayidx4, align 4
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, 512
br i1 %cmp2, label %for.body3, label %for.inc5
; Outer loop latch.
for.inc5: ; preds = %for.body3
%inc6 = add nsw i32 %i.0, 1
%cmp = icmp slt i32 %inc6, 1024
br i1 %cmp, label %for.body3.lr.ph, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}

View File

@@ -0,0 +1,101 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=8 \
; RUN: -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-2nd-cache-level-size=262144 \
; RUN: -polly-optimized-scops \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: -disable-output < %s 2>&1 | FileCheck %s
;
; /* C := alpha*A*B + beta*C */
; for (i = 0; i < _PB_NI; i++)
; for (j = 0; j < _PB_NJ; j++)
; {
; C[i][j] *= beta;
; for (k = 0; k < _PB_NK; ++k)
; C[i][j] += alpha * A[i][k] * B[k][j];
; }
;
; CHECK: double Packed_B[ { [] -> [(256)] } ][ { [] -> [(256)] } ][ { [] -> [(8)] } ];
; CHECK-NEXT: double Packed_A[ { [] -> [(24)] } ][ { [] -> [(256)] } ][ { [] -> [(4)] } ]; // Element size 8
;
; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg6[i0, i2] };
; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_A[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 96*floor((-i0 + 4o0 + o2)/96) = -i0 + 4o0 + o2 and 0 <= o1 <= 255 and o2 >= 0 and -4o0 <= o2 <= 95 - 4o0 and o2 <= 3 };
;
; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg7[i2, i1] };
; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_B[o0, o1, i1 - 8o0] : 256*floor((-i2 + o1)/256) = -i2 + o1 and -7 + i1 <= 8o0 <= i1 and 0 <= o1 <= 255 };
;
; CHECK: CopyStmt_0
; CHECK-NEXT: Domain :=
; CHECK-NEXT: { CopyStmt_0[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 1023 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: ;
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: ;
; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> Packed_B[o0, o1, i1 - 8o0] : 256*floor((-i2 + o1)/256) = -i2 + o1 and -7 + i1 <= 8o0 <= i1 and 0 <= o1 <= 255 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: ;
; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> MemRef_arg7[i2, i1] };
; CHECK-NEXT: CopyStmt_1
; CHECK-NEXT: Domain :=
; CHECK-NEXT: { CopyStmt_1[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 1023 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: ;
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: ;
; CHECK-NEXT: new: { CopyStmt_1[i0, i1, i2] -> Packed_A[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 96*floor((-i0 + 4o0 + o2)/96) = -i0 + 4o0 + o2 and 0 <= o1 <= 255 and o2 >= 0 and -4o0 <= o2 <= 95 - 4o0 and o2 <= 3 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: ;
; CHECK-NEXT: new: { CopyStmt_1[i0, i1, i2] -> MemRef_arg6[i0, i2] };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; @kernel_gemm: C := alpha*A*B + beta*C on fixed-size double matrices.
;   %arg3 - alpha
;   %arg4 - beta
;   %arg5 - C, rows of 1056 doubles (loaded and stored)
;   %arg6 - A, rows of 1024 doubles (only loaded)
;   %arg7 - B, rows of 1056 doubles (only loaded)
;   %arg, %arg1, %arg2 are not referenced in the body.
; Trip counts: i = 0..1055, j = 0..1055, k = 0..1023; all loops are
; bottom-tested with constant bounds.
; The expected output of this test mentions Stmt_Copy_0, MemRef_arg6 and
; MemRef_arg7, which presumably derive from the label and argument names used
; here; keep those names unchanged.
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) #0 {
bb:
br label %bb8
; i loop header: %tmp is the row index i.
bb8: ; preds = %bb29, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
br label %bb9
; j loop header: %tmp10 is the column index j. Scales C[i][j] by beta before
; the k loop runs.
bb9: ; preds = %bb26, %bb8
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fmul double %tmp12, %arg4
store double %tmp13, double* %tmp11, align 8
br label %Copy_0
; k loop: %tmp15 is k. C[i][j] += alpha * A[i][k] * B[k][j]; the running value
; is reloaded from and stored back to C[i][j] on every iteration.
Copy_0: ; preds = %Copy_0, %bb9
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
%tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
%tmp17 = load double, double* %tmp16, align 8
%tmp18 = fmul double %tmp17, %arg3
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
%tmp20 = load double, double* %tmp19, align 8
%tmp21 = fmul double %tmp18, %tmp20
%tmp22 = load double, double* %tmp11, align 8
%tmp23 = fadd double %tmp22, %tmp21
store double %tmp23, double* %tmp11, align 8
%tmp24 = add nuw nsw i64 %tmp15, 1
%tmp25 = icmp ne i64 %tmp24, 1024
br i1 %tmp25, label %Copy_0, label %bb26
; j loop latch.
bb26: ; preds = %Copy_0
%tmp27 = add nuw nsw i64 %tmp10, 1
%tmp28 = icmp ne i64 %tmp27, 1056
br i1 %tmp28, label %bb9, label %bb29
; i loop latch.
bb29: ; preds = %bb26
%tmp30 = add nuw nsw i64 %tmp, 1
%tmp31 = icmp ne i64 %tmp30, 1056
br i1 %tmp31, label %bb8, label %bb32
bb32: ; preds = %bb29
ret void
}
attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+aes,+avx,+cmov,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }

View File

@@ -0,0 +1,134 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=8 \
; RUN: -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-2nd-cache-level-size=262144 -polly-ast \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: -analyze < %s | FileCheck %s
;
; /* C := alpha*A*B + beta*C */
; /* _PB_NK % Kc != 0 */
; for (i = 0; i < _PB_NI; i++)
; for (j = 0; j < _PB_NJ; j++)
; {
; C[i][j] *= beta;
; for (k = 0; k < _PB_NK; ++k)
; C[i][j] += alpha * A[i][k] * B[k][j];
; }
;
; CHECK: {
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= 32; c1 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c2 = 0; c2 <= 31; c2 += 1)
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
; CHECK-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(1022, 256 * c1 + 255); c4 += 1)
; CHECK-NEXT: CopyStmt_0(0, c3, c4);
; CHECK-NEXT: for (int c2 = 0; c2 <= 10; c2 += 1) {
; CHECK-NEXT: for (int c3 = 96 * c2; c3 <= 96 * c2 + 95; c3 += 1)
; CHECK-NEXT: for (int c5 = 256 * c1; c5 <= min(1022, 256 * c1 + 255); c5 += 1)
; CHECK-NEXT: CopyStmt_1(c3, 0, c5);
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: // Register tiling - Tiles
; CHECK-NEXT: for (int c3 = 0; c3 <= 131; c3 += 1)
; CHECK-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1)
; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1022); c5 += 1) {
; CHECK-NEXT: // Loop Vectorizer Disabled
; CHECK-NEXT: // Register tiling - Points
; CHECK-NEXT: {
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 1, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 2, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 4, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 5, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 6, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 7, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 1, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 2, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 4, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 5, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 6, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 7, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 1, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 2, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 4, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 5, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 6, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 7, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 1, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 2, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 3, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 4, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 5, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 6, 256 * c1 + c5);
; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5);
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; @kernel_gemm: C := alpha*A*B + beta*C, with a k extent of 1023.
;   %arg3 - alpha
;   %arg4 - beta
;   %arg5 - C, rows of 1056 doubles (loaded and stored)
;   %arg6 - A, rows of 1023 doubles (only loaded)
;   %arg7 - B, rows of 1056 doubles (only loaded)
;   %arg, %arg1, %arg2 are not referenced in the body.
; Trip counts: i = 0..1055, j = 0..1055, k = 0..1022. The k extent is the case
; the file header describes as _PB_NK % Kc != 0; the expected loop nest clamps
; the k ranges with min(1022, ...) accordingly.
; The expected output mentions Stmt_bb9 and Stmt_Copy_0, which presumably
; derive from the block labels below; keep the labels unchanged.
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1023 x double]* %arg6, [1056 x double]* %arg7) #0 {
bb:
br label %bb8
; i loop header: %tmp is the row index i.
bb8: ; preds = %bb29, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
br label %bb9
; j loop header: %tmp10 is the column index j. Scales C[i][j] by beta before
; the k loop runs.
bb9: ; preds = %bb26, %bb8
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fmul double %tmp12, %arg4
store double %tmp13, double* %tmp11, align 8
br label %Copy_0
; k loop: %tmp15 is k. C[i][j] += alpha * A[i][k] * B[k][j]; the running value
; is reloaded from and stored back to C[i][j] on every iteration.
Copy_0: ; preds = %Copy_0, %bb9
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
%tmp16 = getelementptr inbounds [1023 x double], [1023 x double]* %arg6, i64 %tmp, i64 %tmp15
%tmp17 = load double, double* %tmp16, align 8
%tmp18 = fmul double %tmp17, %arg3
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
%tmp20 = load double, double* %tmp19, align 8
%tmp21 = fmul double %tmp18, %tmp20
%tmp22 = load double, double* %tmp11, align 8
%tmp23 = fadd double %tmp22, %tmp21
store double %tmp23, double* %tmp11, align 8
%tmp24 = add nuw nsw i64 %tmp15, 1
; Exit after 1023 iterations, matching the column count of A.
%tmp25 = icmp ne i64 %tmp24, 1023
br i1 %tmp25, label %Copy_0, label %bb26
; j loop latch.
bb26: ; preds = %Copy_0
%tmp27 = add nuw nsw i64 %tmp10, 1
%tmp28 = icmp ne i64 %tmp27, 1056
br i1 %tmp28, label %bb9, label %bb29
; i loop latch.
bb29: ; preds = %bb26
%tmp30 = add nuw nsw i64 %tmp, 1
%tmp31 = icmp ne i64 %tmp30, 1056
br i1 %tmp31, label %bb8, label %bb32
bb32: ; preds = %bb29
ret void
}
attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+aes,+avx,+cmov,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }

View File

@@ -0,0 +1,107 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-ast -analyze < %s | FileCheck %s
;
; void jacobi1d(long T, long N, float *A, float *B) {
; long t, i, j;
; for (t = 0; t < T; t++) {
; for (i = 1; i < N - 1; i++)
; B[i] = 0.33333 * (A[i - 1] + A[i] + A[i + 1]);
; for (j = 1; j < N - 1; j++)
; A[j] = 0.33333 * (B[i - 1] + B[i] + B[i + 1]);
; }
; }
; Verify that we do not tile bands that have just a single dimension.
; CHECK: for (int c0 = 0; c0 < T; c0 += 1) {
; CHECK: for (int c1 = 0; c1 < N - 2; c1 += 1)
; CHECK: Stmt_for_body3(c0, c1);
; CHECK: for (int c1 = 0; c1 < N - 2; c1 += 1)
; CHECK: Stmt_for_body15(c0, c1);
; CHECK: }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @jacobi1d: T time steps, each running two consecutive 1-D loops over 1..N-2.
;   %T - number of time steps (loop runs while t < T, signed compare)
;   %N - array extent; both inner loops run while index < N - 1
;   %A, %B - float arrays
; First inner loop:  B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]).
; Second inner loop: A[j] = 0.33333 * (B[s-1] + B[s] + B[s+1]) where
; s = %smax = max(N - 1, 1), i.e. the B indices do not depend on j. This
; mirrors the C sketch in the file header, which indexes B with i inside the
; j loop.
; The sum is formed in float, widened to double for the multiply, then
; truncated back to float before the store.
define void @jacobi1d(i64 %T, i64 %N, float* %A, float* %B) {
entry:
; %smax = signed max(N - 1, 1).
%tmp = add i64 %N, -1
%tmp1 = icmp sgt i64 %tmp, 1
%smax = select i1 %tmp1, i64 %tmp, i64 1
br label %for.cond
; Time loop header: %t.0 is t.
for.cond: ; preds = %for.inc30, %entry
%t.0 = phi i64 [ 0, %entry ], [ %inc31, %for.inc30 ]
%cmp = icmp slt i64 %t.0, %T
br i1 %cmp, label %for.body, label %for.end32
for.body: ; preds = %for.cond
br label %for.cond1
; First inner loop header: %i.0 is i, starting at 1.
for.cond1: ; preds = %for.inc, %for.body
%i.0 = phi i64 [ 1, %for.body ], [ %inc, %for.inc ]
%sub = add nsw i64 %N, -1
%cmp2 = icmp slt i64 %i.0, %sub
br i1 %cmp2, label %for.body3, label %for.end
; Three-point stencil over A, written to B[i].
for.body3: ; preds = %for.cond1
%sub4 = add nsw i64 %i.0, -1
%arrayidx = getelementptr inbounds float, float* %A, i64 %sub4
%tmp2 = load float, float* %arrayidx, align 4
%arrayidx5 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp3 = load float, float* %arrayidx5, align 4
%add = fadd float %tmp2, %tmp3
%add6 = add nuw nsw i64 %i.0, 1
%arrayidx7 = getelementptr inbounds float, float* %A, i64 %add6
%tmp4 = load float, float* %arrayidx7, align 4
%add8 = fadd float %add, %tmp4
%conv = fpext float %add8 to double
%mul = fmul double %conv, 3.333300e-01
%conv9 = fptrunc double %mul to float
%arrayidx10 = getelementptr inbounds float, float* %B, i64 %i.0
store float %conv9, float* %arrayidx10, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nuw nsw i64 %i.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.cond11
; Second inner loop header: %j.0 is j, starting at 1.
for.cond11: ; preds = %for.inc27, %for.end
%j.0 = phi i64 [ 1, %for.end ], [ %inc28, %for.inc27 ]
%sub12 = add nsw i64 %N, -1
%cmp13 = icmp slt i64 %j.0, %sub12
br i1 %cmp13, label %for.body15, label %for.end29
; Reads B at %smax - 1, %smax and %smax + 1 (independent of j), writes A[j].
for.body15: ; preds = %for.cond11
%sub16 = add nsw i64 %smax, -1
%arrayidx17 = getelementptr inbounds float, float* %B, i64 %sub16
%tmp5 = load float, float* %arrayidx17, align 4
%arrayidx18 = getelementptr inbounds float, float* %B, i64 %smax
%tmp6 = load float, float* %arrayidx18, align 4
%add19 = fadd float %tmp5, %tmp6
%add20 = add nsw i64 %smax, 1
%arrayidx21 = getelementptr inbounds float, float* %B, i64 %add20
%tmp7 = load float, float* %arrayidx21, align 4
%add22 = fadd float %add19, %tmp7
%conv23 = fpext float %add22 to double
%mul24 = fmul double %conv23, 3.333300e-01
%conv25 = fptrunc double %mul24 to float
%arrayidx26 = getelementptr inbounds float, float* %A, i64 %j.0
store float %conv25, float* %arrayidx26, align 4
br label %for.inc27
for.inc27: ; preds = %for.body15
%inc28 = add nuw nsw i64 %j.0, 1
br label %for.cond11
for.end29: ; preds = %for.cond11
br label %for.inc30
; Time loop latch.
for.inc30: ; preds = %for.end29
%inc31 = add nuw nsw i64 %t.0, 1
br label %for.cond
for.end32: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=no -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=yes -analyze < %s | FileCheck %s --check-prefix=OUTER
; By skewing, the diagonal can be made parallel. ISL does this when the
; 'outer_coincidence' option is enabled.
;
; void func(int m, int n, float A[static const restrict m][n]) {
; for (int i = 1; i < m; i+=1)
; for (int j = 1; j < n; j+=1)
; A[i][j] = A[i-1][j] + A[i][j-1];
;}
; @func: A[i][j] = A[i-1][j] + A[i][j-1] for i in [1, m) and j in [1, n).
;   %m - row bound (outer loop runs while i < m, signed compare)
;   %n - column bound and row stride; element (i, j) is addressed as
;        A + i * n + j
;   %A - float array, declared noalias nonnull
; Each element depends on its upper and left neighbours, so the loops carry
; dependences in both i and j.
; NOTE(review): attribute group #0 is referenced but its definition is not
; visible in this excerpt - confirm it exists in the full file.
define void @func(i64 %m, i64 %n, float* noalias nonnull %A) #0 {
entry:
br label %for.cond
; Outer loop header: %i.0 is i, starting at 1.
for.cond: ; preds = %for.inc11, %entry
%i.0 = phi i64 [ 1, %entry ], [ %add12, %for.inc11 ]
%cmp = icmp slt i64 %i.0, %m
br i1 %cmp, label %for.cond1.preheader, label %for.end13
for.cond1.preheader: ; preds = %for.cond
br label %for.cond1
; Inner loop header: %j.0 is j, starting at 1.
for.cond1: ; preds = %for.cond1.preheader, %for.body3
%j.0 = phi i64 [ %add10, %for.body3 ], [ 1, %for.cond1.preheader ]
%cmp2 = icmp slt i64 %j.0, %n
br i1 %cmp2, label %for.body3, label %for.inc11
for.body3: ; preds = %for.cond1
; Load A[i-1][j].
%sub = add nsw i64 %i.0, -1
%tmp = mul nsw i64 %sub, %n
%arrayidx = getelementptr inbounds float, float* %A, i64 %tmp
%arrayidx4 = getelementptr inbounds float, float* %arrayidx, i64 %j.0
%tmp13 = load float, float* %arrayidx4, align 4
; Load A[i][j-1].
%sub5 = add nsw i64 %j.0, -1
%tmp14 = mul nsw i64 %i.0, %n
%arrayidx6 = getelementptr inbounds float, float* %A, i64 %tmp14
%arrayidx7 = getelementptr inbounds float, float* %arrayidx6, i64 %sub5
%tmp15 = load float, float* %arrayidx7, align 4
%add = fadd float %tmp13, %tmp15
; Store the sum to A[i][j].
%tmp16 = mul nsw i64 %i.0, %n
%arrayidx8 = getelementptr inbounds float, float* %A, i64 %tmp16
%arrayidx9 = getelementptr inbounds float, float* %arrayidx8, i64 %j.0
store float %add, float* %arrayidx9, align 4
%add10 = add nuw nsw i64 %j.0, 1
br label %for.cond1
; Outer loop latch.
for.inc11: ; preds = %for.cond1
%add12 = add nuw nsw i64 %i.0, 1
br label %for.cond
for.end13: ; preds = %for.cond
ret void
}
; CHECK: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c0 = 0; c0 < m - 1; c0 += 1)
; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c1 = 0; c1 < n - 1; c1 += 1)
; CHECK-NEXT: Stmt_for_body3(c0, c1);
; OUTER: #pragma minimal dependence distance: 1
; OUTER-NEXT: for (int c0 = 0; c0 < m + n - 3; c0 += 1)
; OUTER-NEXT: #pragma simd
; OUTER-NEXT: #pragma known-parallel
; OUTER-NEXT: for (int c1 = max(0, -n + c0 + 2); c1 <= min(m - 2, c0); c1 += 1)
; OUTER-NEXT: Stmt_for_body3(c1, c0 - c1);

View File

@@ -0,0 +1,76 @@
; RUN: opt %loadPolly \
; RUN: -polly-pattern-matching-based-opts=true \
; RUN: -polly-optree -polly-delicm -polly-simplify \
; RUN: -polly-opt-isl -debug < %s 2>&1 \
; RUN: | FileCheck %s
; REQUIRES: asserts
; Check that the pattern matching detects the matrix multiplication pattern
; after a full run of -polly-optree and -polly-delicm, where the write access
; is not through the original memory access, but through a PHI node that was
; delicmed. This test covers the polybench 2mm and 3mm cases.
;
; CHECK: The matrix multiplication pattern was detected
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind uwtable
; @kernel_2mm: tmp := alpha * A * B on fixed-size double matrices.
;   %alpha - scalar factor applied to each A element
;   %tmp   - result, rows of 1800 doubles (only stored to)
;   %A     - rows of 2200 doubles (only loaded)
;   %B     - rows of 1800 doubles (only loaded)
;   %ni, %nj, %nk, %nl, %beta, %C and %D are not referenced in the body.
; Trip counts: i = 0..1599, j = 0..1799, k = 0..2199; all loops are
; bottom-tested and exit on an equality compare.
; The running sum for tmp[i][j] is carried in the PHI %0 rather than reloaded
; from memory; it is still stored to tmp[i][j] on every k iteration. This is
; the "write through a PHI node" shape the file header describes.
define void @kernel_2mm(i32 %ni, i32 %nj, i32 %nk, i32 %nl, double %alpha, double %beta, [1800 x double]* nocapture %tmp, [2200 x double]* nocapture readonly %A, [1800 x double]* nocapture readonly %B, [2400 x double]* nocapture readnone %C, [2400 x double]* nocapture readnone %D) local_unnamed_addr #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
; i loop header: %indvars.iv50 is i.
for.body: ; preds = %for.inc25, %entry.split
%indvars.iv50 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next51, %for.inc25 ]
br label %for.body3
; j loop header: %indvars.iv46 is j. Initializes tmp[i][j] to 0.0.
for.body3: ; preds = %for.inc22, %for.body
%indvars.iv46 = phi i64 [ 0, %for.body ], [ %indvars.iv.next47, %for.inc22 ]
%arrayidx5 = getelementptr inbounds [1800 x double], [1800 x double]* %tmp, i64 %indvars.iv50, i64 %indvars.iv46
store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !2
br label %for.body8
; k loop: %indvars.iv is k, %0 is the accumulated sum so far (starts at 0.0).
for.body8: ; preds = %for.body8, %for.body3
%0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
%arrayidx12 = getelementptr inbounds [2200 x double], [2200 x double]* %A, i64 %indvars.iv50, i64 %indvars.iv
%1 = load double, double* %arrayidx12, align 8, !tbaa !2
%mul = fmul double %1, %alpha
%arrayidx16 = getelementptr inbounds [1800 x double], [1800 x double]* %B, i64 %indvars.iv, i64 %indvars.iv46
%2 = load double, double* %arrayidx16, align 8, !tbaa !2
%mul17 = fmul double %mul, %2
%add = fadd double %0, %mul17
store double %add, double* %arrayidx5, align 8, !tbaa !2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 2200
br i1 %exitcond, label %for.inc22, label %for.body8
; j loop latch.
for.inc22: ; preds = %for.body8
%indvars.iv.next47 = add nuw nsw i64 %indvars.iv46, 1
%exitcond48 = icmp eq i64 %indvars.iv.next47, 1800
br i1 %exitcond48, label %for.inc25, label %for.body3
; i loop latch.
for.inc25: ; preds = %for.inc22
%indvars.iv.next51 = add nuw nsw i64 %indvars.iv50, 1
%exitcond52 = icmp eq i64 %indvars.iv.next51, 1600
br i1 %exitcond52, label %for.end27, label %for.body
for.end27: ; preds = %for.inc25
ret void
}
attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prefetchwt1,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (trunk 309912) (llvm/trunk 309933)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"double", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}

View File

@@ -0,0 +1,66 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=false \
; RUN: -debug < %s 2>&1| FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1| FileCheck %s --check-prefix=PATTERN-MATCHING-OPTS
; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -stats -disable-output < %s 2>&1| FileCheck %s --check-prefix=STATS -match-full-lines
; REQUIRES: asserts
;
; /* C := alpha*A*B + beta*C */
; for (i = 0; i < _PB_NI; i++)
; for (j = 0; j < _PB_NJ; j++)
; {
; C[i][j] *= beta;
; for (k = 0; k < _PB_NK; ++k)
; C[i][j] += alpha * A[i][k] * B[k][j];
; }
;
; CHECK-NOT: The matrix multiplication pattern was detected
; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected
; STATS: 1 polly-opt-isl - Number of matrix multiplication patterns detected and optimized
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; @kernel_gemm: C := alpha*A*B + beta*C on fixed-size double matrices.
;   %arg3 - alpha
;   %arg4 - beta
;   %arg5 - C, rows of 1056 doubles (loaded and stored)
;   %arg6 - A, rows of 1024 doubles (only loaded)
;   %arg7 - B, rows of 1056 doubles (only loaded)
;   %arg, %arg1, %arg2 are not referenced in the body.
; Trip counts: i = 0..1055, j = 0..1055, k = 0..1023; all loops are
; bottom-tested with constant bounds.
; NOTE(review): attribute group #0 is referenced but its definition is not
; visible in this excerpt - confirm it exists in the full file.
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) #0 {
bb:
br label %bb8
; i loop header: %tmp is the row index i.
bb8: ; preds = %bb29, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
br label %bb9
; j loop header: %tmp10 is the column index j. Scales C[i][j] by beta before
; the k loop runs.
bb9: ; preds = %bb26, %bb8
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fmul double %tmp12, %arg4
store double %tmp13, double* %tmp11, align 8
br label %Copy_0
; k loop: %tmp15 is k. C[i][j] += alpha * A[i][k] * B[k][j]; the running value
; is reloaded from and stored back to C[i][j] on every iteration.
Copy_0: ; preds = %Copy_0, %bb9
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
%tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
%tmp17 = load double, double* %tmp16, align 8
%tmp18 = fmul double %tmp17, %arg3
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
%tmp20 = load double, double* %tmp19, align 8
%tmp21 = fmul double %tmp18, %tmp20
%tmp22 = load double, double* %tmp11, align 8
%tmp23 = fadd double %tmp22, %tmp21
store double %tmp23, double* %tmp11, align 8
%tmp24 = add nuw nsw i64 %tmp15, 1
%tmp25 = icmp ne i64 %tmp24, 1024
br i1 %tmp25, label %Copy_0, label %bb26
; j loop latch.
bb26: ; preds = %Copy_0
%tmp27 = add nuw nsw i64 %tmp10, 1
%tmp28 = icmp ne i64 %tmp27, 1056
br i1 %tmp28, label %bb9, label %bb29
; i loop latch.
bb29: ; preds = %bb26
%tmp30 = add nuw nsw i64 %tmp, 1
%tmp31 = icmp ne i64 %tmp30, 1056
br i1 %tmp31, label %bb8, label %bb32
bb32: ; preds = %bb29
ret void
}

View File

@@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-opt-isl -polly-invariant-load-hoisting=true \
; RUN: -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=1 \
; RUN: -polly-codegen -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: -polly-target-2nd-cache-level-size=262144 -S < %s \
; RUN: | FileCheck %s
;
; This test case checks whether Polly generates second level alias metadata
; to distinguish the specific accesses in case of the ublas gemm kernel.
;
; CHECK: !11 = distinct !{!11, !0, !"second level alias metadata"}
; CHECK: !12 = distinct !{!12, !0, !"second level alias metadata"}
; CHECK: !13 = !{!3, !4, !5, !6, !11}
; CHECK: !14 = distinct !{!14, !0, !"second level alias metadata"}
; CHECK: !15 = !{!3, !4, !5, !6, !11, !12}
; CHECK: !16 = distinct !{!16, !0, !"second level alias metadata"}
; CHECK: !17 = !{!3, !4, !5, !6, !11, !12, !14}
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; @kernel_gemm: C := alpha*A*B + beta*C on fixed-size double matrices.
;   %arg3 - alpha
;   %arg4 - beta
;   %arg5 - C, rows of 1056 doubles (loaded and stored)
;   %arg6 - A, rows of 1024 doubles (only loaded)
;   %arg7 - B, rows of 1056 doubles (only loaded)
;   %arg, %arg1, %arg2 are not referenced in the body.
; Trip counts: i = 0..1055, j = 0..1055, k = 0..1023; all loops are
; bottom-tested with constant bounds.
; The pointer arguments carry no noalias markers; this test expects alias
; metadata nodes to be emitted for the generated code (see the file header).
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) {
bb:
br label %bb8
; i loop header: %tmp is the row index i.
bb8: ; preds = %bb29, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
br label %bb9
; j loop header: %tmp10 is the column index j. Scales C[i][j] by beta before
; the k loop runs.
bb9: ; preds = %bb26, %bb8
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fmul double %tmp12, %arg4
store double %tmp13, double* %tmp11, align 8
br label %Copy_0
; k loop: %tmp15 is k. C[i][j] += alpha * A[i][k] * B[k][j]; the running value
; is reloaded from and stored back to C[i][j] on every iteration.
Copy_0: ; preds = %Copy_0, %bb9
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
%tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
%tmp17 = load double, double* %tmp16, align 8
%tmp18 = fmul double %tmp17, %arg3
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
%tmp20 = load double, double* %tmp19, align 8
%tmp21 = fmul double %tmp18, %tmp20
%tmp22 = load double, double* %tmp11, align 8
%tmp23 = fadd double %tmp22, %tmp21
store double %tmp23, double* %tmp11, align 8
%tmp24 = add nuw nsw i64 %tmp15, 1
%tmp25 = icmp ne i64 %tmp24, 1024
br i1 %tmp25, label %Copy_0, label %bb26
; j loop latch.
bb26: ; preds = %Copy_0
%tmp27 = add nuw nsw i64 %tmp10, 1
%tmp28 = icmp ne i64 %tmp27, 1056
br i1 %tmp28, label %bb9, label %bb29
; i loop latch.
bb29: ; preds = %bb26
%tmp30 = add nuw nsw i64 %tmp, 1
%tmp31 = icmp ne i64 %tmp30, 1056
br i1 %tmp31, label %bb8, label %bb32
bb32: ; preds = %bb29
ret void
}

View File

@@ -0,0 +1,64 @@
; RUN: opt %loadPolly -polly-import-jscop \
; RUN: -polly-import-jscop-postfix=transformed \
; RUN: -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=8 \
; RUN: -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: -polly-target-2nd-cache-level-size=262144 \
; RUN: -polly-opt-isl -debug < %s 2>&1 \
; RUN: | FileCheck %s
; REQUIRES: asserts
;
; Check that the pattern matching detects the matrix multiplication pattern
; in case scalar memory accesses were replaced by accesses to newly created
; arrays.
;
; CHECK: The matrix multiplication pattern was detected
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; @kernel_gemm: C[i][j] += A * B[k][j] for i, j, k in 0..1023.
;   %A - scalar double factor (not a matrix in this variant)
;   %B - rows of 1024 doubles (only loaded), indexed [k][j]
;   %C - rows of 1024 doubles (loaded and stored), indexed [i][j]
;   %ni, %nj, %nk are not referenced in the body.
; All three loops are bottom-tested with a constant bound of 1024.
; As written there is only one array operand in the product; per the file
; header, the test relies on an imported jscop file that replaces scalar
; accesses with accesses to newly created arrays before pattern matching runs.
define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %A, [1024 x double]* %B, [1024 x double]* %C) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.cond1.preheader
; i loop header: %indvars.iv35 is i.
for.cond1.preheader: ; preds = %for.inc16, %entry.split
%indvars.iv35 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next36, %for.inc16 ]
br label %for.cond4.preheader
; j loop header: %indvars.iv32 is j.
for.cond4.preheader: ; preds = %for.inc13, %for.cond1.preheader
%indvars.iv32 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next33, %for.inc13 ]
br label %for.body6
; k loop: %indvars.iv is k. Adds A * B[k][j] to C[i][j], reloading and storing
; C[i][j] on every iteration.
for.body6: ; preds = %for.body6, %for.cond4.preheader
%indvars.iv = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next, %for.body6 ]
%arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv32
%tmp = load double, double* %arrayidx8, align 8
%mul = fmul double %tmp, %A
%arrayidx12 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv35, i64 %indvars.iv32
%tmp1 = load double, double* %arrayidx12, align 8
%add = fadd double %tmp1, %mul
store double %add, double* %arrayidx12, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.body6, label %for.inc13
; j loop latch.
for.inc13: ; preds = %for.body6
%indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
%exitcond34 = icmp ne i64 %indvars.iv.next33, 1024
br i1 %exitcond34, label %for.cond4.preheader, label %for.inc16
; i loop latch.
for.inc16: ; preds = %for.inc13
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond37 = icmp ne i64 %indvars.iv.next36, 1024
br i1 %exitcond37, label %for.cond1.preheader, label %for.end18
for.end18: ; preds = %for.inc16
ret void
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More