Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions

View File

@ -1,14 +0,0 @@
; RUN: opt < %s -loop-unroll -disable-output
; Single-block counted loop used as loop-unroll input.
; %indvar starts at 0 and is incremented by 1 each iteration; the loop is left
; once the incremented value equals -2147483648, and the function returns 0.
define i32 @main() {
entry:
br label %no_exit
no_exit: ; preds = %no_exit, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ] ; <i32> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
; Keep looping while the incremented counter differs from -2147483648.
%exitcond = icmp ne i32 %indvar.next, -2147483648 ; <i1> [#uses=1]
br i1 %exitcond, label %no_exit, label %loopexit
loopexit: ; preds = %no_exit
ret i32 0
}

View File

@ -1,22 +0,0 @@
; RUN: opt < %s -loop-unroll -loop-simplify -disable-output
; Two-level loop nest: %no_exit.1 is the outer header and %no_exit.2 is a
; single-block inner loop that counts %indvar1 from 0 until the incremented
; value reaches 7. Both branches after the inner loop test the constant false,
; so control falls through %cond_false.2 to %loopexit.1 and the outer back
; edge to %no_exit.1 is never taken.
define void @print_board() {
entry:
br label %no_exit.1
no_exit.1: ; preds = %cond_false.2, %entry
br label %no_exit.2
no_exit.2: ; preds = %no_exit.2, %no_exit.1
%indvar1 = phi i32 [ 0, %no_exit.1 ], [ %indvar.next2, %no_exit.2 ] ; <i32> [#uses=1]
%indvar.next2 = add i32 %indvar1, 1 ; <i32> [#uses=2]
; Inner loop continues while the incremented counter is not 7.
%exitcond3 = icmp ne i32 %indvar.next2, 7 ; <i1> [#uses=1]
br i1 %exitcond3, label %no_exit.2, label %loopexit.2
loopexit.2: ; preds = %no_exit.2
br i1 false, label %cond_true.2, label %cond_false.2
cond_true.2: ; preds = %loopexit.2
ret void
cond_false.2: ; preds = %loopexit.2
; Constant-false condition: the edge back to the outer header is present in
; the CFG but the branch always goes to %loopexit.1.
br i1 false, label %no_exit.1, label %loopexit.1
loopexit.1: ; preds = %cond_false.2
ret void
}

View File

@ -1,16 +0,0 @@
; RUN: opt < %s -loop-unroll -S | grep bb72.2
; Counted loop whose body is split across two blocks: %cond_true.outer holds
; the induction phi and %bb72 holds the increment, the exit test and the back
; edge. The loop exits to %cond_true138 when the incremented counter equals 3.
; The block name %bb72 matters: this test's command line greps the output for
; "bb72.2".
define void @vorbis_encode_noisebias_setup() {
entry:
br label %cond_true.outer
cond_true.outer: ; preds = %bb72, %entry
%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ] ; <i32> [#uses=1]
br label %bb72
bb72: ; preds = %cond_true.outer
%indvar.next2 = add i32 %indvar1.ph, 1 ; <i32> [#uses=2]
; Exit once the incremented counter reaches 3; otherwise take the back edge.
%exitcond3 = icmp eq i32 %indvar.next2, 3 ; <i1> [#uses=1]
br i1 %exitcond3, label %cond_true138, label %cond_true.outer
cond_true138: ; preds = %bb72
ret void
}

View File

@ -1,17 +0,0 @@
; PR 1334
; RUN: opt < %s -loop-unroll -disable-output
; Single-block loop with an i8 induction variable that exits when the
; incremented counter equals 7. A value computed before the loop (%tmp282911)
; is referenced only by a single-entry phi in the exit block, and that phi has
; no uses. The %agg.result argument is not referenced in the body.
define void @sal__math_float_manipulator_7__math__joint_array_dcv_ops__Omultiply__3([6 x float]* %agg.result) {
entry:
%tmp282911 = zext i8 0 to i32 ; <i32> [#uses=1]
br label %cond_next
cond_next: ; preds = %cond_next, %entry
%indvar = phi i8 [ 0, %entry ], [ %indvar.next, %cond_next ] ; <i8> [#uses=1]
%indvar.next = add i8 %indvar, 1 ; <i8> [#uses=2]
%exitcond = icmp eq i8 %indvar.next, 7 ; <i1> [#uses=1]
br i1 %exitcond, label %bb27, label %cond_next
bb27: ; preds = %cond_next
; Exit-block phi of a value defined outside the loop; it is never used.
%tmp282911.lcssa = phi i32 [ %tmp282911, %cond_next ] ; <i32> [#uses=0]
ret void
}

View File

@ -1,36 +0,0 @@
; RUN: opt < %s -loop-unroll -S | not grep undef
; PR1385
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
%struct.__mpz_struct = type { i32, i32, i32* }
; Allocates a one-element array of %struct.__mpz_struct on the stack, passes
; its first element to @__gmpz_init, calls @__gmpz_mul in a loop that exits
; when the incremented counter equals 2, and finally passes the element to
; @__gmpz_clear. All four getelementptr instructions in the entry block use
; the same base and indices (element 0 of %want). This test's command line
; requires that the unrolled output contains no "undef".
define void @Foo(%struct.__mpz_struct* %base) {
entry:
%want = alloca [1 x %struct.__mpz_struct], align 16 ; <[1 x %struct.__mpz_struct]*> [#uses=4]
%want1 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
call void @__gmpz_init( %struct.__mpz_struct* %want1 )
%want27 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
%want3 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
%want2 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=2]
br label %bb
bb: ; preds = %bb, %entry
%i.01.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=1]
; First operand of the multiply: %want27 on entry, %want2 on later iterations.
%want23.0 = phi %struct.__mpz_struct* [ %want27, %entry ], [ %want2, %bb ] ; <%struct.__mpz_struct*> [#uses=1]
call void @__gmpz_mul( %struct.__mpz_struct* %want23.0, %struct.__mpz_struct* %want3, %struct.__mpz_struct* %base )
%indvar.next = add i32 %i.01.0, 1 ; <i32> [#uses=2]
; Loop again while the incremented counter is not 2.
%exitcond = icmp ne i32 %indvar.next, 2 ; <i1> [#uses=1]
br i1 %exitcond, label %bb, label %bb10
bb10: ; preds = %bb
; Single-entry exit phi forwarding %want2 to the clear call.
%want2.lcssa = phi %struct.__mpz_struct* [ %want2, %bb ] ; <%struct.__mpz_struct*> [#uses=1]
call void @__gmpz_clear( %struct.__mpz_struct* %want2.lcssa )
ret void
}
declare void @__gmpz_init(%struct.__mpz_struct*)
declare void @__gmpz_mul(%struct.__mpz_struct*, %struct.__mpz_struct*, %struct.__mpz_struct*)
declare void @__gmpz_clear(%struct.__mpz_struct*)

View File

@ -1,18 +0,0 @@
; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2
; Two-block counted loop (%cond_true.outer is the header, %bb72 the latch)
; whose exit bound is the %trips argument rather than a constant. This test's
; command line forces an unroll count of 3 and greps the output for "bb72.2".
define void @foo(i32 %trips) {
entry:
br label %cond_true.outer
cond_true.outer:
%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
br label %bb72
bb72:
%indvar.next2 = add i32 %indvar1.ph, 1
; Exit when the incremented counter equals the caller-supplied bound.
%exitcond3 = icmp eq i32 %indvar.next2, %trips
br i1 %exitcond3, label %cond_true138, label %cond_true.outer
cond_true138:
ret void
}

View File

@ -1,295 +0,0 @@
; RUN: opt < %s -disable-output -loop-unroll
; PR1770
; PR1947
%struct.cl_engine = type { i32, i16, i32, i8**, i8**, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
%struct.cl_limits = type { i32, i32, i32, i32, i16, i64 }
%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
%struct.cli_bm_patt = type { i8*, i32, i8*, i8*, i8, %struct.cli_bm_patt* }
%struct.cli_ctx = type { i8**, i64*, %struct.cli_matcher*, %struct.cl_engine*, %struct.cl_limits*, i32, i32, i32, i32, %struct.cli_dconf* }
%struct.cli_dconf = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.cli_matcher = type { i16, i8, i32*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
declare i8* @calloc(i64, i64)
; Large reduced control-flow graph used as loop-unroll input. Almost every
; block ends either in a return or in a conditional branch on the constant
; false. The function contains one counted loop (%cond_true784.i /
; %cond_next811.i, an i8 counter that exits when its incremented value equals
; 3) with a second exit to %cond_true1008.critedge1190.i, where a single-entry
; phi refers to the @calloc result defined before the loop. Neither %desc nor
; %ctx is referenced in the body.
define fastcc i32 @cli_scanpe(i32 %desc, %struct.cli_ctx* %ctx) {
entry:
br i1 false, label %cond_next17, label %cond_true14
cond_true14: ; preds = %entry
ret i32 0
cond_next17: ; preds = %entry
br i1 false, label %LeafBlock, label %LeafBlock1250
LeafBlock1250: ; preds = %cond_next17
ret i32 0
LeafBlock: ; preds = %cond_next17
br i1 false, label %cond_next33, label %cond_true30
cond_true30: ; preds = %LeafBlock
ret i32 0
cond_next33: ; preds = %LeafBlock
br i1 false, label %cond_next90, label %cond_true42
cond_true42: ; preds = %cond_next33
ret i32 0
cond_next90: ; preds = %cond_next33
br i1 false, label %cond_next100, label %cond_true97
cond_true97: ; preds = %cond_next90
ret i32 0
cond_next100: ; preds = %cond_next90
br i1 false, label %cond_next109, label %cond_true106
cond_true106: ; preds = %cond_next100
ret i32 0
cond_next109: ; preds = %cond_next100
br i1 false, label %cond_false, label %cond_true118
cond_true118: ; preds = %cond_next109
ret i32 0
cond_false: ; preds = %cond_next109
br i1 false, label %NodeBlock1482, label %cond_true126
cond_true126: ; preds = %cond_false
ret i32 0
NodeBlock1482: ; preds = %cond_false
br i1 false, label %cond_next285, label %NodeBlock1480
NodeBlock1480: ; preds = %NodeBlock1482
ret i32 0
cond_next285: ; preds = %NodeBlock1482
br i1 false, label %cond_next320, label %cond_true294
cond_true294: ; preds = %cond_next285
ret i32 0
cond_next320: ; preds = %cond_next285
br i1 false, label %LeafBlock1491, label %LeafBlock1493
LeafBlock1493: ; preds = %cond_next320
ret i32 0
LeafBlock1491: ; preds = %cond_next320
br i1 false, label %cond_true400, label %cond_true378
cond_true378: ; preds = %LeafBlock1491
ret i32 1
cond_true400: ; preds = %LeafBlock1491
br i1 false, label %cond_next413, label %cond_true406
cond_true406: ; preds = %cond_true400
ret i32 0
cond_next413: ; preds = %cond_true400
br i1 false, label %cond_next429, label %cond_true424
cond_true424: ; preds = %cond_next413
ret i32 0
cond_next429: ; preds = %cond_next413
br i1 false, label %NodeBlock1557, label %NodeBlock1579
NodeBlock1579: ; preds = %cond_next429
ret i32 0
NodeBlock1557: ; preds = %cond_next429
br i1 false, label %LeafBlock1543, label %NodeBlock1555
NodeBlock1555: ; preds = %NodeBlock1557
ret i32 0
LeafBlock1543: ; preds = %NodeBlock1557
br i1 false, label %cond_next870, label %cond_next663
cond_next663: ; preds = %LeafBlock1543
ret i32 0
cond_next870: ; preds = %LeafBlock1543
br i1 false, label %cond_true1012, label %cond_true916
cond_true916: ; preds = %cond_next870
ret i32 0
cond_true1012: ; preds = %cond_next870
br i1 false, label %cond_next3849, label %cond_true2105
cond_true2105: ; preds = %cond_true1012
ret i32 0
cond_next3849: ; preds = %cond_true1012
br i1 false, label %cond_next4378, label %bb6559
bb3862: ; preds = %cond_next4385
br i1 false, label %cond_false3904, label %cond_true3876
cond_true3876: ; preds = %bb3862
ret i32 0
cond_false3904: ; preds = %bb3862
br i1 false, label %cond_next4003, label %cond_true3935
cond_true3935: ; preds = %cond_false3904
ret i32 0
cond_next4003: ; preds = %cond_false3904
br i1 false, label %cond_next5160, label %cond_next4015
cond_next4015: ; preds = %cond_next4003
ret i32 0
cond_next4378: ; preds = %cond_next3849
br i1 false, label %cond_next4385, label %bb4393
cond_next4385: ; preds = %cond_next4378
br i1 false, label %bb3862, label %bb4393
bb4393: ; preds = %cond_next4385, %cond_next4378
ret i32 0
cond_next5160: ; preds = %cond_next4003
br i1 false, label %bb5188, label %bb6559
bb5188: ; preds = %cond_next5160
br i1 false, label %cond_next5285, label %cond_true5210
cond_true5210: ; preds = %bb5188
ret i32 0
cond_next5285: ; preds = %bb5188
br i1 false, label %cond_true5302, label %cond_true5330
cond_true5302: ; preds = %cond_next5285
br i1 false, label %bb7405, label %bb7367
cond_true5330: ; preds = %cond_next5285
ret i32 0
bb6559: ; preds = %cond_next5160, %cond_next3849
ret i32 0
bb7367: ; preds = %cond_true5302
ret i32 0
bb7405: ; preds = %cond_true5302
br i1 false, label %cond_next8154, label %cond_true7410
cond_true7410: ; preds = %bb7405
ret i32 0
cond_next8154: ; preds = %bb7405
br i1 false, label %cond_true8235, label %bb9065
cond_true8235: ; preds = %cond_next8154
br i1 false, label %bb8274, label %bb8245
bb8245: ; preds = %cond_true8235
ret i32 0
bb8274: ; preds = %cond_true8235
br i1 false, label %cond_next8358, label %cond_true8295
cond_true8295: ; preds = %bb8274
ret i32 0
cond_next8358: ; preds = %bb8274
br i1 false, label %cond_next.i509, label %cond_true8371
cond_true8371: ; preds = %cond_next8358
ret i32 -123
cond_next.i509: ; preds = %cond_next8358
br i1 false, label %bb36.i, label %bb33.i
bb33.i: ; preds = %cond_next.i509
ret i32 0
bb36.i: ; preds = %cond_next.i509
br i1 false, label %cond_next54.i, label %cond_true51.i
cond_true51.i: ; preds = %bb36.i
ret i32 0
cond_next54.i: ; preds = %bb36.i
; The only call in the function; its result is referenced solely by the
; single-entry phi in %cond_true1008.critedge1190.i.
%tmp10.i.i527 = call i8* @calloc( i64 0, i64 1 ) ; <i8*> [#uses=1]
br i1 false, label %cond_next11.i.i, label %bb132.i
bb132.i: ; preds = %cond_next54.i
ret i32 0
cond_next11.i.i: ; preds = %cond_next54.i
br i1 false, label %bb32.i.i545, label %cond_true1008.critedge.i
bb32.i.i545: ; preds = %cond_next11.i.i
br i1 false, label %cond_next349.i, label %cond_true184.i
cond_true184.i: ; preds = %bb32.i.i545
ret i32 0
cond_next349.i: ; preds = %bb32.i.i545
br i1 false, label %cond_next535.i, label %cond_true1008.critedge1171.i
cond_next535.i: ; preds = %cond_next349.i
br i1 false, label %cond_next569.i, label %cond_false574.i
cond_next569.i: ; preds = %cond_next535.i
br i1 false, label %cond_next670.i, label %cond_true1008.critedge1185.i
cond_false574.i: ; preds = %cond_next535.i
ret i32 0
cond_next670.i: ; preds = %cond_next569.i
br i1 false, label %cond_true692.i, label %cond_next862.i
cond_true692.i: ; preds = %cond_next670.i
br i1 false, label %cond_false742.i, label %cond_true718.i
cond_true718.i: ; preds = %cond_true692.i
ret i32 0
cond_false742.i: ; preds = %cond_true692.i
br i1 false, label %cond_true784.i, label %cond_next9079
; Loop header: i8 counter starting at 0, with an extra exit edge to
; %cond_true1008.critedge1190.i in addition to the latch exit.
cond_true784.i: ; preds = %cond_next811.i, %cond_false742.i
%indvar1411.i.reg2mem.0 = phi i8 [ %indvar.next1412.i, %cond_next811.i ], [ 0, %cond_false742.i ] ; <i8> [#uses=1]
br i1 false, label %cond_true1008.critedge1190.i, label %cond_next811.i
; Loop latch: leaves the loop when the incremented counter equals 3.
cond_next811.i: ; preds = %cond_true784.i
%indvar.next1412.i = add i8 %indvar1411.i.reg2mem.0, 1 ; <i8> [#uses=2]
%tmp781.i = icmp eq i8 %indvar.next1412.i, 3 ; <i1> [#uses=1]
br i1 %tmp781.i, label %cond_next9079, label %cond_true784.i
cond_next862.i: ; preds = %cond_next670.i
ret i32 0
cond_true1008.critedge.i: ; preds = %cond_next11.i.i
ret i32 0
cond_true1008.critedge1171.i: ; preds = %cond_next349.i
ret i32 0
cond_true1008.critedge1185.i: ; preds = %cond_next569.i
ret i32 0
cond_true1008.critedge1190.i: ; preds = %cond_true784.i
; Unused exit-block phi whose incoming block is the loop header.
%tmp621.i532.lcssa610 = phi i8* [ %tmp10.i.i527, %cond_true784.i ] ; <i8*> [#uses=0]
ret i32 0
bb9065: ; preds = %cond_next8154
ret i32 0
cond_next9079: ; preds = %cond_next811.i, %cond_false742.i
ret i32 0
}

View File

@ -1,103 +0,0 @@
; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s
; Test phi update after partial unroll.
declare i1 @check() nounwind
; CHECK: @test
; CHECK: if.else:
; CHECK: if.then.loopexit
; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ]
; CHECK: if.else.3
; Loop with an opaque exit condition: each iteration subtracts %j from the
; running value (initially %i) and calls @check; the loop is left for %if.then
; as soon as @check returns true. %if.then merges %i (from %entry) with the
; loop's %sub5 in a phi, which is the phi whose update after unrolling this
; test inspects.
define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {
entry:
%cond1 = call zeroext i1 @check()
br i1 %cond1, label %if.then, label %if.else.lr.ph
if.else.lr.ph: ; preds = %entry
br label %if.else
if.else: ; preds = %if.else, %if.else.lr.ph
%sub = phi i32 [ %i, %if.else.lr.ph ], [ %sub5, %if.else ]
%sub5 = sub i32 %sub, %j
%cond2 = call zeroext i1 @check()
br i1 %cond2, label %if.then, label %if.else
if.then: ; preds = %if.else, %entry
; Merge point reached from both the entry block and the loop exit.
%i.tr = phi i32 [ %i, %entry ], [ %sub5, %if.else ]
ret void
}
; PR7318: assertion failure after doing a simple loop unroll
;
; CHECK-LABEL: @test2(
; CHECK: bb1.bb2_crit_edge:
; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
; CHECK: bb1.3:
; Returns the sum of the first %n i32 elements at %p, or 0 when %n is not
; greater than 0 (signed compare). The loop uses an i64 induction variable
; compared against %n zero-extended to i64, so the trip count is only known at
; run time. The loop body is split into %bb (load and add) and %bb1 (increment
; and exit test).
define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
entry:
%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
br i1 %0, label %bb.nph, label %bb2
bb.nph: ; preds = %entry
%tmp = zext i32 %n to i64 ; <i64> [#uses=1]
br label %bb
bb: ; preds = %bb.nph, %bb1
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2]
%s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ] ; <i32> [#uses=1]
%scevgep = getelementptr i32, i32* %p, i64 %indvar ; <i32*> [#uses=1]
%1 = load i32, i32* %scevgep, align 1 ; <i32> [#uses=1]
; Running sum: current element plus the sum carried in from the previous iteration.
%2 = add nsw i32 %1, %s.01 ; <i32> [#uses=2]
br label %bb1
bb1: ; preds = %bb
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp ne i64 %indvar.next, %tmp ; <i1> [#uses=1]
br i1 %exitcond, label %bb, label %bb1.bb2_crit_edge
bb1.bb2_crit_edge: ; preds = %bb1
; Single-entry exit phi carrying the final sum out of the loop.
%.lcssa = phi i32 [ %2, %bb1 ] ; <i32> [#uses=1]
br label %bb2
bb2: ; preds = %bb1.bb2_crit_edge, %entry
%s.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i32> [#uses=1]
ret i32 %s.0.lcssa
}
; Check phi update for loop with an early-exit.
;
; CHECK-LABEL: @test3(
; CHECK: return.loopexit:
; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
; CHECK: exit.3:
; Loop with an early exit. The loop consists of %do.body, %exit,
; %land.lhs.true and %do.cond; it is left either from %do.cond (to %do.end,
; when @check returns true) or early from %land.lhs.true straight to %return.
; The early exit carries %tmp7.i, loaded in %exit, into the %retval.0 phi; the
; other incoming values are 0. The load address and two of the branch
; conditions are undef.
define i32 @test3() nounwind uwtable ssp align 2 {
entry:
%cond1 = call zeroext i1 @check()
br i1 %cond1, label %return, label %if.end
if.end: ; preds = %entry
br label %do.body
do.body: ; preds = %do.cond, %if.end
%cond2 = call zeroext i1 @check()
br i1 %cond2, label %exit, label %do.cond
exit: ; preds = %do.body
%tmp7.i = load i32, i32* undef, align 8
br i1 undef, label %do.cond, label %land.lhs.true
land.lhs.true: ; preds = %exit
; Early loop exit: jumps directly to %return with %tmp7.i live.
br i1 undef, label %return, label %do.cond
do.cond: ; preds = %land.lhs.true, %exit, %do.body
%cond3 = call zeroext i1 @check()
br i1 %cond3, label %do.end, label %do.body
do.end: ; preds = %do.cond
br label %return
return: ; preds = %do.end, %land.lhs.true, %entry
%retval.0 = phi i32 [ 0, %do.end ], [ 0, %entry ], [ %tmp7.i, %land.lhs.true ]
ret i32 %retval.0
}

View File

@ -1,41 +0,0 @@
; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s
;
; Test induction variable simplify after loop unrolling. It should
; expose nice opportunities for GVN.
;
; The requirement below that no while.body.1 label follows while.body also
; ensures that loop unrolling (with SCEV) removes unrolled loop exits, given
; that 128 is a multiple of 4.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
; PR10534: LoopUnroll not keeping canonical induction variable...
; CHECK: while.body:
; CHECK-NOT: while.body.1:
; CHECK: %shr.1 = lshr i32 %bit_addr.addr.01, 5
; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1
; CHECK: %shr.2 = lshr i32 %bit_addr.addr.01, 5
; CHECK: %arrayidx.2 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.2
; CHECK: %shr.3 = lshr i32 %bit_addr.addr.01, 5
; CHECK: %arrayidx.3 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.3
; Single-block loop that toggles one bit of %bitmap per iteration. The down
; counter %nbits.addr.02 starts at the constant 128 and the loop exits when
; its decremented value reaches 0; the bit index %bit_addr.addr.01 starts at 0
; and is incremented by 1. Each iteration XORs the word at index
; (bit index >> 5) with (1 << (bit index & 31)). The %bit_addr and %nbits
; arguments themselves are not referenced in the body.
define void @FlipBit(i32* nocapture %bitmap, i32 %bit_addr, i32 %nbits) nounwind {
entry:
br label %while.body
while.body:
%nbits.addr.02 = phi i32 [ 128, %entry ], [ %dec, %while.body ]
%bit_addr.addr.01 = phi i32 [ 0, %entry ], [ %inc, %while.body ]
%dec = add i32 %nbits.addr.02, -1
; Word index and bit-within-word for the current bit index.
%shr = lshr i32 %bit_addr.addr.01, 5
%rem = and i32 %bit_addr.addr.01, 31
%shl = shl i32 1, %rem
%arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
; Read-modify-write of the selected word: flip the selected bit.
%tmp6 = load i32, i32* %arrayidx, align 4
%xor = xor i32 %tmp6, %shl
store i32 %xor, i32* %arrayidx, align 4
%inc = add i32 %bit_addr.addr.01, 1
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body
while.end:
ret void
}

View File

@ -1,62 +0,0 @@
; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
;
; This is a test case that required a number of setup passes because
; it depends on block order.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.6.8"
declare i1 @check() nounwind
declare i32 @getval() nounwind
; Check that the loop exit merges values from all the iterations. This
; could be a tad fragile, but it's a good test.
;
; CHECK-LABEL: @foo(
; CHECK: return:
; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
; CHECK-NOT: @bar(
; CHECK: bar.exit.3
; Loop with two exits that both feed the %retval.0 phi in %return. Each
; iteration calls @bar; when the result is non-zero, %land.lhs.true calls
; @getval and leaves the loop with %call6 as the return value if that call
; returned 0. Otherwise %do.cond continues the loop while 0 <= %call2 (signed),
; where %call2 is the @getval result obtained once before the loop, and
; returns 0 when that comparison fails. The entry branch condition is undef.
define i32 @foo() uwtable ssp align 2 {
entry:
br i1 undef, label %return, label %if.end
if.end: ; preds = %entry
%call2 = call i32 @getval()
br label %do.body
do.body: ; preds = %do.cond, %if.end
%call6 = call i32 @bar()
%cmp = icmp ne i32 %call6, 0
br i1 %cmp, label %land.lhs.true, label %do.cond
land.lhs.true: ; preds = %do.body
%call10 = call i32 @getval()
%cmp11 = icmp eq i32 0, %call10
; Early exit carrying %call6 to the return phi.
br i1 %cmp11, label %return, label %do.cond
do.cond: ; preds = %land.lhs.true, %do.body
; The loop-continuation test depends only on the pre-loop value %call2.
%cmp18 = icmp sle i32 0, %call2
br i1 %cmp18, label %do.body, label %return
return: ; preds = %do.cond, %land.lhs.true, %entry
%retval.0 = phi i32 [ 0, %entry ], [ %call6, %land.lhs.true ], [ 0, %do.cond ]
ret i32 %retval.0
}
; Helper called from @foo's loop body. Returns the result of @getval when the
; undef entry condition selects %land.lhs.true and @check returns true;
; returns 0 on every other path.
define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {
entry:
br i1 undef, label %land.lhs.true, label %cond.end
land.lhs.true: ; preds = %entry
%cmp4 = call zeroext i1 @check()
br i1 %cmp4, label %cond.true, label %cond.end
cond.true: ; preds = %land.lhs.true
%tmp7 = call i32 @getval()
br label %cond.end
cond.end: ; preds = %cond.true, %land.lhs.true, %entry
; Result is %tmp7 only when reached via %cond.true, otherwise 0.
%cond = phi i32 [ %tmp7, %cond.true ], [ 0, %land.lhs.true ], [ 0, %entry ]
ret i32 %cond
}

View File

@ -1,36 +0,0 @@
; RUN: opt < %s -S -loop-unroll -unroll-threshold=150 | FileCheck %s
;
; Verify that trunc i64 to i32 is considered free by loop unrolling
; heuristics when i32 is a native type.
; This should result in full unrolling this loop with size=7, TC=19.
; If the trunc were not free we would have 8*19=152 > 150.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Check that for.body was unrolled 19 times.
; CHECK-LABEL: @test(
; CHECK: %0 = load
; CHECK: %conv = sext i8 %0 to i32
; CHECK: %add.1 = add nsw i32 %conv.1, %conv
; CHECK: %add.18 = add nsw i32 %conv.18, %add.17
; CHECK: ret i32 %add.18
; Sums sign-extended i8 elements of %arr. The induction variable is i64 and
; is truncated to i32 for the exit comparison against the constant 19, so the
; loop body contains one trunc per iteration; the comments at the top of this
; test explain that the cost of that trunc decides whether the loop fits the
; unroll threshold.
define i32 @test(i8* %arr) nounwind uwtable readnone {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %arr, i64 %indvars.iv
%0 = load i8, i8* %arrayidx, align 1
%conv = sext i8 %0 to i32
%add = add nsw i32 %conv, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
; Narrow the 64-bit counter to i32 before comparing with the bound 19.
%lftr.wideiv1 = trunc i64 %indvars.iv.next to i32
%exitcond2 = icmp eq i32 %lftr.wideiv1, 19
br i1 %exitcond2, label %for.end, label %for.body
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}

View File

@ -1,40 +0,0 @@
; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s
; PR12513: Loop unrolling breaks with indirect branches.
; If loop unrolling attempts to transform this loop, it replaces the
; indirectbr successors. SimplifyCFG then considers them to be unreachable.
declare void @subtract() nounwind uwtable
; CHECK-NOT: unreachable
; Counted loop whose header ends in an indirectbr. Each iteration calls
; @funca with the block addresses of %for.body_code and %for.body_codeprime
; and jumps indirectly to whichever address it returns; both targets call a
; subtract routine and rejoin at %call2_termjoin, which increments the counter
; and exits when it equals 5. %argc, %argv and %vals19 are not referenced
; after their definition; %x20 is only stored to.
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
entry:
%vals19 = alloca [5 x i32], align 16
%x20 = alloca i32, align 4
store i32 135, i32* %x20, align 4
br label %for.body
for.body: ; preds = %call2_termjoin, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ]
; The call below is deliberately left split across two source lines.
%a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8*
blockaddress(@main, %for.body_codeprime)) nounwind
; Indirect jump to one of the two loop-body variants.
indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime]
for.body_code: ; preds = %for.body
call void @subtract()
br label %call2_termjoin
call2_termjoin: ; preds = %for.body_codeprime, %for.body_code
%joinphi15.in.in = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %joinphi15.in.in, 5
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %call2_termjoin
ret i32 0
for.body_codeprime: ; preds = %for.body
call void @subtract_v2(i64 %indvars.iv)
br label %call2_termjoin
}
declare coldcc i8* @funca(i8*, i8*) readonly
declare void @subtract_v2(i64) nounwind uwtable

View File

@ -1,169 +0,0 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
; Check that loop unroller doesn't exhaust HW prefetcher resources.
; Partial unroll 2 times for this loop on falkor instead of 4.
; NOHWPF-LABEL: @unroll1(
; NOHWPF-LABEL: loop:
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: icmp
; NOHWPF-NEXT: br
; NOHWPF-NEXT-LABEL: exit:
;
; CHECK-LABEL: @unroll1(
; CHECK-LABEL: loop:
; CHECK-NEXT: phi
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; CHECK-NEXT: br
; CHECK-NEXT-LABEL: exit:
; Single-block loop that performs two volatile i32 loads per iteration, one
; from %p and one from %p2, both indexed by the induction variable %iv. The
; loop exits when the incremented counter is unsigned-greater-or-equal to
; 1024. Neither loaded value is used.
define void @unroll1(i32* %p, i32* %p2) {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%gep = getelementptr inbounds i32, i32* %p, i32 %iv
%load = load volatile i32, i32* %gep
%gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
%load2 = load volatile i32, i32* %gep2
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; Partial unroll 4 times for this loop on falkor instead of 8.
; NOHWPF-LABEL: @unroll2(
; NOHWPF-LABEL: loop2:
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: icmp
; NOHWPF-NEXT: br
; NOHWPF-NEXT-LABEL: exit2:
;
; CHECK-LABEL: @unroll2(
; CHECK-LABEL: loop2:
; CHECK-NEXT: phi
; CHECK-NEXT: phi
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; CHECK-NEXT: br
; CHECK-NEXT-LABEL: exit2:
; Two-level loop nest. The inner loop (%loop2) loads %p[%iv2] each iteration
; and accumulates it into %sum, exiting when its incremented counter is
; unsigned-greater-or-equal to 1024. The outer loop (%loop1) carries the
; running sum across inner-loop executions through %outer.sum and is
; controlled by its own counter %iv1 with the same bound.
define void @unroll2(i32* %p) {
entry:
br label %loop1
loop1:
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
%outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
br label %loop2.header
loop2.header:
br label %loop2
loop2:
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
%sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
%gep = getelementptr inbounds i32, i32* %p, i32 %iv2
%load = load i32, i32* %gep
%sum.inc = add i32 %sum, %load
%inc2 = add i32 %iv2, 1
%exitcnd2 = icmp uge i32 %inc2, 1024
br i1 %exitcnd2, label %exit2, label %loop2
exit2:
br label %loop1.latch
loop1.latch:
%inc1 = add i32 %iv1, 1
%exitcnd1 = icmp uge i32 %inc1, 1024
; Branch on the outer-loop condition. The inner-loop condition %exitcnd2 is
; always true on this path (this block is only reached through %exit2), so
; using it here would leave %exitcnd1 dead and end the outer loop after a
; single iteration.
br i1 %exitcnd1, label %exit, label %loop1
exit:
ret void
}

View File

@ -1,43 +0,0 @@
; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL
; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL
; This IR comes from this C code:
;
; for (int i = 0; i < 4; i++) {
; if (src[i] == 1) {
; *dst = i;
; break;
; }
; }
;
; This test is meant to check that this loop is unrolled into four iterations.
; UNROLL-LABEL: @test
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL-NOT: load i32, i32*
; NOUNROLL-LABEL: @test
; NOUNROLL: load i32, i32*
; NOUNROLL-NOT: load i32, i32*
; Single-block loop over %src with a data-dependent exit. Each iteration loads
; %src[%i] and takes the back edge only when the loaded value equals 1 AND the
; incremented index is (signed) less than 4; otherwise it leaves the loop. The
; exit block stores %i, the index of the last executed iteration, to %dst.
; NOTE(review): the C snippet in the comments at the top of this test breaks
; out of the loop when src[i] == 1, whereas this IR keeps looping in that
; case - confirm which one is intended.
define void @test(i32* %dst, i32* %src) {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = sext i32 %i to i64
%1 = getelementptr inbounds i32, i32* %src, i64 %0
%2 = load i32, i32* %1
%inc = add nsw i32 %i, 1
%cmp1 = icmp slt i32 %inc, 4
%cmp3 = icmp eq i32 %2, 1
; Combined continuation condition: both the bound test and the data test must hold.
%or.cond = and i1 %cmp3, %cmp1
br i1 %or.cond, label %for.body, label %exit
exit: ; preds = %for.body
store i32 %i, i32* %dst
ret void
}

View File

@ -1,3 +0,0 @@
# Flag this lit configuration as unsupported when 'AArch64' is not among the
# entries of config.root.targets.
if 'AArch64' not in config.root.targets:
    config.unsupported = True

View File

@ -1,76 +0,0 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
; Partial unroll 8 times for this loop.
; Single-block counted loop with no work besides the counter itself: %iv
; starts at 0, is incremented by 1, and the loop exits when the incremented
; value is unsigned-greater-or-equal to 1024.
define void @unroll1() nounwind {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; Partial unroll 16 times for this loop.
; Two-level loop nest of pure counters. The inner loop (%loop2) counts %iv2
; from 0 and exits when its incremented value is unsigned-greater-or-equal to
; 1024; the outer loop (%loop1) does the same with its own counter %iv1.
define void @unroll2() nounwind {
entry:
br label %loop1
loop1:
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
br label %loop2.header
loop2.header:
br label %loop2
loop2:
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
%inc2 = add i32 %iv2, 1
%exitcnd2 = icmp uge i32 %inc2, 1024
br i1 %exitcnd2, label %exit2, label %loop2
exit2:
br label %loop1.latch
loop1.latch:
%inc1 = add i32 %iv1, 1
%exitcnd1 = icmp uge i32 %inc1, 1024
; Branch on the outer-loop condition. The inner-loop condition %exitcnd2 is
; always true on this path (this block is only reached through %exit2), so
; using it here would leave %exitcnd1 dead and end the outer loop after a
; single iteration.
br i1 %exitcnd1, label %exit, label %loop1
exit:
ret void
}
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp

View File

@ -1,39 +0,0 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Tests for unrolling loops with run-time trip counts
; EPILOG: %xtraiter = and i32 %n
; EPILOG: for.body:
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
; EPILOG: for.body.epil:
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
; PROLOG: for.body.prol:
; PROLOG: for.body:
; Returns the sum of i32 elements of %a, or 0 when %n is 0. The loop uses an
; i64 induction variable that is truncated to i32 and compared for equality
; with %n, so the trip count is only known at run time.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
; Skip the loop entirely for a zero count.
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %sum.0.lcssa
}

View File

@ -1,3 +0,0 @@
# Flag this lit configuration as unsupported when 'AMDGPU' is not among the
# entries of config.root.targets.
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True

View File

@ -1,33 +0,0 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
; CHECK-LABEL: @test_unroll_convergent_barrier(
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK-NOT: br
; Single-block loop that exits when the incremented counter equals 4. Each
; iteration loads %in[%indvars.iv], calls @llvm.amdgcn.s.barrier (declared
; with attribute group #1, which this file defines as nounwind convergent),
; adds the loaded value to the running sum and stores the sum to
; %out[%indvars.iv].
define amdgpu_kernel void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
%arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
%load = load i32, i32 addrspace(1)* %arrayidx.in
; The barrier sits between the load and the store of every iteration.
call void @llvm.amdgcn.s.barrier() #1
%add = add i32 %load, %sum.02
store i32 %add, i32 addrspace(1)* %arrayidx.out
%indvars.iv.next = add i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 4
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
declare void @llvm.amdgcn.s.barrier() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }

View File

@ -1,154 +0,0 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
; Check that we fully unroll the loop so that the alloca can be eliminated
; CHECK-LABEL: @non_invariant_ind
; CHECK: for.body:
; CHECK-NOT: br
; CHECK: store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
; CHECK: ret void
; Kernel that writes into a 64-element stack array from a 100-iteration loop
; and afterwards copies the element selected by %x back into %a.
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
; Fixed-size stack array: written inside the loop, read once after it.
%arr = alloca [64 x i32], align 4
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
; Loop exit: load arr[%x] and store it to a[%tmp1].
%arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
%tmp15 = load i32, i32* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
for.body: ; preds = %for.body, %entry
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load a[i] (counter sign-extended to i64 for the address computation).
%idxprom = sext i32 %i.015 to i64
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
; The array index is (i + %tmp1) srem 64, i.e. it is computed from the loop
; counter and therefore changes between iterations.
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
%arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
store i32 %tmp16, i32* %arrayidx3, align 4
; Counter runs from 0; the loop exits once the incremented value equals 100.
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Check that we unroll the inner loop but not the outer one
; CHECK-LABEL: @invariant_ind
; CHECK: %[[exitcond:[^ ]+]] = icmp eq i32 %{{.*}}, 32
; CHECK: br i1 %[[exitcond]]
; CHECK-NOT: icmp eq i32 %{{.*}}, 100
; Kernel with a two-level loop nest over a 64-element stack array. The outer
; loop (exit compare against 32) loads a[i]; the inner loop (exit compare
; against 100) stores that value into the array. After the nest, the element
; selected by %x is copied back into %a.
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
%arr = alloca [64 x i32], align 4
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.cond2.preheader
for.cond2.preheader: ; preds = %for.cond.cleanup5, %entry
; Outer-loop header: load a[i] once per outer iteration, then enter the inner
; loop.
%i.026 = phi i32 [ 0, %entry ], [ %inc10, %for.cond.cleanup5 ]
%idxprom = sext i32 %i.026 to i64
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
%tmp15 = load i32, i32 addrspace(1)* %arrayidx, align 4
br label %for.body6
for.cond.cleanup: ; preds = %for.cond.cleanup5
; Exit of the whole nest: load arr[%x] and store it to a[%tmp1].
%arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
%tmp16 = load i32, i32* %arrayidx13, align 4
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
ret void
for.cond.cleanup5: ; preds = %for.body6
; Outer-loop latch: increment i and exit once the new value equals 32.
%inc10 = add nuw nsw i32 %i.026, 1
%exitcond27 = icmp eq i32 %inc10, 32
br i1 %exitcond27, label %for.cond.cleanup, label %for.cond2.preheader
for.body6: ; preds = %for.body6, %for.cond2.preheader
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
; The array index is (j + %tmp1) srem 64: it uses only the inner counter and
; %tmp1, not the outer counter %i.026.
%add = add nsw i32 %j.025, %tmp1
%rem = srem i32 %add, 64
%arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
store i32 %tmp15, i32* %arrayidx8, align 4
; Inner-loop latch: increment j and exit once the new value equals 100.
%inc = add nuw nsw i32 %j.025, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
}
; Check that we do not force unrolling if the alloca is too big
; CHECK-LABEL: @too_big
; CHECK: for.body:
; CHECK: icmp eq i32 %{{.*}}, 100
; CHECK: br
; Same loop shape as @non_invariant_ind, but the stack array has 256 elements
; instead of 64. The loop still writes at index (i + %tmp1) srem 64.
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
; 256-element stack array: written inside the loop, read once after it.
%arr = alloca [256 x i32], align 4
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
; Loop exit: load arr[%x] and store it to a[%tmp1].
%arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
%tmp15 = load i32, i32* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
for.body: ; preds = %for.body, %entry
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load a[i] (counter sign-extended to i64 for the address computation).
%idxprom = sext i32 %i.015 to i64
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
; Store the loaded value at index (i + %tmp1) srem 64 of the stack array.
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
%arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
store i32 %tmp16, i32* %arrayidx3, align 4
; Counter runs from 0; the loop exits once the incremented value equals 100.
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Check that we do not force unrolling if the alloca is dynamically sized
; CHECK-LABEL: @dynamic_size_alloca(
; CHECK: alloca i32, i32 %n
; CHECK: for.body:
; CHECK: icmp eq i32 %{{.*}}, 100
; CHECK: br
; Same loop shape as @non_invariant_ind, but the stack allocation's element
; count comes from the runtime argument %n rather than a constant array type.
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
entry:
; Stack allocation of %n i32 elements; the size is not a compile-time constant.
%arr = alloca i32, i32 %n, align 4
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
; Loop exit: load element %x of the allocation and store it to a[%tmp1].
%arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
%tmp15 = load i32, i32* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
for.body: ; preds = %for.body, %entry
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Load a[i] (counter sign-extended to i64 for the address computation).
%idxprom = sext i32 %i.015 to i64
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
; Store the loaded value at element (i + %tmp1) srem 64 of the allocation.
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
%arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
store i32 %tmp16, i32* %arrayidx3, align 4
; Counter runs from 0; the loop exits once the incremented value equals 100.
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Intrinsic declarations for this test file. Of these, only
; @llvm.amdgcn.workitem.id.x is called by the kernels above.
; NOTE(review): the dispatch.ptr, workgroup.id.x and implicitarg.ptr
; declarations have no visible callers in this file - confirm before relying on
; or removing them.
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1
; Attribute group shared by the declarations above and by the workitem.id.x
; call sites.
attributes #1 = { nounwind readnone }

Some files were not shown because too many files have changed in this diff Show More