You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,14 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -disable-output
|
||||
|
||||
; Loop-unroll regression input: a single-block counting loop.
; %indvar starts at 0 and is incremented by 1 on every iteration; the loop is
; left for %loopexit once %indvar.next equals -2147483648 (the smallest i32
; value). The function then returns 0.
define i32 @main() {
|
||||
entry:
|
||||
br label %no_exit
|
||||
no_exit: ; preds = %no_exit, %entry
|
||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ] ; <i32> [#uses=1]
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
||||
%exitcond = icmp ne i32 %indvar.next, -2147483648 ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %no_exit, label %loopexit
|
||||
loopexit: ; preds = %no_exit
|
||||
|
||||
ret i32 0
|
||||
}
|
||||
|
@ -1,22 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -loop-simplify -disable-output
|
||||
|
||||
; Loop-unroll regression input with two nested loops.
; Inner loop (no_exit.2): %indvar1 counts up from 0 and the loop exits to
; loopexit.2 once %indvar.next2 equals 7.
; Outer loop (header no_exit.1): its only back edge, cond_false.2 -> no_exit.1,
; is the "true" target of a branch on the constant false, so the edge exists
; in the CFG but is never taken at run time.
define void @print_board() {
|
||||
entry:
|
||||
br label %no_exit.1
|
||||
no_exit.1: ; preds = %cond_false.2, %entry
|
||||
br label %no_exit.2
|
||||
no_exit.2: ; preds = %no_exit.2, %no_exit.1
|
||||
%indvar1 = phi i32 [ 0, %no_exit.1 ], [ %indvar.next2, %no_exit.2 ] ; <i32> [#uses=1]
|
||||
%indvar.next2 = add i32 %indvar1, 1 ; <i32> [#uses=2]
|
||||
%exitcond3 = icmp ne i32 %indvar.next2, 7 ; <i1> [#uses=1]
|
||||
br i1 %exitcond3, label %no_exit.2, label %loopexit.2
|
||||
loopexit.2: ; preds = %no_exit.2
|
||||
br i1 false, label %cond_true.2, label %cond_false.2
|
||||
cond_true.2: ; preds = %loopexit.2
|
||||
ret void
|
||||
cond_false.2: ; preds = %loopexit.2
|
||||
br i1 false, label %no_exit.1, label %loopexit.1
|
||||
loopexit.1: ; preds = %cond_false.2
|
||||
ret void
|
||||
}
|
||||
|
@ -1,16 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -S | grep bb72.2
|
||||
|
||||
; Loop whose body is split over two blocks: cond_true.outer holds the
; induction phi (%indvar1.ph, starting at 0) and bb72 holds the increment, the
; exit compare and the back edge. The loop exits to cond_true138 once
; %indvar.next2 equals 3.
; The block name bb72 matters: the RUN line greps the output for "bb72.2"
; (presumably the name given to an unrolled copy of bb72 - confirm against the
; unroller's naming scheme).
define void @vorbis_encode_noisebias_setup() {
|
||||
entry:
|
||||
br label %cond_true.outer
|
||||
cond_true.outer: ; preds = %bb72, %entry
|
||||
%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ] ; <i32> [#uses=1]
|
||||
br label %bb72
|
||||
bb72: ; preds = %cond_true.outer
|
||||
%indvar.next2 = add i32 %indvar1.ph, 1 ; <i32> [#uses=2]
|
||||
%exitcond3 = icmp eq i32 %indvar.next2, 3 ; <i1> [#uses=1]
|
||||
br i1 %exitcond3, label %cond_true138, label %cond_true.outer
|
||||
cond_true138: ; preds = %bb72
|
||||
ret void
|
||||
}
|
||||
|
@ -1,17 +0,0 @@
|
||||
; PR 1334
|
||||
; RUN: opt < %s -loop-unroll -disable-output
|
||||
|
||||
; PR 1334 reproducer.
; cond_next is a single-block loop with an i8 induction variable that counts
; up from 0 and exits to bb27 once %indvar.next equals 7.
; The exit block bb27 starts with a single-entry phi (%tmp282911.lcssa) of
; %tmp282911, a value defined in entry, i.e. outside the loop; the phi itself
; has no uses. The %agg.result argument is not referenced in the body.
define void @sal__math_float_manipulator_7__math__joint_array_dcv_ops__Omultiply__3([6 x float]* %agg.result) {
|
||||
entry:
|
||||
%tmp282911 = zext i8 0 to i32 ; <i32> [#uses=1]
|
||||
br label %cond_next
|
||||
cond_next: ; preds = %cond_next, %entry
|
||||
%indvar = phi i8 [ 0, %entry ], [ %indvar.next, %cond_next ] ; <i8> [#uses=1]
|
||||
%indvar.next = add i8 %indvar, 1 ; <i8> [#uses=2]
|
||||
%exitcond = icmp eq i8 %indvar.next, 7 ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %bb27, label %cond_next
|
||||
bb27: ; preds = %cond_next
|
||||
%tmp282911.lcssa = phi i32 [ %tmp282911, %cond_next ] ; <i32> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -S | not grep undef
|
||||
; PR1385
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
|
||||
target triple = "i686-apple-darwin8"
|
||||
%struct.__mpz_struct = type { i32, i32, i32* }
|
||||
|
||||
|
||||
; PR1385 reproducer (the RUN line requires that the output contains no "undef").
; entry: allocates a one-element array %want, passes element 0 to __gmpz_init
; and computes three more GEPs (%want27, %want3, %want2) that all address the
; same element 0.
; bb: loop that calls __gmpz_mul once per iteration and exits to bb10 when
; %indvar.next equals 2. The first pointer argument, %want23.0, is a phi that
; takes %want27 on entry and %want2 on the back edge.
; bb10: forwards %want2 through a single-entry phi and passes it to
; __gmpz_clear.
define void @Foo(%struct.__mpz_struct* %base) {
|
||||
entry:
|
||||
%want = alloca [1 x %struct.__mpz_struct], align 16 ; <[1 x %struct.__mpz_struct]*> [#uses=4]
|
||||
%want1 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
|
||||
call void @__gmpz_init( %struct.__mpz_struct* %want1 )
|
||||
%want27 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
|
||||
%want3 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1]
|
||||
%want2 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=2]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
%i.01.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=1]
|
||||
%want23.0 = phi %struct.__mpz_struct* [ %want27, %entry ], [ %want2, %bb ] ; <%struct.__mpz_struct*> [#uses=1]
|
||||
call void @__gmpz_mul( %struct.__mpz_struct* %want23.0, %struct.__mpz_struct* %want3, %struct.__mpz_struct* %base )
|
||||
%indvar.next = add i32 %i.01.0, 1 ; <i32> [#uses=2]
|
||||
%exitcond = icmp ne i32 %indvar.next, 2 ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %bb, label %bb10
|
||||
|
||||
bb10: ; preds = %bb
|
||||
%want2.lcssa = phi %struct.__mpz_struct* [ %want2, %bb ] ; <%struct.__mpz_struct*> [#uses=1]
|
||||
call void @__gmpz_clear( %struct.__mpz_struct* %want2.lcssa )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @__gmpz_init(%struct.__mpz_struct*)
|
||||
declare void @__gmpz_mul(%struct.__mpz_struct*, %struct.__mpz_struct*, %struct.__mpz_struct*)
|
||||
declare void @__gmpz_clear(%struct.__mpz_struct*)
|
||||
|
@ -1,18 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2
|
||||
|
||||
; Two-block loop (header cond_true.outer, latch bb72) whose exit compare is
; against the argument %trips, so the trip count is not a compile-time
; constant. %indvar1.ph counts up from 0 and the loop exits to cond_true138
; once %indvar.next2 equals %trips.
; The RUN line forces -unroll-count=3 and greps the output for "bb72.2", so
; the block name bb72 must be kept.
define void @foo(i32 %trips) {
|
||||
entry:
|
||||
br label %cond_true.outer
|
||||
|
||||
cond_true.outer:
|
||||
%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
|
||||
br label %bb72
|
||||
|
||||
bb72:
|
||||
%indvar.next2 = add i32 %indvar1.ph, 1
|
||||
%exitcond3 = icmp eq i32 %indvar.next2, %trips
|
||||
br i1 %exitcond3, label %cond_true138, label %cond_true.outer
|
||||
|
||||
cond_true138:
|
||||
ret void
|
||||
}
|
@ -1,295 +0,0 @@
|
||||
; RUN: opt < %s -disable-output -loop-unroll
|
||||
; PR1770
|
||||
; PR1947
|
||||
|
||||
%struct.cl_engine = type { i32, i16, i32, i8**, i8**, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
|
||||
%struct.cl_limits = type { i32, i32, i32, i32, i16, i64 }
|
||||
%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
|
||||
%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
|
||||
%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
|
||||
%struct.cli_bm_patt = type { i8*, i32, i8*, i8*, i8, %struct.cli_bm_patt* }
|
||||
%struct.cli_ctx = type { i8**, i64*, %struct.cli_matcher*, %struct.cl_engine*, %struct.cl_limits*, i32, i32, i32, i32, %struct.cli_dconf* }
|
||||
%struct.cli_dconf = type { i32, i32, i32, i32, i32, i32, i32 }
|
||||
%struct.cli_matcher = type { i16, i8, i32*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
|
||||
|
||||
declare i8* @calloc(i64, i64)
|
||||
|
||||
; Reduced crash reproducer (the file header cites PR1770 and PR1947).
; Shape of the function:
;  - A long chain of blocks, each ending in a conditional branch on the
;    constant false. Every conditional branch in the function tests that
;    constant except the loop latch in cond_next811.i.
;  - Exactly one loop: header cond_true784.i, latch cond_next811.i. Its i8
;    induction variable starts at 0, is incremented by 1 and the loop leaves
;    for cond_next9079 once the incremented value equals 3. The header also has
;    an early exit to cond_true1008.critedge1190.i.
;  - That early-exit block starts with a single-entry phi of %tmp10.i.i527, the
;    calloc result defined in cond_next54.i, i.e. outside the loop.
; Neither %desc nor %ctx is referenced in the body.
define fastcc i32 @cli_scanpe(i32 %desc, %struct.cli_ctx* %ctx) {
|
||||
entry:
|
||||
br i1 false, label %cond_next17, label %cond_true14
|
||||
|
||||
cond_true14: ; preds = %entry
|
||||
ret i32 0
|
||||
|
||||
cond_next17: ; preds = %entry
|
||||
br i1 false, label %LeafBlock, label %LeafBlock1250
|
||||
|
||||
LeafBlock1250: ; preds = %cond_next17
|
||||
ret i32 0
|
||||
|
||||
LeafBlock: ; preds = %cond_next17
|
||||
br i1 false, label %cond_next33, label %cond_true30
|
||||
|
||||
cond_true30: ; preds = %LeafBlock
|
||||
ret i32 0
|
||||
|
||||
cond_next33: ; preds = %LeafBlock
|
||||
br i1 false, label %cond_next90, label %cond_true42
|
||||
|
||||
cond_true42: ; preds = %cond_next33
|
||||
ret i32 0
|
||||
|
||||
cond_next90: ; preds = %cond_next33
|
||||
br i1 false, label %cond_next100, label %cond_true97
|
||||
|
||||
cond_true97: ; preds = %cond_next90
|
||||
ret i32 0
|
||||
|
||||
cond_next100: ; preds = %cond_next90
|
||||
br i1 false, label %cond_next109, label %cond_true106
|
||||
|
||||
cond_true106: ; preds = %cond_next100
|
||||
ret i32 0
|
||||
|
||||
cond_next109: ; preds = %cond_next100
|
||||
br i1 false, label %cond_false, label %cond_true118
|
||||
|
||||
cond_true118: ; preds = %cond_next109
|
||||
ret i32 0
|
||||
|
||||
cond_false: ; preds = %cond_next109
|
||||
br i1 false, label %NodeBlock1482, label %cond_true126
|
||||
|
||||
cond_true126: ; preds = %cond_false
|
||||
ret i32 0
|
||||
|
||||
NodeBlock1482: ; preds = %cond_false
|
||||
br i1 false, label %cond_next285, label %NodeBlock1480
|
||||
|
||||
NodeBlock1480: ; preds = %NodeBlock1482
|
||||
ret i32 0
|
||||
|
||||
cond_next285: ; preds = %NodeBlock1482
|
||||
br i1 false, label %cond_next320, label %cond_true294
|
||||
|
||||
cond_true294: ; preds = %cond_next285
|
||||
ret i32 0
|
||||
|
||||
cond_next320: ; preds = %cond_next285
|
||||
br i1 false, label %LeafBlock1491, label %LeafBlock1493
|
||||
|
||||
LeafBlock1493: ; preds = %cond_next320
|
||||
ret i32 0
|
||||
|
||||
LeafBlock1491: ; preds = %cond_next320
|
||||
br i1 false, label %cond_true400, label %cond_true378
|
||||
|
||||
cond_true378: ; preds = %LeafBlock1491
|
||||
ret i32 1
|
||||
|
||||
cond_true400: ; preds = %LeafBlock1491
|
||||
br i1 false, label %cond_next413, label %cond_true406
|
||||
|
||||
cond_true406: ; preds = %cond_true400
|
||||
ret i32 0
|
||||
|
||||
cond_next413: ; preds = %cond_true400
|
||||
br i1 false, label %cond_next429, label %cond_true424
|
||||
|
||||
cond_true424: ; preds = %cond_next413
|
||||
ret i32 0
|
||||
|
||||
cond_next429: ; preds = %cond_next413
|
||||
br i1 false, label %NodeBlock1557, label %NodeBlock1579
|
||||
|
||||
NodeBlock1579: ; preds = %cond_next429
|
||||
ret i32 0
|
||||
|
||||
NodeBlock1557: ; preds = %cond_next429
|
||||
br i1 false, label %LeafBlock1543, label %NodeBlock1555
|
||||
|
||||
NodeBlock1555: ; preds = %NodeBlock1557
|
||||
ret i32 0
|
||||
|
||||
LeafBlock1543: ; preds = %NodeBlock1557
|
||||
br i1 false, label %cond_next870, label %cond_next663
|
||||
|
||||
cond_next663: ; preds = %LeafBlock1543
|
||||
ret i32 0
|
||||
|
||||
cond_next870: ; preds = %LeafBlock1543
|
||||
br i1 false, label %cond_true1012, label %cond_true916
|
||||
|
||||
cond_true916: ; preds = %cond_next870
|
||||
ret i32 0
|
||||
|
||||
cond_true1012: ; preds = %cond_next870
|
||||
br i1 false, label %cond_next3849, label %cond_true2105
|
||||
|
||||
cond_true2105: ; preds = %cond_true1012
|
||||
ret i32 0
|
||||
|
||||
cond_next3849: ; preds = %cond_true1012
|
||||
br i1 false, label %cond_next4378, label %bb6559
|
||||
|
||||
bb3862: ; preds = %cond_next4385
|
||||
br i1 false, label %cond_false3904, label %cond_true3876
|
||||
|
||||
cond_true3876: ; preds = %bb3862
|
||||
ret i32 0
|
||||
|
||||
cond_false3904: ; preds = %bb3862
|
||||
br i1 false, label %cond_next4003, label %cond_true3935
|
||||
|
||||
cond_true3935: ; preds = %cond_false3904
|
||||
ret i32 0
|
||||
|
||||
cond_next4003: ; preds = %cond_false3904
|
||||
br i1 false, label %cond_next5160, label %cond_next4015
|
||||
|
||||
cond_next4015: ; preds = %cond_next4003
|
||||
ret i32 0
|
||||
|
||||
cond_next4378: ; preds = %cond_next3849
|
||||
br i1 false, label %cond_next4385, label %bb4393
|
||||
|
||||
cond_next4385: ; preds = %cond_next4378
|
||||
br i1 false, label %bb3862, label %bb4393
|
||||
|
||||
bb4393: ; preds = %cond_next4385, %cond_next4378
|
||||
ret i32 0
|
||||
|
||||
cond_next5160: ; preds = %cond_next4003
|
||||
br i1 false, label %bb5188, label %bb6559
|
||||
|
||||
bb5188: ; preds = %cond_next5160
|
||||
br i1 false, label %cond_next5285, label %cond_true5210
|
||||
|
||||
cond_true5210: ; preds = %bb5188
|
||||
ret i32 0
|
||||
|
||||
cond_next5285: ; preds = %bb5188
|
||||
br i1 false, label %cond_true5302, label %cond_true5330
|
||||
|
||||
cond_true5302: ; preds = %cond_next5285
|
||||
br i1 false, label %bb7405, label %bb7367
|
||||
|
||||
cond_true5330: ; preds = %cond_next5285
|
||||
ret i32 0
|
||||
|
||||
bb6559: ; preds = %cond_next5160, %cond_next3849
|
||||
ret i32 0
|
||||
|
||||
bb7367: ; preds = %cond_true5302
|
||||
ret i32 0
|
||||
|
||||
bb7405: ; preds = %cond_true5302
|
||||
br i1 false, label %cond_next8154, label %cond_true7410
|
||||
|
||||
cond_true7410: ; preds = %bb7405
|
||||
ret i32 0
|
||||
|
||||
cond_next8154: ; preds = %bb7405
|
||||
br i1 false, label %cond_true8235, label %bb9065
|
||||
|
||||
cond_true8235: ; preds = %cond_next8154
|
||||
br i1 false, label %bb8274, label %bb8245
|
||||
|
||||
bb8245: ; preds = %cond_true8235
|
||||
ret i32 0
|
||||
|
||||
bb8274: ; preds = %cond_true8235
|
||||
br i1 false, label %cond_next8358, label %cond_true8295
|
||||
|
||||
cond_true8295: ; preds = %bb8274
|
||||
ret i32 0
|
||||
|
||||
cond_next8358: ; preds = %bb8274
|
||||
br i1 false, label %cond_next.i509, label %cond_true8371
|
||||
|
||||
cond_true8371: ; preds = %cond_next8358
|
||||
ret i32 -123
|
||||
|
||||
cond_next.i509: ; preds = %cond_next8358
|
||||
br i1 false, label %bb36.i, label %bb33.i
|
||||
|
||||
bb33.i: ; preds = %cond_next.i509
|
||||
ret i32 0
|
||||
|
||||
bb36.i: ; preds = %cond_next.i509
|
||||
br i1 false, label %cond_next54.i, label %cond_true51.i
|
||||
|
||||
cond_true51.i: ; preds = %bb36.i
|
||||
ret i32 0
|
||||
|
||||
cond_next54.i: ; preds = %bb36.i
|
||||
%tmp10.i.i527 = call i8* @calloc( i64 0, i64 1 ) ; <i8*> [#uses=1]
|
||||
br i1 false, label %cond_next11.i.i, label %bb132.i
|
||||
|
||||
bb132.i: ; preds = %cond_next54.i
|
||||
ret i32 0
|
||||
|
||||
cond_next11.i.i: ; preds = %cond_next54.i
|
||||
br i1 false, label %bb32.i.i545, label %cond_true1008.critedge.i
|
||||
|
||||
bb32.i.i545: ; preds = %cond_next11.i.i
|
||||
br i1 false, label %cond_next349.i, label %cond_true184.i
|
||||
|
||||
cond_true184.i: ; preds = %bb32.i.i545
|
||||
ret i32 0
|
||||
|
||||
cond_next349.i: ; preds = %bb32.i.i545
|
||||
br i1 false, label %cond_next535.i, label %cond_true1008.critedge1171.i
|
||||
|
||||
cond_next535.i: ; preds = %cond_next349.i
|
||||
br i1 false, label %cond_next569.i, label %cond_false574.i
|
||||
|
||||
cond_next569.i: ; preds = %cond_next535.i
|
||||
br i1 false, label %cond_next670.i, label %cond_true1008.critedge1185.i
|
||||
|
||||
cond_false574.i: ; preds = %cond_next535.i
|
||||
ret i32 0
|
||||
|
||||
cond_next670.i: ; preds = %cond_next569.i
|
||||
br i1 false, label %cond_true692.i, label %cond_next862.i
|
||||
|
||||
cond_true692.i: ; preds = %cond_next670.i
|
||||
br i1 false, label %cond_false742.i, label %cond_true718.i
|
||||
|
||||
cond_true718.i: ; preds = %cond_true692.i
|
||||
ret i32 0
|
||||
|
||||
cond_false742.i: ; preds = %cond_true692.i
|
||||
br i1 false, label %cond_true784.i, label %cond_next9079
|
||||
|
||||
cond_true784.i: ; preds = %cond_next811.i, %cond_false742.i
|
||||
%indvar1411.i.reg2mem.0 = phi i8 [ %indvar.next1412.i, %cond_next811.i ], [ 0, %cond_false742.i ] ; <i8> [#uses=1]
|
||||
br i1 false, label %cond_true1008.critedge1190.i, label %cond_next811.i
|
||||
|
||||
cond_next811.i: ; preds = %cond_true784.i
|
||||
%indvar.next1412.i = add i8 %indvar1411.i.reg2mem.0, 1 ; <i8> [#uses=2]
|
||||
%tmp781.i = icmp eq i8 %indvar.next1412.i, 3 ; <i1> [#uses=1]
|
||||
br i1 %tmp781.i, label %cond_next9079, label %cond_true784.i
|
||||
|
||||
cond_next862.i: ; preds = %cond_next670.i
|
||||
ret i32 0
|
||||
|
||||
cond_true1008.critedge.i: ; preds = %cond_next11.i.i
|
||||
ret i32 0
|
||||
|
||||
cond_true1008.critedge1171.i: ; preds = %cond_next349.i
|
||||
ret i32 0
|
||||
|
||||
cond_true1008.critedge1185.i: ; preds = %cond_next569.i
|
||||
ret i32 0
|
||||
|
||||
cond_true1008.critedge1190.i: ; preds = %cond_true784.i
|
||||
%tmp621.i532.lcssa610 = phi i8* [ %tmp10.i.i527, %cond_true784.i ] ; <i8*> [#uses=0]
|
||||
ret i32 0
|
||||
|
||||
bb9065: ; preds = %cond_next8154
|
||||
ret i32 0
|
||||
|
||||
cond_next9079: ; preds = %cond_next811.i, %cond_false742.i
|
||||
ret i32 0
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s
|
||||
; Test phi update after partial unroll.
|
||||
|
||||
declare i1 @check() nounwind
|
||||
|
||||
; CHECK: @test
|
||||
; CHECK: if.else:
|
||||
; CHECK: if.then.loopexit
|
||||
; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ]
|
||||
; CHECK: if.else.3
|
||||
; Input for the "phi update after partial unroll" check.
; If the first @check() call returns true the function goes straight to
; if.then. Otherwise the single-block loop if.else repeatedly subtracts %j from
; a running value that starts at %i, and leaves for if.then as soon as
; @check() returns true.
; if.then merges %i (from entry) and %sub5 (from the loop) in the phi %i.tr;
; that phi has two predecessors, one of them the loop exit edge. The CHECK
; lines match on the names if.else, if.then and %sub5, so they must be kept.
define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {
|
||||
entry:
|
||||
%cond1 = call zeroext i1 @check()
|
||||
br i1 %cond1, label %if.then, label %if.else.lr.ph
|
||||
|
||||
if.else.lr.ph: ; preds = %entry
|
||||
br label %if.else
|
||||
|
||||
if.else: ; preds = %if.else, %if.else.lr.ph
|
||||
%sub = phi i32 [ %i, %if.else.lr.ph ], [ %sub5, %if.else ]
|
||||
%sub5 = sub i32 %sub, %j
|
||||
%cond2 = call zeroext i1 @check()
|
||||
br i1 %cond2, label %if.then, label %if.else
|
||||
|
||||
if.then: ; preds = %if.else, %entry
|
||||
%i.tr = phi i32 [ %i, %entry ], [ %sub5, %if.else ]
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; PR7318: assertion failure after doing a simple loop unroll
|
||||
;
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK: bb1.bb2_crit_edge:
|
||||
; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
|
||||
; CHECK: bb1.3:
|
||||
; PR7318 input: sums i32 elements loaded from %p.
; When %n > 0 the loop (blocks bb and bb1) runs with an i64 induction variable
; from 0 until %indvar.next equals zext(%n), adding each loaded element to the
; running sum %s.01. The final sum leaves the loop through the single-entry
; phi %.lcssa in bb1.bb2_crit_edge. When %n <= 0 the loop is skipped and 0 is
; returned.
; Values %0, %1 and %2 are unnamed (numbered) temporaries; the CHECK lines
; match on those numbers and on the block names bb1 / bb1.bb2_crit_edge.
define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
|
||||
entry:
|
||||
%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
|
||||
br i1 %0, label %bb.nph, label %bb2
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
%tmp = zext i32 %n to i64 ; <i64> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb.nph, %bb1
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2]
|
||||
%s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ] ; <i32> [#uses=1]
|
||||
%scevgep = getelementptr i32, i32* %p, i64 %indvar ; <i32*> [#uses=1]
|
||||
%1 = load i32, i32* %scevgep, align 1 ; <i32> [#uses=1]
|
||||
%2 = add nsw i32 %1, %s.01 ; <i32> [#uses=2]
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb
|
||||
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
|
||||
%exitcond = icmp ne i64 %indvar.next, %tmp ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %bb, label %bb1.bb2_crit_edge
|
||||
|
||||
bb1.bb2_crit_edge: ; preds = %bb1
|
||||
%.lcssa = phi i32 [ %2, %bb1 ] ; <i32> [#uses=1]
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb1.bb2_crit_edge, %entry
|
||||
%s.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i32> [#uses=1]
|
||||
ret i32 %s.0.lcssa
|
||||
}
|
||||
|
||||
; Check phi update for loop with an early-exit.
|
||||
;
|
||||
; CHECK-LABEL: @test3(
|
||||
; CHECK: return.loopexit:
|
||||
; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
|
||||
; CHECK: exit.3:
|
||||
; Input for the "phi update for a loop with an early exit" check.
; The loop consists of do.body, exit, land.lhs.true and do.cond; do.cond is the
; latch (back edge to do.body) and also the normal loop exit to do.end.
; land.lhs.true provides a second, early exit straight to return, carrying the
; value %tmp7.i that was loaded in block exit.
; The load address and two of the branch conditions are undef; this is reduced
; test input, not meaningful computation.
; return merges 0 (from do.end and from entry) with %tmp7.i (early exit).
define i32 @test3() nounwind uwtable ssp align 2 {
|
||||
entry:
|
||||
%cond1 = call zeroext i1 @check()
|
||||
br i1 %cond1, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.cond, %if.end
|
||||
%cond2 = call zeroext i1 @check()
|
||||
br i1 %cond2, label %exit, label %do.cond
|
||||
|
||||
exit: ; preds = %do.body
|
||||
%tmp7.i = load i32, i32* undef, align 8
|
||||
br i1 undef, label %do.cond, label %land.lhs.true
|
||||
|
||||
land.lhs.true: ; preds = %exit
|
||||
br i1 undef, label %return, label %do.cond
|
||||
|
||||
do.cond: ; preds = %land.lhs.true, %exit, %do.body
|
||||
%cond3 = call zeroext i1 @check()
|
||||
br i1 %cond3, label %do.end, label %do.body
|
||||
|
||||
do.end: ; preds = %do.cond
|
||||
br label %return
|
||||
|
||||
return: ; preds = %do.end, %land.lhs.true, %entry
|
||||
%retval.0 = phi i32 [ 0, %do.end ], [ 0, %entry ], [ %tmp7.i, %land.lhs.true ]
|
||||
ret i32 %retval.0
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s
|
||||
;
|
||||
; Test induction variable simplify after loop unrolling. It should
|
||||
; expose nice opportunities for GVN.
|
||||
;
|
||||
; The negative check on while.body.1 below also ensures that loop unrolling (with SCEV)
|
||||
; removes unrolled loop exits given that 128 is a multiple of 4.
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
|
||||
|
||||
; PR10534: LoopUnroll not keeping canonical induction variable...
|
||||
; CHECK: while.body:
|
||||
; CHECK-NOT: while.body.1:
|
||||
; CHECK: %shr.1 = lshr i32 %bit_addr.addr.01, 5
|
||||
; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1
|
||||
; CHECK: %shr.2 = lshr i32 %bit_addr.addr.01, 5
|
||||
; CHECK: %arrayidx.2 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.2
|
||||
; CHECK: %shr.3 = lshr i32 %bit_addr.addr.01, 5
|
||||
; CHECK: %arrayidx.3 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.3
|
||||
; PR10534 input: flips one bit of %bitmap per iteration.
; The loop uses two counters that both start from constants, not from the
; function arguments: %nbits.addr.02 counts down from 128 and
; %bit_addr.addr.01 counts up from 0. (The %bit_addr and %nbits arguments are
; not referenced in the body.) The loop exits when the down-counter reaches 0.
; Each iteration XORs the word at index (bit_addr >> 5) with
; (1 << (bit_addr & 31)) and stores it back.
; The CHECK lines match on the names while.body, %shr, %arrayidx and
; %bit_addr.addr.01.
define void @FlipBit(i32* nocapture %bitmap, i32 %bit_addr, i32 %nbits) nounwind {
|
||||
entry:
|
||||
br label %while.body
|
||||
|
||||
while.body:
|
||||
%nbits.addr.02 = phi i32 [ 128, %entry ], [ %dec, %while.body ]
|
||||
%bit_addr.addr.01 = phi i32 [ 0, %entry ], [ %inc, %while.body ]
|
||||
%dec = add i32 %nbits.addr.02, -1
|
||||
%shr = lshr i32 %bit_addr.addr.01, 5
|
||||
%rem = and i32 %bit_addr.addr.01, 31
|
||||
%shl = shl i32 1, %rem
|
||||
%arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
|
||||
%tmp6 = load i32, i32* %arrayidx, align 4
|
||||
%xor = xor i32 %tmp6, %shl
|
||||
store i32 %xor, i32* %arrayidx, align 4
|
||||
%inc = add i32 %bit_addr.addr.01, 1
|
||||
%tobool = icmp eq i32 %dec, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end:
|
||||
ret void
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
|
||||
;
|
||||
; This is a test case that required a number of setup passes because
|
||||
; it depends on block order.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.6.8"
|
||||
|
||||
declare i1 @check() nounwind
|
||||
declare i32 @getval() nounwind
|
||||
|
||||
; Check that the loop exit merges values from all the iterations. This
|
||||
; could be a tad fragile, but it's a good test.
|
||||
;
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK: return:
|
||||
; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
|
||||
; CHECK-NOT: @bar(
|
||||
; CHECK: bar.exit.3
|
||||
; Caller side of the inline + unroll test.
; The loop (do.body, land.lhs.true, do.cond) calls @bar once per iteration.
;  - If @bar returns non-zero, land.lhs.true calls @getval and leaves the loop
;    for return when that result is 0, carrying %call6 as the return value.
;  - do.cond continues the loop while 0 <= %call2 (signed), where %call2 is the
;    @getval result obtained once in if.end; otherwise it exits with 0.
; return therefore merges 0 (entry), %call6 (early exit) and 0 (normal exit).
; The RUN line runs -instcombine -inline -jump-threading before -loop-unroll,
; and the CHECK lines expect no remaining reference to @bar( in this function.
define i32 @foo() uwtable ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%call2 = call i32 @getval()
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.cond, %if.end
|
||||
%call6 = call i32 @bar()
|
||||
%cmp = icmp ne i32 %call6, 0
|
||||
br i1 %cmp, label %land.lhs.true, label %do.cond
|
||||
|
||||
land.lhs.true: ; preds = %do.body
|
||||
%call10 = call i32 @getval()
|
||||
%cmp11 = icmp eq i32 0, %call10
|
||||
br i1 %cmp11, label %return, label %do.cond
|
||||
|
||||
do.cond: ; preds = %land.lhs.true, %do.body
|
||||
%cmp18 = icmp sle i32 0, %call2
|
||||
br i1 %cmp18, label %do.body, label %return
|
||||
|
||||
return: ; preds = %do.cond, %land.lhs.true, %entry
|
||||
%retval.0 = phi i32 [ 0, %entry ], [ %call6, %land.lhs.true ], [ 0, %do.cond ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
; Callee that the test expects to be inlined into @foo.
; Returns the result of @getval() only on the path entry -> land.lhs.true ->
; cond.true (i.e. when the undef branch goes to land.lhs.true and @check()
; returns true); on both other paths the phi in cond.end yields 0.
define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %land.lhs.true, label %cond.end
|
||||
|
||||
land.lhs.true: ; preds = %entry
|
||||
%cmp4 = call zeroext i1 @check()
|
||||
br i1 %cmp4, label %cond.true, label %cond.end
|
||||
|
||||
cond.true: ; preds = %land.lhs.true
|
||||
%tmp7 = call i32 @getval()
|
||||
br label %cond.end
|
||||
|
||||
cond.end: ; preds = %cond.true, %land.lhs.true, %entry
|
||||
%cond = phi i32 [ %tmp7, %cond.true ], [ 0, %land.lhs.true ], [ 0, %entry ]
|
||||
ret i32 %cond
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=150 | FileCheck %s
|
||||
;
|
||||
; Verify that trunc i64 to i32 is considered free by loop unrolling
|
||||
; heuristics when i32 is a native type.
|
||||
; This should result in fully unrolling this loop with size=7, TC=19.
|
||||
; If the trunc were not free we would have 8*19=152 > 150.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
|
||||
; Check that for.body was unrolled 19 times.
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK: %0 = load
|
||||
; CHECK: %conv = sext i8 %0 to i32
|
||||
; CHECK: %add.1 = add nsw i32 %conv.1, %conv
|
||||
; CHECK: %add.18 = add nsw i32 %conv.18, %add.17
|
||||
; CHECK: ret i32 %add.18
|
||||
; Sums the sign-extended i8 elements %arr[0] .. %arr[18] and returns the sum.
; The induction variable is i64, but the exit test truncates it to i32 and
; compares against 19; that trunc is the instruction whose cost this test is
; about (see the size/threshold arithmetic in the file header).
; The CHECK lines match on %0, %conv, %add and their ".N" unrolled copies.
define i32 @test(i8* %arr) nounwind uwtable readnone {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i8, i8* %arr, i64 %indvars.iv
|
||||
%0 = load i8, i8* %arrayidx, align 1
|
||||
%conv = sext i8 %0 to i32
|
||||
%add = add nsw i32 %conv, %sum.02
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv1 = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond2 = icmp eq i32 %lftr.wideiv1, 19
|
||||
br i1 %exitcond2, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
%add.lcssa = phi i32 [ %add, %for.body ]
|
||||
ret i32 %add.lcssa
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s
|
||||
; PR12513: Loop unrolling breaks with indirect branches.
|
||||
; If loop unrolling attempts to transform this loop, it replaces the
|
||||
; indirectbr successors. SimplifyCFG then considers them to be unreachable.
|
||||
declare void @subtract() nounwind uwtable
|
||||
|
||||
; CHECK-NOT: unreachable
|
||||
; PR12513 input: a counted loop whose header ends in an indirectbr.
; for.body passes the block addresses of for.body_code and for.body_codeprime
; to @funca and jumps indirectly to whichever address it returns. Both targets
; rejoin in call2_termjoin, which increments the i64 counter and leaves the
; loop for for.end once the counter equals 5.
; %vals19 is allocated but never used; %x20 only receives the constant 135.
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
|
||||
entry:
|
||||
%vals19 = alloca [5 x i32], align 16
|
||||
%x20 = alloca i32, align 4
|
||||
store i32 135, i32* %x20, align 4
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %call2_termjoin, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ]
|
||||
%a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8*
|
||||
blockaddress(@main, %for.body_codeprime)) nounwind
|
||||
indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime]
|
||||
|
||||
for.body_code: ; preds = %for.body
|
||||
call void @subtract()
|
||||
br label %call2_termjoin
|
||||
|
||||
call2_termjoin: ; preds = %for.body_codeprime, %for.body_code
|
||||
%joinphi15.in.in = add i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %joinphi15.in.in, 5
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %call2_termjoin
|
||||
ret i32 0
|
||||
|
||||
for.body_codeprime: ; preds = %for.body
|
||||
call void @subtract_v2(i64 %indvars.iv)
|
||||
br label %call2_termjoin
|
||||
}
|
||||
|
||||
declare coldcc i8* @funca(i8*, i8*) readonly
|
||||
|
||||
declare void @subtract_v2(i64) nounwind uwtable
|
@ -1,169 +0,0 @@
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
|
||||
|
||||
; Check that loop unroller doesn't exhaust HW prefetcher resources.
|
||||
|
||||
; Partial unroll 2 times for this loop on falkor instead of 4.
|
||||
; NOHWPF-LABEL: @unroll1(
|
||||
; NOHWPF-LABEL: loop:
|
||||
; NOHWPF-NEXT: phi
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: icmp
|
||||
; NOHWPF-NEXT: br
|
||||
; NOHWPF-NEXT-LABEL: exit:
|
||||
;
|
||||
; CHECK-LABEL: @unroll1(
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK-NEXT: phi
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
||||
; CHECK-NEXT: br
|
||||
; CHECK-NEXT-LABEL: exit:
|
||||
; Single-block loop with two strided loads per iteration.
; %iv counts up from 0 and the loop exits once %inc is >= 1024 (unsigned).
; Each iteration performs one volatile i32 load from %p[%iv] and one from
; %p2[%iv]; the loaded values are not used.
; The CHECK/NOHWPF lines count the getelementptr/load pairs in block "loop",
; so the instruction order inside the block must be kept.
define void @unroll1(i32* %p, i32* %p2) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
|
||||
%gep = getelementptr inbounds i32, i32* %p, i32 %iv
|
||||
%load = load volatile i32, i32* %gep
|
||||
|
||||
%gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
|
||||
%load2 = load volatile i32, i32* %gep2
|
||||
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Partial unroll 4 times for this loop on falkor instead of 8.
|
||||
; NOHWPF-LABEL: @unroll2(
|
||||
; NOHWPF-LABEL: loop2:
|
||||
; NOHWPF-NEXT: phi
|
||||
; NOHWPF-NEXT: phi
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: getelementptr
|
||||
; NOHWPF-NEXT: load
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: add
|
||||
; NOHWPF-NEXT: icmp
|
||||
; NOHWPF-NEXT: br
|
||||
; NOHWPF-NEXT-LABEL: exit2:
|
||||
;
|
||||
; CHECK-LABEL: @unroll2(
|
||||
; CHECK-LABEL: loop2:
|
||||
; CHECK-NEXT: phi
|
||||
; CHECK-NEXT: phi
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: load
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
||||
; CHECK-NEXT: br
|
||||
; CHECK-NEXT-LABEL: exit2:
|
||||
|
||||
; Nested-loop input for the Falkor hardware-prefetcher unrolling check.
;
; Outer loop (loop1 .. loop1.latch): %iv1 counts up from 0; the latch exits
; once %inc1 is >= 1024 (unsigned). %outer.sum carries the inner loop's %sum
; phi value from one outer iteration to the next.
;
; Inner loop (loop2): %iv2 counts up from 0 and the loop exits once %inc2 is
; >= 1024 (unsigned). Each iteration loads %p[%iv2] and adds it to the running
; %sum. The CHECK/NOHWPF lines count the getelementptr/load/add/add groups in
; block "loop2", so the instruction order inside that block must be kept.
define void @unroll2(i32* %p) {
entry:
  br label %loop1

loop1:
  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
  %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
  br label %loop2.header

loop2.header:
  br label %loop2

loop2:
  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
  %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
  %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
  %load = load i32, i32* %gep
  %sum.inc = add i32 %sum, %load
  %inc2 = add i32 %iv2, 1
  %exitcnd2 = icmp uge i32 %inc2, 1024
  br i1 %exitcnd2, label %exit2, label %loop2

exit2:
  br label %loop1.latch

loop1.latch:
  %inc1 = add i32 %iv1, 1
  %exitcnd1 = icmp uge i32 %inc1, 1024
  ; Branch on the outer loop's own exit condition. Branching on %exitcnd2 (the
  ; inner loop's condition, always true once the inner loop has exited) would
  ; leave %exitcnd1 dead and make the outer loop run exactly once.
  br i1 %exitcnd1, label %exit, label %loop1

exit:
  ret void
}
|
||||
|
@ -1,43 +0,0 @@
|
||||
; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL
|
||||
; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL
|
||||
|
||||
; This IR comes from this C code:
|
||||
;
|
||||
; for (int i = 0; i < 4; i++) {
|
||||
; if (src[i] == 1) {
|
||||
; *dst = i;
|
||||
; break;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; This test is meant to check that this loop is unrolled into four iterations.
|
||||
|
||||
; UNROLL-LABEL: @test
|
||||
; UNROLL: load i32, i32*
|
||||
; UNROLL: load i32, i32*
|
||||
; UNROLL: load i32, i32*
|
||||
; UNROLL: load i32, i32*
|
||||
; UNROLL-NOT: load i32, i32*
|
||||
; NOUNROLL-LABEL: @test
|
||||
; NOUNROLL: load i32, i32*
|
||||
; NOUNROLL-NOT: load i32, i32*
|
||||
|
||||
; Single-block loop over %src. Each iteration loads %src[%i] and stays in the
; loop only while the loaded value equals 1 and %inc is still below 4; on exit
; the current %i is stored to %dst.
; NOTE(review): the C snippet above breaks when src[i] == 1, whereas this IR
; continues looping in that case - confirm which one is intended.
define void @test(i32* %dst, i32* %src) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%0 = sext i32 %i to i64
|
||||
%1 = getelementptr inbounds i32, i32* %src, i64 %0
|
||||
%2 = load i32, i32* %1
|
||||
%inc = add nsw i32 %i, 1
|
||||
%cmp1 = icmp slt i32 %inc, 4
|
||||
%cmp3 = icmp eq i32 %2, 1
|
||||
%or.cond = and i1 %cmp3, %cmp1 ; despite the name this is a logical AND of both conditions
|
||||
br i1 %or.cond, label %for.body, label %exit
|
||||
|
||||
exit: ; preds = %for.body
|
||||
store i32 %i, i32* %dst
|
||||
|
||||
ret void
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
# Mark the tests covered by this config as unsupported when 'AArch64' is not
# listed in config.root.targets.
if not 'AArch64' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,76 +0,0 @@
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
|
||||
|
||||
; Partial unroll 8 times for this loop.
|
||||
; Minimal counted loop: the only work per iteration is the induction-variable
; increment, and the loop exits once %inc reaches 1024 (unsigned compare).
define void @unroll1() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
||||
|
||||
; Partial unroll 16 times for this loop.
|
||||
; Two-level loop nest with empty bodies: both loops only increment their own
; induction variable. The inner loop (%loop2) exits once %inc2 reaches 1024.
define void @unroll2() nounwind {
|
||||
entry:
|
||||
br label %loop1
|
||||
|
||||
loop1:
|
||||
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
|
||||
br label %loop2.header
|
||||
|
||||
loop2.header:
|
||||
br label %loop2
|
||||
|
||||
loop2:
|
||||
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
|
||||
%inc2 = add i32 %iv2, 1
|
||||
%exitcnd2 = icmp uge i32 %inc2, 1024
|
||||
br i1 %exitcnd2, label %exit2, label %loop2
|
||||
|
||||
exit2:
|
||||
br label %loop1.latch
|
||||
|
||||
loop1.latch:
|
||||
%inc1 = add i32 %iv1, 1
|
||||
%exitcnd1 = icmp uge i32 %inc1, 1024 ; computed but never used in this function
|
||||
br i1 %exitcnd2, label %exit, label %loop1 ; NOTE(review): tests the inner-loop condition; %exitcnd1 may have been intended - confirm before changing, the expectations below depend on the current output
|
||||
|
||||
exit:
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
@ -1,39 +0,0 @@
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; Tests for unrolling loops with run-time trip counts
|
||||
|
||||
; EPILOG: %xtraiter = and i32 %n
|
||||
; EPILOG: for.body:
|
||||
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
|
||||
; EPILOG: for.body.epil:
|
||||
|
||||
; PROLOG: %xtraiter = and i32 %n
|
||||
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: for.body:
|
||||
|
||||
; Sums i32 elements of %a starting at index 0 and returns the total. The trip
; count is only known at run time: the loop is skipped when %n is 0 and
; otherwise exits once the truncated 64-bit index equals %n.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %for.end, label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %0, %sum.02
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 %n
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 when the loop was skipped, otherwise the final sum
|
||||
ret i32 %sum.0.lcssa
|
||||
}
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
# Mark the tests covered by this config as unsupported when 'AMDGPU' is not
# listed in config.root.targets.
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,33 +0,0 @@
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @test_unroll_convergent_barrier(
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NOT: br
|
||||
; Four-iteration loop (exits when %indvars.iv.next equals 4). Each iteration
; loads %in[i], calls the s.barrier intrinsic, adds the loaded value to a
; running sum and stores that sum to %out[i].
define amdgpu_kernel void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
||||
%arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
|
||||
%arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
|
||||
%load = load i32, i32 addrspace(1)* %arrayidx.in
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%add = add i32 %load, %sum.02
|
||||
store i32 %add, i32 addrspace(1)* %arrayidx.out
|
||||
%indvars.iv.next = add i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 4
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Barrier intrinsic called inside the loop above; attribute group #1 marks it
; convergent.
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
@ -1,154 +0,0 @@
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
|
||||
|
||||
; Check that we fully unroll the loop so that the alloca can be eliminated
|
||||
; CHECK-LABEL: @non_invariant_ind
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: br
|
||||
; CHECK: store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
|
||||
; CHECK: ret void
|
||||
|
||||
; 100-iteration loop that copies %a[i] into the 64-element alloca %arr at
; index (i + workitem id) srem 64, so the index changes every iteration.
; After the loop, %arr[%x] is loaded and stored to %a[workitem id].
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
|
||||
entry:
|
||||
%arr = alloca [64 x i32], align 4
|
||||
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
%arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
|
||||
%tmp15 = load i32, i32* %arrayidx5, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
|
||||
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%idxprom = sext i32 %i.015 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
|
||||
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
||||
%add = add nsw i32 %i.015, %tmp1
|
||||
%rem = srem i32 %add, 64
|
||||
%arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
|
||||
store i32 %tmp16, i32* %arrayidx3, align 4
|
||||
%inc = add nuw nsw i32 %i.015, 1
|
||||
%exitcond = icmp eq i32 %inc, 100
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; Check that we unroll inner loop but not outer
|
||||
; CHECK-LABEL: @invariant_ind
|
||||
; CHECK: %[[exitcond:[^ ]+]] = icmp eq i32 %{{.*}}, 32
|
||||
; CHECK: br i1 %[[exitcond]]
|
||||
; CHECK-NOT: icmp eq i32 %{{.*}}, 100
|
||||
|
||||
; Loop nest: the outer loop runs 32 times and loads %a[i] once per iteration;
; the inner loop runs 100 times and stores that same loaded value into the
; 64-element alloca %arr at index (j + workitem id) srem 64. After the nest,
; %arr[%x] is loaded and stored to %a[workitem id].
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
|
||||
entry:
|
||||
%arr = alloca [64 x i32], align 4
|
||||
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
br label %for.cond2.preheader
|
||||
|
||||
for.cond2.preheader: ; preds = %for.cond.cleanup5, %entry
|
||||
%i.026 = phi i32 [ 0, %entry ], [ %inc10, %for.cond.cleanup5 ]
|
||||
%idxprom = sext i32 %i.026 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
|
||||
%tmp15 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
||||
br label %for.body6
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup5
|
||||
%arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
|
||||
%tmp16 = load i32, i32* %arrayidx13, align 4
|
||||
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
|
||||
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
|
||||
ret void
|
||||
|
||||
for.cond.cleanup5: ; preds = %for.body6
|
||||
%inc10 = add nuw nsw i32 %i.026, 1
|
||||
%exitcond27 = icmp eq i32 %inc10, 32
|
||||
br i1 %exitcond27, label %for.cond.cleanup, label %for.cond2.preheader
|
||||
|
||||
for.body6: ; preds = %for.body6, %for.cond2.preheader
|
||||
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
|
||||
%add = add nsw i32 %j.025, %tmp1
|
||||
%rem = srem i32 %add, 64
|
||||
%arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
|
||||
store i32 %tmp15, i32* %arrayidx8, align 4
|
||||
%inc = add nuw nsw i32 %j.025, 1
|
||||
%exitcond = icmp eq i32 %inc, 100
|
||||
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
|
||||
}
|
||||
|
||||
; Check we do not enforce unroll if alloca is too big
|
||||
; CHECK-LABEL: @too_big
|
||||
; CHECK: for.body:
|
||||
; CHECK: icmp eq i32 %{{.*}}, 100
|
||||
; CHECK: br
|
||||
|
||||
; Same loop shape as a 100-iteration copy from %a into a local array indexed
; by (i + workitem id) srem 64, but here the alloca %arr has 256 elements.
; After the loop, %arr[%x] is loaded and stored to %a[workitem id].
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
|
||||
entry:
|
||||
%arr = alloca [256 x i32], align 4
|
||||
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
%arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
|
||||
%tmp15 = load i32, i32* %arrayidx5, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
|
||||
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%idxprom = sext i32 %i.015 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
|
||||
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
||||
%add = add nsw i32 %i.015, %tmp1
|
||||
%rem = srem i32 %add, 64
|
||||
%arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
|
||||
store i32 %tmp16, i32* %arrayidx3, align 4
|
||||
%inc = add nuw nsw i32 %i.015, 1
|
||||
%exitcond = icmp eq i32 %inc, 100
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; Check we do not enforce unroll if alloca is dynamic
|
||||
; CHECK-LABEL: @dynamic_size_alloca(
|
||||
; CHECK: alloca i32, i32 %n
|
||||
; CHECK: for.body:
|
||||
; CHECK: icmp eq i32 %{{.*}}, 100
|
||||
; CHECK: br
|
||||
|
||||
; 100-iteration loop that copies %a[i] into a local buffer at index
; (i + workitem id) srem 64. The buffer %arr is an alloca whose element count
; is the runtime argument %n. After the loop, %arr[%x] is loaded and stored to
; %a[workitem id].
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
|
||||
entry:
|
||||
%arr = alloca i32, i32 %n, align 4
|
||||
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
|
||||
%tmp15 = load i32, i32* %arrayidx5, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
|
||||
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%idxprom = sext i32 %i.015 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
|
||||
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
||||
%add = add nsw i32 %i.015, %tmp1
|
||||
%rem = srem i32 %add, 64
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
|
||||
store i32 %tmp16, i32* %arrayidx3, align 4
|
||||
%inc = add nuw nsw i32 %i.015, 1
|
||||
%exitcond = icmp eq i32 %inc, 100
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; Intrinsic declarations for this test file. Of these, the kernels visible
; above call only @llvm.amdgcn.workitem.id.x.
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1
|
||||
|
||||
attributes #1 = { nounwind readnone }
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user