You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
@ -1,215 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s
|
||||
|
||||
; Provide legal integer types.
|
||||
target datalayout = "n8:16:32:64"
|
||||
|
||||
|
||||
; foobar: an i32 counter starting at 0 and stepping by 1 (nuw/nsw) is converted
; with uitofp on every iteration. The directives below expect a double-typed
; phi in the output of -loop-reduce.
define void @foobar(i32 %n) nounwind {
|
||||
|
||||
; CHECK-LABEL: foobar(
|
||||
; CHECK: phi double
|
||||
|
||||
entry:
|
||||
%cond = icmp eq i32 %n, 0 ; <i1>:0 [#uses=2]
|
||||
; Skip the loop entirely when %n is zero.
br i1 %cond, label %return, label %bb.nph
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
; Trip count: %n, or 1 if %cond is true (%cond is false on this path).
%umax = select i1 %cond, i32 1, i32 %n ; <i32> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb, %bb.nph
|
||||
%i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
|
||||
tail call void @bar( i32 %i.03 ) nounwind
|
||||
; Integer-to-double conversion of the induction variable inside the loop.
%tmp1 = uitofp i32 %i.03 to double ; <double>:1 [#uses=1]
|
||||
tail call void @foo( double %tmp1 ) nounwind
|
||||
%indvar.next = add nsw nuw i32 %i.03, 1 ; <i32> [#uses=2]
|
||||
%exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %return, label %bb
|
||||
|
||||
return: ; preds = %bb, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; Unable to eliminate cast because the mantissa bits for double are not enough
|
||||
; to hold all of i64 IV bits.
|
||||
define void @foobar2(i64 %n) nounwind {
|
||||
|
||||
; CHECK-LABEL: foobar2(
|
||||
; CHECK-NOT: phi double
|
||||
; CHECK-NOT: phi float
|
||||
|
||||
entry:
|
||||
%cond = icmp eq i64 %n, 0 ; <i1>:0 [#uses=2]
|
||||
br i1 %cond, label %return, label %bb.nph
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
%umax = select i1 %cond, i64 1, i64 %n ; <i64> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb, %bb.nph
|
||||
%i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
|
||||
%tmp1 = trunc i64 %i.03 to i32 ; <i32>:1 [#uses=1]
|
||||
tail call void @bar( i32 %tmp1 ) nounwind
|
||||
%tmp2 = uitofp i64 %i.03 to double ; <double>:2 [#uses=1]
|
||||
tail call void @foo( double %tmp2 ) nounwind
|
||||
%indvar.next = add nsw nuw i64 %i.03, 1 ; <i64> [#uses=2]
|
||||
%exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %return, label %bb
|
||||
|
||||
return: ; preds = %bb, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; Unable to eliminate cast due to potential overflow.
|
||||
define void @foobar3() nounwind {
|
||||
|
||||
; CHECK-LABEL: foobar3(
|
||||
; CHECK-NOT: phi double
|
||||
; CHECK-NOT: phi float
|
||||
|
||||
entry:
|
||||
%tmp0 = tail call i32 (...) @nn( ) nounwind ; <i32>:0 [#uses=1]
|
||||
%cond = icmp eq i32 %tmp0, 0 ; <i1>:1 [#uses=1]
|
||||
br i1 %cond, label %return, label %bb
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
|
||||
tail call void @bar( i32 %i.03 ) nounwind
|
||||
%tmp2 = uitofp i32 %i.03 to double ; <double>:2 [#uses=1]
|
||||
tail call void @foo( double %tmp2 ) nounwind
|
||||
%indvar.next = add nuw nsw i32 %i.03, 1 ; <i32>:3 [#uses=2]
|
||||
%tmp4 = tail call i32 (...) @nn( ) nounwind ; <i32>:4 [#uses=1]
|
||||
%exitcond = icmp ugt i32 %tmp4, %indvar.next ; <i1>:5 [#uses=1]
|
||||
br i1 %exitcond, label %bb, label %return
|
||||
|
||||
return: ; preds = %bb, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; Unable to eliminate cast due to overflow.
|
||||
; foobar4: the induction variable is only 8 bits wide, and the exit test
; compares its sign-extended value against 32767, a value that a sign-extended
; i8 cannot take. The directives below expect no floating-point phi.
define void @foobar4() nounwind {
|
||||
|
||||
; CHECK-LABEL: foobar4(
|
||||
; CHECK-NOT: phi double
|
||||
; CHECK-NOT: phi float
|
||||
|
||||
entry:
|
||||
br label %bb.nph
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb, %bb.nph
|
||||
%i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i8> [#uses=3]
|
||||
%tmp2 = sext i8 %i.03 to i32 ; <i32>:0 [#uses=1]
|
||||
tail call void @bar( i32 %tmp2 ) nounwind
|
||||
; Unsigned conversion of the i8 counter; this is the cast under test.
%tmp3 = uitofp i8 %i.03 to double ; <double>:1 [#uses=1]
|
||||
tail call void @foo( double %tmp3 ) nounwind
|
||||
%indvar.next = add nsw nuw i8 %i.03, 1 ; <i8> [#uses=2]
|
||||
; The comparison is done in i32 on the sign-extended i8 value.
%tmp = sext i8 %indvar.next to i32
|
||||
%exitcond = icmp eq i32 %tmp, 32767 ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %return, label %bb
|
||||
|
||||
return: ; preds = %bb
|
||||
ret void
|
||||
}
|
||||
|
||||
; Unable to eliminate cast because the integer IV overflows (accum exceeds
|
||||
; SINT_MAX).
|
||||
|
||||
define i32 @foobar5() {
|
||||
; CHECK-LABEL: foobar5(
|
||||
; CHECK-NOT: phi double
|
||||
; CHECK-NOT: phi float
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
|
||||
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
|
||||
%tmp1 = sitofp i32 %accum to double
|
||||
tail call void @foo( double %tmp1 ) nounwind
|
||||
%accum.next = add i32 %accum, 9597741
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%exitcond = icmp ugt i32 %iv, 235
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %accum.next
|
||||
}
|
||||
|
||||
; Can eliminate if we set nsw and, thus, think that we don't overflow SINT_MAX.
|
||||
|
||||
define i32 @foobar6() {
|
||||
; CHECK-LABEL: foobar6(
|
||||
; CHECK: phi double
|
||||
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
|
||||
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
|
||||
%tmp1 = sitofp i32 %accum to double
|
||||
tail call void @foo( double %tmp1 ) nounwind
|
||||
%accum.next = add nsw i32 %accum, 9597741
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%exitcond = icmp ugt i32 %iv, 235
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %accum.next
|
||||
}
|
||||
|
||||
; Unable to eliminate cast because the integer IV overflows (accum exceeds
|
||||
; UINT_MAX).
|
||||
|
||||
define i32 @foobar7() {
|
||||
; CHECK-LABEL: foobar7(
|
||||
; CHECK-NOT: phi double
|
||||
; CHECK-NOT: phi float
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
|
||||
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
|
||||
%tmp1 = uitofp i32 %accum to double
|
||||
tail call void @foo( double %tmp1 ) nounwind
|
||||
%accum.next = add i32 %accum, 9597741
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%exitcond = icmp ugt i32 %iv, 235
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %accum.next
|
||||
}
|
||||
|
||||
; Can eliminate if we set nuw and, thus, think that we don't overflow UINT_MAX.
|
||||
|
||||
define i32 @foobar8() {
|
||||
; CHECK-LABEL: foobar8(
|
||||
; CHECK: phi double
|
||||
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
|
||||
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
|
||||
%tmp1 = uitofp i32 %accum to double
|
||||
tail call void @foo( double %tmp1 ) nounwind
|
||||
%accum.next = add nuw i32 %accum, 9597741
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%exitcond = icmp ugt i32 %iv, 235
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %accum.next
|
||||
}
|
||||
|
||||
declare void @bar(i32)
|
||||
|
||||
declare void @foo(double)
|
||||
|
||||
declare i32 @nn(...)
|
@ -1,130 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin11
|
||||
|
||||
define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE() nounwind ssp align 2 {
|
||||
entry:
|
||||
br i1 undef, label %bb3.i, label %bb4.i
|
||||
|
||||
bb3.i: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
bb4.i: ; preds = %entry
|
||||
br i1 undef, label %bb.i.i, label %_ZNK4llvm8CallSite14getCalledValueEv.exit
|
||||
|
||||
bb.i.i: ; preds = %bb4.i
|
||||
unreachable
|
||||
|
||||
_ZNK4llvm8CallSite14getCalledValueEv.exit: ; preds = %bb4.i
|
||||
br i1 undef, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i
|
||||
|
||||
bb6.i: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
|
||||
unreachable
|
||||
|
||||
_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
|
||||
br i1 undef, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i
|
||||
|
||||
bb.i: ; preds = %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
|
||||
br label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
|
||||
|
||||
_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit: ; preds = %bb.i, %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
|
||||
br i1 undef, label %bb50, label %bb27
|
||||
|
||||
bb27: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
|
||||
br i1 undef, label %bb1.i727, label %bb.i.i726
|
||||
|
||||
bb.i.i726: ; preds = %bb27
|
||||
unreachable
|
||||
|
||||
bb1.i727: ; preds = %bb27
|
||||
unreachable
|
||||
|
||||
bb50: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
|
||||
br label %bb107
|
||||
|
||||
bb51: ; preds = %bb107
|
||||
br i1 undef, label %bb105, label %bb106
|
||||
|
||||
bb105: ; preds = %bb51
|
||||
unreachable
|
||||
|
||||
bb106: ; preds = %bb51
|
||||
br label %bb107
|
||||
|
||||
bb107: ; preds = %bb106, %bb50
|
||||
br i1 undef, label %bb108, label %bb51
|
||||
|
||||
bb108: ; preds = %bb107
|
||||
br i1 undef, label %bb242, label %bb114
|
||||
|
||||
bb114: ; preds = %bb108
|
||||
br i1 undef, label %bb141, label %bb116
|
||||
|
||||
bb116: ; preds = %bb114
|
||||
br i1 undef, label %bb120, label %bb121
|
||||
|
||||
bb120: ; preds = %bb116
|
||||
unreachable
|
||||
|
||||
bb121: ; preds = %bb116
|
||||
unreachable
|
||||
|
||||
bb141: ; preds = %bb114
|
||||
br i1 undef, label %bb182, label %bb143
|
||||
|
||||
bb143: ; preds = %bb141
|
||||
br label %bb157
|
||||
|
||||
bb144: ; preds = %bb.i.i.i843
|
||||
switch i32 undef, label %bb155 [
|
||||
i32 2, label %bb153
|
||||
i32 6, label %bb153
|
||||
i32 4, label %bb153
|
||||
]
|
||||
|
||||
bb153: ; preds = %bb144, %bb144, %bb144
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||
br label %bb157
|
||||
|
||||
bb155: ; preds = %bb144
|
||||
unreachable
|
||||
|
||||
bb157: ; preds = %bb153, %bb143
|
||||
%indvar = phi i32 [ %indvar.next, %bb153 ], [ 0, %bb143 ] ; <i32> [#uses=2]
|
||||
%0 = icmp eq i32 undef, %indvar ; <i1> [#uses=1]
|
||||
switch i16 undef, label %bb6.i841 [
|
||||
i16 9, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
|
||||
i16 26, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
|
||||
]
|
||||
|
||||
bb6.i841: ; preds = %bb157
|
||||
unreachable
|
||||
|
||||
_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit: ; preds = %bb157, %bb157
|
||||
br i1 undef, label %bb.i.i.i843, label %bb1.i.i.i844
|
||||
|
||||
bb.i.i.i843: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
|
||||
br i1 %0, label %bb158, label %bb144
|
||||
|
||||
bb1.i.i.i844: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
|
||||
unreachable
|
||||
|
||||
bb158: ; preds = %bb.i.i.i843
|
||||
br i1 undef, label %bb177, label %bb176
|
||||
|
||||
bb176: ; preds = %bb158
|
||||
unreachable
|
||||
|
||||
bb177: ; preds = %bb158
|
||||
br i1 undef, label %bb179, label %bb178
|
||||
|
||||
bb178: ; preds = %bb177
|
||||
unreachable
|
||||
|
||||
bb179: ; preds = %bb177
|
||||
unreachable
|
||||
|
||||
bb182: ; preds = %bb141
|
||||
unreachable
|
||||
|
||||
bb242: ; preds = %bb108
|
||||
unreachable
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
;
|
||||
; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
|
||||
; nonzero initial value.
|
||||
; rdar://9786536
|
||||
|
||||
; Provide legal integer types.
|
||||
target datalayout = "n8:16:32:64"
|
||||
|
||||
|
||||
; First, make sure LSR doesn't crash on an empty IVUsers list.
|
||||
; CHECK-LABEL: @dummyIV(
|
||||
; CHECK-NOT: phi
|
||||
; CHECK-NOT: sitofp
|
||||
; CHECK: br
|
||||
; dummyIV: the sitofp result %conv has no users, so the integer IV feeds only
; a dead conversion and its own increment.
define void @dummyIV() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
; Integer IV with a nonzero start value (-39), stepping by 1.
%i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
|
||||
; Dead conversion: %conv is never used.
%conv = sitofp i32 %i.01 to double
|
||||
%inc = add nsw i32 %i.01, 1
|
||||
; The loop condition is undef; the test only needs a loop shape.
br i1 undef, label %loop, label %for.end
|
||||
|
||||
for.end:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Now check that the computed double constant is correct.
|
||||
; CHECK-LABEL: @doubleIV(
|
||||
; CHECK: phi double [ -3.900000e+01, %entry ]
|
||||
; CHECK: br
|
||||
; doubleIV: same loop as dummyIV, but here the converted value is consumed by
; an fdiv, so the conversion has a user inside the loop. The integer start
; value is -39; the directives above expect a double phi starting at
; -3.900000e+01.
define void @doubleIV() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
|
||||
; Signed conversion of the IV to double.
%conv = sitofp i32 %i.01 to double
|
||||
; Floating-point user of the converted IV (%div itself is unused).
%div = fdiv double %conv, 4.000000e+01
|
||||
%inc = add nsw i32 %i.01, 1
|
||||
br i1 undef, label %loop, label %for.end
|
||||
|
||||
for.end:
|
||||
unreachable
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
;
|
||||
; PR11431: handle a phi operand that is replaced by a postinc user.
|
||||
; LSR first expands %t3 to %t2 in %phi
|
||||
; LSR then expands %t2 in %phi into two decrements, one on each loop exit.
|
||||
|
||||
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare i1 @check() nounwind
|
||||
|
||||
; Check that LSR did something close to the behavior at the time of the bug.
|
||||
; CHECK: @sqlite3DropTriggerPtr
|
||||
; CHECK: incq %r{{[a-d]}}x
|
||||
; CHECK: jne
|
||||
; CHECK: decq %r{{[a-d]}}x
|
||||
; CHECK: ret
|
||||
; Two-block loop (bb1 -> bb4 -> bb1) with two exits into bb8. The phi in bb8
; receives the post-increment value %t3 on the exit from bb1 and the
; pre-increment value %t2 on the exit from bb4.
define i64 @sqlite3DropTriggerPtr() nounwind {
|
||||
bb:
|
||||
; Loop-invariant condition, computed once and used by both loop branches.
%cmp = call zeroext i1 @check()
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb4, %bb
|
||||
; %t0 starts at 0 and %t2 starts at 1; each is advanced by 1 per iteration.
%t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ]
|
||||
%t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ]
|
||||
%t3 = add nsw i64 %t0, 1
|
||||
br i1 %cmp, label %bb4, label %bb8
|
||||
|
||||
bb4: ; preds = %bb1
|
||||
%t5 = add nsw i64 %t2, 1
|
||||
br i1 %cmp, label %bb1, label %bb8
|
||||
|
||||
bb8: ; preds = %bb4, %bb1
|
||||
%phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ]
|
||||
ret i64 %phi
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -S | FileCheck %s
|
||||
;
|
||||
; Test LSR's ability to prune formulae that refer to nonexistent
|
||||
; AddRecs in other loops.
|
||||
;
|
||||
; Unable to reduce this case further because it requires LSR to exceed
|
||||
; ComplexityLimit.
|
||||
;
|
||||
; We really just want to ensure that LSR can process this loop without
|
||||
; finding an unsatisfactory solution and bailing out. I've added
|
||||
; dummyout, an obvious candidate for postinc replacement so we can
|
||||
; verify that LSR removes it.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-darwin"
|
||||
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK: for.body:
|
||||
; CHECK: %lsr.iv
|
||||
; CHECK-NOT: %dummyout
|
||||
; CHECK: ret
|
||||
define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
|
||||
entry:
|
||||
%cmp34 = icmp eq i64 %count, 0
|
||||
br i1 %cmp34, label %for.end29, label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
|
||||
%indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
|
||||
%dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
|
||||
%p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
|
||||
%incdec.ptr = getelementptr inbounds float, float* %p.035, i64 1
|
||||
%0 = load float, float* %incdec.ptr, align 4
|
||||
%incdec.ptr2 = getelementptr inbounds float, float* %p.035, i64 2
|
||||
%1 = load float, float* %incdec.ptr2, align 4
|
||||
%incdec.ptr3 = getelementptr inbounds float, float* %p.035, i64 3
|
||||
%2 = load float, float* %incdec.ptr3, align 4
|
||||
%incdec.ptr4 = getelementptr inbounds float, float* %p.035, i64 4
|
||||
%3 = load float, float* %incdec.ptr4, align 4
|
||||
%4 = load i32, i32* %dp.036, align 4
|
||||
%conv5 = fptoui float %0 to i32
|
||||
%or = or i32 %4, %conv5
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %dp.036, i64 1
|
||||
%5 = load i32, i32* %arrayidx6, align 4
|
||||
%conv7 = fptoui float %1 to i32
|
||||
%or8 = or i32 %5, %conv7
|
||||
%arrayidx9 = getelementptr inbounds i32, i32* %dp.036, i64 2
|
||||
%6 = load i32, i32* %arrayidx9, align 4
|
||||
%conv10 = fptoui float %2 to i32
|
||||
%or11 = or i32 %6, %conv10
|
||||
%arrayidx12 = getelementptr inbounds i32, i32* %dp.036, i64 3
|
||||
%7 = load i32, i32* %arrayidx12, align 4
|
||||
%conv13 = fptoui float %3 to i32
|
||||
%or14 = or i32 %7, %conv13
|
||||
store i32 %or, i32* %dp.036, align 4
|
||||
store i32 %or8, i32* %arrayidx6, align 4
|
||||
store i32 %or11, i32* %arrayidx9, align 4
|
||||
store i32 %or14, i32* %arrayidx12, align 4
|
||||
%add.ptr = getelementptr inbounds i32, i32* %dp.036, i64 4
|
||||
%indvars.iv.next40 = add i64 %indvars.iv39, 4
|
||||
%dummycnt = add i64 %dummyiv, 1
|
||||
%cmp = icmp ult i64 %indvars.iv.next40, %count
|
||||
br i1 %cmp, label %for.body, label %for.cond19.preheader
|
||||
|
||||
for.cond19.preheader: ; preds = %for.body
|
||||
%dummyout = add i64 %dummyiv, 1
|
||||
%rem = and i64 %count, 3
|
||||
%cmp2130 = icmp eq i64 %rem, 0
|
||||
br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
|
||||
|
||||
for.body23.lr.ph: ; preds = %for.cond19.preheader
|
||||
%8 = and i64 %count, 3
|
||||
br label %for.body23
|
||||
|
||||
for.body23: ; preds = %for.body23, %for.body23.lr.ph
|
||||
%indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
|
||||
%dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
|
||||
%p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
|
||||
%incdec.ptr24 = getelementptr inbounds float, float* %p.131, i64 1
|
||||
%9 = load float, float* %incdec.ptr24, align 4
|
||||
%10 = load i32, i32* %dp.132, align 4
|
||||
%conv25 = fptoui float %9 to i32
|
||||
%or26 = or i32 %10, %conv25
|
||||
store i32 %or26, i32* %dp.132, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%incdec.ptr28 = getelementptr inbounds i32, i32* %dp.132, i64 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %8
|
||||
br i1 %exitcond, label %for.end29, label %for.body23
|
||||
|
||||
for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader
|
||||
%result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
|
||||
ret i64 %result
|
||||
}
|
@ -1,147 +0,0 @@
|
||||
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
|
||||
|
||||
declare i1 @check() nounwind
|
||||
declare i1 @foo(i8*, i8*, i8*) nounwind
|
||||
|
||||
; Check that redundant phi elimination ran
|
||||
; CHECK: @test
|
||||
; CHECK: %while.body.i
|
||||
; CHECK: movs
|
||||
; CHECK-NOT: movs
|
||||
; CHECK: %for.end.i
|
||||
; Nested loops: an outer loop (while.body.i) stepping by 16 and an inner loop
; (for.body.i) that loads one byte per iteration. The outer loop carries two
; phis, and %i.05.i always holds the previous value of %indvars.iv7.i.
define i32 @test(i8* %base) nounwind uwtable ssp {
|
||||
entry:
|
||||
br label %while.body.lr.ph.i
|
||||
|
||||
while.body.lr.ph.i: ; preds = %entry
|
||||
br label %while.body.i
|
||||
|
||||
while.body.i: ; preds = %cond.true29.i, %while.body.lr.ph.i
|
||||
; Outer IV: starts at 16 and is advanced by 16 in cond.true29.i.
%indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
|
||||
; Lagging copy: 0 on entry, then the prior iteration's %indvars.iv7.i.
%i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
|
||||
; shl by 32 followed by ashr by 32 sign-extends the low 32 bits.
%sext.i = shl i64 %i.05.i, 32
|
||||
%idx.ext.i = ashr exact i64 %sext.i, 32
|
||||
%add.ptr.sum.i = add i64 %idx.ext.i, 16
|
||||
br label %for.body.i
|
||||
|
||||
for.body.i: ; preds = %for.body.i, %while.body.i
|
||||
; Inner IV: starts at 0 and steps by 1.
%indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
|
||||
%add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
|
||||
%arrayidx22.i = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum
|
||||
; The loaded byte %0 is not used afterwards.
%0 = load i8, i8* %arrayidx22.i, align 1
|
||||
%indvars.iv.next.i = add i64 %indvars.iv.i, 1
|
||||
; The inner loop exit is decided by an external call.
%cmp = call i1 @check() nounwind
|
||||
br i1 %cmp, label %for.end.i, label %for.body.i
|
||||
|
||||
for.end.i: ; preds = %for.body.i
|
||||
%add.ptr.i144 = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum.i
|
||||
%cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
|
||||
br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
|
||||
|
||||
cond.true29.i: ; preds = %for.end.i
|
||||
%indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
|
||||
; Constant-false condition: control always returns to while.body.i.
br i1 false, label %exit, label %while.body.i
|
||||
|
||||
cond.false35.i: ; preds = %for.end.i
|
||||
unreachable
|
||||
|
||||
exit: ; preds = %cond.true29.i
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
|
||||
|
||||
@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
|
||||
|
||||
; PR11782: SCEVExpander assert
|
||||
;
|
||||
; Test phi reuse after LSR that requires SCEVExpander to hoist an
|
||||
; interesting GEP.
|
||||
;
|
||||
; CHECK: @test2
|
||||
; CHECK: %entry
|
||||
; CHECK-NOT: mov
|
||||
; CHECK: je
|
||||
define void @test2(i32 %n) nounwind uwtable {
|
||||
entry:
|
||||
br i1 undef, label %while.end, label %for.cond468
|
||||
|
||||
for.cond468: ; preds = %if.then477, %entry
|
||||
%indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
|
||||
%k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
|
||||
%k.0 = load i32, i32* %k.0.in, align 4
|
||||
%0 = trunc i64 %indvars.iv1163 to i32
|
||||
%cmp469 = icmp slt i32 %0, %n
|
||||
br i1 %cmp469, label %for.body471, label %for.inc498
|
||||
|
||||
for.body471: ; preds = %for.cond468
|
||||
%first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
|
||||
%1 = load i32, i32* %first, align 4
|
||||
br i1 undef, label %if.then477, label %for.inc498
|
||||
|
||||
if.then477: ; preds = %for.body471
|
||||
%last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
|
||||
%indvars.iv.next1164 = add i64 %indvars.iv1163, 1
|
||||
br label %for.cond468
|
||||
|
||||
for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468
|
||||
br label %for.inc498
|
||||
|
||||
while.end: ; preds = %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR12898: SCEVExpander crash
|
||||
; Test redundant phi elimination when the deleted phi's increment is
|
||||
; itself a phi.
|
||||
;
|
||||
; CHECK: @test3
|
||||
; CHECK: %for.body3.lr.ph.us.i.loopexit
|
||||
; CHECK-NEXT: Parent Loop
|
||||
; CHECK-NEXT: Inner Loop
|
||||
; CHECK-NEXT: incq
|
||||
; CHECK: testb
|
||||
; CHECK: je
|
||||
; CHECK: jmp
|
||||
define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
|
||||
entry:
|
||||
br i1 undef, label %meshBB1, label %meshBB5
|
||||
|
||||
for.inc8.us.i: ; preds = %for.body3.us.i
|
||||
br i1 undef, label %meshBB1, label %meshBB
|
||||
|
||||
for.body3.us.i: ; preds = %meshBB, %for.body3.lr.ph.us.i
|
||||
%indvars.iv.i.SV.phi = phi i64 [ %indvars.iv.next.i, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
|
||||
%storemerge13.us.i.SV.phi = phi i32 [ 0, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
|
||||
%Opq.sa.calc12 = sub i32 undef, 227
|
||||
%0 = add nsw i64 %indvars.iv.i.SV.phi, %indvars.iv8.i.SV.phi26
|
||||
%1 = trunc i64 %0 to i32
|
||||
%mul.i.us.i = mul nsw i32 0, %1
|
||||
%arrayidx5.us.i = getelementptr inbounds double, double* %u, i64 %indvars.iv.i.SV.phi
|
||||
%2 = load double, double* %arrayidx5.us.i, align 8
|
||||
%indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1
|
||||
br i1 undef, label %for.inc8.us.i, label %meshBB
|
||||
|
||||
for.body3.lr.ph.us.i: ; preds = %meshBB1, %meshBB
|
||||
%indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ]
|
||||
%arrayidx.us.i = getelementptr inbounds double, double* undef, i64 %indvars.iv8.i.SV.phi26
|
||||
%3 = add i64 %indvars.iv8.i.SV.phi26, 1
|
||||
br label %for.body3.us.i
|
||||
|
||||
for.inc8.us.i2: ; preds = %meshBB5
|
||||
unreachable
|
||||
|
||||
eval_At_times_u.exit: ; preds = %meshBB5
|
||||
ret void
|
||||
|
||||
meshBB: ; preds = %for.body3.us.i, %for.inc8.us.i
|
||||
%indvars.iv8.i.SV.phi24 = phi i64 [ undef, %for.body3.us.i ], [ %3, %for.inc8.us.i ]
|
||||
%meshStackVariable.phi = phi i32 [ %Opq.sa.calc12, %for.body3.us.i ], [ undef, %for.inc8.us.i ]
|
||||
br i1 undef, label %for.body3.lr.ph.us.i, label %for.body3.us.i
|
||||
|
||||
meshBB1: ; preds = %for.inc8.us.i, %entry
|
||||
br label %for.body3.lr.ph.us.i
|
||||
|
||||
meshBB5: ; preds = %entry
|
||||
br i1 undef, label %eval_At_times_u.exit, label %for.inc8.us.i2
|
||||
}
|
@ -1,264 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Show that the b^2 is expanded correctly.
|
||||
define i32 @test_01(i32 %a) {
|
||||
; CHECK-LABEL: @test_01
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
|
||||
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
|
||||
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B2]], -1
|
||||
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
|
||||
; CHECK-NEXT: ret i32 [[R2]]
|
||||
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%b = add i32 %a, 1
|
||||
%b.pow.2 = mul i32 %b, %b
|
||||
%result = add i32 %b.pow.2, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; Show that b^8 is expanded correctly.
|
||||
define i32 @test_02(i32 %a) {
|
||||
; CHECK-LABEL: @test_02
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
|
||||
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
|
||||
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
|
||||
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
|
||||
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B8]], -1
|
||||
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
|
||||
; CHECK-NEXT: ret i32 [[R2]]
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%b = add i32 %a, 1
|
||||
%b.pow.2 = mul i32 %b, %b
|
||||
%b.pow.4 = mul i32 %b.pow.2, %b.pow.2
|
||||
%b.pow.8 = mul i32 %b.pow.4, %b.pow.4
|
||||
%result = add i32 %b.pow.8, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; Show that b^27 (27 = 1 + 2 + 8 + 16) is expanded correctly.
|
||||
define i32 @test_03(i32 %a) {
|
||||
; CHECK-LABEL: @test_03
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
|
||||
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
|
||||
; CHECK-NEXT: [[B3:[^ ]+]] = mul i32 [[B]], [[B2]]
|
||||
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
|
||||
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
|
||||
; CHECK-NEXT: [[B11:[^ ]+]] = mul i32 [[B3]], [[B8]]
|
||||
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
|
||||
; CHECK-NEXT: [[B27:[^ ]+]] = mul i32 [[B11]], [[B16]]
|
||||
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B27]], -1
|
||||
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
|
||||
; CHECK-NEXT: ret i32 [[R2]]
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%b = add i32 %a, 1
|
||||
%b.pow.2 = mul i32 %b, %b
|
||||
%b.pow.4 = mul i32 %b.pow.2, %b.pow.2
|
||||
%b.pow.8 = mul i32 %b.pow.4, %b.pow.4
|
||||
%b.pow.16 = mul i32 %b.pow.8, %b.pow.8
|
||||
%b.pow.24 = mul i32 %b.pow.16, %b.pow.8
|
||||
%b.pow.25 = mul i32 %b.pow.24, %b
|
||||
%b.pow.26 = mul i32 %b.pow.25, %b
|
||||
%b.pow.27 = mul i32 %b.pow.26, %b
|
||||
%result = add i32 %b.pow.27, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; Show how linear calculation of b^16 is turned into logarithmic.
|
||||
; Input: the loop body builds (%a + 1)^16 with one multiply by %b per step and
; adds the induction variable to it. The check lines below expect the power to
; be computed in the exit block with four squarings instead.
define i32 @test_04(i32 %a) {
|
||||
; CHECK-LABEL: @test_04
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
|
||||
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
|
||||
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
|
||||
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
|
||||
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
|
||||
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B16]], -1
|
||||
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
|
||||
; CHECK-NEXT: ret i32 [[R2]]
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%b = add i32 %a, 1
|
||||
%b.pow.2 = mul i32 %b, %b
|
||||
%b.pow.3 = mul i32 %b.pow.2, %b
|
||||
|
||||
%b.pow.4 = mul i32 %b.pow.3, %b
|
||||
%b.pow.5 = mul i32 %b.pow.4, %b
|
||||
|
||||
%b.pow.6 = mul i32 %b.pow.5, %b
|
||||
|
||||
%b.pow.7 = mul i32 %b.pow.6, %b
|
||||
|
||||
%b.pow.8 = mul i32 %b.pow.7, %b
|
||||
|
||||
%b.pow.9 = mul i32 %b.pow.8, %b
|
||||
|
||||
%b.pow.10 = mul i32 %b.pow.9, %b
|
||||
|
||||
%b.pow.11 = mul i32 %b.pow.10, %b
|
||||
|
||||
%b.pow.12 = mul i32 %b.pow.11, %b
|
||||
|
||||
%b.pow.13 = mul i32 %b.pow.12, %b
|
||||
|
||||
%b.pow.14 = mul i32 %b.pow.13, %b
|
||||
|
||||
%b.pow.15 = mul i32 %b.pow.14, %b
|
||||
|
||||
%b.pow.16 = mul i32 %b.pow.15, %b
|
||||
|
||||
%result = add i32 %b.pow.16, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; The output here is reasonably big, we just check that the amount of expanded
|
||||
; instructions is sane.
|
||||
define i32 @test_05(i32 %a) {
|
||||
; CHECK-LABEL: @test_05
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK: %100
|
||||
; CHECK-NOT: %150
|
||||
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%tmp3 = add i32 %a, 1
|
||||
%tmp4 = mul i32 %tmp3, %tmp3
|
||||
%tmp5 = mul i32 %tmp4, %tmp4
|
||||
%tmp6 = mul i32 %tmp5, %tmp5
|
||||
%tmp7 = mul i32 %tmp6, %tmp6
|
||||
%tmp8 = mul i32 %tmp7, %tmp7
|
||||
%tmp9 = mul i32 %tmp8, %tmp8
|
||||
%tmp10 = mul i32 %tmp9, %tmp9
|
||||
%tmp11 = mul i32 %tmp10, %tmp10
|
||||
%tmp12 = mul i32 %tmp11, %tmp11
|
||||
%tmp13 = mul i32 %tmp12, %tmp12
|
||||
%tmp14 = mul i32 %tmp13, %tmp13
|
||||
%tmp15 = mul i32 %tmp14, %tmp14
|
||||
%tmp16 = mul i32 %tmp15, %tmp15
|
||||
%tmp17 = mul i32 %tmp16, %tmp16
|
||||
%tmp18 = mul i32 %tmp17, %tmp17
|
||||
%tmp19 = mul i32 %tmp18, %tmp18
|
||||
%tmp20 = mul i32 %tmp19, %tmp19
|
||||
%tmp22 = add i32 %tmp20, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %tmp22
|
||||
}
|
||||
|
||||
; Show that the transformation works even if the calculation involves different
|
||||
; values inside.
|
||||
define i32 @test_06(i32 %a, i32 %c) {
|
||||
; CHECK-LABEL: @test_06
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
|
||||
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
|
||||
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
|
||||
; CHECK: exit:
|
||||
; CHECK: [[B:[^ ]+]] = add i32 %a, 1
|
||||
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
|
||||
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
|
||||
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
|
||||
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
%b = add i32 %a, 1
|
||||
%b.pow.2.tmp = mul i32 %b, %b
|
||||
%b.pow.2 = mul i32 %b.pow.2.tmp, %c
|
||||
%b.pow.3 = mul i32 %b.pow.2, %b
|
||||
%b.pow.4 = mul i32 %b.pow.3, %b
|
||||
%b.pow.5 = mul i32 %b.pow.4, %b
|
||||
%b.pow.6.tmp = mul i32 %b.pow.5, %b
|
||||
%b.pow.6 = mul i32 %b.pow.6.tmp, %c
|
||||
%b.pow.7 = mul i32 %b.pow.6, %b
|
||||
%b.pow.8 = mul i32 %b.pow.7, %b
|
||||
%b.pow.9 = mul i32 %b.pow.8, %b
|
||||
%b.pow.10 = mul i32 %b.pow.9, %b
|
||||
%b.pow.11 = mul i32 %b.pow.10, %b
|
||||
%b.pow.12.tmp = mul i32 %b.pow.11, %b
|
||||
%b.pow.12 = mul i32 %c, %b.pow.12.tmp
|
||||
%b.pow.13 = mul i32 %b.pow.12, %b
|
||||
%b.pow.14 = mul i32 %b.pow.13, %b
|
||||
%b.pow.15 = mul i32 %b.pow.14, %b
|
||||
%b.pow.16 = mul i32 %b.pow.15, %b
|
||||
%result = add i32 %b.pow.16, %indvars.iv
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, 80
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret i32 %result
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s
|
||||
; PR33077. Check the LSR Use formula to be inserted is already canonicalized and
|
||||
; will not trigger assertion.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Function Attrs: uwtable
|
||||
define void @foo() {
|
||||
cHeapLvb.exit:
|
||||
br label %not_zero48.us
|
||||
|
||||
not_zero48.us: ; preds = %not_zero48.us, %cHeapLvb.exit
|
||||
%indvars.iv.us = phi i64 [ %indvars.iv.next.us.7, %not_zero48.us ], [ undef, %cHeapLvb.exit ]
|
||||
%0 = phi i32 [ %13, %not_zero48.us ], [ undef, %cHeapLvb.exit ]
|
||||
%indvars.iv.next.us = add nuw nsw i64 %indvars.iv.us, 1
|
||||
%1 = add i32 %0, 2
|
||||
%2 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %indvars.iv.next.us
|
||||
%3 = load i32, i32 addrspace(1)* %2, align 4
|
||||
%4 = add i32 %0, 3
|
||||
%5 = load i32, i32 addrspace(1)* undef, align 4
|
||||
%6 = sub i32 undef, %5
|
||||
%factor.us.2 = shl i32 %6, 1
|
||||
%7 = add i32 %factor.us.2, %1
|
||||
%8 = load i32, i32 addrspace(1)* undef, align 4
|
||||
%9 = sub i32 %7, %8
|
||||
%factor.us.3 = shl i32 %9, 1
|
||||
%10 = add i32 %factor.us.3, %4
|
||||
%11 = load i32, i32 addrspace(1)* undef, align 4
|
||||
%12 = sub i32 %10, %11
|
||||
%factor.us.4 = shl i32 %12, 1
|
||||
%13 = add i32 %0, 8
|
||||
%indvars.iv.next.us.7 = add nsw i64 %indvars.iv.us, 8
|
||||
br label %not_zero48.us
|
||||
}
|
||||
|
@ -1,65 +0,0 @@
|
||||
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
|
||||
; Check LSR formula canonicalization will put loop invariant regs before
|
||||
; induction variable of current loop, so exprs involving loop invariant regs
|
||||
; can be promoted outside of current loop.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @foo(i32 %size, i32 %nsteps, i8* nocapture %maxarray, i8* nocapture readnone %buffer, i32 %init) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%cmp25 = icmp sgt i32 %nsteps, 0
|
||||
br i1 %cmp25, label %for.cond1.preheader.lr.ph, label %for.end12
|
||||
|
||||
for.cond1.preheader.lr.ph: ; preds = %entry
|
||||
%cmp223 = icmp sgt i32 %size, 1
|
||||
%t0 = sext i32 %init to i64
|
||||
%wide.trip.count = zext i32 %size to i64
|
||||
%wide.trip.count31 = zext i32 %nsteps to i64
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.inc10, %for.cond1.preheader.lr.ph
|
||||
%indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.inc10 ]
|
||||
br i1 %cmp223, label %for.body3.lr.ph, label %for.inc10
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.cond1.preheader
|
||||
%t1 = add nsw i64 %indvars.iv28, %t0
|
||||
%t2 = trunc i64 %indvars.iv28 to i8
|
||||
br label %for.body3
|
||||
|
||||
; Make sure loop invariant items are grouped together so that load address can
|
||||
; be represented in one getelementptr.
|
||||
; CHECK-LABEL: for.body3:
|
||||
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ 1, %for.body3.lr.ph ], [ {{.*}}, %for.body3 ]
|
||||
; CHECK-NOT: = phi i64
|
||||
; CHECK-NEXT: [[LOADADDR:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
|
||||
; CHECK-NEXT: = load i8, i8* [[LOADADDR]], align 1
|
||||
; CHECK: br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
%t5 = trunc i64 %indvars.iv to i8
|
||||
%t3 = add nsw i64 %t1, %indvars.iv
|
||||
%arrayidx = getelementptr inbounds i8, i8* %maxarray, i64 %t3
|
||||
%t4 = load i8, i8* %arrayidx, align 1
|
||||
%add5 = add i8 %t4, %t5
|
||||
%add6 = add i8 %add5, %t2
|
||||
%arrayidx9 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
|
||||
store i8 %add6, i8* %arrayidx9, align 1
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
|
||||
|
||||
for.inc10.loopexit: ; preds = %for.body3
|
||||
br label %for.inc10
|
||||
|
||||
for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader
|
||||
%indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
|
||||
%exitcond32 = icmp eq i64 %indvars.iv.next29, %wide.trip.count31
|
||||
br i1 %exitcond32, label %for.end12.loopexit, label %for.cond1.preheader
|
||||
|
||||
for.end12.loopexit: ; preds = %for.inc10
|
||||
br label %for.end12
|
||||
|
||||
for.end12: ; preds = %for.end12.loopexit, %entry
|
||||
ret void
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
; RUN: opt -S -loop-reduce < %s | FileCheck %s
|
||||
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @incorrect_offset_scaling(i64, i64*) {
|
||||
top:
|
||||
br label %L
|
||||
|
||||
L: ; preds = %idxend.10, %idxend, %L2, %top
|
||||
br i1 undef, label %L, label %L1
|
||||
|
||||
L1: ; preds = %L1.preheader, %L2
|
||||
%r13 = phi i64 [ %r1, %L2 ], [ 1, %L ]
|
||||
; CHECK: %lsr.iv = phi i64 [ 0, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
|
||||
; CHECK-NOT: %lsr.iv = phi i64 [ -1, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
|
||||
; CHECK: br
|
||||
%r0 = add i64 %r13, -1
|
||||
br label %idxend.8
|
||||
|
||||
L2: ; preds = %idxend.8
|
||||
%r1 = add i64 %r13, 1
|
||||
br i1 undef, label %L, label %L1
|
||||
|
||||
if6: ; preds = %idxend.8
|
||||
%r2 = add i64 %0, -1
|
||||
%r3 = load i64, i64* %1, align 8
|
||||
; CHECK: %r2 = add i64 %0, -1
|
||||
; CHECK: %r3 = load i64
|
||||
br label %ib
|
||||
|
||||
idxend.8: ; preds = %L1
|
||||
br i1 undef, label %if6, label %L2
|
||||
|
||||
ib: ; preds = %if6
|
||||
%r4 = mul i64 %r3, %r0
|
||||
%r5 = add i64 %r2, %r4
|
||||
%r6 = icmp ult i64 %r5, undef
|
||||
; CHECK: %r4 = mul i64 %r3, %lsr.iv
|
||||
; CHECK: %r5 = add i64 %r2, %r4
|
||||
; CHECK: %r6 = icmp ult i64 %r5, undef
|
||||
; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
|
||||
%r7 = getelementptr i64, i64* undef, i64 %r5
|
||||
store i64 1, i64* %r7, align 8
|
||||
br label %L
|
||||
}
|
@ -1,302 +0,0 @@
|
||||
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
|
||||
|
||||
; @simple is the most basic chain of address induction variables. Chaining
|
||||
; saves at least one register and avoids complex addressing and setup
|
||||
; code.
|
||||
;
|
||||
; X64: @simple
|
||||
; %x * 4
|
||||
; X64: shlq $2
|
||||
; no other address computation in the preheader
|
||||
; X64-NEXT: xorl
|
||||
; X64-NEXT: .p2align
|
||||
; X64: %loop
|
||||
; no complex address modes
|
||||
; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
|
||||
;
|
||||
; X32: @simple
|
||||
; no expensive address computation in the preheader
|
||||
; X32-NOT: imul
|
||||
; X32: %loop
|
||||
; no complex address modes
|
||||
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
|
||||
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
loop:
|
||||
%iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
|
||||
%s = phi i32 [ 0, %entry ], [ %s4, %loop ]
|
||||
%v = load i32, i32* %iv
|
||||
%iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
|
||||
%v1 = load i32, i32* %iv1
|
||||
%iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
|
||||
%v2 = load i32, i32* %iv2
|
||||
%iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
|
||||
%v3 = load i32, i32* %iv3
|
||||
%s1 = add i32 %s, %v
|
||||
%s2 = add i32 %s1, %v1
|
||||
%s3 = add i32 %s2, %v2
|
||||
%s4 = add i32 %s3, %v3
|
||||
%iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x
|
||||
%cmp = icmp eq i32* %iv4, %b
|
||||
br i1 %cmp, label %exit, label %loop
|
||||
exit:
|
||||
ret i32 %s4
|
||||
}
|
||||
|
||||
; @user is not currently chained because the IV is live across memory ops.
|
||||
;
|
||||
; X64: @user
|
||||
; X64: shlq $4
|
||||
; X64: lea
|
||||
; X64: lea
|
||||
; X64: %loop
|
||||
; complex address modes
|
||||
; X64: (%{{[^)]+}},%{{[^)]+}},
|
||||
;
|
||||
; X32: @user
|
||||
; expensive address computation in the preheader
|
||||
; X32: shll $4
|
||||
; X32: lea
|
||||
; X32: lea
|
||||
; X32: %loop
|
||||
; complex address modes
|
||||
; X32: (%{{[^)]+}},%{{[^)]+}},
|
||||
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
loop:
|
||||
%iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
|
||||
%s = phi i32 [ 0, %entry ], [ %s4, %loop ]
|
||||
%v = load i32, i32* %iv
|
||||
%iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
|
||||
%v1 = load i32, i32* %iv1
|
||||
%iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
|
||||
%v2 = load i32, i32* %iv2
|
||||
%iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
|
||||
%v3 = load i32, i32* %iv3
|
||||
%s1 = add i32 %s, %v
|
||||
%s2 = add i32 %s1, %v1
|
||||
%s3 = add i32 %s2, %v2
|
||||
%s4 = add i32 %s3, %v3
|
||||
%iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x
|
||||
store i32 %s4, i32* %iv
|
||||
%cmp = icmp eq i32* %iv4, %b
|
||||
br i1 %cmp, label %exit, label %loop
|
||||
exit:
|
||||
ret i32 %s4
|
||||
}
|
||||
|
||||
; @extrastride is a slightly more interesting case of a single
|
||||
; complete chain with multiple strides. The test case IR is what LSR
|
||||
; used to do, and exactly what we don't want to do. LSR's new IV
|
||||
; chaining feature should now undo the damage.
|
||||
;
|
||||
; X64: extrastride:
|
||||
; We currently don't handle this on X64 because the sexts cause
|
||||
; strange increment expressions like this:
|
||||
; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
|
||||
;
|
||||
; X32: extrastride:
|
||||
; no spills in the preheader
|
||||
; X32-NOT: mov{{.*}}(%esp){{$}}
|
||||
; X32: %for.body{{$}}
|
||||
; no complex address modes
|
||||
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
|
||||
; no reloads
|
||||
; X32-NOT: (%esp)
|
||||
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
|
||||
entry:
|
||||
%cmp8 = icmp eq i32 %z, 0
|
||||
br i1 %cmp8, label %for.end, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%add.ptr.sum = shl i32 %main_stride, 1 ; s*2
|
||||
%add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
|
||||
%add.ptr2.sum = add i32 %x, %main_stride ; s + x
|
||||
%add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
|
||||
%add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.body
|
||||
%main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
|
||||
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
||||
%res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
|
||||
%0 = bitcast i8* %main.addr.011 to i32*
|
||||
%1 = load i32, i32* %0, align 4
|
||||
%add.ptr = getelementptr inbounds i8, i8* %main.addr.011, i32 %main_stride
|
||||
%2 = bitcast i8* %add.ptr to i32*
|
||||
%3 = load i32, i32* %2, align 4
|
||||
%add.ptr1 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr.sum
|
||||
%4 = bitcast i8* %add.ptr1 to i32*
|
||||
%5 = load i32, i32* %4, align 4
|
||||
%add.ptr2 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr1.sum
|
||||
%6 = bitcast i8* %add.ptr2 to i32*
|
||||
%7 = load i32, i32* %6, align 4
|
||||
%add.ptr3 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr4.sum
|
||||
%8 = bitcast i8* %add.ptr3 to i32*
|
||||
%9 = load i32, i32* %8, align 4
|
||||
%add = add i32 %3, %1
|
||||
%add4 = add i32 %add, %5
|
||||
%add5 = add i32 %add4, %7
|
||||
%add6 = add i32 %add5, %9
|
||||
store i32 %add6, i32* %res.addr.09, align 4
|
||||
%add.ptr6 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr3.sum
|
||||
%add.ptr7 = getelementptr inbounds i32, i32* %res.addr.09, i32 %y
|
||||
%inc = add i32 %i.010, 1
|
||||
%cmp = icmp eq i32 %inc, %z
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; @foldedidx is an unrolled variant of this loop:
|
||||
; for (unsigned long i = 0; i < len; i += s) {
|
||||
; c[i] = a[i] + b[i];
|
||||
; }
|
||||
; where 's' can be folded into the addressing mode.
|
||||
; Consequently, we should *not* form any chains.
|
||||
;
|
||||
; X64: foldedidx:
|
||||
; X64: movzbl -3(
|
||||
;
|
||||
; X32: foldedidx:
|
||||
; X32: movzbl 400(
|
||||
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.07
|
||||
%0 = load i8, i8* %arrayidx, align 1
|
||||
%conv5 = zext i8 %0 to i32
|
||||
%arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.07
|
||||
%1 = load i8, i8* %arrayidx1, align 1
|
||||
%conv26 = zext i8 %1 to i32
|
||||
%add = add nsw i32 %conv26, %conv5
|
||||
%conv3 = trunc i32 %add to i8
|
||||
%arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.07
|
||||
store i8 %conv3, i8* %arrayidx4, align 1
|
||||
%inc1 = or i32 %i.07, 1
|
||||
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc1
|
||||
%2 = load i8, i8* %arrayidx.1, align 1
|
||||
%conv5.1 = zext i8 %2 to i32
|
||||
%arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc1
|
||||
%3 = load i8, i8* %arrayidx1.1, align 1
|
||||
%conv26.1 = zext i8 %3 to i32
|
||||
%add.1 = add nsw i32 %conv26.1, %conv5.1
|
||||
%conv3.1 = trunc i32 %add.1 to i8
|
||||
%arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %inc1
|
||||
store i8 %conv3.1, i8* %arrayidx4.1, align 1
|
||||
%inc.12 = or i32 %i.07, 2
|
||||
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.12
|
||||
%4 = load i8, i8* %arrayidx.2, align 1
|
||||
%conv5.2 = zext i8 %4 to i32
|
||||
%arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.12
|
||||
%5 = load i8, i8* %arrayidx1.2, align 1
|
||||
%conv26.2 = zext i8 %5 to i32
|
||||
%add.2 = add nsw i32 %conv26.2, %conv5.2
|
||||
%conv3.2 = trunc i32 %add.2 to i8
|
||||
%arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %inc.12
|
||||
store i8 %conv3.2, i8* %arrayidx4.2, align 1
|
||||
%inc.23 = or i32 %i.07, 3
|
||||
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.23
|
||||
%6 = load i8, i8* %arrayidx.3, align 1
|
||||
%conv5.3 = zext i8 %6 to i32
|
||||
%arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.23
|
||||
%7 = load i8, i8* %arrayidx1.3, align 1
|
||||
%conv26.3 = zext i8 %7 to i32
|
||||
%add.3 = add nsw i32 %conv26.3, %conv5.3
|
||||
%conv3.3 = trunc i32 %add.3 to i8
|
||||
%arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %inc.23
|
||||
store i8 %conv3.3, i8* %arrayidx4.3, align 1
|
||||
%inc.3 = add nsw i32 %i.07, 4
|
||||
%exitcond.3 = icmp eq i32 %inc.3, 400
|
||||
br i1 %exitcond.3, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; @multioper tests instructions with multiple IV user operands. We
|
||||
; should be able to chain them independent of each other.
|
||||
;
|
||||
; X64: @multioper
|
||||
; X64: %for.body
|
||||
; X64: movl %{{.*}},4)
|
||||
; X64-NEXT: leal 1(
|
||||
; X64-NEXT: movl %{{.*}},4)
|
||||
; X64-NEXT: leal 2(
|
||||
; X64-NEXT: movl %{{.*}},4)
|
||||
; X64-NEXT: leal 3(
|
||||
; X64-NEXT: movl %{{.*}},4)
|
||||
;
|
||||
; X32: @multioper
|
||||
; X32: %for.body
|
||||
; X32: movl %{{.*}},4)
|
||||
; X32-NEXT: leal 1(
|
||||
; X32-NEXT: movl %{{.*}},4)
|
||||
; X32-NEXT: leal 2(
|
||||
; X32-NEXT: movl %{{.*}},4)
|
||||
; X32-NEXT: leal 3(
|
||||
; X32-NEXT: movl %{{.*}},4)
|
||||
define void @multioper(i32* %a, i32 %n) nounwind {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
|
||||
%i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
|
||||
store i32 %i, i32* %p, align 4
|
||||
%inc1 = or i32 %i, 1
|
||||
%add.ptr.i1 = getelementptr inbounds i32, i32* %p, i32 1
|
||||
store i32 %inc1, i32* %add.ptr.i1, align 4
|
||||
%inc2 = add nsw i32 %i, 2
|
||||
%add.ptr.i2 = getelementptr inbounds i32, i32* %p, i32 2
|
||||
store i32 %inc2, i32* %add.ptr.i2, align 4
|
||||
%inc3 = add nsw i32 %i, 3
|
||||
%add.ptr.i3 = getelementptr inbounds i32, i32* %p, i32 3
|
||||
store i32 %inc3, i32* %add.ptr.i3, align 4
|
||||
%p.next = getelementptr inbounds i32, i32* %p, i32 4
|
||||
%inc4 = add nsw i32 %i, 4
|
||||
%cmp = icmp slt i32 %inc4, %n
|
||||
br i1 %cmp, label %for.body, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; @testCmpZero has a ICmpZero LSR use that should not be hidden from
|
||||
; LSR. Profitable chains should have more than one nonzero increment
|
||||
; anyway.
|
||||
;
|
||||
; X32: @testCmpZero
|
||||
; X32: %for.body82.us
|
||||
; X32: cmp
|
||||
; X32: jne
|
||||
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
|
||||
entry:
|
||||
%dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx
|
||||
%source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx
|
||||
%add.ptr79.us.sum = add i32 %srcidx, %len
|
||||
%lftr.limit = getelementptr i8, i8* %src, i32 %add.ptr79.us.sum
|
||||
br label %for.body82.us
|
||||
|
||||
for.body82.us:
|
||||
%dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
|
||||
%source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
|
||||
%0 = bitcast i8* %source to i32*
|
||||
%1 = load i32, i32* %0, align 4
|
||||
%trunc = trunc i32 %1 to i8
|
||||
%add.ptr83.us = getelementptr inbounds i8, i8* %source, i32 4
|
||||
%incdec.ptr91.us = getelementptr inbounds i8, i8* %dest, i32 1
|
||||
store i8 %trunc, i8* %dest, align 1
|
||||
%exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
|
||||
br i1 %exitcond, label %return, label %for.body82.us
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
@ -1,96 +0,0 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
|
||||
|
||||
; @sharedidx is an unrolled variant of this loop:
|
||||
; for (unsigned long i = 0; i < len; i += s) {
|
||||
; c[i] = a[i] + b[i];
|
||||
; }
|
||||
; where 's' cannot be folded into the addressing mode.
|
||||
;
|
||||
; This is not quite profitable to chain. But with -stress-ivchain, we
|
||||
; can form three address chains in place of the shared induction
|
||||
; variable.
|
||||
|
||||
; X64: sharedidx:
|
||||
; X64: %for.body.preheader
|
||||
; X64-NOT: leal ({{.*}},4)
|
||||
; X64: %for.body.1
|
||||
|
||||
; X32: sharedidx:
|
||||
; X32: %for.body.2
|
||||
; X32: add
|
||||
; X32: add
|
||||
; X32: add
|
||||
; X32: add
|
||||
; X32: add
|
||||
; X32: %for.body.3
|
||||
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
|
||||
entry:
|
||||
%cmp8 = icmp eq i32 %len, 0
|
||||
br i1 %cmp8, label %for.end, label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body.3
|
||||
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
|
||||
%0 = load i8, i8* %arrayidx, align 1
|
||||
%conv6 = zext i8 %0 to i32
|
||||
%arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
|
||||
%1 = load i8, i8* %arrayidx1, align 1
|
||||
%conv27 = zext i8 %1 to i32
|
||||
%add = add nsw i32 %conv27, %conv6
|
||||
%conv3 = trunc i32 %add to i8
|
||||
%arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
|
||||
store i8 %conv3, i8* %arrayidx4, align 1
|
||||
%add5 = add i32 %i.09, %s
|
||||
%cmp = icmp ult i32 %add5, %len
|
||||
br i1 %cmp, label %for.body.1, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
|
||||
ret void
|
||||
|
||||
for.body.1: ; preds = %for.body
|
||||
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
|
||||
%2 = load i8, i8* %arrayidx.1, align 1
|
||||
%conv6.1 = zext i8 %2 to i32
|
||||
%arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
|
||||
%3 = load i8, i8* %arrayidx1.1, align 1
|
||||
%conv27.1 = zext i8 %3 to i32
|
||||
%add.1 = add nsw i32 %conv27.1, %conv6.1
|
||||
%conv3.1 = trunc i32 %add.1 to i8
|
||||
%arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
|
||||
store i8 %conv3.1, i8* %arrayidx4.1, align 1
|
||||
%add5.1 = add i32 %add5, %s
|
||||
%cmp.1 = icmp ult i32 %add5.1, %len
|
||||
br i1 %cmp.1, label %for.body.2, label %for.end
|
||||
|
||||
for.body.2: ; preds = %for.body.1
|
||||
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
|
||||
%4 = load i8, i8* %arrayidx.2, align 1
|
||||
%conv6.2 = zext i8 %4 to i32
|
||||
%arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
|
||||
%5 = load i8, i8* %arrayidx1.2, align 1
|
||||
%conv27.2 = zext i8 %5 to i32
|
||||
%add.2 = add nsw i32 %conv27.2, %conv6.2
|
||||
%conv3.2 = trunc i32 %add.2 to i8
|
||||
%arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
|
||||
store i8 %conv3.2, i8* %arrayidx4.2, align 1
|
||||
%add5.2 = add i32 %add5.1, %s
|
||||
%cmp.2 = icmp ult i32 %add5.2, %len
|
||||
br i1 %cmp.2, label %for.body.3, label %for.end
|
||||
|
||||
for.body.3: ; preds = %for.body.2
|
||||
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
|
||||
%6 = load i8, i8* %arrayidx.3, align 1
|
||||
%conv6.3 = zext i8 %6 to i32
|
||||
%arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
|
||||
%7 = load i8, i8* %arrayidx1.3, align 1
|
||||
%conv27.3 = zext i8 %7 to i32
|
||||
%add.3 = add nsw i32 %conv27.3, %conv6.3
|
||||
%conv3.3 = trunc i32 %add.3 to i8
|
||||
%arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
|
||||
store i8 %conv3.3, i8* %arrayidx4.3, align 1
|
||||
%add5.3 = add i32 %add5.2, %s
|
||||
%cmp.3 = icmp ult i32 %add5.3, %len
|
||||
br i1 %cmp.3, label %for.body, label %for.end
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
if not 'X86' in config.root.targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,58 +0,0 @@
|
||||
; REQUIRES: x86-registered-target
|
||||
; RUN: opt -loop-reduce -S < %s | FileCheck %s
|
||||
|
||||
; Strength reduction analysis here relies on IV Users analysis, that
|
||||
; only finds users among instructions with types that are treated as
|
||||
; legal by the data layout. When running this test on pure non-x86
|
||||
; configs (for example, ARM 64), it gets confused with the target
|
||||
; triple and uses a default data layout instead. This default layout
|
||||
; does not have any legal types (even i32), so the transformation
|
||||
; does not happen.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx"
|
||||
|
||||
; PR15470: LSR miscompile. The test2 function should return '1'.
|
||||
;
|
||||
; SCEV expander cannot expand quadratic recurrences outside of the
|
||||
; loop. This recurrence depends on %sub.us, so can't be expanded.
|
||||
; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
|
||||
; declared after the loop.
|
||||
;
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK-LABEL: test2.loop:
|
||||
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
|
||||
; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
|
||||
; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
|
||||
;
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
|
||||
; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
|
||||
; CHECK: %0 = sub i32 0, %sub.us
|
||||
; CHECK: %1 = sub i32 %0, %lsr.iv.next
|
||||
; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
|
||||
; CHECK: %f = ashr i32 %sext.us, 24
|
||||
; CHECK: ret i32 %f
|
||||
define i32 @test2() {
|
||||
entry:
|
||||
br label %test2.loop
|
||||
|
||||
test2.loop:
|
||||
%inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
|
||||
%inc11.us = add nsw i32 %inc1115.us, 1
|
||||
%cmp.us = icmp slt i32 %inc11.us, 2
|
||||
br i1 %cmp.us, label %test2.loop, label %for.end
|
||||
|
||||
for.end:
|
||||
%tobool.us = icmp eq i32 %inc1115.us, 0
|
||||
%sub.us = select i1 %tobool.us, i32 0, i32 0
|
||||
%mul.us = shl i32 %inc1115.us, 24
|
||||
%sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
|
||||
%sext.us = mul i32 %mul.us, %sub.cond.us
|
||||
%f = ashr i32 %sext.us, 24
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %f
|
||||
}
|
@ -1,60 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 }
|
||||
|
||||
@global = external local_unnamed_addr global %struct.ham, align 8
|
||||
|
||||
define void @foo() local_unnamed_addr {
|
||||
bb:
|
||||
%tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8
|
||||
%tmp1 = and i64 %tmp, 1792
|
||||
%tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8
|
||||
%tmp3 = add i64 %tmp1, %tmp2
|
||||
%tmp4 = load i8*, i8** null, align 8
|
||||
%tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0
|
||||
%tmp6 = sub i64 0, %tmp3
|
||||
%tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6
|
||||
%tmp8 = inttoptr i64 0 to i8*
|
||||
br label %bb9
|
||||
|
||||
; Without filtering non-optimal formulae with the same ScaledReg and Scale, the strategy
|
||||
; to narrow LSR search space by picking winner reg will generate only one lsr.iv and
|
||||
; unoptimal result.
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK: bb9:
|
||||
; CHECK-NEXT: = phi i8*
|
||||
; CHECK-NEXT: = phi i8*
|
||||
|
||||
bb9: ; preds = %bb12, %bb
|
||||
%tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ]
|
||||
%tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ]
|
||||
br i1 false, label %bb18, label %bb12
|
||||
|
||||
bb12: ; preds = %bb9
|
||||
%tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8
|
||||
%tmp14 = bitcast i8* %tmp13 to i64*
|
||||
%tmp15 = load i64, i64* %tmp14, align 1
|
||||
%tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16
|
||||
%tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16
|
||||
br label %bb9
|
||||
|
||||
bb18: ; preds = %bb9
|
||||
%tmp19 = icmp ugt i8* %tmp11, null
|
||||
%tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8
|
||||
%tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8
|
||||
%tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20
|
||||
%tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21
|
||||
br label %bb24
|
||||
|
||||
bb24: ; preds = %bb24, %bb18
|
||||
%tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ]
|
||||
%tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ]
|
||||
%tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1
|
||||
%tmp28 = load i8, i8* %tmp25, align 1
|
||||
%tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1
|
||||
store i8 %tmp28, i8* %tmp26, align 1
|
||||
%tmp30 = icmp eq i8* %tmp29, %tmp5
|
||||
br label %bb24
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
|
||||
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
|
||||
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
|
||||
|
||||
; OPT test checks that LSR optimizes the compare for a static counter into a compare with 0.
|
||||
|
||||
; BOTH: for.body:
|
||||
; INSN: icmp eq i64 %lsr.iv.next, 0
|
||||
; REGS: icmp eq i64 %indvars.iv.next, 1024
|
||||
|
||||
; LLC test checks that LSR optimizes the compare for a static counter.
|
||||
; That means that instead of creating the following:
|
||||
; movl %ecx, (%rdx,%rax,4)
|
||||
; incq %rax
|
||||
; cmpq $1024, %rax
|
||||
; LSR should optimize out cmp:
|
||||
; movl %ecx, 4096(%rdx,%rax)
|
||||
; addq $4, %rax
|
||||
; or
|
||||
; movl %ecx, 4096(%rdx,%rax,4)
|
||||
; incq %rax
|
||||
|
||||
; CHECK: LBB0_1:
|
||||
; CHECK-NEXT: movl 4096(%{{.+}},[[REG:%[0-9a-z]+]]
|
||||
; CHECK-NEXT: addl 4096(%{{.+}},[[REG]]
|
||||
; CHECK-NEXT: movl %{{.+}}, 4096(%{{.+}},[[REG]]
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: jne
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
; @foo stores x[i] + y[i] into q[i] for each index i, starting at i = 0.
; The trip count is a compile-time constant: the loop exits once i + 1 == 1024.
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
; One i64 induction variable indexes all three arrays.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
|
||||
%tmp = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
|
||||
%tmp1 = load i32, i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %tmp1, %tmp
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx4, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Exit test compares the incremented counter against the constant 1024.
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
|
||||
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
|
||||
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
|
||||
|
||||
; OPT checks that LSR prefers fewer instructions to fewer registers.
|
||||
; For x86 LSR should prefer complicated address to new lsr induction
|
||||
; variables.
|
||||
|
||||
; BOTH: for.body:
|
||||
; INSN: getelementptr i32, i32* %x, i64 %indvars.iv
|
||||
; INSN: getelementptr i32, i32* %y, i64 %indvars.iv
|
||||
; INSN: getelementptr i32, i32* %q, i64 %indvars.iv
|
||||
; REGS %lsr.iv4 = phi
|
||||
; REGS %lsr.iv2 = phi
|
||||
; REGS %lsr.iv1 = phi
|
||||
; REGS: getelementptr i32, i32* %lsr.iv1, i64 1
|
||||
; REGS: getelementptr i32, i32* %lsr.iv2, i64 1
|
||||
; REGS: getelementptr i32, i32* %lsr.iv4, i64 1
|
||||
|
||||
; LLC checks that LSR prefers fewer instructions to fewer registers.
|
||||
; LSR should prefer complicated address to additional add instructions.
|
||||
|
||||
; CHECK: LBB0_2:
|
||||
; CHECK-NEXT: movl (%r{{.+}},
|
||||
; CHECK-NEXT: addl (%r{{.+}},
|
||||
; CHECK-NEXT: movl %e{{.+}}, (%r{{.+}},
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
; @foo stores x[i] + y[i] into q[i] for each index i, starting at i = 0.
; The trip count is the runtime value %n zero-extended to i64; the loop is
; skipped entirely unless %n is greater than 0 (signed compare).
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q, i32 %n) {
|
||||
entry:
|
||||
; Guard: only enter the loop when %n > 0.
%cmp10 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %for.body
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.preheader
|
||||
; One i64 induction variable indexes all three arrays.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
|
||||
%tmp = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
|
||||
%tmp1 = load i32, i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %tmp1, %tmp
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx4, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Exit once the incremented counter equals the widened trip count.
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
@ -1,69 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -loop-reduce -S < %s | FileCheck %s
|
||||
; Check when we use an outerloop induction variable inside of an innerloop
|
||||
; induction value expr, LSR can still choose to use single induction variable
|
||||
; for the innerloop and share it in multiple induction value exprs.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; @foo is a two-level loop nest over the byte array %maxarray.
;   Outer loop (for.body): %indvars.iv2 counts up from 0 and repeats while the
;     incremented value is less than sext(%nsteps) (signed compare).
;   Inner loop (for.body2): entered only when %size > 1; %indvars.iv counts up
;     from 1 until the incremented value equals zext(%size).
; Each inner iteration loads maxarray[iv] and maxarray[iv + zext(%size) - 2],
; xors them, and stores the result to maxarray[iv2 * zext(%size) + iv].
; %hsize and %lined are not referenced anywhere in the body.
define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
|
||||
entry:
|
||||
%cmp215 = icmp sgt i32 %size, 1
|
||||
%t0 = zext i32 %size to i64
|
||||
%t1 = sext i32 %nsteps to i64
|
||||
%sub2 = sub i64 %t0, 2
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.inc, %entry
|
||||
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ]
|
||||
; Row offset built from the outer induction variable; used by the inner store.
%t2 = mul nsw i64 %indvars.iv2, %t0
|
||||
br i1 %cmp215, label %for.body2.preheader, label %for.inc
|
||||
|
||||
for.body2.preheader: ; preds = %for.body
|
||||
br label %for.body2
|
||||
|
||||
; Check LSR only generates two induction variables for for.body2 one for compare and
|
||||
; one to be shared by multiple array accesses.
|
||||
; CHECK: for.body2:
|
||||
; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
|
||||
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
|
||||
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
|
||||
; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
|
||||
; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
|
||||
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
|
||||
; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
|
||||
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
|
||||
; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
|
||||
; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
|
||||
; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
|
||||
; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
|
||||
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
|
||||
|
||||
for.body2: ; preds = %for.body2.preheader, %for.body2
|
||||
%indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
|
||||
%arrayidx1 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
|
||||
%v1 = load i8, i8* %arrayidx1, align 1
|
||||
%idx2 = add nsw i64 %indvars.iv, %sub2
|
||||
%arrayidx2 = getelementptr inbounds i8, i8* %maxarray, i64 %idx2
|
||||
%v2 = load i8, i8* %arrayidx2, align 1
|
||||
%tmpv = xor i8 %v1, %v2
|
||||
; Store address combines the outer-loop row offset %t2 with the inner index.
%t4 = add nsw i64 %t2, %indvars.iv
|
||||
%add.ptr = getelementptr inbounds i8, i8* %maxarray, i64 %t4
|
||||
store i8 %tmpv, i8* %add.ptr, align 1
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%wide.trip.count = zext i32 %size to i64
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond, label %for.body2, label %for.inc.loopexit
|
||||
|
||||
for.inc.loopexit: ; preds = %for.body2
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.inc.loopexit, %for.body
|
||||
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
|
||||
%cmp = icmp slt i64 %indvars.iv.next3, %t1
|
||||
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.inc
|
||||
ret void
|
||||
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; @indvar_expansion chains three loops:
;   for.cond    - counts %indvars.iv44 up from 0; its exit condition compares
;                 undef against 0.
;   for.cond2   - self-loop whose branch condition is undef.
;   vector.body - steps %index by 8, starting from %0 (the low 32 bits of
;                 %indvars.iv44 sign-extended via shl/ashr by 32), loading a
;                 <4 x i8> from %rowsptr + %index until %index.next equals
;                 %end.idx.rnd.down.
; The start and end values of vector.body are computed in for.body14.lr.ph
; from the final value of %indvars.iv44.
define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; SCEVExpander used to create induction variables in the loop %for.cond while
|
||||
; expanding the recurrence start value of loop strength reduced values from
|
||||
; %vector.body.
|
||||
|
||||
; CHECK-LABEL: indvar_expansion
|
||||
; CHECK: for.cond:
|
||||
; CHECK-NOT: phi i3
|
||||
; CHECK: br i1 {{.+}}, label %for.cond
|
||||
|
||||
for.cond:
|
||||
%indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
|
||||
%cmp = icmp eq i8 undef, 0
|
||||
%indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
|
||||
br i1 %cmp, label %for.cond, label %for.cond2
|
||||
|
||||
for.cond2:
|
||||
br i1 undef, label %for.cond2, label %for.body14.lr.ph
|
||||
|
||||
for.body14.lr.ph:
|
||||
; shl followed by ashr exact by 32 sign-extends the low 32 bits of %indvars.iv44.
%sext = shl i64 %indvars.iv44, 32
|
||||
%0 = ashr exact i64 %sext, 32
|
||||
%1 = sub i64 undef, %indvars.iv44
|
||||
%2 = and i64 %1, 4294967295
|
||||
%3 = add i64 %2, 1
|
||||
%fold = add i64 %1, 1
|
||||
%n.mod.vf = and i64 %fold, 7
|
||||
%n.vec = sub i64 %3, %n.mod.vf
|
||||
%end.idx.rnd.down = add i64 %n.vec, %0
|
||||
br label %vector.body
|
||||
|
||||
vector.body:
|
||||
%index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
|
||||
%4 = getelementptr inbounds i8, i8* %rowsptr, i64 %index
|
||||
%5 = bitcast i8* %4 to <4 x i8>*
|
||||
%wide.load = load <4 x i8>, <4 x i8>* %5, align 1
|
||||
%index.next = add i64 %index, 8
|
||||
%6 = icmp eq i64 %index.next, %end.idx.rnd.down
|
||||
br i1 %6, label %for.end24, label %vector.body
|
||||
|
||||
for.end24:
|
||||
ret void
|
||||
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
; RUN: opt < %s -loop-reduce -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.9.0"
|
||||
|
||||
; LSR shouldn't normalize IV if it can't be denormalized to the original
|
||||
; expression. In this testcase, the normalized expression was denormalized to
|
||||
; an expression different from the original, and we were losing sign extension.
|
||||
|
||||
; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
|
||||
; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
|
||||
|
||||
@j = common global i32 0, align 4
|
||||
@c = common global i32 0, align 4
|
||||
@g = common global i32 0, align 4
|
||||
@h = common global i8 0, align 1
|
||||
@d = common global i32 0, align 4
|
||||
@i = common global i32 0, align 4
|
||||
@e = common global i32 0, align 4
|
||||
@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
|
||||
@a = common global i32 0, align 4
|
||||
@b = common global i16 0, align 2
|
||||
|
||||
; Function Attrs: nounwind optsize ssp uwtable
|
||||
; @main runs a loop driven by an i8 counter (%inc9) that starts at 0 and is
; incremented by 1 each iteration; the loop repeats while the incremented
; value %inc is greater than -1 (signed i8 compare).
; Inside the loop, @c is zeroed whenever the value loaded from @j is non-zero.
; After the loop, the function:
;   - stores %conv (sext of the last in-loop counter value) to @g,
;   - stores a value derived from the sign bit of @d to @i,
;   - stores the final %inc to @h,
;   - stores sext(%inc) + %conv to @e and passes that sum to printf with @.str.
; It always returns 0.
define i32 @main() #0 {
|
||||
entry:
|
||||
store i8 0, i8* @h, align 1
|
||||
%0 = load i32, i32* @j, align 4
|
||||
%tobool.i = icmp eq i32 %0, 0
|
||||
%1 = load i32, i32* @d, align 4
|
||||
%cmp3 = icmp sgt i32 %1, -1
|
||||
%.lobit = lshr i32 %1, 31
|
||||
%.lobit.not = xor i32 %.lobit, 1
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %fn3.exit
|
||||
%inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
|
||||
; Sign extension of the i8 counter; this value is also used after the loop.
%conv = sext i8 %inc9 to i32
|
||||
br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
|
||||
|
||||
land.rhs.i: ; preds = %for.body
|
||||
store i32 0, i32* @c, align 4
|
||||
br label %fn3.exit
|
||||
|
||||
fn3.exit: ; preds = %for.body, %land.rhs.i
|
||||
%inc = add i8 %inc9, 1
|
||||
; Continue while the incremented i8 counter is still non-negative.
%cmp = icmp sgt i8 %inc, -1
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %fn3.exit
|
||||
%.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
|
||||
store i32 %conv, i32* @g, align 4
|
||||
store i32 %.lobit.not., i32* @i, align 4
|
||||
store i8 %inc, i8* @h, align 1
|
||||
%conv7 = sext i8 %inc to i32
|
||||
%add = add nsw i32 %conv7, %conv
|
||||
store i32 %add, i32* @e, align 4
|
||||
%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
|
||||
ret i32 0
|
||||
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind optsize
|
||||
declare i32 @printf(i8* nocapture readonly, ...) #1
|
||||
|
||||
attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind optsize }
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user