Imported Upstream version 5.18.0.167

Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
Xamarin Public Jenkins (auto-signing)
2018-10-20 08:25:10 +00:00
parent e19d552987
commit b084638f15
28489 changed files with 184 additions and 3866856 deletions


@@ -1,215 +0,0 @@
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s
; Provide legal integer types.
target datalayout = "n8:16:32:64"
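; These tests exercise LSR's OptimizeShadowIV, which replaces an
; int-to-FP cast of an IV inside the loop with a parallel FP ("shadow")
; IV incremented in lockstep. A rough C sketch of the idea (the helper
; foo is hypothetical, not part of this test):
;
;   extern void foo(double);
;   void before(unsigned n) {
;     for (unsigned i = 0; i < n; ++i)
;       foo((double)i);                 /* uitofp on every iteration */
;   }
;   void after(unsigned n) {            /* what the shadow IV buys us */
;     double d = 0.0;
;     for (unsigned i = 0; i < n; ++i) {
;       foo(d);                         /* no per-iteration conversion */
;       d += 1.0;
;     }
;   }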
define void @foobar(i32 %n) nounwind {
; CHECK-LABEL: foobar(
; CHECK: phi double
entry:
%cond = icmp eq i32 %n, 0 ; <i1>:0 [#uses=2]
br i1 %cond, label %return, label %bb.nph
bb.nph: ; preds = %entry
%umax = select i1 %cond, i32 1, i32 %n ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
%i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
tail call void @bar( i32 %i.03 ) nounwind
%tmp1 = uitofp i32 %i.03 to double ; <double>:1 [#uses=1]
tail call void @foo( double %tmp1 ) nounwind
%indvar.next = add nsw nuw i32 %i.03, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}
; Unable to eliminate the cast because double's mantissa bits are not enough
; to hold all of the i64 IV's bits.
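; (A double has a 53-bit significand, so it represents every integer
; exactly only up to 2^53. For example, in C:
;   double d = (double)((1ULL << 53) + 1);   /* rounds back to 2^53 */
;   assert(d == (double)(1ULL << 53));
; so an i64 IV cannot be mirrored exactly by a double shadow IV.)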
define void @foobar2(i64 %n) nounwind {
; CHECK-LABEL: foobar2(
; CHECK-NOT: phi double
; CHECK-NOT: phi float
entry:
%cond = icmp eq i64 %n, 0 ; <i1>:0 [#uses=2]
br i1 %cond, label %return, label %bb.nph
bb.nph: ; preds = %entry
%umax = select i1 %cond, i64 1, i64 %n ; <i64> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
%i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
%tmp1 = trunc i64 %i.03 to i32 ; <i32>:1 [#uses=1]
tail call void @bar( i32 %tmp1 ) nounwind
%tmp2 = uitofp i64 %i.03 to double ; <double>:2 [#uses=1]
tail call void @foo( double %tmp2 ) nounwind
%indvar.next = add nsw nuw i64 %i.03, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}
; Unable to eliminate cast due to potential overflow.
define void @foobar3() nounwind {
; CHECK-LABEL: foobar3(
; CHECK-NOT: phi double
; CHECK-NOT: phi float
entry:
%tmp0 = tail call i32 (...) @nn( ) nounwind ; <i32>:0 [#uses=1]
%cond = icmp eq i32 %tmp0, 0 ; <i1>:1 [#uses=1]
br i1 %cond, label %return, label %bb
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
tail call void @bar( i32 %i.03 ) nounwind
%tmp2 = uitofp i32 %i.03 to double ; <double>:2 [#uses=1]
tail call void @foo( double %tmp2 ) nounwind
%indvar.next = add nuw nsw i32 %i.03, 1 ; <i32>:3 [#uses=2]
%tmp4 = tail call i32 (...) @nn( ) nounwind ; <i32>:4 [#uses=1]
%exitcond = icmp ugt i32 %tmp4, %indvar.next ; <i1>:5 [#uses=1]
br i1 %exitcond, label %bb, label %return
return: ; preds = %bb, %entry
ret void
}
; Unable to eliminate cast due to overflow.
define void @foobar4() nounwind {
; CHECK-LABEL: foobar4(
; CHECK-NOT: phi double
; CHECK-NOT: phi float
entry:
br label %bb.nph
bb.nph: ; preds = %entry
br label %bb
bb: ; preds = %bb, %bb.nph
%i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i8> [#uses=3]
%tmp2 = sext i8 %i.03 to i32 ; <i32>:0 [#uses=1]
tail call void @bar( i32 %tmp2 ) nounwind
%tmp3 = uitofp i8 %i.03 to double ; <double>:1 [#uses=1]
tail call void @foo( double %tmp3 ) nounwind
%indvar.next = add nsw nuw i8 %i.03, 1 ; <i8> [#uses=2]
%tmp = sext i8 %indvar.next to i32
%exitcond = icmp eq i32 %tmp, 32767 ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}
; Unable to eliminate cast because the integer IV overflows (accum exceeds
; SINT_MAX).
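; (Worked out: accum starts at -3220 and takes 225 steps of 9597741;
; already at step 224, -3220 + 224 * 9597741 = 2149890764 exceeds
; SINT_MAX = 2147483647, so the i32 accum wraps while a double shadow
; IV would keep counting up, and the two diverge.)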
define i32 @foobar5() {
; CHECK-LABEL: foobar5(
; CHECK-NOT: phi double
; CHECK-NOT: phi float
entry:
br label %loop
loop:
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
%tmp1 = sitofp i32 %accum to double
tail call void @foo( double %tmp1 ) nounwind
%accum.next = add i32 %accum, 9597741
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp ugt i32 %iv, 235
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %accum.next
}
; Can eliminate the cast if we set nsw and, thus, assume that accum doesn't
; overflow SINT_MAX.
define i32 @foobar6() {
; CHECK-LABEL: foobar6(
; CHECK: phi double
entry:
br label %loop
loop:
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
%tmp1 = sitofp i32 %accum to double
tail call void @foo( double %tmp1 ) nounwind
%accum.next = add nsw i32 %accum, 9597741
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp ugt i32 %iv, 235
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %accum.next
}
; Unable to eliminate cast because the integer IV overflows (accum exceeds
; UINT_MAX).
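; (Worked out: as unsigned, the start value -3220 is 2^32 - 3220 =
; 4294964076, so even the first step, 4294964076 + 9597741, wraps past
; UINT_MAX = 4294967295; a double shadow IV would not wrap, so the two
; diverge immediately.)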
define i32 @foobar7() {
; CHECK-LABEL: foobar7(
; CHECK-NOT: phi double
; CHECK-NOT: phi float
entry:
br label %loop
loop:
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
%tmp1 = uitofp i32 %accum to double
tail call void @foo( double %tmp1 ) nounwind
%accum.next = add i32 %accum, 9597741
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp ugt i32 %iv, 235
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %accum.next
}
; Can eliminate the cast if we set nuw and, thus, assume that accum doesn't
; overflow UINT_MAX.
define i32 @foobar8() {
; CHECK-LABEL: foobar8(
; CHECK: phi double
entry:
br label %loop
loop:
%accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ]
%iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ]
%tmp1 = uitofp i32 %accum to double
tail call void @foo( double %tmp1 ) nounwind
%accum.next = add nuw i32 %accum, 9597741
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp ugt i32 %iv, 235
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %accum.next
}
declare void @bar(i32)
declare void @foo(double)
declare i32 @nn(...)


@@ -1,130 +0,0 @@
; RUN: llc < %s -mtriple=i386-apple-darwin11
define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE() nounwind ssp align 2 {
entry:
br i1 undef, label %bb3.i, label %bb4.i
bb3.i: ; preds = %entry
unreachable
bb4.i: ; preds = %entry
br i1 undef, label %bb.i.i, label %_ZNK4llvm8CallSite14getCalledValueEv.exit
bb.i.i: ; preds = %bb4.i
unreachable
_ZNK4llvm8CallSite14getCalledValueEv.exit: ; preds = %bb4.i
br i1 undef, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i
bb6.i: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
unreachable
_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit
br i1 undef, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i
bb.i: ; preds = %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
br label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit: ; preds = %bb.i, %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
br i1 undef, label %bb50, label %bb27
bb27: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
br i1 undef, label %bb1.i727, label %bb.i.i726
bb.i.i726: ; preds = %bb27
unreachable
bb1.i727: ; preds = %bb27
unreachable
bb50: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit
br label %bb107
bb51: ; preds = %bb107
br i1 undef, label %bb105, label %bb106
bb105: ; preds = %bb51
unreachable
bb106: ; preds = %bb51
br label %bb107
bb107: ; preds = %bb106, %bb50
br i1 undef, label %bb108, label %bb51
bb108: ; preds = %bb107
br i1 undef, label %bb242, label %bb114
bb114: ; preds = %bb108
br i1 undef, label %bb141, label %bb116
bb116: ; preds = %bb114
br i1 undef, label %bb120, label %bb121
bb120: ; preds = %bb116
unreachable
bb121: ; preds = %bb116
unreachable
bb141: ; preds = %bb114
br i1 undef, label %bb182, label %bb143
bb143: ; preds = %bb141
br label %bb157
bb144: ; preds = %bb.i.i.i843
switch i32 undef, label %bb155 [
i32 2, label %bb153
i32 6, label %bb153
i32 4, label %bb153
]
bb153: ; preds = %bb144, %bb144, %bb144
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br label %bb157
bb155: ; preds = %bb144
unreachable
bb157: ; preds = %bb153, %bb143
%indvar = phi i32 [ %indvar.next, %bb153 ], [ 0, %bb143 ] ; <i32> [#uses=2]
%0 = icmp eq i32 undef, %indvar ; <i1> [#uses=1]
switch i16 undef, label %bb6.i841 [
i16 9, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
i16 26, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
]
bb6.i841: ; preds = %bb157
unreachable
_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit: ; preds = %bb157, %bb157
br i1 undef, label %bb.i.i.i843, label %bb1.i.i.i844
bb.i.i.i843: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
br i1 %0, label %bb158, label %bb144
bb1.i.i.i844: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit
unreachable
bb158: ; preds = %bb.i.i.i843
br i1 undef, label %bb177, label %bb176
bb176: ; preds = %bb158
unreachable
bb177: ; preds = %bb158
br i1 undef, label %bb179, label %bb178
bb178: ; preds = %bb177
unreachable
bb179: ; preds = %bb177
unreachable
bb182: ; preds = %bb141
unreachable
bb242: ; preds = %bb108
unreachable
}


@@ -1,47 +0,0 @@
; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
;
; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
; nonzero initial value.
; rdar://9786536
; Provide legal integer types.
target datalayout = "n8:16:32:64"
; First, make sure LSR doesn't crash on an empty IVUsers list.
; CHECK-LABEL: @dummyIV(
; CHECK-NOT: phi
; CHECK-NOT: sitofp
; CHECK: br
define void @dummyIV() nounwind {
entry:
br label %loop
loop:
%i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
%conv = sitofp i32 %i.01 to double
%inc = add nsw i32 %i.01, 1
br i1 undef, label %loop, label %for.end
for.end:
unreachable
}
; Now check that the computed double constant is correct.
; CHECK-LABEL: @doubleIV(
; CHECK: phi double [ -3.900000e+01, %entry ]
; CHECK: br
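; (With the integer IV starting at -39, the shadow IV's start value must
; be sitofp(i32 -39) = -3.900000e+01; the phi CHECK above pins down that
; constant.)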
define void @doubleIV() nounwind {
entry:
br label %loop
loop:
%i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
%conv = sitofp i32 %i.01 to double
%div = fdiv double %conv, 4.000000e+01
%inc = add nsw i32 %i.01, 1
br i1 undef, label %loop, label %for.end
for.end:
unreachable
}


@@ -1,36 +0,0 @@
; RUN: llc < %s | FileCheck %s
;
; PR11431: handle a phi operand that is replaced by a postinc user.
; LSR first expands %t3 to %t2 in %phi
; LSR then expands %t2 in %phi into two decrements, one on each loop exit.
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare i1 @check() nounwind
; Check that LSR did something close to the behavior at the time of the bug.
; CHECK: @sqlite3DropTriggerPtr
; CHECK: incq %r{{[a-d]}}x
; CHECK: jne
; CHECK: decq %r{{[a-d]}}x
; CHECK: ret
define i64 @sqlite3DropTriggerPtr() nounwind {
bb:
%cmp = call zeroext i1 @check()
br label %bb1
bb1: ; preds = %bb4, %bb
%t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ]
%t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ]
%t3 = add nsw i64 %t0, 1
br i1 %cmp, label %bb4, label %bb8
bb4: ; preds = %bb1
%t5 = add nsw i64 %t2, 1
br i1 %cmp, label %bb1, label %bb8
bb8: ; preds = %bb8, %bb4
%phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ]
ret i64 %phi
}


@@ -1,93 +0,0 @@
; RUN: opt < %s -loop-reduce -S | FileCheck %s
;
; Test LSR's ability to prune formulae that refer to nonexistent
; AddRecs in other loops.
;
; Unable to reduce this case further because it requires LSR to exceed
; ComplexityLimit.
;
; We really just want to ensure that LSR can process this loop without
; finding an unsatisfactory solution and bailing out. I've added
; dummyout, an obvious candidate for postinc replacement, so we can
; verify that LSR removes it.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin"
; CHECK-LABEL: @test(
; CHECK: for.body:
; CHECK: %lsr.iv
; CHECK-NOT: %dummyout
; CHECK: ret
define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
entry:
%cmp34 = icmp eq i64 %count, 0
br i1 %cmp34, label %for.end29, label %for.body
for.body: ; preds = %entry, %for.body
%dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
%indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
%dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
%p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
%incdec.ptr = getelementptr inbounds float, float* %p.035, i64 1
%0 = load float, float* %incdec.ptr, align 4
%incdec.ptr2 = getelementptr inbounds float, float* %p.035, i64 2
%1 = load float, float* %incdec.ptr2, align 4
%incdec.ptr3 = getelementptr inbounds float, float* %p.035, i64 3
%2 = load float, float* %incdec.ptr3, align 4
%incdec.ptr4 = getelementptr inbounds float, float* %p.035, i64 4
%3 = load float, float* %incdec.ptr4, align 4
%4 = load i32, i32* %dp.036, align 4
%conv5 = fptoui float %0 to i32
%or = or i32 %4, %conv5
%arrayidx6 = getelementptr inbounds i32, i32* %dp.036, i64 1
%5 = load i32, i32* %arrayidx6, align 4
%conv7 = fptoui float %1 to i32
%or8 = or i32 %5, %conv7
%arrayidx9 = getelementptr inbounds i32, i32* %dp.036, i64 2
%6 = load i32, i32* %arrayidx9, align 4
%conv10 = fptoui float %2 to i32
%or11 = or i32 %6, %conv10
%arrayidx12 = getelementptr inbounds i32, i32* %dp.036, i64 3
%7 = load i32, i32* %arrayidx12, align 4
%conv13 = fptoui float %3 to i32
%or14 = or i32 %7, %conv13
store i32 %or, i32* %dp.036, align 4
store i32 %or8, i32* %arrayidx6, align 4
store i32 %or11, i32* %arrayidx9, align 4
store i32 %or14, i32* %arrayidx12, align 4
%add.ptr = getelementptr inbounds i32, i32* %dp.036, i64 4
%indvars.iv.next40 = add i64 %indvars.iv39, 4
%dummycnt = add i64 %dummyiv, 1
%cmp = icmp ult i64 %indvars.iv.next40, %count
br i1 %cmp, label %for.body, label %for.cond19.preheader
for.cond19.preheader: ; preds = %for.body
%dummyout = add i64 %dummyiv, 1
%rem = and i64 %count, 3
%cmp2130 = icmp eq i64 %rem, 0
br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
for.body23.lr.ph: ; preds = %for.cond19.preheader
%8 = and i64 %count, 3
br label %for.body23
for.body23: ; preds = %for.body23, %for.body23.lr.ph
%indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
%dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
%p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
%incdec.ptr24 = getelementptr inbounds float, float* %p.131, i64 1
%9 = load float, float* %incdec.ptr24, align 4
%10 = load i32, i32* %dp.132, align 4
%conv25 = fptoui float %9 to i32
%or26 = or i32 %10, %conv25
store i32 %or26, i32* %dp.132, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%incdec.ptr28 = getelementptr inbounds i32, i32* %dp.132, i64 1
%exitcond = icmp eq i64 %indvars.iv.next, %8
br i1 %exitcond, label %for.end29, label %for.body23
for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader
%result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
ret i64 %result
}


@@ -1,147 +0,0 @@
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
declare i1 @check() nounwind
declare i1 @foo(i8*, i8*, i8*) nounwind
; Check that redundant phi elimination ran
; CHECK: @test
; CHECK: %while.body.i
; CHECK: movs
; CHECK-NOT: movs
; CHECK: %for.end.i
define i32 @test(i8* %base) nounwind uwtable ssp {
entry:
br label %while.body.lr.ph.i
while.body.lr.ph.i: ; preds = %cond.true.i
br label %while.body.i
while.body.i: ; preds = %cond.true29.i, %while.body.lr.ph.i
%indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
%i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
%sext.i = shl i64 %i.05.i, 32
%idx.ext.i = ashr exact i64 %sext.i, 32
%add.ptr.sum.i = add i64 %idx.ext.i, 16
br label %for.body.i
for.body.i: ; preds = %for.body.i, %while.body.i
%indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
%add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
%arrayidx22.i = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum
%0 = load i8, i8* %arrayidx22.i, align 1
%indvars.iv.next.i = add i64 %indvars.iv.i, 1
%cmp = call i1 @check() nounwind
br i1 %cmp, label %for.end.i, label %for.body.i
for.end.i: ; preds = %for.body.i
%add.ptr.i144 = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum.i
%cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
cond.true29.i: ; preds = %for.end.i
%indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
br i1 false, label %exit, label %while.body.i
cond.false35.i: ; preds = %for.end.i
unreachable
exit: ; preds = %cond.true29.i, %cond.true.i
ret i32 0
}
%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
; PR11782: SCEVExpander assert
;
; Test phi reuse after LSR that requires SCEVExpander to hoist an
; interesting GEP.
;
; CHECK: @test2
; CHECK: %entry
; CHECK-NOT: mov
; CHECK: je
define void @test2(i32 %n) nounwind uwtable {
entry:
br i1 undef, label %while.end, label %for.cond468
for.cond468: ; preds = %if.then477, %entry
%indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
%k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
%k.0 = load i32, i32* %k.0.in, align 4
%0 = trunc i64 %indvars.iv1163 to i32
%cmp469 = icmp slt i32 %0, %n
br i1 %cmp469, label %for.body471, label %for.inc498
for.body471: ; preds = %for.cond468
%first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
%1 = load i32, i32* %first, align 4
br i1 undef, label %if.then477, label %for.inc498
if.then477: ; preds = %for.body471
%last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
%indvars.iv.next1164 = add i64 %indvars.iv1163, 1
br label %for.cond468
for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468
br label %for.inc498
while.end: ; preds = %entry
ret void
}
; PR12898: SCEVExpander crash
; Test redundant phi elimination when the deleted phi's increment is
; itself a phi.
;
; CHECK: @test3
; CHECK: %for.body3.lr.ph.us.i.loopexit
; CHECK-NEXT: Parent Loop
; CHECK-NEXT: Inner Loop
; CHECK-NEXT: incq
; CHECK: testb
; CHECK: je
; CHECK: jmp
define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
entry:
br i1 undef, label %meshBB1, label %meshBB5
for.inc8.us.i: ; preds = %for.body3.us.i
br i1 undef, label %meshBB1, label %meshBB
for.body3.us.i: ; preds = %meshBB, %for.body3.lr.ph.us.i
%indvars.iv.i.SV.phi = phi i64 [ %indvars.iv.next.i, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
%storemerge13.us.i.SV.phi = phi i32 [ 0, %meshBB ], [ 0, %for.body3.lr.ph.us.i ]
%Opq.sa.calc12 = sub i32 undef, 227
%0 = add nsw i64 %indvars.iv.i.SV.phi, %indvars.iv8.i.SV.phi26
%1 = trunc i64 %0 to i32
%mul.i.us.i = mul nsw i32 0, %1
%arrayidx5.us.i = getelementptr inbounds double, double* %u, i64 %indvars.iv.i.SV.phi
%2 = load double, double* %arrayidx5.us.i, align 8
%indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1
br i1 undef, label %for.inc8.us.i, label %meshBB
for.body3.lr.ph.us.i: ; preds = %meshBB1, %meshBB
%indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ]
%arrayidx.us.i = getelementptr inbounds double, double* undef, i64 %indvars.iv8.i.SV.phi26
%3 = add i64 %indvars.iv8.i.SV.phi26, 1
br label %for.body3.us.i
for.inc8.us.i2: ; preds = %meshBB5
unreachable
eval_At_times_u.exit: ; preds = %meshBB5
ret void
meshBB: ; preds = %for.body3.us.i, %for.inc8.us.i
%indvars.iv8.i.SV.phi24 = phi i64 [ undef, %for.body3.us.i ], [ %3, %for.inc8.us.i ]
%meshStackVariable.phi = phi i32 [ %Opq.sa.calc12, %for.body3.us.i ], [ undef, %for.inc8.us.i ]
br i1 undef, label %for.body3.lr.ph.us.i, label %for.body3.us.i
meshBB1: ; preds = %for.inc8.us.i, %entry
br label %for.body3.lr.ph.us.i
meshBB5: ; preds = %entry
br i1 undef, label %eval_At_times_u.exit, label %for.inc8.us.i2
}


@@ -1,264 +0,0 @@
; RUN: opt < %s -loop-reduce -S | FileCheck %s
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Show that b^2 is expanded correctly.
define i32 @test_01(i32 %a) {
; CHECK-LABEL: @test_01
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B2]], -1
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
; CHECK-NEXT: ret i32 [[R2]]
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%b = add i32 %a, 1
%b.pow.2 = mul i32 %b, %b
%result = add i32 %b.pow.2, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %result
}
; Show that b^8 is expanded correctly.
define i32 @test_02(i32 %a) {
; CHECK-LABEL: @test_02
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B8]], -1
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
; CHECK-NEXT: ret i32 [[R2]]
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%b = add i32 %a, 1
%b.pow.2 = mul i32 %b, %b
%b.pow.4 = mul i32 %b.pow.2, %b.pow.2
%b.pow.8 = mul i32 %b.pow.4, %b.pow.4
%result = add i32 %b.pow.8, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %result
}
; Show that b^27 (27 = 1 + 2 + 8 + 16) is expanded correctly.
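; The expected multiply chain follows the binary decomposition
; 27 = 11011b: b^2 = b*b, b^3 = b*b^2, b^4 = (b^2)^2, b^8 = (b^4)^2,
; b^11 = b^3*b^8, b^16 = (b^8)^2, b^27 = b^11*b^16 -- seven multiplies
; instead of twenty-six, matching the CHECK lines below.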
define i32 @test_03(i32 %a) {
; CHECK-LABEL: @test_03
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
; CHECK-NEXT: [[B3:[^ ]+]] = mul i32 [[B]], [[B2]]
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
; CHECK-NEXT: [[B11:[^ ]+]] = mul i32 [[B3]], [[B8]]
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
; CHECK-NEXT: [[B27:[^ ]+]] = mul i32 [[B11]], [[B16]]
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B27]], -1
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
; CHECK-NEXT: ret i32 [[R2]]
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%b = add i32 %a, 1
%b.pow.2 = mul i32 %b, %b
%b.pow.4 = mul i32 %b.pow.2, %b.pow.2
%b.pow.8 = mul i32 %b.pow.4, %b.pow.4
%b.pow.16 = mul i32 %b.pow.8, %b.pow.8
%b.pow.24 = mul i32 %b.pow.16, %b.pow.8
%b.pow.25 = mul i32 %b.pow.24, %b
%b.pow.26 = mul i32 %b.pow.25, %b
%b.pow.27 = mul i32 %b.pow.26, %b
%result = add i32 %b.pow.27, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %result
}
; Show how a linear calculation of b^16 is turned into a logarithmic one.
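; The rewrite is ordinary square-and-multiply. A generic C sketch of the
; shape the expander emits (illustration only, not part of this test):
;
;   unsigned pow_u32(unsigned b, unsigned e) {
;     unsigned r = 1;
;     for (; e != 0; e >>= 1, b *= b)   /* b holds b^(2^k) at step k */
;       if (e & 1)
;         r *= b;                       /* multiply in each set bit */
;     return r;
;   }
;
; For e = 16 only the squarings survive: b^2, b^4, b^8, b^16.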
define i32 @test_04(i32 %a) {
; CHECK-LABEL: @test_04
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B16]], -1
; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
; CHECK-NEXT: ret i32 [[R2]]
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%b = add i32 %a, 1
%b.pow.2 = mul i32 %b, %b
%b.pow.3 = mul i32 %b.pow.2, %b
%b.pow.4 = mul i32 %b.pow.3, %b
%b.pow.5 = mul i32 %b.pow.4, %b
%b.pow.6 = mul i32 %b.pow.5, %b
%b.pow.7 = mul i32 %b.pow.6, %b
%b.pow.8 = mul i32 %b.pow.7, %b
%b.pow.9 = mul i32 %b.pow.8, %b
%b.pow.10 = mul i32 %b.pow.9, %b
%b.pow.11 = mul i32 %b.pow.10, %b
%b.pow.12 = mul i32 %b.pow.11, %b
%b.pow.13 = mul i32 %b.pow.12, %b
%b.pow.14 = mul i32 %b.pow.13, %b
%b.pow.15 = mul i32 %b.pow.14, %b
%b.pow.16 = mul i32 %b.pow.15, %b
%result = add i32 %b.pow.16, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %result
}
; The output here is reasonably big; we just check that the number of
; expanded instructions is sane.
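; (Here %tmp20 is b^(2^17), built from seventeen repeated squarings of
; %tmp3 = a + 1; the expansion outside the loop should likewise stay
; logarithmic rather than linear in the exponent, which the %100 / %150
; checks below approximate by bounding how many values get emitted.)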
define i32 @test_05(i32 %a) {
; CHECK-LABEL: @test_05
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK: %100
; CHECK-NOT: %150
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%tmp3 = add i32 %a, 1
%tmp4 = mul i32 %tmp3, %tmp3
%tmp5 = mul i32 %tmp4, %tmp4
%tmp6 = mul i32 %tmp5, %tmp5
%tmp7 = mul i32 %tmp6, %tmp6
%tmp8 = mul i32 %tmp7, %tmp7
%tmp9 = mul i32 %tmp8, %tmp8
%tmp10 = mul i32 %tmp9, %tmp9
%tmp11 = mul i32 %tmp10, %tmp10
%tmp12 = mul i32 %tmp11, %tmp11
%tmp13 = mul i32 %tmp12, %tmp12
%tmp14 = mul i32 %tmp13, %tmp13
%tmp15 = mul i32 %tmp14, %tmp14
%tmp16 = mul i32 %tmp15, %tmp15
%tmp17 = mul i32 %tmp16, %tmp16
%tmp18 = mul i32 %tmp17, %tmp17
%tmp19 = mul i32 %tmp18, %tmp18
%tmp20 = mul i32 %tmp19, %tmp19
%tmp22 = add i32 %tmp20, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %tmp22
}
; Show that the transformation works even if the calculation has other
; values (here %c) mixed into the multiply chain.
define i32 @test_06(i32 %a, i32 %c) {
; CHECK-LABEL: @test_06
; CHECK: entry:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
; CHECK: exit:
; CHECK: [[B:[^ ]+]] = add i32 %a, 1
; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
entry:
br label %loop
loop: ; preds = %loop, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%b = add i32 %a, 1
%b.pow.2.tmp = mul i32 %b, %b
%b.pow.2 = mul i32 %b.pow.2.tmp, %c
%b.pow.3 = mul i32 %b.pow.2, %b
%b.pow.4 = mul i32 %b.pow.3, %b
%b.pow.5 = mul i32 %b.pow.4, %b
%b.pow.6.tmp = mul i32 %b.pow.5, %b
%b.pow.6 = mul i32 %b.pow.6.tmp, %c
%b.pow.7 = mul i32 %b.pow.6, %b
%b.pow.8 = mul i32 %b.pow.7, %b
%b.pow.9 = mul i32 %b.pow.8, %b
%b.pow.10 = mul i32 %b.pow.9, %b
%b.pow.11 = mul i32 %b.pow.10, %b
%b.pow.12.tmp = mul i32 %b.pow.11, %b
%b.pow.12 = mul i32 %c, %b.pow.12.tmp
%b.pow.13 = mul i32 %b.pow.12, %b
%b.pow.14 = mul i32 %b.pow.13, %b
%b.pow.15 = mul i32 %b.pow.14, %b
%b.pow.16 = mul i32 %b.pow.15, %b
%result = add i32 %b.pow.16, %indvars.iv
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 80
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %result
}


@@ -1,36 +0,0 @@
; REQUIRES: asserts
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s
; PR33077. Check that the LSR Use formula to be inserted is already
; canonicalized and will not trigger an assertion.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: uwtable
define void @foo() {
cHeapLvb.exit:
br label %not_zero48.us
not_zero48.us: ; preds = %not_zero48.us, %cHeapLvb.exit
%indvars.iv.us = phi i64 [ %indvars.iv.next.us.7, %not_zero48.us ], [ undef, %cHeapLvb.exit ]
%0 = phi i32 [ %13, %not_zero48.us ], [ undef, %cHeapLvb.exit ]
%indvars.iv.next.us = add nuw nsw i64 %indvars.iv.us, 1
%1 = add i32 %0, 2
%2 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %indvars.iv.next.us
%3 = load i32, i32 addrspace(1)* %2, align 4
%4 = add i32 %0, 3
%5 = load i32, i32 addrspace(1)* undef, align 4
%6 = sub i32 undef, %5
%factor.us.2 = shl i32 %6, 1
%7 = add i32 %factor.us.2, %1
%8 = load i32, i32 addrspace(1)* undef, align 4
%9 = sub i32 %7, %8
%factor.us.3 = shl i32 %9, 1
%10 = add i32 %factor.us.3, %4
%11 = load i32, i32 addrspace(1)* undef, align 4
%12 = sub i32 %10, %11
%factor.us.4 = shl i32 %12, 1
%13 = add i32 %0, 8
%indvars.iv.next.us.7 = add nsw i64 %indvars.iv.us, 8
br label %not_zero48.us
}


@@ -1,65 +0,0 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
; Check that LSR formula canonicalization puts loop-invariant regs before
; the induction variable of the current loop, so exprs involving
; loop-invariant regs can be promoted outside of the current loop.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @foo(i32 %size, i32 %nsteps, i8* nocapture %maxarray, i8* nocapture readnone %buffer, i32 %init) local_unnamed_addr #0 {
entry:
%cmp25 = icmp sgt i32 %nsteps, 0
br i1 %cmp25, label %for.cond1.preheader.lr.ph, label %for.end12
for.cond1.preheader.lr.ph: ; preds = %entry
%cmp223 = icmp sgt i32 %size, 1
%t0 = sext i32 %init to i64
%wide.trip.count = zext i32 %size to i64
%wide.trip.count31 = zext i32 %nsteps to i64
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc10, %for.cond1.preheader.lr.ph
%indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.inc10 ]
br i1 %cmp223, label %for.body3.lr.ph, label %for.inc10
for.body3.lr.ph: ; preds = %for.cond1.preheader
%t1 = add nsw i64 %indvars.iv28, %t0
%t2 = trunc i64 %indvars.iv28 to i8
br label %for.body3
; Make sure loop-invariant items are grouped together so that the load
; address can be represented in one getelementptr.
; CHECK-LABEL: for.body3:
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ 1, %for.body3.lr.ph ], [ {{.*}}, %for.body3 ]
; CHECK-NOT: = phi i64
; CHECK-NEXT: [[LOADADDR:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
; CHECK-NEXT: = load i8, i8* [[LOADADDR]], align 1
; CHECK: br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
for.body3: ; preds = %for.body3, %for.body3.lr.ph
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%t5 = trunc i64 %indvars.iv to i8
%t3 = add nsw i64 %t1, %indvars.iv
%arrayidx = getelementptr inbounds i8, i8* %maxarray, i64 %t3
%t4 = load i8, i8* %arrayidx, align 1
%add5 = add i8 %t4, %t5
%add6 = add i8 %add5, %t2
%arrayidx9 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
store i8 %add6, i8* %arrayidx9, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
for.inc10.loopexit: ; preds = %for.body3
br label %for.inc10
for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader
%indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
%exitcond32 = icmp eq i64 %indvars.iv.next29, %wide.trip.count31
br i1 %exitcond32, label %for.end12.loopexit, label %for.cond1.preheader
for.end12.loopexit: ; preds = %for.inc10
br label %for.end12
for.end12: ; preds = %for.end12.loopexit, %entry
ret void
}


@@ -1,46 +0,0 @@
; RUN: opt -S -loop-reduce < %s | FileCheck %s
target triple = "x86_64-unknown-unknown"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @incorrect_offset_scaling(i64, i64*) {
top:
br label %L
L: ; preds = %idxend.10, %idxend, %L2, %top
br i1 undef, label %L, label %L1
L1: ; preds = %L1.preheader, %L2
%r13 = phi i64 [ %r1, %L2 ], [ 1, %L ]
; CHECK: %lsr.iv = phi i64 [ 0, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
; CHECK-NOT: %lsr.iv = phi i64 [ -1, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
; CHECK: br
%r0 = add i64 %r13, -1
br label %idxend.8
L2: ; preds = %idxend.8
%r1 = add i64 %r13, 1
br i1 undef, label %L, label %L1
if6: ; preds = %idxend.8
%r2 = add i64 %0, -1
%r3 = load i64, i64* %1, align 8
; CHECK: %r2 = add i64 %0, -1
; CHECK: %r3 = load i64
br label %ib
idxend.8: ; preds = %L1
br i1 undef, label %if6, label %L2
ib: ; preds = %if6
%r4 = mul i64 %r3, %r0
%r5 = add i64 %r2, %r4
%r6 = icmp ult i64 %r5, undef
; CHECK: %r4 = mul i64 %r3, %lsr.iv
; CHECK: %r5 = add i64 %r2, %r4
; CHECK: %r6 = icmp ult i64 %r5, undef
; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
%r7 = getelementptr i64, i64* undef, i64 %r5
store i64 1, i64* %r7, align 8
br label %L
}


@@ -1,302 +0,0 @@
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
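; The assumed C shape of @simple, reconstructed for orientation (not from
; the original source):
;
;   int simple(int *a, int *b, int x) {
;     int s = 0;
;     for (int *p = a; p != b; p += 4 * x)      /* 4 loads per step   */
;       s += p[0] + p[x] + p[2 * x] + p[3 * x]; /* strided by x ints  */
;     return s;
;   }
;
; With chaining, each load address is a small increment of the previous
; one instead of base + scaled-index addressing.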
;
; X64: @simple
; %x * 4
; X64: shlq $2
; no other address computation in the preheader
; X64-NEXT: xorl
; X64-NEXT: .p2align
; X64: %loop
; no complex address modes
; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
;
; X32: @simple
; no expensive address computation in the preheader
; X32-NOT: imul
; X32: %loop
; no complex address modes
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
entry:
br label %loop
loop:
%iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
%s = phi i32 [ 0, %entry ], [ %s4, %loop ]
%v = load i32, i32* %iv
%iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
%v1 = load i32, i32* %iv1
%iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
%v2 = load i32, i32* %iv2
%iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
%v3 = load i32, i32* %iv3
%s1 = add i32 %s, %v
%s2 = add i32 %s1, %v1
%s3 = add i32 %s2, %v2
%s4 = add i32 %s3, %v3
%iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x
%cmp = icmp eq i32* %iv4, %b
br i1 %cmp, label %exit, label %loop
exit:
ret i32 %s4
}
; @user is not currently chained because the IV is live across memory ops.
;
; X64: @user
; X64: shlq $4
; X64: lea
; X64: lea
; X64: %loop
; complex address modes
; X64: (%{{[^)]+}},%{{[^)]+}},
;
; X32: @user
; expensive address computation in the preheader
; X32: shll $4
; X32: lea
; X32: lea
; X32: %loop
; complex address modes
; X32: (%{{[^)]+}},%{{[^)]+}},
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
entry:
br label %loop
loop:
%iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
%s = phi i32 [ 0, %entry ], [ %s4, %loop ]
%v = load i32, i32* %iv
%iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
%v1 = load i32, i32* %iv1
%iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
%v2 = load i32, i32* %iv2
%iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
%v3 = load i32, i32* %iv3
%s1 = add i32 %s, %v
%s2 = add i32 %s1, %v1
%s3 = add i32 %s2, %v2
%s4 = add i32 %s3, %v3
%iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x
store i32 %s4, i32* %iv
%cmp = icmp eq i32* %iv4, %b
br i1 %cmp, label %exit, label %loop
exit:
ret i32 %s4
}
; @extrastride is a slightly more interesting case of a single
; complete chain with multiple strides. The test case IR is what LSR
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
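; The assumed C shape (names hypothetical): five stride-separated loads,
; then the base pointer advances by the combined stride s*5 + x:
;
;   void extrastride(char *m /* main */, int s /* main_stride */,
;                    int *res, int x, int y, int z) {
;     for (int i = 0; i != z; ++i) {
;       int sum = *(int *)m + *(int *)(m + s) + *(int *)(m + 2 * s)
;               + *(int *)(m + 3 * s) + *(int *)(m + 4 * s);
;       *res = sum;
;       m += 5 * s + x;    /* the total IV stride LSR should chain */
;       res += y;
;     }
;   }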
;
; X64: extrastride:
; We currently don't handle this on X64 because the sexts cause
; strange increment expressions like this:
; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
;
; X32: extrastride:
; no spills in the preheader
; X32-NOT: mov{{.*}}(%esp){{$}}
; X32: %for.body{{$}}
; no complex address modes
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
; no reloads
; X32-NOT: (%esp)
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%add.ptr.sum = shl i32 %main_stride, 1 ; s*2
%add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
%add.ptr2.sum = add i32 %x, %main_stride ; s + x
%add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
%add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.body
%main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
%0 = bitcast i8* %main.addr.011 to i32*
%1 = load i32, i32* %0, align 4
%add.ptr = getelementptr inbounds i8, i8* %main.addr.011, i32 %main_stride
%2 = bitcast i8* %add.ptr to i32*
%3 = load i32, i32* %2, align 4
%add.ptr1 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8* %add.ptr1 to i32*
%5 = load i32, i32* %4, align 4
%add.ptr2 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8* %add.ptr2 to i32*
%7 = load i32, i32* %6, align 4
%add.ptr3 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8* %add.ptr3 to i32*
%9 = load i32, i32* %8, align 4
%add = add i32 %3, %1
%add4 = add i32 %add, %5
%add5 = add i32 %add4, %7
%add6 = add i32 %add5, %9
store i32 %add6, i32* %res.addr.09, align 4
%add.ptr6 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr3.sum
%add.ptr7 = getelementptr inbounds i32, i32* %res.addr.09, i32 %y
%inc = add i32 %i.010, 1
%cmp = icmp eq i32 %inc, %z
br i1 %cmp, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; @foldedidx is an unrolled variant of this loop:
; for (unsigned long i = 0; i < len; i += s) {
; c[i] = a[i] + b[i];
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
;
; X64: foldedidx:
; X64: movzbl -3(
;
; X32: foldedidx:
; X32: movzbl 400(
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.07
%0 = load i8, i8* %arrayidx, align 1
%conv5 = zext i8 %0 to i32
%arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.07
%1 = load i8, i8* %arrayidx1, align 1
%conv26 = zext i8 %1 to i32
%add = add nsw i32 %conv26, %conv5
%conv3 = trunc i32 %add to i8
%arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.07
store i8 %conv3, i8* %arrayidx4, align 1
%inc1 = or i32 %i.07, 1
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc1
%2 = load i8, i8* %arrayidx.1, align 1
%conv5.1 = zext i8 %2 to i32
%arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc1
%3 = load i8, i8* %arrayidx1.1, align 1
%conv26.1 = zext i8 %3 to i32
%add.1 = add nsw i32 %conv26.1, %conv5.1
%conv3.1 = trunc i32 %add.1 to i8
%arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %inc1
store i8 %conv3.1, i8* %arrayidx4.1, align 1
%inc.12 = or i32 %i.07, 2
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.12
%4 = load i8, i8* %arrayidx.2, align 1
%conv5.2 = zext i8 %4 to i32
%arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.12
%5 = load i8, i8* %arrayidx1.2, align 1
%conv26.2 = zext i8 %5 to i32
%add.2 = add nsw i32 %conv26.2, %conv5.2
%conv3.2 = trunc i32 %add.2 to i8
%arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %inc.12
store i8 %conv3.2, i8* %arrayidx4.2, align 1
%inc.23 = or i32 %i.07, 3
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.23
%6 = load i8, i8* %arrayidx.3, align 1
%conv5.3 = zext i8 %6 to i32
%arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.23
%7 = load i8, i8* %arrayidx1.3, align 1
%conv26.3 = zext i8 %7 to i32
%add.3 = add nsw i32 %conv26.3, %conv5.3
%conv3.3 = trunc i32 %add.3 to i8
%arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %inc.23
store i8 %conv3.3, i8* %arrayidx4.3, align 1
%inc.3 = add nsw i32 %i.07, 4
%exitcond.3 = icmp eq i32 %inc.3, 400
br i1 %exitcond.3, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; @multioper tests instructions with multiple IV user operands. We
; should be able to chain them independently of each other.
;
; X64: @multioper
; X64: %for.body
; X64: movl %{{.*}},4)
; X64-NEXT: leal 1(
; X64-NEXT: movl %{{.*}},4)
; X64-NEXT: leal 2(
; X64-NEXT: movl %{{.*}},4)
; X64-NEXT: leal 3(
; X64-NEXT: movl %{{.*}},4)
;
; X32: @multioper
; X32: %for.body
; X32: movl %{{.*}},4)
; X32-NEXT: leal 1(
; X32-NEXT: movl %{{.*}},4)
; X32-NEXT: leal 2(
; X32-NEXT: movl %{{.*}},4)
; X32-NEXT: leal 3(
; X32-NEXT: movl %{{.*}},4)
define void @multioper(i32* %a, i32 %n) nounwind {
entry:
br label %for.body
for.body:
%p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
%i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
store i32 %i, i32* %p, align 4
%inc1 = or i32 %i, 1
%add.ptr.i1 = getelementptr inbounds i32, i32* %p, i32 1
store i32 %inc1, i32* %add.ptr.i1, align 4
%inc2 = add nsw i32 %i, 2
%add.ptr.i2 = getelementptr inbounds i32, i32* %p, i32 2
store i32 %inc2, i32* %add.ptr.i2, align 4
%inc3 = add nsw i32 %i, 3
%add.ptr.i3 = getelementptr inbounds i32, i32* %p, i32 3
store i32 %inc3, i32* %add.ptr.i3, align 4
%p.next = getelementptr inbounds i32, i32* %p, i32 4
%inc4 = add nsw i32 %i, 4
%cmp = icmp slt i32 %inc4, %n
br i1 %cmp, label %for.body, label %exit
exit:
ret void
}
; @testCmpZero has an ICmpZero LSR use that should not be hidden from
; LSR. Profitable chains should have more than one nonzero increment
; anyway.
;
; X32: @testCmpZero
; X32: %for.body82.us
; X32: cmp
; X32: jne
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
entry:
%dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx
%source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx
%add.ptr79.us.sum = add i32 %srcidx, %len
%lftr.limit = getelementptr i8, i8* %src, i32 %add.ptr79.us.sum
br label %for.body82.us
for.body82.us:
%dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
%source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
%0 = bitcast i8* %source to i32*
%1 = load i32, i32* %0, align 4
%trunc = trunc i32 %1 to i8
%add.ptr83.us = getelementptr inbounds i8, i8* %source, i32 4
%incdec.ptr91.us = getelementptr inbounds i8, i8* %dest, i32 1
store i8 %trunc, i8* %dest, align 1
%exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
br i1 %exitcond, label %return, label %for.body82.us
return:
ret void
}


@@ -1,96 +0,0 @@
; REQUIRES: asserts
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
; @sharedidx is an unrolled variant of this loop:
; for (unsigned long i = 0; i < len; i += s) {
; c[i] = a[i] + b[i];
; }
; where 's' cannot be folded into the addressing mode.
;
; This is not quite profitable to chain. But with -stress-ivchain, we
; can form three address chains in place of the shared induction
; variable.
; X64: sharedidx:
; X64: %for.body.preheader
; X64-NOT: leal ({{.*}},4)
; X64: %for.body.1
; X32: sharedidx:
; X32: %for.body.2
; X32: add
; X32: add
; X32: add
; X32: add
; X32: add
; X32: %for.body.3
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
entry:
%cmp8 = icmp eq i32 %len, 0
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body.3
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
%0 = load i8, i8* %arrayidx, align 1
%conv6 = zext i8 %0 to i32
%arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
%1 = load i8, i8* %arrayidx1, align 1
%conv27 = zext i8 %1 to i32
%add = add nsw i32 %conv27, %conv6
%conv3 = trunc i32 %add to i8
%arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
store i8 %conv3, i8* %arrayidx4, align 1
%add5 = add i32 %i.09, %s
%cmp = icmp ult i32 %add5, %len
br i1 %cmp, label %for.body.1, label %for.end
for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
ret void
for.body.1: ; preds = %for.body
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
%2 = load i8, i8* %arrayidx.1, align 1
%conv6.1 = zext i8 %2 to i32
%arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
%3 = load i8, i8* %arrayidx1.1, align 1
%conv27.1 = zext i8 %3 to i32
%add.1 = add nsw i32 %conv27.1, %conv6.1
%conv3.1 = trunc i32 %add.1 to i8
%arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
store i8 %conv3.1, i8* %arrayidx4.1, align 1
%add5.1 = add i32 %add5, %s
%cmp.1 = icmp ult i32 %add5.1, %len
br i1 %cmp.1, label %for.body.2, label %for.end
for.body.2: ; preds = %for.body.1
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
%4 = load i8, i8* %arrayidx.2, align 1
%conv6.2 = zext i8 %4 to i32
%arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
%5 = load i8, i8* %arrayidx1.2, align 1
%conv27.2 = zext i8 %5 to i32
%add.2 = add nsw i32 %conv27.2, %conv6.2
%conv3.2 = trunc i32 %add.2 to i8
%arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
store i8 %conv3.2, i8* %arrayidx4.2, align 1
%add5.2 = add i32 %add5.1, %s
%cmp.2 = icmp ult i32 %add5.2, %len
br i1 %cmp.2, label %for.body.3, label %for.end
for.body.3: ; preds = %for.body.2
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
%6 = load i8, i8* %arrayidx.3, align 1
%conv6.3 = zext i8 %6 to i32
%arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
%7 = load i8, i8* %arrayidx1.3, align 1
%conv27.3 = zext i8 %7 to i32
%add.3 = add nsw i32 %conv27.3, %conv6.3
%conv3.3 = trunc i32 %add.3 to i8
%arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
store i8 %conv3.3, i8* %arrayidx4.3, align 1
%add5.3 = add i32 %add5.2, %s
%cmp.3 = icmp ult i32 %add5.3, %len
br i1 %cmp.3, label %for.body, label %for.end
}


@@ -1,3 +0,0 @@
if 'X86' not in config.root.targets:
config.unsupported = True


@@ -1,58 +0,0 @@
; REQUIRES: x86-registered-target
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Strength reduction analysis here relies on the IV Users analysis, which
; only finds users among instructions whose types are treated as legal by
; the data layout. When this test is run in pure non-x86 configs (for
; example, ARM 64), the target triple is not recognized and a default
; data layout is used instead. That default layout does not have any
; legal types (not even i32), so the transformation does not happen.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"
; PR15470: LSR miscompile. The test2 function should return '1'.
;
; The SCEV expander cannot expand quadratic recurrences outside of the
; loop. This recurrence depends on %sub.us, so it can't be expanded.
; We cannot fold SCEVUnknown (%sub.us) into recurrences since it is
; declared after the loop.
;
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
; CHECK: %0 = sub i32 0, %sub.us
; CHECK: %1 = sub i32 %0, %lsr.iv.next
; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
entry:
br label %test2.loop
test2.loop:
%inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
%inc11.us = add nsw i32 %inc1115.us, 1
%cmp.us = icmp slt i32 %inc11.us, 2
br i1 %cmp.us, label %test2.loop, label %for.end
for.end:
%tobool.us = icmp eq i32 %inc1115.us, 0
%sub.us = select i1 %tobool.us, i32 0, i32 0
%mul.us = shl i32 %inc1115.us, 24
%sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
%sext.us = mul i32 %mul.us, %sub.cond.us
%f = ashr i32 %sext.us, 24
br label %exit
exit:
ret i32 %f
}


@@ -1,60 +0,0 @@
; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 }
@global = external local_unnamed_addr global %struct.ham, align 8
define void @foo() local_unnamed_addr {
bb:
%tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8
%tmp1 = and i64 %tmp, 1792
%tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8
%tmp3 = add i64 %tmp1, %tmp2
%tmp4 = load i8*, i8** null, align 8
%tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0
%tmp6 = sub i64 0, %tmp3
%tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6
%tmp8 = inttoptr i64 0 to i8*
br label %bb9
; Without filtering out non-optimal formulae that share the same ScaledReg
; and Scale, the strategy of narrowing the LSR search space by picking a
; winner reg would generate only one lsr.iv and a suboptimal result.
; CHECK-LABEL: @foo(
; CHECK: bb9:
; CHECK-NEXT: = phi i8*
; CHECK-NEXT: = phi i8*
bb9: ; preds = %bb12, %bb
%tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ]
%tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ]
br i1 false, label %bb18, label %bb12
bb12: ; preds = %bb9
%tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8
%tmp14 = bitcast i8* %tmp13 to i64*
%tmp15 = load i64, i64* %tmp14, align 1
%tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16
%tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16
br label %bb9
bb18: ; preds = %bb9
%tmp19 = icmp ugt i8* %tmp11, null
%tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8
%tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8
%tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20
%tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21
br label %bb24
bb24: ; preds = %bb24, %bb18
%tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ]
%tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ]
%tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1
%tmp28 = load i8, i8* %tmp25, align 1
%tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1
store i8 %tmp28, i8* %tmp26, align 1
%tmp30 = icmp eq i8* %tmp29, %tmp5
br label %bb24
}


@@ -1,52 +0,0 @@
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; The OPT test checks that LSR optimizes the compare against a static
; counter into a compare with 0.
; BOTH: for.body:
; INSN: icmp eq i64 %lsr.iv.next, 0
; REGS: icmp eq i64 %indvars.iv.next, 1024
; The LLC test checks that LSR optimizes the compare against a static counter.
; That means that instead of creating the following:
; movl %ecx, (%rdx,%rax,4)
; incq %rax
; cmpq $1024, %rax
; LSR should optimize out the cmp:
; movl %ecx, 4096(%rdx,%rax)
; addq $4, %rax
; or
; movl %ecx, 4096(%rdx,%rax,4)
; incq %rax
; CHECK: LBB0_1:
; CHECK-NEXT: movl 4096(%{{.+}},[[REG:%[0-9a-z]+]]
; CHECK-NEXT: addl 4096(%{{.+}},[[REG]]
; CHECK-NEXT: movl %{{.+}}, 4096(%{{.+}},[[REG]]
; CHECK-NOT: cmp
; CHECK: jne
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: norecurse nounwind uwtable
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
%tmp = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx2, align 4
%add = add nsw i32 %tmp1, %tmp
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
store i32 %add, i32* %arrayidx4, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}


@@ -1,58 +0,0 @@
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; The OPT runs check that LSR prefers fewer instructions to fewer registers.
; For x86, LSR should prefer a complicated address mode to new lsr induction
; variables.
; BOTH: for.body:
; INSN: getelementptr i32, i32* %x, i64 %indvars.iv
; INSN: getelementptr i32, i32* %y, i64 %indvars.iv
; INSN: getelementptr i32, i32* %q, i64 %indvars.iv
; REGS: %lsr.iv4 = phi
; REGS: %lsr.iv2 = phi
; REGS: %lsr.iv1 = phi
; REGS: getelementptr i32, i32* %lsr.iv1, i64 1
; REGS: getelementptr i32, i32* %lsr.iv2, i64 1
; REGS: getelementptr i32, i32* %lsr.iv4, i64 1
; The LLC run checks that LSR prefers fewer instructions to fewer registers.
; LSR should prefer a complicated address mode to additional add instructions.
; CHECK: LBB0_2:
; CHECK-NEXT: movl (%r{{.+}},
; CHECK-NEXT: addl (%r{{.+}},
; CHECK-NEXT: movl %e{{.+}}, (%r{{.+}},
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: norecurse nounwind uwtable
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q, i32 %n) {
entry:
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %n to i64
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void
for.body: ; preds = %for.body, %for.body.preheader
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
%tmp = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx2, align 4
%add = add nsw i32 %tmp1, %tmp
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
store i32 %add, i32* %arrayidx4, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

@@ -1,69 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Check that when we use an outer-loop induction variable inside an
; inner-loop induction value expression, LSR can still choose to use a
; single induction variable for the inner loop and share it in multiple
; induction value expressions.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
entry:
%cmp215 = icmp sgt i32 %size, 1
%t0 = zext i32 %size to i64
%t1 = sext i32 %nsteps to i64
%sub2 = sub i64 %t0, 2
br label %for.body
for.body: ; preds = %for.inc, %entry
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ]
%t2 = mul nsw i64 %indvars.iv2, %t0
br i1 %cmp215, label %for.body2.preheader, label %for.inc
for.body2.preheader: ; preds = %for.body
br label %for.body2
; Check that LSR generates only two induction variables for for.body2: one
; for the compare and one shared by the multiple array accesses.
; CHECK: for.body2:
; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
%indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
%arrayidx1 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
%v1 = load i8, i8* %arrayidx1, align 1
%idx2 = add nsw i64 %indvars.iv, %sub2
%arrayidx2 = getelementptr inbounds i8, i8* %maxarray, i64 %idx2
%v2 = load i8, i8* %arrayidx2, align 1
%tmpv = xor i8 %v1, %v2
%t4 = add nsw i64 %t2, %indvars.iv
%add.ptr = getelementptr inbounds i8, i8* %maxarray, i64 %t4
store i8 %tmpv, i8* %add.ptr, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%wide.trip.count = zext i32 %size to i64
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.inc.loopexit: ; preds = %for.body2
br label %for.inc
for.inc: ; preds = %for.inc.loopexit, %for.body
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%cmp = icmp slt i64 %indvars.iv.next3, %t1
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %for.inc
ret void
}
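; Note on the rewrite: the three accesses in for.body2 differ only by the
; loop-invariant offsets %sub2 and %t2, which is why one shared pointer IV
; suffices. A hedged sketch of the expected addressing (names illustrative):
;   %base2 = getelementptr i8, i8* %maxarray, i64 %sub2 ; invariant in for.body2
;   %base3 = getelementptr i8, i8* %maxarray, i64 %t2   ; invariant in for.body2
; so inside the loop every access is one of these bases plus the single IV,
; which is the ptrtoint/getelementptr pattern the checks above require.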

@@ -1,50 +0,0 @@
; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
entry:
br label %for.cond
; SCEVExpander used to create induction variables in the loop %for.cond
; while expanding the recurrence start values of the loop-strength-reduced
; values from %vector.body.
; CHECK-LABEL: indvar_expansion
; CHECK: for.cond:
; CHECK-NOT: phi i3
; CHECK: br i1 {{.+}}, label %for.cond
for.cond:
%indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
%cmp = icmp eq i8 undef, 0
%indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
br i1 %cmp, label %for.cond, label %for.cond2
for.cond2:
br i1 undef, label %for.cond2, label %for.body14.lr.ph
for.body14.lr.ph:
%sext = shl i64 %indvars.iv44, 32
%0 = ashr exact i64 %sext, 32
%1 = sub i64 undef, %indvars.iv44
%2 = and i64 %1, 4294967295
%3 = add i64 %2, 1
%fold = add i64 %1, 1
%n.mod.vf = and i64 %fold, 7
%n.vec = sub i64 %3, %n.mod.vf
%end.idx.rnd.down = add i64 %n.vec, %0
br label %vector.body
vector.body:
%index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
%4 = getelementptr inbounds i8, i8* %rowsptr, i64 %index
%5 = bitcast i8* %4 to <4 x i8>*
%wide.load = load <4 x i8>, <4 x i8>* %5, align 1
%index.next = add i64 %index, 8
%6 = icmp eq i64 %index.next, %end.idx.rnd.down
br i1 %6, label %for.end24, label %vector.body
for.end24:
ret void
}
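; For reference, a hypothetical shape of the old regression (not taken from
; the test): expanding the start value %0 = ashr exact (shl %indvars.iv44,
; 32), 32 used to materialize an extra narrow counter inside %for.cond,
; e.g. something like
;   %iv.trunc = phi i32 [ %iv.trunc.next, %for.cond ], [ 0, %entry ]
;   %iv.trunc.next = add i32 %iv.trunc, 1
; which the phi check above forbids ("phi i3" also matches "phi i32").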

@@ -1,67 +0,0 @@
; RUN: opt < %s -loop-reduce -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; LSR shouldn't normalize an IV if it can't be denormalized back to the
; original expression. In this test case, the normalized expression was
; denormalized to an expression different from the original, and we were
; losing the sign extension.
; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
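; Worked example of what goes wrong: %inc is an i8 that runs 1, 2, ..., 127
; and then wraps to -128, which is exactly what makes %cmp fail and the loop
; exit; so at %for.end, %conv7 must be sext(i8 -128) = -128. An IV widened
; to i32 without the trunc/sext pair required above would compute +128
; instead, e.g. (hypothetical, the buggy shape):
;   %conv7.bad = add nsw i32 %inc9.wide, 1 ; 127 + 1 = 128; the i8 wrap is lost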
@j = common global i32 0, align 4
@c = common global i32 0, align 4
@g = common global i32 0, align 4
@h = common global i8 0, align 1
@d = common global i32 0, align 4
@i = common global i32 0, align 4
@e = common global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
@a = common global i32 0, align 4
@b = common global i16 0, align 2
; Function Attrs: nounwind optsize ssp uwtable
define i32 @main() #0 {
entry:
store i8 0, i8* @h, align 1
%0 = load i32, i32* @j, align 4
%tobool.i = icmp eq i32 %0, 0
%1 = load i32, i32* @d, align 4
%cmp3 = icmp sgt i32 %1, -1
%.lobit = lshr i32 %1, 31
%.lobit.not = xor i32 %.lobit, 1
br label %for.body
for.body: ; preds = %entry, %fn3.exit
%inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
%conv = sext i8 %inc9 to i32
br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
land.rhs.i: ; preds = %for.body
store i32 0, i32* @c, align 4
br label %fn3.exit
fn3.exit: ; preds = %for.body, %land.rhs.i
%inc = add i8 %inc9, 1
%cmp = icmp sgt i8 %inc, -1
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %fn3.exit
%.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
store i32 %conv, i32* @g, align 4
store i32 %.lobit.not., i32* @i, align 4
store i8 %inc, i8* @h, align 1
%conv7 = sext i8 %inc to i32
%add = add nsw i32 %conv7, %conv
store i32 %add, i32* @e, align 4
%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
ret i32 0
}
; Function Attrs: nounwind optsize
declare i32 @printf(i8* nocapture readonly, ...) #1
attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind optsize }
