Files
acceptance-tests
data
debian
docs
external
Newtonsoft.Json
api-doc-tools
api-snapshot
aspnetwebstack
binary-reference-assemblies
bockbuild
boringssl
cecil
cecil-legacy
corefx
corert
helix-binaries
ikdasm
ikvm
illinker-test-assets
linker
llvm
bindings
cmake
docs
examples
include
lib
projects
resources
runtimes
scripts
test
Analysis
Assembler
Bindings
Bitcode
BugPoint
CodeGen
DebugInfo
Examples
ExecutionEngine
Feature
FileCheck
Instrumentation
Integer
JitListener
LTO
Linker
MC
Object
ObjectYAML
Other
SafepointIRVerifier
SymbolRewriter
TableGen
ThinLTO
Transforms
ADCE
AddDiscriminators
AlignmentFromAssumptions
ArgumentPromotion
AtomicExpand
BDCE
BranchFolding
CallSiteSplitting
CalledValuePropagation
CodeExtractor
CodeGenPrepare
ConstProp
ConstantHoisting
ConstantMerge
Coroutines
CorrelatedValuePropagation
CrossDSOCFI
DCE
DeadArgElim
DeadStoreElimination
DivRemPairs
EarlyCSE
EliminateAvailableExternally
EntryExitInstrumenter
ExpandMemCmp
Float2Int
ForcedFunctionAttrs
FunctionAttrs
FunctionImport
GCOVProfiling
GVN
GVNHoist
GVNSink
GlobalDCE
GlobalMerge
GlobalOpt
GlobalSplit
GuardWidening
IPConstantProp
IRCE
IndVarSimplify
IndirectBrExpand
InferAddressSpaces
InferFunctionAttrs
Inline
InstCombine
InstMerge
InstNamer
InstSimplify
InterleavedAccess
Internalize
JumpThreading
LCSSA
LICM
LoadStoreVectorizer
LoopDataPrefetch
LoopDeletion
LoopDistribute
LoopIdiom
LoopInterchange
LoopLoadElim
LoopPredication
LoopReroll
LoopRotate
LoopSimplify
LoopSimplifyCFG
LoopStrengthReduce
LoopUnroll
LoopUnswitch
LoopVectorize
LoopVersioning
LoopVersioningLICM
LowerAtomic
LowerExpectIntrinsic
LowerGuardIntrinsic
LowerInvoke
LowerSwitch
LowerTypeTests
Mem2Reg
MemCpyOpt
MergeFunc
MergeICmps
MetaRenamer
NameAnonGlobals
NaryReassociate
NewGVN
ObjCARC
PGOProfile
PartiallyInlineLibCalls
PhaseOrdering
PlaceSafepoints
PreISelIntrinsicLowering
PruneEH
Reassociate
Reg2Mem
RewriteStatepointsForGC
SCCP
SLPVectorizer
AArch64
AMDGPU
ARM
PowerPC
SystemZ
X86
PR32086.ll
PR34635.ll
PR35628_1.ll
PR35628_2.ll
PR35777.ll
PR35865.ll
addsub.ll
aggregate.ll
align.ll
arith-add.ll
arith-fp.ll
arith-mul.ll
arith-sub.ll
atomics.ll
bad_types.ll
barriercall.ll
bitreverse.ll
blending-shuffle.ll
bswap.ll
call.ll
cast.ll
cmp_sel.ll
commutativity.ll
compare-reduce.ll
consecutive-access.ll
continue_vectorizing.ll
crash_7zip.ll
crash_binaryop.ll
crash_bullet.ll
crash_bullet3.ll
crash_cmpop.ll
crash_dequeue.ll
crash_flop7.ll
crash_gep.ll
crash_lencod.ll
crash_mandeltext.ll
crash_netbsd_decompress.ll
crash_scheduling.ll
crash_sim4b1.ll
crash_smallpt.ll
crash_vectorizeTree.ll
cross_block_slp.ll
cse.ll
ctlz.ll
ctpop.ll
cttz.ll
cycle_dup.ll
debug_info.ll
diamond.ll
external_user.ll
extract.ll
extract_in_tree_user.ll
extractcost.ll
extractelement.ll
fabs.ll
fcopysign.ll
flag.ll
fma.ll
fptosi.ll
fptoui.ll
fround.ll.REMOVED.git-id
funclet.ll
gep.ll
gep_mismatch.ll
hoist.ll
horizontal-list.ll.REMOVED.git-id
horizontal-minmax.ll.REMOVED.git-id
horizontal.ll
implicitfloat.ll
in-tree-user.ll
insert-after-bundle.ll
insert-element-build-vector.ll
insertvalue.ll
intrinsic.ll
jumbled-load-multiuse.ll
jumbled-load.ll
limit.ll
lit.local.cfg
load-merge.ll
long_chains.ll
loopinvariant.ll
metadata.ll
minimum-sizes.ll
multi_block.ll
multi_user.ll
non-vectorizable-intrinsic.ll
odd_store.ll
operandorder.ll
opt.ll
ordering.ll
phi.ll
phi3.ll
phi_landingpad.ll
phi_overalignedtype.ll
powof2div.ll
pr16571.ll
pr16628.ll
pr16899.ll
pr18060.ll
pr19657.ll
pr23510.ll
pr27163.ll
pr31599.ll
propagate_ir_flags.ll
reduction.ll
reduction2.ll
reduction_loads.ll
reduction_unrolled.ll
remark_horcost.ll
remark_listcost.ll
remark_not_all_parts.ll
remark_unsupported.ll
reorder_phi.ll
return.ll
reverse_extract_elements.ll
rgb_phi.ll
saxpy.ll
schedule-bundle.ll
schedule_budget.ll
scheduling.ll
shift-ashr.ll
shift-lshr.ll
shift-shl.ll
simple-loop.ll
simplebb.ll
sitofp.ll
sqrt.ll
store-jumbled.ll
stores_vectorize.ll
tiny-tree.ll
uitofp.ll
undef_vect.ll
unreachable.ll
value-bug.ll
vect_copyable_in_binops.ll
vector.ll
vector_gep.ll
visit-dominated.ll
XCore
int_sideeffect.ll
SROA
SafeStack
SampleProfile
Scalarizer
SeparateConstOffsetFromGEP
SimpleLoopUnswitch
SimplifyCFG
Sink
SpeculateAroundPHIs
SpeculativeExecution
StraightLineStrengthReduce
StripDeadPrototypes
StripSymbols
StructurizeCFG
TailCallElim
ThinLTOBitcodeWriter
Util
WholeProgramDevirt
Unit
Verifier
YAMLParser
tools
.clang-format
CMakeLists.txt
TestRunner.sh
lit.cfg.py
lit.site.cfg.py.in
tools
unittests
utils
.arcconfig
.clang-format
.clang-tidy
.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LICENSE.TXT
LLVMBuild.txt
README.txt
RELEASE_TESTERS.TXT
configure
llvm.spec.in
nuget-buildtasks
nunit-lite
roslyn-binaries
rx
xunit-binaries
ikvm-native
libgc
llvm
m4
man
mcs
mk
mono
msvc
po
runtime
samples
scripts
support
tools
COPYING.LIB
LICENSE
Makefile.am
Makefile.in
NEWS
README.md
acinclude.m4
aclocal.m4
autogen.sh
code_of_conduct.md
compile
config.guess
config.h.in
config.rpath
config.sub
configure.REMOVED.git-id
configure.ac.REMOVED.git-id
depcomp
install-sh
ltmain.sh.REMOVED.git-id
missing
mkinstalldirs
mono-uninstalled.pc.in
test-driver
winconfig.h
linux-packaging-mono/external/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll

172 lines
7.3 KiB
LLVM
Raw Normal View History

; SLP vectorizer test input. The lit command line below feeds this file to opt
; with -basicaa -slp-vectorizer for an x86_64 macOS triple and -mcpu=corei7,
; prints textual IR (-S), and verifies that output with FileCheck against the
; directives embedded in this file.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
; Positive case, two doubles: every loop iteration loads elements 0 and 1 from
; the current source pointer and stores them to elements 0 and 1 of the current
; destination pointer. The directives expect the output to contain a
; <2 x double> load and a <2 x double> store ahead of a ret.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; in the visible part of the file -- confirm it exists or is intentionally empty.
; CHECK: tiny_tree_fully_vectorizable
; CHECK: load <2 x double>
; CHECK: store <2 x double>
; CHECK: ret
define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
entry:
; Skip the loop entirely when %count is zero.
%cmp12 = icmp eq i64 %count, 0
br i1 %cmp12, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
; Loop-carried state: induction counter plus the moving dst/src pointers.
%i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
%src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
; Copy element 0, then element 1 (adjacent on both the load and store side).
%0 = load double, double* %src.addr.013, align 8
store double %0, double* %dst.addr.014, align 8
%arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1
%1 = load double, double* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
store double %1, double* %arrayidx3, align 8
; Both pointers advance by the current counter value %i.015, not by a constant.
%add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
%add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
%inc = add i64 %i.015, 1
%exitcond = icmp eq i64 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Positive case, four floats: every loop iteration loads elements 0..3 from the
; current source pointer and stores them to elements 0..3 of the current
; destination pointer. The directives expect the output to contain a
; <4 x float> load and a <4 x float> store ahead of a ret.
; CHECK: tiny_tree_fully_vectorizable2
; CHECK: load <4 x float>
; CHECK: store <4 x float>
; CHECK: ret
define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
entry:
; Skip the loop entirely when %count is zero.
%cmp20 = icmp eq i64 %count, 0
br i1 %cmp20, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
; Loop-carried state: induction counter plus the moving dst/src pointers.
%i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
%src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
; Element 0.
%0 = load float, float* %src.addr.021, align 4
store float %0, float* %dst.addr.022, align 4
; Element 1.
%arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1
%1 = load float, float* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
store float %1, float* %arrayidx3, align 4
; Element 2.
%arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
%2 = load float, float* %arrayidx4, align 4
%arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
store float %2, float* %arrayidx5, align 4
; Element 3.
%arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
%3 = load float, float* %arrayidx6, align 4
%arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
store float %3, float* %arrayidx7, align 4
; Both pointers advance by the current counter value %i.023, not by a constant.
%add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
%add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
%inc = add i64 %i.023, 1
%exitcond = icmp eq i64 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; We do not vectorize the tiny tree which is not fully vectorizable.
; Negative case, two doubles: the stores hit adjacent destination elements
; (0 and 1) but the loads read source elements 0 and 2, which are not adjacent.
; The directives require that no <2 x double> appears between the function
; name and the following ret.
; CHECK: tiny_tree_not_fully_vectorizable
; CHECK-NOT: <2 x double>
; CHECK: ret
define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
entry:
; Skip the loop entirely when %count is zero.
%cmp12 = icmp eq i64 %count, 0
br i1 %cmp12, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
; Loop-carried state: induction counter plus the moving dst/src pointers.
%i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
%src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
%0 = load double, double* %src.addr.013, align 8
store double %0, double* %dst.addr.014, align 8
; Source index 2 (not 1) is what breaks the adjacency of the two loads.
%arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
%1 = load double, double* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
store double %1, double* %arrayidx3, align 8
; Both pointers advance by the current counter value %i.015, not by a constant.
%add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
%add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
%inc = add i64 %i.015, 1
%exitcond = icmp eq i64 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Negative case, four floats: the stores hit adjacent destination elements
; 0..3, but the loads read source elements 0, 4, 2 and 3, which do not form a
; contiguous run. The four stores must therefore not be combined into a single
; vector operation.
; This function only handles floats, so the forbidden pattern is the 4-wide
; float vector type that the positive float test expects; a double-vector
; pattern could never appear here and would make the check vacuous.
; The label directive scopes the negative check to this function's output,
; matching the style used by the store_splat and store_const tests.
; CHECK-LABEL: @tiny_tree_not_fully_vectorizable2(
; CHECK-NOT: <4 x float>
; CHECK: ret
define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
entry:
; Skip the loop entirely when %count is zero.
%cmp20 = icmp eq i64 %count, 0
br i1 %cmp20, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
; Loop-carried state: induction counter plus the moving dst/src pointers.
%i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
%src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
; Destination element 0 <- source element 0.
%0 = load float, float* %src.addr.021, align 4
store float %0, float* %dst.addr.022, align 4
; Destination element 1 <- source element 4 (the out-of-sequence load).
%arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
%1 = load float, float* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
store float %1, float* %arrayidx3, align 4
; Destination element 2 <- source element 2.
%arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
%2 = load float, float* %arrayidx4, align 4
%arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
store float %2, float* %arrayidx5, align 4
; Destination element 3 <- source element 3.
%arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
%3 = load float, float* %arrayidx6, align 4
%arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
store float %3, float* %arrayidx7, align 4
; Both pointers advance by the current counter value %i.023, not by a constant.
%add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
%add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
%inc = add i64 %i.023, 1
%exitcond = icmp eq i64 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Splat store: the same scalar float argument (%1) is written to elements 0..3
; of the float pointer argument (%0). The directives expect a single
; <4 x float> store in this function's output.
; Both parameters are unnamed, so they take the implicit numbers %0 and %1 and
; the first instruction result is %3 (%2 is the implicit entry block).
; CHECK-LABEL: store_splat
; CHECK: store <4 x float>
define void @store_splat(float*, float) {
%3 = getelementptr inbounds float, float* %0, i64 0
store float %1, float* %3, align 4
%4 = getelementptr inbounds float, float* %0, i64 1
store float %1, float* %4, align 4
%5 = getelementptr inbounds float, float* %0, i64 2
store float %1, float* %5, align 4
%6 = getelementptr inbounds float, float* %0, i64 3
store float %1, float* %6, align 4
ret void
}
; Constant store: four distinct i32 constants (10, 30, 20, 40) are written to
; elements 0..3 of %a. The directives expect a single <4 x i32> store in this
; function's output.
; CHECK-LABEL: store_const
; CHECK: store <4 x i32>
define void @store_const(i32* %a) {
entry:
%ptr0 = getelementptr inbounds i32, i32* %a, i64 0
store i32 10, i32* %ptr0, align 4
%ptr1 = getelementptr inbounds i32, i32* %a, i64 1
store i32 30, i32* %ptr1, align 4
%ptr2 = getelementptr inbounds i32, i32* %a, i64 2
store i32 20, i32* %ptr2, align 4
%ptr3 = getelementptr inbounds i32, i32* %a, i64 3
store i32 40, i32* %ptr3, align 4
ret void
}