You've already forked linux-packaging-mono
acceptance-tests
data
debian
docs
external
Newtonsoft.Json
api-doc-tools
api-snapshot
aspnetwebstack
binary-reference-assemblies
bockbuild
boringssl
cecil
cecil-legacy
corefx
corert
helix-binaries
ikdasm
ikvm
illinker-test-assets
linker
llvm
bindings
cmake
docs
examples
include
lib
projects
resources
runtimes
scripts
test
Analysis
Assembler
Bindings
Bitcode
BugPoint
CodeGen
DebugInfo
Examples
ExecutionEngine
Feature
FileCheck
Instrumentation
Integer
JitListener
LTO
Linker
MC
Object
ObjectYAML
Other
SafepointIRVerifier
SymbolRewriter
TableGen
ThinLTO
Transforms
ADCE
AddDiscriminators
AlignmentFromAssumptions
ArgumentPromotion
AtomicExpand
BDCE
BranchFolding
CallSiteSplitting
CalledValuePropagation
CodeExtractor
CodeGenPrepare
ConstProp
ConstantHoisting
ConstantMerge
Coroutines
CorrelatedValuePropagation
CrossDSOCFI
DCE
DeadArgElim
DeadStoreElimination
DivRemPairs
EarlyCSE
EliminateAvailableExternally
EntryExitInstrumenter
ExpandMemCmp
Float2Int
ForcedFunctionAttrs
FunctionAttrs
FunctionImport
GCOVProfiling
GVN
GVNHoist
GVNSink
GlobalDCE
GlobalMerge
GlobalOpt
GlobalSplit
GuardWidening
IPConstantProp
IRCE
IndVarSimplify
IndirectBrExpand
InferAddressSpaces
InferFunctionAttrs
Inline
InstCombine
InstMerge
InstNamer
InstSimplify
InterleavedAccess
Internalize
JumpThreading
LCSSA
LICM
LoadStoreVectorizer
LoopDataPrefetch
LoopDeletion
LoopDistribute
LoopIdiom
LoopInterchange
LoopLoadElim
LoopPredication
LoopReroll
LoopRotate
LoopSimplify
LoopSimplifyCFG
LoopStrengthReduce
LoopUnroll
LoopUnswitch
LoopVectorize
AArch64
AMDGPU
ARM
PowerPC
SystemZ
X86
XCore
12-12-11-if-conv.ll
2012-10-20-infloop.ll
2012-10-22-isconsec.ll
2016-07-27-loop-vec.ll
align.ll
bsd_regex.ll
bzip_reverse_loops.ll
calloc.ll
cast-induction.ll
conditional-assignment.ll
consec_no_gep.ll
consecutive-ptr-uniforms.ll
control-flow.ll
cpp-new-array.ll
dbg.value.ll
dead_instructions.ll
debugloc.ll
diag-missing-instr-debug-loc.ll
diag-with-hotness-info-2.ll
diag-with-hotness-info.ll
discriminator.ll
duplicated-metadata.ll
ee-crash.ll
exact.ll
fcmp-vectorize.ll
first-order-recurrence.ll
flags.ll
float-induction.ll
float-reduction.ll
funcall.ll
gcc-examples.ll
gep_with_bitcast.ll
global_alias.ll
hints-trans.ll
hoist-loads.ll
i8-induction.ll
icmp-uniforms.ll
if-conv-crash.ll
if-conversion-edgemasks.ll
if-conversion-nest.ll
if-conversion-reduction.ll
if-conversion.ll
if-pred-non-void.ll
if-pred-not-when-safe.ll
if-pred-stores.ll
incorrect-dom-info.ll
increment.ll
induction-step.ll
induction.ll
induction_plus.ll
infiniteloop.ll
int_sideeffect.ll
interleaved-accesses-1.ll
interleaved-accesses-2.ll
interleaved-accesses-3.ll
interleaved-accesses-alias.ll
interleaved-accesses-pred-stores.ll
interleaved-accesses.ll
intrinsic.ll
iv_outside_user.ll
lcssa-crash.ll
legal_preheader_check.ll
lifetime.ll
loop-form.ll
loop-scalars.ll
loop-vect-memdep.ll
memdep.ll
metadata-unroll.ll
metadata-width.ll
metadata.ll
miniters.ll
minmax_reduction.ll
multi-use-reduction-bug.ll
multiple-address-spaces.ll
multiple-strides-vectorization.ll
no_array_bounds.ll
no_idiv_reduction.ll
no_int_induction.ll
no_outside_user.ll
no_switch.ll
noalias-md-licm.ll
noalias-md.ll
nofloat.ll
non-const-n.ll
nontemporal.ll
nsw-crash.ll
opt.ll
optsize.ll
partial-lcssa.ll
phi-cost.ll
phi-hang.ll
pr25281.ll
pr28541.ll
pr30654-phiscev-sext-trunc.ll
pr31098.ll
pr31190.ll
pr32859.ll
pr33706.ll
pr34681.ll
pr35773.ll
ptr-induction.ll
ptr_loops.ll
read-only.ll
reduction-small-size.ll
reduction.ll
reverse_induction.ll
reverse_iter.ll
runtime-check-address-space.ll
runtime-check-readonly-address-space.ll
runtime-check-readonly.ll
runtime-check.ll
runtime-limit.ll
safegep.ll
same-base-access.ll
scalar-select.ll
scalar_after_vectorization.ll
scev-exitlim-crash.ll
simple-unroll.ll
small-loop.ll
start-non-zero.ll
store-shuffle-bug.ll
struct_access.ll
tbaa-nodep.ll
tripcount.ll
undef-inst-bug.ll
unroll-novec-memcheck-metadata.ll
unroll.ll
unroll_novec.ll
unsafe-dep-remark.ll
unsized-pointee-crash.ll
value-ptr-bug.ll
vect-phiscev-sext-trunc.ll
vect.omp.persistence.ll
vect.stats.ll
vector-geps.ll
vectorize-once.ll
version-mem-access.ll
write-only.ll
zero-sized-pointee-crash.ll
LoopVersioning
LoopVersioningLICM
LowerAtomic
LowerExpectIntrinsic
LowerGuardIntrinsic
LowerInvoke
LowerSwitch
LowerTypeTests
Mem2Reg
MemCpyOpt
MergeFunc
MergeICmps
MetaRenamer
NameAnonGlobals
NaryReassociate
NewGVN
ObjCARC
PGOProfile
PartiallyInlineLibCalls
PhaseOrdering
PlaceSafepoints
PreISelIntrinsicLowering
PruneEH
Reassociate
Reg2Mem
RewriteStatepointsForGC
SCCP
SLPVectorizer
SROA
SafeStack
SampleProfile
Scalarizer
SeparateConstOffsetFromGEP
SimpleLoopUnswitch
SimplifyCFG
Sink
SpeculateAroundPHIs
SpeculativeExecution
StraightLineStrengthReduce
StripDeadPrototypes
StripSymbols
StructurizeCFG
TailCallElim
ThinLTOBitcodeWriter
Util
WholeProgramDevirt
Unit
Verifier
YAMLParser
tools
.clang-format
CMakeLists.txt
TestRunner.sh
lit.cfg.py
lit.site.cfg.py.in
tools
unittests
utils
.arcconfig
.clang-format
.clang-tidy
.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LICENSE.TXT
LLVMBuild.txt
README.txt
RELEASE_TESTERS.TXT
configure
llvm.spec.in
nuget-buildtasks
nunit-lite
roslyn-binaries
rx
xunit-binaries
ikvm-native
libgc
llvm
m4
man
mcs
mk
mono
msvc
po
runtime
samples
scripts
support
tools
COPYING.LIB
LICENSE
Makefile.am
Makefile.in
NEWS
README.md
acinclude.m4
aclocal.m4
autogen.sh
code_of_conduct.md
compile
config.guess
config.h.in
config.rpath
config.sub
configure.REMOVED.git-id
configure.ac.REMOVED.git-id
depcomp
install-sh
ltmain.sh.REMOVED.git-id
missing
mkinstalldirs
mono-uninstalled.pc.in
test-driver
winconfig.h
108 lines
3.4 KiB
LLVM
108 lines
3.4 KiB
LLVM
![]() |
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
|
||
|
|
||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||
|
|
||
|
; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
|
||
|
; access both x[k] and x[k-1].
|
||
|
;
|
||
|
; void kernel11(double *x, double *y, int n) {
|
||
|
; for ( int k=1 ; k<n ; k++ )
|
||
|
; x[k] = x[k-1] + y[k];
|
||
|
; }
|
||
|
|
||
|
; CHECK-LABEL: @kernel11(
|
||
|
; CHECK-NOT: <4 x double>
|
||
|
; CHECK: ret
|
||
|
; IR for the kernel11 loop sketched above, in unoptimized stack-slot form:
; every use of x, y, n and k goes through an alloca.
;   %x, %y - double pointers (read via slots %1 and %2)
;   %n     - i32 loop bound (read via slot %3)
; Always returns i32 0.
define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
|
||
|
; Stack slots: %1 holds x, %2 holds y, %3 holds n, %k is the loop counter.
%1 = alloca double*, align 8
|
||
|
%2 = alloca double*, align 8
|
||
|
%3 = alloca i32, align 4
|
||
|
%k = alloca i32, align 4
|
||
|
store double* %x, double** %1, align 8
|
||
|
store double* %y, double** %2, align 8
|
||
|
store i32 %n, i32* %3, align 4
|
||
|
; The counter starts at 1, so the first iteration reads x[0].
store i32 1, i32* %k, align 4
|
||
|
br label %4
|
||
|
|
||
|
; Loop header: continue into the body (%8) while k < n (signed), else exit (%28).
; <label>:4 ; preds = %25, %0
|
||
|
%5 = load i32, i32* %k, align 4
|
||
|
%6 = load i32, i32* %3, align 4
|
||
|
%7 = icmp slt i32 %5, %6
|
||
|
br i1 %7, label %8, label %28
|
||
|
|
||
|
; Loop body: x[k] = x[k-1] + y[k].
; <label>:8 ; preds = %4
|
||
|
; %14 = x[k-1]
%9 = load i32, i32* %k, align 4
|
||
|
%10 = sub nsw i32 %9, 1
|
||
|
%11 = sext i32 %10 to i64
|
||
|
%12 = load double*, double** %1, align 8
|
||
|
%13 = getelementptr inbounds double, double* %12, i64 %11
|
||
|
%14 = load double, double* %13, align 8
|
||
|
; %19 = y[k]
%15 = load i32, i32* %k, align 4
|
||
|
%16 = sext i32 %15 to i64
|
||
|
%17 = load double*, double** %2, align 8
|
||
|
%18 = getelementptr inbounds double, double* %17, i64 %16
|
||
|
%19 = load double, double* %18, align 8
|
||
|
%20 = fadd double %14, %19
|
||
|
; Store the sum to x[k]; the next iteration reads this element back as x[k-1].
%21 = load i32, i32* %k, align 4
|
||
|
%22 = sext i32 %21 to i64
|
||
|
%23 = load double*, double** %1, align 8
|
||
|
%24 = getelementptr inbounds double, double* %23, i64 %22
|
||
|
store double %20, double* %24, align 8
|
||
|
br label %25
|
||
|
|
||
|
; Loop latch: k = k + 1, then back to the header.
; <label>:25 ; preds = %8
|
||
|
%26 = load i32, i32* %k, align 4
|
||
|
%27 = add nsw i32 %26, 1
|
||
|
store i32 %27, i32* %k, align 4
|
||
|
br label %4
|
||
|
|
||
|
; Loop exit.
; <label>:28 ; preds = %4
|
||
|
ret i32 0
|
||
|
}
|
||
|
|
||
|
|
||
|
; A[i*7] is scalarized, and the different scalars can in theory wrap
|
||
|
; around and overwrite other scalar elements. However we can still
|
||
|
; vectorize because we can version the loop to avoid this case.
|
||
|
;
|
||
|
; void func2(int *a) {
|
||
|
; for (int i=0; i<256; ++i) {
|
||
|
; int x = a[i*7];
|
||
|
; if (x>3)
|
||
|
; x = x*x+x*4;
|
||
|
; a[i*7] = x+3;
|
||
|
; }
|
||
|
; }
|
||
|
|
||
|
; CHECK-LABEL: @func2(
|
||
|
; CHECK: <4 x i32>
|
||
|
; CHECK: ret
|
||
|
; IR for the strided loop sketched above: each of the 256 iterations reads
; a[i*7], conditionally transforms the value, and writes the result plus 3
; back to the same element.
;   %a - i32 pointer to the array being updated
; Always returns i32 0.
define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
|
||
|
br label %1
|
||
|
|
||
|
; Loop header: load x = a[i*7] and test x > 3 (signed).
; <label>:1 ; preds = %7, %0
|
||
|
; 64-bit induction variable i, starting at 0.
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
|
||
|
%2 = mul nsw i64 %indvars.iv, 7
|
||
|
; %3 is the address of a[i*7]; the store in block %7 reuses it.
%3 = getelementptr inbounds i32, i32* %a, i64 %2
|
||
|
%4 = load i32, i32* %3, align 4
|
||
|
%5 = icmp sgt i32 %4, 3
|
||
|
br i1 %5, label %6, label %7
|
||
|
|
||
|
; Taken only when x > 3: computes (x + 4) * x, i.e. the sketch's x*x + x*4.
; <label>:6 ; preds = %1
|
||
|
%tmp = add i32 %4, 4
|
||
|
%tmp1 = mul i32 %tmp, %4
|
||
|
br label %7
|
||
|
|
||
|
; Merge point and loop latch.
; <label>:7 ; preds = %6, %1
|
||
|
; x.0 is the transformed value when coming from %6, the loaded value otherwise.
%x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
|
||
|
%8 = add nsw i32 %x.0, 3
|
||
|
store i32 %8, i32* %3, align 4
|
||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||
|
; The exit test compares the incremented counter, truncated to i32, against 256.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||
|
%exitcond = icmp eq i32 %lftr.wideiv, 256
|
||
|
br i1 %exitcond, label %9, label %1
|
||
|
|
||
|
; Loop exit.
; <label>:9 ; preds = %7
|
||
|
ret i32 0
|
||
|
}
|