You've already forked linux-packaging-mono
acceptance-tests
data
debian
docs
external
Newtonsoft.Json
api-doc-tools
api-snapshot
aspnetwebstack
bdwgc
binary-reference-assemblies
bockbuild
boringssl
cecil
cecil-legacy
corefx
corert
helix-binaries
ikdasm
ikvm
illinker-test-assets
linker
llvm-project
clang
clang-tools-extra
compiler-rt
eng
libcxx
libcxxabi
libunwind
lld
lldb
llvm
bindings
cmake
docs
examples
include
lib
projects
resources
runtimes
scripts
test
Analysis
Assembler
Bindings
Bitcode
BugPoint
CodeGen
DebugInfo
Examples
ExecutionEngine
Feature
FileCheck
Instrumentation
Integer
JitListener
LTO
Linker
MC
Object
ObjectYAML
Other
SafepointIRVerifier
SymbolRewriter
TableGen
ThinLTO
Transforms
ADCE
AddDiscriminators
AlignmentFromAssumptions
ArgumentPromotion
AtomicExpand
BDCE
BranchFolding
CallSiteSplitting
CalledValuePropagation
CodeExtractor
CodeGenPrepare
ConstProp
ConstantHoisting
ConstantMerge
Coroutines
CorrelatedValuePropagation
CrossDSOCFI
DCE
DeadArgElim
DeadStoreElimination
DivRemPairs
EarlyCSE
EliminateAvailableExternally
EntryExitInstrumenter
ExpandMemCmp
Float2Int
ForcedFunctionAttrs
FunctionAttrs
FunctionImport
GCOVProfiling
GVN
GVNHoist
GVNSink
GlobalDCE
GlobalMerge
GlobalOpt
GlobalSplit
GuardWidening
IPConstantProp
IRCE
IndVarSimplify
IndirectBrExpand
InferAddressSpaces
InferFunctionAttrs
Inline
InstCombine
InstMerge
InstNamer
InstSimplify
InterleavedAccess
Internalize
JumpThreading
LCSSA
LICM
LoadStoreVectorizer
LoopDataPrefetch
LoopDeletion
LoopDistribute
LoopIdiom
LoopInterchange
LoopLoadElim
LoopPredication
LoopReroll
LoopRotate
LoopSimplify
LoopSimplifyCFG
LoopStrengthReduce
LoopUnroll
LoopUnswitch
LoopVectorize
AArch64
AMDGPU
ARM
PowerPC
SystemZ
X86
XCore
12-12-11-if-conv.ll
2012-10-20-infloop.ll
2012-10-22-isconsec.ll
2016-07-27-loop-vec.ll
align.ll
bsd_regex.ll
bzip_reverse_loops.ll
calloc.ll
cast-induction.ll
conditional-assignment.ll
consec_no_gep.ll
consecutive-ptr-uniforms.ll
control-flow.ll
cpp-new-array.ll
dbg.value.ll
dead_instructions.ll
debugloc.ll
diag-missing-instr-debug-loc.ll
diag-with-hotness-info-2.ll
diag-with-hotness-info.ll
discriminator.ll
duplicated-metadata.ll
ee-crash.ll
exact.ll
fcmp-vectorize.ll
first-order-recurrence.ll
flags.ll
float-induction.ll
float-reduction.ll
funcall.ll
gcc-examples.ll
gep_with_bitcast.ll
global_alias.ll
hints-trans.ll
hoist-loads.ll
i8-induction.ll
icmp-uniforms.ll
if-conv-crash.ll
if-conversion-edgemasks.ll
if-conversion-nest.ll
if-conversion-reduction.ll
if-conversion.ll
if-pred-non-void.ll
if-pred-not-when-safe.ll
if-pred-stores.ll
incorrect-dom-info.ll
increment.ll
induction-step.ll
induction.ll
induction_plus.ll
infiniteloop.ll
int_sideeffect.ll
interleaved-accesses-1.ll
interleaved-accesses-2.ll
interleaved-accesses-3.ll
interleaved-accesses-alias.ll
interleaved-accesses-pred-stores.ll
interleaved-accesses.ll
intrinsic.ll
iv_outside_user.ll
lcssa-crash.ll
legal_preheader_check.ll
lifetime.ll
loop-form.ll
loop-scalars.ll
loop-vect-memdep.ll
memdep.ll
metadata-unroll.ll
metadata-width.ll
metadata.ll
miniters.ll
minmax_reduction.ll
multi-use-reduction-bug.ll
multiple-address-spaces.ll
multiple-strides-vectorization.ll
no_array_bounds.ll
no_idiv_reduction.ll
no_int_induction.ll
no_outside_user.ll
no_switch.ll
noalias-md-licm.ll
noalias-md.ll
nofloat.ll
non-const-n.ll
nontemporal.ll
nsw-crash.ll
opt.ll
optsize.ll
partial-lcssa.ll
phi-cost.ll
phi-hang.ll
pr25281.ll
pr28541.ll
pr30654-phiscev-sext-trunc.ll
pr31098.ll
pr31190.ll
pr32859.ll
pr33706.ll
pr34681.ll
pr35773.ll
ptr-induction.ll
ptr_loops.ll
read-only.ll
reduction-small-size.ll
reduction.ll
reverse_induction.ll
reverse_iter.ll
runtime-check-address-space.ll
runtime-check-readonly-address-space.ll
runtime-check-readonly.ll
runtime-check.ll
runtime-limit.ll
safegep.ll
same-base-access.ll
scalar-select.ll
scalar_after_vectorization.ll
scev-exitlim-crash.ll
simple-unroll.ll
small-loop.ll
start-non-zero.ll
store-shuffle-bug.ll
struct_access.ll
tbaa-nodep.ll
tripcount.ll
undef-inst-bug.ll
unroll-novec-memcheck-metadata.ll
unroll.ll
unroll_novec.ll
unsafe-dep-remark.ll
unsized-pointee-crash.ll
value-ptr-bug.ll
vect-phiscev-sext-trunc.ll
vect.omp.persistence.ll
vect.stats.ll
vector-geps.ll
vectorize-once.ll
version-mem-access.ll
write-only.ll
zero-sized-pointee-crash.ll
LoopVersioning
LoopVersioningLICM
LowerAtomic
LowerExpectIntrinsic
LowerGuardIntrinsic
LowerInvoke
LowerSwitch
LowerTypeTests
Mem2Reg
MemCpyOpt
MergeFunc
MergeICmps
MetaRenamer
NameAnonGlobals
NaryReassociate
NewGVN
ObjCARC
PGOProfile
PartiallyInlineLibCalls
PhaseOrdering
PlaceSafepoints
PreISelIntrinsicLowering
PruneEH
Reassociate
Reg2Mem
RewriteStatepointsForGC
SCCP
SLPVectorizer
SROA
SafeStack
SampleProfile
Scalarizer
SeparateConstOffsetFromGEP
SimpleLoopUnswitch
SimplifyCFG
Sink
SpeculateAroundPHIs
SpeculativeExecution
StraightLineStrengthReduce
StripDeadPrototypes
StripSymbols
StructurizeCFG
TailCallElim
ThinLTOBitcodeWriter
Util
WholeProgramDevirt
Unit
Verifier
YAMLParser
tools
.clang-format
CMakeLists.txt
TestRunner.sh
lit.cfg.py
lit.site.cfg.py.in
tools
unittests
utils
.arcconfig
.clang-format
.clang-tidy
.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LICENSE.TXT
LLVMBuild.txt
README.txt
RELEASE_TESTERS.TXT
configure
llvm.spec.in
version.txt.in
nuget
openmp
polly
Directory.Build.props
Directory.Build.targets
NuGet.config
azure-pipelines.yml
build.cmd
build.sh
dir.common.props
global.json
llvm.proj
mxe-Win64.cmake.in
nuget-buildtasks
nunit-lite
roslyn-binaries
rx
xunit-binaries
how-to-bump-roslyn-binaries.md
ikvm-native
llvm
m4
man
mcs
mono
msvc
netcore
po
runtime
samples
scripts
support
tools
COPYING.LIB
LICENSE
Makefile.am
Makefile.in
NEWS
README.md
acinclude.m4
aclocal.m4
autogen.sh
code_of_conduct.md
compile
config.guess
config.h.in
config.rpath
config.sub
configure.REMOVED.git-id
configure.ac.REMOVED.git-id
depcomp
install-sh
ltmain.sh.REMOVED.git-id
missing
mkinstalldirs
mono-uninstalled.pc.in
test-driver
winconfig.h
222 lines
8.1 KiB
LLVM
222 lines
8.1 KiB
LLVM
![]() |
; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
|
||
|
|
||
|
; Check vectorization that would ordinarily require a runtime bounds
|
||
|
; check on the pointers when mixing address spaces. For now we cannot
|
||
|
; assume address spaces do not alias, and we can't assume that
|
||
|
; different pointers are directly comparable.
|
||
|
;
|
||
|
; These all test this basic loop for different combinations of address
|
||
|
; spaces, and swapping in globals or adding noalias.
|
||
|
;
|
||
|
;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
|
||
|
;{
|
||
|
; for (int i = 0; i < n; ++i)
|
||
|
; {
|
||
|
; a[i] = 3 * b[i];
|
||
|
; }
|
||
|
;}
|
||
|
|
||
|
; Artificial datalayout
|
||
|
; Fields of the layout string that matter for this test:
;   e            - little-endian
;   p:32:32:32   - pointers in the default address space are 32 bits wide
;   p1:16:16:16  - pointers in address space 1 are 16 bits wide
; so default-address-space and addrspace(1) pointers differ in size.
target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
|
||
|
|
||
|
|
||
|
; Zero-initialized array of 1024 i32 in address space 1. It is the store
; target in @arst0 and @aoeu and the load source in @arst1.
@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
|
||
|
; Zero-initialized array of 1024 i32 in address space 2. It is the load
; source in @aoeu.
@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16
|
||
|
|
||
|
; Both parameters are unidentified objects with the same address
|
||
|
; space, so this should vectorize normally.
|
||
|
define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK-LABEL: @foo(
; CHECK: <4 x i32>
; CHECK: ret

; Computes a[i] = 3 * b[i] for i in [0, n). Both %a and %b are plain
; (not noalias) addrspace(1) pointers; vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load b[i].
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32, i32 addrspace(1)* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store 3 * b[i] to a[i].
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; Parameters are unidentified and different address spaces, so cannot vectorize.
|
||
|
define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK-LABEL: @bar0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

; Computes a[i] = 3 * b[i] for i in [0, n). The destination %a is in the
; default address space and the source %b is in addrspace(1); neither is
; noalias, and no vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load b[i] from addrspace(1).
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32, i32 addrspace(1)* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store 3 * b[i] to a[i] in the default address space.
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds i32, i32* %a, i64 %dst.idx
  store i32 %tripled, i32* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; Swapping which argument is in addrspace(1) should give the same result:
; the loop is not vectorized.
|
||
|
define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
; CHECK-LABEL: @bar1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

; Computes a[i] = 3 * b[i] for i in [0, n). The destination %a is in
; addrspace(1) and the source %b is in the default address space; neither
; is noalias, and no vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load b[i] from the default address space.
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds i32, i32* %b, i64 %src.idx
  %val = load i32, i32* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store 3 * b[i] to a[i] in addrspace(1).
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; We should still be able to vectorize with noalias even if the
|
||
|
; address spaces are different.
|
||
|
define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
; CHECK-LABEL: @bar2(
; CHECK: <4 x i32>
; CHECK: ret

; Computes a[i] = 3 * b[i] for i in [0, n). The pointers are in different
; address spaces (default for %a, addrspace(1) for %b) but both are marked
; noalias; vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load b[i] from addrspace(1).
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32, i32 addrspace(1)* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store 3 * b[i] to a[i] in the default address space.
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds i32, i32* %a, i64 %dst.idx
  store i32 %tripled, i32* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; Store to identified global with different address space. This isn't
|
||
|
; generally safe and shouldn't be vectorized.
|
||
|
define void @arst0(i32* %b, i32 %n) #0 {
; CHECK-LABEL: @arst0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

; Computes g_as1[i] = 3 * b[i] for i in [0, n). The source %b is an
; argument in the default address space and the destination is the
; addrspace(1) global @g_as1; no vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load b[i] from the default address space.
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds i32, i32* %b, i64 %src.idx
  %val = load i32, i32* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store 3 * b[i] into element i of the global array.
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
|
||
|
; Load from identified global with different address space.
|
||
|
; This isn't generally safe and shouldn't be vectorized.
|
||
|
define void @arst1(i32* %b, i32 %n) #0 {
; CHECK-LABEL: @arst1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

; Computes b[i] = 3 * g_as1[i] for i in [0, n). The source is the
; addrspace(1) global @g_as1 and the destination %b is an argument in the
; default address space; no vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load element i of the global array.
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %src.idx
  %val = load i32, i32 addrspace(1)* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store the tripled value to b[i] in the default address space.
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds i32, i32* %b, i64 %dst.idx
  store i32 %tripled, i32* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; Read and write to 2 identified globals in different address
|
||
|
; spaces. This should be vectorized.
|
||
|
define void @aoeu(i32 %n) #0 {
; CHECK-LABEL: @aoeu(
; CHECK: <4 x i32>
; CHECK: ret

; Computes g_as1[i] = 3 * q_as2[i] for i in [0, n). Both operands are
; distinct globals (addrspace(2) source, addrspace(1) destination);
; vector code is expected.
entry:
  ; Skip the loop entirely when n <= 0.
  %has.iters = icmp slt i32 0, %n
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %loop, %entry
  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load element i of the addrspace(2) global.
  %src.idx = sext i32 %iv to i64
  %src.ptr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %src.idx
  %val = load i32, i32 addrspace(2)* %src.ptr, align 4
  %tripled = mul nsw i32 %val, 3
  ; Store the tripled value into element i of the addrspace(1) global.
  %dst.idx = sext i32 %iv to i64
  %dst.ptr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.ptr, align 4
  ; Advance and loop while i < n.
  %iv.next = add nsw i32 %iv, 1
  %continue = icmp slt i32 %iv.next, %n
  br i1 %continue, label %loop, label %exit

exit:                                             ; preds = %loop, %entry
  ret void
}
|
||
|
|
||
|
; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|