LLVM IR test from external/llvm/test/Transforms/SLPVectorizer/X86 in the linux-packaging-mono repository (177 lines, 8.0 KiB).
; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @sin(double)
declare double @cos(double)
declare double @pow(double, double)
declare double @exp2(double)
declare double @sqrt(double)
declare i64 @round(i64)
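
; This test exercises SLP vectorization of paired math-library calls: each
; function below loads two adjacent doubles, applies the same libm call to
; each, and stores the two results contiguously. With -slp-threshold=-999 the
; cost model is effectively bypassed, so whenever the calls are known to be
; side-effect free the vectorizer should replace the pair of scalar calls with
; a single call to the corresponding <2 x double> intrinsic; the CHECK lines
; (matched by FileCheck) verify the expected output of opt -S.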
define void @sin_libm(double* %a, double* %b) {
; CHECK-LABEL: @sin_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sin1 = tail call double @sin(double %a0) nounwind readnone
  %sin2 = tail call double @sin(double %a1) nounwind readnone
  store double %sin1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sin2, double* %idx2, align 8
  ret void
}
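
; Same pattern as @sin_libm, but with @cos; the pair of scalar calls is
; expected to become a single @llvm.cos.v2f64 call.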
define void @cos_libm(double* %a, double* %b) {
; CHECK-LABEL: @cos_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %cos1 = tail call double @cos(double %a0) nounwind readnone
  %cos2 = tail call double @cos(double %a1) nounwind readnone
  store double %cos1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %cos2, double* %idx2, align 8
  ret void
}
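
; Here each scalar call is pow(x, x), so the vectorized @llvm.pow.v2f64 call
; is expected to take the same <2 x double> value for both operands.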
define void @pow_libm(double* %a, double* %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
  %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
  store double %pow1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %pow2, double* %idx2, align 8
  ret void
}
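
; Despite the function name, the scalar calls here are to @exp2, so the
; expected vector form is @llvm.exp2.v2f64.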
define void @exp_libm(double* %a, double* %b) {
; CHECK-LABEL: @exp_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %exp1 = tail call double @exp2(double %a0) nounwind readnone
  %exp2 = tail call double @exp2(double %a1) nounwind readnone
  store double %exp1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %exp2, double* %idx2, align 8
  ret void
}

; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
; We just need to know that errno is not set (readnone).
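; The @sqrt call sites below are marked readnone, which is what tells the
; vectorizer they cannot write errno, so replacing them with a single
; @llvm.sqrt.v2f64 call is legal.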
define void @sqrt_libm_no_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_no_errno(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
  %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
; input would result in a nan output ("On a domain error, the function returns an
; implementation-defined value.")
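; Note that these call sites carry only nounwind (no readnone), so the calls
; may write errno and must stay scalar; the CHECK lines verify that no vector
; code is generated.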
define void @sqrt_libm_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_errno(
; CHECK-NEXT:    [[A0:%.*]] = load double, double* %a, align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* %a, i64 1
; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
; CHECK-NEXT:    store double [[SQRT1]], double* %b, align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* %b, i64 1
; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
  %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; Negative test case
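; @round is declared here with a custom i64 signature rather than libm's
; double round(double), so it is not recognized as a vectorizable library
; call and the scalar calls must remain.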
define void @round_custom(i64* %a, i64* %b) {
; CHECK-LABEL: @round_custom(
; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* %a, align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* %a, i64 1
; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
; CHECK-NEXT:    store i64 [[ROUND1]], i64* %b, align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* %b, i64 1
; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load i64, i64* %a, align 8
  %idx1 = getelementptr inbounds i64, i64* %a, i64 1
  %a1 = load i64, i64* %idx1, align 8
  %round1 = tail call i64 @round(i64 %a0) nounwind readnone
  %round2 = tail call i64 @round(i64 %a1) nounwind readnone
  store i64 %round1, i64* %b, align 8
  %idx2 = getelementptr inbounds i64, i64* %b, i64 1
  store i64 %round2, i64* %idx2, align 8
  ret void
}
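
; The declarations of the vector intrinsics introduced by the vectorizer are
; checked below, along with the attribute group they share.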
; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]

; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable }