Files
acceptance-tests
data
debian
docs
external
Newtonsoft.Json
api-doc-tools
api-snapshot
aspnetwebstack
bdwgc
binary-reference-assemblies
bockbuild
boringssl
cecil
cecil-legacy
corefx
corert
helix-binaries
ikdasm
ikvm
illinker-test-assets
linker
llvm-project
clang
clang-tools-extra
compiler-rt
libcxx
libcxxabi
libunwind
lld
lldb
llvm
bindings
cmake
docs
examples
include
lib
Analysis
AsmParser
BinaryFormat
Bitcode
CodeGen
DebugInfo
Demangle
ExecutionEngine
FuzzMutate
Fuzzer
IR
IRReader
LTO
LineEditor
Linker
MC
Object
ObjectYAML
Option
Passes
ProfileData
Support
TableGen
Target
AArch64
AMDGPU
ARC
ARM
AVR
BPF
Hexagon
Lanai
MSP430
Mips
NVPTX
Nios2
PowerPC
RISCV
Sparc
SystemZ
WebAssembly
X86
AsmParser
Disassembler
InstPrinter
MCTargetDesc
TargetInfo
Utils
CMakeLists.txt
LLVMBuild.txt
README-FPStack.txt
README-MMX.txt
README-SSE.txt
README-UNIMPLEMENTED.txt
README-X86-64.txt
README.txt
X86.h
X86.td
X86AsmPrinter.cpp
X86AsmPrinter.h
X86CallFrameOptimization.cpp
X86CallLowering.cpp
X86CallLowering.h
X86CallingConv.cpp
X86CallingConv.h
X86CallingConv.td
X86CmovConversion.cpp
X86DomainReassignment.cpp
X86EvexToVex.cpp
X86ExpandPseudo.cpp
X86FastISel.cpp.REMOVED.git-id
X86FixupBWInsts.cpp
X86FixupLEAs.cpp
X86FixupSetCC.cpp
X86FlagsCopyLowering.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp.REMOVED.git-id
X86FrameLowering.h
X86GenRegisterBankInfo.def
X86ISelDAGToDAG.cpp.REMOVED.git-id
X86ISelLowering.cpp.REMOVED.git-id
X86ISelLowering.h
X86Instr3DNow.td
X86InstrAVX512.td.REMOVED.git-id
X86InstrArithmetic.td
X86InstrBuilder.h
X86InstrCMovSetCC.td
X86InstrCompiler.td
X86InstrControl.td
X86InstrExtension.td
X86InstrFMA.td
X86InstrFMA3Info.cpp
X86InstrFMA3Info.h
X86InstrFPStack.td
X86InstrFormats.td
X86InstrFragmentsSIMD.td
X86InstrInfo.cpp.REMOVED.git-id
X86InstrInfo.h
X86InstrInfo.td.REMOVED.git-id
X86InstrMMX.td
X86InstrMPX.td
X86InstrSGX.td
X86InstrSSE.td.REMOVED.git-id
X86InstrSVM.td
X86InstrShiftRotate.td
X86InstrSystem.td
X86InstrTSX.td
X86InstrVMX.td
X86InstrVecCompiler.td
X86InstrXOP.td
X86InstructionSelector.cpp
X86InterleavedAccess.cpp
X86IntrinsicsInfo.h.REMOVED.git-id
X86LegalizerInfo.cpp
X86LegalizerInfo.h
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
X86MachineFunctionInfo.h
X86MacroFusion.cpp
X86MacroFusion.h
X86OptimizeLEAs.cpp
X86PadShortFunction.cpp
X86RegisterBankInfo.cpp
X86RegisterBankInfo.h
X86RegisterBanks.td
X86RegisterInfo.cpp
X86RegisterInfo.h
X86RegisterInfo.td
X86RetpolineThunks.cpp
X86SchedBroadwell.td.REMOVED.git-id
X86SchedHaswell.td.REMOVED.git-id
X86SchedSandyBridge.td.REMOVED.git-id
X86SchedSkylakeClient.td.REMOVED.git-id
X86SchedSkylakeServer.td.REMOVED.git-id
X86Schedule.td
X86ScheduleAtom.td
X86ScheduleBtVer2.td
X86ScheduleSLM.td
X86ScheduleZnver1.td
X86SelectionDAGInfo.cpp
X86SelectionDAGInfo.h
X86ShuffleDecodeConstantPool.cpp
X86ShuffleDecodeConstantPool.h
X86Subtarget.cpp
X86Subtarget.h
X86TargetMachine.cpp
X86TargetMachine.h
X86TargetObjectFile.cpp
X86TargetObjectFile.h
X86TargetTransformInfo.cpp.REMOVED.git-id
X86TargetTransformInfo.h
X86VZeroUpper.cpp
X86WinAllocaExpander.cpp
X86WinEHState.cpp
XCore
CMakeLists.txt
LLVMBuild.txt
README.txt
Target.cpp
TargetIntrinsicInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
Testing
ToolDrivers
Transforms
WindowsManifest
XRay
CMakeLists.txt
LLVMBuild.txt
projects
resources
runtimes
scripts
test
tools
unittests
utils
.arcconfig
.clang-format
.clang-tidy
.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LICENSE.TXT
LLVMBuild.txt
README.txt
RELEASE_TESTERS.TXT
configure
llvm.spec.in
openmp
polly
nuget-buildtasks
nunit-lite
roslyn-binaries
rx
xunit-binaries
how-to-bump-roslyn-binaries.md
ikvm-native
llvm
m4
man
mcs
mk
mono
msvc
netcore
po
runtime
samples
scripts
support
tools
COPYING.LIB
LICENSE
Makefile.am
Makefile.in
NEWS
README.md
acinclude.m4
aclocal.m4
autogen.sh
code_of_conduct.md
compile
config.guess
config.h.in
config.rpath
config.sub
configure.REMOVED.git-id
configure.ac.REMOVED.git-id
depcomp
install-sh
ltmain.sh.REMOVED.git-id
missing
mkinstalldirs
mono-uninstalled.pc.in
test-driver
winconfig.h
linux-packaging-mono/external/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp

350 lines
11 KiB
C++
Raw Normal View History

//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics using
// constants from the constant pool.
//
//===----------------------------------------------------------------------===//
#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
namespace llvm {
static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
APInt &UndefElts,
SmallVectorImpl<uint64_t> &RawMask) {
// It is not an error for shuffle masks to not be a vector of
// MaskEltSizeInBits because the constant pool uniques constants by their
// bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
Type *CstTy = C->getType();
if (!CstTy->isVectorTy())
return false;
Type *CstEltTy = CstTy->getVectorElementType();
if (!CstEltTy->isIntegerTy())
return false;
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumCstElts = CstTy->getVectorNumElements();
assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
"Unaligned shuffle mask size");
unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
UndefElts = APInt(NumMaskElts, 0);
RawMask.resize(NumMaskElts, 0);
// Fast path - if the constants match the mask size then copy direct.
if (MaskEltSizeInBits == CstEltSizeInBits) {
assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
for (unsigned i = 0; i != NumMaskElts; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return false;
if (isa<UndefValue>(COp)) {
UndefElts.setBit(i);
RawMask[i] = 0;
continue;
}
auto *Elt = cast<ConstantInt>(COp);
RawMask[i] = Elt->getValue().getZExtValue();
}
return true;
}
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(CstSizeInBits, 0);
APInt MaskBits(CstSizeInBits, 0);
for (unsigned i = 0; i != NumCstElts; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return false;
unsigned BitOffset = i * CstEltSizeInBits;
if (isa<UndefValue>(COp)) {
UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
continue;
}
MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
}
// Now extract the undef/constant bit data into the raw shuffle masks.
for (unsigned i = 0; i != NumMaskElts; ++i) {
unsigned BitOffset = i * MaskEltSizeInBits;
APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
// Only treat the element as UNDEF if all bits are UNDEF, otherwise
// treat it as zero.
if (EltUndef.isAllOnesValue()) {
UndefElts.setBit(i);
RawMask[i] = 0;
continue;
}
APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
RawMask[i] = EltBits.getZExtValue();
}
return true;
}
// Decodes a constant PSHUFB control vector C into shuffle indices appended to
// ShuffleMask. Nothing is appended if the constant cannot be read as a byte
// mask.
void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
  unsigned MaskTySize = C->getType()->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
         "Unexpected vector size.");

  // PSHUFB is controlled per byte, so request 8-bit mask elements.
  APInt UndefElts;
  SmallVector<uint64_t, 64> RawMask;
  if (!extractConstantMask(C, 8, UndefElts, RawMask))
    return;

  unsigned NumElts = RawMask.size();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of vector elements.");

  for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
    if (UndefElts[Pos]) {
      ShuffleMask.push_back(SM_SentinelUndef);
    } else if (RawMask[Pos] & 0x80) {
      // Bit 7 set in the control byte zeroes the destination byte.
      ShuffleMask.push_back(SM_SentinelZero);
    } else {
      // The shuffle never crosses a 16-byte lane: the base is the start of
      // the lane holding this byte, and only the low four control bits pick
      // the byte within it.
      int SrcByte = (Pos & ~0xf) + (RawMask[Pos] & 0xf);
      ShuffleMask.push_back(SrcByte);
    }
  }
}
// Decodes a constant VPERMILPS/VPERMILPD control vector C, whose elements are
// ElSize bits wide (32 or 64), into shuffle indices appended to ShuffleMask.
// Nothing is appended if the constant cannot be read as such a mask.
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned MaskTySize = C->getType()->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
         "Unexpected vector size.");
  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");

  // Control elements have the same width as the data elements.
  APInt UndefElts;
  SmallVector<uint64_t, 16> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  unsigned NumElts = RawMask.size();
  // Clearing the low bits of an element position yields the index of the
  // first element of its 128-bit lane.
  const unsigned LaneStartMask = ~(128 / ElSize - 1);
  assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
         "Unexpected number of vector elements.");

  for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
    if (UndefElts[Pos]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    const uint64_t Ctrl = RawMask[Pos];
    // 64-bit elements select with control bit 1; 32-bit elements select with
    // control bits [1:0].
    const uint64_t InLane = (ElSize == 64) ? ((Ctrl >> 1) & 0x1) : (Ctrl & 0x3);
    ShuffleMask.push_back(static_cast<int>((Pos & LaneStartMask) + InLane));
  }
}
// Decodes a constant VPERMIL2PS/VPERMIL2PD selector vector into shuffle
// indices appended to ShuffleMask.
//
//   C      - constant selector vector (128 or 256 bits in total).
//   M2Z    - the instruction's two-bit match-to-zero control.
//   ElSize - width in bits of each data/selector element; must be 32 or 64.
//
// Indices in [0, NumElts) refer to the first source and indices in
// [NumElts, 2 * NumElts) to the second. Nothing is appended if the constant
// cannot be read as a mask of ElSize-bit integers.
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
                         SmallVectorImpl<int> &ShuffleMask) {
  Type *MaskTy = C->getType();
  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
  // Mirror the check done by DecodeVPERMILPMask: any other width would be
  // silently decoded with the 32-bit rules, and zero would divide by zero
  // when computing NumEltsPerLane below.
  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");

  // The shuffle mask requires elements the same size as the target.
  APInt UndefElts;
  SmallVector<uint64_t, 8> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  unsigned NumElts = RawMask.size();
  unsigned NumEltsPerLane = 128 / ElSize;
  assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
         "Unexpected number of vector elements.");

  for (unsigned i = 0; i != NumElts; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[1:0]  MatchBit
    //   0Xb        X        Source selected by Selector index.
    //   10b        0        Source selected by Selector index.
    //   10b        1        Zero.
    //   11b        0        Zero.
    //   11b        1        Source selected by Selector index.
    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    // Start from the first element of the 128-bit lane containing i, then
    // add the in-lane element chosen by the selector.
    int Index = i & ~(NumEltsPerLane - 1);
    if (ElSize == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    // Selector bit 2 picks the source operand; the second source's elements
    // are numbered after the first's.
    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}
// Decodes a constant 128-bit VPPERM selector vector C into shuffle indices
// appended to ShuffleMask. If any selector byte requests a permute operation
// that is neither a plain byte copy nor zero-fill, ShuffleMask is cleared and
// decoding stops. Nothing is appended if the constant cannot be read as a
// byte mask.
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
  assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
         "Unexpected vector size.");

  // VPPERM is controlled per byte, so request 8-bit mask elements.
  APInt UndefElts;
  SmallVector<uint64_t, 16> RawMask;
  if (!extractConstantMask(C, 8, UndefElts, RawMask))
    return;

  unsigned NumElts = RawMask.size();
  assert(NumElts == 16 && "Unexpected number of vector elements.");

  for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
    if (UndefElts[Pos]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // Selector byte layout:
    //   Bits[4:0] - Byte Index (0 - 31)
    //   Bits[7:5] - Permute Operation
    //
    // Permute Operation:
    //   0 - Source byte (no logical operation).
    //   1 - Invert source byte.
    //   2 - Bit reverse of source byte.
    //   3 - Bit reverse of inverted source byte.
    //   4 - 00h (zero - fill).
    //   5 - FFh (ones - fill).
    //   6 - Most significant bit of source byte replicated in all bit
    //       positions.
    //   7 - Invert most significant bit of source byte and replicate in all
    //       bit positions.
    const uint64_t Ctrl = RawMask[Pos];
    switch ((Ctrl >> 5) & 0x7) {
    case 0:
      ShuffleMask.push_back(static_cast<int>(Ctrl & 0x1F));
      break;
    case 4:
      ShuffleMask.push_back(SM_SentinelZero);
      break;
    default:
      // The remaining operations are not pure shuffles; give up entirely.
      ShuffleMask.clear();
      return;
    }
  }
}
// Decodes a constant single-source variable-permute index vector C, whose
// elements are ElSize bits wide, into shuffle indices appended to
// ShuffleMask. Nothing is appended if the constant cannot be read as a mask
// of ElSize-bit integers.
void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
                      SmallVectorImpl<int> &ShuffleMask) {
  unsigned MaskTySize = C->getType()->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
         "Unexpected vector size.");
  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
         "Unexpected vector element size.");

  // Index elements have the same width as the data elements.
  APInt UndefElts;
  SmallVector<uint64_t, 64> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  const unsigned Count = RawMask.size();
  // Only the index bits that can address one of the Count elements are kept.
  const unsigned IndexBits = Count - 1;
  for (unsigned Pos = 0; Pos != Count; ++Pos) {
    if (UndefElts[Pos])
      ShuffleMask.push_back(SM_SentinelUndef);
    else
      ShuffleMask.push_back(static_cast<int>(RawMask[Pos] & IndexBits));
  }
}
// Decodes a constant two-source variable-permute index vector C, whose
// elements are ElSize bits wide, into shuffle indices appended to
// ShuffleMask. Each decoded index lies in [0, 2 * NumElts). Nothing is
// appended if the constant cannot be read as a mask of ElSize-bit integers.
void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
                       SmallVectorImpl<int> &ShuffleMask) {
  unsigned MaskTySize = C->getType()->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
         "Unexpected vector size.");
  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
         "Unexpected vector element size.");

  // Index elements have the same width as the data elements.
  APInt UndefElts;
  SmallVector<uint64_t, 64> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  const unsigned Count = RawMask.size();
  // One more index bit than the single-source form is kept, doubling the
  // addressable range.
  const unsigned IndexBits = Count * 2 - 1;
  for (unsigned Pos = 0; Pos != Count; ++Pos) {
    if (UndefElts[Pos])
      ShuffleMask.push_back(SM_SentinelUndef);
    else
      ShuffleMask.push_back(static_cast<int>(RawMask[Pos] & IndexBits));
  }
}
} // llvm namespace