You've already forked linux-packaging-mono
Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
76
external/llvm/lib/Target/AArch64/AArch64.h
vendored
76
external/llvm/lib/Target/AArch64/AArch64.h
vendored
@ -1,76 +0,0 @@
|
||||
//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the entry points for global functions defined in the LLVM
|
||||
// AArch64 back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64_H
|
||||
#define LLVM_LIB_TARGET_AARCH64_AARCH64_H
|
||||
|
||||
#include "MCTargetDesc/AArch64MCTargetDesc.h"
|
||||
#include "Utils/AArch64BaseInfo.h"
|
||||
#include "llvm/Support/DataTypes.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AArch64RegisterBankInfo;
|
||||
class AArch64Subtarget;
|
||||
class AArch64TargetMachine;
|
||||
class FunctionPass;
|
||||
class InstructionSelector;
|
||||
class MachineFunctionPass;
|
||||
|
||||
FunctionPass *createAArch64DeadRegisterDefinitions();
|
||||
FunctionPass *createAArch64RedundantCopyEliminationPass();
|
||||
FunctionPass *createAArch64CondBrTuning();
|
||||
FunctionPass *createAArch64ConditionalCompares();
|
||||
FunctionPass *createAArch64AdvSIMDScalar();
|
||||
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createAArch64StorePairSuppressPass();
|
||||
FunctionPass *createAArch64ExpandPseudoPass();
|
||||
FunctionPass *createAArch64LoadStoreOptimizationPass();
|
||||
FunctionPass *createAArch64SIMDInstrOptPass();
|
||||
ModulePass *createAArch64PromoteConstantPass();
|
||||
FunctionPass *createAArch64ConditionOptimizerPass();
|
||||
FunctionPass *createAArch64A57FPLoadBalancing();
|
||||
FunctionPass *createAArch64A53Fix835769();
|
||||
FunctionPass *createFalkorHWPFFixPass();
|
||||
FunctionPass *createFalkorMarkStridedAccessesPass();
|
||||
|
||||
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
|
||||
|
||||
FunctionPass *createAArch64CollectLOHPass();
|
||||
InstructionSelector *
|
||||
createAArch64InstructionSelector(const AArch64TargetMachine &,
|
||||
AArch64Subtarget &, AArch64RegisterBankInfo &);
|
||||
|
||||
void initializeAArch64A53Fix835769Pass(PassRegistry&);
|
||||
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
|
||||
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
|
||||
void initializeAArch64CollectLOHPass(PassRegistry&);
|
||||
void initializeAArch64CondBrTuningPass(PassRegistry &);
|
||||
void initializeAArch64ConditionalComparesPass(PassRegistry&);
|
||||
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
|
||||
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
|
||||
void initializeAArch64ExpandPseudoPass(PassRegistry&);
|
||||
void initializeAArch64LoadStoreOptPass(PassRegistry&);
|
||||
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
|
||||
void initializeAArch64PromoteConstantPass(PassRegistry&);
|
||||
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
|
||||
void initializeAArch64StorePairSuppressPass(PassRegistry&);
|
||||
void initializeFalkorHWPFFixPass(PassRegistry&);
|
||||
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
|
||||
void initializeLDTLSCleanupPass(PassRegistry&);
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
507
external/llvm/lib/Target/AArch64/AArch64.td
vendored
507
external/llvm/lib/Target/AArch64/AArch64.td
vendored
File diff suppressed because it is too large
Load Diff
@ -1,244 +0,0 @@
|
||||
//===-- AArch64A53Fix835769.cpp -------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This pass changes code to work around Cortex-A53 erratum 835769.
|
||||
// It works around it by inserting a nop instruction in code sequences that
|
||||
// in some circumstances may trigger the erratum.
|
||||
// It inserts a nop instruction between a sequence of the following 2 classes
|
||||
// of instructions:
|
||||
// instr 1: mem-instr (including loads, stores and prefetches).
|
||||
// instr 2: non-SIMD integer multiply-accumulate writing 64-bit X registers.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "aarch64-fix-cortex-a53-835769"
|
||||
|
||||
STATISTIC(NumNopsAdded, "Number of Nops added to work around erratum 835769");
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper functions
|
||||
|
||||
// Is the instruction a match for the instruction that comes first in the
|
||||
// sequence of instructions that can trigger the erratum?
|
||||
static bool isFirstInstructionInSequence(MachineInstr *MI) {
|
||||
// Must return true if this instruction is a load, a store or a prefetch.
|
||||
switch (MI->getOpcode()) {
|
||||
case AArch64::PRFMl:
|
||||
case AArch64::PRFMroW:
|
||||
case AArch64::PRFMroX:
|
||||
case AArch64::PRFMui:
|
||||
case AArch64::PRFUMi:
|
||||
return true;
|
||||
default:
|
||||
return MI->mayLoadOrStore();
|
||||
}
|
||||
}
|
||||
|
||||
// Is the instruction a match for the instruction that comes second in the
|
||||
// sequence that can trigger the erratum?
|
||||
static bool isSecondInstructionInSequence(MachineInstr *MI) {
|
||||
// Must return true for non-SIMD integer multiply-accumulates, writing
|
||||
// to a 64-bit register.
|
||||
switch (MI->getOpcode()) {
|
||||
// Erratum cannot be triggered when the destination register is 32 bits,
|
||||
// therefore only include the following.
|
||||
case AArch64::MSUBXrrr:
|
||||
case AArch64::MADDXrrr:
|
||||
case AArch64::SMADDLrrr:
|
||||
case AArch64::SMSUBLrrr:
|
||||
case AArch64::UMADDLrrr:
|
||||
case AArch64::UMSUBLrrr:
|
||||
// Erratum can only be triggered by multiply-adds, not by regular
|
||||
// non-accumulating multiplies, i.e. when Ra=XZR='11111'
|
||||
return MI->getOperand(3).getReg() != AArch64::XZR;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
class AArch64A53Fix835769 : public MachineFunctionPass {
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
explicit AArch64A53Fix835769() : MachineFunctionPass(ID) {
|
||||
initializeAArch64A53Fix835769Pass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
|
||||
MachineFunctionProperties getRequiredProperties() const override {
|
||||
return MachineFunctionProperties().set(
|
||||
MachineFunctionProperties::Property::NoVRegs);
|
||||
}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "Workaround A53 erratum 835769 pass";
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
private:
|
||||
bool runOnBasicBlock(MachineBasicBlock &MBB);
|
||||
};
|
||||
char AArch64A53Fix835769::ID = 0;
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS(AArch64A53Fix835769, "aarch64-fix-cortex-a53-835769-pass",
|
||||
"AArch64 fix for A53 erratum 835769", false, false)
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool
|
||||
AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
|
||||
DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
|
||||
bool Changed = false;
|
||||
TII = F.getSubtarget().getInstrInfo();
|
||||
|
||||
for (auto &MBB : F) {
|
||||
Changed |= runOnBasicBlock(MBB);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Return the block that was fallen through to get to MBB, if any,
|
||||
// otherwise nullptr.
|
||||
static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
|
||||
const TargetInstrInfo *TII) {
|
||||
// Get the previous machine basic block in the function.
|
||||
MachineFunction::iterator MBBI(MBB);
|
||||
|
||||
// Can't go off top of function.
|
||||
if (MBBI == MBB->getParent()->begin())
|
||||
return nullptr;
|
||||
|
||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
||||
SmallVector<MachineOperand, 2> Cond;
|
||||
|
||||
MachineBasicBlock *PrevBB = &*std::prev(MBBI);
|
||||
for (MachineBasicBlock *S : MBB->predecessors())
|
||||
if (S == PrevBB && !TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) && !TBB &&
|
||||
!FBB)
|
||||
return S;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Iterate through fallen through blocks trying to find a previous non-pseudo if
|
||||
// there is one, otherwise return nullptr. Only look for instructions in
|
||||
// previous blocks, not the current block, since we only use this to look at
|
||||
// previous blocks.
|
||||
static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
|
||||
const TargetInstrInfo *TII) {
|
||||
MachineBasicBlock *FMBB = &MBB;
|
||||
|
||||
// If there is no non-pseudo in the current block, loop back around and try
|
||||
// the previous block (if there is one).
|
||||
while ((FMBB = getBBFallenThrough(FMBB, TII))) {
|
||||
for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend()))
|
||||
if (!I.isPseudo())
|
||||
return &I;
|
||||
}
|
||||
|
||||
// There was no previous non-pseudo in the fallen through blocks
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static void insertNopBeforeInstruction(MachineBasicBlock &MBB, MachineInstr* MI,
|
||||
const TargetInstrInfo *TII) {
|
||||
// If we are the first instruction of the block, put the NOP at the end of
|
||||
// the previous fallthrough block
|
||||
if (MI == &MBB.front()) {
|
||||
MachineInstr *I = getLastNonPseudo(MBB, TII);
|
||||
assert(I && "Expected instruction");
|
||||
DebugLoc DL = I->getDebugLoc();
|
||||
BuildMI(I->getParent(), DL, TII->get(AArch64::HINT)).addImm(0);
|
||||
}
|
||||
else {
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
BuildMI(MBB, MI, DL, TII->get(AArch64::HINT)).addImm(0);
|
||||
}
|
||||
|
||||
++NumNopsAdded;
|
||||
}
|
||||
|
||||
bool
|
||||
AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) {
|
||||
bool Changed = false;
|
||||
DEBUG(dbgs() << "Running on MBB: " << MBB << " - scanning instructions...\n");
|
||||
|
||||
// First, scan the basic block, looking for a sequence of 2 instructions
|
||||
// that match the conditions under which the erratum may trigger.
|
||||
|
||||
// List of terminating instructions in matching sequences
|
||||
std::vector<MachineInstr*> Sequences;
|
||||
unsigned Idx = 0;
|
||||
MachineInstr *PrevInstr = nullptr;
|
||||
|
||||
// Try and find the last non-pseudo instruction in any fallen through blocks,
|
||||
// if there isn't one, then we use nullptr to represent that.
|
||||
PrevInstr = getLastNonPseudo(MBB, TII);
|
||||
|
||||
for (auto &MI : MBB) {
|
||||
MachineInstr *CurrInstr = &MI;
|
||||
DEBUG(dbgs() << " Examining: " << MI);
|
||||
if (PrevInstr) {
|
||||
DEBUG(dbgs() << " PrevInstr: " << *PrevInstr
|
||||
<< " CurrInstr: " << *CurrInstr
|
||||
<< " isFirstInstructionInSequence(PrevInstr): "
|
||||
<< isFirstInstructionInSequence(PrevInstr) << "\n"
|
||||
<< " isSecondInstructionInSequence(CurrInstr): "
|
||||
<< isSecondInstructionInSequence(CurrInstr) << "\n");
|
||||
if (isFirstInstructionInSequence(PrevInstr) &&
|
||||
isSecondInstructionInSequence(CurrInstr)) {
|
||||
DEBUG(dbgs() << " ** pattern found at Idx " << Idx << "!\n");
|
||||
Sequences.push_back(CurrInstr);
|
||||
}
|
||||
}
|
||||
if (!CurrInstr->isPseudo())
|
||||
PrevInstr = CurrInstr;
|
||||
++Idx;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Scan complete, " << Sequences.size()
|
||||
<< " occurrences of pattern found.\n");
|
||||
|
||||
// Then update the basic block, inserting nops between the detected sequences.
|
||||
for (auto &MI : Sequences) {
|
||||
Changed = true;
|
||||
insertNopBeforeInstruction(MBB, MI, TII);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Factory function used by AArch64TargetMachine to add the pass to
|
||||
// the passmanager.
|
||||
FunctionPass *llvm::createAArch64A53Fix835769() {
|
||||
return new AArch64A53Fix835769();
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,413 +0,0 @@
|
||||
//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// When profitable, replace GPR targeting i64 instructions with their
|
||||
// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
|
||||
// as minimizing the number of cross-class register copies.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TODO: Graph based predicate heuristics.
|
||||
// Walking the instruction list linearly will get many, perhaps most, of
|
||||
// the cases, but to do a truly thorough job of this, we need a more
|
||||
// wholistic approach.
|
||||
//
|
||||
// This optimization is very similar in spirit to the register allocator's
|
||||
// spill placement, only here we're determining where to place cross-class
|
||||
// register copies rather than spills. As such, a similar approach is
|
||||
// called for.
|
||||
//
|
||||
// We want to build up a set of graphs of all instructions which are candidates
|
||||
// for transformation along with instructions which generate their inputs and
|
||||
// consume their outputs. For each edge in the graph, we assign a weight
|
||||
// based on whether there is a copy required there (weight zero if not) and
|
||||
// the block frequency of the block containing the defining or using
|
||||
// instruction, whichever is less. Our optimization is then a graph problem
|
||||
// to minimize the total weight of all the graphs, then transform instructions
|
||||
// and add or remove copy instructions as called for to implement the
|
||||
// solution.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64.h"
|
||||
#include "AArch64InstrInfo.h"
|
||||
#include "AArch64RegisterInfo.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "aarch64-simd-scalar"
|
||||
|
||||
// Allow forcing all i64 operations with equivalent SIMD instructions to use
|
||||
// them. For stress-testing the transformation function.
|
||||
static cl::opt<bool>
|
||||
TransformAll("aarch64-simd-scalar-force-all",
|
||||
cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
|
||||
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
|
||||
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
|
||||
|
||||
#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization"
|
||||
|
||||
namespace {
|
||||
class AArch64AdvSIMDScalar : public MachineFunctionPass {
|
||||
MachineRegisterInfo *MRI;
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
private:
|
||||
// isProfitableToTransform - Predicate function to determine whether an
|
||||
// instruction should be transformed to its equivalent AdvSIMD scalar
|
||||
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
|
||||
bool isProfitableToTransform(const MachineInstr &MI) const;
|
||||
|
||||
// transformInstruction - Perform the transformation of an instruction
|
||||
// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs
|
||||
// to be the correct register class, minimizing cross-class copies.
|
||||
void transformInstruction(MachineInstr &MI);
|
||||
|
||||
// processMachineBasicBlock - Main optimzation loop.
|
||||
bool processMachineBasicBlock(MachineBasicBlock *MBB);
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {
|
||||
initializeAArch64AdvSIMDScalarPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
|
||||
StringRef getPassName() const override { return AARCH64_ADVSIMD_NAME; }
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
char AArch64AdvSIMDScalar::ID = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS(AArch64AdvSIMDScalar, "aarch64-simd-scalar",
|
||||
AARCH64_ADVSIMD_NAME, false, false)
|
||||
|
||||
static bool isGPR64(unsigned Reg, unsigned SubReg,
|
||||
const MachineRegisterInfo *MRI) {
|
||||
if (SubReg)
|
||||
return false;
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass);
|
||||
return AArch64::GPR64RegClass.contains(Reg);
|
||||
}
|
||||
|
||||
static bool isFPR64(unsigned Reg, unsigned SubReg,
|
||||
const MachineRegisterInfo *MRI) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) &&
|
||||
SubReg == 0) ||
|
||||
(MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) &&
|
||||
SubReg == AArch64::dsub);
|
||||
// Physical register references just check the register class directly.
|
||||
return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
|
||||
(AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub);
|
||||
}
|
||||
|
||||
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
|
||||
// copy instruction. Return zero_reg if the instruction is not a copy.
|
||||
static MachineOperand *getSrcFromCopy(MachineInstr *MI,
|
||||
const MachineRegisterInfo *MRI,
|
||||
unsigned &SubReg) {
|
||||
SubReg = 0;
|
||||
// The "FMOV Xd, Dn" instruction is the typical form.
|
||||
if (MI->getOpcode() == AArch64::FMOVDXr ||
|
||||
MI->getOpcode() == AArch64::FMOVXDr)
|
||||
return &MI->getOperand(1);
|
||||
// A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
|
||||
// these at this stage, but it's easy to check for.
|
||||
if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
|
||||
SubReg = AArch64::dsub;
|
||||
return &MI->getOperand(1);
|
||||
}
|
||||
// Or just a plain COPY instruction. This can be directly to/from FPR64,
|
||||
// or it can be a dsub subreg reference to an FPR128.
|
||||
if (MI->getOpcode() == AArch64::COPY) {
|
||||
if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
|
||||
MRI) &&
|
||||
isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
|
||||
return &MI->getOperand(1);
|
||||
if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
|
||||
MRI) &&
|
||||
isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
|
||||
MRI)) {
|
||||
SubReg = MI->getOperand(1).getSubReg();
|
||||
return &MI->getOperand(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, this is some other kind of instruction.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
|
||||
// that we're considering transforming to, return that AdvSIMD opcode. For all
|
||||
// others, return the original opcode.
|
||||
static unsigned getTransformOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
break;
|
||||
// FIXME: Lots more possibilities.
|
||||
case AArch64::ADDXrr:
|
||||
return AArch64::ADDv1i64;
|
||||
case AArch64::SUBXrr:
|
||||
return AArch64::SUBv1i64;
|
||||
case AArch64::ANDXrr:
|
||||
return AArch64::ANDv8i8;
|
||||
case AArch64::EORXrr:
|
||||
return AArch64::EORv8i8;
|
||||
case AArch64::ORRXrr:
|
||||
return AArch64::ORRv8i8;
|
||||
}
|
||||
// No AdvSIMD equivalent, so just return the original opcode.
|
||||
return Opc;
|
||||
}
|
||||
|
||||
static bool isTransformable(const MachineInstr &MI) {
|
||||
unsigned Opc = MI.getOpcode();
|
||||
return Opc != getTransformOpcode(Opc);
|
||||
}
|
||||
|
||||
// isProfitableToTransform - Predicate function to determine whether an
|
||||
// instruction should be transformed to its equivalent AdvSIMD scalar
|
||||
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
|
||||
bool AArch64AdvSIMDScalar::isProfitableToTransform(
|
||||
const MachineInstr &MI) const {
|
||||
// If this instruction isn't eligible to be transformed (no SIMD equivalent),
|
||||
// early exit since that's the common case.
|
||||
if (!isTransformable(MI))
|
||||
return false;
|
||||
|
||||
// Count the number of copies we'll need to add and approximate the number
|
||||
// of copies that a transform will enable us to remove.
|
||||
unsigned NumNewCopies = 3;
|
||||
unsigned NumRemovableCopies = 0;
|
||||
|
||||
unsigned OrigSrc0 = MI.getOperand(1).getReg();
|
||||
unsigned OrigSrc1 = MI.getOperand(2).getReg();
|
||||
unsigned SubReg0;
|
||||
unsigned SubReg1;
|
||||
if (!MRI->def_empty(OrigSrc0)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc0);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0);
|
||||
// If the source was from a copy, we don't need to insert a new copy.
|
||||
if (MOSrc0)
|
||||
--NumNewCopies;
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (MOSrc0 && MRI->hasOneNonDBGUse(OrigSrc0))
|
||||
++NumRemovableCopies;
|
||||
}
|
||||
if (!MRI->def_empty(OrigSrc1)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc1);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1);
|
||||
if (MOSrc1)
|
||||
--NumNewCopies;
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (MOSrc1 && MRI->hasOneNonDBGUse(OrigSrc1))
|
||||
++NumRemovableCopies;
|
||||
}
|
||||
|
||||
// If any of the uses of the original instructions is a cross class copy,
|
||||
// that's a copy that will be removable if we transform. Likewise, if
|
||||
// any of the uses is a transformable instruction, it's likely the tranforms
|
||||
// will chain, enabling us to save a copy there, too. This is an aggressive
|
||||
// heuristic that approximates the graph based cost analysis described above.
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
bool AllUsesAreCopies = true;
|
||||
for (MachineRegisterInfo::use_instr_nodbg_iterator
|
||||
Use = MRI->use_instr_nodbg_begin(Dst),
|
||||
E = MRI->use_instr_nodbg_end();
|
||||
Use != E; ++Use) {
|
||||
unsigned SubReg;
|
||||
if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(*Use))
|
||||
++NumRemovableCopies;
|
||||
// If the use is an INSERT_SUBREG, that's still something that can
|
||||
// directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's
|
||||
// preferable to have it use the FPR64 in most cases, as if the source
|
||||
// vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
|
||||
// Ditto for a lane insert.
|
||||
else if (Use->getOpcode() == AArch64::INSERT_SUBREG ||
|
||||
Use->getOpcode() == AArch64::INSvi64gpr)
|
||||
;
|
||||
else
|
||||
AllUsesAreCopies = false;
|
||||
}
|
||||
// If all of the uses of the original destination register are copies to
|
||||
// FPR64, then we won't end up having a new copy back to GPR64 either.
|
||||
if (AllUsesAreCopies)
|
||||
--NumNewCopies;
|
||||
|
||||
// If a transform will not increase the number of cross-class copies required,
|
||||
// return true.
|
||||
if (NumNewCopies <= NumRemovableCopies)
|
||||
return true;
|
||||
|
||||
// Finally, even if we otherwise wouldn't transform, check if we're forcing
|
||||
// transformation of everything.
|
||||
return TransformAll;
|
||||
}
|
||||
|
||||
static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr &MI,
|
||||
unsigned Dst, unsigned Src, bool IsKill) {
|
||||
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
|
||||
TII->get(AArch64::COPY), Dst)
|
||||
.addReg(Src, getKillRegState(IsKill));
|
||||
DEBUG(dbgs() << " adding copy: " << *MIB);
|
||||
++NumCopiesInserted;
|
||||
return MIB;
|
||||
}
|
||||
|
||||
// transformInstruction - Perform the transformation of an instruction
|
||||
// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs
|
||||
// to be the correct register class, minimizing cross-class copies.
|
||||
void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
|
||||
DEBUG(dbgs() << "Scalar transform: " << MI);
|
||||
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
unsigned OldOpc = MI.getOpcode();
|
||||
unsigned NewOpc = getTransformOpcode(OldOpc);
|
||||
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
|
||||
|
||||
// Check if we need a copy for the source registers.
|
||||
unsigned OrigSrc0 = MI.getOperand(1).getReg();
|
||||
unsigned OrigSrc1 = MI.getOperand(2).getReg();
|
||||
unsigned Src0 = 0, SubReg0;
|
||||
unsigned Src1 = 0, SubReg1;
|
||||
bool KillSrc0 = false, KillSrc1 = false;
|
||||
if (!MRI->def_empty(OrigSrc0)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc0);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0);
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (MOSrc0) {
|
||||
Src0 = MOSrc0->getReg();
|
||||
KillSrc0 = MOSrc0->isKill();
|
||||
// Src0 is going to be reused, thus, it cannot be killed anymore.
|
||||
MOSrc0->setIsKill(false);
|
||||
if (MRI->hasOneNonDBGUse(OrigSrc0)) {
|
||||
assert(MOSrc0 && "Can't delete copy w/o a valid original source!");
|
||||
Def->eraseFromParent();
|
||||
++NumCopiesDeleted;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!MRI->def_empty(OrigSrc1)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc1);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1);
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (MOSrc1) {
|
||||
Src1 = MOSrc1->getReg();
|
||||
KillSrc1 = MOSrc1->isKill();
|
||||
// Src0 is going to be reused, thus, it cannot be killed anymore.
|
||||
MOSrc1->setIsKill(false);
|
||||
if (MRI->hasOneNonDBGUse(OrigSrc1)) {
|
||||
assert(MOSrc1 && "Can't delete copy w/o a valid original source!");
|
||||
Def->eraseFromParent();
|
||||
++NumCopiesDeleted;
|
||||
}
|
||||
}
|
||||
}
|
||||
// If we weren't able to reference the original source directly, create a
|
||||
// copy.
|
||||
if (!Src0) {
|
||||
SubReg0 = 0;
|
||||
Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
|
||||
insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0);
|
||||
KillSrc0 = true;
|
||||
}
|
||||
if (!Src1) {
|
||||
SubReg1 = 0;
|
||||
Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
|
||||
insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1);
|
||||
KillSrc1 = true;
|
||||
}
|
||||
|
||||
// Create a vreg for the destination.
|
||||
// FIXME: No need to do this if the ultimate user expects an FPR64.
|
||||
// Check for that and avoid the copy if possible.
|
||||
unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
|
||||
|
||||
// For now, all of the new instructions have the same simple three-register
|
||||
// form, so no need to special case based on what instruction we're
|
||||
// building.
|
||||
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst)
|
||||
.addReg(Src0, getKillRegState(KillSrc0), SubReg0)
|
||||
.addReg(Src1, getKillRegState(KillSrc1), SubReg1);
|
||||
|
||||
// Now copy the result back out to a GPR.
|
||||
// FIXME: Try to avoid this if all uses could actually just use the FPR64
|
||||
// directly.
|
||||
insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true);
|
||||
|
||||
// Erase the old instruction.
|
||||
MI.eraseFromParent();
|
||||
|
||||
++NumScalarInsnsUsed;
|
||||
}
|
||||
|
||||
// processMachineBasicBlock - Main optimzation loop.
|
||||
bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
|
||||
MachineInstr &MI = *I++;
|
||||
if (isProfitableToTransform(MI)) {
|
||||
transformInstruction(MI);
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// runOnMachineFunction - Pass entry point from PassManager.
|
||||
bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
|
||||
bool Changed = false;
|
||||
DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
|
||||
|
||||
if (skipFunction(mf.getFunction()))
|
||||
return false;
|
||||
|
||||
MRI = &mf.getRegInfo();
|
||||
TII = mf.getSubtarget().getInstrInfo();
|
||||
|
||||
// Just check things on a one-block-at-a-time basis.
|
||||
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
|
||||
if (processMachineBasicBlock(&*I))
|
||||
Changed = true;
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine
|
||||
// to add the pass to the PassManager.
|
||||
FunctionPass *llvm::createAArch64AdvSIMDScalar() {
|
||||
return new AArch64AdvSIMDScalar();
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,404 +0,0 @@
|
||||
//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file implements the lowering of LLVM calls to machine code calls for
|
||||
/// GlobalISel.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64CallLowering.h"
|
||||
#include "AArch64ISelLowering.h"
|
||||
#include "AArch64MachineFunctionInfo.h"
|
||||
#include "AArch64Subtarget.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/Analysis.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/CodeGen/LowLevelType.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineValueType.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
|
||||
: CallLowering(&TLI) {}
|
||||
|
||||
namespace {
|
||||
struct IncomingArgHandler : public CallLowering::ValueHandler {
|
||||
IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
||||
CCAssignFn *AssignFn)
|
||||
: ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
|
||||
|
||||
unsigned getStackAddress(uint64_t Size, int64_t Offset,
|
||||
MachinePointerInfo &MPO) override {
|
||||
auto &MFI = MIRBuilder.getMF().getFrameInfo();
|
||||
int FI = MFI.CreateFixedObject(Size, Offset, true);
|
||||
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
|
||||
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
|
||||
MIRBuilder.buildFrameIndex(AddrReg, FI);
|
||||
StackUsed = std::max(StackUsed, Size + Offset);
|
||||
return AddrReg;
|
||||
}
|
||||
|
||||
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
|
||||
CCValAssign &VA) override {
|
||||
markPhysRegUsed(PhysReg);
|
||||
switch (VA.getLocInfo()) {
|
||||
default:
|
||||
MIRBuilder.buildCopy(ValVReg, PhysReg);
|
||||
break;
|
||||
case CCValAssign::LocInfo::SExt:
|
||||
case CCValAssign::LocInfo::ZExt:
|
||||
case CCValAssign::LocInfo::AExt: {
|
||||
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
|
||||
MIRBuilder.buildTrunc(ValVReg, Copy);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
|
||||
MachinePointerInfo &MPO, CCValAssign &VA) override {
|
||||
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
|
||||
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
|
||||
0);
|
||||
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
|
||||
}
|
||||
|
||||
/// How the physical register gets marked varies between formal
|
||||
/// parameters (it's a basic-block live-in), and a call instruction
|
||||
/// (it's an implicit-def of the BL).
|
||||
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
|
||||
|
||||
uint64_t StackUsed;
|
||||
};
|
||||
|
||||
struct FormalArgHandler : public IncomingArgHandler {
|
||||
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
||||
CCAssignFn *AssignFn)
|
||||
: IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
|
||||
|
||||
void markPhysRegUsed(unsigned PhysReg) override {
|
||||
MIRBuilder.getMBB().addLiveIn(PhysReg);
|
||||
}
|
||||
};
|
||||
|
||||
struct CallReturnHandler : public IncomingArgHandler {
|
||||
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
||||
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
|
||||
: IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
|
||||
|
||||
void markPhysRegUsed(unsigned PhysReg) override {
|
||||
MIB.addDef(PhysReg, RegState::Implicit);
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB;
|
||||
};
|
||||
|
||||
struct OutgoingArgHandler : public CallLowering::ValueHandler {
|
||||
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
||||
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
|
||||
CCAssignFn *AssignFnVarArg)
|
||||
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
|
||||
AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
|
||||
|
||||
unsigned getStackAddress(uint64_t Size, int64_t Offset,
|
||||
MachinePointerInfo &MPO) override {
|
||||
LLT p0 = LLT::pointer(0, 64);
|
||||
LLT s64 = LLT::scalar(64);
|
||||
unsigned SPReg = MRI.createGenericVirtualRegister(p0);
|
||||
MIRBuilder.buildCopy(SPReg, AArch64::SP);
|
||||
|
||||
unsigned OffsetReg = MRI.createGenericVirtualRegister(s64);
|
||||
MIRBuilder.buildConstant(OffsetReg, Offset);
|
||||
|
||||
unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
|
||||
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
|
||||
|
||||
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
|
||||
return AddrReg;
|
||||
}
|
||||
|
||||
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
|
||||
CCValAssign &VA) override {
|
||||
MIB.addUse(PhysReg, RegState::Implicit);
|
||||
unsigned ExtReg = extendRegister(ValVReg, VA);
|
||||
MIRBuilder.buildCopy(PhysReg, ExtReg);
|
||||
}
|
||||
|
||||
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
|
||||
MachinePointerInfo &MPO, CCValAssign &VA) override {
|
||||
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
|
||||
MPO, MachineMemOperand::MOStore, Size, 0);
|
||||
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
|
||||
}
|
||||
|
||||
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
const CallLowering::ArgInfo &Info,
|
||||
CCState &State) override {
|
||||
bool Res;
|
||||
if (Info.IsFixed)
|
||||
Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
|
||||
else
|
||||
Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
|
||||
|
||||
StackSize = State.getNextStackOffset();
|
||||
return Res;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB;
|
||||
CCAssignFn *AssignFnVarArg;
|
||||
uint64_t StackSize;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void AArch64CallLowering::splitToValueTypes(
|
||||
const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
|
||||
const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
|
||||
const SplitArgTy &PerformArgSplit) const {
|
||||
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
|
||||
LLVMContext &Ctx = OrigArg.Ty->getContext();
|
||||
|
||||
SmallVector<EVT, 4> SplitVTs;
|
||||
SmallVector<uint64_t, 4> Offsets;
|
||||
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
|
||||
|
||||
if (SplitVTs.size() == 1) {
|
||||
// No splitting to do, but we want to replace the original type (e.g. [1 x
|
||||
// double] -> double).
|
||||
SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
|
||||
OrigArg.Flags, OrigArg.IsFixed);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned FirstRegIdx = SplitArgs.size();
|
||||
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
|
||||
OrigArg.Ty, CallConv, false);
|
||||
for (auto SplitVT : SplitVTs) {
|
||||
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
|
||||
SplitArgs.push_back(
|
||||
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
|
||||
SplitTy, OrigArg.Flags, OrigArg.IsFixed});
|
||||
if (NeedsRegBlock)
|
||||
SplitArgs.back().Flags.setInConsecutiveRegs();
|
||||
}
|
||||
|
||||
SplitArgs.back().Flags.setInConsecutiveRegsLast();
|
||||
|
||||
for (unsigned i = 0; i < Offsets.size(); ++i)
|
||||
PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
|
||||
}
|
||||
|
||||
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
|
||||
const Value *Val, unsigned VReg) const {
|
||||
MachineFunction &MF = MIRBuilder.getMF();
|
||||
const Function &F = MF.getFunction();
|
||||
|
||||
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
|
||||
assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
|
||||
bool Success = true;
|
||||
if (VReg) {
|
||||
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
|
||||
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
auto &DL = F.getParent()->getDataLayout();
|
||||
|
||||
ArgInfo OrigArg{VReg, Val->getType()};
|
||||
setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
|
||||
|
||||
SmallVector<ArgInfo, 8> SplitArgs;
|
||||
splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
|
||||
[&](unsigned Reg, uint64_t Offset) {
|
||||
MIRBuilder.buildExtract(Reg, VReg, Offset);
|
||||
});
|
||||
|
||||
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
|
||||
Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
|
||||
}
|
||||
|
||||
MIRBuilder.insertInstr(MIB);
|
||||
return Success;
|
||||
}
|
||||
|
||||
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||
const Function &F,
|
||||
ArrayRef<unsigned> VRegs) const {
|
||||
MachineFunction &MF = MIRBuilder.getMF();
|
||||
MachineBasicBlock &MBB = MIRBuilder.getMBB();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
auto &DL = F.getParent()->getDataLayout();
|
||||
|
||||
SmallVector<ArgInfo, 8> SplitArgs;
|
||||
unsigned i = 0;
|
||||
for (auto &Arg : F.args()) {
|
||||
if (DL.getTypeStoreSize(Arg.getType()) == 0)
|
||||
continue;
|
||||
ArgInfo OrigArg{VRegs[i], Arg.getType()};
|
||||
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
|
||||
bool Split = false;
|
||||
LLT Ty = MRI.getType(VRegs[i]);
|
||||
unsigned Dst = VRegs[i];
|
||||
|
||||
splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
|
||||
[&](unsigned Reg, uint64_t Offset) {
|
||||
if (!Split) {
|
||||
Split = true;
|
||||
Dst = MRI.createGenericVirtualRegister(Ty);
|
||||
MIRBuilder.buildUndef(Dst);
|
||||
}
|
||||
unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
|
||||
MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
|
||||
Dst = Tmp;
|
||||
});
|
||||
|
||||
if (Dst != VRegs[i])
|
||||
MIRBuilder.buildCopy(VRegs[i], Dst);
|
||||
++i;
|
||||
}
|
||||
|
||||
if (!MBB.empty())
|
||||
MIRBuilder.setInstr(*MBB.begin());
|
||||
|
||||
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
|
||||
CCAssignFn *AssignFn =
|
||||
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
|
||||
|
||||
FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
|
||||
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
||||
return false;
|
||||
|
||||
if (F.isVarArg()) {
|
||||
if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
|
||||
// FIXME: we need to reimplement saveVarArgsRegisters from
|
||||
// AArch64ISelLowering.
|
||||
return false;
|
||||
}
|
||||
|
||||
// We currently pass all varargs at 8-byte alignment.
|
||||
uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
|
||||
|
||||
auto &MFI = MIRBuilder.getMF().getFrameInfo();
|
||||
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
|
||||
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
|
||||
}
|
||||
|
||||
// Move back to the end of the basic block.
|
||||
MIRBuilder.setMBB(MBB);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
|
||||
CallingConv::ID CallConv,
|
||||
const MachineOperand &Callee,
|
||||
const ArgInfo &OrigRet,
|
||||
ArrayRef<ArgInfo> OrigArgs) const {
|
||||
MachineFunction &MF = MIRBuilder.getMF();
|
||||
const Function &F = MF.getFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
auto &DL = F.getParent()->getDataLayout();
|
||||
|
||||
SmallVector<ArgInfo, 8> SplitArgs;
|
||||
for (auto &OrigArg : OrigArgs) {
|
||||
splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
|
||||
[&](unsigned Reg, uint64_t Offset) {
|
||||
MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
|
||||
});
|
||||
}
|
||||
|
||||
// Find out which ABI gets to decide where things go.
|
||||
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
|
||||
CCAssignFn *AssignFnFixed =
|
||||
TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
|
||||
CCAssignFn *AssignFnVarArg =
|
||||
TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);
|
||||
|
||||
auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
|
||||
|
||||
// Create a temporarily-floating call instruction so we can add the implicit
|
||||
// uses of arg registers.
|
||||
auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
|
||||
: AArch64::BL);
|
||||
MIB.add(Callee);
|
||||
|
||||
// Tell the call which registers are clobbered.
|
||||
auto TRI = MF.getSubtarget().getRegisterInfo();
|
||||
MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
|
||||
|
||||
// Do the actual argument marshalling.
|
||||
SmallVector<unsigned, 8> PhysRegs;
|
||||
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
|
||||
AssignFnVarArg);
|
||||
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
||||
return false;
|
||||
|
||||
// Now we can add the actual call instruction to the correct basic block.
|
||||
MIRBuilder.insertInstr(MIB);
|
||||
|
||||
// If Callee is a reg, since it is used by a target specific
|
||||
// instruction, it must have a register class matching the
|
||||
// constraint of that instruction.
|
||||
if (Callee.isReg())
|
||||
MIB->getOperand(0).setReg(constrainOperandRegClass(
|
||||
MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
|
||||
*MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
|
||||
Callee.getReg(), 0));
|
||||
|
||||
// Finally we can copy the returned value back into its virtual-register. In
|
||||
// symmetry with the arugments, the physical register must be an
|
||||
// implicit-define of the call instruction.
|
||||
CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
|
||||
if (OrigRet.Reg) {
|
||||
SplitArgs.clear();
|
||||
|
||||
SmallVector<uint64_t, 8> RegOffsets;
|
||||
SmallVector<unsigned, 8> SplitRegs;
|
||||
splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv(),
|
||||
[&](unsigned Reg, uint64_t Offset) {
|
||||
RegOffsets.push_back(Offset);
|
||||
SplitRegs.push_back(Reg);
|
||||
});
|
||||
|
||||
CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
|
||||
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
||||
return false;
|
||||
|
||||
if (!RegOffsets.empty())
|
||||
MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
|
||||
}
|
||||
|
||||
CallSeqStart.addImm(Handler.StackSize).addImm(0);
|
||||
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
|
||||
.addImm(Handler.StackSize)
|
||||
.addImm(0);
|
||||
|
||||
return true;
|
||||
}
|
@ -1,65 +0,0 @@
|
||||
//===- AArch64CallLowering.h - Call lowering --------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file describes how to lower LLVM calls to machine code calls.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
|
||||
#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AArch64TargetLowering;
|
||||
class CCValAssign;
|
||||
class DataLayout;
|
||||
class MachineIRBuilder;
|
||||
class MachineRegisterInfo;
|
||||
class Type;
|
||||
|
||||
class AArch64CallLowering: public CallLowering {
|
||||
public:
|
||||
AArch64CallLowering(const AArch64TargetLowering &TLI);
|
||||
|
||||
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
|
||||
unsigned VReg) const override;
|
||||
|
||||
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
|
||||
ArrayRef<unsigned> VRegs) const override;
|
||||
|
||||
bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
|
||||
const MachineOperand &Callee, const ArgInfo &OrigRet,
|
||||
ArrayRef<ArgInfo> OrigArgs) const override;
|
||||
|
||||
private:
|
||||
using RegHandler = std::function<void(MachineIRBuilder &, Type *, unsigned,
|
||||
CCValAssign &)>;
|
||||
|
||||
using MemHandler =
|
||||
std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
|
||||
|
||||
using SplitArgTy = std::function<void(unsigned, uint64_t)>;
|
||||
|
||||
void splitToValueTypes(const ArgInfo &OrigArgInfo,
|
||||
SmallVectorImpl<ArgInfo> &SplitArgs,
|
||||
const DataLayout &DL, MachineRegisterInfo &MRI,
|
||||
CallingConv::ID CallConv,
|
||||
const SplitArgTy &SplitArg) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
|
@ -1,139 +0,0 @@
|
||||
//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the custom routines for the AArch64 Calling Convention
|
||||
// that aren't done by tablegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
|
||||
#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
|
||||
|
||||
#include "AArch64.h"
|
||||
#include "AArch64InstrInfo.h"
|
||||
#include "AArch64Subtarget.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
|
||||
namespace {
|
||||
using namespace llvm;
|
||||
|
||||
static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
|
||||
AArch64::X3, AArch64::X4, AArch64::X5,
|
||||
AArch64::X6, AArch64::X7};
|
||||
static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
|
||||
AArch64::H3, AArch64::H4, AArch64::H5,
|
||||
AArch64::H6, AArch64::H7};
|
||||
static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
|
||||
AArch64::S3, AArch64::S4, AArch64::S5,
|
||||
AArch64::S6, AArch64::S7};
|
||||
static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
|
||||
AArch64::D3, AArch64::D4, AArch64::D5,
|
||||
AArch64::D6, AArch64::D7};
|
||||
static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
|
||||
AArch64::Q3, AArch64::Q4, AArch64::Q5,
|
||||
AArch64::Q6, AArch64::Q7};
|
||||
|
||||
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
|
||||
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
|
||||
CCState &State, unsigned SlotAlign) {
|
||||
unsigned Size = LocVT.getSizeInBits() / 8;
|
||||
unsigned StackAlign =
|
||||
State.getMachineFunction().getDataLayout().getStackAlignment();
|
||||
unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
|
||||
|
||||
for (auto &It : PendingMembers) {
|
||||
It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
|
||||
State.addLoc(It);
|
||||
SlotAlign = 1;
|
||||
}
|
||||
|
||||
// All pending members have now been allocated
|
||||
PendingMembers.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
|
||||
/// [N x Ty] type must still be contiguous in memory though.
|
||||
static bool CC_AArch64_Custom_Stack_Block(
|
||||
unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
|
||||
|
||||
// Add the argument to the list to be allocated once we know the size of the
|
||||
// block.
|
||||
PendingMembers.push_back(
|
||||
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
||||
|
||||
if (!ArgFlags.isInConsecutiveRegsLast())
|
||||
return true;
|
||||
|
||||
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
|
||||
}
|
||||
|
||||
/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
|
||||
/// registers. If no such sequence is available, mark the rest of the registers
|
||||
/// of that type as used and place the argument on the stack.
|
||||
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||
// Try to allocate a contiguous block of registers, each of the correct
|
||||
// size to hold one member.
|
||||
ArrayRef<MCPhysReg> RegList;
|
||||
if (LocVT.SimpleTy == MVT::i64)
|
||||
RegList = XRegList;
|
||||
else if (LocVT.SimpleTy == MVT::f16)
|
||||
RegList = HRegList;
|
||||
else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
|
||||
RegList = SRegList;
|
||||
else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
|
||||
RegList = DRegList;
|
||||
else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
|
||||
RegList = QRegList;
|
||||
else {
|
||||
// Not an array we want to split up after all.
|
||||
return false;
|
||||
}
|
||||
|
||||
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
|
||||
|
||||
// Add the argument to the list to be allocated once we know the size of the
|
||||
// block.
|
||||
PendingMembers.push_back(
|
||||
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
||||
|
||||
if (!ArgFlags.isInConsecutiveRegsLast())
|
||||
return true;
|
||||
|
||||
unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
|
||||
if (RegResult) {
|
||||
for (auto &It : PendingMembers) {
|
||||
It.convertToReg(RegResult);
|
||||
State.addLoc(It);
|
||||
++RegResult;
|
||||
}
|
||||
PendingMembers.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Mark all regs in the class as unavailable
|
||||
for (auto Reg : RegList)
|
||||
State.AllocateReg(Reg);
|
||||
|
||||
const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
|
||||
State.getMachineFunction().getSubtarget());
|
||||
unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
|
||||
|
||||
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,368 +0,0 @@
|
||||
//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the calling conventions for AArch64 architecture.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// CCIfAlign - Match of the original alignment of the arg
|
||||
class CCIfAlign<string Align, CCAction A> :
|
||||
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
|
||||
/// CCIfBigEndian - Match only if we're in big endian mode.
|
||||
class CCIfBigEndian<CCAction A> :
|
||||
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM AAPCS64 Calling Convention
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
CCIfByVal<CCPassByVal<8, 8>>,
|
||||
|
||||
// The 'nest' parameter, if any, is passed in X18.
|
||||
// Darwin uses X18 as the platform register and hence 'nest' isn't currently
|
||||
// supported there.
|
||||
CCIfNest<CCAssignToReg<[X18]>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
|
||||
[X0, X1, X3, X5]>>>,
|
||||
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
def RetCC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
|
||||
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
]>;
|
||||
|
||||
// Vararg functions on windows pass floats in integer registers
|
||||
def CC_AArch64_Win64_VarArg : CallingConv<[
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
CCIfType<[f64], CCBitConvertToType<i64>>,
|
||||
CCDelegateTo<CC_AArch64_AAPCS>
|
||||
]>;
|
||||
|
||||
|
||||
// Darwin uses a calling convention which differs in only two ways
|
||||
// from the standard one at this level:
|
||||
// + i128s (i.e. split i64s) don't need even registers.
|
||||
// + Stack slots are sized as needed rather than being at least 64-bit.
|
||||
def CC_AArch64_DarwinPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
CCIfByVal<CCPassByVal<8, 8>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64],
|
||||
CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
|
||||
[W0, W1, W2, W3, W4, W5, W6]>>>,
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
|
||||
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
|
||||
|
||||
// Handle all scalar types as either i64 or f64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
// The WebKit_JS calling convention only passes the first argument (the callee)
|
||||
// in register and the remaining arguments on stack. We allow 32bit stack slots,
|
||||
// so that WebKit can write partial values in the stack and define the other
|
||||
// 32bit quantity as undef.
|
||||
def CC_AArch64_WebKit_JS : CallingConv<[
|
||||
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
|
||||
|
||||
// Pass the remaining arguments on the stack instead.
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
def RetCC_AArch64_WebKit_JS : CallingConv<[
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
// ARM64 Calling Convention for GHC
//===----------------------------------------------------------------------===//

// This calling convention is specific to the Glasgow Haskell Compiler.
// The only documentation is the GHC source code, specifically the C header
// file:
//
// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
//
// which defines the registers for the Spineless Tagless G-Machine (STG) that
// GHC uses to implement lazy evaluation. The generic STG machine has a set of
// registers which are mapped to an appropriate set of architecture-specific
// registers for each CPU architecture.
//
// The STG Machine is documented here:
//
// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
//
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
// register mapping".

def CC_AArch64_GHC : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
|
||||
// Handle all vector types as either f64 or v2f64.
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
|
||||
|
||||
CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
|
||||
|
||||
// Promote i8/i16/i32 arguments to i64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
|
||||
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
|
||||
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
|
||||
]>;
|
||||
|
||||
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
|
||||
// presumably a callee to someone. External functions may not do so, but this
|
||||
// is currently safe since BL has LR as an implicit-def and what happens after a
|
||||
// tail call doesn't matter.
|
||||
//
|
||||
// It would be better to model its preservation semantics properly (create a
|
||||
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
|
||||
// end up saving LR as part of a call frame). Watch this space...
|
||||
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||
// this can be partially modelled by treating X0 as a callee-saved register;
|
||||
// only the resulting RegMask is used; the SaveList is ignored
|
||||
//
|
||||
// (For generic ARM 64-bit ABI code, clang will not generate constructors or
|
||||
// destructors with 'this' returns, so this RegMask will not be used in that
|
||||
// case)
|
||||
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
|
||||
|
||||
def CSR_AArch64_AAPCS_SwiftError
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_AArch64_TLS_Darwin
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// We can only handle a register pair with adjacent registers, the register pair
|
||||
// should belong to the same class as well. Since the access function on the
|
||||
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
|
||||
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
|
||||
def CSR_AArch64_CXX_TLS_Darwin
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS,
|
||||
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
|
||||
(sequence "D%u", 0, 31))>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_PE
|
||||
: CalleeSavedRegs<(add LR, FP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
|
||||
|
||||
// The ELF stub used for TLS-descriptor access saves every feasible
|
||||
// register. Only X0 and LR are clobbered.
|
||||
def CSR_AArch64_TLS_ELF
|
||||
: CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
def CSR_AArch64_AllRegs
|
||||
: CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
|
||||
(sequence "X%u", 0, 28), FP, LR, SP,
|
||||
(sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
|
||||
(sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
|
||||
|
||||
def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
|
||||
(sequence "X%u", 9, 15))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AARCH64 Mono calling conventions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CC_AArch64_Mono_DarwinPCS : CallingConv<[
|
||||
|
||||
// Mono marks the parameter it wants to pass in this non-abi register with
|
||||
// the 'inreg' attribute.
|
||||
CCIfInReg<CCAssignToReg<[X15]>>,
|
||||
|
||||
CCDelegateTo<CC_AArch64_DarwinPCS>
|
||||
]>;
|
||||
|
||||
def CC_AArch64_Mono_AAPCS : CallingConv<[
|
||||
|
||||
// Mono marks the parameter it wants to pass in this non-abi register with
|
||||
// the 'inreg' attribute.
|
||||
CCIfInReg<CCAssignToReg<[X15]>>,
|
||||
|
||||
CCDelegateTo<CC_AArch64_AAPCS>
|
||||
]>;
|
@ -1,150 +0,0 @@
|
||||
//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Local-dynamic access to thread-local variables proceeds in three stages.
//
// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated
//    in much the same way as a general-dynamic TLS-descriptor access against
//    the special symbol _TLS_MODULE_BASE_.
// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
//    instructions with "dtprel" modifiers.
// 3. These two are added, together with TPIDR_EL0, to obtain the variable's
//    true address.
//
// This is only better than general-dynamic access to the variable if two or
// more of the first stage TLS-descriptor calculations can be combined. This
// pass looks through a function and performs such combinations.
//
//===----------------------------------------------------------------------===//
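// Roughly, and only as an illustration (the exact relocations and pseudo
// expansions live elsewhere in the backend), two local-dynamic variables "a"
// and "b" start out each paying for their own stage-1 sequence:
//
//   <TLSDESC_CALLSEQ _TLS_MODULE_BASE_>   ; stage 1 for "a", result in x0
//   add x0, x0, :dtprel_hi12:a            ; stage 2
//   ...
//   <TLSDESC_CALLSEQ _TLS_MODULE_BASE_>   ; stage 1 for "b", redundant
//   add x0, x0, :dtprel_hi12:b
//
// This pass caches the first result in a virtual register and turns the
// second call sequence into a plain COPY into X0, which later passes can
// then fold away.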
#include "AArch64.h"
|
||||
#include "AArch64InstrInfo.h"
|
||||
#include "AArch64MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define TLSCLEANUP_PASS_NAME "AArch64 Local Dynamic TLS Access Clean-up"
|
||||
|
||||
namespace {
|
||||
struct LDTLSCleanup : public MachineFunctionPass {
|
||||
static char ID;
|
||||
LDTLSCleanup() : MachineFunctionPass(ID) {
|
||||
initializeLDTLSCleanupPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||
if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
|
||||
// No point folding accesses if there aren't at least two.
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
|
||||
return VisitNode(DT->getRootNode(), 0);
|
||||
}
|
||||
|
||||
// Visit the dominator subtree rooted at Node in pre-order.
|
||||
// If TLSBaseAddrReg is non-null, then use that to replace any
|
||||
// TLS_base_addr instructions. Otherwise, create the register
|
||||
// when the first such instruction is seen, and then use it
|
||||
// as we encounter more instructions.
|
||||
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
|
||||
MachineBasicBlock *BB = Node->getBlock();
|
||||
bool Changed = false;
|
||||
|
||||
// Traverse the current block.
|
||||
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
|
||||
++I) {
|
||||
switch (I->getOpcode()) {
|
||||
case AArch64::TLSDESC_CALLSEQ:
|
||||
// Make sure it's a local dynamic access.
|
||||
if (!I->getOperand(0).isSymbol() ||
|
||||
strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
|
||||
break;
|
||||
|
||||
if (TLSBaseAddrReg)
|
||||
I = replaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
|
||||
else
|
||||
I = setRegister(*I, &TLSBaseAddrReg);
|
||||
Changed = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Visit the children of this block in the dominator tree.
|
||||
for (MachineDomTreeNode *N : *Node) {
|
||||
Changed |= VisitNode(N, TLSBaseAddrReg);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Replace the TLS_base_addr instruction I with a copy from
|
||||
// TLSBaseAddrReg, returning the new instruction.
|
||||
MachineInstr *replaceTLSBaseAddrCall(MachineInstr &I,
|
||||
unsigned TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I.getParent()->getParent();
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
|
||||
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
|
||||
// code sequence assumes the address will be.
|
||||
MachineInstr *Copy = BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), AArch64::X0)
|
||||
.addReg(TLSBaseAddrReg);
|
||||
|
||||
// Erase the TLS_base_addr instruction.
|
||||
I.eraseFromParent();
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
// Create a virtual register in *TLSBaseAddrReg, and populate it by
|
||||
// inserting a copy instruction after I. Returns the new instruction.
|
||||
MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I.getParent()->getParent();
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
|
||||
// Create a virtual register for the TLS base address.
|
||||
MachineRegisterInfo &RegInfo = MF->getRegInfo();
|
||||
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
|
||||
// Insert a copy from X0 to TLSBaseAddrReg for later.
|
||||
MachineInstr *Copy =
|
||||
BuildMI(*I.getParent(), ++I.getIterator(), I.getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
|
||||
.addReg(AArch64::X0);
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return TLSCLEANUP_PASS_NAME; }
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(LDTLSCleanup, "aarch64-local-dynamic-tls-cleanup",
|
||||
TLSCLEANUP_PASS_NAME, false, false)
|
||||
|
||||
char LDTLSCleanup::ID = 0;
|
||||
FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
|
||||
return new LDTLSCleanup();
|
||||
}
|
@ -1,338 +0,0 @@
|
||||
//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
/// into a conditional branch (B.cond), when the NZCV flags can be set for
/// "free". This is preferred on targets that have more flexibility when
/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
/// all other variables are equal). This can also reduce register pressure.
///
/// A few examples:
///
/// 1) add w8, w0, w1       -> cmn w0, w1          ; CMN is an alias of ADDS.
///    cbz w8, .LBB0_2      -> b.eq .LBB0_2
///
/// 2) add w8, w0, w1       -> adds w8, w0, w1     ; w8 has multiple uses.
///    cbz w8, .LBB1_2      -> b.eq .LBB1_2
///
/// 3) sub w8, w0, w1       -> subs w8, w0, w1     ; w8 has multiple uses.
///    tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
///
//===----------------------------------------------------------------------===//

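// Concretely (an illustration of examples 1 and 2 above, not additional
// behaviour): an ADDWrr whose result feeds a CBZW is rebuilt as ADDSWrr,
// using WZR as the destination when the branch was the only user of the sum,
// and the CBZW is then replaced by a "b.eq" that consumes the freshly
// written NZCV flags.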
#include "AArch64.h"
|
||||
#include "AArch64Subtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "aarch64-cond-br-tuning"
|
||||
#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
|
||||
|
||||
namespace {
|
||||
class AArch64CondBrTuning : public MachineFunctionPass {
|
||||
const AArch64InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
MachineRegisterInfo *MRI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
AArch64CondBrTuning() : MachineFunctionPass(ID) {
|
||||
initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
|
||||
|
||||
private:
|
||||
MachineInstr *getOperandDef(const MachineOperand &MO);
|
||||
MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
|
||||
MachineInstr *convertToCondBr(MachineInstr &MI);
|
||||
bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
char AArch64CondBrTuning::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
|
||||
AARCH64_CONDBR_TUNING_NAME, false, false)
|
||||
|
||||
void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
|
||||
if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
return nullptr;
|
||||
return MRI->getUniqueVRegDef(MO.getReg());
|
||||
}
|
||||
|
||||
MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
|
||||
bool IsFlagSetting) {
|
||||
// If this is already the flag setting version of the instruction (e.g., SUBS)
|
||||
// just make sure the implicit-def of NZCV isn't marked dead.
|
||||
if (IsFlagSetting) {
|
||||
for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
|
||||
I != E; ++I) {
|
||||
MachineOperand &MO = MI.getOperand(I);
|
||||
if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
|
||||
MO.setIsDead(false);
|
||||
}
|
||||
return &MI;
|
||||
}
|
||||
bool Is64Bit;
|
||||
unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
|
||||
unsigned NewDestReg = MI.getOperand(0).getReg();
|
||||
if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
|
||||
NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
|
||||
TII->get(NewOpc), NewDestReg);
|
||||
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
|
||||
MIB.add(MI.getOperand(I));
|
||||
|
||||
return MIB;
|
||||
}
|
||||
|
||||
MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
|
||||
AArch64CC::CondCode CC;
|
||||
MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI);
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected opcode!");
|
||||
|
||||
case AArch64::CBZW:
|
||||
case AArch64::CBZX:
|
||||
CC = AArch64CC::EQ;
|
||||
break;
|
||||
case AArch64::CBNZW:
|
||||
case AArch64::CBNZX:
|
||||
CC = AArch64CC::NE;
|
||||
break;
|
||||
case AArch64::TBZW:
|
||||
case AArch64::TBZX:
|
||||
CC = AArch64CC::PL;
|
||||
break;
|
||||
case AArch64::TBNZW:
|
||||
case AArch64::TBNZX:
|
||||
CC = AArch64CC::MI;
|
||||
break;
|
||||
}
|
||||
return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
|
||||
.addImm(CC)
|
||||
.addMBB(TargetMBB);
|
||||
}
|
||||
|
||||
bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
|
||||
MachineInstr &DefMI) {
|
||||
// We don't want NZCV bits live across blocks.
|
||||
if (MI.getParent() != DefMI.getParent())
|
||||
return false;
|
||||
|
||||
bool IsFlagSetting = true;
|
||||
unsigned MIOpc = MI.getOpcode();
|
||||
MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
|
||||
switch (DefMI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case AArch64::ADDWri:
|
||||
case AArch64::ADDWrr:
|
||||
case AArch64::ADDWrs:
|
||||
case AArch64::ADDWrx:
|
||||
case AArch64::ANDWri:
|
||||
case AArch64::ANDWrr:
|
||||
case AArch64::ANDWrs:
|
||||
case AArch64::BICWrr:
|
||||
case AArch64::BICWrs:
|
||||
case AArch64::SUBWri:
|
||||
case AArch64::SUBWrr:
|
||||
case AArch64::SUBWrs:
|
||||
case AArch64::SUBWrx:
|
||||
IsFlagSetting = false;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64::ADDSWri:
|
||||
case AArch64::ADDSWrr:
|
||||
case AArch64::ADDSWrs:
|
||||
case AArch64::ADDSWrx:
|
||||
case AArch64::ANDSWri:
|
||||
case AArch64::ANDSWrr:
|
||||
case AArch64::ANDSWrs:
|
||||
case AArch64::BICSWrr:
|
||||
case AArch64::BICSWrs:
|
||||
case AArch64::SUBSWri:
|
||||
case AArch64::SUBSWrr:
|
||||
case AArch64::SUBSWrs:
|
||||
case AArch64::SUBSWrx:
|
||||
switch (MIOpc) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected opcode!");
|
||||
|
||||
case AArch64::CBZW:
|
||||
case AArch64::CBNZW:
|
||||
case AArch64::TBZW:
|
||||
case AArch64::TBNZW:
|
||||
// Check to see if the TBZ/TBNZ is checking the sign bit.
|
||||
if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) &&
|
||||
MI.getOperand(1).getImm() != 31)
|
||||
return false;
|
||||
|
||||
// There must not be any instruction between DefMI and MI that clobbers or
|
||||
// reads NZCV.
|
||||
MachineBasicBlock::iterator I(DefMI), E(MI);
|
||||
for (I = std::next(I); I != E; ++I) {
|
||||
if (I->modifiesRegister(AArch64::NZCV, TRI) ||
|
||||
I->readsRegister(AArch64::NZCV, TRI))
|
||||
return false;
|
||||
}
|
||||
DEBUG(dbgs() << " Replacing instructions:\n ");
|
||||
DEBUG(DefMI.print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(MI.print(dbgs()));
|
||||
|
||||
NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
|
||||
NewBr = convertToCondBr(MI);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case AArch64::ADDXri:
|
||||
case AArch64::ADDXrr:
|
||||
case AArch64::ADDXrs:
|
||||
case AArch64::ADDXrx:
|
||||
case AArch64::ANDXri:
|
||||
case AArch64::ANDXrr:
|
||||
case AArch64::ANDXrs:
|
||||
case AArch64::BICXrr:
|
||||
case AArch64::BICXrs:
|
||||
case AArch64::SUBXri:
|
||||
case AArch64::SUBXrr:
|
||||
case AArch64::SUBXrs:
|
||||
case AArch64::SUBXrx:
|
||||
IsFlagSetting = false;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64::ADDSXri:
|
||||
case AArch64::ADDSXrr:
|
||||
case AArch64::ADDSXrs:
|
||||
case AArch64::ADDSXrx:
|
||||
case AArch64::ANDSXri:
|
||||
case AArch64::ANDSXrr:
|
||||
case AArch64::ANDSXrs:
|
||||
case AArch64::BICSXrr:
|
||||
case AArch64::BICSXrs:
|
||||
case AArch64::SUBSXri:
|
||||
case AArch64::SUBSXrr:
|
||||
case AArch64::SUBSXrs:
|
||||
case AArch64::SUBSXrx:
|
||||
switch (MIOpc) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected opcode!");
|
||||
|
||||
case AArch64::CBZX:
|
||||
case AArch64::CBNZX:
|
||||
case AArch64::TBZX:
|
||||
case AArch64::TBNZX: {
|
||||
// Check to see if the TBZ/TBNZ is checking the sign bit.
|
||||
if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
|
||||
MI.getOperand(1).getImm() != 63)
|
||||
return false;
|
||||
// There must not be any instruction between DefMI and MI that clobbers or
|
||||
// reads NZCV.
|
||||
MachineBasicBlock::iterator I(DefMI), E(MI);
|
||||
for (I = std::next(I); I != E; ++I) {
|
||||
if (I->modifiesRegister(AArch64::NZCV, TRI) ||
|
||||
I->readsRegister(AArch64::NZCV, TRI))
|
||||
return false;
|
||||
}
|
||||
DEBUG(dbgs() << " Replacing instructions:\n ");
|
||||
DEBUG(DefMI.print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(MI.print(dbgs()));
|
||||
|
||||
NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
|
||||
NewBr = convertToCondBr(MI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
(void)NewCmp; (void)NewBr;
|
||||
assert(NewCmp && NewBr && "Expected new instructions.");
|
||||
|
||||
DEBUG(dbgs() << " with instruction:\n ");
|
||||
DEBUG(NewCmp->print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(NewBr->print(dbgs()));
|
||||
|
||||
// If this was a flag setting version of the instruction, we use the original
// instruction by just clearing the dead marker on the implicit-def of NZCV.
// Therefore, we should not erase this instruction.
|
||||
if (!IsFlagSetting)
|
||||
DefMI.eraseFromParent();
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
|
||||
<< "********** Function: " << MF.getName() << '\n');
|
||||
|
||||
TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
|
||||
TRI = MF.getSubtarget().getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
bool LocalChange = false;
|
||||
for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
|
||||
E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case AArch64::CBZW:
|
||||
case AArch64::CBZX:
|
||||
case AArch64::CBNZW:
|
||||
case AArch64::CBNZX:
|
||||
case AArch64::TBZW:
|
||||
case AArch64::TBZX:
|
||||
case AArch64::TBNZW:
|
||||
case AArch64::TBNZX:
|
||||
MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
|
||||
LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
|
||||
break;
|
||||
}
|
||||
// If the optimization was successful, we can't optimize any other
|
||||
// branches because doing so would clobber the NZCV flags.
|
||||
if (LocalChange) {
|
||||
Changed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createAArch64CondBrTuning() {
|
||||
return new AArch64CondBrTuning();
|
||||
}
|
@ -1,449 +0,0 @@
|
||||
//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to make consecutive compares of values use the same operands
// to allow the CSE pass to remove duplicated instructions. For this it analyzes
// branches and adjusts comparisons with immediate values by converting:
//  * GE -> GT
//  * GT -> GE
//  * LT -> LE
//  * LE -> LT
// and adjusting immediate values appropriately. It basically corrects two
// immediate values towards each other to make them equal.
//
// Consider the following example in C:
//
//   if ((a < 5 && ...) || (a > 5 && ...)) {
//        ~~~~~             ~~~~~
//          ^                 ^
//          x                 y
//
// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
// to "false", "y" can just check flags set by the first comparison. As a
// result of the canonicalization employed by
// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
// code, assembly ends up in a form that is not CSE friendly:
//
//   ...
//   cmp      w8, #4
//   b.gt     .LBB0_3
//   ...
// .LBB0_3:
//   cmp      w8, #6
//   b.lt     .LBB0_6
//   ...
//
// Same assembly after the pass:
//
//   ...
//   cmp      w8, #5
//   b.ge     .LBB0_3
//   ...
// .LBB0_3:
//   cmp      w8, #5     // <-- CSE pass removes this instruction
//   b.le     .LBB0_6
//   ...
//
// Currently only SUBS and ADDS followed by b.?? are supported.
//
// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
// TODO: handle other conditional instructions (e.g. CSET)
// TODO: allow second branching to be anything if it doesn't require adjusting
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
|
||||
#include "MCTargetDesc/AArch64AddressingModes.h"
|
||||
#include "Utils/AArch64BaseInfo.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <tuple>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "aarch64-condopt"
|
||||
|
||||
STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
|
||||
|
||||
namespace {
|
||||
|
||||
class AArch64ConditionOptimizer : public MachineFunctionPass {
|
||||
const TargetInstrInfo *TII;
|
||||
MachineDominatorTree *DomTree;
|
||||
const MachineRegisterInfo *MRI;
|
||||
|
||||
public:
|
||||
// Stores immediate, compare instruction opcode and branch condition (in this
|
||||
// order) of adjusted comparison.
|
||||
using CmpInfo = std::tuple<int, unsigned, AArch64CC::CondCode>;
|
||||
|
||||
static char ID;
|
||||
|
||||
AArch64ConditionOptimizer() : MachineFunctionPass(ID) {
|
||||
initializeAArch64ConditionOptimizerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
|
||||
CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
|
||||
void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
|
||||
bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
|
||||
int ToImm);
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "AArch64 Condition Optimizer";
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char AArch64ConditionOptimizer::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
|
||||
"AArch64 CondOpt Pass", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
|
||||
"AArch64 CondOpt Pass", false, false)
|
||||
|
||||
FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
|
||||
return new AArch64ConditionOptimizer();
|
||||
}
|
||||
|
||||
void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
// Finds compare instruction that corresponds to supported types of branching.
|
||||
// Returns the instruction or nullptr on failures or detecting unsupported
|
||||
// instructions.
|
||||
MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
|
||||
MachineBasicBlock *MBB) {
|
||||
MachineBasicBlock::iterator I = MBB->getFirstTerminator();
|
||||
if (I == MBB->end())
|
||||
return nullptr;
|
||||
|
||||
if (I->getOpcode() != AArch64::Bcc)
|
||||
return nullptr;
|
||||
|
||||
// Since we may modify cmp of this MBB, make sure NZCV does not live out.
|
||||
for (auto SuccBB : MBB->successors())
|
||||
if (SuccBB->isLiveIn(AArch64::NZCV))
|
||||
return nullptr;
|
||||
|
||||
// Now find the instruction controlling the terminator.
|
||||
for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
|
||||
--I;
|
||||
assert(!I->isTerminator() && "Spurious terminator");
|
||||
// Check if there is any use of NZCV between CMP and Bcc.
|
||||
if (I->readsRegister(AArch64::NZCV))
|
||||
return nullptr;
|
||||
switch (I->getOpcode()) {
|
||||
// cmp is an alias for subs with a dead destination register.
|
||||
case AArch64::SUBSWri:
|
||||
case AArch64::SUBSXri:
|
||||
// cmn is an alias for adds with a dead destination register.
|
||||
case AArch64::ADDSWri:
|
||||
case AArch64::ADDSXri: {
|
||||
unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
|
||||
if (!I->getOperand(2).isImm()) {
|
||||
DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
|
||||
return nullptr;
|
||||
} else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
|
||||
DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n');
|
||||
return nullptr;
|
||||
} else if (!MRI->use_empty(I->getOperand(0).getReg())) {
|
||||
DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
|
||||
return nullptr;
|
||||
}
|
||||
return &*I;
|
||||
}
|
||||
// Prevent false positive case like:
|
||||
// cmp w19, #0
|
||||
// cinc w0, w19, gt
|
||||
// ...
|
||||
// fcmp d8, #0.0
|
||||
// b.gt .LBB0_5
|
||||
case AArch64::FCMPDri:
|
||||
case AArch64::FCMPSri:
|
||||
case AArch64::FCMPESri:
|
||||
case AArch64::FCMPEDri:
|
||||
|
||||
case AArch64::SUBSWrr:
|
||||
case AArch64::SUBSXrr:
|
||||
case AArch64::ADDSWrr:
|
||||
case AArch64::ADDSXrr:
|
||||
case AArch64::FCMPSrr:
|
||||
case AArch64::FCMPDrr:
|
||||
case AArch64::FCMPESrr:
|
||||
case AArch64::FCMPEDrr:
|
||||
// Skip comparison instructions without immediate operands.
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n');
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Changes opcode adds <-> subs considering register operand width.
|
||||
static int getComplementOpc(int Opc) {
|
||||
switch (Opc) {
|
||||
case AArch64::ADDSWri: return AArch64::SUBSWri;
|
||||
case AArch64::ADDSXri: return AArch64::SUBSXri;
|
||||
case AArch64::SUBSWri: return AArch64::ADDSWri;
|
||||
case AArch64::SUBSXri: return AArch64::ADDSXri;
|
||||
default:
|
||||
llvm_unreachable("Unexpected opcode");
|
||||
}
|
||||
}
|
||||
|
||||
// Changes form of comparison inclusive <-> exclusive.
|
||||
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
|
||||
switch (Cmp) {
|
||||
case AArch64CC::GT: return AArch64CC::GE;
|
||||
case AArch64CC::GE: return AArch64CC::GT;
|
||||
case AArch64CC::LT: return AArch64CC::LE;
|
||||
case AArch64CC::LE: return AArch64CC::LT;
|
||||
default:
|
||||
llvm_unreachable("Unexpected condition code");
|
||||
}
|
||||
}
|
||||
|
||||
// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
|
||||
// operator and condition code.
|
||||
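// As a walk-through of the header example (illustrative only): "cmp w8, #4"
// with GT gets Correction = +1, so NewImm = 5 and the condition becomes GE;
// its sibling "cmp w8, #6" with LT gets Correction = -1, so NewImm = 5 and
// the condition becomes LE. Both now compare against the same immediate.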
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
|
||||
MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
|
||||
unsigned Opc = CmpMI->getOpcode();
|
||||
|
||||
// CMN (compare with negative immediate) is an alias to ADDS (as
|
||||
// "operand - negative" == "operand + positive")
|
||||
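// For instance (again only as an illustration): "a > -5" is CMN w8, #5 with
// GT, so the usual +1 correction is negated, the encoded immediate goes from
// 5 to 4 and the condition becomes GE, giving "a >= -4", the same predicate.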
bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
|
||||
|
||||
int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
|
||||
// Negate Correction value for comparison with negative immediate (CMN).
|
||||
if (Negative) {
|
||||
Correction = -Correction;
|
||||
}
|
||||
|
||||
const int OldImm = (int)CmpMI->getOperand(2).getImm();
|
||||
const int NewImm = std::abs(OldImm + Correction);
|
||||
|
||||
// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
|
||||
// adjusting compare instruction opcode.
|
||||
if (OldImm == 0 && ((Negative && Correction == 1) ||
|
||||
(!Negative && Correction == -1))) {
|
||||
Opc = getComplementOpc(Opc);
|
||||
}
|
||||
|
||||
return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
|
||||
}
|
||||
|
||||
// Applies changes to comparison instruction suggested by adjustCmp().
|
||||
void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
|
||||
const CmpInfo &Info) {
|
||||
int Imm;
|
||||
unsigned Opc;
|
||||
AArch64CC::CondCode Cmp;
|
||||
std::tie(Imm, Opc, Cmp) = Info;
|
||||
|
||||
MachineBasicBlock *const MBB = CmpMI->getParent();
|
||||
|
||||
// Change immediate in comparison instruction (ADDS or SUBS).
|
||||
BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
|
||||
.add(CmpMI->getOperand(0))
|
||||
.add(CmpMI->getOperand(1))
|
||||
.addImm(Imm)
|
||||
.add(CmpMI->getOperand(3));
|
||||
CmpMI->eraseFromParent();
|
||||
|
||||
// The fact that this comparison was picked ensures that it's related to the
|
||||
// first terminator instruction.
|
||||
MachineInstr &BrMI = *MBB->getFirstTerminator();
|
||||
|
||||
// Change condition in branch instruction.
|
||||
BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
|
||||
.addImm(Cmp)
|
||||
.add(BrMI.getOperand(1));
|
||||
BrMI.eraseFromParent();
|
||||
|
||||
MBB->updateTerminator();
|
||||
|
||||
++NumConditionsAdjusted;
|
||||
}
|
||||
|
||||
// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
|
||||
// corresponding to TBB.
|
||||
// Returns true if parsing was successful, otherwise false is returned.
|
||||
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
|
||||
// A normal br.cond simply has the condition code.
|
||||
if (Cond[0].getImm() != -1) {
|
||||
assert(Cond.size() == 1 && "Unknown Cond array format");
|
||||
CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adjusts one cmp instruction to another one if the result of the adjustment
// will allow CSE. Returns true if the compare instruction was changed,
// otherwise false is returned.
|
||||
bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
|
||||
AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm)
|
||||
{
|
||||
CmpInfo Info = adjustCmp(CmpMI, Cmp);
|
||||
if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {
|
||||
modifyCmp(CmpMI, Info);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
|
||||
<< "********** Function: " << MF.getName() << '\n');
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
TII = MF.getSubtarget().getInstrInfo();
|
||||
DomTree = &getAnalysis<MachineDominatorTree>();
|
||||
MRI = &MF.getRegInfo();
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
|
||||
// cmp-conversions from the same head block.
|
||||
// Note that updateDomTree() modifies the children of the DomTree node
|
||||
// currently being visited. The df_iterator supports that; it doesn't look at
|
||||
// child_begin() / child_end() until after a node has been visited.
|
||||
for (MachineDomTreeNode *I : depth_first(DomTree)) {
|
||||
MachineBasicBlock *HBB = I->getBlock();
|
||||
|
||||
SmallVector<MachineOperand, 4> HeadCond;
|
||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
||||
if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Equivalence check is to skip loops.
|
||||
if (!TBB || TBB == HBB) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SmallVector<MachineOperand, 4> TrueCond;
|
||||
MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
|
||||
if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
|
||||
if (!HeadCmpMI) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
|
||||
if (!TrueCmpMI) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode HeadCmp;
|
||||
if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode TrueCmp;
|
||||
if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
|
||||
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
|
||||
|
||||
DEBUG(dbgs() << "Head branch:\n");
|
||||
DEBUG(dbgs() << "\tcondition: "
|
||||
<< AArch64CC::getCondCodeName(HeadCmp) << '\n');
|
||||
DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
|
||||
|
||||
DEBUG(dbgs() << "True branch:\n");
|
||||
DEBUG(dbgs() << "\tcondition: "
|
||||
<< AArch64CC::getCondCodeName(TrueCmp) << '\n');
|
||||
DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
|
||||
|
||||
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 2) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
|
||||
// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
|
||||
CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
|
||||
if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
|
||||
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
|
||||
modifyCmp(HeadCmpMI, HeadCmpInfo);
|
||||
modifyCmp(TrueCmpMI, TrueCmpInfo);
|
||||
Changed = true;
|
||||
}
|
||||
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 1) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
|
||||
// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
// GT -> GE transformation increases immediate value, so picking the
|
||||
// smaller one; LT -> LE decreases immediate value so invert the choice.
|
||||
bool adjustHeadCond = (HeadImm < TrueImm);
|
||||
if (HeadCmp == AArch64CC::LT) {
|
||||
adjustHeadCond = !adjustHeadCond;
|
||||
}
|
||||
|
||||
if (adjustHeadCond) {
|
||||
Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
|
||||
} else {
|
||||
Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
|
||||
}
|
||||
}
|
||||
// Other transformation cases almost never occur due to generation of < or >
|
||||
// comparisons instead of <= and >=.
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
@ -1,216 +0,0 @@
|
||||
//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file When allowed by the instruction, replace a dead definition of a GPR
/// with the zero register. This makes the code a bit friendlier towards the
/// hardware's register renamer.
//===----------------------------------------------------------------------===//

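// A minimal before/after flavour of the rewrite (illustrative, not taken from
// the pass's tests); the flags the branch reads are unchanged, only the dead
// result register disappears:
//
//   subs w8, w0, w1        ; w8 is never read again
//   b.eq .LBB0_2
// becomes
//   subs wzr, w0, w1       ; the alias of "cmp w0, w1"
//   b.eq .LBB0_2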
#include "AArch64.h"
|
||||
#include "AArch64RegisterInfo.h"
|
||||
#include "AArch64Subtarget.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/ISDOpcodes.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "aarch64-dead-defs"
|
||||
|
||||
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
|
||||
|
||||
#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions"
|
||||
|
||||
namespace {
|
||||
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
|
||||
private:
|
||||
const TargetRegisterInfo *TRI;
|
||||
const MachineRegisterInfo *MRI;
|
||||
const TargetInstrInfo *TII;
|
||||
bool Changed;
|
||||
void processMachineBasicBlock(MachineBasicBlock &MBB);
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
|
||||
initializeAArch64DeadRegisterDefinitionsPass(
|
||||
*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
|
||||
StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
|
||||
};
|
||||
char AArch64DeadRegisterDefinitions::ID = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
|
||||
AARCH64_DEAD_REG_DEF_NAME, false, false)
|
||||
|
||||
static bool usesFrameIndex(const MachineInstr &MI) {
|
||||
for (const MachineOperand &MO : MI.uses())
|
||||
if (MO.isFI())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
|
||||
const MachineFunction &MF) const {
|
||||
if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
|
||||
return false;
|
||||
|
||||
#define CASE_AARCH64_ATOMIC_(PREFIX) \
|
||||
case AArch64::PREFIX##X: \
|
||||
case AArch64::PREFIX##W: \
|
||||
case AArch64::PREFIX##H: \
|
||||
case AArch64::PREFIX##B
|
||||
|
||||
for (const MachineMemOperand *MMO : MI.memoperands()) {
|
||||
if (MMO->isAtomic()) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
switch (Opcode) {
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDADDA):
|
||||
CASE_AARCH64_ATOMIC_(LDADDAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDCLRA):
|
||||
CASE_AARCH64_ATOMIC_(LDCLRAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDEORA):
|
||||
CASE_AARCH64_ATOMIC_(LDEORAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSETA):
|
||||
CASE_AARCH64_ATOMIC_(LDSETAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSMAXA):
|
||||
CASE_AARCH64_ATOMIC_(LDSMAXAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSMINA):
|
||||
CASE_AARCH64_ATOMIC_(LDSMINAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDUMAXA):
|
||||
CASE_AARCH64_ATOMIC_(LDUMAXAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDUMINA):
|
||||
CASE_AARCH64_ATOMIC_(LDUMINAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(SWPA):
|
||||
CASE_AARCH64_ATOMIC_(SWPAL):
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef CASE_AARCH64_ATOMIC_
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
|
||||
MachineBasicBlock &MBB) {
|
||||
const MachineFunction &MF = *MBB.getParent();
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (usesFrameIndex(MI)) {
|
||||
// We need to skip this instruction because while it appears to have a
|
||||
// dead def it uses a frame index which might expand into a multi
|
||||
// instruction sequence during EPI.
|
||||
DEBUG(dbgs() << " Ignoring, operand is frame index\n");
|
||||
continue;
|
||||
}
|
||||
if (MI.definesRegister(AArch64::XZR) || MI.definesRegister(AArch64::WZR)) {
|
||||
// It is not allowed to write to the same register (not even the zero
|
||||
// register) twice in a single instruction.
|
||||
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shouldSkip(MI, MF)) {
|
||||
DEBUG(dbgs() << " Ignoring, Atomic instruction with acquire semantics using WZR/XZR\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
const MCInstrDesc &Desc = MI.getDesc();
|
||||
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
|
||||
MachineOperand &MO = MI.getOperand(I);
|
||||
if (!MO.isReg() || !MO.isDef())
|
||||
continue;
|
||||
// We should not have any relevant physreg defs that are replaceable by
|
||||
// zero before register allocation. So we just check for dead vreg defs.
|
||||
unsigned Reg = MO.getReg();
|
||||
if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
|
||||
(!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
|
||||
continue;
|
||||
assert(!MO.isImplicit() && "Unexpected implicit def!");
|
||||
DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
|
||||
MI.print(dbgs()));
|
||||
// Be careful not to change the register if it's a tied operand.
|
||||
if (MI.isRegTiedToUseOperand(I)) {
|
||||
DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
|
||||
continue;
|
||||
}
|
||||
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
|
||||
unsigned NewReg;
|
||||
if (RC == nullptr) {
|
||||
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
|
||||
continue;
|
||||
} else if (RC->contains(AArch64::WZR))
|
||||
NewReg = AArch64::WZR;
|
||||
else if (RC->contains(AArch64::XZR))
|
||||
NewReg = AArch64::XZR;
|
||||
else {
|
||||
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
|
||||
continue;
|
||||
}
|
||||
DEBUG(dbgs() << " Replacing with zero register. New:\n ");
|
||||
MO.setReg(NewReg);
|
||||
MO.setIsDead();
|
||||
DEBUG(MI.print(dbgs()));
|
||||
++NumDeadDefsReplaced;
|
||||
Changed = true;
|
||||
// Only replace one dead register, see check for zero register above.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the function for instructions that have a dead definition of a
|
||||
// register. Replace that register with the zero register when possible.
|
||||
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
TRI = MF.getSubtarget().getRegisterInfo();
|
||||
TII = MF.getSubtarget().getInstrInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
|
||||
Changed = false;
|
||||
for (auto &MBB : MF)
|
||||
processMachineBasicBlock(MBB);
|
||||
return Changed;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createAArch64DeadRegisterDefinitions() {
|
||||
return new AArch64DeadRegisterDefinitions();
|
||||
}
|
@ -1 +0,0 @@
974f968ec2c4dcc770b5a9a166c08c74e693a434