Imported Upstream version 5.18.0.167
Former-commit-id: 289509151e0fee68a1b591a20c9f109c3c789d3a
This commit is contained in:
parent
e19d552987
commit
b084638f15
268 external/llvm/lib/Target/AMDGPU/AMDGPU.h vendored
@@ -1,268 +0,0 @@
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"

namespace llvm {

class AMDGPUTargetMachine;
class FunctionPass;
class GCNTargetMachine;
class ModulePass;
class Pass;
class Target;
class TargetMachine;
class TargetOptions;
class PassRegistry;
class Module;

// R600 Passes
FunctionPass *createR600VectorRegMerger();
FunctionPass *createR600ExpandSpecialInstrsPass();
FunctionPass *createR600EmitClauseMarkers();
FunctionPass *createR600ClauseMergePass();
FunctionPass *createR600Packetizer();
FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);

// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIFixWWMLivenessPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();

void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);

void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;

void initializeAMDGPUAlwaysInlinePass(PassRegistry&);

Pass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;

ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;

void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;

void initializeR600ClauseMergePassPass(PassRegistry &);
extern char &R600ClauseMergePassID;

void initializeR600ControlFlowFinalizerPass(PassRegistry &);
extern char &R600ControlFlowFinalizerID;

void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
extern char &R600ExpandSpecialInstrsPassID;

void initializeR600VectorRegMergerPass(PassRegistry &);
extern char &R600VectorRegMergerID;

void initializeR600PacketizerPass(PassRegistry &);
extern char &R600PacketizerID;

void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;

void initializeSIPeepholeSDWAPass(PassRegistry &);
extern char &SIPeepholeSDWAID;

void initializeSIShrinkInstructionsPass(PassRegistry&);
extern char &SIShrinkInstructionsID;

void initializeSIFixSGPRCopiesPass(PassRegistry &);
extern char &SIFixSGPRCopiesID;

void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;

void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;

void initializeSILoadStoreOptimizerPass(PassRegistry &);
extern char &SILoadStoreOptimizerID;

void initializeSIWholeQuadModePass(PassRegistry &);
extern char &SIWholeQuadModeID;

void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;

void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;

void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;

void initializeSIFixWWMLivenessPass(PassRegistry &);
extern char &SIFixWWMLivenessID;

void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
extern char &AMDGPUSimplifyLibCallsID;

void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
extern char &AMDGPUUseNativeCallsID;

// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;

Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(
  TargetMachine *TM = nullptr,
  CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();

ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;

void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
extern char &SIOptimizeExecMaskingPreRAID;

void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
extern char &AMDGPUAnnotateUniformValuesPassID;

void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
extern char &AMDGPUCodeGenPrepareID;

void initializeSIAnnotateControlFlowPass(PassRegistry&);
extern char &SIAnnotateControlFlowPassID;

void initializeSIMemoryLegalizerPass(PassRegistry&);
extern char &SIMemoryLegalizerID;

void initializeSIDebuggerInsertNopsPass(PassRegistry&);
extern char &SIDebuggerInsertNopsID;

void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;

void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;

void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
extern char &AMDGPUUnifyDivergentExitNodesID;

ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);

void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);

Pass *createAMDGPUFunctionInliningPass();
void initializeAMDGPUInlinerPass(PassRegistry&);

ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;

Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();

namespace AMDGPU {
enum TargetIndex {
  TI_CONSTDATA_START,
  TI_SCRATCH_RSRC_DWORD0,
  TI_SCRATCH_RSRC_DWORD1,
  TI_SCRATCH_RSRC_DWORD2,
  TI_SCRATCH_RSRC_DWORD3
};
}

} // End namespace llvm

/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
struct AMDGPUAS {
  // The following address space values depend on the triple environment.
  unsigned PRIVATE_ADDRESS;  ///< Address space for private memory.
  unsigned FLAT_ADDRESS;     ///< Address space for flat memory.
  unsigned REGION_ADDRESS;   ///< Address space for region memory.

  enum : unsigned {
    // The maximum value for flat, generic, local, private, constant and region.
    MAX_COMMON_ADDRESS = 5,

    GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
    CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
    LOCAL_ADDRESS = 3,    ///< Address space for local memory.
    /// Address space for directly addressable parameter memory (CONST0)
    PARAM_D_ADDRESS = 6,
    /// Address space for indirectly addressable parameter memory (VTX1)
    PARAM_I_ADDRESS = 7,

    // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
    // this order to be able to dynamically index a constant buffer, for
    // example:
    //
    // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx

    CONSTANT_BUFFER_0 = 8,
    CONSTANT_BUFFER_1 = 9,
    CONSTANT_BUFFER_2 = 10,
    CONSTANT_BUFFER_3 = 11,
    CONSTANT_BUFFER_4 = 12,
    CONSTANT_BUFFER_5 = 13,
    CONSTANT_BUFFER_6 = 14,
    CONSTANT_BUFFER_7 = 15,
    CONSTANT_BUFFER_8 = 16,
    CONSTANT_BUFFER_9 = 17,
    CONSTANT_BUFFER_10 = 18,
    CONSTANT_BUFFER_11 = 19,
    CONSTANT_BUFFER_12 = 20,
    CONSTANT_BUFFER_13 = 21,
    CONSTANT_BUFFER_14 = 22,
    CONSTANT_BUFFER_15 = 23,

    // Some places use this if the address space can't be determined.
    UNKNOWN_ADDRESS_SPACE = ~0u,
  };
};

namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(const Module &M);
AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
AMDGPUAS getAMDGPUAS(Triple T);
} // namespace AMDGPU
} // namespace llvm

#endif
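The AMDGPUAS comment above notes that the first three address-space numbers depend on the triple environment, while the rest are fixed. A minimal standalone C++ sketch of how such a triple-dependent mapping can be selected; this is not the LLVM API, and selectMapping is a hypothetical stand-in for getAMDGPUAS, using the two numberings asserted in AMDGPUAliasAnalysis.cpp below:

// Standalone sketch (not LLVM API): selecting a triple-dependent
// address-space mapping like AMDGPUAS. selectMapping is hypothetical.
#include <iostream>

struct ASMapping {
  unsigned Private, Flat, Region; // triple-dependent slots
  // Global = 1, Constant = 2, Local = 3 in both layouts.
};

// Newer environments put flat at 0; older ones put private at 0.
ASMapping selectMapping(bool FlatIsZero) {
  return FlatIsZero ? ASMapping{5, 0, 4} : ASMapping{0, 4, 5};
}

int main() {
  ASMapping AS = selectMapping(/*FlatIsZero=*/true);
  std::cout << "private AS = " << AS.Private
            << ", flat AS = " << AS.Flat << "\n"; // private AS = 5, flat AS = 0
}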
785 external/llvm/lib/Target/AMDGPU/AMDGPU.td vendored
File diff suppressed because it is too large
@@ -1,159 +0,0 @@
//===- AMDGPUAliasAnalysis ------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This is the AMDGPU address space based alias analysis pass.
//===----------------------------------------------------------------------===//

#include "AMDGPUAliasAnalysis.h"
#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-aa"

// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;

INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
                "AMDGPU Address space based Alias Analysis", false, true)

ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
  return new AMDGPUAAWrapperPass();
}

void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
}

// Must match the table in getAliasResult.
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
  : Arch(Arch_), AS(AS_) {
  // These arrays are indexed by address space value enum elements 0 ... 5.
  static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
  /*             Private    Global    Constant  Group     Flat      Region */
  /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
  /* Global   */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
  /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
  /* Group    */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
  /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
  /* Region   */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
  };
  static const AliasResult ASAliasRulesGenIsZero[6][6] = {
  /*             Flat      Global    Constant  Group     Region    Private */
  /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
  /* Global   */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
  /* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
  /* Group    */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
  /* Region   */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
  /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
  };
  assert(AS.MAX_COMMON_ADDRESS <= 5);
  if (AS.FLAT_ADDRESS == 0) {
    assert(AS.GLOBAL_ADDRESS   == 1 &&
           AS.REGION_ADDRESS   == 4 &&
           AS.LOCAL_ADDRESS    == 3 &&
           AS.CONSTANT_ADDRESS == 2 &&
           AS.PRIVATE_ADDRESS  == 5);
    ASAliasRules = &ASAliasRulesGenIsZero;
  } else {
    assert(AS.PRIVATE_ADDRESS  == 0 &&
           AS.GLOBAL_ADDRESS   == 1 &&
           AS.CONSTANT_ADDRESS == 2 &&
           AS.LOCAL_ADDRESS    == 3 &&
           AS.FLAT_ADDRESS     == 4 &&
           AS.REGION_ADDRESS   == 5);
    ASAliasRules = &ASAliasRulesPrivIsZero;
  }
}

AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
                                                           unsigned AS2) const {
  if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) {
    if (Arch == Triple::amdgcn)
      report_fatal_error("Pointer address space out of range");
    return AS1 == AS2 ? MayAlias : NoAlias;
  }

  return (*ASAliasRules)[AS1][AS2];
}

AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
                                  const MemoryLocation &LocB) {
  unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
  unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();

  AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
  if (Result == NoAlias) return Result;

  // Forward the query to the next alias analysis.
  return AAResultBase::alias(LocA, LocB);
}

bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
                                            bool OrLocal) {
  const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);

  if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
    return true;
  }

  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
    if (GV->isConstant())
      return true;
  } else if (const Argument *Arg = dyn_cast<Argument>(Base)) {
    const Function *F = Arg->getParent();

    // Only assume constant memory for arguments on kernels.
    switch (F->getCallingConv()) {
    default:
      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
    case CallingConv::AMDGPU_KERNEL:
    case CallingConv::SPIR_KERNEL:
      break;
    }

    unsigned ArgNo = Arg->getArgNo();
    /* On an argument, the ReadOnly attribute indicates that the function does
       not write through this pointer argument, even though it may write
       to the memory that the pointer points to.
       On an argument, the ReadNone attribute indicates that the function does
       not dereference that pointer argument, even though it may read or write
       the memory that the pointer points to if accessed through other pointers.
     */
    if (F->hasParamAttribute(ArgNo, Attribute::NoAlias) &&
        (F->hasParamAttribute(ArgNo, Attribute::ReadNone) ||
         F->hasParamAttribute(ArgNo, Attribute::ReadOnly))) {
      return true;
    }
  }
  return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
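The constructor above picks one of two 6x6 tables depending on where the flat address space sits, and getAliasResult simply indexes it. A self-contained sketch of the same table-driven lookup, assuming the private-is-zero layout (simplified types, not the LLVM ones):

// Standalone sketch of the table-driven lookup: address spaces index a
// symmetric 6x6 matrix, and anything involving flat conservatively MayAlias.
#include <cassert>
#include <iostream>

enum AliasResult { NoAlias, MayAlias };

// Layout with private = 0 (mirrors ASAliasRulesPrivIsZero above).
//    Private   Global    Constant  Group     Flat      Region
static const AliasResult Rules[6][6] = {
    {MayAlias, NoAlias,  NoAlias,  NoAlias,  MayAlias, NoAlias},
    {NoAlias,  MayAlias, NoAlias,  NoAlias,  MayAlias, NoAlias},
    {NoAlias,  NoAlias,  MayAlias, NoAlias,  MayAlias, NoAlias},
    {NoAlias,  NoAlias,  NoAlias,  MayAlias, MayAlias, NoAlias},
    {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
    {NoAlias,  NoAlias,  NoAlias,  NoAlias,  MayAlias, MayAlias},
};

AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
  assert(AS1 < 6 && AS2 < 6 && "address space out of range");
  return Rules[AS1][AS2];
}

int main() {
  // Private (0) vs. group/LDS (3): provably disjoint memories.
  std::cout << (getAliasResult(0, 3) == NoAlias) << "\n";  // 1
  // Flat (4) vs. global (1): a flat pointer may point anywhere.
  std::cout << (getAliasResult(4, 1) == MayAlias) << "\n"; // 1
}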
@@ -1,115 +0,0 @@
//===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This is the AMDGPU address space based alias analysis pass.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H

#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include <algorithm>
#include <memory>

namespace llvm {

class DataLayout;
class MDNode;
class MemoryLocation;

/// A simple AA result that uses address space info to answer queries.
class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
  friend AAResultBase<AMDGPUAAResult>;

  const DataLayout &DL;
  AMDGPUAS AS;

public:
  explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
    DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
  AMDGPUAAResult(AMDGPUAAResult &&Arg)
      : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
        ASAliasRules(Arg.ASAliasRules) {}

  /// Handle invalidation events from the new pass manager.
  ///
  /// By definition, this result is stateless and so remains valid.
  bool invalidate(Function &, const PreservedAnalyses &) { return false; }

  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);

private:
  bool Aliases(const MDNode *A, const MDNode *B) const;
  bool PathAliases(const MDNode *A, const MDNode *B) const;

  class ASAliasRulesTy {
  public:
    ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);

    AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;

  private:
    Triple::ArchType Arch;
    AMDGPUAS AS;
    const AliasResult (*ASAliasRules)[6][6];
  } ASAliasRules;
};

/// Analysis pass providing a never-invalidated alias analysis result.
class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
  friend AnalysisInfoMixin<AMDGPUAA>;

  static char PassID;

public:
  using Result = AMDGPUAAResult;

  AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
    return AMDGPUAAResult(F.getParent()->getDataLayout(),
                          Triple(F.getParent()->getTargetTriple()));
  }
};

/// Legacy wrapper pass to provide the AMDGPUAAResult object.
class AMDGPUAAWrapperPass : public ImmutablePass {
  std::unique_ptr<AMDGPUAAResult> Result;

public:
  static char ID;

  AMDGPUAAWrapperPass() : ImmutablePass(ID) {
    initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
  }

  AMDGPUAAResult &getResult() { return *Result; }
  const AMDGPUAAResult &getResult() const { return *Result; }

  bool doInitialization(Module &M) override {
    Result.reset(new AMDGPUAAResult(M.getDataLayout(),
                                    Triple(M.getTargetTriple())));
    return false;
  }

  bool doFinalization(Module &M) override {
    Result.reset();
    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
@@ -1,93 +0,0 @@
//===-- AMDGPUAlwaysInlinePass.cpp - Mark functions always_inline ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass marks all internal functions as always_inline and creates
/// duplicates of all other functions and marks the duplicates as always_inline.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

namespace {

static cl::opt<bool> StressCalls(
  "amdgpu-stress-function-calls",
  cl::Hidden,
  cl::desc("Force all functions to be noinline"),
  cl::init(false));

class AMDGPUAlwaysInline : public ModulePass {
  bool GlobalOpt;

public:
  static char ID;

  AMDGPUAlwaysInline(bool GlobalOpt = false) :
    ModulePass(ID), GlobalOpt(GlobalOpt) { }
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};

} // End anonymous namespace

INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                "AMDGPU Inline All Functions", false, false)

char AMDGPUAlwaysInline::ID = 0;

bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  std::vector<GlobalAlias*> AliasesToRemove;
  std::vector<Function *> FuncsToClone;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
  auto IncompatAttr
    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

  for (Function &F : M) {
    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
        !F.hasFnAttribute(IncompatAttr))
      FuncsToClone.push_back(&F);
  }

  for (Function *F : FuncsToClone) {
    ValueToValueMapTy VMap;
    Function *NewFunc = CloneFunction(F, VMap);
    NewFunc->setLinkage(GlobalValue::InternalLinkage);
    F->replaceAllUsesWith(NewFunc);
  }

  for (Function &F : M) {
    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
      F.addFnAttr(NewAttr);
    }
  }
  return false;
}

ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  return new AMDGPUAlwaysInline(GlobalOpt);
}
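createAMDGPUAlwaysInlinePass is the factory declared in AMDGPU.h above. A hedged usage sketch of wiring it into a legacy pass manager; module creation and target setup are elided and assumed to happen elsewhere, and including the target-private AMDGPU.h header is only possible inside the LLVM tree:

// Hedged usage sketch (assumes an in-tree build): run the always-inline
// pass over an existing module via the legacy pass manager.
#include "AMDGPU.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

void runAlwaysInline(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  // GlobalOpt=true also erases the aliases that the pass redirected.
  PM.add(llvm::createAMDGPUAlwaysInlinePass(/*GlobalOpt=*/true));
  PM.run(M);
}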
@@ -1,328 +0,0 @@
//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  AMDGPUAS AS;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // X ids unnecessarily propagated to kernels.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  AS = AMDGPU::getAMDGPUAS(CG.getModule());
  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
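copyFeaturesToFunction above propagates feature attributes from callee to caller, so kernels pick up the features of everything they transitively call as SCCs are visited callees-first. The same bottom-up idea in a standalone sketch using plain strings instead of LLVM attributes (hypothetical names, not the pass's types):

// Standalone sketch: a caller inherits every feature attribute any of its
// callees carries, so the kernel ends up annotated transitively.
#include <iostream>
#include <map>
#include <set>
#include <string>

using Attrs = std::set<std::string>;

void copyFeatures(Attrs &Parent, const Attrs &Callee) {
  Parent.insert(Callee.begin(), Callee.end());
}

int main() {
  std::map<std::string, Attrs> Fn = {
      {"leaf", {"amdgpu-work-item-id-y"}}, {"mid", {}}, {"kernel", {}}};
  copyFeatures(Fn["mid"], Fn["leaf"]);   // callees processed before callers
  copyFeatures(Fn["kernel"], Fn["mid"]);
  for (const auto &A : Fn["kernel"])
    std::cout << A << "\n"; // amdgpu-work-item-id-y
}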
@@ -1,192 +0,0 @@
//===-- AMDGPUAnnotateUniformValues.cpp -----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  DivergenceAnalysis *DA;
  MemoryDependenceResults *MDR;
  LoopInfo *LI;
  DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  bool isKernelFunc;
  AMDGPUAS AMDGPUASI;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() :
    FunctionPass(ID) { }
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst * Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}

static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
  for (auto I : predecessors(Root))
    if (Set.insert(I))
      DFS(I, Set);
}

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
  // 1. Get the Loop for Load->getParent().
  // 2. If it exists, collect all the basic blocks from the outermost loop
  //    and check them for writes; otherwise, start the DFS over all preds.
  // 3. Start the DFS over all preds from the outermost loop header.
  SetVector<BasicBlock *> Checklist;
  BasicBlock *Start = Load->getParent();
  Checklist.insert(Start);
  const Value *Ptr = Load->getPointerOperand();
  const Loop *L = LI->getLoopFor(Start);
  if (L) {
    const Loop *P = L;
    do {
      L = P;
      P = P->getParentLoop();
    } while (P);
    Checklist.insert(L->block_begin(), L->block_end());
    Start = L->getHeader();
  }

  DFS(Start, Checklist);
  for (auto &BB : Checklist) {
    BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
      BasicBlock::iterator(Load) : BB->end();
    auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
                                           StartIt, BB, Load);
    if (Q.isClobber() || Q.isUnknown())
      return true;
  }
  return false;
}

void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (I.isUnconditional())
    return;

  Value *Cond = I.getCondition();
  if (!DA->isUniform(Cond))
    return;

  setUniformMetadata(I.getParent()->getTerminator());
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  auto isGlobalLoad = [&](LoadInst &Load)->bool {
    return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
  };
  // We only track up to the function boundary; a FunctionPass cannot look
  // beyond it. Thus we can only guarantee that memory is not clobbered for
  // memory operations that live in kernels.
  bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
  if (!PtrI && NotClobbered && isGlobalLoad(I)) {
    if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
      // Look up the existing GEP.
      if (noClobberClones.count(Ptr)) {
        PtrI = noClobberClones[Ptr];
      } else {
        // Create a GEP of the Value.
        Function *F = I.getParent()->getParent();
        Value *Idx = Constant::getIntegerValue(
          Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
        // Insert the GEP at the entry to make it dominate all uses.
        PtrI = GetElementPtrInst::Create(
          Ptr->getType()->getPointerElementType(), Ptr,
          ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
      }
      I.replaceUsesOfWith(Ptr, PtrI);
    }
  }

  if (PtrI) {
    setUniformMetadata(PtrI);
    if (NotClobbered)
      setNoClobberMetadata(PtrI);
  }
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  AMDGPUASI = AMDGPU::getAMDGPUAS(M);
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA  = &getAnalysis<DivergenceAnalysis>();
  MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;

  visit(F);
  noClobberClones.clear();
  return true;
}

FunctionPass *
llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}
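isClobberedInFunction builds its checklist of blocks by walking predecessors with the recursive DFS helper above, using the set itself to cut cycles. The same idea in a self-contained sketch over a toy CFG (plain standard-library types, not LLVM's):

// Standalone sketch of the predecessor walk: a recursive DFS collects every
// block that can reach the load, and a visited set breaks back-edges.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>; // block -> preds

void dfs(const CFG &Preds, const std::string &Root,
         std::set<std::string> &Seen) {
  auto It = Preds.find(Root);
  if (It == Preds.end()) return;
  for (const auto &P : It->second)
    if (Seen.insert(P).second) // insert() reports true only for new blocks
      dfs(Preds, P, Seen);
}

int main() {
  CFG Preds = {{"load", {"loop"}}, {"loop", {"entry", "loop"}}, {"entry", {}}};
  std::set<std::string> Checklist = {"load"};
  dfs(Preds, "load", Checklist); // the loop's self-edge is cut once seen
  std::cout << Checklist.size() << "\n"; // 3: entry, loop, load
}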
@@ -1,131 +0,0 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"

INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
                "Argument Register Usage Information Storage", false, true)

void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet()) {
    OS << "<not set>\n";
    return;
  }

  if (isRegister())
    OS << "Reg " << printReg(getRegister(), TRI) << '\n';
  else
    OS << "Stack offset " << getStackOffset() << '\n';
}

char AMDGPUArgumentUsageInfo::ID = 0;

const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};

bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
  return false;
}

bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
  ArgInfoMap.clear();
  return false;
}

void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &FI : ArgInfoMap) {
    OS << "Arguments for " << FI.first->getName() << '\n'
       << "  PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
       << "  DispatchPtr: " << FI.second.DispatchPtr
       << "  QueuePtr: " << FI.second.QueuePtr
       << "  KernargSegmentPtr: " << FI.second.KernargSegmentPtr
       << "  DispatchID: " << FI.second.DispatchID
       << "  FlatScratchInit: " << FI.second.FlatScratchInit
       << "  PrivateSegmentSize: " << FI.second.PrivateSegmentSize
       << "  GridWorkgroupCountX: " << FI.second.GridWorkGroupCountX
       << "  GridWorkgroupCountY: " << FI.second.GridWorkGroupCountY
       << "  GridWorkgroupCountZ: " << FI.second.GridWorkGroupCountZ
       << "  WorkGroupIDX: " << FI.second.WorkGroupIDX
       << "  WorkGroupIDY: " << FI.second.WorkGroupIDY
       << "  WorkGroupIDZ: " << FI.second.WorkGroupIDZ
       << "  WorkGroupInfo: " << FI.second.WorkGroupInfo
       << "  PrivateSegmentWaveByteOffset: "
       << FI.second.PrivateSegmentWaveByteOffset
       << "  ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
       << "  ImplicitArgPtr: " << FI.second.ImplicitArgPtr
       << "  WorkItemIDX " << FI.second.WorkItemIDX
       << "  WorkItemIDY " << FI.second.WorkItemIDY
       << "  WorkItemIDZ " << FI.second.WorkItemIDZ
       << '\n';
  }
}

std::pair<const ArgDescriptor *, const TargetRegisterClass *>
AMDGPUFunctionArgInfo::getPreloadedValue(
  AMDGPUFunctionArgInfo::PreloadedValue Value) const {
  switch (Value) {
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
    return std::make_pair(
      PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
      &AMDGPU::SGPR_128RegClass);
  }
  case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
    return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
    return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
                          &AMDGPU::SGPR_32RegClass);

  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
    return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
                          &AMDGPU::SGPR_32RegClass);
  case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
    return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
                          &AMDGPU::SGPR_32RegClass);
  case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return std::make_pair(
      PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
      &AMDGPU::SGPR_32RegClass);
  case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
    return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
    return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::DISPATCH_ID:
    return std::make_pair(DispatchID ? &DispatchID : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
    return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::DISPATCH_PTR:
    return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::QUEUE_PTR:
    return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
                          &AMDGPU::SGPR_64RegClass);
  case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
    return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
                          &AMDGPU::VGPR_32RegClass);
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
    return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
                          &AMDGPU::VGPR_32RegClass);
  case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
    return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
                          &AMDGPU::VGPR_32RegClass);
  }
  llvm_unreachable("unexpected preloaded value type");
}
@@ -1,177 +0,0 @@
//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

namespace llvm {

class Function;
class raw_ostream;
class SISubtarget;
class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;

struct ArgDescriptor {
private:
  friend struct AMDGPUFunctionArgInfo;
  friend class AMDGPUArgumentUsageInfo;

  union {
    unsigned Register;
    unsigned StackOffset;
  };

  bool IsStack : 1;
  bool IsSet : 1;

  ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
    : Register(Val), IsStack(IsStack), IsSet(IsSet) {}
public:
  static ArgDescriptor createRegister(unsigned Reg) {
    return ArgDescriptor(Reg, false, true);
  }

  static ArgDescriptor createStack(unsigned Reg) {
    return ArgDescriptor(Reg, true, true);
  }

  bool isSet() const {
    return IsSet;
  }

  explicit operator bool() const {
    return isSet();
  }

  bool isRegister() const {
    return !IsStack;
  }

  unsigned getRegister() const {
    assert(!IsStack);
    return Register;
  }

  unsigned getStackOffset() const {
    assert(IsStack);
    return StackOffset;
  }

  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
  Arg.print(OS);
  return OS;
}

struct AMDGPUFunctionArgInfo {
  enum PreloadedValue {
    // SGPRS:
    PRIVATE_SEGMENT_BUFFER = 0,
    DISPATCH_PTR = 1,
    QUEUE_PTR = 2,
    KERNARG_SEGMENT_PTR = 3,
    DISPATCH_ID = 4,
    FLAT_SCRATCH_INIT = 5,
    WORKGROUP_ID_X = 10,
    WORKGROUP_ID_Y = 11,
    WORKGROUP_ID_Z = 12,
    PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
    IMPLICIT_BUFFER_PTR = 15,
    IMPLICIT_ARG_PTR = 16,

    // VGPRS:
    WORKITEM_ID_X = 17,
    WORKITEM_ID_Y = 18,
    WORKITEM_ID_Z = 19,
    FIRST_VGPR_VALUE = WORKITEM_ID_X
  };

  // Kernel input registers setup for the HSA ABI in allocation order.

  // User SGPRs in kernels
  // XXX - Can these require argument spills?
  ArgDescriptor PrivateSegmentBuffer;
  ArgDescriptor DispatchPtr;
  ArgDescriptor QueuePtr;
  ArgDescriptor KernargSegmentPtr;
  ArgDescriptor DispatchID;
  ArgDescriptor FlatScratchInit;
  ArgDescriptor PrivateSegmentSize;
  ArgDescriptor GridWorkGroupCountX;
  ArgDescriptor GridWorkGroupCountY;
  ArgDescriptor GridWorkGroupCountZ;

  // System SGPRs in kernels.
  ArgDescriptor WorkGroupIDX;
  ArgDescriptor WorkGroupIDY;
  ArgDescriptor WorkGroupIDZ;
  ArgDescriptor WorkGroupInfo;
  ArgDescriptor PrivateSegmentWaveByteOffset;

  // Pointer with offset from kernargsegmentptr to where special ABI arguments
  // are passed to callable functions.
  ArgDescriptor ImplicitArgPtr;

  // Input registers for non-HSA ABI
  ArgDescriptor ImplicitBufferPtr = 0;

  // VGPR inputs. These are always v0, v1 and v2 for entry functions.
  ArgDescriptor WorkItemIDX;
  ArgDescriptor WorkItemIDY;
  ArgDescriptor WorkItemIDZ;

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(PreloadedValue Value) const;
};

class AMDGPUArgumentUsageInfo : public ImmutablePass {
private:
  static const AMDGPUFunctionArgInfo ExternFunctionInfo;
  DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;

public:
  static char ID;

  AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

  bool doInitialization(Module &M) override;
  bool doFinalization(Module &M) override;

  void print(raw_ostream &OS, const Module *M = nullptr) const override;

  void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
    ArgInfoMap[&F] = ArgInfo;
  }

  const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const {
    auto I = ArgInfoMap.find(&F);
    if (I == ArgInfoMap.end()) {
      assert(F.isDeclaration());
      return ExternFunctionInfo;
    }

    return I->second;
  }
};

} // end namespace llvm

#endif
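ArgDescriptor above packs either a register or a stack offset into one union plus two flag bits. A self-contained replica of that encoding (simplified, without the print/TargetRegisterInfo plumbing) showing how the factories and accessors interact:

// Standalone sketch of the ArgDescriptor encoding: one word holds either a
// register or a stack offset; two bit-fields record which variant is active
// and whether the value was set at all.
#include <cassert>
#include <iostream>

class ArgDesc {
  union {
    unsigned Register;
    unsigned StackOffset;
  };
  bool IsStack : 1;
  bool IsSet : 1;

  ArgDesc(unsigned V, bool Stack, bool Set)
      : Register(V), IsStack(Stack), IsSet(Set) {}

public:
  ArgDesc() : ArgDesc(0, false, false) {}
  static ArgDesc createRegister(unsigned R) { return {R, false, true}; }
  static ArgDesc createStack(unsigned Off) { return {Off, true, true}; }

  explicit operator bool() const { return IsSet; }
  unsigned getRegister() const { assert(!IsStack); return Register; }
  unsigned getStackOffset() const { assert(IsStack); return StackOffset; }
};

int main() {
  ArgDesc QueuePtr;                                   // <not set>, tests false
  ArgDesc WorkItemX = ArgDesc::createRegister(0);     // e.g. v0
  std::cout << (QueuePtr ? "set" : "unset") << "\n";  // unset
  std::cout << WorkItemX.getRegister() << "\n";       // 0
}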
1278 external/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp vendored
File diff suppressed because it is too large
207 external/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h vendored
@ -1,207 +0,0 @@
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief AMDGPU Assembly printer class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <vector>

namespace llvm {

class AMDGPUTargetStreamer;
class MCOperand;
class SISubtarget;

class AMDGPUAsmPrinter final : public AsmPrinter {
private:
  // Track resource usage for callee functions.
  struct SIFunctionResourceInfo {
    // Track the number of explicitly used VGPRs. Special registers reserved at
    // the end are tracked separately.
    int32_t NumVGPR = 0;
    int32_t NumExplicitSGPR = 0;
    uint64_t PrivateSegmentSize = 0;
    bool UsesVCC = false;
    bool UsesFlatScratch = false;
    bool HasDynamicallySizedStack = false;
    bool HasRecursion = false;

    int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
  };

  // Track resource usage for kernels / entry functions.
  struct SIProgramInfo {
    // Fields set in the PGM_RSRC1 pm4 packet.
    uint32_t VGPRBlocks = 0;
    uint32_t SGPRBlocks = 0;
    uint32_t Priority = 0;
    uint32_t FloatMode = 0;
    uint32_t Priv = 0;
    uint32_t DX10Clamp = 0;
    uint32_t DebugMode = 0;
    uint32_t IEEEMode = 0;
    uint64_t ScratchSize = 0;

    uint64_t ComputePGMRSrc1 = 0;

    // Fields set in the PGM_RSRC2 pm4 packet.
    uint32_t LDSBlocks = 0;
    uint32_t ScratchBlocks = 0;

    uint64_t ComputePGMRSrc2 = 0;

    uint32_t NumVGPR = 0;
    uint32_t NumSGPR = 0;
    uint32_t LDSSize = 0;
    bool FlatUsed = false;

    // Number of SGPRs that meets the number of waves per execution unit
    // requested.
    uint32_t NumSGPRsForWavesPerEU = 0;

    // Number of VGPRs that meets the number of waves per execution unit
    // requested.
    uint32_t NumVGPRsForWavesPerEU = 0;

    // If ReservedVGPRCount is 0, then this must be 0. Otherwise, this is the
    // first fixed VGPR number reserved.
    uint16_t ReservedVGPRFirst = 0;

    // The number of consecutive VGPRs reserved.
    uint16_t ReservedVGPRCount = 0;

    // Fixed SGPR number used to hold the wave scratch offset for the entire
    // kernel execution, or std::numeric_limits<uint16_t>::max() if the
    // register is not used or not known.
    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
        std::numeric_limits<uint16_t>::max();

    // Fixed SGPR number of the first 4 SGPRs used to hold the scratch V# for
    // the entire kernel execution, or std::numeric_limits<uint16_t>::max() if
    // the register is not used or not known.
    uint16_t DebuggerPrivateSegmentBufferSGPR =
        std::numeric_limits<uint16_t>::max();

    // Whether there is recursion, dynamic allocas, indirect calls or some
    // other reason there may be statically unknown stack usage.
    bool DynamicCallStack = false;

    // Bonus information for debugging.
    bool VCCUsed = false;

    SIProgramInfo() = default;
  };

  SIProgramInfo CurrentProgramInfo;
  DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;

  AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
  std::map<uint32_t, uint32_t> PALMetadataMap;

  uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;

  void readPALMetadata(Module &M);
  void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
  void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
                        const MachineFunction &MF) const;
  void findNumUsedRegistersSI(const MachineFunction &MF,
                              unsigned &NumSGPR,
                              unsigned &NumVGPR) const;

  AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
      const MachineFunction &MF,
      const SIProgramInfo &ProgramInfo) const;
  AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
      const MachineFunction &MF,
      const SIProgramInfo &ProgramInfo) const;

  /// \brief Emit register usage information so that the GPU driver
  /// can correctly set up the GPU state.
  void EmitProgramInfoR600(const MachineFunction &MF);
  void EmitProgramInfoSI(const MachineFunction &MF,
                         const SIProgramInfo &KernelInfo);
  void EmitPALMetadata(const MachineFunction &MF,
                       const SIProgramInfo &KernelInfo);
  void emitCommonFunctionComments(uint32_t NumVGPR,
                                  uint32_t NumSGPR,
                                  uint64_t ScratchSize,
                                  uint64_t CodeSize);

public:
  explicit AMDGPUAsmPrinter(TargetMachine &TM,
                            std::unique_ptr<MCStreamer> Streamer);

  StringRef getPassName() const override;

  const MCSubtargetInfo* getSTI() const;

  AMDGPUTargetStreamer* getTargetStreamer() const;

  bool doFinalization(Module &M) override;
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
  /// pseudo lowering.
  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;

  /// \brief Lower the specified LLVM Constant to an MCExpr.
  /// The base AsmPrinter::lowerConstant does not know how to lower an
  /// addrspacecast, so such constants must be lowered by this function.
  const MCExpr *lowerConstant(const Constant *CV) override;

  /// \brief tblgen'erated driver function for lowering simple MI->MC pseudo
  /// instructions.
  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
                                   const MachineInstr *MI);

  /// Implemented in AMDGPUMCInstLower.cpp
  void EmitInstruction(const MachineInstr *MI) override;

  void EmitFunctionBodyStart() override;

  void EmitFunctionEntryLabel() override;

  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;

  void EmitGlobalVariable(const GlobalVariable *GV) override;

  void EmitStartOfAsmFile(Module &M) override;

  void EmitEndOfAsmFile(Module &M) override;

  bool isBlockOnlyReachableByFallthrough(
    const MachineBasicBlock *MBB) const override;

  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &O) override;

protected:
  mutable std::vector<std::string> DisasmLines, HexLines;
  mutable size_t DisasmLineMaxLen;
  AMDGPUAS AMDGPUASI;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
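The VGPRBlocks/SGPRBlocks fields above hold register counts in the encoded "block" form the hardware expects in PGM_RSRC1. A minimal sketch of that packing, assuming the usual pre-GFX10 granularity of 4 VGPRs and 8 SGPRs per block and the off-by-one encoding (the helper name is hypothetical, not the backend's):

#include <algorithm>
#include <cstdint>

// Round a raw register count up to the allocation granule, then encode it
// as (blocks - 1), which is what the PGM_RSRC1 block fields store.
static uint32_t regsToBlocks(uint32_t NumRegs, uint32_t Granule) {
  NumRegs = std::max(NumRegs, 1u);  // at least one register is always counted
  uint32_t Rounded = (NumRegs + Granule - 1) / Granule * Granule;
  return Rounded / Granule - 1;
}
// e.g. regsToBlocks(NumVGPR, 4) -> VGPRBlocks, regsToBlocks(NumSGPR, 8) -> SGPRBlocks.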
@ -1,184 +0,0 @@
//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val, unsigned VReg) const {
  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}

unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               unsigned Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, unsigned Offset,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned Align = DL.getABITypeAlignment(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(2, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in.
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    if (!ValEVT.isSimple())
      return false;
    MVT ValVT = ValEVT.getSimpleVT();
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);
    bool Res =
        AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

    // Fail if we don't know how to handle this type.
    if (Res)
      return false;
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS) {
    for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
      CCValAssign &VA = ArgLocs[i];
      MRI.addLiveIn(VA.getLocReg(), VRegs[i]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[i], VA.getLocReg());
    }
    return true;
  }

  for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
    // FIXME: We should be getting DebugInfo from the arguments somehow.
    CCValAssign &VA = ArgLocs[i];
    lowerParameter(MIRBuilder, Arg->getType(),
                   VA.getLocMemOffset() +
                   Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
  }

  return true;
}
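The final loop above is the heart of kernel-argument lowering: each argument is loaded from the kernarg segment at its assigned offset plus a fixed subtarget header offset. A standalone sketch of that offset computation (plain C++, not the LLVM API; the names are illustrative):

#include <cstdint>
#include <vector>

struct ArgDesc { uint64_t Size; uint64_t Align; };

// Place each argument at its ABI-aligned offset within the kernarg segment,
// then shift everything by the subtarget's explicit kernel-argument offset,
// mirroring VA.getLocMemOffset() + Subtarget->getExplicitKernelArgOffset(MF).
static std::vector<uint64_t> kernArgOffsets(const std::vector<ArgDesc> &Args,
                                            uint64_t ExplicitOffset) {
  std::vector<uint64_t> Offsets;
  uint64_t Off = 0;
  for (const ArgDesc &A : Args) {
    Off = (Off + A.Align - 1) / A.Align * A.Align; // round up to alignment
    Offsets.push_back(ExplicitOffset + Off);       // final load offset
    Off += A.Size;
  }
  return Offsets;
}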
@ -1,45 +0,0 @@
//===- lib/Target/AMDGPU/AMDGPUCallLowering.h - Call lowering -*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file describes how to lower LLVM calls to machine code calls.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H

#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"

namespace llvm {

class AMDGPUTargetLowering;

class AMDGPUCallLowering: public CallLowering {
  AMDGPUAS AMDGPUASI;

  unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                             unsigned Offset) const;

  void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                      unsigned Offset, unsigned DstReg) const;

public:
  AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);

  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
                   unsigned VReg) const override;
  bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                            ArrayRef<unsigned> VRegs) const override;
  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
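CCAssignFnForCall and CCAssignFnForReturn select the TableGen-generated assignment function for a calling convention. A hedged sketch of that dispatch, referring to the CC_AMDGPU and CC_AMDGPU_Func entry points generated from the .td file below (the case split here is illustrative, not the backend's exact logic):

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Pick the generated CCAssignFn for a given calling convention.
static CCAssignFn *selectAssignFn(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
    return CC_AMDGPU_Func; // plain (non-kernel) functions
  default:
    return CC_AMDGPU;      // kernels and shaders
  }
}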
173
external/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
vendored
@ -1,173 +0,0 @@
//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
//===----------------------------------------------------------------------===//

// Inversion of CCIfInReg
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
class CCIfExtend<CCAction A>
  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;

// Calling convention for SI
def CC_SI : CallingConv<[

  CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>>,

  // We have no way of referring to the generated register tuples
  // here, so use a custom function.
  CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
  CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,

  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>>
]>;

def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i32] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>
]>;

// Calling convention for R600
def CC_R600 : CallingConv<[
  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
    T30_XYZW, T31_XYZW, T32_XYZW
  ]>>>
]>;

// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
  CCCustom<"allocateKernArg">
]>;

def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
  (sequence "VGPR%u", 24, 255)
>;

def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
  (sequence "VGPR%u", 32, 255)
>;

def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
  (sequence "SGPR%u", 32, 103)
>;

def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
>;

// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
  CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
  CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
  CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
]>;

// Return calling convention for leaf functions
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>
]>;

def CC_AMDGPU : CallingConv<[
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >="
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
       "!AMDGPU::isShader(State.getCallingConv())",
       CCDelegateTo<CC_AMDGPU_Kernel>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() < "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
       "!AMDGPU::isShader(State.getCallingConv())",
       CCDelegateTo<CC_AMDGPU_Kernel>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_SI>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
       CCDelegateTo<CC_AMDGPU_Func>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() < "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_R600>>
]>;
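The CCCustom<"..."> rules above call into hand-written C++ hooks such as allocateSGPRTuple when no built-in TableGen action can express the assignment. A hedged sketch of the shape such a hook takes (the body is illustrative; the real allocateSGPRTuple walks the SGPR tuple register classes in SIISelLowering.cpp):

static bool allocateSGPRTupleSketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                                    CCValAssign::LocInfo LocInfo,
                                    ISD::ArgFlagsTy ArgFlags, CCState &State) {
  // Try one candidate 64-bit SGPR pair; returning true tells the generated
  // calling-convention code that the value has been assigned a location.
  if (unsigned Reg = State.AllocateReg(AMDGPU::SGPR0_SGPR1)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }
  return false; // fall through to the next rule in the CallingConv list
}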
File diff suppressed because it is too large
@ -1,66 +0,0 @@
//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface to describe a layout of a stack frame on an AMDGPU target machine.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUFrameLowering.h"

using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
                                         int LAO, unsigned TransAl)
    : TargetFrameLowering(D, StackAl, LAO, TransAl) { }

AMDGPUFrameLowering::~AMDGPUFrameLowering() = default;

unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
  // XXX: Hardcoding to 1 for now.
  //
  // I think the StackWidth should be stored as metadata associated with the
  // MachineFunction. This metadata can either be added by a frontend, or
  // calculated by an R600-specific LLVM IR pass.
  //
  // The StackWidth determines how stack objects are laid out in memory.
  // For a vector stack variable, like: int4 stack[2], the data will be stored
  // in the following ways depending on the StackWidth.
  //
  // StackWidth = 1:
  //
  // T0.X = stack[0].x
  // T1.X = stack[0].y
  // T2.X = stack[0].z
  // T3.X = stack[0].w
  // T4.X = stack[1].x
  // T5.X = stack[1].y
  // T6.X = stack[1].z
  // T7.X = stack[1].w
  //
  // StackWidth = 2:
  //
  // T0.X = stack[0].x
  // T0.Y = stack[0].y
  // T1.X = stack[0].z
  // T1.Y = stack[0].w
  // T2.X = stack[1].x
  // T2.Y = stack[1].y
  // T3.X = stack[1].z
  // T3.Y = stack[1].w
  //
  // StackWidth = 4:
  // T0.X = stack[0].x
  // T0.Y = stack[0].y
  // T0.Z = stack[0].z
  // T0.W = stack[0].w
  // T1.X = stack[1].x
  // T1.Y = stack[1].y
  // T1.Z = stack[1].z
  // T1.W = stack[1].w
  return 1;
}
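The comment in getStackWidth above describes a simple addressing scheme: the flattened element index of a vector stack object maps to a T register and a channel. A small self-contained sketch of that mapping (illustrative, not part of the backend):

#include <cstdio>

static void locateElement(unsigned FlatIndex, unsigned StackWidth) {
  unsigned Reg  = FlatIndex / StackWidth; // which Tn register
  unsigned Chan = FlatIndex % StackWidth; // 0 = X, 1 = Y, 2 = Z, 3 = W
  std::printf("T%u.%c\n", Reg, "XYZW"[Chan]);
}
// With StackWidth = 2, stack[0].z has flat index 2 and lands in T1.X,
// matching the table in the comment above.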
@ -1,40 +0,0 @@
//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface to describe a layout of a stack frame on an AMDGPU target.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H

#include "llvm/CodeGen/TargetFrameLowering.h"

namespace llvm {

/// \brief Information about the stack frame layout on the AMDGPU targets.
///
/// It holds the direction of the stack growth, the known stack alignment on
/// entry to each function, and the offset to the locals area.
/// See TargetFrameInfo for more comments.
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
                      unsigned TransAl = 1);
  ~AMDGPUFrameLowering() override;

  /// \returns The number of 32-bit sub-registers that are used when storing
  /// values to the stack.
  unsigned getStackWidth(const MachineFunction &MF) const;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
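Since TargetFrameLowering leaves emitPrologue, emitEpilogue, and hasFP pure virtual, a concrete subtarget must derive from AMDGPUFrameLowering and supply them. A hedged sketch of such a subclass (the constructor arguments and empty stubs are illustrative, not the real R600/SI values):

#include "AMDGPUFrameLowering.h"

namespace llvm {
struct SketchFrameLowering : AMDGPUFrameLowering {
  SketchFrameLowering()
    : AMDGPUFrameLowering(TargetFrameLowering::StackGrowsUp,
                          /*StackAl=*/4, /*LAO=*/0) {}
  // Stubs for TargetFrameLowering's pure virtuals.
  void emitPrologue(MachineFunction &, MachineBasicBlock &) const override {}
  void emitEpilogue(MachineFunction &, MachineBasicBlock &) const override {}
  bool hasFP(const MachineFunction &) const override { return false; }
};
} // end namespace llvm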
@ -1,58 +0,0 @@
//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines all the static objects used by AMDGPURegisterBankInfo.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

namespace llvm {
namespace AMDGPU {

enum PartialMappingIdx {
  None = -1,
  PM_SGPR32 = 0,
  PM_SGPR64 = 1,
  PM_VGPR32 = 2,
  PM_VGPR64 = 3
};

const RegisterBankInfo::PartialMapping PartMappings[] {
  // StartIdx, Length, RegBank
  {0, 32, SGPRRegBank},
  {0, 64, SGPRRegBank},
  {0, 32, VGPRRegBank},
  {0, 64, VGPRRegBank}
};

const RegisterBankInfo::ValueMapping ValMappings[] {
  // SGPR 32-bit
  {&PartMappings[0], 1},
  // SGPR 64-bit
  {&PartMappings[1], 1},
  // VGPR 32-bit
  {&PartMappings[2], 1},
  // VGPR 64-bit
  {&PartMappings[3], 1}
};

enum ValueMappingIdx {
  SGPRStartIdx = 0,
  VGPRStartIdx = 2
};

const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
                                                      unsigned Size) {
  assert(Size % 32 == 0);
  unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
  Idx += (Size / 32) - 1;
  return &ValMappings[Idx];
}

} // End AMDGPU namespace.
} // End llvm namespace.
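A usage fragment for getValueMapping above (assuming the surrounding LLVM context): a 64-bit value in the VGPR bank resolves to index VGPRStartIdx + 64/32 - 1 == 3, i.e. the "VGPR 64-bit" entry of ValMappings.

// Look up the value mapping for a 64-bit VGPR-bank value.
const RegisterBankInfo::ValueMapping *VM =
    AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);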
2191
external/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
vendored
File diff suppressed because it is too large
@ -1 +0,0 @@
21192a2c1cc8bc9b4ce1bbcff6967b8362efc094
Some files were not shown because too many files have changed in this diff