You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
1741 lines
65 KiB
C++
1741 lines
65 KiB
C++
// Copyright 1998-2016 Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "VectorVMPrivate.h"
|
|
#include "CurveVector.h"
|
|
#include "VectorVMDataObject.h"
|
|
#include "ModuleManager.h"
|
|
|
|
IMPLEMENT_MODULE(FDefaultModuleImpl, VectorVM);
|
|
|
|
DEFINE_LOG_CATEGORY_STATIC(LogVectorVM, All, All);
|
|
|
|
// Inlining policy for the VM helpers below. Swap to the empty definition to
// let the compiler decide (e.g. while profiling or debugging).
//#define VM_FORCEINLINE
#define VM_FORCEINLINE FORCEINLINE

// Per-operand location flags packed into the source-operand mask byte.
// A clear bit pair means the operand is read from a register.
#define OP_REGISTER		(0)
// Bits 0-3: operand N is read from the constant table.
#define OP0_CONST		(1 << 0)
#define OP1_CONST		(1 << 1)
#define OP2_CONST		(1 << 2)
#define OP3_CONST		(1 << 3)
// Bits 4-7: operand N is read from the data object constant table.
#define OP0_DATAOBJ		(1 << 4)
#define OP1_DATAOBJ		(1 << 5)
#define OP2_DATAOBJ		(1 << 6)
#define OP3_DATAOBJ		(1 << 7)

// Register/constant combinations. The four suffix letters name operands
// 3, 2, 1, 0 from left to right: R = register, C = constant.
#define SRCOP_RRRR		(OP_REGISTER | OP_REGISTER | OP_REGISTER | OP_REGISTER)
#define SRCOP_RRRC		(OP_REGISTER | OP_REGISTER | OP_REGISTER | OP0_CONST)
#define SRCOP_RRCR		(OP_REGISTER | OP_REGISTER | OP1_CONST | OP_REGISTER)
#define SRCOP_RRCC		(OP_REGISTER | OP_REGISTER | OP1_CONST | OP0_CONST)
#define SRCOP_RCRR		(OP_REGISTER | OP2_CONST | OP_REGISTER | OP_REGISTER)
#define SRCOP_RCRC		(OP_REGISTER | OP2_CONST | OP_REGISTER | OP0_CONST)
#define SRCOP_RCCR		(OP_REGISTER | OP2_CONST | OP1_CONST | OP_REGISTER)
#define SRCOP_RCCC		(OP_REGISTER | OP2_CONST | OP1_CONST | OP0_CONST)
#define SRCOP_CRRR		(OP3_CONST | OP_REGISTER | OP_REGISTER | OP_REGISTER)
#define SRCOP_CRRC		(OP3_CONST | OP_REGISTER | OP_REGISTER | OP0_CONST)
#define SRCOP_CRCR		(OP3_CONST | OP_REGISTER | OP1_CONST | OP_REGISTER)
#define SRCOP_CRCC		(OP3_CONST | OP_REGISTER | OP1_CONST | OP0_CONST)
#define SRCOP_CCRR		(OP3_CONST | OP2_CONST | OP_REGISTER | OP_REGISTER)
#define SRCOP_CCRC		(OP3_CONST | OP2_CONST | OP_REGISTER | OP0_CONST)
#define SRCOP_CCCR		(OP3_CONST | OP2_CONST | OP1_CONST | OP_REGISTER)
#define SRCOP_CCCC		(OP3_CONST | OP2_CONST | OP1_CONST | OP0_CONST)

// Combinations involving data object operands: B = data object constant.
#define SRCOP_RRRB		(OP_REGISTER | OP_REGISTER | OP_REGISTER | OP0_DATAOBJ)
#define SRCOP_RRBR		(OP_REGISTER | OP_REGISTER | OP1_DATAOBJ | OP_REGISTER)
#define SRCOP_RRBB		(OP_REGISTER | OP_REGISTER | OP1_DATAOBJ | OP0_DATAOBJ)

// Operand 1 from the constant table, operand 0 from a data object.
#define SRCOP_RRCB		(OP_REGISTER | OP_REGISTER | OP1_CONST | OP0_DATAOBJ)
|
|
|
|
/**
 * Builds the source-operand mask byte for up to four operands.
 * For operand N, the OPn_CONST bit is set when its location is Constant and
 * the OPn_DATAOBJ bit is set when it is DataObjConstant; any other location
 * contributes no bits (OP_REGISTER).
 */
uint8 VectorVM::CreateSrcOperandMask(EVectorVMOperandLocation Type1, EVectorVMOperandLocation Type2, EVectorVMOperandLocation Type3, EVectorVMOperandLocation Type4)
{
	const EVectorVMOperandLocation Locations[4] = { Type1, Type2, Type3, Type4 };
	static const uint8 ConstantBits[4] = { OP0_CONST, OP1_CONST, OP2_CONST, OP3_CONST };
	static const uint8 DataObjBits[4] = { OP0_DATAOBJ, OP1_DATAOBJ, OP2_DATAOBJ, OP3_DATAOBJ };

	uint8 Mask = OP_REGISTER;
	for (int32 OperandIdx = 0; OperandIdx < 4; ++OperandIdx)
	{
		if (Locations[OperandIdx] == EVectorVMOperandLocation::Constant)
		{
			Mask = static_cast<uint8>(Mask | ConstantBits[OperandIdx]);
		}
		else if (Locations[OperandIdx] == EVectorVMOperandLocation::DataObjConstant)
		{
			Mask = static_cast<uint8>(Mask | DataObjBits[OperandIdx]);
		}
	}
	return Mask;
}
|
|
|
|
|
|
/** Forwards the object initializer to the parent class; no further setup is done here. */
UNiagaraDataObject::UNiagaraDataObject(const FObjectInitializer& ObjectInitializer)
	: Super(ObjectInitializer)
{
}
|
|
|
|
/** Forwards the object initializer to the parent class and starts with no curve assigned. */
UNiagaraCurveDataObject::UNiagaraCurveDataObject(const FObjectInitializer& ObjectInitializer)
	: Super(ObjectInitializer)
	, CurveObj(nullptr)
{
}
|
|
|
|
/**
 * Samples the curve at InCoords.X.
 *
 * @param InCoords	Sample coordinates; only the X component is used.
 * @return			The curve's vector value in XYZ with W set to 0, or an all-zero
 *					vector when no curve object has been assigned.
 */
FVector4 UNiagaraCurveDataObject::Sample(const FVector4& InCoords) const
{
	// The constructor leaves CurveObj as nullptr, so a data object that never had
	// a curve assigned must not be dereferenced here.
	if (CurveObj == nullptr)
	{
		return FVector4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	const FVector Vec = CurveObj->GetVectorValue(InCoords.X);
	return FVector4(Vec, 0.0f);
}
|
|
|
|
/**
 * Sets up a volume with Size = 64 and Size^3 buckets, each bucket initialised
 * to 0.1 in all four components.
 */
UNiagaraSparseVolumeDataObject::UNiagaraSparseVolumeDataObject(const FObjectInitializer& ObjectInitializer)
	: Super(ObjectInitializer)
{
	Size = 64;
	NumBuckets = Size * Size * Size;

	// Buckets are filled with a non-zero value rather than zeroed (AddZeroed was
	// the previous, now disabled, approach).
	const FVector4 InitialBucketValue(0.1f, 0.1f, 0.1f, 0.1f);
	Data.Init(InitialBucketValue, NumBuckets);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
#if WITH_EDITOR
// String tables for op codes and operand locations, editor builds only.
// NOTE(review): presumably filled in elsewhere in this file - confirm.
TArray<FString> OpNames;
TArray<FString> OperandLocationNames;
#endif

#if ENABLE_VM_DEBUGGING
// Debugger currently attached to the VM; nullptr while none is attached.
FVectorVMDebuggerImpl* AttachedDebugger = nullptr;
#endif
|
|
|
|
/**
|
|
* Context information passed around during VM execution.
|
|
*/
|
|
struct FVectorVMContext
|
|
{
|
|
/** Pointer to the next element in the byte code. */
|
|
uint8 const* RESTRICT Code;
|
|
/** Pointer to the table of vector register arrays. */
|
|
VectorRegister* RESTRICT * RESTRICT RegisterTable;
|
|
/** Pointer to the constant table. */
|
|
FVector4 const* RESTRICT ConstantTable;
|
|
/** Pointer to the data object constant table. */
|
|
UNiagaraDataObject * RESTRICT *DataObjConstantTable;
|
|
/** Pointer to the shared data table. */
|
|
FVectorVMSharedDataView* RESTRICT SharedDataTable;
|
|
|
|
/** The number of vectors to process. */
|
|
int32 NumVectors;
|
|
|
|
/** The number of instances to process. */
|
|
int32 NumInstances;
|
|
|
|
/** The Operation currently executing. */
|
|
EVectorVMOp CurrOp;
|
|
|
|
/** The instance we're currently starting at. Advances with each chunk processed. */
|
|
int32 StartInstance;
|
|
/** Initialization constructor. */
|
|
FVectorVMContext(
|
|
const uint8* InCode,
|
|
VectorRegister** InRegisterTable,
|
|
const FVector4* InConstantTable,
|
|
UNiagaraDataObject** InDataObjTable,
|
|
FVectorVMSharedDataView* InSharedDataTable,
|
|
int32 InNumVectors,
|
|
int32 InNumInstances,
|
|
int32 InStartInstance
|
|
)
|
|
: Code(InCode)
|
|
, RegisterTable(InRegisterTable)
|
|
, ConstantTable(InConstantTable)
|
|
, DataObjConstantTable(InDataObjTable)
|
|
, SharedDataTable(InSharedDataTable)
|
|
, NumVectors(InNumVectors)
|
|
, NumInstances(InNumInstances)
|
|
, CurrOp(EVectorVMOp::done)
|
|
, StartInstance(InStartInstance)
|
|
{
|
|
}
|
|
|
|
FORCEINLINE bool IsDebugging()
|
|
{
|
|
#if ENABLE_VM_DEBUGGING
|
|
return AttachedDebugger != nullptr;
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
FORCEINLINE void SetOp(EVectorVMOp InOp){ CurrOp = InOp; }
|
|
|
|
#if ENABLE_VM_DEBUGGING
|
|
FORCEINLINE void BeginOp(VectorVM::EVMType InType, int32 InNumArgs, int32 InNumInstancesPerOp)
|
|
{
|
|
if (AttachedDebugger)
|
|
{
|
|
AttachedDebugger->BeginOp(*this, InType, InNumArgs, InNumInstancesPerOp);
|
|
}
|
|
}
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handle, typename Arg2Handler, typename Arg3Handle>
|
|
FORCEINLINE void PreOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1 = DummyHandler, Arg2Handler& Arg2 = DummyHandler, Arg3Handler& Arg3 = DummyHandler)
|
|
{
|
|
if (AttachedDebugger)
|
|
{
|
|
AttachedDebugger->PreOp(*this, Dst, Arg0, Arg1, Arg2, Arg3);
|
|
}
|
|
}
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler>
|
|
FORCEINLINE void PostOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1 = DummyHandler, Arg2Handler& Arg2 = DummyHandler, Arg3Handler& Arg3 = DummyHandler)
|
|
{
|
|
if (AttachedDebugger)
|
|
{
|
|
AttachedDebugger->PostOp(*this, Dst, Arg0, Arg1, Arg2, Arg3);
|
|
}
|
|
}
|
|
#else
|
|
FORCEINLINE void BeginOp(VectorVM::EVMType InType, int32 InNumArgs, int32 InNumInstancesPerOp) { }
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler>
|
|
FORCEINLINE void PreOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2, Arg3Handler& Arg3){ }
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler>
|
|
FORCEINLINE void PostOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2, Arg3Handler& Arg3){ }
|
|
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler>
|
|
FORCEINLINE void PreOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2){ }
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler>
|
|
FORCEINLINE void PostOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2){ }
|
|
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler>
|
|
FORCEINLINE void PreOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1){ }
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler>
|
|
FORCEINLINE void PostOp(DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1){ }
|
|
|
|
|
|
template<typename DstHandler, typename Arg0Handler>
|
|
FORCEINLINE void PreOp(DstHandler& Dst, Arg0Handler& Arg0){ }
|
|
template<typename DstHandler, typename Arg0Handler>
|
|
FORCEINLINE void PostOp(DstHandler& Dst, Arg0Handler& Arg0){ }
|
|
#endif
|
|
};
|
|
|
|
|
|
/** Reads one byte from the bytecode and moves the cursor past it. */
static VM_FORCEINLINE uint8 DecodeU8(FVectorVMContext& Context)
{
	const uint8 Value = *Context.Code;
	++Context.Code;
	return Value;
}
|
|
|
|
/**
 * Reads a 16-bit value (native byte order) from the bytecode and moves the
 * cursor past it.
 *
 * The previous form post-incremented the value stored in the bytecode instead
 * of advancing the cursor, and returned uint8, which dropped the high byte.
 * The return type is widened to uint16 so the full value reaches the caller.
 *
 * NOTE(review): like the original, this reads through a uint16 pointer and so
 * assumes the platform tolerates an unaligned load at Context.Code.
 */
static VM_FORCEINLINE uint16 DecodeU16(FVectorVMContext& Context)
{
	const uint16 Value = *reinterpret_cast<const uint16*>(Context.Code);
	Context.Code += sizeof(uint16);
	return Value;
}
|
|
|
|
/**
 * Reads a 32-bit value (native byte order) from the bytecode and moves the
 * cursor past it.
 *
 * The previous form post-incremented the value stored in the bytecode instead
 * of advancing the cursor, and returned uint8, which dropped the upper three
 * bytes. The return type is widened to uint32 so the full value reaches the
 * caller.
 *
 * NOTE(review): like the original, this reads through a uint32 pointer and so
 * assumes the platform tolerates an unaligned load at Context.Code.
 */
static VM_FORCEINLINE uint32 DecodeU32(FVectorVMContext& Context)
{
	const uint32 Value = *reinterpret_cast<const uint32*>(Context.Code);
	Context.Code += sizeof(uint32);
	return Value;
}
|
|
|
|
/** Decode the next operation contained in the bytecode. */
|
|
static VM_FORCEINLINE EVectorVMOp DecodeOp(FVectorVMContext& Context)
|
|
{
|
|
return static_cast<EVectorVMOp>(DecodeU8(Context));
|
|
}
|
|
|
|
/** Reads the one-byte source-operand mask (see the SRCOP_* values) from the bytecode. */
static VM_FORCEINLINE uint8 DecodeSrcOperandTypes(FVectorVMContext& Context)
{
	const uint8 OperandMask = DecodeU8(Context);
	return OperandMask;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
/** Constant handler. */
|
|
|
|
struct FConstantHandlerBase
|
|
{
|
|
uint8 ConstantIndex;
|
|
FConstantHandlerBase(FVectorVMContext& Context)
|
|
: ConstantIndex(DecodeU8(Context))
|
|
{}
|
|
|
|
VM_FORCEINLINE void Advance(){ }
|
|
VM_FORCEINLINE int32 GetLocationIndex(){ return ConstantIndex; }
|
|
VM_FORCEINLINE int32 GetSecondaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE int32 GetTertiaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE EVectorVMOperandLocation GetLocation(){ return EVectorVMOperandLocation::Constant; }
|
|
};
|
|
|
|
template<typename T>
|
|
struct FConstantHandler : public FConstantHandlerBase
|
|
{
|
|
T Constant;
|
|
FConstantHandler(FVectorVMContext& Context)
|
|
: FConstantHandlerBase(Context)
|
|
, Constant(Context.ConstantTable[ConstantIndex])
|
|
{}
|
|
VM_FORCEINLINE const T& Get(){ return Constant; }
|
|
};
|
|
|
|
template<>
|
|
struct FConstantHandler<VectorRegister> : public FConstantHandlerBase
|
|
{
|
|
VectorRegister Constant;
|
|
FConstantHandler(FVectorVMContext& Context)
|
|
: FConstantHandlerBase(Context)
|
|
, Constant(VectorLoadAligned(&Context.ConstantTable[ConstantIndex]))
|
|
{}
|
|
VM_FORCEINLINE const VectorRegister Get(){ return Constant; }
|
|
};
|
|
|
|
/** Shorthand for the VectorRegister constant handler. */
using FVectorConstantHandler = FConstantHandler<VectorRegister>;
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct FDataObjectConstantHandler
|
|
{
|
|
int32 ConstantIndex;
|
|
UNiagaraDataObject *Constant;
|
|
FDataObjectConstantHandler(FVectorVMContext& Context)
|
|
: ConstantIndex(DecodeU8(Context))
|
|
, Constant(Context.DataObjConstantTable[ConstantIndex])
|
|
{}
|
|
VM_FORCEINLINE void Advance(){ }
|
|
VM_FORCEINLINE int32 GetLocationIndex(){ return ConstantIndex; }
|
|
VM_FORCEINLINE int32 GetSecondaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE int32 GetTertiaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE EVectorVMOperandLocation GetLocation(){ return EVectorVMOperandLocation::DataObjConstant; }
|
|
VM_FORCEINLINE UNiagaraDataObject *Get(){ return Constant; }
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Register handlers.
|
|
// Handle reading of a register, advancing the pointer with each read.
|
|
|
|
struct FRegisterHandlerBase
|
|
{
|
|
int32 RegisterIndex;
|
|
FRegisterHandlerBase(FVectorVMContext& Context)
|
|
: RegisterIndex(DecodeU8(Context))
|
|
{}
|
|
VM_FORCEINLINE int32 GetLocationIndex()
|
|
{
|
|
if (RegisterIndex < VectorVM::NumTempRegisters)
|
|
{
|
|
return RegisterIndex;
|
|
}
|
|
else if (RegisterIndex < VectorVM::NumTempRegisters + VectorVM::MaxInputRegisters)
|
|
{
|
|
return RegisterIndex - VectorVM::NumTempRegisters;
|
|
}
|
|
else
|
|
{
|
|
return RegisterIndex - (VectorVM::NumTempRegisters + VectorVM::MaxInputRegisters);
|
|
}
|
|
}
|
|
VM_FORCEINLINE int32 GetSecondaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE int32 GetTertiaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE EVectorVMOperandLocation GetLocation()
|
|
{
|
|
if (RegisterIndex < VectorVM::NumTempRegisters)
|
|
{
|
|
return EVectorVMOperandLocation::TemporaryRegister;
|
|
}
|
|
else if (RegisterIndex < VectorVM::NumTempRegisters + VectorVM::MaxInputRegisters)
|
|
{
|
|
return EVectorVMOperandLocation::InputRegister;
|
|
}
|
|
else
|
|
{
|
|
return EVectorVMOperandLocation::OutputRegister;
|
|
}
|
|
}
|
|
};
|
|
|
|
template<typename T, int32 NumInstancesPerOp=1>
|
|
struct FRegisterHandler : public FRegisterHandlerBase
|
|
{
|
|
T* RESTRICT Register;
|
|
FRegisterHandler(FVectorVMContext& Context)
|
|
: FRegisterHandlerBase(Context)
|
|
, Register((T*)Context.RegisterTable[RegisterIndex])
|
|
{}
|
|
VM_FORCEINLINE const T Get(){ return *Register; }
|
|
VM_FORCEINLINE void Advance(){ Register += NumInstancesPerOp; }
|
|
};
|
|
|
|
template<int32 NumInstancesPerOp> struct FRegisterHandler<VectorRegister, NumInstancesPerOp> : public FRegisterHandlerBase
|
|
{
|
|
VectorRegister* RESTRICT Register;
|
|
FRegisterHandler(FVectorVMContext& Context)
|
|
: FRegisterHandlerBase(Context)
|
|
, Register((VectorRegister*)Context.RegisterTable[RegisterIndex])
|
|
{}
|
|
VM_FORCEINLINE const VectorRegister Get(){ return VectorLoadAligned(Register); }
|
|
VM_FORCEINLINE void Advance(){ Register += NumInstancesPerOp; }
|
|
};
|
|
|
|
/** Shorthand for the VectorRegister source register handler (one instance per op). */
using FVectorRegisterHandler = FRegisterHandler<VectorRegister>;
|
|
|
|
/** Handles writing to a register, advancing the pointer with each write. */
|
|
template<typename T, int32 NumInstancesPerOp = 1>
|
|
struct FRegisterDestHandler : public FRegisterHandlerBase
|
|
{
|
|
T* RESTRICT Register;
|
|
FRegisterDestHandler(FVectorVMContext& Context)
|
|
: FRegisterHandlerBase(Context)
|
|
, Register((T*)Context.RegisterTable[RegisterIndex])
|
|
{}
|
|
VM_FORCEINLINE T* RESTRICT Get(){ return Register; }
|
|
VM_FORCEINLINE T GetValue(){ return *Register; }
|
|
VM_FORCEINLINE void Advance(){ Register += NumInstancesPerOp; }
|
|
};
|
|
|
|
template<int32 NumInstancesPerOp>
|
|
struct FRegisterDestHandler<VectorRegister, NumInstancesPerOp> : public FRegisterHandlerBase
|
|
{
|
|
VectorRegister* RESTRICT Register;
|
|
FRegisterDestHandler(FVectorVMContext& Context)
|
|
: FRegisterHandlerBase(Context)
|
|
, Register((VectorRegister*)Context.RegisterTable[RegisterIndex])
|
|
{}
|
|
VM_FORCEINLINE VectorRegister* RESTRICT Get(){ return Register; }
|
|
VM_FORCEINLINE VectorRegister GetValue(){ return VectorLoadAligned(Register); }
|
|
VM_FORCEINLINE void Advance(){ Register += NumInstancesPerOp; }
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Kernels
|
|
template<typename Kernel, typename DstHandler, typename Arg0Handler>
|
|
struct TUnaryKernel
|
|
{
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
DstHandler Dst(Context);
|
|
Arg0Handler Arg0(Context);
|
|
|
|
Context.BeginOp(Kernel::Type, 1, Kernel::NumInstancesPerOp);
|
|
for (int32 i = 0; i < Context.NumInstances; i += Kernel::NumInstancesPerOp)
|
|
{
|
|
Context.PreOp<DstHandler, Arg0Handler>(Dst, Arg0);
|
|
Kernel::DoKernel(Dst.Get(), Arg0.Get());
|
|
Context.PostOp<DstHandler, Arg0Handler>(Dst, Arg0);
|
|
Dst.Advance(); Arg0.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
template<typename Kernel, typename DstHandler, typename Arg0Handler, typename Arg1Handler>
|
|
struct TBinaryKernel
|
|
{
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
DstHandler Dst(Context);
|
|
Arg0Handler Arg0(Context);
|
|
Arg1Handler Arg1(Context);
|
|
Context.BeginOp(Kernel::Type, 2, Kernel::NumInstancesPerOp);
|
|
for (int32 i = 0; i < Context.NumInstances; i += Kernel::NumInstancesPerOp)
|
|
{
|
|
Context.PreOp<DstHandler, Arg0Handler, Arg1Handler>(Dst, Arg0, Arg1);
|
|
Kernel::DoKernel(Dst.Get(), Arg0.Get(), Arg1.Get());
|
|
Context.PostOp<DstHandler, Arg0Handler, Arg1Handler>(Dst, Arg0, Arg1);
|
|
Dst.Advance(); Arg0.Advance(); Arg1.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
template<typename Kernel, typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler>
|
|
struct TTrinaryKernel
|
|
{
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
DstHandler Dst(Context);
|
|
Arg0Handler Arg0(Context);
|
|
Arg1Handler Arg1(Context);
|
|
Arg2Handler Arg2(Context);
|
|
Context.BeginOp(Kernel::Type, 3, Kernel::NumInstancesPerOp);
|
|
for (int32 i = 0; i < Context.NumInstances; i += Kernel::NumInstancesPerOp)
|
|
{
|
|
Context.PreOp<DstHandler, Arg0Handler, Arg1Handler, Arg2Handler>(Dst, Arg0, Arg1, Arg2);
|
|
Kernel::DoKernel(Dst.Get(), Arg0.Get(), Arg1.Get(), Arg2.Get());
|
|
Context.PostOp<DstHandler, Arg0Handler, Arg1Handler, Arg2Handler>(Dst, Arg0, Arg1, Arg2);
|
|
Dst.Advance(); Arg0.Advance(); Arg1.Advance(); Arg2.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
template<typename Kernel, typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler>
|
|
struct TQuaternaryKernel
|
|
{
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
DstHandler Dst(Context);
|
|
Arg0Handler Arg0(Context);
|
|
Arg1Handler Arg1(Context);
|
|
Arg2Handler Arg2(Context);
|
|
Arg3Handler Arg3(Context);
|
|
Context.BeginOp(Kernel::Type, 4, Kernel::NumInstancesPerOp);
|
|
for (int32 i = 0; i < Context.NumInstances; i += Kernel::NumInstancesPerOp)
|
|
{
|
|
Context.PreOp<DstHandler, Arg0Handler, Arg1Handler, Arg2Handler, Arg3Handler>(Dst, Arg0, Arg1, Arg2, Arg3);
|
|
Kernel::DoKernel(Dst.Get(), Arg0.Get(), Arg1.Get(), Arg2.Get(), Arg3.Get());
|
|
Context.PostOp<DstHandler, Arg0Handler, Arg1Handler, Arg2Handler, Arg3Handler>(Dst, Arg0, Arg1, Arg2, Arg3);
|
|
Dst.Advance(); Arg0.Advance(); Arg1.Advance(); Arg2.Advance(); Arg3.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
/** Base class of vector kernels with a single operand. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TUnaryVectorKernel
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRR: TUnaryKernel<Kernel, DstHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRRC: TUnaryKernel<Kernel, DstHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
/** Base class of Vector kernels with 2 operands. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TBinaryVectorKernel
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRR: TBinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRRC: TBinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCR: TBinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_RRCC: TBinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Base class of Vector kernels with 2 operands, one of which can be a data object. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TBinaryVectorKernelData
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRB: TBinaryKernel<Kernel, DstHandler, FDataObjectConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCB: TBinaryKernel<Kernel, DstHandler, FDataObjectConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
|
|
|
|
/** Base class of Vector kernels with 2 operands, one of which can be a data object. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TTrinaryVectorKernelData
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRB: TTrinaryKernel<Kernel, DstHandler, FDataObjectConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Base class of Vector kernels with 3 operands. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TTrinaryVectorKernel
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRR: TTrinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRRC: TTrinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCR: TTrinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCC: TTrinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RCRR: TTrinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_RCRC: TTrinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_RCCR: TTrinaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_RCCC: TTrinaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
/** Base class of Vector kernels with 4 operands. */
|
|
template <typename Kernel, typename DstHandler = FRegisterDestHandler<VectorRegister>>
|
|
struct TQuatenaryVectorKernel
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void Exec(FVectorVMContext& Context)
|
|
{
|
|
uint32 SrcOpTypes = DecodeSrcOperandTypes(Context);
|
|
switch (SrcOpTypes)
|
|
{
|
|
case SRCOP_RRRR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRRC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RRCC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RCRR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RCRC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RCCR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_RCCC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorRegisterHandler>::Exec(Context); break;
|
|
case SRCOP_CRRR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CRRC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CRCR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CRCC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CCRR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CCRC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CCCR: TQuaternaryKernel<Kernel, DstHandler, FVectorRegisterHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
case SRCOP_CCCC: TQuaternaryKernel<Kernel, DstHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorConstantHandler, FVectorConstantHandler>::Exec(Context); break;
|
|
default: check(0); break;
|
|
};
|
|
}
|
|
};
|
|
|
|
/*------------------------------------------------------------------------------
|
|
Implementation of all kernel operations.
|
|
------------------------------------------------------------------------------*/
|
|
|
|
/** Addition kernel: stores VectorAdd(Src0, Src1) into the destination. */
struct FVectorKernelAdd : TBinaryVectorKernel<FVectorKernelAdd>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Sum = VectorAdd(Src0, Src1);
		*Dst = Sum;
	}
};
|
|
|
|
/** Subtraction kernel: stores VectorSubtract(Src0, Src1) into the destination. */
struct FVectorKernelSub : TBinaryVectorKernel<FVectorKernelSub>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Difference = VectorSubtract(Src0, Src1);
		*Dst = Difference;
	}
};
|
|
|
|
/** Multiplication kernel: stores VectorMultiply(Src0, Src1) into the destination. */
struct FVectorKernelMul : TBinaryVectorKernel<FVectorKernelMul>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Product = VectorMultiply(Src0, Src1);
		*Dst = Product;
	}
};
|
|
|
|
/** Division kernel: stores VectorDivide(Src0, Src1) into the destination. */
struct FVectorKernelDiv : TBinaryVectorKernel<FVectorKernelDiv>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Quotient = VectorDivide(Src0, Src1);
		*Dst = Quotient;
	}
};
|
|
|
|
|
|
/** Multiply-add kernel: stores VectorMultiplyAdd(Src0, Src1, Src2) into the destination. */
struct FVectorKernelMad : TTrinaryVectorKernel<FVectorKernelMad>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1, VectorRegister Src2)
	{
		const VectorRegister Result = VectorMultiplyAdd(Src0, Src1, Src2);
		*Dst = Result;
	}
};
|
|
|
|
/**
 * Linear interpolation kernel.
 * Computes Src0 * (1 - Src2) first, then feeds it as the addend of
 * VectorMultiplyAdd(Src1, Src2, ...), with Src2 acting as the blend factor.
 */
struct FVectorKernelLerp : TTrinaryVectorKernel<FVectorKernelLerp>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1, VectorRegister Src2)
	{
		const VectorRegister InverseAlpha = VectorSubtract(GlobalVectorConstants::FloatOne, Src2);
		const VectorRegister WeightedStart = VectorMultiply(Src0, InverseAlpha);
		const VectorRegister Blended = VectorMultiplyAdd(Src1, Src2, WeightedStart);
		*Dst = Blended;
	}
};
|
|
|
|
/** Reciprocal kernel: stores VectorReciprocal(Src0) into the destination. */
struct FVectorKernelRcp : TUnaryVectorKernel<FVectorKernelRcp>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Reciprocal = VectorReciprocal(Src0);
		*Dst = Reciprocal;
	}
};
|
|
|
|
/** Reciprocal square root kernel: stores VectorReciprocalSqrt(Src0) into the destination. */
struct FVectorKernelRsq : TUnaryVectorKernel<FVectorKernelRsq>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister InverseRoot = VectorReciprocalSqrt(Src0);
		*Dst = InverseRoot;
	}
};
|
|
|
|
/**
 * Square root kernel.
 * No direct SIMD sqrt is used here; the result is obtained as the reciprocal
 * of the reciprocal square root.
 */
struct FVectorKernelSqrt : TUnaryVectorKernel<FVectorKernelSqrt>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		// TODO: Need a SIMD sqrt!
		const VectorRegister InverseRoot = VectorReciprocalSqrt(Src0);
		*Dst = VectorReciprocal(InverseRoot);
	}
};
|
|
|
|
/** Negation kernel: stores VectorNegate(Src0) into the destination. */
struct FVectorKernelNeg : TUnaryVectorKernel<FVectorKernelNeg>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Negated = VectorNegate(Src0);
		*Dst = Negated;
	}
};
|
|
|
|
/** Absolute value kernel: stores VectorAbs(Src0) into the destination. */
struct FVectorKernelAbs : TUnaryVectorKernel<FVectorKernelAbs>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Magnitude = VectorAbs(Src0);
		*Dst = Magnitude;
	}
};
|
|
|
|
/** Exponential kernel: stores VectorExp(Src0) into the destination. */
struct FVectorKernelExp : TUnaryVectorKernel<FVectorKernelExp>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorExp(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/** Base-2 exponential kernel: stores VectorExp2(Src0) into the destination. */
struct FVectorKernelExp2 : TUnaryVectorKernel<FVectorKernelExp2>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorExp2(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/** Logarithm kernel: stores VectorLog(Src0) into the destination. */
struct FVectorKernelLog : TUnaryVectorKernel<FVectorKernelLog>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorLog(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/** Base-2 logarithm kernel: stores VectorLog2(Src0) into the destination. */
struct FVectorKernelLog2 : TUnaryVectorKernel<FVectorKernelLog2>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorLog2(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/**
 * Clamp kernel: takes VectorMax(Src0, Src1) and then VectorMin of that with
 * Src2, so Src1 acts as the lower bound and Src2 as the upper bound.
 */
struct FVectorKernelClamp : TTrinaryVectorKernel<FVectorKernelClamp>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1, VectorRegister Src2)
	{
		const VectorRegister RaisedToLowerBound = VectorMax(Src0, Src1);
		const VectorRegister Clamped = VectorMin(RaisedToLowerBound, Src2);
		*Dst = Clamped;
	}
};
|
|
|
|
/** Sine kernel: the input is multiplied by TwoPi before VectorSin is applied. */
struct FVectorKernelSin : TUnaryVectorKernel<FVectorKernelSin>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister ScaledAngle = VectorMultiply(Src0, GlobalVectorConstants::TwoPi);
		*Dst = VectorSin(ScaledAngle);
	}
};
|
|
|
|
/** Cosine kernel: the input is multiplied by TwoPi before VectorCos is applied. */
struct FVectorKernelCos : TUnaryVectorKernel<FVectorKernelCos>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister ScaledAngle = VectorMultiply(Src0, GlobalVectorConstants::TwoPi);
		*Dst = VectorCos(ScaledAngle);
	}
};
|
|
|
|
/** Tangent kernel: the input is multiplied by TwoPi before VectorTan is applied. */
struct FVectorKernelTan : TUnaryVectorKernel<FVectorKernelTan>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister ScaledAngle = VectorMultiply(Src0, GlobalVectorConstants::TwoPi);
		*Dst = VectorTan(ScaledAngle);
	}
};
|
|
|
|
/** Arc sine kernel: the result of VectorASin is multiplied by OneOverTwoPi. */
struct FVectorKernelASin : TUnaryVectorKernel<FVectorKernelASin>
{
	static VM_FORCEINLINE void DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Angle = VectorASin(Src0);
		*Dst = VectorMultiply(Angle, GlobalVectorConstants::OneOverTwoPi);
	}
};
|
|
|
|
/**
 * Unary vector kernel: stores VectorACos(Src0) * OneOverTwoPi.
 * The post-scale mirrors the TwoPi pre-scale used by the sin/cos/tan kernels.
 */
struct FVectorKernelACos : public TUnaryVectorKernel<FVectorKernelACos>
{
	/**
	 * @param Dst  Register that receives the scaled angle.
	 * @param Src0 Source operand passed to VectorACos.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Angle = VectorACos(Src0);
		*Dst = VectorMultiply(Angle, GlobalVectorConstants::OneOverTwoPi);
	}
};
|
|
|
|
/**
 * Unary vector kernel: stores VectorATan(Src0) * OneOverTwoPi.
 * The post-scale mirrors the TwoPi pre-scale used by the sin/cos/tan kernels.
 */
struct FVectorKernelATan : public TUnaryVectorKernel<FVectorKernelATan>
{
	/**
	 * @param Dst  Register that receives the scaled angle.
	 * @param Src0 Source operand passed to VectorATan.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Angle = VectorATan(Src0);
		*Dst = VectorMultiply(Angle, GlobalVectorConstants::OneOverTwoPi);
	}
};
|
|
|
|
/**
 * Binary vector kernel: stores VectorATan2(Src0, Src1) * OneOverTwoPi.
 * The post-scale mirrors the TwoPi pre-scale used by the sin/cos/tan kernels.
 */
struct FVectorKernelATan2 : public TBinaryVectorKernel<FVectorKernelATan2>
{
	/**
	 * @param Dst  Register that receives the scaled angle.
	 * @param Src0 First operand forwarded to VectorATan2.
	 * @param Src1 Second operand forwarded to VectorATan2.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Angle = VectorATan2(Src0, Src1);
		*Dst = VectorMultiply(Angle, GlobalVectorConstants::OneOverTwoPi);
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorCeil(Src0) into the destination register. */
struct FVectorKernelCeil : public TUnaryVectorKernel<FVectorKernelCeil>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorCeil(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorFloor(Src0) into the destination register. */
struct FVectorKernelFloor : public TUnaryVectorKernel<FVectorKernelFloor>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Result = VectorFloor(Src0);
		*Dst = Result;
	}
};
|
|
|
|
/** Binary vector kernel: stores VectorMod(Src0, Src1) into the destination register. */
struct FVectorKernelMod : public TBinaryVectorKernel<FVectorKernelMod>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand forwarded to VectorMod.
	 * @param Src1 Second operand forwarded to VectorMod.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Remainder = VectorMod(Src0, Src1);
		*Dst = Remainder;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorFractional(Src0) into the destination register. */
struct FVectorKernelFrac : public TUnaryVectorKernel<FVectorKernelFrac>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Fraction = VectorFractional(Src0);
		*Dst = Fraction;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorTruncate(Src0) into the destination register. */
struct FVectorKernelTrunc : public TUnaryVectorKernel<FVectorKernelTrunc>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Truncated = VectorTruncate(Src0);
		*Dst = Truncated;
	}
};
|
|
|
|
/**
 * Binary vector kernel producing a float "Src0 < Src1" indicator per component.
 * Computes (Src1 - Src0) * BigNumber, caps it at 1 and then floors it at 0, so the
 * result lies in [0, 1] and is 0 wherever Src1 <= Src0.
 */
struct FVectorKernelLessThan : public TBinaryVectorKernel<FVectorKernelLessThan>
{
	/**
	 * @param Dst  Register that receives the indicator.
	 * @param Src0 Left-hand side of the comparison.
	 * @param Src1 Right-hand side of the comparison.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister Difference = VectorSubtract(Src1, Src0);
		// Amplify so that even a small positive difference saturates to 1.
		const VectorRegister Amplified = VectorMultiply(Difference, GlobalVectorConstants::BigNumber);
		const VectorRegister CappedAtOne = VectorMin(Amplified, GlobalVectorConstants::FloatOne);
		*Dst = VectorMax(CappedAtOne, GlobalVectorConstants::FloatZero);
	}
};
|
|
|
|
/**
 * Trinary vector kernel: per-component select between A and B, driven by whether
 * the corresponding Mask component is greater than zero.
 */
struct FVectorKernelSelect : public TTrinaryVectorKernel<FVectorKernelSelect>
{
	/**
	 * @param Dst  Register that receives the selected components.
	 * @param Mask Float mask; compared against zero with VectorCompareGT.
	 * @param A    First value operand forwarded to VectorSelect.
	 * @param B    Second value operand forwarded to VectorSelect.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Mask, VectorRegister A, VectorRegister B)
	{
		// Scripts are currently all-float, so the mask is derived with a "> 0" compare.
		// Once ints are supported this should probably consume a real bit mask directly.
		const VectorRegister BitMask = VectorCompareGT(Mask, GlobalVectorConstants::FloatZero);
		*Dst = VectorSelect(BitMask, A, B);
	}
};
|
|
|
|
|
|
/**
 * Binary data-object kernel: samples a UNiagaraDataObject at the coordinates held
 * in Src1 and stores the returned FVector4.
 */
struct FVectorKernelSample : public TBinaryVectorKernelData<FVectorKernelSample>
{
	/**
	 * @param Dst  Register that receives the sampled value; left untouched when Src0 is null.
	 * @param Src0 Data object to sample (may be null).
	 * @param Src1 Sample coordinates, one float per component.
	 */
	static void FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, UNiagaraDataObject *Src0, VectorRegister Src1)
	{
		if (!Src0)
		{
			return;
		}

		// View the register as four floats so it can be repackaged as an FVector4.
		float const* Coords = reinterpret_cast<float const*>(&Src1);
		const FVector4 SampleCoords(Coords[0], Coords[1], Coords[2], Coords[3]);
		FVector4 Sampled = Src0->Sample(SampleCoords);
		*Dst = VectorLoad(&Sampled);
	}
};
|
|
|
|
/**
 * Trinary data-object kernel: writes a value into a UNiagaraDataObject at the given
 * coordinates and stores whatever FVector4 the Write() call returns.
 */
struct FVectorKernelBufferWrite : public TTrinaryVectorKernelData<FVectorKernelBufferWrite>
{
	/**
	 * @param Dst  Register that receives Write()'s return value; left untouched when Src0 is null.
	 * @param Src0 Data object to write into (may be null).
	 * @param Src1 Write coordinates, one float per component.
	 * @param Src2 Value to write, one float per component.
	 */
	static void FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, UNiagaraDataObject *Src0, VectorRegister Src1, VectorRegister Src2)
	{
		if (!Src0)
		{
			return;
		}

		// View both registers as float quadruples so they can be repackaged as FVector4s.
		float const* CoordFloats = reinterpret_cast<float const*>(&Src1);
		float const* ValueFloats = reinterpret_cast<float const*>(&Src2);

		const FVector4 Coords(CoordFloats[0], CoordFloats[1], CoordFloats[2], CoordFloats[3]);
		const FVector4 Value(ValueFloats[0], ValueFloats[1], ValueFloats[2], ValueFloats[3]);

		FVector4 Written = Src0->Write(Coords, Value);
		*Dst = VectorLoad(&Written);
	}
};
|
|
|
|
|
|
/** Binary vector kernel: stores VectorDot4(Src0, Src1) into the destination register. */
struct FVectorKernelDot : public TBinaryVectorKernel<FVectorKernelDot>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand.
	 * @param Src1 Second operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister DotProduct = VectorDot4(Src0, Src1);
		*Dst = DotProduct;
	}
};
|
|
|
|
/**
 * Unary vector kernel: computes a length as the reciprocal of VectorReciprocalLen(Src0).
 */
struct FVectorKernelLength : public TUnaryVectorKernel<FVectorKernelLength>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source vector.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		// Length is obtained as 1 / (1 / length) using the two reciprocal intrinsics.
		const VectorRegister InverseLength = VectorReciprocalLen(Src0);
		*Dst = VectorReciprocal(InverseLength);
	}
};
|
|
|
|
/** Binary vector kernel: stores VectorCross(Src0, Src1) into the destination register. */
struct FVectorKernelCross : public TBinaryVectorKernel<FVectorKernelCross>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand.
	 * @param Src1 Second operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
	{
		const VectorRegister CrossProduct = VectorCross(Src0, Src1);
		*Dst = CrossProduct;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorNormalize(Src0) into the destination register. */
struct FVectorKernelNormalize : public TUnaryVectorKernel<FVectorKernelNormalize>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source vector.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Normalized = VectorNormalize(Src0);
		*Dst = Normalized;
	}
};
|
|
|
|
/**
 * Unary vector kernel: produces four independent random values, each FMath::Rand() / RAND_MAX,
 * and scales them per component by Src0.
 */
struct FVectorKernelRandom : public TUnaryVectorKernel<FVectorKernelRandom>
{
	/**
	 * @param Dst  Register that receives the scaled random values.
	 * @param Src0 Per-component scale applied to the random values.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const float RandMax = RAND_MAX;

		// One FMath::Rand() draw per lane.
		const VectorRegister UnitRandom = MakeVectorRegister(
			static_cast<float>(FMath::Rand()) / RandMax,
			static_cast<float>(FMath::Rand()) / RandMax,
			static_cast<float>(FMath::Rand()) / RandMax,
			static_cast<float>(FMath::Rand()) / RandMax);

		*Dst = VectorMultiply(UnitRandom, Src0);
	}
};
|
|
|
|
|
|
/* gaussian distribution random number (not working yet) */
|
|
struct FVectorKernelRandomGauss : public TBinaryVectorKernel<FVectorKernelRandomGauss>
|
|
{
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0, VectorRegister Src1)
|
|
{
|
|
const float rm = RAND_MAX;
|
|
VectorRegister Result = MakeVectorRegister(static_cast<float>(FMath::Rand()) / rm,
|
|
static_cast<float>(FMath::Rand()) / rm,
|
|
static_cast<float>(FMath::Rand()) / rm,
|
|
static_cast<float>(FMath::Rand()) / rm);
|
|
|
|
Result = VectorSubtract(Result, MakeVectorRegister(0.5f, 0.5f, 0.5f, 0.5f));
|
|
Result = VectorMultiply(MakeVectorRegister(3.0f, 3.0f, 3.0f, 3.0f), Result);
|
|
|
|
// taylor series gaussian approximation
|
|
const VectorRegister SPi2 = VectorReciprocal(VectorReciprocalSqrt(MakeVectorRegister(2 * PI, 2 * PI, 2 * PI, 2 * PI)));
|
|
VectorRegister Gauss = VectorReciprocal(SPi2);
|
|
VectorRegister Div = VectorMultiply(MakeVectorRegister(2.0f, 2.0f, 2.0f, 2.0f), SPi2);
|
|
Gauss = VectorSubtract(Gauss, VectorDivide(VectorMultiply(Result, Result), Div));
|
|
Div = VectorMultiply(MakeVectorRegister(8.0f, 8.0f, 8.0f, 8.0f), SPi2);
|
|
Gauss = VectorAdd(Gauss, VectorDivide(VectorPow(MakeVectorRegister(4.0f, 4.0f, 4.0f, 4.0f), Result), Div));
|
|
Div = VectorMultiply(MakeVectorRegister(48.0f, 48.0f, 48.0f, 48.0f), SPi2);
|
|
Gauss = VectorSubtract(Gauss, VectorDivide(VectorPow(MakeVectorRegister(6.0f, 6.0f, 6.0f, 6.0f), Result), Div));
|
|
|
|
Gauss = VectorDivide(Gauss, MakeVectorRegister(0.4f, 0.4f, 0.4f, 0.4f));
|
|
Gauss = VectorMultiply(Gauss, Src0);
|
|
*Dst = Gauss;
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Binary vector kernel: stores VectorMin(Src0, Src1) into the destination register. */
struct FVectorKernelMin : public TBinaryVectorKernel<FVectorKernelMin>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand.
	 * @param Src1 Second operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst,VectorRegister Src0,VectorRegister Src1)
	{
		const VectorRegister Smaller = VectorMin(Src0, Src1);
		*Dst = Smaller;
	}
};
|
|
|
|
/** Binary vector kernel: stores VectorMax(Src0, Src1) into the destination register. */
struct FVectorKernelMax : public TBinaryVectorKernel<FVectorKernelMax>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand.
	 * @param Src1 Second operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst,VectorRegister Src0,VectorRegister Src1)
	{
		const VectorRegister Larger = VectorMax(Src0, Src1);
		*Dst = Larger;
	}
};
|
|
|
|
/** Binary vector kernel: stores VectorPow(Src0, Src1) into the destination register. */
struct FVectorKernelPow : public TBinaryVectorKernel<FVectorKernelPow>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 First operand forwarded to VectorPow.
	 * @param Src1 Second operand forwarded to VectorPow.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst,VectorRegister Src0,VectorRegister Src1)
	{
		const VectorRegister Power = VectorPow(Src0, Src1);
		*Dst = Power;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorSign(Src0) into the destination register. */
struct FVectorKernelSign : public TUnaryVectorKernel<FVectorKernelSign>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Sign = VectorSign(Src0);
		*Dst = Sign;
	}
};
|
|
|
|
/** Unary vector kernel: stores VectorStep(Src0) into the destination register. */
struct FVectorKernelStep : public TUnaryVectorKernel<FVectorKernelStep>
{
	/**
	 * @param Dst  Register that receives the result.
	 * @param Src0 Source operand.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
	{
		const VectorRegister Stepped = VectorStep(Src0);
		*Dst = Stepped;
	}
};
|
|
|
|
struct FVectorKernelNoise : public TUnaryVectorKernel<FVectorKernelNoise>
|
|
{
|
|
static VectorRegister RandomTable[17][17][17];
|
|
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
|
|
{
|
|
const VectorRegister One = MakeVectorRegister(1.0f, 1.0f, 1.0f, 1.0f);
|
|
const VectorRegister VecSize = MakeVectorRegister(16.0f, 16.0f, 16.0f, 16.0f);
|
|
|
|
*Dst = MakeVectorRegister(0.0f, 0.0f, 0.0f, 0.0f);
|
|
|
|
for (uint32 i = 1; i < 2; i++)
|
|
{
|
|
float Di = 0.2f * (1.0f/(1<<i));
|
|
VectorRegister Div = MakeVectorRegister(Di, Di, Di, Di);
|
|
VectorRegister Coords = VectorMod( VectorAbs( VectorMultiply(Src0, Div) ), VecSize );
|
|
const float *CoordPtr = reinterpret_cast<float const*>(&Coords);
|
|
const int32 Cx = CoordPtr[0];
|
|
const int32 Cy = CoordPtr[1];
|
|
const int32 Cz = CoordPtr[2];
|
|
|
|
VectorRegister Frac = VectorFractional(Coords);
|
|
VectorRegister Alpha = VectorReplicate(Frac, 0);
|
|
VectorRegister OneMinusAlpha = VectorSubtract(One, Alpha);
|
|
|
|
VectorRegister XV1 = VectorMultiplyAdd(RandomTable[Cx][Cy][Cz], Alpha, VectorMultiply(RandomTable[Cx+1][Cy][Cz], OneMinusAlpha));
|
|
VectorRegister XV2 = VectorMultiplyAdd(RandomTable[Cx][Cy+1][Cz], Alpha, VectorMultiply(RandomTable[Cx+1][Cy+1][Cz], OneMinusAlpha));
|
|
VectorRegister XV3 = VectorMultiplyAdd(RandomTable[Cx][Cy][Cz+1], Alpha, VectorMultiply(RandomTable[Cx+1][Cy][Cz+1], OneMinusAlpha));
|
|
VectorRegister XV4 = VectorMultiplyAdd(RandomTable[Cx][Cy+1][Cz+1], Alpha, VectorMultiply(RandomTable[Cx+1][Cy+1][Cz+1], OneMinusAlpha));
|
|
|
|
Alpha = VectorReplicate(Frac, 1);
|
|
OneMinusAlpha = VectorSubtract(One, Alpha);
|
|
VectorRegister YV1 = VectorMultiplyAdd(XV1, Alpha, VectorMultiply(XV2, OneMinusAlpha));
|
|
VectorRegister YV2 = VectorMultiplyAdd(XV3, Alpha, VectorMultiply(XV4, OneMinusAlpha));
|
|
|
|
Alpha = VectorReplicate(Frac, 2);
|
|
OneMinusAlpha = VectorSubtract(One, Alpha);
|
|
VectorRegister ZV = VectorMultiplyAdd(YV1, Alpha, VectorMultiply(YV2, OneMinusAlpha));
|
|
|
|
*Dst = VectorAdd(*Dst, ZV);
|
|
}
|
|
}
|
|
};
|
|
|
|
VectorRegister FVectorKernelNoise::RandomTable[17][17][17];
|
|
|
|
template<int32 Component>
|
|
struct FVectorKernelSplat : public TUnaryVectorKernel<FVectorKernelSplat<Component>>
|
|
{
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, VectorRegister Src0)
|
|
{
|
|
*Dst = VectorReplicate(Src0, Component);
|
|
}
|
|
};
|
|
|
|
template<int32 Cmp0, int32 Cmp1, int32 Cmp2, int32 Cmp3>
|
|
struct FVectorKernelCompose : public TQuatenaryVectorKernel<FVectorKernelCompose<Cmp0, Cmp1, Cmp2, Cmp3>>
|
|
{
|
|
//Passing as const refs as some compilers cant handle > 3 aligned vectorregister params.
|
|
//inlined so shouldn't impact perf
|
|
//Todo: ^^^^ test this
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, const VectorRegister& Src0, const VectorRegister& Src1, const VectorRegister& Src2, const VectorRegister& Src3)
|
|
{
|
|
//TODO - There's probably a faster way to do this.
|
|
VectorRegister Tmp0 = VectorShuffle(Src0, Src1, Cmp0, Cmp0, Cmp1, Cmp1);
|
|
VectorRegister Tmp1 = VectorShuffle(Src2, Src3, Cmp2, Cmp2, Cmp3, Cmp3);
|
|
*Dst = VectorShuffle(Tmp0, Tmp1, 0, 2, 0, 2);
|
|
}
|
|
};
|
|
|
|
// Ken Perlin's smootherstep function (zero first and second order derivatives at 0 and 1)
|
|
// calculated separately for each channel of Src2
|
|
struct FVectorKernelEaseIn : public TTrinaryVectorKernel<FVectorKernelEaseIn>
|
|
{
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, const VectorRegister& Src0, const VectorRegister& Src1, const VectorRegister& Src2)
|
|
{
|
|
VectorRegister X = VectorMin( VectorDivide(VectorSubtract(Src2, Src0), VectorSubtract(Src1, Src0)), MakeVectorRegister(1.0f, 1.0f, 1.0f, 1.0f) );
|
|
X = VectorMax(X, MakeVectorRegister(0.0f, 0.0f, 0.0f, 0.0f));
|
|
|
|
VectorRegister X3 = VectorMultiply( VectorMultiply(X, X), X);
|
|
VectorRegister N6 = MakeVectorRegister(6.0f, 6.0f, 6.0f, 6.0f);
|
|
VectorRegister N15 = MakeVectorRegister(15.0f, 15.0f, 15.0f, 15.0f);
|
|
VectorRegister T = VectorSubtract( VectorMultiply(X, N6), N15 );
|
|
T = VectorAdd(VectorMultiply(X, T), MakeVectorRegister(10.0f, 10.0f, 10.0f, 10.0f));
|
|
|
|
*Dst = VectorMultiply(X3, T);
|
|
}
|
|
};
|
|
|
|
// smoothly runs 0->1->0
|
|
struct FVectorKernelEaseInOut : public TUnaryVectorKernel<FVectorKernelEaseInOut>
|
|
{
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* RESTRICT Dst, const VectorRegister& Src0)
|
|
{
|
|
VectorRegister T = VectorMultiply(Src0, MakeVectorRegister(2.0f, 2.0f, 2.0f, 2.0f));
|
|
T = VectorSubtract(T, MakeVectorRegister(1.0f, 1.0f, 1.0f, 1.0f));
|
|
VectorRegister X2 = VectorMultiply(T, T);
|
|
|
|
VectorRegister R = VectorMultiply(X2, MakeVectorRegister(0.9604f, 0.9604f, 0.9604f, 0.9604f));
|
|
R = VectorSubtract(R, MakeVectorRegister(1.96f, 1.96f, 1.96f, 1.96f));
|
|
R = VectorMultiply(R, X2);
|
|
*Dst = VectorAdd(R, MakeVectorRegister(1.0f, 1.0f, 1.0f, 1.0f));
|
|
}
|
|
};
|
|
|
|
/**
 * Unary vector kernel: writes the source register to the destination with
 * VectorStoreAlignedStreamed.
 */
struct FVectorKernelOutputStreamed : public TUnaryVectorKernel<FVectorKernelOutputStreamed>
{
	/**
	 * @param Dst  Destination memory; the aligned store requires it to be suitably aligned.
	 * @param Src0 Value to store.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* Dst, VectorRegister Src0)
	{
		VectorStoreAlignedStreamed(Src0, Dst);
	}
};
|
|
|
|
/**
 * Unary vector kernel: writes the source register to the destination with
 * VectorStoreAligned.
 */
struct FVectorKernelOutput : public TUnaryVectorKernel<FVectorKernelOutput>
{
	/**
	 * @param Dst  Destination memory; the aligned store requires it to be suitably aligned.
	 * @param Src0 Value to store.
	 */
	static void VM_FORCEINLINE DoKernel(VectorRegister* Dst, VectorRegister Src0)
	{
		VectorStoreAligned(Src0, Dst);
	}
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
//Shared data
|
|
|
|
struct FSharedDataHandlerBase
|
|
{
|
|
int32 SharedDataIdx;
|
|
FVectorVMSharedDataView& SharedData;
|
|
int32 VarIndex;
|
|
FRegisterHandler<int32, 4> IndexRegisterHandler;
|
|
|
|
FSharedDataHandlerBase(FVectorVMContext& Context)
|
|
: SharedDataIdx(DecodeU8(Context))
|
|
, SharedData(Context.SharedDataTable[SharedDataIdx])
|
|
, VarIndex(DecodeU8(Context))
|
|
, IndexRegisterHandler(Context)
|
|
{
|
|
}
|
|
|
|
VM_FORCEINLINE void Advance(){ IndexRegisterHandler.Advance(); }
|
|
VM_FORCEINLINE int32 GetSharedDataIndex(){ return SharedDataIdx; }
|
|
VM_FORCEINLINE int32 GetVarIndex(){ return VarIndex; }
|
|
VM_FORCEINLINE EVectorVMOperandLocation GetLocation(){ return EVectorVMOperandLocation::SharedData; }
|
|
VM_FORCEINLINE int32 GetDataIndex(){ return IndexRegisterHandler.Get(); }
|
|
|
|
VM_FORCEINLINE int32 GetLocationIndex(){ return GetSharedDataIndex(); }
|
|
VM_FORCEINLINE int32 GetSecondaryIndex(){ return GetVarIndex(); }
|
|
VM_FORCEINLINE int32 GetTertiaryIndex(){ return GetDataIndex(); }
|
|
};
|
|
|
|
struct FSharedDataHandler : public FSharedDataHandlerBase
|
|
{
|
|
FSharedDataHandler(FVectorVMContext& Context)
|
|
: FSharedDataHandlerBase(Context)
|
|
{
|
|
}
|
|
|
|
VM_FORCEINLINE VectorRegister Get()
|
|
{
|
|
return VectorLoad((VectorRegister*)SharedData.GetReadBuffer(VarIndex, IndexRegisterHandler.Get()));
|
|
}
|
|
};
|
|
|
|
struct FSharedDataDestHandler : public FSharedDataHandlerBase
|
|
{
|
|
FSharedDataDestHandler(FVectorVMContext& Context)
|
|
: FSharedDataHandlerBase(Context)
|
|
{
|
|
}
|
|
|
|
VM_FORCEINLINE VectorRegister* Get()
|
|
{
|
|
return (VectorRegister*)SharedData.GetWriteBuffer(VarIndex, IndexRegisterHandler.Get());
|
|
}
|
|
VM_FORCEINLINE VectorRegister GetValue()
|
|
{
|
|
return VectorLoadAligned(Get());
|
|
}
|
|
};
|
|
|
|
/**
|
|
Temporary Vector only version of this.
|
|
*/
|
|
struct FVectorKernelSharedDataWrite : public TUnaryKernel<FVectorKernelSharedDataWrite, FSharedDataDestHandler, FRegisterHandler<VectorRegister>>
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* Buffer, VectorRegister Data)
|
|
{
|
|
VectorStoreAlignedStreamed(Data, (float*)(Buffer));
|
|
}
|
|
};
|
|
|
|
/**
|
|
Temporary Vector only version of this.
|
|
*/
|
|
struct FVectorKernelSharedDataRead : public TUnaryKernel<FVectorKernelSharedDataRead, FRegisterDestHandler<VectorRegister>, FSharedDataHandler>
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static void VM_FORCEINLINE DoKernel(VectorRegister* Dst, VectorRegister Data)
|
|
{
|
|
*Dst = Data;
|
|
}
|
|
};
|
|
|
|
struct FSharedDataIndexHandlerBase
|
|
{
|
|
int32 SharedDataIndex;
|
|
int32 CurrIndex;
|
|
FVectorVMSharedDataView& SharedData;
|
|
|
|
VM_FORCEINLINE void Advance(){}
|
|
VM_FORCEINLINE EVectorVMOperandLocation GetLocation(){ return EVectorVMOperandLocation::Undefined; }
|
|
VM_FORCEINLINE int32 GetLocationIndex(){ return SharedDataIndex; }
|
|
VM_FORCEINLINE int32 GetSecondaryIndex(){ return INDEX_NONE; }
|
|
VM_FORCEINLINE int32 GetTertiaryIndex(){ return INDEX_NONE; }
|
|
|
|
int32 Get(){ return CurrIndex; }
|
|
|
|
FSharedDataIndexHandlerBase(FVectorVMContext& Context)
|
|
: SharedDataIndex(DecodeU8(Context))
|
|
, SharedData(Context.SharedDataTable[SharedDataIndex])
|
|
{ }
|
|
};
|
|
struct FSharedDataIndexHandler_Acquire : public FSharedDataIndexHandlerBase
|
|
{
|
|
FSharedDataIndexHandler_Acquire(FVectorVMContext& Context) : FSharedDataIndexHandlerBase(Context) { }
|
|
int32 GetNextIndex(){ CurrIndex = SharedData.AcquireIndexWrap(); return CurrIndex; }
|
|
};
|
|
struct FSharedDataIndexHandler_AcquireWrap : public FSharedDataIndexHandlerBase
|
|
{
|
|
FSharedDataIndexHandler_AcquireWrap(FVectorVMContext& Context) : FSharedDataIndexHandlerBase(Context) { }
|
|
int32 GetNextIndex(){ CurrIndex = SharedData.AcquireIndexWrap(); return CurrIndex; }
|
|
};
|
|
struct FSharedDataIndexHandler_Consume : public FSharedDataIndexHandlerBase
|
|
{
|
|
FSharedDataIndexHandler_Consume(FVectorVMContext& Context) : FSharedDataIndexHandlerBase(Context) { }
|
|
int32 GetNextIndex(){ CurrIndex = SharedData.ConsumeIndex(); return CurrIndex; }
|
|
};
|
|
struct FSharedDataIndexHandler_ConsumeWrap : public FSharedDataIndexHandlerBase
|
|
{
|
|
FSharedDataIndexHandler_ConsumeWrap(FVectorVMContext& Context) : FSharedDataIndexHandlerBase(Context) { }
|
|
int32 GetNextIndex(){ CurrIndex = SharedData.ConsumeIndexWrap(); return CurrIndex; }
|
|
};
|
|
|
|
//Temporary until after scalarization and we can store the size and counter of shared data in constant data.
|
|
template <typename IndexHandler>
|
|
struct FKernelSharedDataGetAppendIndexBase
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
FRegisterDestHandler<int32, 4> IndexDest(Context);
|
|
FRegisterHandler<float, 4> ValidSrc(Context);
|
|
IndexHandler IdxHandler(Context);
|
|
int32 NumInstances = Context.NumInstances;
|
|
Context.BeginOp(VectorVM::EVMType::Vector4, 2, 1);
|
|
for (int32 i = 0; i < NumInstances; ++i)
|
|
{
|
|
Context.PreOp<FRegisterDestHandler<int32, 4>, FRegisterHandler<float, 4>, IndexHandler>(IndexDest, ValidSrc, IdxHandler);
|
|
|
|
int32 Index = ValidSrc.Get() > 0.0f ? IdxHandler.GetNextIndex() : INDEX_NONE;
|
|
int32* Dest = IndexDest.Get();
|
|
Dest[0] = Index; Dest[1] = Index; Dest[2] = Index; Dest[3] = Index;
|
|
|
|
Context.PostOp<FRegisterDestHandler<int32, 4>, FRegisterHandler<float, 4>, IndexHandler>(IndexDest, ValidSrc, IdxHandler);
|
|
ValidSrc.Advance();
|
|
IndexDest.Advance();
|
|
IdxHandler.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
/** Append-index op using the plain acquire handler. */
using FKernelSharedDataGetAppendIndex = FKernelSharedDataGetAppendIndexBase<FSharedDataIndexHandler_Acquire>;
/** Append-index op using the wrapping acquire handler. */
using FKernelSharedDataGetAppendIndex_Wrap = FKernelSharedDataGetAppendIndexBase<FSharedDataIndexHandler_AcquireWrap>;
|
|
|
|
//Temporary until after scalarization and we can store the size and counter of shared data in constant data.
|
|
template<typename IndexHandler>
|
|
struct FKernelSharedDataGetConsumeIndexBase
|
|
{
|
|
static const VectorVM::EVMType Type = VectorVM::EVMType::Vector4;
|
|
static const int32 NumInstancesPerOp = 1;
|
|
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
FRegisterDestHandler<int32, 4> IndexDest(Context);
|
|
IndexHandler IdxHandler(Context);
|
|
int32 NumInstances = Context.NumInstances;
|
|
Context.BeginOp(VectorVM::EVMType::Vector4, 1, 1);
|
|
for (int32 i = 0; i < NumInstances; ++i)
|
|
{
|
|
Context.PreOp<FRegisterDestHandler<int32, 4>, IndexHandler>(IndexDest, IdxHandler);
|
|
|
|
int32 Index = IdxHandler.GetNextIndex();
|
|
//Better to just stay in int pipeline?
|
|
*IndexDest.Get() = Index;//Only need index in X;
|
|
|
|
Context.PostOp<FRegisterDestHandler<int32, 4>, IndexHandler>(IndexDest, IdxHandler);
|
|
IndexDest.Advance();
|
|
IdxHandler.Advance();
|
|
}
|
|
}
|
|
};
|
|
|
|
/** Consume-index op using the plain consume handler. */
using FKernelSharedDataGetConsumeIndex = FKernelSharedDataGetConsumeIndexBase<FSharedDataIndexHandler_Consume>;
/** Consume-index op using the wrapping consume handler. */
using FKernelSharedDataGetConsumeIndex_Wrap = FKernelSharedDataGetConsumeIndexBase<FSharedDataIndexHandler_ConsumeWrap>;
|
|
|
|
//Temporary until after scalarization and we can store the size and counter of shared data in constant data.
|
|
struct FKernelSharedDataIndexValid
|
|
{
|
|
static VM_FORCEINLINE void Exec(FVectorVMContext& Context)
|
|
{
|
|
FRegisterDestHandler<float, 4> ValidDest(Context);
|
|
FRegisterHandler<int32, 4> IndexSrc(Context);
|
|
FVectorVMSharedDataView& SharedData = Context.SharedDataTable[DecodeU8(Context)];
|
|
int32 NumInstances = Context.NumInstances;
|
|
Context.BeginOp(VectorVM::EVMType::Vector4, 1, 1);
|
|
for (int32 i = 0; i < NumInstances; ++i)
|
|
{
|
|
Context.PreOp<FRegisterDestHandler<float, 4>, FRegisterHandler<int32, 4>>(ValidDest, IndexSrc);
|
|
|
|
int32 Index = IndexSrc.Get();
|
|
float Valid = SharedData.ValidIndex(Index) ? 1.0f : 0.0f;
|
|
//Better to just stay in int pipeline?
|
|
float* Dst = ValidDest.Get();
|
|
Dst[0] = Valid; Dst[1] = Valid; Dst[2] = Valid; Dst[3] = Valid;
|
|
|
|
Context.PostOp<FRegisterDestHandler<float, 4>, FRegisterHandler<int32, 4>>(ValidDest, IndexSrc);
|
|
ValidDest.Advance();
|
|
IndexSrc.Advance();;
|
|
}
|
|
}
|
|
};
|
|
|
|
void VectorVM::Init()
|
|
{
|
|
static bool Inited = false;
|
|
if (Inited == false)
|
|
{
|
|
// random noise
|
|
float TempTable[17][17][17];
|
|
for (int z = 0; z < 17; z++)
|
|
{
|
|
for (int y = 0; y < 17; y++)
|
|
{
|
|
for (int x = 0; x < 17; x++)
|
|
{
|
|
float f1 = (float)FMath::FRandRange(-1.0f, 1.0f);
|
|
TempTable[x][y][z] = f1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// pad
|
|
for (int i = 0; i < 17; i++)
|
|
{
|
|
for (int j = 0; j < 17; j++)
|
|
{
|
|
TempTable[i][j][16] = TempTable[i][j][0];
|
|
TempTable[i][16][j] = TempTable[i][0][j];
|
|
TempTable[16][j][i] = TempTable[0][j][i];
|
|
}
|
|
}
|
|
|
|
// compute gradients
|
|
FVector TempTable2[17][17][17];
|
|
for (int z = 0; z < 16; z++)
|
|
{
|
|
for (int y = 0; y < 16; y++)
|
|
{
|
|
for (int x = 0; x < 16; x++)
|
|
{
|
|
FVector XGrad = FVector(1.0f, 0.0f, TempTable[x][y][z] - TempTable[x+1][y][z]);
|
|
FVector YGrad = FVector(0.0f, 1.0f, TempTable[x][y][z] - TempTable[x][y + 1][z]);
|
|
FVector ZGrad = FVector(0.0f, 1.0f, TempTable[x][y][z] - TempTable[x][y][z+1]);
|
|
|
|
FVector Grad = FVector(XGrad.Z, YGrad.Z, ZGrad.Z);
|
|
TempTable2[x][y][z] = Grad;
|
|
}
|
|
}
|
|
}
|
|
|
|
// pad
|
|
for (int i = 0; i < 17; i++)
|
|
{
|
|
for (int j = 0; j < 17; j++)
|
|
{
|
|
TempTable2[i][j][16] = TempTable2[i][j][0];
|
|
TempTable2[i][16][j] = TempTable2[i][0][j];
|
|
TempTable2[16][j][i] = TempTable2[0][j][i];
|
|
}
|
|
}
|
|
|
|
|
|
// compute curl of gradient field
|
|
for (int z = 0; z < 16; z++)
|
|
{
|
|
for (int y = 0; y < 16; y++)
|
|
{
|
|
for (int x = 0; x < 16; x++)
|
|
{
|
|
FVector Dy = TempTable2[x][y][z] - TempTable2[x][y + 1][z];
|
|
FVector Sy = TempTable2[x][y][z] + TempTable2[x][y + 1][z];
|
|
FVector Dx = TempTable2[x][y][z] - TempTable2[x + 1][y][z];
|
|
FVector Sx = TempTable2[x][y][z] + TempTable2[x + 1][y][z];
|
|
FVector Dz = TempTable2[x][y][z] - TempTable2[x][y][z + 1];
|
|
FVector Sz = TempTable2[x][y][z] + TempTable2[x][y][z + 1];
|
|
FVector Dir = FVector(Dy.Z - Sz.Y, Dz.X - Sx.Z, Dx.Y - Sy.X);
|
|
|
|
FVectorKernelNoise::RandomTable[x][y][z] = MakeVectorRegister(Dir.X, Dir.Y, Dir.Z, 0.0f);
|
|
}
|
|
}
|
|
}
|
|
|
|
#if WITH_EDITOR
|
|
|
|
OpNames.AddDefaulted((int32)EVectorVMOp::NumOpcodes);
|
|
if (UEnum* EnumStateObj = FindObject<UEnum>(ANY_PACKAGE, TEXT("EVectorVMOp"), true))
|
|
{
|
|
for (int32 i = 0; i < (int32)EVectorVMOp::NumOpcodes; ++i)
|
|
{
|
|
OpNames[i] = EnumStateObj->GetDisplayNameText(i).ToString();
|
|
}
|
|
}
|
|
|
|
OperandLocationNames.AddDefaulted((int32)EVectorVMOperandLocation::Num);
|
|
if (UEnum* EnumStateObj = FindObject<UEnum>(ANY_PACKAGE, TEXT("EVectorVMOperandLocation"), true))
|
|
{
|
|
for (int32 i = 0; i < (int32)EVectorVMOperandLocation::Num; ++i)
|
|
{
|
|
OperandLocationNames[i] = EnumStateObj->GetDisplayNameText(i).ToString();
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
Inited = true;
|
|
}
|
|
}
|
|
|
|
/**
 * Runs a bytecode program over NumVectors vectors, VectorsPerChunk at a time.
 *
 * For every chunk the register table is pointed at that chunk's slice of the input and
 * output buffers, and the program is decoded and executed from the start of Code until a
 * "done" opcode is reached. An unrecognised opcode logs an error and aborts the whole call.
 *
 * @param Code               Bytecode; must be terminated by EVectorVMOp::done.
 * @param InputRegisters     Input buffers, one pointer per input register.
 * @param NumInputRegisters  Number of entries in InputRegisters.
 * @param OutputRegisters    Output buffers, one pointer per output register.
 * @param NumOutputRegisters Number of entries in OutputRegisters.
 * @param ConstantTable      Constants, forwarded to the execution context.
 * @param DataObjConstTable  Data object constants, forwarded to the execution context.
 * @param SharedDataTable    Shared data sets, forwarded to the execution context.
 * @param NumVectors         Total number of vectors to process.
 */
void VectorVM::Exec(
	uint8 const* Code,
	VectorRegister** InputRegisters,
	int32 NumInputRegisters,
	VectorRegister** OutputRegisters,
	int32 NumOutputRegisters,
	FVector4 const* ConstantTable,
	UNiagaraDataObject* *DataObjConstTable,
	FVectorVMSharedDataView* SharedDataTable,
	int32 NumVectors
	)
{
	VectorRegister TempRegisters[NumTempRegisters][VectorsPerChunk];
	VectorRegister* RegisterTable[MaxRegisters] = {0};

	// Map temporary registers.
	for (int32 i = 0; i < NumTempRegisters; ++i)
	{
		RegisterTable[i] = TempRegisters[i];
	}

	// Process one chunk at a time.
	int32 NumChunks = (NumVectors + VectorsPerChunk - 1) / VectorsPerChunk;
	for (int32 ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex)
	{
		// Map input and output registers.
		for (int32 i = 0; i < NumInputRegisters; ++i)
		{
			RegisterTable[NumTempRegisters + i] = InputRegisters[i] + ChunkIndex * VectorsPerChunk;
		}
		for (int32 i = 0; i < NumOutputRegisters; ++i)
		{
			RegisterTable[NumTempRegisters + MaxInputRegisters + i] = OutputRegisters[i] + ChunkIndex * VectorsPerChunk;
		}

		// Setup execution context. NumVectors holds the count still to be processed,
		// so the last chunk may be partial.
		int32 VectorsThisChunk = FMath::Min<int32>(NumVectors, VectorsPerChunk);
		int32 InstancesThisChunk = VectorsThisChunk;
		FVectorVMContext Context(Code, RegisterTable, ConstantTable, DataObjConstTable, SharedDataTable, VectorsThisChunk, InstancesThisChunk, VectorsPerChunk * ChunkIndex);
		EVectorVMOp Op = EVectorVMOp::done;

		// Execute VM on all vectors in this chunk.
		do
		{
			Op = DecodeOp(Context);
			Context.SetOp(Op);
			switch (Op)
			{
			// Dispatch kernel ops.
			case EVectorVMOp::add: FVectorKernelAdd::Exec(Context); break;
			case EVectorVMOp::sub: FVectorKernelSub::Exec(Context); break;
			case EVectorVMOp::mul: FVectorKernelMul::Exec(Context); break;
			case EVectorVMOp::div: FVectorKernelDiv::Exec(Context); break;
			case EVectorVMOp::mad: FVectorKernelMad::Exec(Context); break;
			case EVectorVMOp::lerp: FVectorKernelLerp::Exec(Context); break;
			case EVectorVMOp::rcp: FVectorKernelRcp::Exec(Context); break;
			case EVectorVMOp::rsq: FVectorKernelRsq::Exec(Context); break;
			case EVectorVMOp::sqrt: FVectorKernelSqrt::Exec(Context); break;
			case EVectorVMOp::neg: FVectorKernelNeg::Exec(Context); break;
			case EVectorVMOp::abs: FVectorKernelAbs::Exec(Context); break;
			case EVectorVMOp::exp: FVectorKernelExp::Exec(Context); break;
			case EVectorVMOp::exp2: FVectorKernelExp2::Exec(Context); break;
			case EVectorVMOp::log: FVectorKernelLog::Exec(Context); break;
			case EVectorVMOp::log2: FVectorKernelLog2::Exec(Context); break;
			case EVectorVMOp::sin: FVectorKernelSin::Exec(Context); break;
			case EVectorVMOp::cos: FVectorKernelCos::Exec(Context); break;
			case EVectorVMOp::tan: FVectorKernelTan::Exec(Context); break;
			case EVectorVMOp::asin: FVectorKernelASin::Exec(Context); break;
			case EVectorVMOp::acos: FVectorKernelACos::Exec(Context); break;
			case EVectorVMOp::atan: FVectorKernelATan::Exec(Context); break;
			case EVectorVMOp::atan2: FVectorKernelATan2::Exec(Context); break;
			case EVectorVMOp::ceil: FVectorKernelCeil::Exec(Context); break;
			case EVectorVMOp::floor: FVectorKernelFloor::Exec(Context); break;
			case EVectorVMOp::fmod: FVectorKernelMod::Exec(Context); break;
			case EVectorVMOp::frac: FVectorKernelFrac::Exec(Context); break;
			case EVectorVMOp::trunc: FVectorKernelTrunc::Exec(Context); break;
			case EVectorVMOp::clamp: FVectorKernelClamp::Exec(Context); break;
			case EVectorVMOp::min: FVectorKernelMin::Exec(Context); break;
			case EVectorVMOp::max: FVectorKernelMax::Exec(Context); break;
			case EVectorVMOp::pow: FVectorKernelPow::Exec(Context); break;
			case EVectorVMOp::sign: FVectorKernelSign::Exec(Context); break;
			case EVectorVMOp::step: FVectorKernelStep::Exec(Context); break;
			case EVectorVMOp::dot: FVectorKernelDot::Exec(Context); break;
			case EVectorVMOp::cross: FVectorKernelCross::Exec(Context); break;
			case EVectorVMOp::normalize: FVectorKernelNormalize::Exec(Context); break;
			case EVectorVMOp::random: FVectorKernelRandom::Exec(Context); break;
			case EVectorVMOp::length: FVectorKernelLength::Exec(Context); break;
			case EVectorVMOp::noise: FVectorKernelNoise::Exec(Context); break;
			case EVectorVMOp::splatx: FVectorKernelSplat<0>::Exec(Context); break;
			case EVectorVMOp::splaty: FVectorKernelSplat<1>::Exec(Context); break;
			case EVectorVMOp::splatz: FVectorKernelSplat<2>::Exec(Context); break;
			case EVectorVMOp::splatw: FVectorKernelSplat<3>::Exec(Context); break;
			case EVectorVMOp::compose: FVectorKernelCompose<0,1,2,3>::Exec(Context); break;
			case EVectorVMOp::composex: FVectorKernelCompose<0, 0, 0, 0>::Exec(Context); break;
			case EVectorVMOp::composey: FVectorKernelCompose<1, 1, 1, 1>::Exec(Context); break;
			case EVectorVMOp::composez: FVectorKernelCompose<2, 2, 2, 2>::Exec(Context); break;
			case EVectorVMOp::composew: FVectorKernelCompose<3, 3, 3, 3>::Exec(Context); break;
			case EVectorVMOp::lessthan: FVectorKernelLessThan::Exec(Context); break;
			case EVectorVMOp::sample: FVectorKernelSample::Exec(Context); break;
			case EVectorVMOp::bufferwrite: FVectorKernelBufferWrite::Exec(Context); break;
			case EVectorVMOp::easein: FVectorKernelEaseIn::Exec(Context); break;
			case EVectorVMOp::easeinout: FVectorKernelEaseInOut::Exec(Context); break;
			case EVectorVMOp::aquireshareddataindex: FKernelSharedDataGetAppendIndex::Exec(Context); break;
			case EVectorVMOp::aquireshareddataindexwrap: FKernelSharedDataGetAppendIndex_Wrap::Exec(Context); break;
			case EVectorVMOp::consumeshareddataindex: FKernelSharedDataGetConsumeIndex::Exec(Context); break;
			case EVectorVMOp::consumeshareddataindexwrap: FKernelSharedDataGetConsumeIndex_Wrap::Exec(Context); break;
			case EVectorVMOp::shareddataread: FVectorKernelSharedDataRead::Exec(Context); break;
			case EVectorVMOp::shareddatawrite: FVectorKernelSharedDataWrite::Exec(Context); break;
			case EVectorVMOp::shareddataindexvalid: FKernelSharedDataIndexValid::Exec(Context); break;
			case EVectorVMOp::select: FVectorKernelSelect::Exec(Context); break;
			case EVectorVMOp::output:
				{
					// Normal execution uses the streamed store; the debugger reads the
					// destination back around each op, so the debugging path uses the
					// regular store instead. (The two branches were previously the other
					// way round; both leave the same values in memory.)
					if ((ENABLE_VM_DEBUGGING && Context.IsDebugging()) == false)
					{
						FVectorKernelOutputStreamed::Exec(Context);
					}
					else
					{
						FVectorKernelOutput::Exec(Context);
					}
				}
				break;

			// Execution always terminates with a "done" opcode.
			case EVectorVMOp::done:
				break;

			// Opcode not recognized / implemented.
			default:
				UE_LOG(LogVectorVM, Error, TEXT("Unknown op code 0x%02x"), (uint32)Op);
				return;//BAIL
			}
		} while (Op != EVectorVMOp::done);

		NumVectors -= VectorsPerChunk;
	}
}
|
|
|
|
/** Returns EVectorVMOp::NumOpcodes converted to a uint8. */
uint8 VectorVM::GetNumOpCodes()
{
	const uint8 OpCodeCount = static_cast<uint8>(EVectorVMOp::NumOpcodes);
	return OpCodeCount;
}
|
|
|
|
#if WITH_EDITOR
|
|
/**
 * Returns the entry of the OpNames table at the integer value of Op.
 * The index is used as-is; no range check is performed here.
 */
FString VectorVM::GetOpName(EVectorVMOp Op)
{
	const int32 NameIndex = static_cast<int32>(Op);
	return OpNames[NameIndex];
}
|
|
|
|
/**
 * Returns the entry of the OperandLocationNames table at the integer value of Location.
 * The index is used as-is; no range check is performed here.
 */
FString VectorVM::GetOperandLocationName(EVectorVMOperandLocation Location)
{
	const int32 NameIndex = static_cast<int32>(Location);
	return OperandLocationNames[NameIndex];
}
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
#if ENABLE_VM_DEBUGGING
|
|
|
|
/**
 * Stores the description of the op that is about to run so that PostOp can
 * stamp it onto the debug records it creates.
 *
 * @param Context				VM context; its CurrOp and StartInstance are read.
 * @param InType				Value stored as the op's type.
 * @param InNumArgs				Value stored as the op's argument count.
 * @param InNumInstancesPerOp	Number of instances PostOp walks per invocation.
 */
void FVectorVMDebuggerImpl::BeginOp(FVectorVMContext& Context, VectorVM::EVMType InType, int32 InNumArgs, int32 InNumInstancesPerOp)
{
	// Description of the op itself.
	OpType = InType;
	CurrNumArgs = InNumArgs;
	NumInstancesPerOp = InNumInstancesPerOp;
	CurrOp = Context.CurrOp;

	// Reset the running instance cursor: the debugger's own start offset plus the context's.
	CurrInstanceBase = StartInstance + Context.StartInstance;
}
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler>
|
|
void FVectorVMDebuggerImpl::PreOp(FVectorVMContext& Context, DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2, Arg3Handler& Arg3)
|
|
{
|
|
CachedPreOpData[(int32)VectorVM::EKernelArgs::Dest].Set(Dst.GetValue());
|
|
CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg0].Set(Arg0.Get());
|
|
CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg1].Set(Arg1.Get());
|
|
CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg2].Set(Arg2.Get());
|
|
CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg3].Set(Arg3.Get());
|
|
}
|
|
|
|
template<typename DstHandler, typename Arg0Handler, typename Arg1Handler, typename Arg2Handler, typename Arg3Handler >
|
|
void FVectorVMDebuggerImpl::PostOp(FVectorVMContext& Context, DstHandler& Dst, Arg0Handler& Arg0, Arg1Handler& Arg1, Arg2Handler& Arg2, Arg3Handler& Arg3)
|
|
{
|
|
for (int32 i = 0; i < NumInstancesPerOp; ++i)
|
|
{
|
|
if (TArray<VectorVM::FOpDebugInfo>* Info = DebugInfo.Find(CurrInstanceBase + i))
|
|
{
|
|
TArray<VectorVM::FOpDebugInfo>& InfoArray = *Info;
|
|
int32 NewIdx = InfoArray.AddUninitialized();
|
|
VectorVM::FOpDebugInfo& NewInfo = InfoArray[NewIdx];
|
|
NewInfo.NumArgs = CurrNumArgs;
|
|
NewInfo.Op = CurrOp;
|
|
NewInfo.OpType = OpType;
|
|
NewInfo.LocationInfo[(int32)VectorVM::EKernelArgs::Dest] = VectorVM::FDataLocationInfo(Context, Dst.GetLocation(), Dst.GetLocationIndex(), Dst.GetSecondaryIndex(), Dst.GetTertiaryIndex());
|
|
NewInfo.LocationInfo[(int32)VectorVM::EKernelArgs::Arg0] = VectorVM::FDataLocationInfo(Context, Arg0.GetLocation(), Arg0.GetLocationIndex(), Arg0.GetSecondaryIndex(), Arg0.GetTertiaryIndex());;
|
|
NewInfo.LocationInfo[(int32)VectorVM::EKernelArgs::Arg1] = VectorVM::FDataLocationInfo(Context, Arg1.GetLocation(), Arg1.GetLocationIndex(), Arg1.GetSecondaryIndex(), Arg1.GetTertiaryIndex());;
|
|
NewInfo.LocationInfo[(int32)VectorVM::EKernelArgs::Arg2] = VectorVM::FDataLocationInfo(Context, Arg2.GetLocation(), Arg2.GetLocationIndex(), Arg2.GetSecondaryIndex(), Arg2.GetTertiaryIndex());;
|
|
NewInfo.LocationInfo[(int32)VectorVM::EKernelArgs::Arg3] = VectorVM::FDataLocationInfo(Context, Arg3.GetLocation(), Arg3.GetLocationIndex(), Arg3.GetSecondaryIndex(), Arg3.GetTertiaryIndex());;
|
|
|
|
NewInfo.PreOpValues[(int32)VectorVM::EKernelArgs::Dest] = CachedPreOpData[(int32)VectorVM::EKernelArgs::Dest];
|
|
NewInfo.PreOpValues[(int32)VectorVM::EKernelArgs::Arg0] = CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg0];
|
|
NewInfo.PreOpValues[(int32)VectorVM::EKernelArgs::Arg1] = CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg1];
|
|
NewInfo.PreOpValues[(int32)VectorVM::EKernelArgs::Arg2] = CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg2];
|
|
NewInfo.PreOpValues[(int32)VectorVM::EKernelArgs::Arg3] = CachedPreOpData[(int32)VectorVM::EKernelArgs::Arg3];
|
|
|
|
NewInfo.PostOpValues[(int32)VectorVM::EKernelArgs::Dest].Set(Dst.GetValue());
|
|
NewInfo.PostOpValues[(int32)VectorVM::EKernelArgs::Arg0].Set(Arg0.Get());
|
|
NewInfo.PostOpValues[(int32)VectorVM::EKernelArgs::Arg1].Set(Arg1.Get());
|
|
NewInfo.PostOpValues[(int32)VectorVM::EKernelArgs::Arg2].Set(Arg2.Get());
|
|
NewInfo.PostOpValues[(int32)VectorVM::EKernelArgs::Arg3].Set(Arg3.Get());
|
|
}
|
|
}
|
|
CurrInstanceBase += NumInstancesPerOp;
|
|
}
|
|
|
|
/**
 * Makes Debugger's implementation object the attached debugger.
 * Asserts that the call is on the game thread, that Debugger is non-null,
 * and that no debugger is attached yet.
 */
void VectorVM::AttachDebugger(VectorVM::FVectorVMDebugger* Debugger)
{
	check(IsInGameThread());
	check(Debugger);
	check(AttachedDebugger == nullptr);

	AttachedDebugger = Debugger->GetImpl();
}
|
|
|
|
/**
 * Clears the attached debugger.
 * Asserts that the call is on the game thread, that Debugger is non-null,
 * and that Debugger's implementation is the one currently attached.
 */
void VectorVM::DetachDebugger(VectorVM::FVectorVMDebugger* Debugger)
{
	check(IsInGameThread());
	check(Debugger);
	check(AttachedDebugger == Debugger->GetImpl());

	AttachedDebugger = nullptr;
}
|
|
|
|
/** Creates the debugger together with a freshly allocated implementation object. */
VectorVM::FVectorVMDebugger::FVectorVMDebugger()
	: Impl(new FVectorVMDebuggerImpl())
{
	// Nothing else to set up.
}
|
|
|
|
/** Destroys the implementation object allocated by the constructor. */
VectorVM::FVectorVMDebugger::~FVectorVMDebugger()
{
	// delete on a null pointer is a no-op, so no explicit guard is needed.
	delete Impl;
}
|
|
|
|
/**
 * Adds Instance as a key of the implementation's DebugInfo container.
 * PostOp only records ops for instances it can find in that container.
 */
void VectorVM::FVectorVMDebugger::AddInstanceToGather(int32 Instance)
{
	check(Impl);

	auto& GatherTable = Impl->DebugInfo;
	GatherTable.Add(Instance);
}
|
|
|
|
/**
 * Returns the result of looking Instance up in the implementation's DebugInfo
 * container, i.e. whatever DebugInfo.Find() yields for that key.
 */
const TArray<VectorVM::FOpDebugInfo>* VectorVM::FVectorVMDebugger::GetDebugInfoForInstance(int32 Instance)
{
	check(Impl);

	const TArray<VectorVM::FOpDebugInfo>* GatheredOps = Impl->DebugInfo.Find(Instance);
	return GatheredOps;
}
|
|
|
|
/** Returns the implementation object; ownership stays with this debugger (the destructor deletes it). */
FVectorVMDebuggerImpl* VectorVM::FVectorVMDebugger::GetImpl()
{
	FVectorVMDebuggerImpl* const Implementation = Impl;
	return Implementation;
}
|
|
|
|
/**
 * Forwards the script-run setup to the implementation object.
 *
 * @param StartInstance		Passed through unchanged to the implementation.
 * @param InstancesToDebug	Passed through unchanged to the implementation.
 *
 * NOTE(review): InstancesToDebug is taken by value, so the array is copied on
 * every call. Changing it to a const reference requires updating the
 * declaration as well, so it is left untouched here.
 */
void VectorVM::FVectorVMDebugger::InitForScriptRun(int32 StartInstance, const TArray<int32> InstancesToDebug)
{
	// Same guard as the other methods that dereference Impl.
	check(Impl);
	Impl->InitForScriptRun(StartInstance, InstancesToDebug);
}
|
|
|
|
/**
 * Describes where an operand lives and resolves that description to an address.
 *
 * @param Context			VM context whose constant, register and shared-data tables are indexed.
 * @param InLocation		Kind of storage the operand uses.
 * @param InPrimaryIndex	Index into the table selected by InLocation.
 * @param InSecondaryIndex	Only used for shared data: first argument to GetReadBuffer().
 * @param InTertiaryIndex	Only used for shared data: second argument to GetReadBuffer().
 *
 * MemoryAddress is set to nullptr for any location kind not handled below.
 */
VectorVM::FDataLocationInfo::FDataLocationInfo(FVectorVMContext& Context, EVectorVMOperandLocation InLocation, int32 InPrimaryIndex, int32 InSecondaryIndex, int32 InTertiaryIndex)
	: Location(InLocation)
	, PrimaryLocationIndex(InPrimaryIndex)
	, SecondaryLocationIndex(InSecondaryIndex)
	, TertiaryLocationIndex(InTertiaryIndex)
{
	switch (Location)
	{
	case EVectorVMOperandLocation::Constant:
		MemoryAddress = &Context.ConstantTable[PrimaryLocationIndex];
		break;

	// The register cases index one table at increasing offsets:
	// temporaries first, then inputs, then outputs.
	case EVectorVMOperandLocation::TemporaryRegister:
		MemoryAddress = &Context.RegisterTable[PrimaryLocationIndex];
		break;

	case EVectorVMOperandLocation::InputRegister:
		MemoryAddress = &Context.RegisterTable[NumTempRegisters + PrimaryLocationIndex];
		break;

	case EVectorVMOperandLocation::OutputRegister:
		// Output registers are registers: index the register table, not the
		// constant table, with the register-relative offset.
		MemoryAddress = &Context.RegisterTable[NumTempRegisters + MaxInputRegisters + PrimaryLocationIndex];
		break;

	case EVectorVMOperandLocation::SharedData:
		{
			FVectorVMSharedDataView& SharedData = Context.SharedDataTable[PrimaryLocationIndex];
			MemoryAddress = SharedData.GetReadBuffer(SecondaryLocationIndex, TertiaryLocationIndex);
		}
		break;

	default:
		MemoryAddress = nullptr;
		break;
	}
}
|
|
|
|
#endif
|
|
|
|
#undef VM_FORCEINLINE |