Files
UnrealEngineUWP/Engine/Shaders/Private/SceneData.ush
jamie hayes 857e02b9a0 Add ability to specify a camera distance cull range for instances of a primitive and add camera distance culling logic to Nanite.
Hooked the value used by instanced static meshes into it to get nanite ISMs to respect the cull distance.

#rb brian.karis
#preflight 6287d8a21e478b95c7345866

#ROBOMERGE-AUTHOR: jamie.hayes
#ROBOMERGE-SOURCE: CL 20301693 via CL 20301776 via CL 20301792
#ROBOMERGE-BOT: UE5 (Release-Engine-Staging -> Main) (v948-20297126)

[CL 20305738 by jamie hayes in ue5-main branch]
2022-05-20 19:15:15 -04:00

1190 lines
49 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "LargeWorldCoordinates.ush"
#include "OctahedralCommon.ush"
#include "/Engine/Shared/NaniteDefinitions.h"
#ifndef USE_GLOBAL_GPU_SCENE_DATA
#define USE_GLOBAL_GPU_SCENE_DATA 0
#endif
#ifndef USE_GLOBAL_GPU_SCENE_DATA_RW
#define USE_GLOBAL_GPU_SCENE_DATA_RW 0
#endif
#ifndef USES_PER_INSTANCE_CUSTOM_DATA
#define USES_PER_INSTANCE_CUSTOM_DATA 0
#endif
#ifndef USES_PER_INSTANCE_RANDOM
#define USES_PER_INSTANCE_RANDOM 0
#endif
#ifndef NEEDS_LIGHTMAP_COORDINATE
#define NEEDS_LIGHTMAP_COORDINATE 0
#endif
#ifndef VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA
#define VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA 0
#endif
#define ENABLE_PER_INSTANCE_CUSTOM_DATA (USES_PER_INSTANCE_CUSTOM_DATA || VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA)
// When transforms come from the InstanceSceneData buffer, indicates whether or not the transforms are compressed (ignored on mobile)
// TODO: Global setting/define
#define INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS 1
// Whether to fetch primitive values (eg LocalToWorld) by dynamically indexing a scene-wide buffer, or to reference a single Primitive uniform buffer
#if VF_SUPPORTS_PRIMITIVE_SCENE_DATA
#if FEATURE_LEVEL == FEATURE_LEVEL_ES3_1
#define VF_USE_PRIMITIVE_SCENE_DATA 2
#else
#define VF_USE_PRIMITIVE_SCENE_DATA 1
#endif
#else
#define VF_USE_PRIMITIVE_SCENE_DATA 0
#endif
// Must match PrimitiveUniformShaderParameters.h
#define PRIMITIVE_SCENE_DATA_FLAG_CAST_SHADOWS 0x1
#define PRIMITIVE_SCENE_DATA_FLAG_USE_SINGLE_SAMPLE_SHADOW_SL 0x2
#define PRIMITIVE_SCENE_DATA_FLAG_USE_VOLUMETRIC_LM_SHADOW_SL 0x4
#define PRIMITIVE_SCENE_DATA_FLAG_DECAL_RECEIVER 0x8
#define PRIMITIVE_SCENE_DATA_FLAG_SHOULD_CACHE_SHADOW 0x10
#define PRIMITIVE_SCENE_DATA_FLAG_OUTPUT_VELOCITY 0x20
#define PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN 0x40
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_CAPSULE_REPRESENTATION 0x80
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_CAST_CONTACT_SHADOW 0x100
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_PRIMITIVE_CUSTOM_DATA 0x200
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_0 0x400
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_1 0x800
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_2 0x1000
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_INSTANCE_LOCAL_BOUNDS 0x2000
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_NANITE_IMPOSTER 0x4000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_GAME 0x8000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_EDITOR 0x10000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_REFLECTION_CAPTURES 0x20000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_REAL_TIME_SKY_CAPTURES 0x40000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_RAY_TRACING 0x80000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_SCENE_CAPTURE_ONLY 0x100000
#define PRIMITIVE_SCENE_DATA_FLAG_HIDDEN_IN_SCENE_CAPTURE 0x200000
#define PRIMITIVE_SCENE_DATA_FLAG_FORCE_HIDDEN 0x400000
#define PRIMITIVE_SCENE_DATA_FLAG_CAST_HIDDEN_SHADOW 0x800000
#define PRIMITIVE_SCENE_DATA_FLAG_EVALUATE_WORLD_POSITION_OFFSET 0x1000000
#define PRIMITIVE_SCENE_DATA_FLAG_CAMERA_DISTANCE_CULL 0x2000000
// GPUCULL_TODO: Eventually we need to remove this workaround
#define VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG (1U << 31U)
#define PRIMITIVE_ID_NUM_BITS (20u) // Max of 1,048,576 primitives
#define INSTANCE_SCENE_DATA_FLAGS_NUM_BITS (12u) // Max of 12 flags
#define INSTANCE_RELATIVE_ID_NUM_BITS (24u) // Max of 16,777,216 instances per primitive
#define INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS (8u) // Max of 255 custom data floats per instance
#define PRIMITIVE_ID_MASK ((1u << PRIMITIVE_ID_NUM_BITS) - 1u)
#define INSTANCE_RELATIVE_ID_MASK ((1u << INSTANCE_RELATIVE_ID_NUM_BITS) - 1u)
#define INVALID_PRIMITIVE_ID PRIMITIVE_ID_MASK
#define INVALID_INSTANCE_PAYLOAD_OFFSET (0xFFFFFFFFu)
// Must match InstanceUniformShaderParameters.h
#define INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN 0x1
#define INSTANCE_SCENE_DATA_FLAG_HAS_RANDOM 0x2
#define INSTANCE_SCENE_DATA_FLAG_HAS_CUSTOM_DATA 0x4
#define INSTANCE_SCENE_DATA_FLAG_HAS_DYNAMIC_DATA 0x8
#define INSTANCE_SCENE_DATA_FLAG_HAS_LIGHTSHADOW_UV_BIAS 0x10
#define INSTANCE_SCENE_DATA_FLAG_HAS_HIERARCHY_OFFSET 0x20
#define INSTANCE_SCENE_DATA_FLAG_HAS_LOCAL_BOUNDS 0x40
#define INSTANCE_SCENE_DATA_FLAG_HAS_EDITOR_DATA 0x80
#define INSTANCE_SCENE_DATA_FLAG_IS_RAYTRACING_FAR_FIELD 0x100
#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
static const uint InstanceTransformSizeFloat4Count = 2u; // compressed transform
#else
static const uint InstanceTransformSizeFloat4Count = 3u; // encoded scale/rotation (uint4) and translation (float3)
#endif
#define NUM_CUSTOM_PRIMITIVE_DATA 9 // Num float4s used for custom data. Must match FCustomPrimitiveData::NumCustomPrimitiveDataFloat4s in SceneTypes.h
// Must match FPrimitiveUniformShaderParameters in C++
// Unpacked, shader-side view of one primitive's scene data.
// Produced either from the Primitive uniform buffer (GetPrimitiveDataFromUniformBuffer)
// or from the float4-packed scene-wide buffer (GetPrimitiveData).
struct FPrimitiveSceneData
{
uint Flags; // Bitmask tested against PRIMITIVE_SCENE_DATA_FLAG_* values. TODO: Use 16 bits?
int InstanceSceneDataOffset; // Link to the range of instances that belong to this primitive
int NumInstanceSceneDataEntries;
int PersistentPrimitiveIndex;
uint SingleCaptureIndex; // TODO: Use 16 bits? 8 bits?
float3 TilePosition; // Tile part shared by every LWC matrix/vector built for this primitive
uint PrimitiveComponentId; // TODO: Refactor to use PersistentPrimitiveIndex, ENGINE USE ONLY - will be removed
FLWCMatrix LocalToWorld;
FLWCInverseMatrix WorldToLocal;
FLWCMatrix PreviousLocalToWorld;
FLWCInverseMatrix PreviousWorldToLocal;
float3 InvNonUniformScale;
float ObjectBoundsX;
FLWCVector3 ObjectWorldPosition;
FLWCVector3 ActorWorldPosition;
float ObjectRadius;
uint LightmapUVIndex; // TODO: Use 16 bits? // TODO: Move into associated array that disappears if static lighting is disabled
float3 ObjectOrientation; // TODO: More efficient representation?
uint LightmapDataIndex; // TODO: Use 16 bits? // TODO: Move into associated array that disappears if static lighting is disabled
float4 NonUniformScale;
float3 PreSkinnedLocalBoundsMin;
uint NaniteResourceID;
float3 PreSkinnedLocalBoundsMax;
uint NaniteHierarchyOffset;
float3 LocalObjectBoundsMin;
float ObjectBoundsY;
float3 LocalObjectBoundsMax;
float ObjectBoundsZ;
uint InstancePayloadDataOffset; // Base offset, in float4s, of this primitive's instance payload data
uint InstancePayloadDataStride; // Per-instance payload stride in float4s. TODO: Use 16 bits? 8 bits?
float3 InstanceLocalBoundsCenter;
float3 InstanceLocalBoundsExtent;
float3 WireframeColor; // TODO: Should refactor out all editor data into a separate buffer
float3 LevelColor; // TODO: Should refactor out all editor data into a separate buffer
uint NaniteImposterIndex; // Low bits (NANITE_IMPOSTER_INDEX_MASK) of the packed imposter-index/filter-flags word
uint NaniteFilterFlags; // Remaining high bits of the same packed word
float2 CameraDistanceCullMinMaxSquared; // NOTE(review): presumably x = min, y = max squared cull distance - confirm against the C++ side
float4 CustomPrimitiveData[NUM_CUSTOM_PRIMITIVE_DATA]; // TODO: Move to associated array to shrink primitive data and pack cachelines more effectively
};
// Fetch from Primitive uniform buffer.
// Copies every member of the single bound Primitive uniform buffer into an FPrimitiveSceneData,
// rebuilding the large-world-coordinate matrices/vectors from the tile position and splitting the
// packed Nanite imposter-index/filter-flags word.
FPrimitiveSceneData GetPrimitiveDataFromUniformBuffer()
{
	// Start from zero so that no member can ever be returned undefined.
	FPrimitiveSceneData PrimitiveData = (FPrimitiveSceneData)0;

	PrimitiveData.Flags = Primitive.Flags;
	PrimitiveData.InstanceSceneDataOffset = Primitive.InstanceSceneDataOffset;
	PrimitiveData.NumInstanceSceneDataEntries = Primitive.NumInstanceSceneDataEntries;
	PrimitiveData.PersistentPrimitiveIndex = Primitive.PersistentPrimitiveIndex;
	PrimitiveData.SingleCaptureIndex = Primitive.SingleCaptureIndex;
	PrimitiveData.TilePosition = Primitive.TilePosition;
	PrimitiveData.PrimitiveComponentId = Primitive.PrimitiveComponentId;

	// Transforms are stored relative to the tile; recombine them into LWC types.
	PrimitiveData.LocalToWorld = MakeLWCMatrix4x3(Primitive.TilePosition, Primitive.LocalToRelativeWorld);
	PrimitiveData.WorldToLocal = MakeLWCInverseMatrix4x3(Primitive.TilePosition, Primitive.RelativeWorldToLocal);
	PrimitiveData.PreviousLocalToWorld = MakeLWCMatrix4x3(Primitive.TilePosition, Primitive.PreviousLocalToRelativeWorld);
	PrimitiveData.PreviousWorldToLocal = MakeLWCInverseMatrix4x3(Primitive.TilePosition, Primitive.PreviousRelativeWorldToLocal);

	PrimitiveData.InvNonUniformScale = Primitive.InvNonUniformScale;
	PrimitiveData.ObjectBoundsX = Primitive.ObjectBoundsX;
	PrimitiveData.ObjectWorldPosition = MakeLWCVector3(Primitive.TilePosition, Primitive.ObjectRelativeWorldPositionAndRadius.xyz);
	PrimitiveData.ObjectRadius = Primitive.ObjectRelativeWorldPositionAndRadius.w;
	PrimitiveData.ActorWorldPosition = MakeLWCVector3(Primitive.TilePosition, Primitive.ActorRelativeWorldPosition);
	PrimitiveData.LightmapUVIndex = Primitive.LightmapUVIndex;
	PrimitiveData.ObjectOrientation = Primitive.ObjectOrientation;
	PrimitiveData.LightmapDataIndex = Primitive.LightmapDataIndex;
	PrimitiveData.NonUniformScale = Primitive.NonUniformScale;
	PrimitiveData.PreSkinnedLocalBoundsMin = Primitive.PreSkinnedLocalBoundsMin;
	PrimitiveData.NaniteResourceID = Primitive.NaniteResourceID;
	PrimitiveData.PreSkinnedLocalBoundsMax = Primitive.PreSkinnedLocalBoundsMax;
	PrimitiveData.NaniteHierarchyOffset = Primitive.NaniteHierarchyOffset;
	PrimitiveData.LocalObjectBoundsMin = Primitive.LocalObjectBoundsMin;
	PrimitiveData.ObjectBoundsY = Primitive.ObjectBoundsY;
	PrimitiveData.LocalObjectBoundsMax = Primitive.LocalObjectBoundsMax;
	PrimitiveData.ObjectBoundsZ = Primitive.ObjectBoundsZ;
	PrimitiveData.InstancePayloadDataOffset = Primitive.InstancePayloadDataOffset;
	PrimitiveData.InstancePayloadDataStride = Primitive.InstancePayloadDataStride;

	// These two were previously left unassigned, so callers of the uniform buffer path read undefined
	// instance bounds; the GPU scene path (GetPrimitiveData) has always filled them.
	PrimitiveData.InstanceLocalBoundsCenter = Primitive.InstanceLocalBoundsCenter;
	PrimitiveData.InstanceLocalBoundsExtent = Primitive.InstanceLocalBoundsExtent;

	PrimitiveData.WireframeColor = Primitive.WireframeColor;
	PrimitiveData.LevelColor = Primitive.LevelColor;

	// Imposter index lives in the low bits, filter flags in the bits above it.
	// NOTE(review): unlike GetPrimitiveData, an index equal to NANITE_IMPOSTER_INDEX_MASK is not remapped to
	// INVALID_NANITE_IMPOSTER_INDEX here - confirm whether that asymmetry is intended.
	PrimitiveData.NaniteImposterIndex = Primitive.NaniteImposterIndexAndFilterFlags & NANITE_IMPOSTER_INDEX_MASK;
	PrimitiveData.NaniteFilterFlags = Primitive.NaniteImposterIndexAndFilterFlags >> NANITE_IMPOSTER_INDEX_NUM_BITS;
	PrimitiveData.CameraDistanceCullMinMaxSquared = Primitive.CameraDistanceCullMinMaxSquared;

	UNROLL
	for (int DataIndex = 0; DataIndex < NUM_CUSTOM_PRIMITIVE_DATA; ++DataIndex)
	{
		PrimitiveData.CustomPrimitiveData[DataIndex] = Primitive.CustomPrimitiveData[DataIndex];
	}

	return PrimitiveData;
}
#if VF_USE_PRIMITIVE_SCENE_DATA
#if USE_GLOBAL_GPU_SCENE_DATA
StructuredBuffer<float4> GPUScenePrimitiveSceneData;
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
RWStructuredBuffer<float4> GPUScenePrimitiveSceneDataRW;
#endif
// Stride of a single primitive's data in float4's, must match C++
#define PRIMITIVE_SCENE_DATA_STRIDE 41
// Reads one float4 of primitive scene data from whichever buffer is bound for this shader.
// PrimitiveIndex is the float4 offset of the primitive's first element (callers pre-multiply the primitive id
// by PRIMITIVE_SCENE_DATA_STRIDE); ItemIndex selects the float4 within that primitive.
float4 LoadPrimitivePrimitiveSceneDataElement(uint PrimitiveIndex, uint ItemIndex)
{
	const uint ElementIndex = PrimitiveIndex + ItemIndex;
	float4 Element;

#if USE_GLOBAL_GPU_SCENE_DATA
	checkStructuredBufferAccessSlow(GPUScenePrimitiveSceneData, ElementIndex);
	Element = GPUScenePrimitiveSceneData[ElementIndex];
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
	checkStructuredBufferAccessSlow(GPUScenePrimitiveSceneDataRW, ElementIndex);
	Element = GPUScenePrimitiveSceneDataRW[ElementIndex];
#else
	checkStructuredBufferAccessSlow(View.PrimitiveSceneData, ElementIndex);
	Element = View.PrimitiveSceneData[ElementIndex];
#endif

	return Element;
}
// Fetch from scene primitive buffer.
// Unpacks the PRIMITIVE_SCENE_DATA_STRIDE float4s belonging to PrimitiveId into an FPrimitiveSceneData.
FPrimitiveSceneData GetPrimitiveData(uint PrimitiveId)
{
	// Every path starts from an all-zero struct. For mobile vertex shaders that is also the final result:
	// Vertex shaders do not have access to GPUScene on mobile. Use GetPrimitiveData(FVertexFactoryIntermediates Intermediates)
	// TODO: need a way to report invalid usage, after all dead code elimination
	FPrimitiveSceneData Result = (FPrimitiveSceneData)0;

#if !(FEATURE_LEVEL == FEATURE_LEVEL_ES3_1 && VERTEXSHADER)
	// Note: layout must match FPrimitiveSceneShaderData in C++
	// Relying on optimizer to remove unused loads
	const uint BaseIndex = PrimitiveId * PRIMITIVE_SCENE_DATA_STRIDE;

	// Elements that carry more than one field are fetched once and split by component below.
	const float4 Element0  = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 0);
	const float4 Element1  = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 1);
	const float4 Element18 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 18);
	const float4 Element19 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 19);
	const float4 Element20 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 20);
	const float4 Element21 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 21);
	const float4 Element23 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 23);
	const float4 Element24 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 24);
	const float4 Element25 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 25);
	const float4 Element26 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 26);
	const float4 Element27 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 27);
	const float4 Element28 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 28);
	const float4 Element29 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 29);
	const float4 Element30 = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 30);

	const float3 TilePosition = Element1.xyz;

	// The four transforms are stored as 4x3 (xyz per row); the fourth column is reconstructed as (0, 0, 0, 1).
	float4x4 LocalToWorld;
	LocalToWorld[0] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 2).xyz, 0.0f);
	LocalToWorld[1] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 3).xyz, 0.0f);
	LocalToWorld[2] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 4).xyz, 0.0f);
	LocalToWorld[3] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 5).xyz, 1.0f);

	float4x4 WorldToLocal;
	WorldToLocal[0] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 6).xyz, 0.0f);
	WorldToLocal[1] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 7).xyz, 0.0f);
	WorldToLocal[2] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 8).xyz, 0.0f);
	WorldToLocal[3] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 9).xyz, 1.0f);

	float4x4 PreviousLocalToWorld;
	PreviousLocalToWorld[0] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 10).xyz, 0.0f);
	PreviousLocalToWorld[1] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 11).xyz, 0.0f);
	PreviousLocalToWorld[2] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 12).xyz, 0.0f);
	PreviousLocalToWorld[3] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 13).xyz, 1.0f);

	float4x4 PreviousWorldToLocal;
	PreviousWorldToLocal[0] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 14).xyz, 0.0f);
	PreviousWorldToLocal[1] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 15).xyz, 0.0f);
	PreviousWorldToLocal[2] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 16).xyz, 0.0f);
	PreviousWorldToLocal[3] = float4(LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 17).xyz, 1.0f);

	// Element 0: flags and instance range
	Result.Flags = asuint(Element0.x);
	Result.InstanceSceneDataOffset = asuint(Element0.y);
	Result.NumInstanceSceneDataEntries = asuint(Element0.z);
	Result.SingleCaptureIndex = asuint(Element0.w);

	// Element 1: tile position and component id
	Result.TilePosition = TilePosition;
	Result.PrimitiveComponentId = asuint(Element1.w);

	// Elements 2-17: transforms
	Result.LocalToWorld = MakeLWCMatrix4x3(TilePosition, LocalToWorld);
	Result.WorldToLocal = MakeLWCInverseMatrix4x3(TilePosition, WorldToLocal);
	Result.PreviousLocalToWorld = MakeLWCMatrix4x3(TilePosition, PreviousLocalToWorld);
	Result.PreviousWorldToLocal = MakeLWCInverseMatrix4x3(TilePosition, PreviousWorldToLocal);

	// Elements 18-22: scale, bounds sphere, actor position, orientation, lightmap indices
	Result.InvNonUniformScale = Element18.xyz;
	Result.ObjectBoundsX = Element18.w;
	Result.ObjectWorldPosition = MakeLWCVector3(TilePosition, Element19.xyz);
	Result.ObjectRadius = Element19.w;
	Result.ActorWorldPosition = MakeLWCVector3(TilePosition, Element20.xyz);
	Result.LightmapUVIndex = asuint(Element20.w);
	Result.ObjectOrientation = Element21.xyz;
	Result.LightmapDataIndex = asuint(Element21.w);
	Result.NonUniformScale = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 22);

	// Elements 23-28: local bounds, Nanite ids and instance payload layout
	Result.PreSkinnedLocalBoundsMin = Element23.xyz;
	Result.NaniteResourceID = asuint(Element23.w);
	Result.PreSkinnedLocalBoundsMax = Element24.xyz;
	Result.NaniteHierarchyOffset = asuint(Element24.w);
	Result.LocalObjectBoundsMin = Element25.xyz;
	Result.ObjectBoundsY = Element25.w;
	Result.LocalObjectBoundsMax = Element26.xyz;
	Result.ObjectBoundsZ = Element26.w;
	Result.InstanceLocalBoundsCenter = Element27.xyz;
	Result.InstancePayloadDataOffset = asuint(Element27.w);
	Result.InstanceLocalBoundsExtent = Element28.xyz;
	Result.InstancePayloadDataStride = asuint(Element28.w);

	// Elements 29-31: editor colors, packed Nanite imposter word, persistent index, cull range
	Result.WireframeColor = Element29.xyz;
	Result.LevelColor = Element30.xyz;
	Result.PersistentPrimitiveIndex = asint(Element30.w);
	Result.CameraDistanceCullMinMaxSquared = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 31).xy;

	const uint PackedImposterAndFilter = asuint(Element29.w);
	Result.NaniteFilterFlags = PackedImposterAndFilter >> NANITE_IMPOSTER_INDEX_NUM_BITS;
	Result.NaniteImposterIndex = PackedImposterAndFilter & NANITE_IMPOSTER_INDEX_MASK;
	// An all-ones index field is the packed encoding of "no imposter".
	if (Result.NaniteImposterIndex == NANITE_IMPOSTER_INDEX_MASK)
	{
		Result.NaniteImposterIndex = INVALID_NANITE_IMPOSTER_INDEX;
	}

	// TODO: Move to associated array (and editor data) to shrink primitive data and better pack cachelines
	UNROLL
	for (int CustomIndex = 0; CustomIndex < NUM_CUSTOM_PRIMITIVE_DATA; ++CustomIndex)
	{
		Result.CustomPrimitiveData[CustomIndex] = LoadPrimitivePrimitiveSceneDataElement(BaseIndex, 32 + CustomIndex);
	}
#endif

	return Result;
}
#else // !VF_USE_PRIMITIVE_SCENE_DATA
// Without primitive scene data there is only the single bound Primitive uniform buffer,
// so PrimitiveId is ignored and that buffer's contents are returned.
FPrimitiveSceneData GetPrimitiveData(uint PrimitiveId)
{
	const FPrimitiveSceneData UniformBufferData = GetPrimitiveDataFromUniformBuffer();
	return UniformBufferData;
}
#endif // VF_USE_PRIMITIVE_SCENE_DATA
// Returns -1 when PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN is set in Flags, +1 otherwise.
float GetPrimitive_DeterminantSign_FromFlags(uint Flags)
{
	const uint NegativeDeterminantBit = Flags & PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN;
	return CondMask(NegativeDeterminantBit, -1.0f, 1.0f);
}
// Determinant sign (-1 or +1) of the primitive identified by PrimitiveId.
float GetPrimitive_DeterminantSign(uint PrimitiveId)
{
	const uint PrimitiveFlags = GetPrimitiveData(PrimitiveId).Flags;
	return GetPrimitive_DeterminantSign_FromFlags(PrimitiveFlags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// Determinant sign (-1 or +1) for already-fetched primitive data.
float GetPrimitive_DeterminantSign(FPrimitiveSceneData Primitive)
{
	const uint PrimitiveFlags = Primitive.Flags;
	return GetPrimitive_DeterminantSign_FromFlags(PrimitiveFlags);
}
#endif
// Encodes two primitive flags into a single value in {0, 1/3, 2/3, 1}:
// the capsule-representation flag weighs 2 and the cast-contact-shadow flag weighs 1, normalized by 3.
float GetPrimitive_PerObjectGBufferData_FromFlags(uint Flags)
{
	const float HasCapsule = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_HAS_CAPSULE_REPRESENTATION, 1.0f, 0.0f);
	const float HasContactShadow = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_HAS_CAST_CONTACT_SHADOW, 1.0f, 0.0f);

	const float PackedBits = 2.0f * HasCapsule + HasContactShadow;
	return PackedBits / 3.0f;
}
// Per-object GBuffer data of the primitive identified by PrimitiveId.
float GetPrimitive_PerObjectGBufferData(uint PrimitiveId)
{
	const uint PrimitiveFlags = GetPrimitiveData(PrimitiveId).Flags;
	return GetPrimitive_PerObjectGBufferData_FromFlags(PrimitiveFlags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// Per-object GBuffer data for already-fetched primitive data.
float GetPrimitive_PerObjectGBufferData(FPrimitiveSceneData Primitive)
{
	const uint PrimitiveFlags = Primitive.Flags;
	return GetPrimitive_PerObjectGBufferData_FromFlags(PrimitiveFlags);
}
#endif
// Compacts the three lighting-channel flags into a 3-bit mask: channel 0 -> bit 0, channel 1 -> bit 1, channel 2 -> bit 2.
uint GetPrimitive_LightingChannelMask_FromFlags(uint Flags)
{
	const uint Bit0 = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_0, 1u, 0u);
	const uint Bit1 = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_1, 1u, 0u) << 1u;
	const uint Bit2 = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_2, 1u, 0u) << 2u;

	return Bit0 | Bit1 | Bit2;
}
// 3-bit lighting channel mask of the primitive identified by PrimitiveId.
uint GetPrimitive_LightingChannelMask(uint PrimitiveId)
{
	const uint PrimitiveFlags = GetPrimitiveData(PrimitiveId).Flags;
	return GetPrimitive_LightingChannelMask_FromFlags(PrimitiveFlags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// 3-bit lighting channel mask for already-fetched primitive data.
uint GetPrimitive_LightingChannelMask(FPrimitiveSceneData Primitive)
{
	const uint PrimitiveFlags = Primitive.Flags;
	return GetPrimitive_LightingChannelMask_FromFlags(PrimitiveFlags);
}
#endif
// Unpacked AoS layout - see FInstanceSceneShaderData::Setup() for SoA packed layout.
#if USE_EDITOR_SHADERS
// Editor-only per-instance data; only compiled when USE_EDITOR_SHADERS is set.
struct FInstanceSceneEditorData
{
float3 HitProxyId; // NOTE(review): presumably the UnpackHitProxyId() expansion of HitProxyPacked - confirm at the fill site
uint HitProxyPacked; // Hit proxy id packed into a single dword
bool bIsSelected;
};
#endif
// Expands a packed hit proxy id into three normalized channels:
// bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z, each mapped from [0, 255] to [0, 1].
float3 UnpackHitProxyId(uint HitProxyPacked)
{
	const float Byte0 = float((HitProxyPacked) & 0xFF);
	const float Byte1 = float((HitProxyPacked >> 8u) & 0xFF);
	const float Byte2 = float((HitProxyPacked >> 16u) & 0xFF);

	return float3(Byte0, Byte1, Byte2) * (1.0f / 255.0f);
}
// Locations of an instance's optional payload sections inside the instance payload buffer.
// Every member is an offset in float4s, or INVALID_INSTANCE_PAYLOAD_OFFSET when the instance does not
// carry that section (see GetInstancePayloadDataOffsets).
struct FInstancePayloadDataOffsets
{
uint HierarchyOffset;
uint EditorData;
uint LocalBounds;
uint DynamicData;
uint LightShadowUVBias;
uint CustomData;
};
// Unpacked per-instance scene data. Some members are derived rather than loaded
// (see ComputeInstanceDerivedData, which fills the scale, determinant sign and WorldToLocal members).
struct FInstanceSceneData
{
FLWCMatrix LocalToWorld;
FLWCMatrix PrevLocalToWorld;
FLWCInverseMatrix WorldToLocal; // Derived from LocalToWorld in ComputeInstanceDerivedData
float4 NonUniformScale; // xyz = per-axis scale, w = max3 of xyz
float3 InvNonUniformScale;
float DeterminantSign; // -1 or +1, decoded from INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN
float3 LocalBoundsCenter;
uint PrimitiveId;
uint RelativeId;
uint PayloadDataOffset;
float3 LocalBoundsExtent;
uint LastUpdateSceneFrameNumber;
uint NaniteRuntimeResourceID;
uint NaniteHierarchyOffset;
// Always compiled in for now; the intended guard is kept as a trailing comment.
#if 1//USES_PER_INSTANCE_RANDOM
float RandomID;
#endif
#if ENABLE_PER_INSTANCE_CUSTOM_DATA
uint CustomDataOffset; // Offset in float4s into the payload buffer; 0xFFFFFFFF when there is no custom data
uint CustomDataCount; // Number of custom data floats (not float4s)
#endif
#if 1 //NEEDS_LIGHTMAP_COORDINATE // TODO: Fix Me
float4 LightMapAndShadowMapUVBias;
#endif
bool ValidInstance;
uint Flags; // Bitmask tested against INSTANCE_SCENE_DATA_FLAG_* values
#if USE_EDITOR_SHADERS
FInstanceSceneEditorData EditorData;
#endif
};
#if USE_GLOBAL_GPU_SCENE_DATA
StructuredBuffer<float4> GPUSceneInstanceSceneData;
StructuredBuffer<float4> GPUSceneInstancePayloadData;
uint GPUSceneFrameNumber;
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
RWStructuredBuffer<float4> GPUSceneInstanceSceneDataRW;
RWStructuredBuffer<float4> GPUSceneInstancePayloadDataRW;
uint GPUSceneFrameNumber;
#endif
// Returns the frame number that goes with the GPU scene data bound to this shader.
// Both global binding modes (read-only and RW) declare a standalone GPUSceneFrameNumber alongside their
// buffers, so it is used for either of them; only the view-bound fallback reads View.FrameNumber.
// Previously the RW mode fell through to View.FrameNumber even though its own GPUSceneFrameNumber was declared.
uint GetGPUSceneFrameNumber()
{
#if USE_GLOBAL_GPU_SCENE_DATA || USE_GLOBAL_GPU_SCENE_DATA_RW
	return GPUSceneFrameNumber;
#else
	return View.FrameNumber;
#endif
}
// Reads one float4 of instance scene data from whichever buffer is bound for this shader.
float4 LoadInstanceSceneDataElement(uint Index)
{
	float4 Element;

#if USE_GLOBAL_GPU_SCENE_DATA
	Element = GPUSceneInstanceSceneData[Index];
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
	Element = GPUSceneInstanceSceneDataRW[Index];
#else
	Element = View.InstanceSceneData[Index];
#endif

	return Element;
}
// Reads one float4 of instance payload data from whichever buffer is bound for this shader.
float4 LoadInstancePayloadDataElement(uint Index)
{
	float4 Element;

#if USE_GLOBAL_GPU_SCENE_DATA
	Element = GPUSceneInstancePayloadData[Index];
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
	Element = GPUSceneInstancePayloadDataRW[Index];
#else
	Element = View.InstancePayloadData[Index];
#endif

	return Element;
}
// Loads the Float4Index-th float4 of an instance's custom data.
// Returns zero when custom data is compiled out, when the instance has none, or when Float4Index is past
// the last float4 that holds any of the instance's CustomDataCount floats.
float4 LoadInstanceCustomDataElement(FInstanceSceneData SceneData, uint Float4Index)
{
	float4 Element = (float4)0.0f;

#if ENABLE_PER_INSTANCE_CUSTOM_DATA
	// Round the float count up to whole float4s.
	const uint Float4Count = (SceneData.CustomDataCount + 3u) >> 2u;
	const bool bHasCustomData = SceneData.CustomDataOffset != INVALID_INSTANCE_PAYLOAD_OFFSET;

	if (bHasCustomData && Float4Index < Float4Count)
	{
		Element = LoadInstancePayloadDataElement(SceneData.CustomDataOffset + Float4Index);
	}
#endif

	return Element;
}
// Loads a single custom data float: FloatIndex / 4 selects the float4, FloatIndex % 4 the component.
// Returns zero when custom data is compiled out.
float LoadInstanceCustomDataFloat(FInstanceSceneData SceneData, uint FloatIndex)
{
	float Value = 0.0f;

#if ENABLE_PER_INSTANCE_CUSTOM_DATA
	const uint Component = FloatIndex & 3u;
	const float4 Packed = LoadInstanceCustomDataElement(SceneData, FloatIndex >> 2u);
	Value = Packed[Component];
#endif

	return Value;
}
// [Frisvad 2012, "Building an Orthonormal Basis from a 3D Unit Vector Without Normalization"]
// Writes two vectors completing a basis around BasisZ into BasisX / BasisY.
// The 1 / (1 + z) term is singular at z == -1; callers in this file only pass hemisphere (z >= 0) vectors.
void GetHemiOrthoBasis( inout float3 BasisX, inout float3 BasisY, float3 BasisZ )
{
	const float InvOnePlusZ = 1.0f / ( 1.0f + BasisZ.z );
	const float OffDiagonal = -BasisZ.x * BasisZ.y * InvOnePlusZ;

	BasisX.x = 1.0f - BasisZ.x * BasisZ.x * InvOnePlusZ;
	BasisX.y = OffDiagonal;
	BasisX.z = -BasisZ.x;

	BasisY.x = OffDiagonal;
	BasisY.y = 1.0f - BasisZ.y * BasisZ.y * InvOnePlusZ;
	BasisY.z = -BasisZ.y;
}
// Packs a rotation (Axis rows) and per-axis Scale into a uint4:
//   x    : hemi-octahedral encoding of the Z axis, 16 bits per component (biased by 32768)
//   y    : bits 0-14 = biased "spin" of the X axis around Z, bit 15 = which basis component the spin refers to
//   z    : biased 16-bit mantissas of Scale.x (low half) and Scale.y (high half)
//   w    : biased 16-bit mantissa of Scale.z (low half), shared exponent in the high half
// Sign flips needed to keep the Z axis in the upper hemisphere and the stored spin component non-negative are
// folded into Scale. DecodeTransform is the inverse.
uint4 EncodeScaleAndRotation(float3 Scale, float3x3 Axis)
{
const uint ExpBits = 8;
const uint ExpBias = ( 1u << (ExpBits - 1) ) - 1;
const uint SignMantissaBits = 16;
const uint SignMantissaMask = (1u << SignMantissaBits) - 1;
const uint MantissaBits = SignMantissaBits - 1;
const float Sqrt2 = 1.41421356f;
uint4 Output;
// Rotation
{
// Force the Z axis into the z >= 0 hemisphere, compensating with the sign of Scale.z
if( Axis[2].z < 0.0f )
{
Axis[2] *= -1.0f;
Scale.z *= -1.0f;
}
float2 OctZ = UnitVectorToHemiOctahedron( Axis[2] );
float3 BasisX, BasisY;
GetHemiOrthoBasis( BasisX, BasisY, Axis[2] );
// Express the X axis in the reference basis around Z
float X = dot(Axis[0], BasisX);
float Y = dot(Axis[0], BasisY);
float aX = abs( X );
float aY = abs( Y );
// Store the smaller-magnitude component (Spin0); the other one (Spin1) is reconstructed on decode
bool bSpinIsX = aX < aY;
float Spin0 = bSpinIsX ? X : Y;
float Spin1 = bSpinIsX ? Y : X;
float Sign1 = Spin1 < 0.0f ? -1.0f : 1.0f;
// Make Spin1 non-negative by flipping the X axis, folded into Scale.x instead of Axis[0]
//Axis[0] *= Sign1;
Scale.x *= Sign1;
Spin0 *= Sign1;
// Decode rebuilds Y as cross(Z, X); record any handedness difference in the sign of Scale.y
float3 GeneratedY = cross(Axis[2], Axis[0]);
Scale.y *= dot( Axis[1], GeneratedY ) < 0.0f ? -Sign1 : Sign1;
// Avoid sign extension in shader by biasing
Output.x = (((int)round( OctZ.x * 32767.0f ) + 32768) & 0xFFFF) << 0;
Output.x |= (((int)round( OctZ.y * 32767.0f ) + 32768) & 0xFFFF) << 16;
// NOTE: Masking the bits with `& 0x7FFF` below causes the whole int to be optimized to 0 on some shader platforms.
// This is okay, as long as Spin0 is in [0, 1], which it should be.
Output.y = ((int)round( Spin0 * 16383.0f * Sqrt2 ) + 16384); // & 0x7FFF;
Output.y |= bSpinIsX ? (1u << 15) : 0;
}
// Scale
{
// All three components share one exponent, taken from the largest magnitude
float MaxComponent = max3(abs(Scale.x), abs(Scale.y), abs(Scale.z));
uint MaxComponentExponent = (asuint(MaxComponent) & 0x7f800000u) >> 23;
// Need +1 because of losing the implicit leading bit of mantissa
// TODO assumes ExpBits == 8
// TODO clamp to expressable range
uint SharedExp = MaxComponentExponent + 1;
// Power-of-two multiplier built directly from float exponent bits
float ExpScale = asfloat(((127 + ExpBias + MantissaBits - SharedExp) & 0xFFu) << 23);
if( (uint)round( MaxComponent * ExpScale ) == (1u << MantissaBits) )
{
// Mantissa rounded up
SharedExp++;
ExpScale *= 0.5f;
}
// Signed mantissas are biased by 1 << MantissaBits so they are stored as unsigned 16-bit values
Output.z = (((int)round( Scale.x * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 0;
Output.z |= (((int)round( Scale.y * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 16;
Output.w = (((int)round( Scale.z * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 0;
Output.w |= SharedExp << 16;
}
return Output;
}
// Splits a 3x3 transform into per-row lengths (scale) and normalized rows (axes), then packs them.
// No guard against zero-length rows: such a row divides by zero here.
uint4 EncodeScaleAndRotation( float3x3 InTransform )
{
	float3 RowLengths;
	RowLengths.x = length(InTransform[0]);
	RowLengths.y = length(InTransform[1]);
	RowLengths.z = length(InTransform[2]);

	float3x3 UnitRows;
	UnitRows[0] = InTransform[0] / RowLengths.x;
	UnitRows[1] = InTransform[1] / RowLengths.y;
	UnitRows[2] = InTransform[2] / RowLengths.z;

	return EncodeScaleAndRotation(RowLengths, UnitRows);
}
// Encodes a 4x4 transform: the upper-left 3x3 becomes packed rotation/scale, row 3 is passed through as translation.
void EncodeTransform( float4x4 InTransform, inout uint4 OutRotationScale, inout float3 OutTranslation )
{
	const float3x3 RotationAndScale = (float3x3)InTransform;

	OutTranslation = InTransform[3].xyz;
	OutRotationScale = EncodeScaleAndRotation(RotationAndScale);
}
// Inverse of EncodeTransform: rebuilds a 4x4 matrix from packed rotation/scale and a translation.
// Scale additionally receives the decoded (signed) per-axis scale that was multiplied into rows 0-2.
float4x4 DecodeTransform( uint4 RotationScale, float3 Translation, inout float3 Scale )
{
float4x4 M = 0.0;
M[3].xyz = Translation;
M[3].w = 1.0;
// Rotation
{
// Two 16-bit octahedral components and the 15-bit spin value
float3 Rotation =
{
( RotationScale[0] >> 0 ) & 0xffff,
( RotationScale[0] >> 16 ) & 0xffff,
( RotationScale[1] >> 0 ) & 0x7fff
};
// Remove the encode-side biases and quantization scales
float2 OctZ = ( Rotation.xy - 32768 ) * (1.0f / 32767.0f);
float Spin0 = ( Rotation.z - 16384 ) * (0.70710678f / 16383.0f); // rsqrt(2)
bool bSpinIsX = RotationScale[1] & 0x8000;
M[2].xyz = HemiOctahedronToUnitVector( OctZ );
float3 BasisX, BasisY;
GetHemiOrthoBasis( BasisX, BasisY, M[2].xyz );
// The second component is recovered from the unit-length constraint; it is always taken as non-negative
float Spin1 = sqrt( 1.0f - Spin0 * Spin0 );
float X = bSpinIsX ? Spin0 : Spin1;
float Y = bSpinIsX ? Spin1 : Spin0;
M[0].xyz = BasisX * X + BasisY * Y;
M[1].xyz = cross( M[2].xyz, M[0].xyz );
}
// Scale
{
const uint SignMantissaBits = 16;
const uint SignMantissaMask = (1u << SignMantissaBits) - 1;
const uint MantissaBits = SignMantissaBits - 1;
// The disabled branch decodes a different bit layout (18-bit mantissas, exponent at bit 22) that the encoder above does not produce
#if 0
uint SharedExp = RotationScale[3] >> 22;
float ExpScale = asfloat( ( SharedExp - MantissaBits ) << 23 );
int3 Mantissa =
{
( RotationScale[2] >> 0 ),
( RotationScale[2] >> 18 ) | ( RotationScale[3] << 14 ),
( RotationScale[3] >> 4 )
};
#else
// Shared exponent lives in the high half of w; the three mantissas are 16 bits each
uint SharedExp = RotationScale[3] >> 16;
float ExpScale = asfloat( ( SharedExp - MantissaBits ) << 23 );
uint3 Mantissa =
{
RotationScale[2] >> 0,
RotationScale[2] >> 16,
RotationScale[3] >> 0
};
#endif
Mantissa &= SignMantissaMask;
Scale = Mantissa;
// Undo the bias that made the signed mantissas unsigned, then apply the shared exponent
Scale -= 1u << MantissaBits;
Scale *= ExpScale;
M[0] *= Scale[0];
M[1] *= Scale[1];
M[2] *= Scale[2];
}
return M;
}
// Helpers to pack/unpack the primitive ID and flags for the specified instance, which are packed together in a uint.
// Layout: primitive id in the low PRIMITIVE_ID_NUM_BITS bits, instance flags in the
// INSTANCE_SCENE_DATA_FLAGS_NUM_BITS bits directly above.
void UnpackPrimitiveIdAndInstanceFlags(uint PackedPrimitiveIdAndFlags, inout uint OutPrimitiveId, inout uint OutInstanceFlags)
{
	const uint FlagsFirstBit = PRIMITIVE_ID_NUM_BITS;

	OutInstanceFlags = BitFieldExtractU32(PackedPrimitiveIdAndFlags, INSTANCE_SCENE_DATA_FLAGS_NUM_BITS, FlagsFirstBit);
	OutPrimitiveId = BitFieldExtractU32(PackedPrimitiveIdAndFlags, PRIMITIVE_ID_NUM_BITS, 0);
}
// Packs a primitive id (masked to PRIMITIVE_ID_NUM_BITS) and instance flags (shifted above it) into one uint.
// InstanceFlags is not masked; bits beyond the available width are shifted out.
uint PackPrimitiveIdAndInstanceFlags(uint PrimitiveId, uint InstanceFlags)
{
	const uint IdBits = PrimitiveId & PRIMITIVE_ID_MASK;
	const uint FlagBits = InstanceFlags << PRIMITIVE_ID_NUM_BITS;
	return IdBits | FlagBits;
}
// Loads and unpacks the primitive id and flags of an instance.
// They are stored in the x component of the instance's element in the first SoA array (array index 0).
void LoadInstancePrimitiveIdAndFlags(uint InstanceId, uint SOAStride, inout uint OutPrimitiveId, inout uint OutInstanceFlags)
{
	const float4 FirstElement = LoadInstanceSceneDataElement(0 * SOAStride + InstanceId);
	UnpackPrimitiveIdAndInstanceFlags(asuint(FirstElement.x), OutPrimitiveId, OutInstanceFlags);
}
// Helpers to pack/unpack the instance relative ID and custom data count for the specified instance, which are packed together in a uint.
// Layout: relative id in the low INSTANCE_RELATIVE_ID_NUM_BITS bits, custom data count in the
// INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS bits directly above.
void UnpackInstanceRelativeIdAndCustomDataCount(uint PackedRelativeIdAndCustomDataCount, inout uint OutRelativeId, inout uint OutCustomDataCount)
{
	const uint CountFirstBit = INSTANCE_RELATIVE_ID_NUM_BITS;

	OutCustomDataCount = BitFieldExtractU32(PackedRelativeIdAndCustomDataCount, INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS, CountFirstBit);
	OutRelativeId = BitFieldExtractU32(PackedRelativeIdAndCustomDataCount, INSTANCE_RELATIVE_ID_NUM_BITS, 0);
}
// Packs a relative id (masked to INSTANCE_RELATIVE_ID_NUM_BITS) and a custom data count (shifted above it) into one uint.
// CustomDataCount is not masked; bits beyond the available width are shifted out.
uint PackInstanceRelativeIdAndCustomDataCount(uint RelativeId, uint CustomDataCount)
{
	const uint IdBits = RelativeId & INSTANCE_RELATIVE_ID_MASK;
	const uint CountBits = CustomDataCount << INSTANCE_RELATIVE_ID_NUM_BITS;
	return IdBits | CountBits;
}
// Loads and unpacks the relative id and custom data count of an instance.
// They are stored in the y component of the instance's element in the first SoA array (array index 0).
// NOTE: despite their names, OutPrimitiveId receives the instance RELATIVE ID and OutInstanceFlags receives the
// CUSTOM DATA COUNT; the parameter names are kept as-is for source compatibility.
void LoadInstanceRelativeIdAndCustomDataCount(uint InstanceId, uint SOAStride, inout uint OutPrimitiveId, inout uint OutInstanceFlags)
{
	const float4 FirstElement = LoadInstanceSceneDataElement(0 * SOAStride + InstanceId);
	UnpackInstanceRelativeIdAndCustomDataCount(asuint(FirstElement.y), OutPrimitiveId, OutInstanceFlags);
}
// Helpers for getting/setting the instance determinant sign from instance data flags
// Returns -1 when INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN is set in Flags, +1 otherwise.
float GetInstanceDeterminantSignFromFlags(uint Flags)
{
	// Scale.x * Scale.y * Scale.z < 0.0 ? -1.0 : 1.0;
	const uint NegativeDeterminantBit = Flags & INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN;
	return CondMask(NegativeDeterminantBit, -1.0f, 1.0f);
}
// Sets INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN in Flags when Determinant is negative and clears it otherwise.
// All other bits of Flags are left untouched.
void SetInstanceDeterminantSignFlag(float Determinant, inout uint Flags)
{
	const bool bNegative = Determinant < 0.0f;
	const uint FlagsWithBit = Flags | INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN;
	const uint FlagsWithoutBit = Flags & ~INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN;

	Flags = bNegative ? FlagsWithBit : FlagsWithoutBit;
}
// Determine the offsets into the payload data buffer for the given instance.
// The instance's payload starts at (primitive payload offset + relative id * primitive payload stride); the
// optional sections follow each other in a fixed order and only occupy space when their flag is set.
// Sections that are absent are reported as INVALID_INSTANCE_PAYLOAD_OFFSET.
FInstancePayloadDataOffsets GetInstancePayloadDataOffsets(uint PrimitiveId, uint Flags, uint InstanceRelativeId)
{
	const FPrimitiveSceneData OwnerPrimitive = GetPrimitiveData(PrimitiveId);
	const uint RelativeStart = InstanceRelativeId * OwnerPrimitive.InstancePayloadDataStride;
	const uint GlobalStart = RelativeStart + OwnerPrimitive.InstancePayloadDataOffset;

	const bool bHasHierarchyOffset = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_HIERARCHY_OFFSET) != 0u;
	const bool bHasLocalBounds = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_LOCAL_BOUNDS) != 0u;
	const bool bHasDynamicData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_DYNAMIC_DATA) != 0u;
	const bool bHasLightShadowUVBias = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_LIGHTSHADOW_UV_BIAS) != 0u;
	const bool bHasCustomData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_CUSTOM_DATA) != 0u;
#if USE_EDITOR_SHADERS
	const bool bHasEditorData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_EDITOR_DATA) != 0u;
#else
	const bool bHasEditorData = false;
#endif

	// Offsets are in float4s
	uint Cursor = GlobalStart;
	FInstancePayloadDataOffsets Offsets;

	// The first three sections share the leading float4(s), so they all start at the same offset:
	// Hierarchy Offset -> float0.x
	// EditorData -> float0.y
	// LocalBounds -> float0.zw & float1.xyzw
	Offsets.HierarchyOffset = bHasHierarchyOffset ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Offsets.EditorData = bHasEditorData ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Offsets.LocalBounds = bHasLocalBounds ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;

	// Local bounds need two float4s; hierarchy offset and/or editor data alone need one.
	Cursor += CondMask(bHasLocalBounds, 2u, CondMask(bHasHierarchyOffset || bHasEditorData, 1u, 0u));

	Offsets.DynamicData = bHasDynamicData ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Cursor += bHasDynamicData ? InstanceTransformSizeFloat4Count : 0u;

	Offsets.LightShadowUVBias = bHasLightShadowUVBias ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Cursor += bHasLightShadowUVBias ? 1u : 0u;

	// Custom data is the last section, so nothing needs to advance past it.
	Offsets.CustomData = bHasCustomData ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;

	return Offsets;
}
// Fills in the InstanceData members that are derived purely from the local-to-world transform and the flags:
// NonUniformScale (xyz only on paths that do not decode it, w always), InvNonUniformScale (same paths),
// DeterminantSign and WorldToLocal.
// On the compressed-transform path the caller is expected to have set NonUniformScale.xyz and InvNonUniformScale already.
void ComputeInstanceDerivedData(inout FInstanceSceneData InstanceData, float3 TilePosition, float4x4 LocalToRelativeWorld)
{
	//
	// Do not put any load operations here!
	//

#if (VF_USE_PRIMITIVE_SCENE_DATA == 2) || !INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
	// Non-uniform scale must be computed from the transform because it was not already computed when decoding it (see below in GetInstanceSceneData)
	const float3 AxisLengthSquared = float3(
		length2(LocalToRelativeWorld[0].xyz),
		length2(LocalToRelativeWorld[1].xyz),
		length2(LocalToRelativeWorld[2].xyz));

	// 1 / |axis|, then |axis| = |axis|^2 * (1 / |axis|)
	InstanceData.InvNonUniformScale = rsqrt(AxisLengthSquared);
	InstanceData.NonUniformScale.xyz = AxisLengthSquared * InstanceData.InvNonUniformScale;
#endif

	// w holds the largest of the three axis scales
	InstanceData.NonUniformScale.w = max3(InstanceData.NonUniformScale.x, InstanceData.NonUniformScale.y, InstanceData.NonUniformScale.z);

	InstanceData.DeterminantSign = GetInstanceDeterminantSignFromFlags(InstanceData.Flags);

	// Build the inverse without a general matrix inversion:
	// scale each basis row by 1 / scale^2, drop the translation and transpose ...
	const float3 InvScaleSquared = float3(
		Pow2(InstanceData.InvNonUniformScale.x),
		Pow2(InstanceData.InvNonUniformScale.y),
		Pow2(InstanceData.InvNonUniformScale.z));

	float4x4 InverseTransform = LocalToRelativeWorld;
	InverseTransform[0].xyz *= InvScaleSquared.x;
	InverseTransform[1].xyz *= InvScaleSquared.y;
	InverseTransform[2].xyz *= InvScaleSquared.z;
	InverseTransform[3].xyz = 0.0f;
	InverseTransform = transpose(InverseTransform);

	// ... then push the negated translation through the result to obtain the inverse translation row
	const float4 NegatedTranslation = float4(-LocalToRelativeWorld[3].xyz, 0.0f);
	InverseTransform[3].xyz = mul(NegatedTranslation, InverseTransform).xyz;

	InstanceData.WorldToLocal = MakeLWCInverseMatrix(TilePosition, InverseTransform);
}
// Fetch from scene primitive buffer
//
// Loads and decodes all data of a single instance.
//   InstanceId  - index of the instance within the instance scene data
//   SOAStride   - stride between consecutive rows of the instance scene data (element k is read at k * SOAStride + InstanceId)
//   bCheckValid - when true, an instance whose primitive id is INVALID_PRIMITIVE_ID is returned with only
//                 PrimitiveId, Flags and ValidInstance filled in; every other member stays zero
FInstanceSceneData GetInstanceSceneData(uint InstanceId, uint SOAStride, bool bCheckValid = true)
{
	FInstanceSceneData InstanceData = (FInstanceSceneData)0;

	//
	// NOTE: When changing the packed data layout, ensure that GPUScene/GPUSceneWriter.ush is kept in sync!
	// Also, please update the GetInstanceSceneData function in GPUScene.cpp for validation purposes.
	//

	// Only process valid instances
	LoadInstancePrimitiveIdAndFlags(InstanceId, SOAStride, InstanceData.PrimitiveId, InstanceData.Flags);
	InstanceData.ValidInstance = InstanceData.PrimitiveId != INVALID_PRIMITIVE_ID;

	// Payload Data Layout (see GetInstancePayloadDataOffsets)
	// Each section is only present when the matching instance flag is set; the sections that follow move up accordingly.
	// Hierarchy offset, editor data and local bounds share the leading float4s of the payload.
	//    Random ID                <packed inline>
	//    Custom Data Count        <packed inline>
	//    HierarchyOffset          float0.x
	//    EditorData               float0.y
	//    LocalBounds Center       float0.zw, float1.x
	//    LocalBounds Extent       float1.yzw
	//    Previous Transform       2 float4s read when transforms are compressed, 3 float4s otherwise
	//    LM/SM Scale Bias         1 float4
	//    Custom Data Float4s      all float4s from the custom data offset onwards

	BRANCH
	if (!bCheckValid || InstanceData.ValidInstance)
	{
		uint CustomDataCount;
		LoadInstanceRelativeIdAndCustomDataCount(InstanceId, SOAStride, InstanceData.RelativeId, CustomDataCount);

		FInstancePayloadDataOffsets Offsets = GetInstancePayloadDataOffsets(InstanceData.PrimitiveId, InstanceData.Flags, InstanceData.RelativeId);

	#if ENABLE_PER_INSTANCE_CUSTOM_DATA
		InstanceData.CustomDataCount  = CustomDataCount;
		InstanceData.CustomDataOffset = Offsets.CustomData;
	#endif

		InstanceData.LastUpdateSceneFrameNumber = asuint(LoadInstanceSceneDataElement(0 * SOAStride + InstanceId).z);

	#if 1//USES_PER_INSTANCE_RANDOM
		InstanceData.RandomID = LoadInstanceSceneDataElement(0 * SOAStride + InstanceId).w;
	#endif

		float3 TilePosition = GetPrimitiveData(InstanceData.PrimitiveId).TilePosition;

	#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
		uint4  RotationScale = asuint(LoadInstanceSceneDataElement(1 * SOAStride + InstanceId));
		float3 Translation   = LoadInstanceSceneDataElement(2 * SOAStride + InstanceId).xyz;
		float3 Scale = 0;
		float4x4 LocalToRelativeWorld = DecodeTransform( RotationScale, Translation, Scale );

		uint4  PrevRotationScale = asuint(LoadInstanceSceneDataElement(3 * SOAStride + InstanceId));
		float3 PrevTranslation   = LoadInstanceSceneDataElement(4 * SOAStride + InstanceId).xyz;
		float3 PrevScale = 0;
		float4x4 PrevLocalToRelativeWorld = DecodeTransform( PrevRotationScale, PrevTranslation, PrevScale );

		// The scale comes out of the decode, so it does not have to be re-derived in ComputeInstanceDerivedData
		InstanceData.NonUniformScale.xyz = abs(Scale);
		InstanceData.InvNonUniformScale  = rcp(InstanceData.NonUniformScale.xyz);
	#else
		float4x4 LocalToRelativeWorld = transpose(float4x4(LoadInstanceSceneDataElement(1 * SOAStride + InstanceId),
														   LoadInstanceSceneDataElement(2 * SOAStride + InstanceId),
														   LoadInstanceSceneDataElement(3 * SOAStride + InstanceId),
														   float4(0.0f, 0.0f, 0.0f, 1.0f)));

		float4x4 PrevLocalToRelativeWorld = transpose(float4x4(LoadInstanceSceneDataElement(4 * SOAStride + InstanceId),
															   LoadInstanceSceneDataElement(5 * SOAStride + InstanceId),
															   LoadInstanceSceneDataElement(6 * SOAStride + InstanceId),
															   float4(0.0f, 0.0f, 0.0f, 1.0f)));
	#endif

		InstanceData.LocalToWorld     = MakeLWCMatrix(TilePosition, LocalToRelativeWorld);
		InstanceData.PrevLocalToWorld = MakeLWCMatrix(TilePosition, PrevLocalToRelativeWorld);

		ComputeInstanceDerivedData(InstanceData, TilePosition, LocalToRelativeWorld);

		InstanceData.NaniteRuntimeResourceID = GetPrimitiveData(InstanceData.PrimitiveId).NaniteResourceID;
		InstanceData.NaniteHierarchyOffset   = GetPrimitiveData(InstanceData.PrimitiveId).NaniteHierarchyOffset;

		BRANCH
		if (Offsets.HierarchyOffset != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			const uint HierarchyRootOffset = asuint(LoadInstancePayloadDataElement(Offsets.HierarchyOffset)).x;

			// Combine this instance's hierarchy offset with the primitive's root hierarchy offset
			InstanceData.NaniteHierarchyOffset += HierarchyRootOffset;
		}

	#if USE_EDITOR_SHADERS
		BRANCH
		if (Offsets.EditorData != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			// Top 8 bits: selection state, low 24 bits: packed hit proxy id
			const uint PackedEditorData = asuint(LoadInstancePayloadDataElement(Offsets.EditorData)).y;

			InstanceData.EditorData.bIsSelected    = (PackedEditorData >> 24u) != 0;
			InstanceData.EditorData.HitProxyPacked = PackedEditorData & 0x00FFFFFFu;
			InstanceData.EditorData.HitProxyId     = UnpackHitProxyId(InstanceData.EditorData.HitProxyPacked);
		}
	#endif

		BRANCH
		if (Offsets.LocalBounds != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			InstanceData.LocalBoundsCenter = float3(LoadInstancePayloadDataElement(Offsets.LocalBounds + 0).zw, LoadInstancePayloadDataElement(Offsets.LocalBounds + 1).x);
			InstanceData.LocalBoundsExtent = LoadInstancePayloadDataElement(Offsets.LocalBounds + 1).yzw;
		}
		else
		{
			// No per-instance bounds: fall back to the bounds stored on the primitive
			InstanceData.LocalBoundsCenter = GetPrimitiveData(InstanceData.PrimitiveId).InstanceLocalBoundsCenter;
			InstanceData.LocalBoundsExtent = GetPrimitiveData(InstanceData.PrimitiveId).InstanceLocalBoundsExtent;
		}

		BRANCH
		if (Offsets.DynamicData != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
		#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
			uint4  PrevRotationScale = asuint(LoadInstancePayloadDataElement(Offsets.DynamicData + 0));
			float3 PrevTranslation   = LoadInstancePayloadDataElement(Offsets.DynamicData + 1).xyz;
			float3 PrevScale = 0;
			float4x4 PrevLocalToRelativeWorld = DecodeTransform(PrevRotationScale, PrevTranslation, PrevScale);
		#else
			float4x4 PrevLocalToRelativeWorld = transpose(float4x4(LoadInstancePayloadDataElement(Offsets.DynamicData + 0),
																   LoadInstancePayloadDataElement(Offsets.DynamicData + 1),
																   LoadInstancePayloadDataElement(Offsets.DynamicData + 2),
																   float4(0.0f, 0.0f, 0.0f, 1.0f)));
		#endif

			float3 TilePosition = GetPrimitiveData(InstanceData.PrimitiveId).TilePosition;
			InstanceData.PrevLocalToWorld = MakeLWCMatrix(TilePosition, PrevLocalToRelativeWorld);
		}
		else
		{
		#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
			// TODO: Temporary PrevVelocityHack
			uint4  PrevRotationScale = asuint(LoadInstanceSceneDataElement(3 * SOAStride + InstanceId));
			float3 PrevTranslation   = LoadInstanceSceneDataElement(4 * SOAStride + InstanceId).xyz;
			float3 PrevScale = 0;
			float4x4 PrevLocalToRelativeWorld = DecodeTransform(PrevRotationScale, PrevTranslation, PrevScale);
		#else
			// No dynamic data: the previous transform equals the current one.
			// Use the tile-relative float4x4 here; InstanceData.LocalToWorld is an LWC matrix and cannot initialise a float4x4.
			float4x4 PrevLocalToRelativeWorld = LocalToRelativeWorld;
		#endif

			float3 TilePosition = GetPrimitiveData(InstanceData.PrimitiveId).TilePosition;
			InstanceData.PrevLocalToWorld = MakeLWCMatrix(TilePosition, PrevLocalToRelativeWorld);
		}

	#if 1 //NEEDS_LIGHTMAP_COORDINATE
		BRANCH
		if (Offsets.LightShadowUVBias != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			InstanceData.LightMapAndShadowMapUVBias = LoadInstancePayloadDataElement(Offsets.LightShadowUVBias);
		}
	#endif
	}

	return InstanceData;
}
// Bundle of primitive and instance data returned by the GetSceneDataIntermediates variants.
struct FSceneDataIntermediates
{
// Id of the primitive the instance belongs to
uint PrimitiveId;
// Id of the instance; the variants that do not read the instance scene data leave this at 0
uint InstanceId;
// Index of the view within the current render pass; 0 unless it was packed into the loaded instance id
uint ViewIndex;
// Index from which we load the instance info (InstanceIdOffset + DrawInstanceId in the GPU-scene variant, 0 otherwise)
uint InstanceIdLoadIndex;
// Per-instance data
FInstanceSceneData InstanceData;
// Per-primitive data
FPrimitiveSceneData Primitive;
};
/**
 * Load the scene data once, given the required inputs.
 * InstanceIdOffset - supplied as a vertex stream with an instance step rate of 0 (constant for all instances)
 * DrawInstanceId - the instance ID (SV_InstanceID) in the current draw
 */
#if (VF_USE_PRIMITIVE_SCENE_DATA == 1)
FSceneDataIntermediates GetSceneDataIntermediates(uint InstanceIdOffset, uint DrawInstanceId)
{
	FSceneDataIntermediates Intermediates = (FSceneDataIntermediates)0;

	const uint LoadIndex = InstanceIdOffset + DrawInstanceId;
	Intermediates.InstanceIdLoadIndex = LoadIndex;

	// GPUCULL_TODO: workaround for the fact that DrawDynamicMeshPassPrivate et al. don't work with GPU-Scene instancing
	//               instead they mark the top bit in the primitive ID and disable auto instancing such that there is an 1:1:1
	//               drawcmd:primitive:instance. Then we can just look up the primitive and fetch the instance data index.
	// GPUCULL_TODO: Workaround also used for ray tracing interfacing with the VFs, that also supply a DrawInstanceId.

	// We mark the PrimitiveID with the top bit in dynamic draw passes
	const bool bOffsetHoldsPrimitiveId = (InstanceIdOffset & VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG) != 0U;

	if (bOffsetHoldsPrimitiveId)
	{
		// mask off the flag
		const uint PrimitiveIdMask = VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG - 1U;
		const uint PrimitiveID = InstanceIdOffset & PrimitiveIdMask;

		Intermediates.InstanceId = GetPrimitiveData(PrimitiveID).InstanceSceneDataOffset + DrawInstanceId;
		Intermediates.ViewIndex  = 0;
	}
	else
	{
		// Low 28 bits: instance ID, top four bits: view index
		const uint ViewIndexShift = 28U;
		const uint PackedInstanceId = InstanceCulling.InstanceIdsBuffer[LoadIndex];

		Intermediates.InstanceId = PackedInstanceId & ((1U << ViewIndexShift) - 1);

		// We store the view index (which can be used for instanced stereo or other multi-view) in the top four bits of the instance ID
		// Note: this is an index of views for this render pass, not the view ID in the culling manager.
		Intermediates.ViewIndex = PackedInstanceId >> ViewIndexShift;
	}

	Intermediates.InstanceData = GetInstanceSceneData(Intermediates.InstanceId, View.InstanceSceneDataSOAStride);
	Intermediates.PrimitiveId  = Intermediates.InstanceData.PrimitiveId;
	Intermediates.Primitive    = GetPrimitiveData(Intermediates.PrimitiveId);

	return Intermediates;
}
#elif (VF_USE_PRIMITIVE_SCENE_DATA == 2)
// Must match PackLocalBoundsCenter and PackLocalBoundsExtent
// Decodes a bounds center from the bit patterns of two floats. Each component is a biased whole number:
// X and Y sit in the upper 21 bits of the first and second word, Z is split across the low 11 bits of both words.
float3 UnpackLocalBoundsCenter(float2 PackedCenter)
{
	const uint LowBitCount = 11u;
	const uint LowBitMask = (1u << LowBitCount) - 1u;
	// Bias that is subtracted from every component to recover signed values
	const float CenterBias = (1u << 20u) - 1u;

	const uint2 PackedBits = asuint(PackedCenter);

	const uint3 BiasedCenter = uint3(
		PackedBits.x >> LowBitCount,
		PackedBits.y >> LowBitCount,
		(PackedBits.x & LowBitMask) | ((PackedBits.y & LowBitMask) << LowBitCount));

	return float3(BiasedCenter) - CenterBias;
}
// Decodes a bounds extent from the bit patterns of two floats. Each component is an unbiased whole number:
// X and Y sit in the upper 21 bits of the first and second word, Z is split across the low 11 bits of both words.
float3 UnpackLocalBoundsExtent(float2 PackedExtent)
{
	const uint LowBitCount = 11u;
	const uint LowBitMask = (1u << LowBitCount) - 1u;

	const uint2 PackedBits = asuint(PackedExtent);

	const uint3 Extent = uint3(
		PackedBits.x >> LowBitCount,
		PackedBits.y >> LowBitCount,
		(PackedBits.x & LowBitMask) | ((PackedBits.y & LowBitMask) << LowBitCount));

	return float3(Extent);
}
// Builds the scene data intermediates from per-instance vertex data plus the Primitive uniform buffer.
//   DrawInstanceId        - not read by this variant
//   InstanceOrigin        - xyz: translation row of the local-to-world transform, w: primitive id (as uint bits)
//   InstanceTransform1    - xyz: first basis row, w: primitive flags (as uint bits; instance flags in the upper 16 bits)
//   InstanceTransform2    - xyz: second basis row, w: lightmap data index (as uint bits; read only with ALLOW_STATIC_LIGHTING)
//   InstanceTransform3    - xyz: third basis row, w: random id (read only with USES_PER_INSTANCE_RANDOM)
//   InstanceAuxData       - with ALLOW_STATIC_LIGHTING: y and w hold the packed lightmap / shadowmap UV bias,
//                           otherwise xy / zw hold the packed local bounds center / extent
FSceneDataIntermediates GetSceneDataIntermediates(uint DrawInstanceId, float4 InstanceOrigin, float4 InstanceTransform1, float4 InstanceTransform2, float4 InstanceTransform3, float4 InstanceAuxData)
{
	// Not all mobile devices can access storage buffers from vertex shaders,
	// so we supply some of the primitive data using per-instance vertex data, and the rest comes from the Primitive UB.
	// If the vertex shader uses any of the Primitive UB data, the associated drawcalls will not auto-instance.
	FPrimitiveSceneData Primitive = GetPrimitiveDataFromUniformBuffer();

	// TODO: add support for LWC, we should pack it as integer tile coordinates
	float3 TilePosition = float3(0,0,0);

	// TODO: pack important primitive and instance flags here
	const uint PrimitiveFlags = asuint(InstanceTransform1.w);
	const uint InstanceFlags  = (PrimitiveFlags >> 16);

	// Reconstruct InstanceData from the packed data
	FInstanceSceneData InstanceData = (FInstanceSceneData)0;

	float4x4 LocalToRelativeWorld = float4x4(
		float4(InstanceTransform1.xyz, 0.0f),
		float4(InstanceTransform2.xyz, 0.0f),
		float4(InstanceTransform3.xyz, 0.0f),
		float4(InstanceOrigin.xyz, 1.0f));

	InstanceData.LocalToWorld = MakeLWCMatrix(TilePosition, LocalToRelativeWorld);
	InstanceData.Flags        = InstanceFlags;
	InstanceData.PrimitiveId  = asuint(InstanceOrigin.w);
#if USES_PER_INSTANCE_RANDOM
	InstanceData.RandomID     = InstanceTransform3.w;
#endif

	ComputeInstanceDerivedData(InstanceData, TilePosition, LocalToRelativeWorld);

	// Finish populating InstanceData BEFORE it is copied into the intermediates below:
	// the copy is by value, so anything written to the local afterwards would never reach the caller.
#if ALLOW_STATIC_LIGHTING
	InstanceData.LightMapAndShadowMapUVBias = float4(
		UnpackSnorm2x16(asuint(InstanceAuxData.y)),
		UnpackSnorm2x16(asuint(InstanceAuxData.w)));
#else
	InstanceData.LocalBoundsCenter = UnpackLocalBoundsCenter(InstanceAuxData.xy);
	InstanceData.LocalBoundsExtent = UnpackLocalBoundsExtent(InstanceAuxData.zw);
#endif

	FSceneDataIntermediates Intermediates = (FSceneDataIntermediates)0;
	Intermediates.InstanceData        = InstanceData;
	Intermediates.PrimitiveId         = InstanceData.PrimitiveId;
	Intermediates.InstanceId          = 0;
	Intermediates.ViewIndex           = 0;
	Intermediates.InstanceIdLoadIndex = 0;
	Intermediates.Primitive           = Primitive;

	// Primitive data that comes from per-instance vertex data
	Intermediates.Primitive.Flags              = PrimitiveFlags;
	Intermediates.Primitive.LocalToWorld       = InstanceData.LocalToWorld;
	Intermediates.Primitive.InvNonUniformScale = InstanceData.InvNonUniformScale;
	Intermediates.Primitive.WorldToLocal       = InstanceData.WorldToLocal;
	Intermediates.Primitive.NonUniformScale    = InstanceData.NonUniformScale;

#if ALLOW_STATIC_LIGHTING
	Intermediates.Primitive.LightmapDataIndex = asuint(InstanceTransform2.w);
#else
	Intermediates.Primitive.LocalObjectBoundsMin = InstanceData.LocalBoundsCenter - InstanceData.LocalBoundsExtent;
	Intermediates.Primitive.LocalObjectBoundsMax = InstanceData.LocalBoundsCenter + InstanceData.LocalBoundsExtent;

	// NOTE(review): w = 0 means the translation row of LocalToRelativeWorld does not contribute, so the result is
	// relative to the instance origin rather than a full world position - confirm this is intended.
	float3 ObjectRelativeWorldPosition = mul(float4(InstanceData.LocalBoundsCenter.xyz, 0.0f), LocalToRelativeWorld).xyz;
	Intermediates.Primitive.ObjectWorldPosition = MakeLWCVector3(TilePosition, ObjectRelativeWorldPosition);
	Intermediates.Primitive.ObjectRadius = length(InstanceData.LocalBoundsExtent * InstanceData.NonUniformScale.xyz);
#endif

	return Intermediates;
}
#else
// Builds the scene data intermediates from the Primitive uniform buffer alone.
// The single "instance" simply mirrors the primitive and is always marked valid.
FSceneDataIntermediates GetSceneDataIntermediates()
{
	// Populate from Primitive uniform buffer
	const FPrimitiveSceneData PrimitiveData = GetPrimitiveDataFromUniformBuffer();

	// Populate instance data from primitive data
	FInstanceSceneData InstanceData = (FInstanceSceneData)0;
	InstanceData.ValidInstance      = true;
	InstanceData.LocalToWorld       = PrimitiveData.LocalToWorld;
	InstanceData.PrevLocalToWorld   = PrimitiveData.PreviousLocalToWorld;
	InstanceData.WorldToLocal       = PrimitiveData.WorldToLocal;
	InstanceData.NonUniformScale    = PrimitiveData.NonUniformScale;
	InstanceData.InvNonUniformScale = PrimitiveData.InvNonUniformScale;
	InstanceData.DeterminantSign    = GetPrimitive_DeterminantSign_FromFlags(PrimitiveData.Flags);

	// Convert the min / max box of the primitive into center / half-extent form
	InstanceData.LocalBoundsCenter  = (PrimitiveData.LocalObjectBoundsMax + PrimitiveData.LocalObjectBoundsMin) * 0.5f;
	InstanceData.LocalBoundsExtent  = (PrimitiveData.LocalObjectBoundsMax - PrimitiveData.LocalObjectBoundsMin) * 0.5f;

	FSceneDataIntermediates Intermediates = (FSceneDataIntermediates)0;
	Intermediates.ViewIndex    = 0U;
	Intermediates.PrimitiveId  = 0U;
	Intermediates.InstanceId   = 0U;
	Intermediates.Primitive    = PrimitiveData;
	Intermediates.InstanceData = InstanceData;

	return Intermediates;
}
#endif //VF_USE_PRIMITIVE_SCENE_DATA