Files
UnrealEngineUWP/Engine/Shaders/Private/SceneData.ush
Ola Olsson 94a55f4768 Fix VF input setup for ray tracing use-cases with GPU-Scene instance culling enabled.
#rb yuriy.odonnell

[CL 15241176 by Ola Olsson in ue5-main branch]
2021-01-28 12:24:41 -04:00

352 lines
15 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
// Selects where this file reads GPU-Scene data from: 1 = the GPUScene* buffers declared in this file, 0 = the View uniform buffer.
// Defaults to 0 unless it was defined before this point.
#ifndef USE_GLOBAL_GPU_SCENE_DATA
#define USE_GLOBAL_GPU_SCENE_DATA 0
#endif
// Whether to fetch primitive values (eg LocalToWorld) by dynamically indexing a scene-wide buffer, or to reference a single Primitive uniform buffer
#define VF_USE_PRIMITIVE_SCENE_DATA ((FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || FEATURE_LEVEL == FEATURE_LEVEL_ES3_1) && VF_SUPPORTS_PRIMITIVE_SCENE_DATA)
// Whether per-instance scene data is available at this feature level (gates FInstanceSceneData and GetInstanceData in this file).
#define INSTANCE_SCENE_DATA_SUPPORTED (FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || FEATURE_LEVEL == FEATURE_LEVEL_ES3_1)
// GPUCULL_TODO: Eventually we need to remove this workaround
// Top bit of the InstanceIdOffset input. When it is set, GetSceneDataIntermediates() treats the remaining 31 bits
// as a primitive ID instead of an offset into View.InstanceIdsBuffer.
#define VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG (1U << 31U)
// Unpacked AoS layout - see FInstanceSceneShaderData::Setup() for SoA packed layout.
#if INSTANCE_SCENE_DATA_SUPPORTED
// Must match FInstanceUniformShaderParameters in C++
// Per-instance scene data in unpacked form, as filled in by GetInstanceData().
struct FInstanceSceneData
{
float4x4 LocalToWorld;
float4x4 PrevLocalToWorld;
// Not loaded from the instance buffer: GetInstanceData() derives it from LocalToWorld and InvNonUniformScaleAndDeterminantSign.
float4x4 WorldToLocal;
float4 NonUniformScale;
float4 InvNonUniformScaleAndDeterminantSign;
float3 LocalBoundsCenter;
// ID of the primitive this instance belongs to; GetSceneDataIntermediates() passes it to GetPrimitiveData().
uint PrimitiveId;
float3 LocalBoundsExtent;
uint LastUpdateSceneFrameNumber;
uint NaniteRuntimeResourceID;
// NaniteHierarchyOffset and NaniteHasImposter share one 32-bit value in the instance buffer:
// bit 0 is NaniteHasImposter, the remaining upper bits are NaniteHierarchyOffset.
uint NaniteHierarchyOffset;
bool NaniteHasImposter;
// Derived by GetInstanceData(): true when every component of LightMapAndShadowMapUVBias differs from the
// sentinel value (-11, -22, -33, -44).
bool UseInstanceData;
// Bit 0 of the per-instance flags word.
bool CastShadows;
float PerInstanceRandom;
float4 LightMapAndShadowMapUVBias;
};
#if USE_GLOBAL_GPU_SCENE_DATA
// Instance data as float4 elements; read by LoadInstanceDataElement() in place of View.InstanceSceneData.
StructuredBuffer<float4> GPUSceneInstanceSceneData;
// Returned by GetGPUSceneFrameNumber() in place of View.FrameNumber.
uint GPUSceneFrameNumber;
#endif
// Returns the current scene frame number, taken from the global GPU-Scene value when
// USE_GLOBAL_GPU_SCENE_DATA is enabled and from the View uniform buffer otherwise.
uint GetGPUSceneFrameNumber()
{
	uint FrameNumber;
#if !USE_GLOBAL_GPU_SCENE_DATA
	FrameNumber = View.FrameNumber;
#else
	FrameNumber = GPUSceneFrameNumber;
#endif
	return FrameNumber;
}
// Reads one float4 element of instance scene data.
//   Index - absolute element index into the instance data buffer
// The source buffer is the global GPUSceneInstanceSceneData when USE_GLOBAL_GPU_SCENE_DATA is enabled,
// otherwise View.InstanceSceneData.
float4 LoadInstanceDataElement(uint Index)
{
	float4 Element;
#if !USE_GLOBAL_GPU_SCENE_DATA
	Element = View.InstanceSceneData[Index];
#else
	Element = GPUSceneInstanceSceneData[Index];
#endif
	return Element;
}
// Fetch one instance's data from the scene instance buffer and unpack it into FInstanceSceneData.
// The buffer is read as SoA: float4 element N of an instance lives at (N * SOAStride + InstanceId).
//   InstanceId - index of the instance within each of the SoA arrays
//   SOAStride  - distance, in float4 elements, between consecutive SoA arrays
FInstanceSceneData GetInstanceData(uint InstanceId, uint SOAStride)
{
	FInstanceSceneData InstanceData = (FInstanceSceneData)0;

	// Elements 0-2 and 3-5: current and previous transform. Only three float4s are stored per matrix;
	// the fourth is supplied as (0, 0, 0, 1) before transposing.
	InstanceData.LocalToWorld = transpose(float4x4(
		LoadInstanceDataElement(0 * SOAStride + InstanceId),
		LoadInstanceDataElement(1 * SOAStride + InstanceId),
		LoadInstanceDataElement(2 * SOAStride + InstanceId),
		float4(0.0f, 0.0f, 0.0f, 1.0f)));
	InstanceData.PrevLocalToWorld = transpose(float4x4(
		LoadInstanceDataElement(3 * SOAStride + InstanceId),
		LoadInstanceDataElement(4 * SOAStride + InstanceId),
		LoadInstanceDataElement(5 * SOAStride + InstanceId),
		float4(0.0f, 0.0f, 0.0f, 1.0f)));

	InstanceData.NonUniformScale = LoadInstanceDataElement(6 * SOAStride + InstanceId);
	InstanceData.InvNonUniformScaleAndDeterminantSign = LoadInstanceDataElement(7 * SOAStride + InstanceId);

	// Element 8: xyz = local bounds center, w = primitive ID bits
	const float4 BoundsCenterAndPrimitiveId = LoadInstanceDataElement(8 * SOAStride + InstanceId);
	InstanceData.LocalBoundsCenter = BoundsCenterAndPrimitiveId.xyz;
	InstanceData.PrimitiveId = asuint(BoundsCenterAndPrimitiveId.w);

	// Element 9: xyz = local bounds extent, w = last update frame number bits
	const float4 BoundsExtentAndFrameNumber = LoadInstanceDataElement(9 * SOAStride + InstanceId);
	InstanceData.LocalBoundsExtent = BoundsExtentAndFrameNumber.xyz;
	InstanceData.LastUpdateSceneFrameNumber = asuint(BoundsExtentAndFrameNumber.w);

	// Element 10: x = Nanite resource ID, y = hierarchy offset + imposter bit, z = per-instance random, w = flags
	const float4 NaniteRandomAndFlags = LoadInstanceDataElement(10 * SOAStride + InstanceId);
	InstanceData.NaniteRuntimeResourceID = asuint(NaniteRandomAndFlags.x);
	// Bit-cast directly to uint (matching the destination type) so the shift and mask below operate on an unsigned value
	const uint NaniteHierarchyOffset_AndHasImposter = asuint(NaniteRandomAndFlags.y);
	InstanceData.PerInstanceRandom = NaniteRandomAndFlags.z;
	const uint Flags = asuint(NaniteRandomAndFlags.w);

	InstanceData.LightMapAndShadowMapUVBias = LoadInstanceDataElement(11 * SOAStride + InstanceId);

	// Bit 0 is the imposter flag, the remaining bits are the hierarchy offset
	InstanceData.NaniteHierarchyOffset = (NaniteHierarchyOffset_AndHasImposter >> 1);
	InstanceData.NaniteHasImposter = (NaniteHierarchyOffset_AndHasImposter & 1u) != 0u;

	// WorldToLocal is not stored; rebuild it from LocalToWorld. The statement order matters:
	// scale the first three rows by Pow2 of the inverse scale, clear the translation, transpose,
	// and only then write the new translation row.
	InstanceData.WorldToLocal = InstanceData.LocalToWorld;
	InstanceData.WorldToLocal[0].xyz *= Pow2(InstanceData.InvNonUniformScaleAndDeterminantSign.x);
	InstanceData.WorldToLocal[1].xyz *= Pow2(InstanceData.InvNonUniformScaleAndDeterminantSign.y);
	InstanceData.WorldToLocal[2].xyz *= Pow2(InstanceData.InvNonUniformScaleAndDeterminantSign.z);
	InstanceData.WorldToLocal[3].xyz = 0.0f;
	InstanceData.WorldToLocal = transpose(InstanceData.WorldToLocal);
	InstanceData.WorldToLocal[3].xyz = mul(float4(-InstanceData.LocalToWorld[3].xyz, 0.0f), InstanceData.WorldToLocal).xyz;

	// TODO: Do something cleaner to indicate instanced LM/SM UV bias vs. uniform
	// NOTE(review): all() makes this false as soon as any single component equals its sentinel value;
	// confirm whether any() (i.e. "not equal to the sentinel vector") was intended.
	InstanceData.UseInstanceData = all(InstanceData.LightMapAndShadowMapUVBias != float4(-11.0f, -22.0f, -33.0f, -44.0f));
	InstanceData.CastShadows = (Flags & 1u) != 0u;

	return InstanceData;
}
#endif // INSTANCE_SCENE_DATA_SUPPORTED
#if VF_USE_PRIMITIVE_SCENE_DATA
#if USE_GLOBAL_GPU_SCENE_DATA
// Primitive data as float4 elements; read by LoadPrimitivePrimitiveSceneDataElement() in place of the View-provided primitive data.
StructuredBuffer<float4> GPUScenePrimitiveSceneData;
#endif
// Size of FPrimitiveSceneData::CustomPrimitiveData and the number of custom-data float4s GetPrimitiveData() reads per primitive.
#define NUM_CUSTOM_PRIMITIVE_DATA 9 // Num float4s used for custom data. Must match FCustomPrimitiveData::NumCustomPrimitiveDataFloat4s in SceneTypes.h
// Must match FPrimitiveUniformShaderParameters in C++
// Per-primitive scene data in unpacked form, as filled in by GetPrimitiveData() from PRIMITIVE_SCENE_DATA_STRIDE float4 elements.
// Integer members are stored in the buffer as raw bits inside float lanes and are bit-cast on load.
struct FPrimitiveSceneData
{
float4x4 LocalToWorld;
float4 InvNonUniformScaleAndDeterminantSign;
float4 ObjectWorldPositionAndRadius;
float4x4 WorldToLocal;
float4x4 PreviousLocalToWorld;
float4x4 PreviousWorldToLocal;
float3 ActorWorldPosition;
float UseSingleSampleShadowFromStationaryLights;
float3 ObjectBounds;
float LpvBiasMultiplier;
float DecalReceiverMask;
float PerObjectGBufferData;
float UseVolumetricLightmapShadowFromStationaryLights;
float DrawsVelocity;
float4 ObjectOrientation;
float4 NonUniformScale;
float3 LocalObjectBoundsMin;
uint LightingChannelMask;
float3 LocalObjectBoundsMax;
uint LightmapDataIndex;
float3 PreSkinnedLocalBoundsMin;
int SingleCaptureIndex;
float3 PreSkinnedLocalBoundsMax;
uint OutputVelocity;
uint LightmapUVIndex;
// Link to the range of instances that belong to this primitive
// InstanceDataOffset is the index of the first instance; GetSceneDataIntermediates() adds the draw instance ID to it.
int InstanceDataOffset;
int NumInstanceDataEntries;
// CastShadows=1
uint Flags;
// Loaded from the float4 elements that follow the fixed fields (elements 28 onwards in GetPrimitiveData()).
float4 CustomPrimitiveData[NUM_CUSTOM_PRIMITIVE_DATA];
};
// Stride of a single primitive's data in float4's, must match C++
// GetPrimitiveData() reads elements 0-27 as fixed fields followed by NUM_CUSTOM_PRIMITIVE_DATA (9) custom-data elements: 28 + 9 = 37.
#define PRIMITIVE_SCENE_DATA_STRIDE 37
// Location of a primitive's first float4 element, as computed by SetupPrimitiveIndexes().
struct FPrimitiveIndex
{
#if VF_GPU_SCENE_TEXTURE
// Texture path: IndY is the upper 16 bits of the primitive ID (used as the Y coordinate of the texture load),
// IndX is the lower 16 bits multiplied by PRIMITIVE_SCENE_DATA_STRIDE (the X coordinate of the first element).
uint IndY;
uint IndX;
#else
// Buffer path: PrimitiveId * PRIMITIVE_SCENE_DATA_STRIDE, i.e. the index of the primitive's first float4 element.
uint BaseOffset;
#endif
};
// Converts a primitive ID into the location of that primitive's first float4 element.
//   PrimitiveId - ID of the primitive to locate
// Buffer path: a linear element offset. Texture path: the upper 16 bits of the ID select the
// Y coordinate and the lower 16 bits, scaled by the per-primitive stride, select the X coordinate.
FPrimitiveIndex SetupPrimitiveIndexes(uint PrimitiveId)
{
	FPrimitiveIndex Location;
#if !VF_GPU_SCENE_TEXTURE
	Location.BaseOffset = PRIMITIVE_SCENE_DATA_STRIDE * PrimitiveId;
#else
	// Logical shift on an unsigned value already discards the low 16 bits
	const uint UpperHalf = PrimitiveId >> 16;
	const uint LowerHalf = PrimitiveId & 0xFFFF;
	Location.IndY = UpperHalf;
	Location.IndX = LowerHalf * PRIMITIVE_SCENE_DATA_STRIDE;
#endif
	return Location;
}
/*void UnpackLightmapDataAndUVIndex(uint Packed, out uint LightmapDataIndex, out uint LightmapUVIndex)
{
LightmapDataIndex = (Packed & 0xFFFFFF00) >> 8u;
LightmapUVIndex = (Packed & 0x000000FF);
}*/
// Reads one float4 element of a primitive's scene data.
//   PrimitiveIndex - location of the primitive's first element, from SetupPrimitiveIndexes()
//   ItemIndex      - element index within the primitive, added to the base location
// Source selection, in priority order: the global GPUScenePrimitiveSceneData buffer, the View
// primitive data texture, or the View primitive data buffer.
float4 LoadPrimitivePrimitiveSceneDataElement(FPrimitiveIndex PrimitiveIndex, uint ItemIndex)
{
#if USE_GLOBAL_GPU_SCENE_DATA
	return GPUScenePrimitiveSceneData[PrimitiveIndex.BaseOffset + ItemIndex];
#elif VF_GPU_SCENE_TEXTURE
	return View.PrimitiveSceneDataTexture.Load(int3(PrimitiveIndex.IndX + ItemIndex, PrimitiveIndex.IndY, 0));
#else
	return View.PrimitiveSceneData[PrimitiveIndex.BaseOffset + ItemIndex];
#endif
}
// Fetch from scene primitive buffer
// Loads the PRIMITIVE_SCENE_DATA_STRIDE float4 elements belonging to one primitive and unpacks them into FPrimitiveSceneData.
//   PrimitiveId - ID of the primitive whose data is fetched
FPrimitiveSceneData GetPrimitiveData(uint PrimitiveId)
{
	// Note: layout must match FPrimitiveSceneShaderData in C++
	// Relying on optimizer to remove unused loads
	FPrimitiveIndex PrimitiveIndex = SetupPrimitiveIndexes(PrimitiveId);
	FPrimitiveSceneData PrimitiveData;

	// Elements 0-17: matrices (one float4 per row) plus two standalone float4s
	PrimitiveData.LocalToWorld[0] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 0);
	PrimitiveData.LocalToWorld[1] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 1);
	PrimitiveData.LocalToWorld[2] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 2);
	PrimitiveData.LocalToWorld[3] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 3);
	PrimitiveData.InvNonUniformScaleAndDeterminantSign = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 4);
	PrimitiveData.ObjectWorldPositionAndRadius = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 5);
	PrimitiveData.WorldToLocal[0] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 6);
	PrimitiveData.WorldToLocal[1] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 7);
	PrimitiveData.WorldToLocal[2] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 8);
	PrimitiveData.WorldToLocal[3] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 9);
	PrimitiveData.PreviousLocalToWorld[0] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 10);
	PrimitiveData.PreviousLocalToWorld[1] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 11);
	PrimitiveData.PreviousLocalToWorld[2] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 12);
	PrimitiveData.PreviousLocalToWorld[3] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 13);
	PrimitiveData.PreviousWorldToLocal[0] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 14);
	PrimitiveData.PreviousWorldToLocal[1] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 15);
	PrimitiveData.PreviousWorldToLocal[2] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 16);
	PrimitiveData.PreviousWorldToLocal[3] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 17);

	// Elements 18-20: float3 + scalar pairs and four packed scalars
	const float4 Element18 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 18);
	PrimitiveData.ActorWorldPosition = Element18.xyz;
	PrimitiveData.UseSingleSampleShadowFromStationaryLights = Element18.w;

	const float4 Element19 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 19);
	PrimitiveData.ObjectBounds = Element19.xyz;
	PrimitiveData.LpvBiasMultiplier = Element19.w;

	const float4 Element20 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 20);
	PrimitiveData.DecalReceiverMask = Element20.x;
	PrimitiveData.PerObjectGBufferData = Element20.y;
	PrimitiveData.UseVolumetricLightmapShadowFromStationaryLights = Element20.z;
	PrimitiveData.DrawsVelocity = Element20.w;

	PrimitiveData.ObjectOrientation = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 21);
	PrimitiveData.NonUniformScale = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 22);

	// Elements 23-27: integer members are stored as raw bits in float lanes. Bit-cast with the intrinsic
	// that matches the destination type (asuint for uint members, asint for int members).
	const float4 Element23 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 23);
	PrimitiveData.LocalObjectBoundsMin = Element23.xyz;
	PrimitiveData.LightingChannelMask = asuint(Element23.w);

	const float4 Element24 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 24);
	PrimitiveData.LocalObjectBoundsMax = Element24.xyz;
	PrimitiveData.LightmapDataIndex = asuint(Element24.w);

	const float4 Element25 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 25);
	PrimitiveData.PreSkinnedLocalBoundsMin = Element25.xyz;
	PrimitiveData.SingleCaptureIndex = asint(Element25.w);

	const float4 Element26 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 26);
	PrimitiveData.PreSkinnedLocalBoundsMax = Element26.xyz;
	PrimitiveData.OutputVelocity = asuint(Element26.w);

	const float4 Element27 = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 27);
	PrimitiveData.LightmapUVIndex = asuint(Element27.x);
	PrimitiveData.InstanceDataOffset = asint(Element27.y);
	PrimitiveData.NumInstanceDataEntries = asint(Element27.z);
	PrimitiveData.Flags = asuint(Element27.w);

	// Elements 28 .. 28 + NUM_CUSTOM_PRIMITIVE_DATA - 1: custom primitive data
	UNROLL
	for (int i = 0; i < NUM_CUSTOM_PRIMITIVE_DATA; i++)
	{
		PrimitiveData.CustomPrimitiveData[i] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 28 + i);
	}

	return PrimitiveData;
}
#else // !VF_USE_PRIMITIVE_SCENE_DATA
// Route to Primitive uniform buffer
// The argument is ignored: every GetPrimitiveData(x) expands to the single Primitive uniform buffer.
#define GetPrimitiveData(x) Primitive
// Route to Instance uniform buffer
// Only defined when INSTANCE_SCENE_DATA_SUPPORTED is off, because otherwise GetInstanceData() exists as a real function.
#if !INSTANCE_SCENE_DATA_SUPPORTED
// GPUCULL_TODO: Figure out if this one does anything?
// Both arguments are ignored: every GetInstanceData(x,y) expands to PrimitiveInstance.
#define GetInstanceData(x,y) PrimitiveInstance
#endif // !INSTANCE_SCENE_DATA_SUPPORTED
#endif // VF_USE_PRIMITIVE_SCENE_DATA
#if defined(GPUCULL_TODO)
// Scene data resolved once per invocation by GetSceneDataIntermediates().
struct FSceneDataIntermediates
{
// ID of the primitive the instance belongs to (copied from InstanceData.PrimitiveId).
uint PrimitiveId;
// Index of the instance in the instance scene data.
uint InstanceId;
// View index unpacked from the top four bits of the instance-ID buffer entry; 0 on the primitive-ID workaround path.
uint ViewIndex;
// Index from which we load the instance info: InstanceIdOffset + DrawInstanceId, used to index View.InstanceIdsBuffer.
// NOTE(review): the original comment ended mid-sentence ("needed for the"); its intended ending is unknown.
uint InstanceIdLoadIndex;
#if INSTANCE_SCENE_DATA_SUPPORTED
FInstanceSceneData InstanceData;
#endif // INSTANCE_SCENE_DATA_SUPPORTED
#if VF_USE_PRIMITIVE_SCENE_DATA
// GPUCULL_TODO: Move this definition out of ifdef in SceneData so we can have fewer ifdefs?
// Also want to copy / reference the primitive UB so that we have a consistent access everywhere
FPrimitiveSceneData Primitive;
#endif
};
/**
 * Load the scene data once, given the required inputs.
 * InstanceIdOffset - supplied as a vertex stream with 0 instance step rate (constant for all instances)
 * DrawInstanceId - the instance ID (SV_InstanceID) in the current draw
 * Returns the resolved instance ID, view index, instance data and primitive data. Without
 * VF_USE_PRIMITIVE_SCENE_DATA the result is left zero-initialized.
 */
FSceneDataIntermediates GetSceneDataIntermediates(uint InstanceIdOffset, uint DrawInstanceId)
{
	FSceneDataIntermediates Intermediates = (FSceneDataIntermediates)0;
#if VF_USE_PRIMITIVE_SCENE_DATA
	const uint LoadIndex = InstanceIdOffset + DrawInstanceId;
	Intermediates.InstanceIdLoadIndex = LoadIndex;

	// GPUCULL_TODO: workaround for the fact that DrawDynamicMeshPassPrivate et al. don't work with GPU-Scene instancing
	// instead they mark the top bit in the primitive ID and disable auto instancing such that there is an 1:1:1
	// drawcmd:primitive:instance. Then we can just look up the primitive and fetch the instance data index.
	// GPUCULL_TODO: Workaround also used for ray tracing interfacing with the VFs, that also supply a DrawInstanceId.
	// We mark the PrimitiveID with the top bit in dynamic draw passes
	const bool bOffsetIsPrimitiveId = (InstanceIdOffset & VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG) != 0U;
	if (!bOffsetIsPrimitiveId)
	{
		// TODO: move InstanceIdsBuffer out of view and into some global Scene structure?
		// The low 28 bits of the buffer entry hold the instance ID
		Intermediates.InstanceId = View.InstanceIdsBuffer[LoadIndex] & ((1U << 28U) - 1);
		// We store the view index (which can be used for instanced stereo or other multi-view in the top four bits of the instance ID)
		// Note: this is an index of views for this render pass, not the view ID in the culling manager.
		Intermediates.ViewIndex = View.InstanceIdsBuffer[LoadIndex] >> 28U;
	}
	else
	{
		// Strip the flag bit to recover the primitive ID, then offset into that primitive's instance range
		const uint UnflaggedPrimitiveId = InstanceIdOffset & (VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG - 1U);
		Intermediates.InstanceId = GetPrimitiveData(UnflaggedPrimitiveId).InstanceDataOffset + DrawInstanceId;
		Intermediates.ViewIndex = 0;
	}

	Intermediates.InstanceData = GetInstanceData(Intermediates.InstanceId, View.InstanceDataSOAStride);
	Intermediates.PrimitiveId = Intermediates.InstanceData.PrimitiveId;
	Intermediates.Primitive = GetPrimitiveData(Intermediates.PrimitiveId);
#else // Reference the uniform buffers
	// GPUCULL_TODO: Need to get this from uniforms for the non-GPU scene path
	// Maybe we need to add an accessor function that can be a macro that resolves (like Primitive does) to the UBs
	//Intermediates.ViewIndex = 0U;
	//Intermediates.InstanceData = InstanceData;
	//Intermediates.PrimitiveId = Intermediates.InstanceData.PrimitiveId;
	//Intermediates.Primitive = Primitive;
#endif
	return Intermediates;
}
#endif // defined(GPUCULL_TODO)