2019-12-27 09:26:59 -05:00
// Copyright Epic Games, Inc. All Rights Reserved.
2018-12-11 22:25:04 -05:00
2021-01-13 19:51:21 -04:00
#pragma once
2020-07-06 18:58:26 -04:00
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
#include "LargeWorldCoordinates.ush"
2021-09-16 18:16:47 -04:00
#include "OctahedralCommon.ush"
2022-04-26 14:37:33 -04:00
#include "/Engine/Shared/NaniteDefinitions.h"
2021-09-16 18:16:47 -04:00
2020-07-06 18:58:26 -04:00
// Optional feature switches. Each one defaults to 0 when the including shader has not defined it,
// so the #if tests later in this file are always well-formed.
// USE_GLOBAL_GPU_SCENE_DATA: when non-zero, primitive data is read from the GPUScenePrimitiveSceneData StructuredBuffer.
#ifndef USE_GLOBAL_GPU_SCENE_DATA
#define USE_GLOBAL_GPU_SCENE_DATA 0
2021-01-13 19:51:21 -04:00
#endif
2020-07-06 18:58:26 -04:00
2021-06-10 15:47:04 -04:00
// USE_GLOBAL_GPU_SCENE_DATA_RW: same idea, but through the GPUScenePrimitiveSceneDataRW RWStructuredBuffer.
// It is only consulted when USE_GLOBAL_GPU_SCENE_DATA is 0 (it sits on the #elif branch).
#ifndef USE_GLOBAL_GPU_SCENE_DATA_RW
#define USE_GLOBAL_GPU_SCENE_DATA_RW 0
#endif
2021-06-17 03:09:14 -04:00
#ifndef USES_PER_INSTANCE_CUSTOM_DATA
#define USES_PER_INSTANCE_CUSTOM_DATA 0
#endif
#ifndef USES_PER_INSTANCE_RANDOM
#define USES_PER_INSTANCE_RANDOM 0
#endif
2021-09-02 20:30:59 -04:00
#ifndef NEEDS_LIGHTMAP_COORDINATE
#define NEEDS_LIGHTMAP_COORDINATE 0
#endif
2021-12-06 13:18:17 -05:00
#ifndef VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA
#define VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA 0
#endif
// Per-instance custom data is enabled as soon as either of the two switches above is non-zero.
#define ENABLE_PER_INSTANCE_CUSTOM_DATA (USES_PER_INSTANCE_CUSTOM_DATA || VF_REQUIRES_PER_INSTANCE_CUSTOM_DATA)
// When transforms come from the InstanceSceneData buffer, indicates whether or not the transforms are compressed (ignored on mobile)
2021-12-02 23:38:54 -05:00
// TODO: Global setting/define
2021-12-06 13:18:17 -05:00
#define INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS 1
2021-12-02 23:38:54 -05:00
2018-12-11 22:25:04 -05:00
// Whether to fetch primitive values (eg LocalToWorld) by dynamically indexing a scene-wide buffer, or to reference a single Primitive uniform buffer
// Resulting values of VF_USE_PRIMITIVE_SCENE_DATA:
//   0 - VF_SUPPORTS_PRIMITIVE_SCENE_DATA is off
//   1 - supported, and the feature level is not ES3_1
//   2 - supported, and the feature level is ES3_1
// A plain `#if VF_USE_PRIMITIVE_SCENE_DATA` therefore treats both 1 and 2 as enabled.
2021-08-07 07:20:52 -04:00
#if VF_SUPPORTS_PRIMITIVE_SCENE_DATA
#if FEATURE_LEVEL == FEATURE_LEVEL_ES3_1
#define VF_USE_PRIMITIVE_SCENE_DATA 2
#else
#define VF_USE_PRIMITIVE_SCENE_DATA 1
#endif
#else
#define VF_USE_PRIMITIVE_SCENE_DATA 0
#endif
2018-12-11 22:25:04 -05:00
2021-05-26 19:03:46 -04:00
// Must match PrimitiveUniformShaderParameters.h
Implemented Nanite support for view visibility flags DrawInGame, DrawInEditor, VisibleInReflectionCaptures, VisibleInSceneCaptureOnly, HiddenInSceneCapture, ForceHidden, and others. Also partially implemented a GPUScene version of FPrimitiveProxy::IsShown().
#rb rune.stubbe, ola.olsson
[FYI] brian.karis, andrew.lauritzen, daniel.wright, krzysztof.narkowicz
#preflight 61f0e569fd5285142b30b907
#jira UE-127692
#ROBOMERGE-AUTHOR: graham.wihlidal
#ROBOMERGE-SOURCE: CL 18736993 in //UE5/Release-5.0/... via CL 18737108 via CL 18738215
#ROBOMERGE-BOT: UE5 (Release-Engine-Test -> Main) (v903-18687472)
[CL 18738671 by graham wihlidal in ue5-main branch]
2022-01-26 13:04:40 -05:00
// Primitive flag masks, bits 0-4. Every value is a distinct single-bit mask so they can be OR-ed together.
// Presumably tested against FPrimitiveSceneData::Flags - confirm against PrimitiveUniformShaderParameters.h.
#define PRIMITIVE_SCENE_DATA_FLAG_CAST_SHADOWS 0x1
#define PRIMITIVE_SCENE_DATA_FLAG_USE_SINGLE_SAMPLE_SHADOW_SL 0x2
#define PRIMITIVE_SCENE_DATA_FLAG_USE_VOLUMETRIC_LM_SHADOW_SL 0x4
#define PRIMITIVE_SCENE_DATA_FLAG_DECAL_RECEIVER 0x8
2022-05-19 10:08:15 -04:00
#define PRIMITIVE_SCENE_DATA_FLAG_SHOULD_CACHE_SHADOW 0x10
Implemented Nanite support for view visibility flags DrawInGame, DrawInEditor, VisibleInReflectionCaptures, VisibleInSceneCaptureOnly, HiddenInSceneCapture, ForceHidden, and others. Also partially implemented a GPUScene version of FPrimitiveProxy::IsShown().
#rb rune.stubbe, ola.olsson
[FYI] brian.karis, andrew.lauritzen, daniel.wright, krzysztof.narkowicz
#preflight 61f0e569fd5285142b30b907
#jira UE-127692
#ROBOMERGE-AUTHOR: graham.wihlidal
#ROBOMERGE-SOURCE: CL 18736993 in //UE5/Release-5.0/... via CL 18737108 via CL 18738215
#ROBOMERGE-BOT: UE5 (Release-Engine-Test -> Main) (v903-18687472)
[CL 18738671 by graham wihlidal in ue5-main branch]
2022-01-26 13:04:40 -05:00
// Primitive flag masks, bits 5-23 (0x20 .. 0x800000). Each value is a distinct single-bit mask.
// Bits 10-12 are the three LIGHTING_CHANNEL_n masks; bits 15-23 are the visibility / hidden-state masks.
#define PRIMITIVE_SCENE_DATA_FLAG_OUTPUT_VELOCITY 0x20
#define PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN 0x40
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_CAPSULE_REPRESENTATION 0x80
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_CAST_CONTACT_SHADOW 0x100
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_PRIMITIVE_CUSTOM_DATA 0x200
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_0 0x400
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_1 0x800
#define PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_2 0x1000
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_INSTANCE_LOCAL_BOUNDS 0x2000
#define PRIMITIVE_SCENE_DATA_FLAG_HAS_NANITE_IMPOSTER 0x4000
2022-02-25 09:35:14 -05:00
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_GAME 0x8000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_EDITOR 0x10000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_REFLECTION_CAPTURES 0x20000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_REAL_TIME_SKY_CAPTURES 0x40000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_RAY_TRACING 0x80000
#define PRIMITIVE_SCENE_DATA_FLAG_VISIBLE_IN_SCENE_CAPTURE_ONLY 0x100000
#define PRIMITIVE_SCENE_DATA_FLAG_HIDDEN_IN_SCENE_CAPTURE 0x200000
#define PRIMITIVE_SCENE_DATA_FLAG_FORCE_HIDDEN 0x400000
#define PRIMITIVE_SCENE_DATA_FLAG_CAST_HIDDEN_SHADOW 0x800000
Implemented a GPU Scene API for primitives explicitly enabling/disabling WPO support driven by events. This will be important for disabling WPO overhead in Nanite and other systems when unnecessary. The material system MayModifyMeshPosition hints are insufficient when using an MICD with static params that ultimately disable WPO, but the material system still reports WPO usage. This hint can also be used in new LOD systems to disable expensive features like WPO in the distance, but without doing a full shader switch. Nanite now supports a debug view that shows WPO off (red) and on (green) for meshes in the scene (r.Nanite.Visualize EvaluateWPO).
This change also remaps the original bEvaluateWorldPositionOffset on SMC into bEvaluateWorldPositionOffsetInRayTracing, because this var was only ever driven by ray tracing specific methods. The original bEvaluateWorldPositionOffset is now used by this more generic API.
Lastly, a new cvar (r.OptimizedWPO) has been added that indicates if the hint should be respected or not (default is false, which means WPO is always active, regardless of hint)
#rb rune.stubbe, marc.audy, derek.ehrman
[FYI] brian.karis, jamie.hayes, ola.olsson, andrew.lauritzen, jian.ru
#preflight 6244a8dcdc6183e3f5f8de98
#ROBOMERGE-AUTHOR: graham.wihlidal
#ROBOMERGE-SOURCE: CL 19564957 via CL 19564973 via CL 19564978
#ROBOMERGE-BOT: UE5 (Release-Engine-Staging -> Main) (v937-19513599)
[CL 19566743 by graham wihlidal in ue5-main branch]
2022-03-30 19:41:19 -04:00
// Primitive flag masks, bits 24-25. These are the highest masks defined in this list; bits 26-31 are unused here.
#define PRIMITIVE_SCENE_DATA_FLAG_EVALUATE_WORLD_POSITION_OFFSET 0x1000000
2022-05-20 19:15:15 -04:00
#define PRIMITIVE_SCENE_DATA_FLAG_CAMERA_DISTANCE_CULL 0x2000000
2021-05-26 19:03:46 -04:00
2021-01-28 12:24:41 -04:00
// GPUCULL_TODO: Eventually we need to remove this workaround
// Top bit (bit 31) of a 32-bit value, used as a marker flag.
#define VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG (1U << 31U)
2021-12-06 13:18:17 -05:00
// Bit widths of packed fields. 20 + 12 = 32 and 24 + 8 = 32, so each pair presumably shares
// one 32-bit word (primitive ID + instance flags, relative ID + custom data count) - confirm against the packing code.
// NOTE: INVALID_PRIMITIVE_ID below reuses the all-ones value of the 20-bit ID field (0xFFFFF),
// so that one encoding is a sentinel and cannot also identify a real primitive.
#define PRIMITIVE_ID_NUM_BITS (20u) // Max of 1,048,576 primitives
#define INSTANCE_SCENE_DATA_FLAGS_NUM_BITS (12u) // Max of 12 flags
#define INSTANCE_RELATIVE_ID_NUM_BITS (24u) // Max of 16,777,216 instances per primitive
#define INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS (8u) // Max of 255 custom data floats per instance
// Masks selecting the low PRIMITIVE_ID_NUM_BITS / INSTANCE_RELATIVE_ID_NUM_BITS bits of a word.
#define PRIMITIVE_ID_MASK ((1u << PRIMITIVE_ID_NUM_BITS) - 1u)
#define INSTANCE_RELATIVE_ID_MASK ((1u << INSTANCE_RELATIVE_ID_NUM_BITS) - 1u)
#define INVALID_PRIMITIVE_ID PRIMITIVE_ID_MASK
#define INVALID_INSTANCE_PAYLOAD_OFFSET (0xFFFFFFFFu)
2021-09-02 00:54:16 -04:00
// Must match InstanceUniformShaderParameters.h
// Per-instance flag masks. Nine single-bit masks (bits 0-8) are defined here, which fits within
// the 12 bits reserved by INSTANCE_SCENE_DATA_FLAGS_NUM_BITS.
#define INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN 0x1
#define INSTANCE_SCENE_DATA_FLAG_HAS_RANDOM 0x2
#define INSTANCE_SCENE_DATA_FLAG_HAS_CUSTOM_DATA 0x4
#define INSTANCE_SCENE_DATA_FLAG_HAS_DYNAMIC_DATA 0x8
#define INSTANCE_SCENE_DATA_FLAG_HAS_LIGHTSHADOW_UV_BIAS 0x10
#define INSTANCE_SCENE_DATA_FLAG_HAS_HIERARCHY_OFFSET 0x20
2021-12-02 20:49:23 -05:00
#define INSTANCE_SCENE_DATA_FLAG_HAS_LOCAL_BOUNDS 0x40
2021-12-03 13:43:00 -05:00
#define INSTANCE_SCENE_DATA_FLAG_HAS_EDITOR_DATA 0x80
2022-02-03 09:15:19 -05:00
#define INSTANCE_SCENE_DATA_FLAG_IS_RAYTRACING_FAR_FIELD 0x100
2021-04-12 13:07:00 -04:00
2021-12-06 13:18:17 -05:00
// Number of float4 elements one instance transform occupies: 2 when transforms are compressed, 3 otherwise.
// NOTE(review): a uint4 plus a float3 fits in 2 float4s, so the trailing comment on the #else branch
// looks like it describes the compressed layout rather than the 3-float4 one - confirm and reword if so.
#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
static const uint InstanceTransformSizeFloat4Count = 2u; // compressed transform
#else
static const uint InstanceTransformSizeFloat4Count = 3u; // encoded scale/rotation (uint4) and translation (float3)
#endif
2021-09-16 18:16:47 -04:00
2020-09-24 00:43:27 -04:00
#define NUM_CUSTOM_PRIMITIVE_DATA 9 // Num float4s used for custom data. Must match FCustomPrimitiveData::NumCustomPrimitiveDataFloat4s in SceneTypes.h
2019-04-16 09:17:07 -04:00
2018-12-11 22:25:04 -05:00
// Must match FPrimitiveUniformShaderParameters in C++
// Shader-side description of a single primitive. It is produced either by
// GetPrimitiveDataFromUniformBuffer() (from the Primitive uniform buffer) or by GetPrimitiveData()
// (from the scene primitive buffer).
//
// Member notes:
//  - TilePosition is the tile that GetPrimitiveDataFromUniformBuffer() combines with the relative
//    matrices / positions (MakeLWCMatrix4x3, MakeLWCInverseMatrix4x3, MakeLWCVector3) to form
//    LocalToWorld, WorldToLocal, their Previous* variants, ObjectWorldPosition and ActorWorldPosition.
//  - ObjectBoundsX / ObjectBoundsY / ObjectBoundsZ are separate scalars placed next to float3 members;
//    presumably so each float3 + scalar pair fills one float4 - confirm against the C++ layout.
//  - NaniteImposterIndex and NaniteFilterFlags are unpacked from one combined word
//    (NaniteImposterIndexAndFilterFlags) when the struct is built from the uniform buffer.
//  - CustomPrimitiveData holds NUM_CUSTOM_PRIMITIVE_DATA float4 values.
// Do not reorder or retype members without updating the matching C++ declaration.
struct FPrimitiveSceneData
{
2021-05-26 19:03:46 -04:00
// Copied verbatim from Primitive.Flags on the uniform-buffer path; presumably a combination of PRIMITIVE_SCENE_DATA_FLAG_* bits.
uint Flags; // TODO: Use 16 bits?
2021-06-14 13:43:26 -04:00
int InstanceSceneDataOffset; // Link to the range of instances that belong to this primitive
// Presumably the length of the instance range that starts at InstanceSceneDataOffset - TODO confirm.
int NumInstanceSceneDataEntries;
2022-01-18 07:54:10 -05:00
int PersistentPrimitiveIndex;
2021-05-26 19:03:46 -04:00
uint SingleCaptureIndex; // TODO: Use 16 bits? 8 bits?
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
float3 TilePosition;
2022-01-29 17:39:14 -05:00
uint PrimitiveComponentId; // TODO: Refactor to use PersistentPrimitiveIndex, ENGINE USE ONLY - will be removed
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
FLWCMatrix LocalToWorld;
FLWCInverseMatrix WorldToLocal;
FLWCMatrix PreviousLocalToWorld;
FLWCInverseMatrix PreviousWorldToLocal;
2021-05-26 19:03:46 -04:00
float3 InvNonUniformScale;
float ObjectBoundsX;
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
FLWCVector3 ObjectWorldPosition;
FLWCVector3 ActorWorldPosition;
float ObjectRadius;
2021-05-26 19:03:46 -04:00
uint LightmapUVIndex; // TODO: Use 16 bits? // TODO: Move into associated array that disappears if static lighting is disabled
float3 ObjectOrientation; // TODO: More efficient representation?
uint LightmapDataIndex; // TODO: Use 16 bits? // TODO: Move into associated array that disappears if static lighting is disabled
float4 NonUniformScale;
float3 PreSkinnedLocalBoundsMin;
uint NaniteResourceID;
float3 PreSkinnedLocalBoundsMax;
uint NaniteHierarchyOffset;
float3 LocalObjectBoundsMin;
float ObjectBoundsY;
float3 LocalObjectBoundsMax;
float ObjectBoundsZ;
2021-06-22 13:50:35 -04:00
uint InstancePayloadDataOffset;
uint InstancePayloadDataStride; // TODO: Use 16 bits? 8 bits?
2021-12-02 21:32:37 -05:00
float3 InstanceLocalBoundsCenter;
float3 InstanceLocalBoundsExtent;
2021-12-07 15:14:57 -05:00
float3 WireframeColor; // TODO: Should refactor out all editor data into a separate buffer
float3 LevelColor; // TODO: Should refactor out all editor data into a separate buffer
2021-12-03 10:01:28 -05:00
uint NaniteImposterIndex;
2022-04-26 14:37:33 -04:00
uint NaniteFilterFlags;
2022-05-20 19:15:15 -04:00
float2 CameraDistanceCullMinMaxSquared;
2021-05-26 19:03:46 -04:00
float4 CustomPrimitiveData[NUM_CUSTOM_PRIMITIVE_DATA]; // TODO: Move to associated array to shrink primitive data and pack cachelines more effectively
2018-12-11 22:25:04 -05:00
};
2021-08-07 07:20:52 -04:00
// Fetch from Primitive uniform buffer
// Builds an FPrimitiveSceneData from the bound Primitive uniform buffer.
//
// Most members are copied one-to-one. The exceptions are:
//  - the LWC matrices and positions, which are rebuilt from Primitive.TilePosition plus the
//    tile-relative values stored in the uniform buffer;
//  - ObjectWorldPosition / ObjectRadius, which come from the xyz / w parts of
//    Primitive.ObjectRelativeWorldPositionAndRadius;
//  - NaniteImposterIndex / NaniteFilterFlags, which are unpacked from the low / high bits of
//    Primitive.NaniteImposterIndexAndFilterFlags.
//
// Returns the fully initialised struct by value.
FPrimitiveSceneData GetPrimitiveDataFromUniformBuffer()
{
	// Start from an all-zero struct (same pattern as GetPrimitiveData). InstanceLocalBoundsCenter and
	// InstanceLocalBoundsExtent are not copied below, so without this they would be returned uninitialised.
	// NOTE(review): if the Primitive uniform buffer exposes instance local bounds, copy them here instead of leaving zeros.
	FPrimitiveSceneData PrimitiveData = (FPrimitiveSceneData)0;

	// Identification and instance range
	PrimitiveData.Flags = Primitive.Flags;
	PrimitiveData.InstanceSceneDataOffset = Primitive.InstanceSceneDataOffset;
	PrimitiveData.NumInstanceSceneDataEntries = Primitive.NumInstanceSceneDataEntries;
	PrimitiveData.PersistentPrimitiveIndex = Primitive.PersistentPrimitiveIndex;
	PrimitiveData.SingleCaptureIndex = Primitive.SingleCaptureIndex;
	PrimitiveData.TilePosition = Primitive.TilePosition;
	PrimitiveData.PrimitiveComponentId = Primitive.PrimitiveComponentId;

	// Transforms: tile + tile-relative matrix -> LWC matrix
	PrimitiveData.LocalToWorld = MakeLWCMatrix4x3(Primitive.TilePosition, Primitive.LocalToRelativeWorld);
	PrimitiveData.WorldToLocal = MakeLWCInverseMatrix4x3(Primitive.TilePosition, Primitive.RelativeWorldToLocal);
	PrimitiveData.PreviousLocalToWorld = MakeLWCMatrix4x3(Primitive.TilePosition, Primitive.PreviousLocalToRelativeWorld);
	PrimitiveData.PreviousWorldToLocal = MakeLWCInverseMatrix4x3(Primitive.TilePosition, Primitive.PreviousRelativeWorldToLocal);

	// Scale, positions and bounds
	PrimitiveData.InvNonUniformScale = Primitive.InvNonUniformScale;
	PrimitiveData.NonUniformScale = Primitive.NonUniformScale;
	PrimitiveData.ObjectWorldPosition = MakeLWCVector3(Primitive.TilePosition, Primitive.ObjectRelativeWorldPositionAndRadius.xyz);
	PrimitiveData.ObjectRadius = Primitive.ObjectRelativeWorldPositionAndRadius.w;
	PrimitiveData.ActorWorldPosition = MakeLWCVector3(Primitive.TilePosition, Primitive.ActorRelativeWorldPosition);
	PrimitiveData.ObjectOrientation = Primitive.ObjectOrientation;
	PrimitiveData.ObjectBoundsX = Primitive.ObjectBoundsX;
	PrimitiveData.ObjectBoundsY = Primitive.ObjectBoundsY;
	PrimitiveData.ObjectBoundsZ = Primitive.ObjectBoundsZ;
	PrimitiveData.PreSkinnedLocalBoundsMin = Primitive.PreSkinnedLocalBoundsMin;
	PrimitiveData.PreSkinnedLocalBoundsMax = Primitive.PreSkinnedLocalBoundsMax;
	PrimitiveData.LocalObjectBoundsMin = Primitive.LocalObjectBoundsMin;
	PrimitiveData.LocalObjectBoundsMax = Primitive.LocalObjectBoundsMax;

	// Lightmap lookups
	PrimitiveData.LightmapUVIndex = Primitive.LightmapUVIndex;
	PrimitiveData.LightmapDataIndex = Primitive.LightmapDataIndex;

	// Nanite: the imposter index lives in the low bits, the filter flags in the bits above it
	PrimitiveData.NaniteResourceID = Primitive.NaniteResourceID;
	PrimitiveData.NaniteHierarchyOffset = Primitive.NaniteHierarchyOffset;
	PrimitiveData.NaniteImposterIndex = Primitive.NaniteImposterIndexAndFilterFlags & NANITE_IMPOSTER_INDEX_MASK;
	PrimitiveData.NaniteFilterFlags = Primitive.NaniteImposterIndexAndFilterFlags >> NANITE_IMPOSTER_INDEX_NUM_BITS;

	// Instance payload, editor colours and distance culling
	PrimitiveData.InstancePayloadDataOffset = Primitive.InstancePayloadDataOffset;
	PrimitiveData.InstancePayloadDataStride = Primitive.InstancePayloadDataStride;
	PrimitiveData.WireframeColor = Primitive.WireframeColor;
	PrimitiveData.LevelColor = Primitive.LevelColor;
	PrimitiveData.CameraDistanceCullMinMaxSquared = Primitive.CameraDistanceCullMinMaxSquared;

	// Custom primitive data is a fixed-size float4 array, copied element by element
	UNROLL
	for (int DataIndex = 0; DataIndex < NUM_CUSTOM_PRIMITIVE_DATA; ++DataIndex)
	{
		PrimitiveData.CustomPrimitiveData[DataIndex] = Primitive.CustomPrimitiveData[DataIndex];
	}

	return PrimitiveData;
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// Optional global bindings for the primitive scene data, stored as a flat array of float4 elements.
// At most one of the two buffers is declared. When neither switch is set nothing is declared here
// and LoadPrimitivePrimitiveSceneDataElement() reads View.PrimitiveSceneData instead.
#if USE_GLOBAL_GPU_SCENE_DATA
StructuredBuffer<float4> GPUScenePrimitiveSceneData;
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
RWStructuredBuffer<float4> GPUScenePrimitiveSceneDataRW;
#endif
2018-12-11 22:25:04 -05:00
// Stride of a single primitive's data in float4's, must match C++
2022-05-20 19:15:15 -04:00
#define PRIMITIVE_SCENE_DATA_STRIDE 41
2020-02-12 13:27:19 -05:00
2021-02-16 00:46:28 -04:00
// Reads one float4 element of primitive scene data.
//
// PrimitiveIndex - base element index of the primitive inside the buffer. No stride is applied here;
//                  GetPrimitiveData computes it as PrimitiveId * PRIMITIVE_SCENE_DATA_STRIDE.
// ItemIndex      - float4 offset of the wanted element relative to PrimitiveIndex.
//
// The source buffer is picked at compile time: the global read-only buffer, the global RW buffer,
// or View.PrimitiveSceneData when neither global switch is set.
float4 LoadPrimitivePrimitiveSceneDataElement(uint PrimitiveIndex, uint ItemIndex)
{
	uint ElementIndex = PrimitiveIndex + ItemIndex;
	float4 Element;

#if USE_GLOBAL_GPU_SCENE_DATA
	checkStructuredBufferAccessSlow(GPUScenePrimitiveSceneData, ElementIndex);
	Element = GPUScenePrimitiveSceneData[ElementIndex];
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
	checkStructuredBufferAccessSlow(GPUScenePrimitiveSceneDataRW, ElementIndex);
	Element = GPUScenePrimitiveSceneDataRW[ElementIndex];
#else
	checkStructuredBufferAccessSlow(View.PrimitiveSceneData, ElementIndex);
	Element = View.PrimitiveSceneData[ElementIndex];
#endif

	return Element;
}
2018-12-11 22:25:04 -05:00
// Fetch from scene primitive buffer
// Unpacks the float4 elements belonging to PrimitiveId into an FPrimitiveSceneData.
// On ES3.1 vertex shaders this returns a zero-initialized struct instead (see the note in that branch).
2020-02-12 13:27:19 -05:00
FPrimitiveSceneData GetPrimitiveData(uint PrimitiveId)
2018-12-11 22:25:04 -05:00
{
2021-08-07 07:20:52 -04:00
#if (FEATURE_LEVEL == FEATURE_LEVEL_ES3_1 && VERTEXSHADER)
// Vertex shaders do not have access to GPUScene on mobile. Use GetPrimitiveData(FVertexFactoryIntermediates Intermediates)
// TODO: need a way to report invalid usage, after all dead code elimination
return (FPrimitiveSceneData)0;
#else
FPrimitiveSceneData PrimitiveData = (FPrimitiveSceneData)0;
2018-12-11 22:25:04 -05:00
// Note: layout must match FPrimitiveSceneShaderData in C++
// Relying on optimizer to remove unused loads
2021-02-16 00:46:28 -04:00
uint PrimitiveIndex = PrimitiveId * PRIMITIVE_SCENE_DATA_STRIDE;
2021-08-07 07:20:52 -04:00
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
// Element 1.xyz: tile position, combined with every matrix/position below to build the LWC values.
float3 TilePosition = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 1).xyz;
// Elements 2-17: four transforms stored as four xyz rows each.
// The w component of each row is reconstructed here as 0 (rows 0-2) or 1 (row 3).
float4x4 LocalToWorld;
LocalToWorld[0] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 2).xyz, 0.0f);
LocalToWorld[1] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 3).xyz, 0.0f);
LocalToWorld[2] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 4).xyz, 0.0f);
LocalToWorld[3] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 5).xyz, 1.0f);
float4x4 PreviousLocalToWorld;
PreviousLocalToWorld[0] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 10).xyz, 0.0f);
PreviousLocalToWorld[1] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 11).xyz, 0.0f);
PreviousLocalToWorld[2] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 12).xyz, 0.0f);
PreviousLocalToWorld[3] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 13).xyz, 1.0f);
float4x4 WorldToLocal;
WorldToLocal[0] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 6).xyz, 0.0f);
WorldToLocal[1] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 7).xyz, 0.0f);
WorldToLocal[2] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 8).xyz, 0.0f);
WorldToLocal[3] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 9).xyz, 1.0f);
float4x4 PreviousWorldToLocal;
PreviousWorldToLocal[0] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 14).xyz, 0.0f);
PreviousWorldToLocal[1] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 15).xyz, 0.0f);
PreviousWorldToLocal[2] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 16).xyz, 0.0f);
PreviousWorldToLocal[3] = float4(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 17).xyz, 1.0f);
float4 ObjectWorldPositionAndRadius = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 19);
2021-05-26 19:03:46 -04:00
// Element 0: four uints stored bit-for-bit in the float4 (hence asuint).
PrimitiveData.Flags = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 0).x);
2021-06-14 13:43:26 -04:00
PrimitiveData.InstanceSceneDataOffset = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 0).y);
PrimitiveData.NumInstanceSceneDataEntries = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 0).z);
2021-05-26 19:03:46 -04:00
PrimitiveData.SingleCaptureIndex = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 0).w);
2022-01-29 17:39:14 -05:00
PrimitiveData.TilePosition = TilePosition; // 1.xyz
PrimitiveData.PrimitiveComponentId = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 1).w);
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
PrimitiveData.LocalToWorld = MakeLWCMatrix4x3(TilePosition, LocalToWorld);
PrimitiveData.WorldToLocal = MakeLWCInverseMatrix4x3(TilePosition, WorldToLocal);
PrimitiveData.PreviousLocalToWorld = MakeLWCMatrix4x3(TilePosition, PreviousLocalToWorld);
PrimitiveData.PreviousWorldToLocal = MakeLWCInverseMatrix4x3(TilePosition, PreviousWorldToLocal);
// Elements 18-31: a float3 in .xyz with an unrelated scalar packed into .w of the same element.
PrimitiveData.InvNonUniformScale = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 18).xyz;
PrimitiveData.ObjectBoundsX = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 18).w;
PrimitiveData.ObjectWorldPosition = MakeLWCVector3(TilePosition, ObjectWorldPositionAndRadius.xyz);
PrimitiveData.ObjectRadius = ObjectWorldPositionAndRadius.w;
PrimitiveData.ActorWorldPosition = MakeLWCVector3(TilePosition, LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 20).xyz);
PrimitiveData.LightmapUVIndex = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 20).w);
PrimitiveData.ObjectOrientation = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 21).xyz;
PrimitiveData.LightmapDataIndex = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 21).w);
PrimitiveData.NonUniformScale = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 22);
PrimitiveData.PreSkinnedLocalBoundsMin = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 23).xyz;
PrimitiveData.NaniteResourceID = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 23).w);
PrimitiveData.PreSkinnedLocalBoundsMax = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 24).xyz;
PrimitiveData.NaniteHierarchyOffset = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 24).w);
PrimitiveData.LocalObjectBoundsMin = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 25).xyz;
PrimitiveData.ObjectBoundsY = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 25).w;
PrimitiveData.LocalObjectBoundsMax = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 26).xyz;
PrimitiveData.ObjectBoundsZ = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 26).w;
2021-12-02 21:32:37 -05:00
PrimitiveData.InstanceLocalBoundsCenter = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 27).xyz;
PrimitiveData.InstancePayloadDataOffset = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 27).w);
PrimitiveData.InstanceLocalBoundsExtent = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 28).xyz;
PrimitiveData.InstancePayloadDataStride = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 28).w);
2021-12-07 15:14:57 -05:00
PrimitiveData.WireframeColor = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 29).xyz;
PrimitiveData.LevelColor = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 30).xyz;
2022-01-18 07:54:10 -05:00
PrimitiveData.PersistentPrimitiveIndex = asint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 30).w);
2022-05-20 19:15:15 -04:00
PrimitiveData.CameraDistanceCullMinMaxSquared = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 31).xy;
2020-12-14 02:41:14 -04:00
2022-04-26 14:37:33 -04:00
// Element 29.w packs two values: the filter flags come from the bits above NANITE_IMPOSTER_INDEX_NUM_BITS,
// the imposter index from the bits selected by NANITE_IMPOSTER_INDEX_MASK.
// An index equal to the mask is remapped to INVALID_NANITE_IMPOSTER_INDEX.
uint NaniteImposterIndexAndFilterFlags = asuint(LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 29).w);
PrimitiveData.NaniteFilterFlags = NaniteImposterIndexAndFilterFlags >> NANITE_IMPOSTER_INDEX_NUM_BITS;
PrimitiveData.NaniteImposterIndex = NaniteImposterIndexAndFilterFlags & NANITE_IMPOSTER_INDEX_MASK;
if (PrimitiveData.NaniteImposterIndex == NANITE_IMPOSTER_INDEX_MASK)
{
PrimitiveData.NaniteImposterIndex = INVALID_NANITE_IMPOSTER_INDEX;
}
2021-12-07 15:14:57 -05:00
// TODO: Move to associated array (and editor data) to shrink primitive data and better pack cachelines
2019-04-16 09:17:07 -04:00
// Custom primitive data occupies the elements starting at index 32, one float4 per entry.
UNROLL
2021-05-26 19:03:46 -04:00
for (int DataIndex = 0; DataIndex < NUM_CUSTOM_PRIMITIVE_DATA; ++DataIndex)
2019-04-16 09:17:07 -04:00
{
2022-05-20 19:15:15 -04:00
PrimitiveData.CustomPrimitiveData[DataIndex] = LoadPrimitivePrimitiveSceneDataElement(PrimitiveIndex, 32 + DataIndex);
2019-04-16 09:17:07 -04:00
}
2018-12-11 22:25:04 -05:00
return PrimitiveData;
2021-08-07 07:20:52 -04:00
#endif
2018-12-11 22:25:04 -05:00
}
2020-02-12 13:27:19 -05:00
#else // !VF_USE_PRIMITIVE_SCENE_DATA
2018-12-11 22:25:04 -05:00
2021-08-07 07:20:52 -04:00
// Fallback used when VF_USE_PRIMITIVE_SCENE_DATA is off: PrimitiveId is ignored and the
// primitive data is taken from the uniform buffer instead.
FPrimitiveSceneData GetPrimitiveData(uint PrimitiveId)
{
	const FPrimitiveSceneData UniformBufferData = GetPrimitiveDataFromUniformBuffer();
	return UniformBufferData;
}
2018-12-11 22:25:04 -05:00
2020-07-06 18:58:26 -04:00
#endif // VF_USE_PRIMITIVE_SCENE_DATA
2021-01-19 08:25:03 -04:00
2021-05-26 19:03:46 -04:00
// Maps the determinant-sign bit of the primitive flags to a multiplier:
// -1.0 when PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN is set, +1.0 otherwise.
float GetPrimitive_DeterminantSign_FromFlags(uint Flags)
{
	const uint SignBit = Flags & PRIMITIVE_SCENE_DATA_FLAG_DETERMINANT_SIGN;
	return CondMask(SignBit, -1.0f, 1.0f);
}
// Looks up the primitive by ID and returns its determinant sign (-1.0 or +1.0).
float GetPrimitive_DeterminantSign(uint PrimitiveId)
{
	const uint PrimitiveFlags = GetPrimitiveData(PrimitiveId).Flags;
	return GetPrimitive_DeterminantSign_FromFlags(PrimitiveFlags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// Overload for callers that already hold the unpacked primitive data.
float GetPrimitive_DeterminantSign(FPrimitiveSceneData Primitive)
{
	const uint PrimitiveFlags = Primitive.Flags;
	return GetPrimitive_DeterminantSign_FromFlags(PrimitiveFlags);
}
#endif
// Encodes two primitive flags into a single float in {0, 1/3, 2/3, 1}:
// the capsule-representation flag contributes 2/3 and the cast-contact-shadow flag contributes 1/3.
float GetPrimitive_PerObjectGBufferData_FromFlags(uint Flags)
{
	const float HasCapsule = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_HAS_CAPSULE_REPRESENTATION, 1.0f, 0.0f);
	const float CastsContactShadow = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_HAS_CAST_CONTACT_SHADOW, 1.0f, 0.0f);

	const float PackedBits = 2.0f * HasCapsule + CastsContactShadow;
	return PackedBits / 3.0f;
}
// Looks up the primitive by ID and returns its per-object GBuffer value.
float GetPrimitive_PerObjectGBufferData(uint PrimitiveId)
{
	const uint PrimitiveFlags = GetPrimitiveData(PrimitiveId).Flags;
	return GetPrimitive_PerObjectGBufferData_FromFlags(PrimitiveFlags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
// Overload for callers that already hold the unpacked primitive data.
float GetPrimitive_PerObjectGBufferData(FPrimitiveSceneData Primitive)
{
	const uint PrimitiveFlags = Primitive.Flags;
	return GetPrimitive_PerObjectGBufferData_FromFlags(PrimitiveFlags);
}
#endif
// Collapses the three lighting-channel flag bits into a compact mask:
// bit 0 = channel 0, bit 1 = channel 1, bit 2 = channel 2.
uint GetPrimitive_LightingChannelMask_FromFlags(uint Flags)
{
	uint ChannelMask = CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_0, 1u, 0u);
	ChannelMask |= CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_1, 1u, 0u) << 1u;
	ChannelMask |= CondMask(Flags & PRIMITIVE_SCENE_DATA_FLAG_LIGHTING_CHANNEL_2, 1u, 0u) << 2u;
	return ChannelMask;
}
2021-05-27 11:32:19 -04:00
// Looks up the primitive by ID and returns its 3-bit lighting channel mask.
uint GetPrimitive_LightingChannelMask(uint PrimitiveId)
2021-05-26 19:03:46 -04:00
{
return GetPrimitive_LightingChannelMask_FromFlags(GetPrimitiveData(PrimitiveId).Flags);
}
#if VF_USE_PRIMITIVE_SCENE_DATA
2021-05-27 11:32:19 -04:00
// Overload for callers that already hold the unpacked primitive data.
uint GetPrimitive_LightingChannelMask(FPrimitiveSceneData Primitive)
2021-05-26 19:03:46 -04:00
{
return GetPrimitive_LightingChannelMask_FromFlags(Primitive.Flags);
}
#endif
2021-06-07 23:55:28 -04:00
// Unpacked AoS layout - see FInstanceSceneShaderData::Setup() for SoA packed layout.
2021-12-03 13:43:00 -05:00
#if USE_EDITOR_SHADERS
// Editor-only per-instance data; embedded in FInstanceSceneData as EditorData when USE_EDITOR_SHADERS is set.
struct FInstanceSceneEditorData
{
// NOTE(review): presumably the result of UnpackHitProxyId(HitProxyPacked) - confirm at the site that fills this struct.
float3 HitProxyId;
2021-12-03 15:37:56 -05:00
// Hit proxy ID in packed dword form (the input format of UnpackHitProxyId).
uint HitProxyPacked;
2021-12-03 13:43:00 -05:00
bool bIsSelected;
};
#endif
2021-12-03 15:37:56 -05:00
// Expands a packed hit proxy ID into three normalized floats:
// bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z, each scaled from [0, 255] to [0, 1].
// Bits 24-31 are ignored.
float3 UnpackHitProxyId(uint HitProxyPacked)
{
	const uint3 Bytes = uint3(HitProxyPacked, HitProxyPacked >> 8u, HitProxyPacked >> 16u) & 0xFF;
	return float3(Bytes) * (1.0f / 255.0f);
}
2021-12-06 13:18:17 -05:00
// Locations of an instance's optional payload sections inside the instance payload data buffer.
// Filled by GetInstancePayloadDataOffsets(): every offset is in float4s, and a section the
// instance does not have is left at INVALID_INSTANCE_PAYLOAD_OFFSET.
struct FInstancePayloadDataOffsets
{
uint HierarchyOffset;
uint EditorData;
uint LocalBounds;
uint DynamicData;
uint LightShadowUVBias;
uint CustomData;
};
2021-06-07 23:55:28 -04:00
// Fully unpacked per-instance scene data as consumed by shader code.
struct FInstanceSceneData
{
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
// Transforms are stored as LWC (tile + offset) matrices.
FLWCMatrix LocalToWorld;
FLWCMatrix PrevLocalToWorld;
FLWCInverseMatrix WorldToLocal;
2021-06-07 23:55:28 -04:00
float4 NonUniformScale;
float3 InvNonUniformScale;
float DeterminantSign;
float3 LocalBoundsCenter;
// ID of the primitive that owns this instance.
uint PrimitiveId;
2021-09-02 20:30:59 -04:00
// NOTE(review): presumably the instance's index relative to its primitive's first instance - confirm at the fill site.
uint RelativeId;
2021-12-02 23:38:54 -05:00
uint PayloadDataOffset;
2021-06-07 23:55:28 -04:00
float3 LocalBoundsExtent;
uint LastUpdateSceneFrameNumber;
uint NaniteRuntimeResourceID;
uint NaniteHierarchyOffset;
2021-12-03 13:43:00 -05:00
// The guard is forced on, so RandomID is always present regardless of USES_PER_INSTANCE_RANDOM.
#if 1//USES_PER_INSTANCE_RANDOM
2021-09-02 20:30:59 -04:00
float RandomID;
2021-06-17 03:09:14 -04:00
#endif
2021-12-06 13:18:17 -05:00
#if ENABLE_PER_INSTANCE_CUSTOM_DATA
2021-06-17 03:09:14 -04:00
// Offset (in float4s) into the payload buffer, and number of custom floats; read by LoadInstanceCustomDataElement().
uint CustomDataOffset;
2021-12-02 23:38:54 -05:00
uint CustomDataCount;
2021-09-02 20:30:59 -04:00
#endif
2021-12-02 23:38:54 -05:00
// The guard is forced on, so this field is always present regardless of NEEDS_LIGHTMAP_COORDINATE.
#if 1 //NEEDS_LIGHTMAP_COORDINATE // TODO: Fix Me
2021-09-02 20:30:59 -04:00
float4 LightMapAndShadowMapUVBias;
2021-06-17 03:09:14 -04:00
#endif
2021-06-07 23:55:28 -04:00
bool ValidInstance;
2021-06-10 15:47:04 -04:00
uint Flags;
2021-12-03 13:43:00 -05:00
#if USE_EDITOR_SHADERS
FInstanceSceneEditorData EditorData;
#endif
2021-06-07 23:55:28 -04:00
};
// Global bindings for per-instance data: the instance scene data buffer, the instance payload buffer,
// and a frame number. Both the read-only and the read-write configuration declare GPUSceneFrameNumber.
// When neither define is set, the loaders in this file read the equivalent View.* members instead.
#if USE_GLOBAL_GPU_SCENE_DATA
StructuredBuffer<float4> GPUSceneInstanceSceneData;
2021-08-23 17:58:49 -04:00
StructuredBuffer<float4> GPUSceneInstancePayloadData;
2021-06-07 23:55:28 -04:00
uint GPUSceneFrameNumber;
2021-06-10 15:47:04 -04:00
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
RWStructuredBuffer<float4> GPUSceneInstanceSceneDataRW;
2021-08-23 17:58:49 -04:00
RWStructuredBuffer<float4> GPUSceneInstancePayloadDataRW;
2021-06-10 15:47:04 -04:00
uint GPUSceneFrameNumber;
2021-06-07 23:55:28 -04:00
#endif
// Returns the frame number associated with the GPU scene data currently bound.
// GPUSceneFrameNumber is declared for both the read-only and the read-write global configurations,
// so both use it; only the View-bound configuration falls back to View.FrameNumber.
uint GetGPUSceneFrameNumber()
{
#if USE_GLOBAL_GPU_SCENE_DATA || USE_GLOBAL_GPU_SCENE_DATA_RW
	return GPUSceneFrameNumber;
#else
	return View.FrameNumber;
#endif
}
2021-08-23 16:47:16 -04:00
// Loads one float4 from the instance scene data buffer at the given absolute float4 index.
// The source buffer is chosen at compile time: global read-only, global read-write, or the View buffer.
float4 LoadInstanceSceneDataElement(uint Index)
2021-06-07 23:55:28 -04:00
{
#if USE_GLOBAL_GPU_SCENE_DATA
return GPUSceneInstanceSceneData[Index];
2021-06-10 15:47:04 -04:00
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
return GPUSceneInstanceSceneDataRW[Index];
2021-06-07 23:55:28 -04:00
#else
return View.InstanceSceneData[Index];
#endif
}
2021-08-23 16:47:16 -04:00
// Loads one float4 from the instance payload data buffer at the given absolute float4 index.
// The source buffer is chosen at compile time: global read-only, global read-write, or the View buffer.
float4 LoadInstancePayloadDataElement(uint Index)
{
	float4 Element;
#if USE_GLOBAL_GPU_SCENE_DATA
	Element = GPUSceneInstancePayloadData[Index];
#elif USE_GLOBAL_GPU_SCENE_DATA_RW
	Element = GPUSceneInstancePayloadDataRW[Index];
#else
	Element = View.InstancePayloadData[Index];
#endif
	return Element;
}
2021-12-06 13:18:17 -05:00
// Returns the Float4Index'th float4 of the instance's custom data.
// Yields zero when custom data is compiled out, when the instance has no custom data
// (offset == 0xFFFFFFFF), or when Float4Index is past the last float4 holding custom floats.
float4 LoadInstanceCustomDataElement(FInstanceSceneData SceneData, uint Float4Index)
{
#if ENABLE_PER_INSTANCE_CUSTOM_DATA
	// CustomDataCount is in floats; round up to whole float4s.
	const uint Float4Count = (SceneData.CustomDataCount + 3u) >> 2u;
	const bool bOffsetIsValid = SceneData.CustomDataOffset != 0xFFFFFFFFu;
	if (bOffsetIsValid && Float4Index < Float4Count)
	{
		const uint PayloadIndex = SceneData.CustomDataOffset + Float4Index;
		return LoadInstancePayloadDataElement(PayloadIndex);
	}
#endif

	return (float4)0.0f;
}
// Returns the FloatIndex'th custom data float of the instance (four floats per float4 element),
// or 0 when per-instance custom data is compiled out.
float LoadInstanceCustomDataFloat(FInstanceSceneData SceneData, uint FloatIndex)
{
#if ENABLE_PER_INSTANCE_CUSTOM_DATA
	// High bits select the float4, the low two bits select the component within it.
	const float4 PackedFloats = LoadInstanceCustomDataElement(SceneData, FloatIndex >> 2u);
	return PackedFloats[FloatIndex & 3u];
#else
	return 0.0f;
#endif
}
2021-09-16 18:16:47 -04:00
// [Frisvad 2012, "Building an Orthonormal Basis from a 3D Unit Vector Without Normalization"]
// Writes two vectors into BasisX / BasisY that complete a basis around BasisZ.
// The formula divides by (1 + BasisZ.z), so BasisZ.z must not be -1.
// EncodeScaleAndRotation() flips its Z axis to z >= 0 before calling this.
void GetHemiOrthoBasis( inout float3 BasisX, inout float3 BasisY, float3 BasisZ )
{
	const float InvOnePlusZ = 1.0f / ( 1.0f + BasisZ.z );
	const float OffDiagonal = -BasisZ.x * BasisZ.y * InvOnePlusZ;

	BasisX = float3( 1.0f - BasisZ.x * BasisZ.x * InvOnePlusZ, OffDiagonal, -BasisZ.x );
	BasisY = float3( OffDiagonal, 1.0f - BasisZ.y * BasisZ.y * InvOnePlusZ, -BasisZ.y );
}
2021-12-06 13:18:17 -05:00
// Packs a per-axis scale and a rotation (given as three axis rows) into a uint4.
// Output layout, as written below:
//   x: hemi-octahedral encoding of the Z axis - OctZ.x in bits 0-15, OctZ.y in bits 16-31 (each biased by 32768)
//   y: quantized Spin0 biased by 16384 in the low bits, bit 15 = bSpinIsX
//   z: Scale.x mantissa in bits 0-15, Scale.y mantissa in bits 16-31 (each biased by 1 << MantissaBits)
//   w: Scale.z mantissa in bits 0-15, shared exponent from bit 16 upward
// Scale and Axis are by-value copies; the sign flips applied below do not affect the caller.
// DecodeTransform() is the inverse of this packing.
uint4 EncodeScaleAndRotation(float3 Scale, float3x3 Axis)
{
const uint ExpBits = 8;
const uint ExpBias = ( 1u << (ExpBits - 1) ) - 1;
const uint SignMantissaBits = 16;
const uint SignMantissaMask = (1u << SignMantissaBits) - 1;
const uint MantissaBits = SignMantissaBits - 1;
const float Sqrt2 = 1.41421356f;
uint4 Output;
// Rotation
{
// Keep the Z axis in the z >= 0 hemisphere; the flip is compensated by negating Scale.z.
if( Axis[2].z < 0.0f )
{
Axis[2] *= -1.0f;
Scale.z *= -1.0f;
}
float2 OctZ = UnitVectorToHemiOctahedron( Axis[2] );
float3 BasisX, BasisY;
GetHemiOrthoBasis( BasisX, BasisY, Axis[2] );
// Express the X axis in the basis built around Z.
float X = dot(Axis[0], BasisX);
float Y = dot(Axis[0], BasisY);
float aX = abs( X );
float aY = abs( Y );
// Only the smaller-magnitude component (Spin0) is stored; the decoder rebuilds the other as sqrt(1 - Spin0^2).
bool bSpinIsX = aX < aY;
float Spin0 = bSpinIsX ? X : Y;
float Spin1 = bSpinIsX ? Y : X;
// The decoder always reconstructs a non-negative Spin1, so its sign is folded into Scale.x and Spin0 here.
float Sign1 = Spin1 < 0.0f ? -1.0f : 1.0f;
//Axis[0] *= Sign1;
Scale.x *= Sign1;
Spin0 *= Sign1;
// The decoder rebuilds the Y axis as cross(Z, X); record in the sign of Scale.y whether the input Y axis pointed the other way.
float3 GeneratedY = cross(Axis[2], Axis[0]);
Scale.y *= dot( Axis[1], GeneratedY ) < 0.0f ? -Sign1 : Sign1;
// Avoid sign extension in shader by biasing
Output.x = (((int)round( OctZ.x * 32767.0f ) + 32768) & 0xFFFF) << 0;
Output.x |= (((int)round( OctZ.y * 32767.0f ) + 32768) & 0xFFFF) << 16;
// NOTE: Masking the bits with `& 0x7FFF` below causes the whole int to be optimized to 0 on some shader platforms.
// This is okay, as long as Spin0 is in [0, 1], which it should be.
// NOTE(review): Spin0 can be negative after the Sign1 multiply above; the unmasked value stays below bit 15
// as long as |Spin0| <= 1/sqrt(2), which presumably holds for orthonormal axes - confirm.
Output.y = ((int)round( Spin0 * 16383.0f * Sqrt2 ) + 16384); // & 0x7FFF;
Output.y |= bSpinIsX ? (1u << 15) : 0;
}
// Scale
{
// All three scale components share one exponent, derived from the largest magnitude.
float MaxComponent = max3(abs(Scale.x), abs(Scale.y), abs(Scale.z));
uint MaxComponentExponent = (asuint(MaxComponent) & 0x7f800000u) >> 23;
// Need +1 because of losing the implicit leading bit of mantissa
// TODO assumes ExpBits == 8
// TODO clamp to expressable range
uint SharedExp = MaxComponentExponent + 1;
float ExpScale = asfloat(((127 + ExpBias + MantissaBits - SharedExp) & 0xFFu) << 23);
if( (uint)round( MaxComponent * ExpScale ) == (1u << MantissaBits) )
{
// Mantissa rounded up
SharedExp++;
ExpScale *= 0.5f;
}
Output.z = (((int)round( Scale.x * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 0;
Output.z |= (((int)round( Scale.y * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 16;
Output.w = (((int)round( Scale.z * ExpScale ) + (1u << MantissaBits)) & 0xFFFFu) << 0;
Output.w |= SharedExp << 16;
}
return Output;
}
// Convenience overload: splits a 3x3 transform into per-row lengths (the scale) and
// length-normalized rows (the axes), then packs them with the (Scale, Axis) overload.
// A zero-length row results in a division by zero, exactly as the rows are passed in.
uint4 EncodeScaleAndRotation( float3x3 InTransform )
{
	const float3 RowLengths = float3(
		length(InTransform[0]),
		length(InTransform[1]),
		length(InTransform[2]));

	float3x3 UnitRows;
	UnitRows[0] = InTransform[0] / RowLengths.x;
	UnitRows[1] = InTransform[1] / RowLengths.y;
	UnitRows[2] = InTransform[2] / RowLengths.z;

	return EncodeScaleAndRotation(RowLengths, UnitRows);
}
// Splits a 4x4 transform into its compressed form: the translation is row 3 (.xyz) copied
// verbatim, and the upper-left 3x3 is packed into a uint4 by EncodeScaleAndRotation().
void EncodeTransform( float4x4 InTransform, inout uint4 OutRotationScale, inout float3 OutTranslation )
{
	OutTranslation = InTransform[3].xyz;

	const float3x3 RotationScale3x3 = (float3x3)InTransform;
	OutRotationScale = EncodeScaleAndRotation(RotationScale3x3);
}
2021-09-16 18:16:47 -04:00
// Rebuilds a 4x4 transform from the packed rotation/scale produced by EncodeScaleAndRotation()
// plus an uncompressed translation.
//   RotationScale - packed rotation (components 0-1) and scale (components 2-3)
//   Translation   - written to row 3 of the result
//   Scale         - receives the decoded signed per-axis scale
// Returns the matrix whose rows 0-2 are the decoded axes multiplied by their scale.
float4x4 DecodeTransform( uint4 RotationScale, float3 Translation, inout float3 Scale )
{
float4x4 M = 0.0;
M[3].xyz = Translation;
M[3].w = 1.0;
// Rotation
{
// Raw quantized fields: two 16-bit octahedral coordinates and the 15-bit spin value.
float3 Rotation =
{
( RotationScale[0] >> 0 ) & 0xffff,
( RotationScale[0] >> 16 ) & 0xffff,
( RotationScale[1] >> 0 ) & 0x7fff
};
// Remove the encoder's biases (32768 / 16384) and rescale back to floats.
float2 OctZ = ( Rotation.xy - 32768 ) * (1.0f / 32767.0f);
float Spin0 = ( Rotation.z - 16384 ) * (0.70710678f / 16383.0f); // rsqrt(2)
// Bit 15 records which of X / Y was stored as Spin0.
bool bSpinIsX = RotationScale[1] & 0x8000;
M[2].xyz = HemiOctahedronToUnitVector( OctZ );
float3 BasisX, BasisY;
GetHemiOrthoBasis( BasisX, BasisY, M[2].xyz );
// The component that was not stored is rebuilt as the non-negative root; the encoder folds its sign into the scale.
float Spin1 = sqrt( 1.0f - Spin0 * Spin0 );
float X = bSpinIsX ? Spin0 : Spin1;
float Y = bSpinIsX ? Spin1 : Spin0;
M[0].xyz = BasisX * X + BasisY * Y;
M[1].xyz = cross( M[2].xyz, M[0].xyz );
}
// Scale
{
const uint SignMantissaBits = 16;
const uint SignMantissaMask = (1u << SignMantissaBits) - 1;
const uint MantissaBits = SignMantissaBits - 1;
// The first branch is compiled out; it reads a different bit layout than the one EncodeScaleAndRotation() writes.
#if 0
uint SharedExp = RotationScale[3] >> 22;
float ExpScale = asfloat( ( SharedExp - MantissaBits ) << 23 );
int3 Mantissa =
{
( RotationScale[2] >> 0 ),
( RotationScale[2] >> 18 ) | ( RotationScale[3] << 14 ),
( RotationScale[3] >> 4 )
};
#else
// Active layout: three 16-bit biased mantissas plus a shared exponent in the top 16 bits of component 3.
uint SharedExp = RotationScale[3] >> 16;
float ExpScale = asfloat( ( SharedExp - MantissaBits ) << 23 );
uint3 Mantissa =
{
RotationScale[2] >> 0,
RotationScale[2] >> 16,
RotationScale[3] >> 0
};
#endif
Mantissa &= SignMantissaMask;
// Remove the bias to recover the signed mantissa, then apply the shared exponent.
Scale = Mantissa;
Scale -= 1u << MantissaBits;
Scale *= ExpScale;
M[0] *= Scale[0];
M[1] *= Scale[1];
M[2] *= Scale[2];
}
return M;
}
2021-12-06 13:18:17 -05:00
// Helpers to pack/unpack the primitive ID and flags for the specified instance, which are packed together in a uint
// Splits a packed uint into the primitive ID and the instance flags.
// Inverse of PackPrimitiveIdAndInstanceFlags(): the ID occupies the low PRIMITIVE_ID_NUM_BITS bits,
// the flags occupy the INSTANCE_SCENE_DATA_FLAGS_NUM_BITS bits directly above it.
void UnpackPrimitiveIdAndInstanceFlags(uint PackedPrimitiveIdAndFlags, inout uint OutPrimitiveId, inout uint OutInstanceFlags)
{
	OutInstanceFlags = BitFieldExtractU32(PackedPrimitiveIdAndFlags, INSTANCE_SCENE_DATA_FLAGS_NUM_BITS, PRIMITIVE_ID_NUM_BITS);
	OutPrimitiveId = BitFieldExtractU32(PackedPrimitiveIdAndFlags, PRIMITIVE_ID_NUM_BITS, 0);
}
// Packs a primitive ID (masked to PRIMITIVE_ID_MASK) into the low bits and the instance flags
// into the bits starting at PRIMITIVE_ID_NUM_BITS.
uint PackPrimitiveIdAndInstanceFlags(uint PrimitiveId, uint InstanceFlags)
{
	const uint IdBits = PrimitiveId & PRIMITIVE_ID_MASK;
	const uint FlagBits = InstanceFlags << PRIMITIVE_ID_NUM_BITS;
	return IdBits | FlagBits;
}
// Loads and unpacks the primitive ID and instance flags for an instance.
// They live in the .x component of SoA element 0 of the instance scene data.
void LoadInstancePrimitiveIdAndFlags(uint InstanceId, uint SOAStride, inout uint OutPrimitiveId, inout uint OutInstanceFlags)
{
	const uint ElementIndex = 0 * SOAStride + InstanceId;
	const float4 Element = LoadInstanceSceneDataElement(ElementIndex);
	UnpackPrimitiveIdAndInstanceFlags(asuint(Element.x), OutPrimitiveId, OutInstanceFlags);
}
// Helpers to pack/unpack the instance relative ID and custom data count for the specified instance, which are packed together in a uint
// Splits a packed uint into the instance relative ID and the custom data count.
// Inverse of PackInstanceRelativeIdAndCustomDataCount(): the relative ID occupies the low
// INSTANCE_RELATIVE_ID_NUM_BITS bits, the count occupies the INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS bits above it.
void UnpackInstanceRelativeIdAndCustomDataCount(uint PackedRelativeIdAndCustomDataCount, inout uint OutRelativeId, inout uint OutCustomDataCount)
{
	OutCustomDataCount = BitFieldExtractU32(PackedRelativeIdAndCustomDataCount, INSTANCE_CUSTOM_DATA_COUNT_NUM_BITS, INSTANCE_RELATIVE_ID_NUM_BITS);
	OutRelativeId = BitFieldExtractU32(PackedRelativeIdAndCustomDataCount, INSTANCE_RELATIVE_ID_NUM_BITS, 0);
}
// Packs a relative ID (masked to INSTANCE_RELATIVE_ID_MASK) into the low bits and the custom data
// count into the bits starting at INSTANCE_RELATIVE_ID_NUM_BITS.
uint PackInstanceRelativeIdAndCustomDataCount(uint RelativeId, uint CustomDataCount)
{
	const uint RelativeIdBits = RelativeId & INSTANCE_RELATIVE_ID_MASK;
	const uint CountBits = CustomDataCount << INSTANCE_RELATIVE_ID_NUM_BITS;
	return RelativeIdBits | CountBits;
}
// Loads and unpacks the relative ID and custom data count for an instance.
// They live in the .y component of SoA element 0 of the instance scene data.
//   OutRelativeId      - receives the instance's relative ID
//   OutCustomDataCount - receives the instance's custom data count
// The output parameters were previously named after the primitive ID / flags loader;
// the names now match what is actually written. Positional call sites are unaffected.
void LoadInstanceRelativeIdAndCustomDataCount(uint InstanceId, uint SOAStride, inout uint OutRelativeId, inout uint OutCustomDataCount)
{
	const uint PackedRelativeIdAndCustomDataCount = asuint(LoadInstanceSceneDataElement(0 * SOAStride + InstanceId).y);
	UnpackInstanceRelativeIdAndCustomDataCount(PackedRelativeIdAndCustomDataCount, OutRelativeId, OutCustomDataCount);
}
// Helpers for getting/setting the instance determinant sign from instance data flags
// Returns -1.0 when the instance's determinant-sign flag is set, +1.0 otherwise.
// The flag stands in for: Scale.x * Scale.y * Scale.z < 0.0 ? -1.0 : 1.0
float GetInstanceDeterminantSignFromFlags(uint Flags)
{
	const uint SignBit = Flags & INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN;
	return CondMask(SignBit, -1.0f, 1.0f);
}
// Sets the determinant-sign flag in Flags when Determinant is negative and clears it otherwise.
// All other flag bits are left untouched.
void SetInstanceDeterminantSignFlag(float Determinant, inout uint Flags)
{
	const bool bNegativeDeterminant = Determinant < 0.0f;
	Flags = bNegativeDeterminant
		? (Flags | INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN)
		: (Flags & ~INSTANCE_SCENE_DATA_FLAG_DETERMINANT_SIGN);
}
// Determine the offsets into the payload data buffer for the given instance
// Computes where each optional payload section of an instance starts in the payload data buffer.
//   PrimitiveId        - owning primitive; supplies the payload base offset and per-instance stride
//   Flags              - instance flags; the INSTANCE_SCENE_DATA_FLAG_HAS_* bits say which sections exist
//   InstanceRelativeId - multiplied by the primitive's payload stride to reach this instance's payload
// All offsets are in float4s. Sections the instance does not have stay at INVALID_INSTANCE_PAYLOAD_OFFSET.
FInstancePayloadDataOffsets GetInstancePayloadDataOffsets(uint PrimitiveId, uint Flags, uint InstanceRelativeId)
{
	// Which optional sections does this instance carry?
	const bool bHierarchy = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_HIERARCHY_OFFSET) != 0u;
	const bool bLocalBounds = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_LOCAL_BOUNDS) != 0u;
	const bool bDynamicData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_DYNAMIC_DATA) != 0u;
	const bool bLightShadowUVBias = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_LIGHTSHADOW_UV_BIAS) != 0u;
	const bool bCustomData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_CUSTOM_DATA) != 0u;
#if USE_EDITOR_SHADERS
	const bool bEditorData = (Flags & INSTANCE_SCENE_DATA_FLAG_HAS_EDITOR_DATA) != 0u;
#else
	const bool bEditorData = false;
#endif

	// First float4 of this instance's payload.
	uint Cursor = InstanceRelativeId * GetPrimitiveData(PrimitiveId).InstancePayloadDataStride
		+ GetPrimitiveData(PrimitiveId).InstancePayloadDataOffset;

	FInstancePayloadDataOffsets Offsets;

	// The first three sections share the leading float4(s) and therefore all start at the same offset:
	//   Hierarchy Offset -> float0.x
	//   EditorData       -> float0.y
	//   LocalBounds      -> float0.zw & float1.xyzw
	Offsets.HierarchyOffset = bHierarchy ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Offsets.EditorData = bEditorData ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;
	Offsets.LocalBounds = bLocalBounds ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;

	// Local bounds need two float4s; hierarchy offset and/or editor data alone need one; otherwise none.
	Cursor += CondMask(bLocalBounds, 2u, CondMask(bHierarchy || bEditorData, 1u, 0u));

	// The remaining sections follow one after another, each present only when its flag is set.
	Offsets.DynamicData = INVALID_INSTANCE_PAYLOAD_OFFSET;
	if (bDynamicData)
	{
		Offsets.DynamicData = Cursor;
		Cursor += InstanceTransformSizeFloat4Count;
	}

	Offsets.LightShadowUVBias = INVALID_INSTANCE_PAYLOAD_OFFSET;
	if (bLightShadowUVBias)
	{
		Offsets.LightShadowUVBias = Cursor;
		++Cursor;
	}

	// Custom data is the last section, so the cursor is not advanced past it.
	Offsets.CustomData = bCustomData ? Cursor : INVALID_INSTANCE_PAYLOAD_OFFSET;

	return Offsets;
}
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
// Fills in the members of InstanceData that are derived from its local-to-world transform:
// NonUniformScale (xyz, and the largest component in w), InvNonUniformScale (only when the scale
// was not already produced while decoding the transform), DeterminantSign and WorldToLocal.
//   InstanceData         - instance to update; Flags must already be valid
//   TilePosition         - tile passed on to MakeLWCInverseMatrix
//   LocalToRelativeWorld - local-to-world transform, relative to the tile
void ComputeInstanceDerivedData(inout FInstanceSceneData InstanceData, float3 TilePosition, float4x4 LocalToRelativeWorld)
{
	//
	// Do not put any load operations here!
	//

#if (VF_USE_PRIMITIVE_SCENE_DATA == 2) || !INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
	// Non-uniform scale must be computed from the transform because it was not already computed when decoding it (see below in GetInstanceSceneData)
	const float3 AxisLengthSq = float3(
		length2(LocalToRelativeWorld[0].xyz),
		length2(LocalToRelativeWorld[1].xyz),
		length2(LocalToRelativeWorld[2].xyz));

	InstanceData.InvNonUniformScale = rsqrt(AxisLengthSq);
	// len^2 * (1 / len) == len
	InstanceData.NonUniformScale.xyz = AxisLengthSq * InstanceData.InvNonUniformScale;
#endif

	InstanceData.NonUniformScale.w = max3(InstanceData.NonUniformScale.x, InstanceData.NonUniformScale.y, InstanceData.NonUniformScale.z);
	InstanceData.DeterminantSign = GetInstanceDeterminantSignFromFlags(InstanceData.Flags);

	// Build the inverse: divide each axis row by its squared length, drop the translation,
	// and transpose the result.
	float4x4 InverseTransform = LocalToRelativeWorld;
	InverseTransform[0].xyz *= Pow2(InstanceData.InvNonUniformScale.x);
	InverseTransform[1].xyz *= Pow2(InstanceData.InvNonUniformScale.y);
	InverseTransform[2].xyz *= Pow2(InstanceData.InvNonUniformScale.z);
	InverseTransform[3].xyz = 0.0f;
	InverseTransform = transpose(InverseTransform);

	// The inverse translation is the negated origin carried through the inverse rotation/scale
	const float4 NegatedOrigin = float4(-LocalToRelativeWorld[3].xyz, 0.0f);
	InverseTransform[3].xyz = mul(NegatedOrigin, InverseTransform).xyz;

	InstanceData.WorldToLocal = MakeLWCInverseMatrix(TilePosition, InverseTransform);
}
2021-06-07 23:55:28 -04:00
// Fetch from scene primitive buffer.
//
// Loads and decodes the scene data of a single instance.
//   InstanceId  - index of the instance; element K of the instance is read at (K * SOAStride + InstanceId)
//   SOAStride   - stride between consecutive elements of the same instance in the instance scene data
//   bCheckValid - when true, an instance whose primitive ID is INVALID_PRIMITIVE_ID is returned with only
//                 PrimitiveId, Flags and ValidInstance filled in; every other member stays zero
// Returns the decoded instance data.
FInstanceSceneData GetInstanceSceneData(uint InstanceId, uint SOAStride, bool bCheckValid = true)
{
	FInstanceSceneData InstanceData = (FInstanceSceneData)0;

	//
	// NOTE: When changing the packed data layout, ensure that GPUScene/GPUSceneWriter.ush is kept in sync!
	// Also, please update the GetInstanceSceneData function in GPUScene.cpp for validation purposes.
	//

	// Only process valid instances
	LoadInstancePrimitiveIdAndFlags(InstanceId, SOAStride, InstanceData.PrimitiveId, InstanceData.Flags);
	InstanceData.ValidInstance = InstanceData.PrimitiveId != INVALID_PRIMITIVE_ID;

	// Payload Data Layout, as read below. Every section is optional; its offset comes from
	// GetInstancePayloadDataOffsets and is INVALID_INSTANCE_PAYLOAD_OFFSET when the section is absent.
	//
	//	Random ID				<packed inline>
	//	Custom Data Count		<packed inline>
	//
	//	Packed header (hierarchy offset, editor data and local bounds share the same float4s)
	//		HierarchyOffset		float0.x
	//		EditorData			float0.y	(USE_EDITOR_SHADERS only)
	//		LocalBounds Center	float0.zw, float1.x
	//		LocalBounds Extent	float1.yzw
	//	Dynamic data
	//		Previous Transform	2 float4s with INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS, 3 float4s otherwise
	//	LM/SM Scale Bias		1 float4
	//	Custom Data Float4s		starts at Offsets.CustomData (not read here, only the offset is stored)

	BRANCH
	if (!bCheckValid || InstanceData.ValidInstance)
	{
		uint CustomDataCount;
		LoadInstanceRelativeIdAndCustomDataCount(InstanceId, SOAStride, InstanceData.RelativeId, CustomDataCount);

		FInstancePayloadDataOffsets Offsets = GetInstancePayloadDataOffsets(InstanceData.PrimitiveId, InstanceData.Flags, InstanceData.RelativeId);

	#if ENABLE_PER_INSTANCE_CUSTOM_DATA
		InstanceData.CustomDataCount  = CustomDataCount;
		InstanceData.CustomDataOffset = Offsets.CustomData;
	#endif

		InstanceData.LastUpdateSceneFrameNumber = asuint(LoadInstanceSceneDataElement(0 * SOAStride + InstanceId).z);

	#if 1//USES_PER_INSTANCE_RANDOM
		InstanceData.RandomID = LoadInstanceSceneDataElement(0 * SOAStride + InstanceId).w;
	#endif

		// Instance transforms are stored relative to the tile of the owning primitive
		float3 TilePosition = GetPrimitiveData(InstanceData.PrimitiveId).TilePosition;

	#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
		uint4  RotationScale = asuint(LoadInstanceSceneDataElement(1 * SOAStride + InstanceId));
		float3 Translation   = LoadInstanceSceneDataElement(2 * SOAStride + InstanceId).xyz;
		float3 Scale = 0;
		float4x4 LocalToRelativeWorld = DecodeTransform(RotationScale, Translation, Scale);

		// The scale is a by-product of decoding, so ComputeInstanceDerivedData does not need to recompute it
		InstanceData.NonUniformScale.xyz = abs(Scale);
		InstanceData.InvNonUniformScale  = rcp(InstanceData.NonUniformScale.xyz);
	#else
		float4x4 LocalToRelativeWorld = transpose(float4x4(LoadInstanceSceneDataElement(1 * SOAStride + InstanceId),
														   LoadInstanceSceneDataElement(2 * SOAStride + InstanceId),
														   LoadInstanceSceneDataElement(3 * SOAStride + InstanceId),
														   float4(0.0f, 0.0f, 0.0f, 1.0f)));
	#endif

		InstanceData.LocalToWorld = MakeLWCMatrix(TilePosition, LocalToRelativeWorld);
		// PrevLocalToWorld is assigned further down, by whichever dynamic data branch is taken

		ComputeInstanceDerivedData(InstanceData, TilePosition, LocalToRelativeWorld);

		InstanceData.NaniteRuntimeResourceID = GetPrimitiveData(InstanceData.PrimitiveId).NaniteResourceID;
		InstanceData.NaniteHierarchyOffset   = GetPrimitiveData(InstanceData.PrimitiveId).NaniteHierarchyOffset;

		BRANCH
		if (Offsets.HierarchyOffset != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			const uint HierarchyRootOffset = asuint(LoadInstancePayloadDataElement(Offsets.HierarchyOffset)).x;

			// Combine this instance's hierarchy offset with the primitive's root hierarchy offset
			InstanceData.NaniteHierarchyOffset += HierarchyRootOffset;
		}

	#if USE_EDITOR_SHADERS
		BRANCH
		if (Offsets.EditorData != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			const uint PackedEditorData = asuint(LoadInstancePayloadDataElement(Offsets.EditorData)).y;

			// Top 8 bits: selection state, low 24 bits: packed hit proxy
			InstanceData.EditorData.bIsSelected    = (PackedEditorData >> 24u) != 0;
			InstanceData.EditorData.HitProxyPacked = PackedEditorData & 0x00FFFFFFu;
			InstanceData.EditorData.HitProxyId     = UnpackHitProxyId(InstanceData.EditorData.HitProxyPacked);
		}
	#endif

		BRANCH
		if (Offsets.LocalBounds != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			InstanceData.LocalBoundsCenter = float3(LoadInstancePayloadDataElement(Offsets.LocalBounds + 0).zw, LoadInstancePayloadDataElement(Offsets.LocalBounds + 1).x);
			InstanceData.LocalBoundsExtent = LoadInstancePayloadDataElement(Offsets.LocalBounds + 1).yzw;
		}
		else
		{
			// No per-instance bounds: fall back to the bounds stored on the primitive
			InstanceData.LocalBoundsCenter = GetPrimitiveData(InstanceData.PrimitiveId).InstanceLocalBoundsCenter;
			InstanceData.LocalBoundsExtent = GetPrimitiveData(InstanceData.PrimitiveId).InstanceLocalBoundsExtent;
		}

		BRANCH
		if (Offsets.DynamicData != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
		#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
			uint4  PrevRotationScale = asuint(LoadInstancePayloadDataElement(Offsets.DynamicData + 0));
			float3 PrevTranslation   = LoadInstancePayloadDataElement(Offsets.DynamicData + 1).xyz;
			float3 PrevScale = 0;
			float4x4 PrevLocalToRelativeWorld = DecodeTransform(PrevRotationScale, PrevTranslation, PrevScale);
		#else
			float4x4 PrevLocalToRelativeWorld = transpose(float4x4(LoadInstancePayloadDataElement(Offsets.DynamicData + 0),
																   LoadInstancePayloadDataElement(Offsets.DynamicData + 1),
																   LoadInstancePayloadDataElement(Offsets.DynamicData + 2),
																   float4(0.0f, 0.0f, 0.0f, 1.0f)));
		#endif
			InstanceData.PrevLocalToWorld = MakeLWCMatrix(TilePosition, PrevLocalToRelativeWorld);
		}
		else
		{
		#if INSTANCE_SCENE_DATA_COMPRESSED_TRANSFORMS
			// TODO: Temporary PrevVelocityHack
			uint4  PrevRotationScale = asuint(LoadInstanceSceneDataElement(3 * SOAStride + InstanceId));
			float3 PrevTranslation   = LoadInstanceSceneDataElement(4 * SOAStride + InstanceId).xyz;
			float3 PrevScale = 0;
			float4x4 PrevLocalToRelativeWorld = DecodeTransform(PrevRotationScale, PrevTranslation, PrevScale);
			InstanceData.PrevLocalToWorld = MakeLWCMatrix(TilePosition, PrevLocalToRelativeWorld);
		#else
			// No previous transform is stored for this instance, so reuse the current one.
			// LocalToWorld already is the LWC matrix; it must not be passed through MakeLWCMatrix again.
			InstanceData.PrevLocalToWorld = InstanceData.LocalToWorld;
		#endif
		}

	#if 1 //NEEDS_LIGHTMAP_COORDINATE
		BRANCH
		if (Offsets.LightShadowUVBias != INVALID_INSTANCE_PAYLOAD_OFFSET)
		{
			InstanceData.LightMapAndShadowMapUVBias = LoadInstancePayloadDataElement(Offsets.LightShadowUVBias);
		}
	#endif
	}

	return InstanceData;
}
2021-01-19 08:25:03 -04:00
// Scene data gathered once by GetSceneDataIntermediates.
struct FSceneDataIntermediates
{
// ID of the primitive that owns the instance
uint PrimitiveId;
// ID of the instance whose data is stored in InstanceData
uint InstanceId;
// View index for the instance (e.g. instanced stereo or other multi-view rendering)
uint ViewIndex;
// Index from which we load the instance info
// NOTE(review): the original comment was cut off after "needed for the"; the intended consumer is not visible here
uint InstanceIdLoadIndex;
// Decoded per-instance scene data
FInstanceSceneData InstanceData;
// Scene data of the owning primitive
FPrimitiveSceneData Primitive;
};
/**
 * Load the scene data once, given the required inputs.
 * InstanceIdOffset - supplied as a vertex stream with 0 instance step rate (constant for all instances)
 * DrawInstanceId - the instance ID (SV_InstanceID) in the current draw
 */
2021-08-07 07:20:52 -04:00
#if (VF_USE_PRIMITIVE_SCENE_DATA == 1)
2021-01-19 08:25:03 -04:00
// Resolves the instance, view and primitive for the current vertex and loads their scene data.
//   InstanceIdOffset - either an offset into InstanceCulling.InstanceIdsBuffer, or, when
//                      VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG is set, a primitive ID
//   DrawInstanceId   - the instance ID in the current draw
FSceneDataIntermediates GetSceneDataIntermediates(uint InstanceIdOffset, uint DrawInstanceId)
{
	FSceneDataIntermediates Result = (FSceneDataIntermediates)0;

	const uint LoadIndex = InstanceIdOffset + DrawInstanceId;
	Result.InstanceIdLoadIndex = LoadIndex;

	// GPUCULL_TODO: workaround for the fact that DrawDynamicMeshPassPrivate et al. don't work with GPU-Scene instancing
	//				 instead they mark the top bit in the primitive ID and disable auto instancing such that there is an 1:1:1
	//				 drawcmd:primitive:instance. Then we can just look up the primitive and fetch the instance data index.
	// GPUCULL_TODO: Workaround also used for ray tracing interfacing with the VFs, that also supply a DrawInstanceId.
	const bool bOffsetIsPrimitiveId = (InstanceIdOffset & VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG) != 0U;

	if (!bOffsetIsPrimitiveId)
	{
		const uint PackedInstanceIdAndView = InstanceCulling.InstanceIdsBuffer[LoadIndex];

		// The low 28 bits hold the instance ID
		Result.InstanceId = PackedInstanceIdAndView & ((1U << 28U) - 1);
		// We store the view index (which can be used for instanced stereo or other multi-view) in the top four bits of the instance ID
		// Note: this is an index of views for this render pass, not the view ID in the culling manager.
		Result.ViewIndex = PackedInstanceIdAndView >> 28U;
	}
	else
	{
		// We mark the PrimitiveID with the top bit in dynamic draw passes: mask off the flag
		const uint PrimitiveID = InstanceIdOffset & (VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG - 1U);

		Result.InstanceId = GetPrimitiveData(PrimitiveID).InstanceSceneDataOffset + DrawInstanceId;
		Result.ViewIndex  = 0;
	}

	Result.InstanceData = GetInstanceSceneData(Result.InstanceId, View.InstanceSceneDataSOAStride);
	Result.PrimitiveId  = Result.InstanceData.PrimitiveId;
	Result.Primitive    = GetPrimitiveData(Result.PrimitiveId);

	return Result;
}
2021-08-07 07:20:52 -04:00
#elif (VF_USE_PRIMITIVE_SCENE_DATA == 2)
2021-08-20 11:50:55 -04:00
// Must match PackLocalBoundsCenter and PackLocalBoundsExtent
//
// Unpacks a bounds center from the bit patterns of two floats.
// X and Y come from the bits above the low 11 of the first and second word respectively; Z is
// assembled from the low 11 bits of both words. The bias is subtracted from every component.
float3 UnpackLocalBoundsCenter(float2 PackedCenter)
{
	// uses 21 bits for each component, rounded to a 1 unit
	const uint  LowBitCount = 11u;
	const uint  LowBitsMask = (1u << LowBitCount) - 1u;
	const float Bias        = (1u << 20u) - 1u;

	const uint2 Words = asuint(PackedCenter);

	const uint3 Quantized = uint3(
		Words.x >> LowBitCount,
		Words.y >> LowBitCount,
		(Words.x & LowBitsMask) | ((Words.y & LowBitsMask) << LowBitCount));

	return float3(Quantized) - Bias;
}
// Unpacks a bounds extent from the bit patterns of two floats.
// X and Y come from the bits above the low 11 of the first and second word respectively; Z is
// assembled from the low 11 bits of both words. No bias is applied.
float3 UnpackLocalBoundsExtent(float2 PackedExtent)
{
	// uses 21 bits for each component, rounded to a 1 unit
	const uint LowBitCount = 11u;
	const uint LowBitsMask = (1u << LowBitCount) - 1u;

	const uint2 Words = asuint(PackedExtent);

	const uint3 Quantized = uint3(
		Words.x >> LowBitCount,
		Words.y >> LowBitCount,
		(Words.x & LowBitsMask) | ((Words.y & LowBitsMask) << LowBitCount));

	return float3(Quantized);
}
FSceneDataIntermediates GetSceneDataIntermediates(uint DrawInstanceId, float4 InstanceOrigin, float4 InstanceTransform1, float4 InstanceTransform2, float4 InstanceTransform3, float4 InstanceAuxData)
2021-08-07 07:20:52 -04:00
{
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
// Not all mobile devices can access storage buffers from a vertex shader, so
// we supply some of the primitive data using per-instance vertex data, and the rest of the primitive data comes from the Primitive UB.
// If the vertex shader uses any of the Primitive UB data, the associated draw calls will not auto-instance.
FPrimitiveSceneData Primitive = GetPrimitiveDataFromUniformBuffer();
2021-10-29 02:17:50 -04:00
// TODO: add support for LWC, we should pack it as integer tile coordinates
2021-10-29 02:34:22 -04:00
float3 TilePosition = float3(0,0,0);
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
2021-08-07 07:20:52 -04:00
// TODO: pack important primitive and instance flags here
const uint PrimitiveFlags = asuint(InstanceTransform1.w);
const uint InstanceFlags = (PrimitiveFlags >> 16);
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
2021-08-07 07:20:52 -04:00
// Reconstruct InstanceData from the packed per-instance data
FInstanceSceneData InstanceData = (FInstanceSceneData)0;
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
float4x4 LocalToRelativeWorld = float4x4(
2021-08-07 07:20:52 -04:00
float4(InstanceTransform1.xyz, 0.0f),
float4(InstanceTransform2.xyz, 0.0f),
float4(InstanceTransform3.xyz, 0.0f),
float4(InstanceOrigin.xyz, 1.0f));
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
2021-10-29 02:34:22 -04:00
InstanceData.LocalToWorld = MakeLWCMatrix(TilePosition, LocalToRelativeWorld);
2021-01-19 08:25:03 -04:00
2021-08-07 07:20:52 -04:00
InstanceData.Flags = InstanceFlags;
InstanceData.PrimitiveId = asuint(InstanceOrigin.w);
2021-08-23 23:20:44 -04:00
2021-10-06 02:20:54 -04:00
#if USES_PER_INSTANCE_RANDOM
2021-09-02 20:30:59 -04:00
InstanceData.RandomID = InstanceTransform3.w;
2021-10-06 02:20:54 -04:00
#endif
2021-10-29 02:34:22 -04:00
ComputeInstanceDerivedData(InstanceData, TilePosition, LocalToRelativeWorld);
2021-08-23 23:20:44 -04:00
2021-08-07 07:20:52 -04:00
//
FSceneDataIntermediates Intermediates = (FSceneDataIntermediates)0;
Intermediates.InstanceData = InstanceData;
Intermediates.PrimitiveId = InstanceData.PrimitiveId;
Intermediates.InstanceId = 0;
Intermediates.ViewIndex = 0;
Intermediates.InstanceIdLoadIndex = 0;
Merging Dev-LWCRendering into Main, this includes initial work to support rendering with LWC-scale position
Basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. Final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but may need to be adjusted. Value represented by a FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldspace. Except 'View.PrevPreViewTranslation' is now a FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldspace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinate of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials are fully supporting LWC-values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin.
Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large.
General overview of changed files:
LargeWorldCoordinates.ush - This defines the FLWC types and operations
GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation
HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values
SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate
#jira UE-117101
#rb arne.schober, Michael.Galetzka
#ROBOMERGE-AUTHOR: ben.ingram
#ROBOMERGE-SOURCE: CL 17787435 in //UE5/Main/...
#ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v881-17767770)
[CL 17787478 by ben ingram in ue5-release-engine-test branch]
2021-10-12 13:31:00 -04:00
Intermediates.Primitive = Primitive;
2021-08-07 07:20:52 -04:00
// Primitive data that comes from per-instance vertex data
Intermediates.Primitive.Flags = PrimitiveFlags;
Intermediates.Primitive.LocalToWorld = Intermediates.InstanceData.LocalToWorld;
Intermediates.Primitive.InvNonUniformScale = Intermediates.InstanceData.InvNonUniformScale;
Intermediates.Primitive.WorldToLocal = Intermediates.InstanceData.WorldToLocal;
Intermediates.Primitive.NonUniformScale = Intermediates.InstanceData.NonUniformScale;
2021-08-20 11:50:55 -04:00
#if ALLOW_STATIC_LIGHTING
Intermediates.Primitive.LightmapDataIndex = asuint(InstanceTransform2.w);
InstanceData.LightMapAndShadowMapUVBias = float4(
UnpackSnorm2x16(asuint(InstanceAuxData.y)),
UnpackSnorm2x16(asuint(InstanceAuxData.w)));
#else
InstanceData.LocalBoundsCenter = UnpackLocalBoundsCenter(InstanceAuxData.xy);
InstanceData.LocalBoundsExtent = UnpackLocalBoundsExtent(InstanceAuxData.zw);
Intermediates.Primitive.LocalObjectBoundsMin = InstanceData.LocalBoundsCenter - InstanceData.LocalBoundsExtent;
Intermediates.Primitive.LocalObjectBoundsMax = InstanceData.LocalBoundsCenter + InstanceData.LocalBoundsExtent;
2021-10-29 02:17:50 -04:00
float3 ObjectRelativeWorldPosition = mul(float4(InstanceData.LocalBoundsCenter.xyz, 0.0f), LocalToRelativeWorld).xyz;
2021-10-29 02:34:22 -04:00
Intermediates.Primitive.ObjectWorldPosition = MakeLWCVector3(TilePosition, ObjectRelativeWorldPosition);
2021-10-29 02:17:50 -04:00
Intermediates.Primitive.ObjectRadius = length(InstanceData.LocalBoundsExtent * InstanceData.NonUniformScale.xyz);
2021-08-20 11:50:55 -04:00
#endif
2021-08-07 07:20:52 -04:00
return Intermediates;
}
#else
/**
 * Variant of GetSceneDataIntermediates() compiled in the #else branch that is
 * closed by "#endif //VF_USE_PRIMITIVE_SCENE_DATA".
 *
 * Primitive data is fetched from the Primitive uniform buffer and the instance
 * data is then filled in by copying / deriving values from that primitive data.
 *
 * @return Zero-initialized intermediates with Primitive and InstanceData populated as described above.
 */
FSceneDataIntermediates GetSceneDataIntermediates()
{
	FSceneDataIntermediates Result = (FSceneDataIntermediates)0;

	// Primitive data is read from the uniform buffer; view, primitive and instance indices are all zero here
	Result.Primitive = GetPrimitiveDataFromUniformBuffer();
	Result.InstanceId = 0U;
	Result.PrimitiveId = 0U;
	Result.ViewIndex = 0U;

	// Instance transforms are copied straight from the primitive
	Result.InstanceData.LocalToWorld = Result.Primitive.LocalToWorld;
	Result.InstanceData.WorldToLocal = Result.Primitive.WorldToLocal;
	Result.InstanceData.PrevLocalToWorld = Result.Primitive.PreviousLocalToWorld;

	// Scale terms and the determinant sign (decoded from the primitive flags)
	Result.InstanceData.NonUniformScale = Result.Primitive.NonUniformScale;
	Result.InstanceData.InvNonUniformScale = Result.Primitive.InvNonUniformScale;
	Result.InstanceData.DeterminantSign = GetPrimitive_DeterminantSign_FromFlags(Result.Primitive.Flags);

	// Local bounds: center is the midpoint of min/max, extent is half of their difference
	Result.InstanceData.LocalBoundsCenter = 0.5f * (Result.Primitive.LocalObjectBoundsMax + Result.Primitive.LocalObjectBoundsMin);
	Result.InstanceData.LocalBoundsExtent = 0.5f * (Result.Primitive.LocalObjectBoundsMax - Result.Primitive.LocalObjectBoundsMin);

	Result.InstanceData.ValidInstance = true;

	return Result;
}
#endif //VF_USE_PRIMITIVE_SCENE_DATA