Files
UnrealEngineUWP/Engine/Source/Developer/MeshUtilities/Private/MeshRepresentationCommon.cpp

319 lines
11 KiB
C++
Raw Normal View History

// Copyright Epic Games, Inc. All Rights Reserved.
#include "MeshRepresentationCommon.h"
#include "MeshUtilities.h"
#include "MeshUtilitiesPrivate.h"
#include "DerivedMeshDataTaskUtils.h"
/**
 * Fills Samples with jittered unit directions on the +Z hemisphere.
 *
 * The hemisphere is divided into a NumThetaSteps x NumPhiSteps grid and one
 * random sample is drawn inside each grid cell. The grid size is derived from
 * NumSamples, so the number of directions written is NumThetaSteps * NumPhiSteps,
 * which is not necessarily equal to NumSamples.
 *
 * @param NumSamples    Requested sample budget, used only to size the grid.
 * @param RandomStream  Source of the per-cell jitter; two fractions are consumed per sample.
 * @param Samples       Output array; any previous contents are discarded.
 */
void MeshUtilities::GenerateStratifiedUniformHemisphereSamples(int32 NumSamples, FRandomStream& RandomStream, TArray<FVector3f>& Samples)
{
	const int32 ThetaStepCount = FMath::TruncToInt(FMath::Sqrt(NumSamples / (2.0f * static_cast<float>(PI))));
	const int32 PhiStepCount = FMath::TruncToInt(ThetaStepCount * static_cast<float>(PI));

	// Drop old contents and reserve room for one sample per grid cell.
	Samples.Empty(ThetaStepCount * PhiStepCount);

	const float ThetaDivisor = static_cast<float>(ThetaStepCount);
	const float PhiDivisor = static_cast<float>(PhiStepCount);
	const float TwoPi = 2.0f * static_cast<float>(PI);

	for (int32 ThetaCell = 0; ThetaCell < ThetaStepCount; ++ThetaCell)
	{
		for (int32 PhiCell = 0; PhiCell < PhiStepCount; ++PhiCell)
		{
			// Draw order matters for reproducibility: first jitter is for the height, second for the azimuth.
			const float HeightJitter = RandomStream.GetFraction();
			const float AzimuthJitter = RandomStream.GetFraction();

			// Height (Z) of the sample, taken from the cell's position along the first grid axis.
			const float Height = (ThetaCell + HeightJitter) / ThetaDivisor;
			const float AzimuthFraction = (PhiCell + AzimuthJitter) / PhiDivisor;

			// Radius of the unit sphere's horizontal slice at this height.
			const float SliceRadius = FMath::Sqrt(1.0f - Height * Height);
			const float Azimuth = TwoPi * AzimuthFraction;

			// Convert to Cartesian
			const FVector3f Direction(FMath::Cos(Azimuth) * SliceRadius, FMath::Sin(Azimuth) * SliceRadius, Height);
			Samples.Add(Direction);
		}
	}
}
/**
 * Builds a basis matrix whose third axis is TangentZ, without any normalization step.
 * [Frisvad 2012, "Building an Orthonormal Basis from a 3D Unit Vector Without Normalization"]
 *
 * @param TangentZ  Direction to use as axis 2 of the result (the cited method takes a unit vector).
 * @return Identity-initialized matrix with axes 0, 1 and 2 set to the derived X, the derived Y, and TangentZ.
 */
FMatrix44f MeshRepresentation::GetTangentBasisFrisvad(FVector3f TangentZ)
{
	// Start from the fixed pair used when TangentZ points (almost) straight down -Z,
	// where the general formula below would divide by ~0.
	FVector3f BasisX(0, -1, 0);
	FVector3f BasisY(-1, 0, 0);

	// Written as a negated '<' so the branch taken for every input (including NaN) is unchanged.
	if (!(TangentZ.Z < -0.9999999f))
	{
		const float InvOnePlusZ = 1.0f / (1.0f + TangentZ.Z);
		const float OffDiagonal = -TangentZ.X * TangentZ.Y * InvOnePlusZ;

		BasisX = FVector3f(1.0f - TangentZ.X * TangentZ.X * InvOnePlusZ, OffDiagonal, -TangentZ.X);
		BasisY = FVector3f(OffDiagonal, 1.0f - TangentZ.Y * TangentZ.Y * InvOnePlusZ, -TangentZ.Y);
	}

	FMatrix44f Result;
	Result.SetIdentity();
	Result.SetAxis(0, BasisX);
	Result.SetAxis(1, BasisY);
	Result.SetAxis(2, TangentZ);
	return Result;
}
#if USE_EMBREE
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
void EmbreeFilterFunc(const struct RTCFilterFunctionNArguments* args)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
FEmbreeGeometry* EmbreeGeometry = (FEmbreeGeometry*)args->geometryUserPtr;
FEmbreeTriangleDesc Desc = EmbreeGeometry->TriangleDescs[RTCHitN_primID(args->hit, 1, 0)];
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
FEmbreeIntersectionContext& IntersectionContext = *static_cast<FEmbreeIntersectionContext*>(args->context);
IntersectionContext.ElementIndex = Desc.ElementIndex;
}
void EmbreeErrorFunc(void* userPtr, RTCError code, const char* str)
{
FString ErrorString;
TArray<TCHAR, FString::AllocatorType>& ErrorStringArray = ErrorString.GetCharArray();
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
ErrorStringArray.Empty();
int32 StrLen = FCStringAnsi::Strlen(str);
int32 Length = FUTF8ToTCHAR_Convert::ConvertedLength(str, StrLen);
ErrorStringArray.AddUninitialized(Length + 1); // +1 for the null terminator
FUTF8ToTCHAR_Convert::Convert(ErrorStringArray.GetData(), ErrorStringArray.Num(), reinterpret_cast<const ANSICHAR*>(str), StrLen);
ErrorStringArray[Length] = TEXT('\0');
UE_LOG(LogMeshUtilities, Error, TEXT("Embree error: %s Code=%u"), *ErrorString, (uint32)code);
}
#endif
void MeshRepresentation::SetupEmbreeScene(
FString MeshName,
const FSourceMeshDataForDerivedDataTask& SourceMeshData,
const FStaticMeshLODResources& LODModel,
const TArray<FSignedDistanceFieldBuildMaterialData>& MaterialBlendModes,
bool bGenerateAsIfTwoSided,
FEmbreeScene& EmbreeScene)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
const uint32 NumIndices = SourceMeshData.IsValid() ? SourceMeshData.GetNumIndices() : LODModel.IndexBuffer.GetNumIndices();
const int32 NumTriangles = NumIndices / 3;
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
const uint32 NumVertices = SourceMeshData.IsValid() ? SourceMeshData.GetNumVertices() : LODModel.VertexBuffers.PositionVertexBuffer.GetNumVertices();
EmbreeScene.NumIndices = NumTriangles;
TArray<FkDOPBuildCollisionTriangle<uint32> > BuildTriangles;
#if USE_EMBREE
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
EmbreeScene.bUseEmbree = true;
if (EmbreeScene.bUseEmbree)
{
EmbreeScene.EmbreeDevice = rtcNewDevice(nullptr);
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
rtcSetDeviceErrorFunction(EmbreeScene.EmbreeDevice, EmbreeErrorFunc, nullptr);
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
RTCError ReturnErrorNewDevice = rtcGetDeviceError(EmbreeScene.EmbreeDevice);
if (ReturnErrorNewDevice != RTC_ERROR_NONE)
{
UE_LOG(LogMeshUtilities, Warning, TEXT("GenerateSignedDistanceFieldVolumeData failed for %s. Embree rtcNewDevice failed. Code: %d"), *MeshName, (int32)ReturnErrorNewDevice);
return;
}
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
EmbreeScene.EmbreeScene = rtcNewScene(EmbreeScene.EmbreeDevice);
rtcSetSceneFlags(EmbreeScene.EmbreeScene, RTC_SCENE_FLAG_NONE);
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
RTCError ReturnErrorNewScene = rtcGetDeviceError(EmbreeScene.EmbreeDevice);
if (ReturnErrorNewScene != RTC_ERROR_NONE)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
UE_LOG(LogMeshUtilities, Warning, TEXT("GenerateSignedDistanceFieldVolumeData failed for %s. Embree rtcNewScene failed. Code: %d"), *MeshName, (int32)ReturnErrorNewScene);
rtcReleaseDevice(EmbreeScene.EmbreeDevice);
return;
}
}
#endif
TArray<int32> FilteredTriangles;
FilteredTriangles.Empty(NumTriangles);
if (SourceMeshData.IsValid())
{
for (int32 TriangleIndex = 0; TriangleIndex < NumTriangles; ++TriangleIndex)
{
const uint32 I0 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 0];
const uint32 I1 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 1];
const uint32 I2 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 2];
const FVector3f V0 = SourceMeshData.VertexPositions[I0];
const FVector3f V1 = SourceMeshData.VertexPositions[I1];
const FVector3f V2 = SourceMeshData.VertexPositions[I2];
const FVector3f TriangleNormal = ((V1 - V2) ^ (V0 - V2));
const bool bDegenerateTriangle = TriangleNormal.SizeSquared() < SMALL_NUMBER;
if (!bDegenerateTriangle)
{
FilteredTriangles.Add(TriangleIndex);
}
}
}
else
{
for (int32 TriangleIndex = 0; TriangleIndex < NumTriangles; ++TriangleIndex)
{
const FIndexArrayView Indices = LODModel.IndexBuffer.GetArrayView();
const uint32 I0 = Indices[TriangleIndex * 3 + 0];
const uint32 I1 = Indices[TriangleIndex * 3 + 1];
const uint32 I2 = Indices[TriangleIndex * 3 + 2];
const FVector3f V0 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I0);
const FVector3f V1 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I1);
const FVector3f V2 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I2);
const FVector3f TriangleNormal = ((V1 - V2) ^ (V0 - V2));
const bool bDegenerateTriangle = TriangleNormal.SizeSquared() < SMALL_NUMBER;
if (!bDegenerateTriangle)
{
bool bTriangleIsOpaqueOrMasked = false;
for (int32 SectionIndex = 0; SectionIndex < LODModel.Sections.Num(); SectionIndex++)
{
const FStaticMeshSection& Section = LODModel.Sections[SectionIndex];
if ((uint32)(TriangleIndex * 3) >= Section.FirstIndex && (uint32)(TriangleIndex * 3) < Section.FirstIndex + Section.NumTriangles * 3)
{
if (MaterialBlendModes.IsValidIndex(Section.MaterialIndex))
{
bTriangleIsOpaqueOrMasked = !IsTranslucentBlendMode(MaterialBlendModes[Section.MaterialIndex].BlendMode);
}
break;
}
}
if (bTriangleIsOpaqueOrMasked)
{
FilteredTriangles.Add(TriangleIndex);
}
}
}
}
const int32 NumBufferVerts = 1; // Reserve extra space at the end of the array, as embree has an internal bug where they read and discard 4 bytes off the end of the array
EmbreeScene.Geometry.VertexArray.Empty(NumVertices + NumBufferVerts);
EmbreeScene.Geometry.VertexArray.AddUninitialized(NumVertices + NumBufferVerts);
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
const int32 NumFilteredIndices = FilteredTriangles.Num() * 3;
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
EmbreeScene.Geometry.IndexArray.Empty(NumFilteredIndices);
EmbreeScene.Geometry.IndexArray.AddUninitialized(NumFilteredIndices);
FVector3f* EmbreeVertices = EmbreeScene.Geometry.VertexArray.GetData();
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
uint32* EmbreeIndices = EmbreeScene.Geometry.IndexArray.GetData();
EmbreeScene.Geometry.TriangleDescs.Empty(FilteredTriangles.Num());
for (int32 FilteredTriangleIndex = 0; FilteredTriangleIndex < FilteredTriangles.Num(); FilteredTriangleIndex++)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
uint32 I0, I1, I2;
FVector3f V0, V1, V2;
const int32 TriangleIndex = FilteredTriangles[FilteredTriangleIndex];
if (SourceMeshData.IsValid())
{
I0 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 0];
I1 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 1];
I2 = SourceMeshData.TriangleIndices[TriangleIndex * 3 + 2];
V0 = SourceMeshData.VertexPositions[I0];
V1 = SourceMeshData.VertexPositions[I1];
V2 = SourceMeshData.VertexPositions[I2];
}
else
{
const FIndexArrayView Indices = LODModel.IndexBuffer.GetArrayView();
I0 = Indices[TriangleIndex * 3 + 0];
I1 = Indices[TriangleIndex * 3 + 1];
I2 = Indices[TriangleIndex * 3 + 2];
V0 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I0);
V1 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I1);
V2 = LODModel.VertexBuffers.PositionVertexBuffer.VertexPosition(I2);
}
bool bTriangleIsTwoSided = false;
for (int32 SectionIndex = 0; SectionIndex < LODModel.Sections.Num(); SectionIndex++)
{
const FStaticMeshSection& Section = LODModel.Sections[SectionIndex];
if ((uint32)(TriangleIndex * 3) >= Section.FirstIndex && (uint32)(TriangleIndex * 3) < Section.FirstIndex + Section.NumTriangles * 3)
{
if (MaterialBlendModes.IsValidIndex(Section.MaterialIndex))
{
bTriangleIsTwoSided = MaterialBlendModes[Section.MaterialIndex].bTwoSided;
}
break;
}
}
if (EmbreeScene.bUseEmbree)
{
EmbreeIndices[FilteredTriangleIndex * 3 + 0] = I0;
EmbreeIndices[FilteredTriangleIndex * 3 + 1] = I1;
EmbreeIndices[FilteredTriangleIndex * 3 + 2] = I2;
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
EmbreeVertices[I0] = V0;
EmbreeVertices[I1] = V1;
EmbreeVertices[I2] = V2;
FEmbreeTriangleDesc Desc;
// Store bGenerateAsIfTwoSided in material index
Desc.ElementIndex = bGenerateAsIfTwoSided || bTriangleIsTwoSided ? 1 : 0;
EmbreeScene.Geometry.TriangleDescs.Add(Desc);
}
else
{
BuildTriangles.Add(FkDOPBuildCollisionTriangle<uint32>(
// Store bGenerateAsIfTwoSided in material index
bGenerateAsIfTwoSided || bTriangleIsTwoSided ? 1 : 0,
V0,
V1,
V2));
}
}
#if USE_EMBREE
if (EmbreeScene.bUseEmbree)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
RTCGeometry Geometry = rtcNewGeometry(EmbreeScene.EmbreeDevice, RTC_GEOMETRY_TYPE_TRIANGLE);
EmbreeScene.Geometry.InternalGeometry = Geometry;
rtcSetSharedGeometryBuffer(Geometry, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, EmbreeVertices, 0, sizeof(FVector3f), NumVertices);
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
rtcSetSharedGeometryBuffer(Geometry, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, EmbreeIndices, 0, sizeof(uint32) * 3, FilteredTriangles.Num());
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
rtcSetGeometryUserData(Geometry, &EmbreeScene.Geometry);
rtcSetGeometryIntersectFilterFunction(Geometry, EmbreeFilterFunc);
rtcCommitGeometry(Geometry);
rtcAttachGeometry(EmbreeScene.EmbreeScene, Geometry);
rtcReleaseGeometry(Geometry);
rtcCommitScene(EmbreeScene.EmbreeScene);
RTCError ReturnError = rtcGetDeviceError(EmbreeScene.EmbreeDevice);
if (ReturnError != RTC_ERROR_NONE)
{
Sparse, narrow band, streamed Mesh Signed Distance Fields * SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding). * Tracing must load the brick index from the indirection table, and only bricks near the surface are stored * 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed * SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface * The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy * Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator * Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances * Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases * Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2 * Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead * Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread * Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation * Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects * Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup * Upgraded mesh utilities to Embree 3.12.2 to 
get point queries * Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation * Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key. Original costs on 1080 GTX (full updates on everything and no screen traces) 10.60ms UpdateGlobalDistanceField 3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 1.73ms VoxelizeCards Clipmaps=[0,1,2,3] 0.38ms TraceCards 1 dispatch 1 groups 0.51ms TraceCards 1 dispatch 1 groups Sparse SDF costs 12.06ms UpdateGlobalDistanceField 4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1 2.30ms VoxelizeCards Clipmaps=[0,1,2,3] 0.69ms TraceCards 1 dispatch 1 groups 0.77ms TraceCards 1 dispatch 1 groups Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed #rb Krzysztof.Narkowicz #ROBOMERGE-OWNER: Daniel.Wright #ROBOMERGE-AUTHOR: daniel.wright #ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/... #ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269) #ROBOMERGE-CONFLICT from-shelf [CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
UE_LOG(LogMeshUtilities, Warning, TEXT("GenerateSignedDistanceFieldVolumeData failed for %s. Embree rtcCommitScene failed. Code: %d"), *MeshName, (int32)ReturnError);
return;
}
}
else
#endif
{
EmbreeScene.kDopTree.Build(BuildTriangles);
}
}
/**
 * Releases the Embree handles referenced by the given scene wrapper.
 *
 * When the build has USE_EMBREE enabled and EmbreeScene.bUseEmbree is set, the Embree scene handle
 * is released first and the Embree device handle second. In every other case (USE_EMBREE disabled,
 * or bUseEmbree false) this function does nothing.
 *
 * @param EmbreeScene	Scene wrapper whose EmbreeScene and EmbreeDevice handles are released.
 */
void MeshRepresentation::DeleteEmbreeScene(FEmbreeScene& EmbreeScene)
{
#if USE_EMBREE
	if (EmbreeScene.bUseEmbree)
	{
		// Keep the original order: the scene is released before the device.
		rtcReleaseScene(EmbreeScene.EmbreeScene);
		rtcReleaseDevice(EmbreeScene.EmbreeDevice);
	}
#endif
}