2020-07-06 18:58:26 -04:00
|
|
|
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
|
|
|
|
|
|
#include "MeshUtilities.h"
|
|
|
|
|
#include "MeshUtilitiesPrivate.h"
|
|
|
|
|
#include "Components/StaticMeshComponent.h"
|
|
|
|
|
#include "Engine/StaticMesh.h"
|
|
|
|
|
#include "Materials/Material.h"
|
|
|
|
|
#include "RawMesh.h"
|
|
|
|
|
#include "StaticMeshResources.h"
|
|
|
|
|
#include "MeshCardRepresentation.h"
|
|
|
|
|
#include "DistanceFieldAtlas.h"
|
2021-01-20 11:34:55 -04:00
|
|
|
#include "MeshRepresentationCommon.h"
|
2020-07-06 18:58:26 -04:00
|
|
|
|
|
|
|
|
class FGenerateCardMeshContext
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
const FString& MeshName;
|
|
|
|
|
RTCScene FullMeshEmbreeScene;
|
|
|
|
|
RTCDevice EmbreeDevice;
|
|
|
|
|
FCardRepresentationData& OutData;
|
|
|
|
|
|
2020-09-22 23:04:05 -04:00
|
|
|
FGenerateCardMeshContext(const FString& InMeshName, RTCScene InEmbreeScene, RTCDevice InEmbreeDevice, FCardRepresentationData& InOutData) :
|
2020-07-06 18:58:26 -04:00
|
|
|
MeshName(InMeshName),
|
|
|
|
|
FullMeshEmbreeScene(InEmbreeScene),
|
|
|
|
|
EmbreeDevice(InEmbreeDevice),
|
|
|
|
|
OutData(InOutData)
|
|
|
|
|
{}
|
|
|
|
|
};
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
class FPlacedCard
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
|
|
|
|
public:
|
2021-01-20 11:34:55 -04:00
|
|
|
int32 SliceMin;
|
|
|
|
|
int32 SliceMax;
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
float NearPlane;
|
|
|
|
|
float FarPlane;
|
|
|
|
|
FBox Bounds;
|
|
|
|
|
int32 NumHits;
|
2020-07-06 18:58:26 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#if USE_EMBREE
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
bool IsSurfacePointInsideMesh(const RTCScene& FullMeshEmbreeScene, FVector SurfacePoint, FVector SurfaceNormal, const TArray<FVector4>& RayDirectionsOverHemisphere)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
uint32 NumHits = 0;
|
|
|
|
|
uint32 NumBackFaceHits = 0;
|
|
|
|
|
|
|
|
|
|
const FMatrix SurfaceBasis = MeshRepresentation::GetTangentBasisFrisvad(SurfaceNormal);
|
|
|
|
|
|
|
|
|
|
for (int32 SampleIndex = 0; SampleIndex < RayDirectionsOverHemisphere.Num(); ++SampleIndex)
|
|
|
|
|
{
|
|
|
|
|
FVector RayDirection = SurfaceBasis.TransformVector(RayDirectionsOverHemisphere[SampleIndex]);
|
|
|
|
|
|
|
|
|
|
FEmbreeRay EmbreeRay;
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
EmbreeRay.ray.org_x = SurfacePoint.X;
|
|
|
|
|
EmbreeRay.ray.org_y = SurfacePoint.Y;
|
|
|
|
|
EmbreeRay.ray.org_z = SurfacePoint.Z;
|
|
|
|
|
EmbreeRay.ray.dir_x = RayDirection.X;
|
|
|
|
|
EmbreeRay.ray.dir_y = RayDirection.Y;
|
|
|
|
|
EmbreeRay.ray.dir_z = RayDirection.Z;
|
|
|
|
|
EmbreeRay.ray.tnear = 0.1f;
|
|
|
|
|
EmbreeRay.ray.tfar = FLT_MAX;
|
2021-01-20 11:34:55 -04:00
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
FEmbreeIntersectionContext EmbreeContext;
|
|
|
|
|
rtcInitIntersectContext(&EmbreeContext);
|
|
|
|
|
rtcIntersect1(FullMeshEmbreeScene, &EmbreeContext, &EmbreeRay);
|
2021-01-20 11:34:55 -04:00
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
if (EmbreeRay.hit.geomID != RTC_INVALID_GEOMETRY_ID && EmbreeRay.hit.primID != RTC_INVALID_GEOMETRY_ID)
|
2021-01-20 11:34:55 -04:00
|
|
|
{
|
|
|
|
|
++NumHits;
|
|
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
if (FVector::DotProduct(RayDirection, EmbreeRay.GetHitNormal()) > 0.0f && !EmbreeContext.IsHitTwoSided())
|
2021-01-20 11:34:55 -04:00
|
|
|
{
|
|
|
|
|
++NumBackFaceHits;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (NumHits > 0 && NumBackFaceHits > RayDirectionsOverHemisphere.Num() * 0.4f)
|
|
|
|
|
{
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * One surface sample found along a heightfield ray.
 * Both values are ray parameters (distances along the ray direction from the ray origin).
 */
struct FSurfacePoint
{
	/** Must be <= a card's NearPlane for the sample to be assigned to that card (see UpdatePlacedCards). */
	float MinT;
	/** Ray parameter of the hit; the hit position is RayOrigin + HitT * RayDirection. */
	float HitT;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Recomputes near/far planes, bounds and hit counts of all placed cards from the heightfield layers.
 *
 * For every heightfield cell the surface points of that cell and the placed cards are walked together
 * (the walk only ever advances, so it presumably relies on both being ordered along the ray - confirm with caller).
 * A surface point is assigned to a card when its HitT lies within [NearPlane, FarPlane] and its MinT is <= NearPlane.
 *
 * @param PlacedCards		Cards to update; SliceMin / SliceMax are read, all other fields are overwritten.
 * @param RayOriginFrame	Origin corner of the heightfield grid.
 * @param RayDirection		Direction along which HitT is measured.
 * @param HeighfieldStepX	Offset between neighbouring cells along heightfield X.
 * @param HeighfieldStepY	Offset between neighbouring cells along heightfield Y.
 * @param HeighfieldSize	Number of heightfield cells in X and Y.
 * @param MeshSliceNum		Number of slices MaxRayT is divided into.
 * @param MaxRayT			Ray length corresponding to MeshSliceNum slices.
 * @param MinCardHits		Cards with fewer hits than this do not contribute to the returned total.
 * @param VoxelExtent		Padding added around each hit position when growing a card's bounds.
 * @param HeightfieldLayers	Surface points per cell, indexed as X + Y * HeighfieldSize.X.
 * @return Sum of NumHits over all cards that reached MinCardHits.
 */
int32 UpdatePlacedCards(TArray<FPlacedCard, TInlineAllocator<16>>& PlacedCards,
	FVector RayOriginFrame,
	FVector RayDirection,
	FVector HeighfieldStepX,
	FVector HeighfieldStepY,
	FIntPoint HeighfieldSize,
	int32 MeshSliceNum,
	float MaxRayT,
	int32 MinCardHits,
	FVector VoxelExtent,
	const TArray<TArray<FSurfacePoint, TInlineAllocator<16>>>& HeightfieldLayers)
{
	// Convert slice ranges to ray distances and clear the accumulated state.
	for (FPlacedCard& PlacedCard : PlacedCards)
	{
		PlacedCard.NearPlane = PlacedCard.SliceMin / float(MeshSliceNum) * MaxRayT;
		PlacedCard.FarPlane = (PlacedCard.SliceMax / float(MeshSliceNum)) * MaxRayT;
		PlacedCard.Bounds.Init();
		PlacedCard.NumHits = 0;
	}

	for (int32 HeighfieldY = 0; HeighfieldY < HeighfieldSize.Y; ++HeighfieldY)
	{
		for (int32 HeighfieldX = 0; HeighfieldX < HeighfieldSize.X; ++HeighfieldX)
		{
			const int32 HeightfieldLinearIndex = HeighfieldX + HeighfieldY * HeighfieldSize.X;
			const TArray<FSurfacePoint, TInlineAllocator<16>>& CellSurfacePoints = HeightfieldLayers[HeightfieldLinearIndex];

			// Rays start at the center of the cell.
			FVector RayOrigin = RayOriginFrame;
			RayOrigin += (HeighfieldX + 0.5f) * HeighfieldStepX;
			RayOrigin += (HeighfieldY + 0.5f) * HeighfieldStepY;

			int32 LayerIndex = 0;
			int32 PlacedCardIndex = 0;

			while (LayerIndex < CellSurfacePoints.Num() && PlacedCardIndex < PlacedCards.Num())
			{
				const FSurfacePoint& SurfacePoint = CellSurfacePoints[LayerIndex];
				FPlacedCard& PlacedCard = PlacedCards[PlacedCardIndex];

				const bool bPointBelongsToCard =
					SurfacePoint.HitT >= PlacedCard.NearPlane
					&& SurfacePoint.HitT <= PlacedCard.FarPlane
					&& SurfacePoint.MinT <= PlacedCard.NearPlane;

				if (bPointBelongsToCard)
				{
					const FVector HitPosition = RayOrigin + SurfacePoint.HitT * RayDirection;

					PlacedCard.NumHits += 1;
					PlacedCard.Bounds += HitPosition - VoxelExtent;
					PlacedCard.Bounds += HitPosition + VoxelExtent;

					// Each card takes at most one surface point per cell, and each point goes to at most one card.
					++PlacedCardIndex;
					++LayerIndex;
				}
				else if (SurfacePoint.HitT >= PlacedCard.FarPlane)
				{
					// The point is at or beyond the end of this card - try the next card.
					++PlacedCardIndex;
				}
				else
				{
					// The point cannot be assigned to this card - try the next point.
					++LayerIndex;
				}
			}
		}
	}

	int32 NumMeshHits = 0;

	for (const FPlacedCard& PlacedCard : PlacedCards)
	{
		if (PlacedCard.NumHits >= MinCardHits)
		{
			NumMeshHits += PlacedCard.NumHits;
		}
	}

	return NumMeshHits;
}
|
|
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
void SerializePlacedCards(TArrayView<const FPlacedCard> PlacedCards,
|
2021-01-20 11:34:55 -04:00
|
|
|
int32 LODLevel,
|
|
|
|
|
int32 Orientation,
|
|
|
|
|
int32 MinCardHits,
|
2021-02-11 17:59:41 -04:00
|
|
|
const FBox& MeshCardsBounds,
|
2021-01-20 11:34:55 -04:00
|
|
|
FCardRepresentationData& OutData)
|
|
|
|
|
{
|
|
|
|
|
for (int32 PlacedCardIndex = 0; PlacedCardIndex < PlacedCards.Num(); ++PlacedCardIndex)
|
|
|
|
|
{
|
|
|
|
|
const FPlacedCard& PlacedCard = PlacedCards[PlacedCardIndex];
|
|
|
|
|
if (PlacedCard.NumHits >= MinCardHits)
|
|
|
|
|
{
|
2021-02-11 17:59:41 -04:00
|
|
|
const FBox ClampedBox = PlacedCard.Bounds.Overlap(MeshCardsBounds);
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
FLumenCardBuildData CardBuildData;
|
2021-02-11 17:59:41 -04:00
|
|
|
CardBuildData.Center = ClampedBox.GetCenter();
|
|
|
|
|
CardBuildData.Extent = ClampedBox.GetExtent();
|
2021-02-22 18:04:23 -04:00
|
|
|
CardBuildData.Extent = FLumenCardBuildData::TransformFaceExtent(CardBuildData.Extent, Orientation);
|
2021-01-20 11:34:55 -04:00
|
|
|
CardBuildData.Orientation = Orientation;
|
|
|
|
|
CardBuildData.LODLevel = LODLevel;
|
|
|
|
|
|
|
|
|
|
OutData.MeshCardsBuildData.CardBuildData.Add(CardBuildData);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-23 01:26:01 -04:00
|
|
|
void BuildMeshCards(const FBox& MeshBounds, const FGenerateCardMeshContext& Context, FCardRepresentationData& OutData)
|
2021-01-20 11:34:55 -04:00
|
|
|
{
|
|
|
|
|
static const auto CVarMeshCardRepresentationMinSurface = IConsoleManager::Get().FindTConsoleVariableDataFloat(TEXT("r.MeshCardRepresentation.MinSurface"));
|
|
|
|
|
const float MinSurfaceThreshold = CVarMeshCardRepresentationMinSurface->GetValueOnAnyThread();
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-23 01:26:01 -04:00
|
|
|
// Make sure BBox isn't empty and we can generate card representation for it. This handles e.g. infinitely thin planes.
|
2021-02-11 17:59:41 -04:00
|
|
|
const FVector MeshCardsBoundsCenter = MeshBounds.GetCenter();
|
|
|
|
|
const FVector MeshCardsBoundsExtent = FVector::Max(MeshBounds.GetExtent() + 1.0f, FVector(5.0f));
|
|
|
|
|
const FBox MeshCardsBounds(MeshCardsBoundsCenter - MeshCardsBoundsExtent, MeshCardsBoundsCenter + MeshCardsBoundsExtent);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
OutData.MeshCardsBuildData.Bounds = MeshCardsBounds;
|
|
|
|
|
OutData.MeshCardsBuildData.MaxLODLevel = 1;
|
|
|
|
|
OutData.MeshCardsBuildData.CardBuildData.Reset();
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
const float SamplesPerWorldUnit = 1.0f / 10.0f;
|
|
|
|
|
const int32 MinSamplesPerAxis = 4;
|
|
|
|
|
const int32 MaxSamplesPerAxis = 64;
|
|
|
|
|
FIntVector VolumeSizeInVoxels;
|
|
|
|
|
VolumeSizeInVoxels.X = FMath::Clamp<int32>(MeshCardsBounds.GetSize().X * SamplesPerWorldUnit, MinSamplesPerAxis, MaxSamplesPerAxis);
|
|
|
|
|
VolumeSizeInVoxels.Y = FMath::Clamp<int32>(MeshCardsBounds.GetSize().Y * SamplesPerWorldUnit, MinSamplesPerAxis, MaxSamplesPerAxis);
|
|
|
|
|
VolumeSizeInVoxels.Z = FMath::Clamp<int32>(MeshCardsBounds.GetSize().Z * SamplesPerWorldUnit, MinSamplesPerAxis, MaxSamplesPerAxis);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
const FVector VoxelExtent = MeshCardsBounds.GetSize() / FVector(VolumeSizeInVoxels);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
// Generate random ray directions over a hemisphere
|
|
|
|
|
TArray<FVector4> RayDirectionsOverHemisphere;
|
|
|
|
|
{
|
|
|
|
|
FRandomStream RandomStream(0);
|
|
|
|
|
MeshUtilities::GenerateStratifiedUniformHemisphereSamples(64, RandomStream, RayDirectionsOverHemisphere);
|
|
|
|
|
}
|
2021-03-18 09:38:36 -04:00
|
|
|
|
|
|
|
|
using FPlacedCardArray = TArray<FPlacedCard, TInlineAllocator<16>>;
|
|
|
|
|
struct FTaskOutputs
|
|
|
|
|
{
|
|
|
|
|
FPlacedCardArray PlacedCardsPerLod[2];
|
|
|
|
|
float MinCardHitsPerLod[2] = {};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
FTaskOutputs TaskOutputsPerOrientation[6];
|
|
|
|
|
|
|
|
|
|
ParallelFor(6, [VolumeSizeInVoxels, MinSurfaceThreshold, VoxelExtent,
|
|
|
|
|
&Context = AsConst(Context),
|
|
|
|
|
&RayDirectionsOverHemisphere = AsConst(RayDirectionsOverHemisphere),
|
|
|
|
|
&MeshCardsBounds = AsConst(MeshCardsBounds),
|
|
|
|
|
&TaskOutputsPerOrientation
|
|
|
|
|
] (int32 Orientation)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
|
|
|
|
FIntPoint HeighfieldSize(0, 0);
|
|
|
|
|
FVector RayDirection(0.0f, 0.0f, 0.0f);
|
2021-01-20 11:34:55 -04:00
|
|
|
FVector RayOriginFrame = MeshCardsBounds.Min;
|
2020-07-06 18:58:26 -04:00
|
|
|
FVector HeighfieldStepX(0.0f, 0.0f, 0.0f);
|
|
|
|
|
FVector HeighfieldStepY(0.0f, 0.0f, 0.0f);
|
2021-01-20 11:34:55 -04:00
|
|
|
float MaxRayT = 0.0f;
|
|
|
|
|
int32 MeshSliceNum = 0;
|
|
|
|
|
|
|
|
|
|
switch (Orientation / 2)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
MaxRayT = MeshCardsBounds.GetSize().X + 0.1f;
|
|
|
|
|
MeshSliceNum = VolumeSizeInVoxels.X;
|
|
|
|
|
HeighfieldSize.X = VolumeSizeInVoxels.Y;
|
|
|
|
|
HeighfieldSize.Y = VolumeSizeInVoxels.Z;
|
2021-02-11 17:59:41 -04:00
|
|
|
HeighfieldStepX = FVector(0.0f, MeshCardsBounds.GetSize().Y / HeighfieldSize.X, 0.0f);
|
|
|
|
|
HeighfieldStepY = FVector(0.0f, 0.0f, MeshCardsBounds.GetSize().Z / HeighfieldSize.Y);
|
2021-01-20 11:34:55 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
MaxRayT = MeshCardsBounds.GetSize().Y + 0.1f;
|
|
|
|
|
MeshSliceNum = VolumeSizeInVoxels.Y;
|
|
|
|
|
HeighfieldSize.X = VolumeSizeInVoxels.X;
|
|
|
|
|
HeighfieldSize.Y = VolumeSizeInVoxels.Z;
|
2021-02-11 17:59:41 -04:00
|
|
|
HeighfieldStepX = FVector(MeshCardsBounds.GetSize().X / HeighfieldSize.X, 0.0f, 0.0f);
|
|
|
|
|
HeighfieldStepY = FVector(0.0f, 0.0f, MeshCardsBounds.GetSize().Z / HeighfieldSize.Y);
|
2021-01-20 11:34:55 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
MaxRayT = MeshCardsBounds.GetSize().Z + 0.1f;
|
|
|
|
|
MeshSliceNum = VolumeSizeInVoxels.Z;
|
|
|
|
|
HeighfieldSize.X = VolumeSizeInVoxels.X;
|
|
|
|
|
HeighfieldSize.Y = VolumeSizeInVoxels.Y;
|
2021-02-11 17:59:41 -04:00
|
|
|
HeighfieldStepX = FVector(MeshCardsBounds.GetSize().X / HeighfieldSize.X, 0.0f, 0.0f);
|
|
|
|
|
HeighfieldStepY = FVector(0.0f, MeshCardsBounds.GetSize().Y / HeighfieldSize.Y, 0.0f);
|
2021-01-20 11:34:55 -04:00
|
|
|
break;
|
|
|
|
|
}
|
2020-07-06 18:58:26 -04:00
|
|
|
|
|
|
|
|
switch (Orientation)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
RayDirection.X = +1.0f;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
RayDirection.X = -1.0f;
|
2021-01-20 11:34:55 -04:00
|
|
|
RayOriginFrame.X = MeshCardsBounds.Max.X;
|
2020-07-06 18:58:26 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
RayDirection.Y = +1.0f;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
RayDirection.Y = -1.0f;
|
2021-01-20 11:34:55 -04:00
|
|
|
RayOriginFrame.Y = MeshCardsBounds.Max.Y;
|
2020-07-06 18:58:26 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 4:
|
|
|
|
|
RayDirection.Z = +1.0f;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 5:
|
|
|
|
|
RayDirection.Z = -1.0f;
|
2021-01-20 11:34:55 -04:00
|
|
|
RayOriginFrame.Z = MeshCardsBounds.Max.Z;
|
2020-07-06 18:58:26 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
check(false);
|
|
|
|
|
};
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
TArray<TArray<FSurfacePoint, TInlineAllocator<16>>> HeightfieldLayers;
|
|
|
|
|
HeightfieldLayers.SetNum(HeighfieldSize.X * HeighfieldSize.Y);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
// Fill surface points
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(FillSurfacePoints);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
TArray<float> Heightfield;
|
|
|
|
|
Heightfield.SetNum(HeighfieldSize.X * HeighfieldSize.Y);
|
|
|
|
|
for (int32 HeighfieldY = 0; HeighfieldY < HeighfieldSize.Y; ++HeighfieldY)
|
|
|
|
|
{
|
|
|
|
|
for (int32 HeighfieldX = 0; HeighfieldX < HeighfieldSize.X; ++HeighfieldX)
|
|
|
|
|
{
|
|
|
|
|
Heightfield[HeighfieldX + HeighfieldY * HeighfieldSize.X] = -1.0f;
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int32 HeighfieldY = 0; HeighfieldY < HeighfieldSize.Y; ++HeighfieldY)
|
|
|
|
|
{
|
|
|
|
|
for (int32 HeighfieldX = 0; HeighfieldX < HeighfieldSize.X; ++HeighfieldX)
|
|
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
FVector RayOrigin = RayOriginFrame;
|
|
|
|
|
RayOrigin += (HeighfieldX + 0.5f) * HeighfieldStepX;
|
|
|
|
|
RayOrigin += (HeighfieldY + 0.5f) * HeighfieldStepY;
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
float StepTMin = 0.0f;
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
for (int32 StepIndex = 0; StepIndex < 64; ++StepIndex)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
FEmbreeRay EmbreeRay;
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for a 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
EmbreeRay.ray.org_x = RayOrigin.X;
|
|
|
|
|
EmbreeRay.ray.org_y = RayOrigin.Y;
|
|
|
|
|
EmbreeRay.ray.org_z = RayOrigin.Z;
|
|
|
|
|
EmbreeRay.ray.dir_x = RayDirection.X;
|
|
|
|
|
EmbreeRay.ray.dir_y = RayDirection.Y;
|
|
|
|
|
EmbreeRay.ray.dir_z = RayDirection.Z;
|
|
|
|
|
EmbreeRay.ray.tnear = StepTMin;
|
|
|
|
|
EmbreeRay.ray.tfar = FLT_MAX;
|
2020-07-06 18:58:26 -04:00
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
FEmbreeIntersectionContext EmbreeContext;
|
|
|
|
|
rtcInitIntersectContext(&EmbreeContext);
|
|
|
|
|
rtcIntersect1(Context.FullMeshEmbreeScene, &EmbreeContext, &EmbreeRay);
|
|
|
|
|
|
|
|
|
|
if (EmbreeRay.hit.geomID != RTC_INVALID_GEOMETRY_ID && EmbreeRay.hit.primID != RTC_INVALID_GEOMETRY_ID)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
const FVector SurfacePoint = RayOrigin + RayDirection * EmbreeRay.ray.tfar;
|
2021-01-20 11:34:55 -04:00
|
|
|
const FVector SurfaceNormal = EmbreeRay.GetHitNormal();
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
const float NdotD = FVector::DotProduct(RayDirection, SurfaceNormal);
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
const bool bPassCullTest = EmbreeContext.IsHitTwoSided() || NdotD <= 0.0f;
|
2021-01-20 11:34:55 -04:00
|
|
|
const bool bPassProjectionAngleTest = FMath::Abs(NdotD) >= FMath::Cos(75.0f * (PI / 180.0f));
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
const float MinDistanceBetweenPoints = (MaxRayT / MeshSliceNum);
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
const bool bPassDistanceToAnotherSurfaceTest = EmbreeRay.ray.tnear <= 0.0f || (EmbreeRay.ray.tfar - EmbreeRay.ray.tnear > MinDistanceBetweenPoints);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
if (bPassCullTest && bPassProjectionAngleTest && bPassDistanceToAnotherSurfaceTest)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
const bool bIsInsideMesh = IsSurfacePointInsideMesh(Context.FullMeshEmbreeScene, SurfacePoint, SurfaceNormal, RayDirectionsOverHemisphere);
|
|
|
|
|
if (!bIsInsideMesh)
|
|
|
|
|
{
|
|
|
|
|
HeightfieldLayers[HeighfieldX + HeighfieldY * HeighfieldSize.X].Add(
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
{ EmbreeRay.ray.tnear, EmbreeRay.ray.tfar }
|
2021-01-20 11:34:55 -04:00
|
|
|
);
|
|
|
|
|
}
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
2021-01-20 11:34:55 -04:00
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
StepTMin = EmbreeRay.ray.tfar + 0.01f;
|
2021-01-20 11:34:55 -04:00
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
break;
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
const int32 MinCardHits = FMath::Floor(HeighfieldSize.X * HeighfieldSize.Y * MinSurfaceThreshold);
|
|
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
FPlacedCardArray& PlacedCardsLod0 = TaskOutputsPerOrientation[Orientation].PlacedCardsPerLod[0];
|
2021-01-20 11:34:55 -04:00
|
|
|
int32 PlacedCardsHits = 0;
|
|
|
|
|
|
|
|
|
|
// Place a default card
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
FPlacedCard PlacedCard;
|
|
|
|
|
PlacedCard.SliceMin = 0;
|
|
|
|
|
PlacedCard.SliceMax = MeshSliceNum;
|
2021-03-18 09:38:36 -04:00
|
|
|
PlacedCardsLod0.Add(PlacedCard);
|
2021-01-20 11:34:55 -04:00
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
PlacedCardsHits = UpdatePlacedCards(PlacedCardsLod0,
|
2021-01-20 11:34:55 -04:00
|
|
|
RayOriginFrame,
|
|
|
|
|
RayDirection,
|
|
|
|
|
HeighfieldStepX,
|
|
|
|
|
HeighfieldStepY,
|
|
|
|
|
HeighfieldSize,
|
|
|
|
|
MeshSliceNum,
|
|
|
|
|
MaxRayT,
|
|
|
|
|
MinCardHits,
|
|
|
|
|
VoxelExtent,
|
|
|
|
|
HeightfieldLayers);
|
|
|
|
|
|
|
|
|
|
if (PlacedCardsHits < MinCardHits)
|
|
|
|
|
{
|
2021-03-18 09:38:36 -04:00
|
|
|
PlacedCardsLod0.Reset();
|
2021-01-20 11:34:55 -04:00
|
|
|
}
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
TaskOutputsPerOrientation[Orientation].MinCardHitsPerLod[0] = MinCardHits;
|
|
|
|
|
|
|
|
|
|
FPlacedCardArray& PlacedCardsLod1 = TaskOutputsPerOrientation[Orientation].PlacedCardsPerLod[1];
|
|
|
|
|
PlacedCardsLod1 = PlacedCardsLod0;
|
2021-01-20 11:34:55 -04:00
|
|
|
|
|
|
|
|
// Try to place more cards by splitting existing ones
|
|
|
|
|
for (uint32 CardPlacementIteration = 0; CardPlacementIteration < 4; ++CardPlacementIteration)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
TArray<FPlacedCard, TInlineAllocator<16>> BestPlacedCards;
|
|
|
|
|
int32 BestPlacedCardHits = PlacedCardsHits;
|
|
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
for (int32 PlacedCardIndex = 0; PlacedCardIndex < PlacedCardsLod1.Num(); ++PlacedCardIndex)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-03-18 09:38:36 -04:00
|
|
|
const FPlacedCard& PlacedCard = PlacedCardsLod1[PlacedCardIndex];
|
2021-01-20 11:34:55 -04:00
|
|
|
for (int32 SliceIndex = PlacedCard.SliceMin + 2; SliceIndex < PlacedCard.SliceMax; ++SliceIndex)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-03-18 09:38:36 -04:00
|
|
|
TArray<FPlacedCard, TInlineAllocator<16>> TempPlacedCards(PlacedCardsLod1);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
FPlacedCard NewPlacedCard;
|
|
|
|
|
NewPlacedCard.SliceMin = SliceIndex;
|
|
|
|
|
NewPlacedCard.SliceMax = PlacedCard.SliceMax;
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
TempPlacedCards[PlacedCardIndex].SliceMax = SliceIndex - 1;
|
|
|
|
|
TempPlacedCards.Insert(NewPlacedCard, PlacedCardIndex + 1);
|
|
|
|
|
|
|
|
|
|
const int32 NumHits = UpdatePlacedCards(
|
|
|
|
|
TempPlacedCards,
|
|
|
|
|
RayOriginFrame,
|
|
|
|
|
RayDirection,
|
|
|
|
|
HeighfieldStepX,
|
|
|
|
|
HeighfieldStepY,
|
|
|
|
|
HeighfieldSize,
|
|
|
|
|
MeshSliceNum,
|
|
|
|
|
MaxRayT,
|
|
|
|
|
MinCardHits,
|
|
|
|
|
VoxelExtent,
|
|
|
|
|
HeightfieldLayers);
|
|
|
|
|
|
|
|
|
|
if (NumHits > BestPlacedCardHits)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-01-20 11:34:55 -04:00
|
|
|
BestPlacedCards = TempPlacedCards;
|
|
|
|
|
BestPlacedCardHits = NumHits;
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
if (BestPlacedCardHits >= PlacedCardsHits + MinCardHits)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
2021-03-18 09:38:36 -04:00
|
|
|
PlacedCardsLod1 = BestPlacedCards;
|
2021-01-20 11:34:55 -04:00
|
|
|
PlacedCardsHits = BestPlacedCardHits;
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-18 09:38:36 -04:00
|
|
|
TaskOutputsPerOrientation[Orientation].MinCardHitsPerLod[1] = MinCardHits;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
for (int32 Orientation = 0; Orientation < 6; ++Orientation)
|
|
|
|
|
{
|
|
|
|
|
const FTaskOutputs& TaskOutputs = TaskOutputsPerOrientation[Orientation];
|
|
|
|
|
SerializePlacedCards(TaskOutputs.PlacedCardsPerLod[0], /*LOD level*/ 0, Orientation, TaskOutputs.MinCardHitsPerLod[0], MeshCardsBounds, OutData);
|
|
|
|
|
SerializePlacedCards(TaskOutputs.PlacedCardsPerLod[1], /*LOD level*/ 1, Orientation, TaskOutputs.MinCardHitsPerLod[1], MeshCardsBounds, OutData);
|
2020-07-06 18:58:26 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif // #if USE_EMBREE
|
|
|
|
|
|
|
|
|
|
bool FMeshUtilities::GenerateCardRepresentationData(
|
|
|
|
|
FString MeshName,
|
2020-09-22 23:04:05 -04:00
|
|
|
const FSourceMeshDataForDerivedDataTask& SourceMeshData,
|
2020-07-06 18:58:26 -04:00
|
|
|
const FStaticMeshLODResources& LODModel,
|
|
|
|
|
class FQueuedThreadPool& ThreadPool,
|
2021-01-20 11:34:55 -04:00
|
|
|
const TArray<FSignedDistanceFieldBuildMaterialData>& MaterialBlendModes,
|
2020-07-06 18:58:26 -04:00
|
|
|
const FBoxSphereBounds& Bounds,
|
|
|
|
|
const FDistanceFieldVolumeData* DistanceFieldVolumeData,
|
2021-01-20 11:34:55 -04:00
|
|
|
bool bGenerateAsIfTwoSided,
|
2020-07-06 18:58:26 -04:00
|
|
|
FCardRepresentationData& OutData)
|
|
|
|
|
{
|
|
|
|
|
#if USE_EMBREE
|
2020-07-28 09:09:18 -04:00
|
|
|
TRACE_CPUPROFILER_EVENT_SCOPE(FMeshUtilities::GenerateCardRepresentationData);
|
2021-01-20 11:34:55 -04:00
|
|
|
const double StartTime = FPlatformTime::Seconds();
|
2020-07-28 09:09:18 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
FEmbreeScene EmbreeScene;
|
|
|
|
|
MeshRepresentation::SetupEmbreeScene(MeshName,
|
|
|
|
|
SourceMeshData,
|
|
|
|
|
LODModel,
|
|
|
|
|
MaterialBlendModes,
|
|
|
|
|
bGenerateAsIfTwoSided,
|
|
|
|
|
EmbreeScene);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
if (!EmbreeScene.EmbreeScene)
|
2020-07-06 18:58:26 -04:00
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
FGenerateCardMeshContext Context(MeshName, EmbreeScene.EmbreeScene, EmbreeScene.EmbreeDevice, OutData);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3d, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
// Note: must operate on the SDF bounds because SDF generation can expand the mesh's bounds
|
2021-03-23 22:43:50 -04:00
|
|
|
BuildMeshCards(DistanceFieldVolumeData ? DistanceFieldVolumeData->LocalSpaceMeshBounds : Bounds.GetBox(), Context, OutData);
|
2020-07-06 18:58:26 -04:00
|
|
|
|
2021-01-20 11:34:55 -04:00
|
|
|
MeshRepresentation::DeleteEmbreeScene(EmbreeScene);
|
|
|
|
|
|
Sparse, narrow band, streamed Mesh Signed Distance Fields
* SDFs are now generated, allocated from the atlas and uploaded in 8^3 bricks (7^3 unique data, half voxel padding).
* Tracing must load the brick index from the indirection table, and only bricks near the surface are stored
* 3 mips are now generated, with the lowest resolution always loaded and the other 2 streamed
* SDFs are now G8 narrow band. Lower resolution mips must be traversed when querying distance to nearest surface far away from the surface
* The Distance Field Brick Atlas is now stored for each FScene and dynamically resized based on needs with a GPU memcopy
* Brick atlas uses a 1d pooled allocator which has no fragmentation and greatly reduces packing waste over the 3d allocator
* Added new indirection for Distance Field Asset data, so that only a single entry needs to be updated when a mip is streamed in or out in scenes with millions of instances
* Compute shaders operating on distance field instances generate streaming requests, which are async read back to CPU, turned into IO requests, which are polled and when complete uploaded to atlases
* Any mesh instance inside the Global SDF extent (200m) requests mip1, and at 50m requests mip2
* Now using a batched compute scatter to upload to the distance field atlas instead of RHIUpdateTexture3D, to bypass alignment restrictions and per-upload overhead
* Distance Field streaming uses an async task to move Memcpy and IO request overhead off of the Rendering Thread
* Distance Field Visualization now computes a normal from the SDF gradient and does simple lighting to better visualize the scene representation
* Increased r.DistanceFields.MaxPerMeshResolution from 128 to 512, to better represent large objects
* Mesh SDF generation now uses an Embree point query to calculate the closest unsigned distance, and then a much smaller set of rays to count backfaces for negative region determination, for an 11x speedup
* Upgraded mesh utilities to Embree 3.12.2 to get point queries
* Fixed wrong transform used for SDF normals in Lumen, causing non-uniformly scaled meshes to have incorrect Surface Cache interpolation
* Fixed Static Mesh materials not getting PostLoaded before SDF build, causing their blend modes to be wrong for the build, which corrupts the DDC. Also included those blend modes in the DDC key.
Original costs on 1080 GTX (full updates on everything and no screen traces)
10.60ms UpdateGlobalDistanceField
3.62ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
1.73ms VoxelizeCards Clipmaps=[0,1,2,3]
0.38ms TraceCards 1 dispatch 1 groups
0.51ms TraceCards 1 dispatch 1 groups
Sparse SDF costs
12.06ms UpdateGlobalDistanceField
4.35ms LumenReflectiveTest.DirectionalLight_1 Shadowmap 1
2.30ms VoxelizeCards Clipmaps=[0,1,2,3]
0.69ms TraceCards 1 dispatch 1 groups
0.77ms TraceCards 1 dispatch 1 groups
Tested: TopazEntry PC, Reverb PC and PS5, EngineTests, QAGame, Rift, Frosty P_Construct_WP, FortGPUTestbed
#rb Krzysztof.Narkowicz
#ROBOMERGE-OWNER: Daniel.Wright
#ROBOMERGE-AUTHOR: daniel.wright
#ROBOMERGE-SOURCE: CL 15784493 in //UE5/Release-5.0-EarlyAccess/...
#ROBOMERGE-BOT: STARSHIP (Release-5.0-EarlyAccess -> Main) (v783-15756269)
#ROBOMERGE-CONFLICT from-shelf
[CL 15790658 by Daniel Wright in ue5-main branch]
2021-03-23 22:40:05 -04:00
|
|
|
const float TimeElapsed = (float)(FPlatformTime::Seconds() - StartTime);
|
|
|
|
|
|
|
|
|
|
if (TimeElapsed > 1.0f)
|
|
|
|
|
{
|
|
|
|
|
UE_LOG(LogMeshUtilities, Log, TEXT("Finished mesh card build in %.1fs %s"),
|
|
|
|
|
TimeElapsed,
|
|
|
|
|
*MeshName);
|
|
|
|
|
}
|
2020-07-06 18:58:26 -04:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
#else
|
|
|
|
|
UE_LOG(LogMeshUtilities, Warning, TEXT("Platform did not set USE_EMBREE, GenerateCardRepresentationData failed."));
|
|
|
|
|
return false;
|
|
|
|
|
#endif
|
|
|
|
|
}
|