You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
Immediate mode patch rasterizer optimizations:
- Implemented vertex caching to evaluate ~1 vertex per triangle instead of 3
- Separate immediate mode tessellation table that is constrained to one vert + one tri per lane
- Disable material range merging in raster binning, so UVDensities can be made scalar in immediate mode rasterizer
- Use wide LDS loads instead of permutes for some properties to reduce DS pressure

Patch rasterizer optimizations:
- Reduced max tess factor from 16 to 14 to increase occupancy of patch rasterizer
- Move UVDensities to per-patch work in patch rasterizer

Packed FTessellatedPatch data to fit in fewer registers to reduce VGPR/DS pressure
Added debug code to output SVG of tessellation pattern

#jira UE-197833
#rb brian.karis
[FYI] graham.wihlidal, jamie.hayes
[CL 30896683 by rune stubbe in ue5-main branch]
This commit is contained in:
@@ -223,6 +223,11 @@ FInstanceDynamicData WaveReadLaneAt(FInstanceDynamicData In, uint SrcIndex)
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Converts a linear Z value to clip-space Z for the given view.
// Only two entries of NaniteView.ViewToClip are read: [2][2] is applied as the scale and
// [3][2] as the offset, i.e. the result is LinearZ * ViewToClip[2][2] + ViewToClip[3][2].
// No perspective divide happens here; callers that need one must apply it themselves.
float ClipZFromLinearZ(FNaniteView NaniteView, float LinearZ)
|
||||
{
|
||||
return LinearZ * NaniteView.ViewToClip[2][2] + NaniteView.ViewToClip[3][2]; // TODO: Pack coefficients into single load?
|
||||
}
|
||||
|
||||
// Packs a (PageIndex, ClusterIndex) pair into a flat index based on max clusters per page.
|
||||
uint PackPoolClusterRef(uint PageIndex, uint ClusterIndex)
|
||||
{
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
#if NANITE_TESSELLATION
|
||||
|
||||
#define NANITE_TESSELLATION_DICE_USE_LDS 1
|
||||
|
||||
void RasterizeDicedTri(
|
||||
FRasterTri Tri,
|
||||
FRaster Raster,
|
||||
@@ -44,12 +46,16 @@ void RasterizeDicedTri(
|
||||
}
|
||||
}
|
||||
|
||||
groupshared float4 GroupPointPackedClip[ THREADGROUP_SIZE ]; // TODO: Convert to PackedClip ?
|
||||
groupshared float4 GroupNormalPackedClip[ THREADGROUP_SIZE ];
|
||||
|
||||
struct FDiceTask
|
||||
{
|
||||
FRaster Raster;
|
||||
FMaterialShader Shader;
|
||||
uint PixelValue;
|
||||
uint2 VisualizeValues;
|
||||
float4 UVDensities;
|
||||
bool bReverseWinding;
|
||||
|
||||
FNaniteTransformedVert Vert;
|
||||
@@ -59,17 +65,18 @@ struct FDiceTask
|
||||
uint PatchData;
|
||||
|
||||
// Vertex cache
|
||||
//uint FirstVert;
|
||||
//uint& NumCached;
|
||||
float3 CachedPackedSubpixelPosition;
|
||||
|
||||
void Init( float3 TessFactors, uint3 VertIndexes, uint TriIndex )
|
||||
{
|
||||
TessellatedPatch.Init( TessFactors, VertIndexes );
|
||||
TessellatedPatch.Init( TessFactors, VertIndexes, true );
|
||||
|
||||
PatchData = VertIndexes.x << 0;
|
||||
PatchData |= VertIndexes.y << 8;
|
||||
PatchData |= VertIndexes.z << 16;
|
||||
PatchData |= TriIndex << 24;
|
||||
|
||||
CachedPackedSubpixelPosition = 0.0f;
|
||||
}
|
||||
|
||||
FDiceTask CreateChild( uint ParentLaneIndex )
|
||||
@@ -80,12 +87,12 @@ struct FDiceTask
|
||||
ChildTask.Shader = Shader;
|
||||
ChildTask.PixelValue = PixelValue;
|
||||
ChildTask.VisualizeValues = VisualizeValues;
|
||||
ChildTask.UVDensities = UVDensities;
|
||||
ChildTask.bReverseWinding = bReverseWinding;
|
||||
|
||||
ChildTask.TessellatedPatch = WaveReadLaneAt( TessellatedPatch, ParentLaneIndex );
|
||||
ChildTask.Encoded = WaveReadLaneAt( Encoded, ParentLaneIndex );
|
||||
ChildTask.PatchData = WaveReadLaneAt( PatchData, ParentLaneIndex );
|
||||
//ChildTask.FirstVert = WaveReadLaneAt( FirstVert, ParentLaneIndex );
|
||||
|
||||
uint3 PatchVertIndexes;
|
||||
PatchVertIndexes.x = ( ChildTask.PatchData >> 0 ) & 0xff;
|
||||
@@ -93,18 +100,42 @@ struct FDiceTask
|
||||
PatchVertIndexes.z = ( ChildTask.PatchData >> 16 ) & 0xff;
|
||||
|
||||
FNaniteTransformedTri TransformedTri = MakeTransformedNaniteTriangle( Vert, PatchVertIndexes );
|
||||
|
||||
#if NANITE_TESSELLATION_DICE_USE_LDS
|
||||
// Alleviate DS pressure by using wide LDS loads instead of single component permutes
|
||||
for (uint Corner = 0; Corner < 3; ++Corner)
|
||||
{
|
||||
const uint SourceIndex = PatchVertIndexes[ Corner ];
|
||||
TransformedTri.Verts[ Corner ].PointClip = GroupPointPackedClip[ SourceIndex ];
|
||||
TransformedTri.Verts[ Corner ].NormalClip = GroupNormalPackedClip[ SourceIndex ];
|
||||
}
|
||||
#endif
|
||||
|
||||
ChildTask.Shader.TransformedTri = TransformedTri; // TODO mutable. This is weird
|
||||
|
||||
return ChildTask;
|
||||
}
|
||||
|
||||
void CacheToLDS()
|
||||
{
|
||||
#if NANITE_TESSELLATION_DICE_USE_LDS
|
||||
const uint LaneIndex = WaveGetLaneIndex();
|
||||
|
||||
void RunChild( uint LocalItemIndex );
|
||||
GroupPointPackedClip[LaneIndex] = Vert.PointClip;
|
||||
GroupNormalPackedClip[LaneIndex] = Vert.NormalClip;
|
||||
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void RunChild( inout FDiceTask ParentTask, bool bActive, uint LocalItemIndex );
|
||||
};
|
||||
|
||||
//groupshared float3 VertexCache[ THREADGROUP_SIZE ];
|
||||
//#define VertCache(x) VertexCache[ QueueOffset + ( (x) & ( LaneCount - 1 ) ) ]
|
||||
|
||||
void FDiceTask::RunChild( uint LocalItemIndex )
|
||||
void FDiceTask::RunChild( inout FDiceTask ParentTask, bool bActive, uint LocalItemIndex )
|
||||
{
|
||||
uint PatchIndex = PatchData >> 24;
|
||||
|
||||
@@ -114,8 +145,63 @@ void FDiceTask::RunChild( uint LocalItemIndex )
|
||||
VertIndexes.yz = VertIndexes.zy;
|
||||
|
||||
float4 Verts[3];
|
||||
|
||||
#if 1
|
||||
const float3 TessFactors = TessellatedPatch.GetTessFactors();
|
||||
|
||||
FBarycentrics Barycentrics;
|
||||
Barycentrics.Value = TessellatedPatch.GetVert( LocalItemIndex );
|
||||
Barycentrics.Value_dx = 0; // float3( -1, 1, 0 ) / TessFactors.x;
|
||||
Barycentrics.Value_dy = 0; // float3( 0, -1, 1 ) / TessFactors.y;
|
||||
|
||||
#if 0
|
||||
const bool bOrtho = IsOrthoProjection( Shader.NaniteView.ViewToClip );
|
||||
|
||||
float3 CornerPackedSubpixel0; // sub-pixel xy, linear z
|
||||
|
||||
// TODO: Unify these paths by having EvaluateDomain operate directly in PackedClip space
|
||||
BRANCH
|
||||
if (bOrtho)
|
||||
{
|
||||
// Optimize out .w work and lane permutes
|
||||
const float3 PointPackedClip = Shader.EvaluateDomain( UVDensities, Barycentrics ).xyz;
|
||||
CornerPackedSubpixel0 = PointPackedClip.xyz;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Optimize out .z work and lane permutes
|
||||
const float3 PointPackedClip = Shader.EvaluateDomain( UVDensities, Barycentrics ).xyw;
|
||||
CornerPackedSubpixel0 = float3( PointPackedClip.xy / PointPackedClip.z, PointPackedClip.z );
|
||||
}
|
||||
|
||||
CornerPackedSubpixel0.xy = CornerPackedSubpixel0.xy * Raster.ViewportScale + Raster.ViewportBias;
|
||||
CornerPackedSubpixel0.xy = floor( CornerPackedSubpixel0.xy );
|
||||
|
||||
const int3 RelativeVertIndexes = WaveGetLaneIndex() - ( LocalItemIndex - VertIndexes ); // Relative to thread 0 in wave
|
||||
const uint3 ReadLaneIndex = uint3( RelativeVertIndexes ) & ( WaveGetLaneCount() - 1u );
|
||||
|
||||
float3 CornerPackedSubpixel1 = WaveReadLaneAt( ParentTask.CachedPackedSubpixelPosition, ReadLaneIndex.y );
|
||||
float3 CornerPackedSubpixel2 = WaveReadLaneAt( ParentTask.CachedPackedSubpixelPosition, ReadLaneIndex.z );
|
||||
|
||||
ParentTask.CachedPackedSubpixelPosition = CornerPackedSubpixel0;
|
||||
|
||||
CornerPackedSubpixel1 = select( RelativeVertIndexes.y >= 0, WaveReadLaneAt( ParentTask.CachedPackedSubpixelPosition, ReadLaneIndex.y ), CornerPackedSubpixel1 );
|
||||
CornerPackedSubpixel2 = select( RelativeVertIndexes.z >= 0, WaveReadLaneAt( ParentTask.CachedPackedSubpixelPosition, ReadLaneIndex.z ), CornerPackedSubpixel2 );
|
||||
|
||||
BRANCH
|
||||
if (bOrtho)
|
||||
{
|
||||
Verts[0] = float4( CornerPackedSubpixel0, 1.0f );
|
||||
Verts[1] = float4( CornerPackedSubpixel1, 1.0f );
|
||||
Verts[2] = float4( CornerPackedSubpixel2, 1.0f );
|
||||
}
|
||||
else
|
||||
{
|
||||
Verts[0] = float4( CornerPackedSubpixel0.xy, float2( ClipZFromLinearZ( Shader.NaniteView, CornerPackedSubpixel0.z ), 1.0f ) / CornerPackedSubpixel0.z );
|
||||
Verts[1] = float4( CornerPackedSubpixel1.xy, float2( ClipZFromLinearZ( Shader.NaniteView, CornerPackedSubpixel1.z ), 1.0f ) / CornerPackedSubpixel1.z );
|
||||
Verts[2] = float4( CornerPackedSubpixel2.xy, float2( ClipZFromLinearZ( Shader.NaniteView, CornerPackedSubpixel2.z ), 1.0f ) / CornerPackedSubpixel2.z );
|
||||
}
|
||||
|
||||
#elif 0
|
||||
// Grab what's there for this triangle before updating cache. Otherwise cache would need to be double size.
|
||||
bool3 VertRead = false;
|
||||
|
||||
@@ -175,7 +261,7 @@ void FDiceTask::RunChild( uint LocalItemIndex )
|
||||
FRasterTri Tri = SetupTriangle< NANITE_SUBPIXEL_SAMPLES, !NANITE_TWO_SIDED >( Raster.ScissorRect, Verts );
|
||||
|
||||
// Immediate dicing doesn't do near plane culling
|
||||
if( min3( Verts[0].w, Verts[1].w, Verts[2].w ) < 0 )
|
||||
if( !bActive || min3( Verts[0].w, Verts[1].w, Verts[2].w ) < 0 )
|
||||
Tri.bIsValid = false;
|
||||
|
||||
if( Tri.bIsValid )
|
||||
@@ -202,7 +288,7 @@ struct FClusterSplitTask
|
||||
void Init( float3 TessFactors, uint VisibleClusterIndex, uint TriIndex );
|
||||
|
||||
FClusterSplitTask CreateChild( uint ParentLaneIndex );
|
||||
void RunChild( uint LocalItemIndex );
|
||||
void RunChild( inout FClusterSplitTask ParentTask, bool bActive, uint LocalItemIndex );
|
||||
};
|
||||
|
||||
void FClusterSplitTask::Init( float3 TessFactors, uint VisibleClusterIndex, uint TriIndex )
|
||||
@@ -212,7 +298,7 @@ void FClusterSplitTask::Init( float3 TessFactors, uint VisibleClusterIndex, uint
|
||||
Encoded.z = BarycentricMax << 16;
|
||||
Encoded.w = 0;
|
||||
|
||||
TessellatedPatch.Init( TessFactors, Encoded.yzw );
|
||||
TessellatedPatch.Init( TessFactors, Encoded.yzw, false );
|
||||
}
|
||||
|
||||
FClusterSplitTask FClusterSplitTask::CreateChild( uint ParentLaneIndex )
|
||||
@@ -223,8 +309,10 @@ FClusterSplitTask FClusterSplitTask::CreateChild( uint ParentLaneIndex )
|
||||
return ChildTask;
|
||||
}
|
||||
|
||||
void FClusterSplitTask::RunChild( uint LocalItemIndex )
|
||||
void FClusterSplitTask::RunChild( inout FClusterSplitTask ParentTask, bool bActive, uint LocalItemIndex )
|
||||
{
|
||||
if( !bActive )
|
||||
return;
|
||||
#if 0
|
||||
Encoded.yzw = TessellatedPatch.GetTriangleEncoded( LocalItemIndex );
|
||||
#else
|
||||
|
||||
@@ -425,6 +425,7 @@ void RasterBinBuild(uint RelativeClusterIndex : SV_DispatchThreadID, uint GroupT
|
||||
FCluster Cluster = GetCluster(VisibleCluster.PageIndex, VisibleCluster.ClusterIndex);
|
||||
const bool bWPOEnabled = (VisibleCluster.Flags & NANITE_CULLING_FLAG_ENABLE_WPO) != 0;
|
||||
const bool bSecondaryBin = !bWPOEnabled; // use secondary bin where applicable when WPO disabled
|
||||
const bool bMergeRanges = false; // Merging ranges is incompatible with tessellation. Cluster rasterizer assumes UVDensities is uniform. // TODO: Reenable it for non-tessellation materials?
|
||||
|
||||
BRANCH
|
||||
if (IsMaterialFastPath(Cluster))
|
||||
@@ -464,30 +465,32 @@ void RasterBinBuild(uint RelativeClusterIndex : SV_DispatchThreadID, uint GroupT
|
||||
RasterBin2 = GetRemappedRasterBinFromIndex(Cluster.Material2Index, InstanceData.PrimitiveId, RegularMaterialRasterBinCount, RenderFlags, bSecondaryBin);
|
||||
}
|
||||
|
||||
if (RasterBin0 == RasterBin1 && RasterBin0 == RasterBin2)
|
||||
{
|
||||
RasterLen0 = Cluster.NumTris;
|
||||
BatchCount0 += BatchCount1 + BatchCount2;
|
||||
}
|
||||
else if (RasterBin0 == RasterBin1)
|
||||
{
|
||||
RasterLen0 = (Cluster.Material0Length + Cluster.Material1Length);
|
||||
RasterLen2 = Material2Length;
|
||||
BatchCount0 += BatchCount1;
|
||||
}
|
||||
else if (RasterBin1 == RasterBin2)
|
||||
{
|
||||
RasterLen0 = Cluster.Material0Length;
|
||||
RasterLen1 = Cluster.NumTris - Cluster.Material0Length;
|
||||
BatchCount1 += BatchCount2;
|
||||
}
|
||||
else
|
||||
{
|
||||
RasterLen0 = Cluster.Material0Length;
|
||||
RasterLen1 = Cluster.Material1Length;
|
||||
RasterLen2 = Material2Length;
|
||||
}
|
||||
RasterLen0 = Cluster.Material0Length;
|
||||
RasterLen1 = Cluster.Material1Length;
|
||||
RasterLen2 = Material2Length;
|
||||
|
||||
BRANCH
|
||||
if (bMergeRanges)
|
||||
{
|
||||
if (RasterBin0 == RasterBin1 && RasterBin0 == RasterBin2)
|
||||
{
|
||||
RasterLen0 = Cluster.NumTris;
|
||||
BatchCount0 += BatchCount1 + BatchCount2;
|
||||
}
|
||||
else if (RasterBin0 == RasterBin1)
|
||||
{
|
||||
RasterLen0 = (Cluster.Material0Length + Cluster.Material1Length);
|
||||
RasterLen2 = Material2Length;
|
||||
BatchCount0 += BatchCount1;
|
||||
}
|
||||
else if (RasterBin1 == RasterBin2)
|
||||
{
|
||||
RasterLen0 = Cluster.Material0Length;
|
||||
RasterLen1 = Cluster.NumTris - Cluster.Material0Length;
|
||||
BatchCount1 += BatchCount2;
|
||||
}
|
||||
}
|
||||
|
||||
// The 0th material range is always non-zero length
|
||||
{
|
||||
ExportRasterBin(RasterBin0, VisibleClusterIndex, 0u, RasterLen0, BatchCount0, BatchInfoOffset0, Cluster, true, bSoftware);
|
||||
@@ -535,8 +538,7 @@ void RasterBinBuild(uint RelativeClusterIndex : SV_DispatchThreadID, uint GroupT
|
||||
const uint BatchCount = BitStreamReader_Read_RO(ClusterPageData, BatchCountStreamState, 4, 4);
|
||||
|
||||
// Check if raster slot matches the current run, and that the triangle range is contiguous.
|
||||
const bool bMergeRange = (RasterBinN == CurrentRangeBin);
|
||||
if (bMergeRange)
|
||||
if (bMergeRanges && (RasterBinN == CurrentRangeBin))
|
||||
{
|
||||
// Update current range
|
||||
CurrentRangeEnd = TriStart + TriLength;
|
||||
|
||||
@@ -475,12 +475,6 @@ struct FMaterialShader
|
||||
|
||||
return PointClip;
|
||||
}
|
||||
|
||||
float4 EvaluateDomain( uint TriIndex, FBarycentrics Barycentrics )
|
||||
{
|
||||
const float4 UVDensities = GetMaterialUVDensities( Cluster, InstanceData.PrimitiveId, TriIndex );
|
||||
return EvaluateDomain( UVDensities, Barycentrics );
|
||||
}
|
||||
#endif
|
||||
|
||||
bool EvaluatePixel( FBarycentrics Barycentrics, float4 SvPosition, inout FVisBufferPixel Pixel )
|
||||
|
||||
@@ -234,7 +234,7 @@ FRaster CreateRaster( FNaniteView NaniteView, FInstanceSceneData InstanceData, F
|
||||
}
|
||||
|
||||
#if PATCHES
|
||||
#define VERTEX_CACHE_SIZE 153
|
||||
#define VERTEX_CACHE_SIZE 120 // (MaxTessFactor+1)*(MaxTessFactor+2)/2
|
||||
#else
|
||||
#define VERTEX_CACHE_SIZE 256
|
||||
#endif
|
||||
@@ -311,9 +311,10 @@ void ClusterRasterize( uint VisibleIndex, uint GroupThreadIndex )
|
||||
#endif
|
||||
|
||||
#if NANITE_TESSELLATION
|
||||
|
||||
#if USES_DISPLACEMENT
|
||||
MaterialShader.DisplacementCenter = RasterBinMeta[ActiveRasterBin].MaterialDisplacementCenter;
|
||||
MaterialShader.DisplacementMagnitude = RasterBinMeta[ActiveRasterBin].MaterialDisplacementMagnitude;
|
||||
#endif
|
||||
|
||||
uint TriIndex = TriRange.Start + GroupThreadIndex;
|
||||
bool bTriValid = GroupThreadIndex < TriRange.Num;
|
||||
@@ -345,20 +346,23 @@ void ClusterRasterize( uint VisibleIndex, uint GroupThreadIndex )
|
||||
|
||||
float3 TessFactors = GetTessFactors( NaniteView, TriPointView );
|
||||
|
||||
const uint ImmediateDiceLimit = 8;
|
||||
const uint ImmediateSplitLimit = 8;
|
||||
|
||||
bool bCanDice = max3( TessFactors.x, TessFactors.y, TessFactors.z ) <= ImmediateDiceLimit;
|
||||
bool bCanDice = max3( TessFactors.x, TessFactors.y, TessFactors.z ) <= NANITE_TESSELLATION_TABLE_IMMEDIATE_SIZE;
|
||||
|
||||
if( WaveActiveAnyTrue( bCanDice ) )
|
||||
{
|
||||
FDiceTask DiceTask;
|
||||
DiceTask.Raster = Raster;
|
||||
DiceTask.Shader = MaterialShader;
|
||||
DiceTask.PixelValue = ( VisibleIndex + 1 ) << 7;
|
||||
DiceTask.VisualizeValues = GetVisualizeValues();
|
||||
DiceTask.UVDensities = GetMaterialUVDensities( Cluster, InstanceData.PrimitiveId, TriRange.Start );
|
||||
DiceTask.bReverseWinding = bReverseWindingOrder;
|
||||
DiceTask.Vert = Vert;
|
||||
|
||||
DiceTask.CacheToLDS();
|
||||
|
||||
uint NumVerts = 0;
|
||||
uint NumTris = 0;
|
||||
if( bTriValid && bCanDice )
|
||||
@@ -368,9 +372,6 @@ void ClusterRasterize( uint VisibleIndex, uint GroupThreadIndex )
|
||||
NumTris = DiceTask.TessellatedPatch.GetNumTris();
|
||||
}
|
||||
|
||||
//DiceTask.FirstVert = WavePrefixSum( NumVerts );
|
||||
//DiceTask.NumCached = 0;
|
||||
|
||||
BRANCH
|
||||
if ((RenderFlags & NANITE_RENDER_FLAG_WRITE_STATS) != 0u)
|
||||
{
|
||||
@@ -603,6 +604,7 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
FTessellatedPatch Patches_TessellatedPatch;
|
||||
FNaniteVertTransforms Patches_VertTransforms;
|
||||
FNaniteTransformedVert Patches_Verts;
|
||||
float4 Patches_UVDensities;
|
||||
|
||||
if (GroupThreadIndex < NumPatches * 3u)
|
||||
{
|
||||
@@ -657,10 +659,10 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
const uint3 VertIndexes = DecodeTriangleIndices(Cluster, Patches_SplitPatch.TriIndex);
|
||||
Patches_Verts = FetchTransformedNaniteVertex(PrimitiveData, Patches_InstanceData, Patches_VertTransforms, Cluster, VertIndexes[PatchCornerIndex], bEvaluateWPO);
|
||||
|
||||
Patches_UVDensities = GetMaterialUVDensities(Cluster, Patches_InstanceData.PrimitiveId, Patches_SplitPatch.TriIndex);
|
||||
|
||||
#if NANITE_TESSELLATION_PATCH_REFS
|
||||
const uint Pattern = VisiblePatch.y;
|
||||
Patches_TessellatedPatch.Pattern = Pattern;
|
||||
Patches_TessellatedPatch.TableOffsets = TessellationTable_Offsets.Load4(4 * 2 * (Pattern & 0xfff));
|
||||
Patches_TessellatedPatch.Init(VisiblePatch.y, false);
|
||||
#else
|
||||
|
||||
const float3 OuterPatchCornersView = mul(float4(Patches_Verts.PointWorld, 1), NaniteView.TranslatedWorldToView).xyz;
|
||||
@@ -675,7 +677,7 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
CornersView[2] = WaveReadLaneAt(InnerPatchCornersView, PatchStartLane + 2);
|
||||
|
||||
const float3 TessFactors = GetTessFactors(NaniteView, CornersView);
|
||||
Patches_TessellatedPatch.Init(TessFactors, Patches_EncodedPatch.yzw);
|
||||
Patches_TessellatedPatch.Init(TessFactors, Patches_EncodedPatch.yzw, false);
|
||||
Patches_SplitPatch.Decode(Patches_EncodedPatch);
|
||||
#endif
|
||||
}
|
||||
@@ -688,6 +690,7 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
const bool bReverseWindingOrder = WaveReadLaneAt(Patches_bReverseWindingOrders, PatchStartLane);
|
||||
const FSplitPatch SplitPatch = WaveReadLaneAt(Patches_SplitPatch, PatchStartLane);
|
||||
const FTessellatedPatch TessellatedPatch = WaveReadLaneAt(Patches_TessellatedPatch, PatchStartLane);
|
||||
const float4 UVDensities = WaveReadLaneAt(Patches_UVDensities, PatchStartLane);
|
||||
|
||||
// The following values can be used in a shader, but will most likely be dead code eliminated
|
||||
const FInstanceSceneData InstanceData = WaveReadLaneAt(Patches_InstanceData, PatchStartLane);
|
||||
@@ -717,8 +720,10 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
#endif
|
||||
MaterialShader.TransformedTri = MakeTransformedNaniteTriangle(Patches_Verts, PatchStartLane + uint3(0, 1, 2));
|
||||
|
||||
#if USES_DISPLACEMENT
|
||||
MaterialShader.DisplacementCenter = RasterBinMeta[ActiveRasterBin].MaterialDisplacementCenter;
|
||||
MaterialShader.DisplacementMagnitude = RasterBinMeta[ActiveRasterBin].MaterialDisplacementMagnitude;
|
||||
#endif
|
||||
|
||||
uint PixelValue = (SplitPatch.VisibleClusterIndex + 1) << 7;
|
||||
|
||||
@@ -745,7 +750,6 @@ void PatchRasterize( uint GroupID, uint GroupThreadIndex )
|
||||
}
|
||||
#endif
|
||||
|
||||
const float4 UVDensities = GetMaterialUVDensities( Cluster, InstanceData.PrimitiveId, SplitPatch.TriIndex );
|
||||
for( uint VertIndex = GroupThreadIndex; VertIndex < NumVerts; VertIndex += ThreadGroupSize )
|
||||
{
|
||||
FBarycentrics Barycentrics;
|
||||
@@ -877,7 +881,7 @@ VSOut CommonRasterizerVS(FNaniteView NaniteView, FPrimitiveSceneData PrimitiveDa
|
||||
MaterialShader.NaniteView = NaniteView;
|
||||
MaterialShader.Cluster = Cluster;
|
||||
|
||||
#if NANITE_TESSELLATION
|
||||
#if NANITE_TESSELLATION && USES_DISPLACEMENT
|
||||
MaterialShader.DisplacementCenter = RasterBinMeta[ActiveRasterBin].MaterialDisplacementCenter;
|
||||
MaterialShader.DisplacementMagnitude = RasterBinMeta[ActiveRasterBin].MaterialDisplacementMagnitude;
|
||||
#endif
|
||||
|
||||
@@ -63,7 +63,7 @@ struct FSplitTask
|
||||
uint Run();
|
||||
|
||||
FSplitTask CreateChild( uint ParentLaneIndex );
|
||||
void RunChild( uint LocalItemIndex );
|
||||
void RunChild( inout FSplitTask ParentTask, bool bActive, uint LocalItemIndex );
|
||||
};
|
||||
|
||||
void FSplitTask::Init( uint VisibleClusterIndex, uint TriIndex )
|
||||
@@ -222,7 +222,7 @@ uint FSplitTask::Run()
|
||||
}
|
||||
#endif
|
||||
|
||||
bool bNeedsSplitting = max3( TessFactors.x, TessFactors.y, TessFactors.z ) > TessellationTable_Size;
|
||||
bool bNeedsSplitting = max3( TessFactors.x, TessFactors.y, TessFactors.z ) > NANITE_TESSELLATION_TABLE_SIZE;
|
||||
|
||||
if( Cull.bWasOccluded )
|
||||
{
|
||||
@@ -250,11 +250,11 @@ uint FSplitTask::Run()
|
||||
if( WriteOffset < VisiblePatchesSize )
|
||||
{
|
||||
#if NANITE_TESSELLATION_PATCH_REFS || WRITE_STATS
|
||||
TessellatedPatch.Init( TessFactors, Encoded.yzw );
|
||||
TessellatedPatch.Init( TessFactors, Encoded.yzw, false );
|
||||
#endif
|
||||
|
||||
#if NANITE_TESSELLATION_PATCH_REFS || WRITE_STATS
|
||||
RWVisiblePatches.Store2( WriteOffset * 8, uint2( QueueOffset, TessellatedPatch.Pattern ) );
|
||||
RWVisiblePatches.Store2( WriteOffset * 8, uint2( QueueOffset, TessellatedPatch.GetPattern() ) );
|
||||
SplitWorkQueue.DataBuffer.Store4( QueueOffset * 16, Encoded );
|
||||
QueueOffset = ~0u;
|
||||
#else
|
||||
@@ -269,7 +269,7 @@ uint FSplitTask::Run()
|
||||
else
|
||||
{
|
||||
float3 SplitFactors = min( GetSplitFactors( TessFactors ), 8 );
|
||||
TessellatedPatch.Init( SplitFactors, Encoded.yzw );
|
||||
TessellatedPatch.Init( SplitFactors, Encoded.yzw, false );
|
||||
return TessellatedPatch.GetNumTris();
|
||||
}
|
||||
}
|
||||
@@ -285,8 +285,11 @@ FSplitTask FSplitTask::CreateChild( uint ParentLaneIndex )
|
||||
return ChildTask;
|
||||
}
|
||||
|
||||
void FSplitTask::RunChild( uint LocalItemIndex )
|
||||
void FSplitTask::RunChild( inout FSplitTask ParentTask, bool bActive, uint LocalItemIndex )
|
||||
{
|
||||
if ( !bActive )
|
||||
return;
|
||||
|
||||
FSplitPatch Patch;
|
||||
Patch.Decode( Encoded );
|
||||
|
||||
|
||||
@@ -43,9 +43,7 @@ float3 DecodeBarycentrics( uint Encoded )
|
||||
|
||||
|
||||
ByteAddressBuffer TessellationTable_Offsets;
|
||||
ByteAddressBuffer TessellationTable_Verts;
|
||||
ByteAddressBuffer TessellationTable_Indexes;
|
||||
static const uint TessellationTable_Size = 16;
|
||||
ByteAddressBuffer TessellationTable_VertsAndIndexes;
|
||||
|
||||
float InvDiceRate;
|
||||
uint MaxPatchesPerGroup;
|
||||
@@ -91,7 +89,8 @@ float3 GetTessFactors( FNaniteView NaniteView, float3 PointView[3] )
|
||||
float3 GetSplitFactors( float3 TessFactors )
|
||||
{
|
||||
#if 1
|
||||
return TessFactors / TessellationTable_Size;
|
||||
return TessFactors / NANITE_TESSELLATION_TABLE_SIZE;
|
||||
// TODO: Fix other modes to work with NANITE_TESSELLATION_TABLE_SIZE
|
||||
#elif 1
|
||||
const float SizeLog2 = log2( TessellationTable_Size );
|
||||
|
||||
@@ -123,18 +122,18 @@ float2 CalculateUVDerivativeForDomainPoint(
|
||||
|
||||
struct FTessellatedPatch
|
||||
{
|
||||
uint4 TableOffsets;
|
||||
uint Pattern;
|
||||
uint Swizzle;
|
||||
uint TableOffset;
|
||||
uint Pattern_NumVerts_NumTris;
|
||||
//uint Swizzle;
|
||||
|
||||
void Init( float3 TessFactors, inout uint3 VertData )
|
||||
void Init( float3 TessFactors, inout uint3 VertData, bool bImmediateTable )
|
||||
{
|
||||
TessFactors = ceil( TessFactors );
|
||||
TessFactors = clamp( TessFactors, 1, TessellationTable_Size );
|
||||
TessFactors = clamp( TessFactors, 1, NANITE_TESSELLATION_TABLE_SIZE );
|
||||
|
||||
float MaxTessFactor = max3( TessFactors.x, TessFactors.y, TessFactors.z );
|
||||
|
||||
Swizzle = 0b011010;
|
||||
//Swizzle = 0b011010;
|
||||
|
||||
// TessFactors in descending order to reduce size of table.
|
||||
// Rotate patch so TessFactors.x == MaxTessFactor
|
||||
@@ -142,13 +141,13 @@ struct FTessellatedPatch
|
||||
{
|
||||
VertData = VertData.yzx;
|
||||
TessFactors = TessFactors.yzx;
|
||||
Swizzle = 0b110100;
|
||||
//Swizzle = 0b110100;
|
||||
}
|
||||
else if( TessFactors.z == MaxTessFactor )
|
||||
{
|
||||
VertData = VertData.zxy;
|
||||
TessFactors = TessFactors.zxy;
|
||||
Swizzle = 0b101001;
|
||||
//Swizzle = 0b101001;
|
||||
}
|
||||
|
||||
// Sorting can flip winding which we need to undo later.
|
||||
@@ -158,34 +157,43 @@ struct FTessellatedPatch
|
||||
VertData.xy = VertData.yx;
|
||||
TessFactors.yz = TessFactors.zy;
|
||||
bFlipWinding = true;
|
||||
Swizzle ^= 0b000111;
|
||||
//Swizzle ^= 0b000111;
|
||||
}
|
||||
|
||||
Pattern = uint(
|
||||
uint Pattern = uint(
|
||||
TessFactors.x +
|
||||
TessFactors.y * TessellationTable_Size +
|
||||
TessFactors.z * TessellationTable_Size * TessellationTable_Size -
|
||||
(1 + TessellationTable_Size + TessellationTable_Size * TessellationTable_Size) );
|
||||
TessFactors.y * NANITE_TESSELLATION_TABLE_PO2_SIZE +
|
||||
TessFactors.z * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE -
|
||||
(1 + NANITE_TESSELLATION_TABLE_PO2_SIZE + NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) );
|
||||
|
||||
TableOffsets = TessellationTable_Offsets.Load4( 4*2 * Pattern );
|
||||
|
||||
Pattern |= bFlipWinding ? 0x1000 : 0;
|
||||
//Pattern |= Swizzle << 25;
|
||||
|
||||
Init( Pattern, bImmediateTable );
|
||||
}
|
||||
|
||||
uint GetNumVerts() { return TableOffsets[2] - TableOffsets[0]; }
|
||||
uint GetNumTris() { return TableOffsets[3] - TableOffsets[1]; }
|
||||
// Initializes the patch from an already-computed pattern value by looking it up in TessellationTable_Offsets.
//   InPattern       - pattern index in the low 12 bits; bit 12 (0x1000) may carry the flip-winding flag.
//   bImmediateTable - when true, the lookup is shifted by PO2_SIZE^3 entries to address the immediate mode table.
void Init( uint InPattern, bool bImmediateTable)
|
||||
{
|
||||
// Entry offset of the selected table: 0 for the regular table, PO2_SIZE^3 entries for the immediate one.
const uint Offset = bImmediateTable ? (NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE) : 0u;
|
||||
// Each table entry is two uints (8 bytes). The 0xfff mask drops the flip-winding bit so it doesn't affect the index.
const uint2 Tmp = TessellationTable_Offsets.Load2( 4 * 2 * ( Offset + ( InPattern & 0xfff ) ) );
|
||||
// First uint: base offset used when reading from TessellationTable_VertsAndIndexes.
TableOffset = Tmp.x;
|
||||
// Second uint is OR'ed on top of the pattern bits. GetNumVerts/GetNumTris read bits 13..21 and 22..31,
// so Tmp.y presumably stores the counts pre-shifted into those bits with the low 13 bits zero
// -- TODO confirm against the code that builds the table.
Pattern_NumVerts_NumTris = InPattern | Tmp.y;
|
||||
}
|
||||
|
||||
// Accessors for the fields packed into Pattern_NumVerts_NumTris:
//   bits  0..12 : pattern (12-bit table index plus the 0x1000 flip-winding flag)
//   bits 13..21 : vertex count (9 bits)
//   bits 22..31 : triangle count (remaining 10 bits)
uint GetPattern() { return Pattern_NumVerts_NumTris & 0x1FFFu; }
|
||||
uint GetNumVerts() { return ( Pattern_NumVerts_NumTris >> 13 ) & 0x1FFu; }
|
||||
uint GetNumTris() { return Pattern_NumVerts_NumTris >> 22; }
|
||||
|
||||
float3 GetVert( uint VertIndex )
|
||||
{
|
||||
uint BarycentricsEncoded = TessellationTable_Verts.Load( 4 * ( TableOffsets.x + VertIndex ) );
|
||||
uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndex ) );
|
||||
|
||||
return DecodeBarycentrics( BarycentricsEncoded );
|
||||
}
|
||||
|
||||
uint3 GetIndexes( uint TriIndex )
|
||||
{
|
||||
uint PackedIndexes = TessellationTable_Indexes.Load( 4 * ( TableOffsets.y + TriIndex ) );
|
||||
uint PackedIndexes = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + GetNumVerts() + TriIndex ) );
|
||||
|
||||
uint3 VertIndexes;
|
||||
VertIndexes[0] = ( PackedIndexes >> 0 ) & 1023;
|
||||
@@ -194,7 +202,7 @@ struct FTessellatedPatch
|
||||
|
||||
// Sorting TessFactors might have flipped the patch winding.
|
||||
//bool bFlipWinding = Pattern > 0xfff;
|
||||
bool bFlipWinding = Pattern & 0x1000;
|
||||
bool bFlipWinding = Pattern_NumVerts_NumTris & 0x1000;
|
||||
if( bFlipWinding )
|
||||
VertIndexes.yz = VertIndexes.zy;
|
||||
|
||||
@@ -205,6 +213,7 @@ struct FTessellatedPatch
|
||||
{
|
||||
uint3 VertIndexes = GetIndexes( TriIndex );
|
||||
|
||||
uint Pattern = GetPattern();
|
||||
uint3 Shift, Mask;
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
@@ -216,7 +225,7 @@ struct FTessellatedPatch
|
||||
uint3 Encoded;
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
uint BarycentricsEncoded = TessellationTable_Verts.Load( 4 * ( TableOffsets.x + VertIndexes[i] ) );
|
||||
uint BarycentricsEncoded = TessellationTable_VertsAndIndexes.Load( 4 * ( TableOffset + VertIndexes[i] ) );
|
||||
|
||||
uint3 Barycentrics;
|
||||
Barycentrics.x = BarycentricsEncoded & 0xffff;
|
||||
@@ -231,14 +240,14 @@ struct FTessellatedPatch
|
||||
|
||||
uint3 GetTessFactors()
|
||||
{
|
||||
uint Packed = Pattern & 0xfff;
|
||||
Packed -= (1 + TessellationTable_Size + TessellationTable_Size * TessellationTable_Size);
|
||||
uint Packed = GetPattern() & 0xfff;
|
||||
Packed -= (1 + NANITE_TESSELLATION_TABLE_PO2_SIZE + NANITE_TESSELLATION_TABLE_PO2_SIZE * NANITE_TESSELLATION_TABLE_PO2_SIZE);
|
||||
|
||||
uint3 TessFactors;
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
TessFactors[i] = Packed % TessellationTable_Size;
|
||||
Packed /= TessellationTable_Size;
|
||||
TessFactors[i] = Packed % NANITE_TESSELLATION_TABLE_PO2_SIZE;
|
||||
Packed /= NANITE_TESSELLATION_TABLE_PO2_SIZE;
|
||||
}
|
||||
return TessFactors;
|
||||
}
|
||||
@@ -247,9 +256,8 @@ struct FTessellatedPatch
|
||||
FTessellatedPatch WaveReadLaneAt( FTessellatedPatch In, uint SrcIndex )
|
||||
{
|
||||
FTessellatedPatch Out;
|
||||
Out.TableOffsets= WaveReadLaneAt( In.TableOffsets, SrcIndex );
|
||||
Out.Pattern = WaveReadLaneAt( In.Pattern, SrcIndex );
|
||||
Out.Swizzle = WaveReadLaneAt( In.Swizzle, SrcIndex );
|
||||
Out.TableOffset = WaveReadLaneAt( In.TableOffset, SrcIndex );
|
||||
Out.Pattern_NumVerts_NumTris = WaveReadLaneAt( In.Pattern_NumVerts_NumTris, SrcIndex );
|
||||
return Out;
|
||||
}
|
||||
|
||||
|
||||
@@ -290,10 +290,8 @@ void DistributeWork( FTask Task, uint GroupIndex, uint NumWorkItems )
|
||||
|
||||
FTask ChildTask = Task.CreateChild( SourceIndex );
|
||||
|
||||
if( WorkHead + LaneIndex < WorkTail )
|
||||
{
|
||||
ChildTask.RunChild( LocalItemIndex );
|
||||
}
|
||||
bool bActive = ( WorkHead + LaneIndex < WorkTail );
|
||||
ChildTask.RunChild( Task, bActive, LocalItemIndex );
|
||||
|
||||
// Did 1 wave of work
|
||||
WorkHead += LaneCount;
|
||||
|
||||
@@ -101,6 +101,10 @@
|
||||
#define NANITE_STREAMING_PRIORITY_CATEGORY_MASK ((1 << NANITE_NUM_STREAMING_PRIORITY_CATEGORY_BITS) - 1)
|
||||
#define NANITE_MAX_PRIORITY_BEFORE_PARENTS 0xFFFFFFE0u
|
||||
|
||||
// Largest tessellation factor the table covers. FTessellatedPatch::Init clamps factors to [1, this value],
// and FSplitTask::Run treats any patch whose max factor exceeds it as needing a split.
#define NANITE_TESSELLATION_TABLE_SIZE 14
|
||||
// Stride used when packing/unpacking the three tess factors into a pattern index (x + y*PO2 + z*PO2*PO2).
// Stays at 16 even though the max factor is 14.
#define NANITE_TESSELLATION_TABLE_PO2_SIZE 16
|
||||
// Largest max tess factor for which ClusterRasterize takes the immediate dicing path (bCanDice).
#define NANITE_TESSELLATION_TABLE_IMMEDIATE_SIZE 8
|
||||
|
||||
#define NANITE_VIEW_FLAG_HZBTEST 0x1
|
||||
#define NANITE_VIEW_FLAG_NEAR_CLIP 0x2
|
||||
#define NANITE_VIEW_FLAG_DISTANCE_CULL 0x4
|
||||
|
||||
Reference in New Issue
Block a user