Files
UnrealEngineUWP/Engine/Shaders/Private/PostProcessSubsurfaceTile.usf
eric mcdaniel 502749c59a Fix for async compute on platforms with memory boundary restrictions on async compute dispatch indirect arguments
*** This change will incur a full shader invalidation across all platforms ***

Issues:
  - Some platforms require async compute dispatch indirect arguments to not cross specific memory boundaries
    - This places restrictions on the valid sizes for a dispatch indirect argument set.  We were not conforming to these restrictions which could result in GPU crashes on these async passes

Fixes:
  - FRHIDispatchIndirectParameters is padded out to meet per-platform memory boundary restrictions
    - This is driven via new per-platform preprocessor define PLATFORM_DISPATCH_INDIRECT_ARGUMENT_BOUNDARY_SIZE
    - Some platforms require FRHIDispatchIndirectParameters to align with their internal structure, hence we cannot universally size it to meet all platforms' requirements

  - Introduce new FRHIDispatchIndirectParametersNoPadding for uses when we explicitly do not want the padding and otherwise avoid the memory boundary restrictions

  - Revise and expand indirect argument validation code to catch further such issues in the future

  - Update shaders which write to dispatch indirect argument buffers to account for optional per-platform padding
    - New utility function WriteDispatchIndirectArgs introduced to facilitate this
    - platforms which require other than the default nonpadded dispatch indirect arguments must define DISPATCH_INDIRECT_UINT_COUNT and their own WriteDispatchIndirectArgs in their CommonPlatform.ush

  - move creation of DispatchIndirectGraphicsCommandSignature command signature to be per-platform
    - DispatchIndirectGraphicsCommandSignature and DispatchIndirectComputeCommandSignature stride changed to account for additional padding on impacted platforms

Testing:
  - ran Lyra with and without async compute Lumen on impacted platforms as well as Win64
  - ran FN replay on impacted platforms

#rb Krzysztof.Narkowicz, Ben.Woodhouse, Benjamin.Rouveyrol
#jira UE-167950
#preflight 6359563b2e6690262a11bc06

[CL 22862498 by eric mcdaniel in ue5-main branch]
2022-10-31 10:15:11 -04:00

105 lines
2.8 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessSubsurfaceTile.usf: Screenspace subsurface scattering tile shaders.
=============================================================================*/
#pragma once
#include "Common.ush"
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Permutation switch guarding the tile vertex shader (MainVS) and its parameters.
// Defaults to 0 (disabled) when the compile environment does not define it.
#ifndef SHADER_TILE_VS
#define SHADER_TILE_VS 0
#endif
// Permutation switch guarding the compute entry points (BuildIndirectDispatchArgsCS, ClearUAV).
// Defaults to 0 (disabled) when the compile environment does not define it.
#ifndef SUBSURFACE_COMPUTE
#define SUBSURFACE_COMPUTE 0
#endif
// Multiplier applied to the tile size and to the dispatch thread id in the compute shaders below;
// it is also the per-axis loop bound for the texels each ClearUAV thread writes.
#define LINE_TEXEL_SIZE 1
#if SUBSURFACE_COMPUTE
// ---- Shader parameters for BuildIndirectDispatchArgsCS ----
// Extent that is divided (rounding up) by the tile size to obtain the X/Y group counts.
int2 ViewportSize;
// Index of the ConditionBuffer entry that decides whether any groups are dispatched.
uint Offset;
// Condition values; a non-zero entry at Offset enables the dispatch.
Buffer<uint> ConditionBuffer;
// Destination for the indirect dispatch arguments (written through WriteDispatchIndirectArgs).
RWBuffer<uint> RWIndirectDispatchArgsBuffer;

/**
 * Single-thread compute shader that fills one indirect dispatch argument set.
 *
 * When ConditionBuffer[Offset] is non-zero the arguments are (tiles in X, tiles in Y, 1),
 * otherwise they are (0, 0, 0).
 *
 * @param DT_ID  Dispatch thread id, forwarded to WriteDispatchIndirectArgs as the argument-set index.
 */
[numthreads(1, 1, 1)]
void BuildIndirectDispatchArgsCS(uint DT_ID : SV_DispatchThreadID)
{
	// Start from an empty dispatch and only fill in real group counts when the condition is set.
	int3 GroupCount = int3(0, 0, 0);

	if (ConditionBuffer[Offset] > 0)
	{
		// Ceiling division of the viewport size by the tile footprint.
		const int2 TileCount = (ViewportSize + SUBSURFACE_TILE_SIZE * LINE_TEXEL_SIZE - 1) / (SUBSURFACE_TILE_SIZE * LINE_TEXEL_SIZE);
		GroupCount = int3(TileCount, 1);
	}

	WriteDispatchIndirectArgs(RWIndirectDispatchArgsBuffer, DT_ID, GroupCount.x, GroupCount.y, GroupCount.z);
}
// ---- Shader parameters for ClearUAV ----
// Exclusive upper bound for texel coordinates written to TextureOutput.
uint2 TextureExtent;
// Texel offset added to every thread's position.
uint2 ViewportMin;
// Texture whose texels are overwritten with zero.
RWTexture2D<float4> TextureOutput;

/**
 * Writes float4(0, 0, 0, 0) to the LINE_TEXEL_SIZE x LINE_TEXEL_SIZE texels owned by each thread.
 *
 * Threads whose first texel is outside TextureExtent do nothing; the remaining texel
 * coordinates are clamped to the last valid texel.
 *
 * @param DT_ID  Dispatch thread id; scaled by LINE_TEXEL_SIZE and offset by ViewportMin.
 */
[numthreads(SUBSURFACE_TILE_SIZE, SUBSURFACE_TILE_SIZE, 1)]
void ClearUAV(uint2 DT_ID : SV_DispatchThreadID)
{
	const uint2 BasePos = ViewportMin + DT_ID * LINE_TEXEL_SIZE;

	// Bail out as soon as either coordinate is out of range.
	if (any(BasePos >= TextureExtent))
	{
		return;
	}

	const uint2 LastTexel = TextureExtent - uint2(1,1);
	const float4 ClearValue = float4(0.0f, 0.0f, 0.0f, 0.0f);

	UNROLL
	for (uint OffsetX = 0; OffsetX < LINE_TEXEL_SIZE; ++OffsetX)
	{
		UNROLL
		for (uint OffsetY = 0; OffsetY < LINE_TEXEL_SIZE; ++OffsetY)
		{
			// Clamp so that a partially covered footprint never writes past the texture edge.
			const uint2 TexelPos = min(BasePos + uint2(OffsetX, OffsetY), LastTexel);
			TextureOutput[TexelPos] = ClearValue;
		}
	}
}
#endif
#if SHADER_TILE_VS
// ---- Shader parameters for MainVS ----
// Offset added to the tile-space vertex position.
int2 ViewMin;
// Upper clamp applied to the offset vertex position.
int2 ViewMax;
// Scale that converts the clamped position into UV.
float2 ExtentInverse;
// Not read by MainVS in this file.
uint TileType;
// Two uints per instance: tile X coordinate followed by tile Y coordinate.
Buffer<uint> TileDataBuffer;

/**
 * Tile vertex shader: expands one tile per instance into a quad.
 *
 * Vertex ids 0 and 3 map to the tile origin, 1 to +X, 2 and 4 to +X+Y, and 5 to +Y.
 * NOTE(review): this looks like two triangles drawn with six vertices per instance - confirm against the draw call.
 *
 * @param InVertexId    Vertex index within the instance; selects the quad corner.
 * @param InInstanceId  Instance index; selects the tile entry in TileDataBuffer.
 * @param Out           Receives the UV and the clip-space position.
 */
void MainVS(
	in uint InVertexId : SV_VertexID,
	in uint InInstanceId : SV_InstanceID,
	out FScreenVertexOutput Out)
{
	Out = (FScreenVertexOutput)0;

	// Fetch the tile coordinate pair stored for this instance.
	const uint TileDataIndex = InInstanceId * 2;
	const uint2 TileCoord = uint2(TileDataBuffer[TileDataIndex + 0], TileDataBuffer[TileDataIndex + 1]);

	// Decide which corner of the tile this vertex represents.
	const bool bFarX = (InVertexId == 1) || (InVertexId == 2) || (InVertexId == 4);
	const bool bFarY = (InVertexId == 2) || (InVertexId == 4) || (InVertexId == 5);

	uint2 TileVertex = TileCoord * SUBSURFACE_TILE_SIZE;
	if (bFarX)
	{
		TileVertex.x += SUBSURFACE_TILE_SIZE;
	}
	if (bFarY)
	{
		TileVertex.y += SUBSURFACE_TILE_SIZE;
	}

	// Offset by ViewMin and clamp to ViewMax before converting to UV.
	const int2 ClampedPos = min((int2)TileVertex + ViewMin, ViewMax);
	Out.UV = float2(ClampedPos) * ExtentInverse;

	// The position is derived from the value stored in Out.UV: UV (0,0) maps to (-1,+1) and UV (1,1) to (+1,-1).
	Out.Position = float4(Out.UV * float2(2.0f, -2.0f) + float2(-1.0, 1.0f), 0.0f, 1.0f);
}
#endif //SHADER_TILE_VS
// Fallback vertex shader: hands the incoming position and texture coordinate to DrawRectangle,
// passing Out.Position and Out.UV as the arguments it writes to.
//
// InPosition  Vertex position attribute (ATTRIBUTE0).
// InTexCoord  Vertex texture coordinate attribute (ATTRIBUTE1).
// Out         Vertex output; only touched through the DrawRectangle call.
//
// NOTE(review): this function sits after the SHADER_TILE_VS #endif, so it is compiled regardless of
// the SHADER_TILE_VS / SUBSURFACE_COMPUTE switches - confirm that is intended.
// NOTE(review): unlike MainVS, Out is not zero-initialised first; presumably DrawRectangle fully
// writes both members - confirm against its definition.
void SubsurfaceTileFallbackScreenPassVS(
in float4 InPosition : ATTRIBUTE0,
in float2 InTexCoord : ATTRIBUTE1,
out FScreenVertexOutput Out)
{
DrawRectangle(InPosition, InTexCoord, Out.Position, Out.UV);
}