Files
UnrealEngineUWP/Engine/Shaders/Private/ThreadGroupPrefixSum.ush
jamie hayes e4bae00099 Add opt-in ability for instanced draw calls to preserve their draw order with GPU Scene after culling.
Requires two additional compute passes post-culling to compact the draw command instances.
This feature is not currently supported on mobile platforms; more work is required to make instance compaction work with mobile vertex buffers.
Updated ISM to use order preservation for meshes that render with a translucent material

#rb ola.olsson, dmitriy.dyomin
#jira none
#preflight 61e98458c92021e535994cd7

#ROBOMERGE-AUTHOR: jamie.hayes
#ROBOMERGE-SOURCE: CL 18676076 in //UE5/Release-5.0/... via CL 18676089 via CL 18676097
#ROBOMERGE-BOT: UE5 (Release-Engine-Test -> Main) (v902-18672795)

[CL 18679861 by jamie hayes in ue5-main branch]
2022-01-20 15:29:06 -05:00

67 lines
2.6 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
/**
* ThreadGroupPrefixSum(Value, GroupThreadIndex [, GroupSumOut])
*
* Calculates the prefix sum (and optionally group sum) of a given value across threads of a thread group.
*
* EXAMPLE USES:
* float GroupSum;
* float PrefixSum = ThreadGroupPrefixSum(ValueFloat, GroupThreadIndex, GroupSum);
*
* int PrefixSum = ThreadGroupPrefixSum(ValueInt, GroupThreadIndex);
*
* NOTES:
* - PrefixSum is the EXCLUSIVE prefix sum: the sum of the value over all threads whose group thread index is LESS than the current thread's (so thread 0 always receives zero)
* - Only scalar types are currently supported
* - All threads in the group must be active when calling this method; it cannot be used in a branch (or after an early return)
* that doesn't include the entire group.
*/
// The scratch array below is sized by NUM_THREADS_PER_GROUP, so the includer must define it before including this file.
#ifndef NUM_THREADS_PER_GROUP
#error NUM_THREADS_PER_GROUP must be defined, and must be equal to the thread group size of the caller of ThreadGroupPrefixSum
#endif
// Re-use the same groupshared memory, in case the caller utilizes multiple overloads.
// Elements are stored as uint for every overload; each overload converts its value type to and from uint
// with the cast pair it was declared with, which is what lets the float, int and uint versions share one array.
// There are two rows of NUM_THREADS_PER_GROUP entries: every pass of the scan reads one row and writes the other,
// then the roles swap.
groupshared uint ThreadGroupPrefixSumWorkspace[2][NUM_THREADS_PER_GROUP];
// DECLARE_THREAD_GROUP_PREFIX_SUM(ValType, CastToUint, CastFromUint)
//
// Emits the two ThreadGroupPrefixSum overloads for one scalar type:
//   ValType ThreadGroupPrefixSum(ValType Value, uint GroupThreadIndex, out ValType GroupSum)
//   ValType ThreadGroupPrefixSum(ValType Value, uint GroupThreadIndex)
//
//   ValType      - scalar type being summed
//   CastToUint   - converts a ValType to the uint stored in ThreadGroupPrefixSumWorkspace
//   CastFromUint - converts a stored uint back to ValType
//
// Returns the sum of Value over all threads with a lower GroupThreadIndex (zero for thread 0).
// GroupSum receives the sum of Value over the entire group.
//
// The scan ping-pongs between the two rows of ThreadGroupPrefixSumWorkspace: on each pass every thread adds
// the partial sum held 'Offset' slots below it, and Offset doubles until it reaches the group size.
//
// All threads of the group must call this together, since it contains group-wide barriers.
//
// NOTE: comments inside the macro body must use the block form; a line comment would swallow the
// continued lines that follow it.
#define DECLARE_THREAD_GROUP_PREFIX_SUM(ValType, CastToUint, CastFromUint) \
	ValType ThreadGroupPrefixSum(ValType Value, uint GroupThreadIndex, out ValType GroupSum) \
	{ \
		uint Curr = 0; \
		ThreadGroupPrefixSumWorkspace[Curr][GroupThreadIndex] = CastToUint(Value); \
		GroupMemoryBarrierWithGroupSync(); \
		/* Iterate while Offset < group size (not <= size / 2) so that group sizes which are not a */ \
		/* power of two still run the final pass; for power-of-two sizes the passes are unchanged. */ \
		for (uint Offset = 1U; Offset < uint(NUM_THREADS_PER_GROUP); Offset *= 2U) \
		{ \
			const uint Next = 1U - Curr; \
			if (GroupThreadIndex < Offset) \
			{ \
				/* Nothing 'Offset' slots below this thread: its partial sum is already complete. */ \
				ThreadGroupPrefixSumWorkspace[Next][GroupThreadIndex] = \
					ThreadGroupPrefixSumWorkspace[Curr][GroupThreadIndex]; \
			} \
			else \
			{ \
				ThreadGroupPrefixSumWorkspace[Next][GroupThreadIndex] = CastToUint( \
					CastFromUint(ThreadGroupPrefixSumWorkspace[Curr][GroupThreadIndex]) + \
					CastFromUint(ThreadGroupPrefixSumWorkspace[Curr][GroupThreadIndex - Offset]) \
				); \
			} \
			Curr = Next; \
			GroupMemoryBarrierWithGroupSync(); \
		} \
		/* Row 'Curr' now holds the inclusive scan; the last slot is the total for the group. */ \
		const ValType Total = CastFromUint(ThreadGroupPrefixSumWorkspace[Curr][NUM_THREADS_PER_GROUP - 1]); \
		/* Clamp the neighbour index so thread 0 never forms an out-of-range address, even if both */ \
		/* arms of the select below end up being evaluated. */ \
		const uint PrevIndex = (GroupThreadIndex == 0U) ? 0U : (GroupThreadIndex - 1U); \
		const ValType PrevInclusive = CastFromUint(ThreadGroupPrefixSumWorkspace[Curr][PrevIndex]); \
		const ValType Exclusive = (GroupThreadIndex == 0U) ? CastFromUint(0) : PrevInclusive; \
		/* The workspace is shared by every call and overload. Wait until all threads have read their */ \
		/* results before returning, otherwise a thread that races ahead into another call could */ \
		/* overwrite row 0 while slower threads are still reading it here. */ \
		GroupMemoryBarrierWithGroupSync(); \
		GroupSum = Total; \
		return Exclusive; \
	} \
	ValType ThreadGroupPrefixSum(ValType Value, uint GroupThreadIndex) \
	{ \
		/* Convenience overload for callers that do not need the group total. */ \
		ValType UnusedGroupSum; \
		return ThreadGroupPrefixSum(Value, GroupThreadIndex, UnusedGroupSum); \
	}
// Instantiate the ThreadGroupPrefixSum overloads for each supported scalar type.
// Arguments are (value type, cast to the uint workspace type, cast back to the value type).
// float uses asuint/asfloat for the round trip through the workspace.
DECLARE_THREAD_GROUP_PREFIX_SUM(float, asuint, asfloat)
// int and uint use constructor-style casts for the round trip.
DECLARE_THREAD_GROUP_PREFIX_SUM(int, uint, int)
DECLARE_THREAD_GROUP_PREFIX_SUM(uint, uint, uint)
// The helper macro is only needed for the instantiations above; remove it so it does not leak to includers.
#undef DECLARE_THREAD_GROUP_PREFIX_SUM