You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
138 lines
3.2 KiB
Plaintext
138 lines
3.2 KiB
Plaintext
#include "../Common.ush"
|
|
#include "RayTracingCommon.ush"
|
|
|
|
// Change this value to force shader compilation of this shader.
#define MATERIAL_SORT_SHADER_VERSION 0xd74cbc88

// DIM_WAVE_OPS selects the wave-intrinsic prefix sum; it defaults to 0 (disabled) when not provided.
#if !defined(DIM_WAVE_OPS)
	#define DIM_WAVE_OPS 0
#endif // !defined(DIM_WAVE_OPS)
|
|
|
|
// NUM_ELEMENTS is the number of MaterialBuffer entries handled by one thread group.
// It is selected by DIM_SORT_SIZE: 0 -> 256, 1 -> 512, 2 -> 1024, 3 -> 2048, 4 -> 4096.
// Any other value is rejected at compile time.
#if DIM_SORT_SIZE == 0
	#define NUM_ELEMENTS 256
#elif DIM_SORT_SIZE == 1
	#define NUM_ELEMENTS 512
#elif DIM_SORT_SIZE == 2
	#define NUM_ELEMENTS 1024
#elif DIM_SORT_SIZE == 3
	#define NUM_ELEMENTS 2048
#elif DIM_SORT_SIZE == 4
	#define NUM_ELEMENTS 4096
#else
	#error Unknown sorting size
#endif // DIM_SORT_SIZE
|
|
|
|
// Max sort window (4096) / min threads per wave (32)
#define NUM_BINS 128

// Per-bin entry counters; MaterialSortLocal increments them atomically while building the histogram.
groupshared uint Bins[NUM_BINS];

#if !DIM_WAVE_OPS
	// Start offset of each bin, accumulated from Bins by the non-wave prefix sum.
	groupshared uint Offsets[NUM_BINS];
#else // !DIM_WAVE_OPS
	#define MIN_SUPPORTED_WAVE_SIZE 32

	// Number of MIN_SUPPORTED_WAVE_SIZE-wide waves needed to cover NUM_BINS threads (rounded up).
	#define NUM_WAVE_SUMS ((NUM_BINS+MIN_SUPPORTED_WAVE_SIZE-1) / MIN_SUPPORTED_WAVE_SIZE)

	// One partial sum per wave, combined into the final per-bin offsets.
	groupshared uint WaveSums[NUM_WAVE_SUMS];

	// No need for a separate array of offsets as prefix sum is computed in-place
	#define Offsets Bins
#endif // !DIM_WAVE_OPS
|
|
|
|
// Upper bound on the MaterialBuffer indices touched by MaterialSortLocal:
// entries whose index is not below this value are neither read nor written.
int NumTotalEntries;

// Payloads that MaterialSortLocal reads and then rewrites in place.
RWStructuredBuffer<FDeferredMaterialPayload> MaterialBuffer;

// Thread group width: 512 threads for sort windows of 1024 entries or more, 256 threads otherwise.
#if NUM_ELEMENTS >= 1024
	#define NUM_THREADS_X 512
#else
	#define NUM_THREADS_X 256
#endif // NUM_ELEMENTS >= 1024
|
|
|
|
// Simple local bin sort based on shared atomics.
//
// Each thread group owns a window of NUM_ELEMENTS consecutive MaterialBuffer entries and
// rewrites it in place so that entries with the same (SortKey % NUM_BINS) end up contiguous:
//   1. Histogram  - every entry atomically claims a slot inside its bin.
//   2. Prefix sum - the per-bin counts are turned into the start offset of each bin.
//   3. Scatter    - every entry is written to window start + bin offset + claimed slot.
// Entries whose index is not below NumTotalEntries are skipped in both the read and the write pass.
//
// GroupThread - index of the thread inside its group (SV_GroupThreadID).
// GroupIndex  - index of the thread group (SV_GroupID); selects the window to sort.
[numthreads(NUM_THREADS_X, 1, 1)]
void MaterialSortLocal(uint GroupThread : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
	// First MaterialBuffer index of the window owned by this group.
	const uint WindowStart = GroupIndex * NUM_ELEMENTS;

	// The first NUM_BINS threads of the group each look after one bin.
	const bool bOwnsBin = GroupThread < NUM_BINS;

	// Every thread handles this many entries, spaced NUM_THREADS_X apart inside the window.
	const uint ItemsPerThread = NUM_ELEMENTS / NUM_THREADS_X;

	// Clear the shared counters before any thread starts counting.
	if (bOwnsBin)
	{
		Bins[GroupThread] = 0;
		Offsets[GroupThread] = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Per-thread copies of the entries this thread handles, and the slot each one claimed in its bin.
	FDeferredMaterialPayload Payloads[NUM_ELEMENTS / NUM_THREADS_X];
	uint SlotInBin[NUM_ELEMENTS / NUM_THREADS_X];

	// Compute histogram

	for (uint LoadItem = 0; LoadItem < ItemsPerThread; ++LoadItem)
	{
		const uint SourceIndex = WindowStart + GroupThread + LoadItem * NUM_THREADS_X;
		if (SourceIndex < NumTotalEntries)
		{
			Payloads[LoadItem] = MaterialBuffer[SourceIndex];
			const uint Bin = Payloads[LoadItem].SortKey % NUM_BINS;

			// The value returned by the atomic is this entry's position within its bin.
			InterlockedAdd(Bins[Bin], 1, SlotInBin[LoadItem]);
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Compute prefix sum

#if DIM_WAVE_OPS
	{
		const uint LaneId = WaveGetLaneIndex();
		const uint WaveId = GroupThread / WaveGetLaneCount();

		// Step 1: each wave covering bins reduces its counts to a single total.
		uint BinCount = 0;
		if (bOwnsBin)
		{
			BinCount = Bins[GroupThread];
			const uint WaveTotal = WaveActiveSum(BinCount);
			if (LaneId == 0)
			{
				WaveSums[WaveId] = WaveTotal;
			}
		}
		GroupMemoryBarrierWithGroupSync();

		// Step 2: the first wave replaces the per-wave totals with their prefix sum.
		if (WaveId == 0 && LaneId < NUM_WAVE_SUMS)
		{
			WaveSums[LaneId] = WavePrefixSum(WaveSums[LaneId]);
		}
		GroupMemoryBarrierWithGroupSync();

		// Step 3: bin offset = sum of the preceding waves + prefix sum inside this wave.
		// Offsets aliases Bins here; the count was already copied to BinCount above.
		if (bOwnsBin)
		{
			Offsets[GroupThread] = WaveSums[WaveId] + WavePrefixSum(BinCount);
		}
	}
#else // DIM_WAVE_OPS
	if (bOwnsBin)
	{
		// Naive implementation that trades instructions for fewer barriers:
		// every bin thread adds up the counts of all bins that precede its own.
		for (uint PrevBin = 0; PrevBin < GroupThread; ++PrevBin)
		{
			Offsets[GroupThread] += Bins[PrevBin];
		}
	}
#endif // DIM_WAVE_OPS

	GroupMemoryBarrierWithGroupSync();

	// Write data

	for (uint StoreItem = 0; StoreItem < ItemsPerThread; ++StoreItem)
	{
		// Same bounds test as the read pass, so only entries that were actually loaded are written.
		const uint SourceIndex = WindowStart + GroupThread + StoreItem * NUM_THREADS_X;
		if (SourceIndex < NumTotalEntries)
		{
			const uint Bin = Payloads[StoreItem].SortKey % NUM_BINS;
			const uint DestIndex = WindowStart + Offsets[Bin] + SlotInBin[StoreItem];

			MaterialBuffer[DestIndex] = Payloads[StoreItem];
		}
	}
}
|