You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
- Decals materials are evaluated using callable shaders in PathTracingKernel. - Decals are culled using a 2D grid similar to the existing light grid. - In order to correctly handle decal blending order, decals are sorted using the same logic as the rasterizer on CPU. The compute shader that builds the decal grid maintains the correct order. - Decal materials are wrapped in FRayTracingDecalMaterialShader. The instance parameters of each decal are bound using uniform buffers. #preflight 628f3fed2f2409bc1e7a6414 #rb Yuriy.ODonnell, chris.kulla, Jeremy.Moore [CL 20377336 by tiago costa in ue5-main branch]
1148 lines
32 KiB
Plaintext
1148 lines
32 KiB
Plaintext
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
Prefix.usf: USF file automatically included by shader preprocessor.
|
|
=============================================================================*/
|
|
|
|
#pragma once
|
|
|
|
#include "FP16Math.ush"
|
|
|
|
// Values of FEATURE_LEVEL.
|
|
#define FEATURE_LEVEL_ES2_REMOVED 1
|
|
#define FEATURE_LEVEL_ES3_1 2
|
|
#define FEATURE_LEVEL_SM3 3
|
|
#define FEATURE_LEVEL_SM4 4
|
|
#define FEATURE_LEVEL_SM5 5
|
|
#define FEATURE_LEVEL_SM6 6
|
|
#define FEATURE_LEVEL_MAX 7
|
|
|
|
// ---------------------------------------------------- Profile or compiler specific includes
|
|
// TODO: Have shader compiler including these platform specific USF files, that needs to work
|
|
// with ShaderCore.cpp's GetShaderIncludes().
|
|
|
|
#ifdef OVERRIDE_PLATFORMCOMMON_USH
|
|
#include "/Platform/Public/PlatformCommon.ush"
|
|
#elif COMPILER_METAL
|
|
// Helps with iteration when changing Metal shader code generation backend.
|
|
#include "Platform/Metal/MetalCommon.ush"
|
|
#elif COMPILER_VULKAN
|
|
// Helps with iteration when changing Vulkan shader code generation backend.
|
|
#include "Platform/Vulkan/VulkanCommon.ush"
|
|
#elif COMPILER_GLSL || COMPILER_GLSL_ES3_1
|
|
// Helps with iteration when changing Vulkan shader code generation backend.
|
|
#include "Platform/GL/GLCommon.ush"
|
|
#elif SM6_PROFILE || SM5_PROFILE
|
|
#include "Platform/D3D/D3DCommon.ush"
|
|
#endif
|
|
|
|
#include "/Engine/Public/BindlessResources.ush"
|
|
|
|
// ---------------------------------------------------- DDC invalidation
|
|
// to support the console command "r.InvalidateShaderCache"
|
|
#include "ShaderVersion.ush"
|
|
|
|
|
|
// ---------------------------------------------------- COMPILE_* and *_PROFILE defaults
|
|
|
|
#ifndef COMPILER_HLSLCC
|
|
#define COMPILER_HLSLCC 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_DXC
|
|
#define COMPILER_DXC 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_HLSL
|
|
#define COMPILER_HLSL 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_PSSL
|
|
#define COMPILER_PSSL 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_GLSL
|
|
#define COMPILER_GLSL 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_GLSL_ES3_1
|
|
#define COMPILER_GLSL_ES3_1 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_METAL
|
|
#define COMPILER_METAL 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_ATTRIBUTES
|
|
#define COMPILER_SUPPORTS_ATTRIBUTES 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_QUAD_PASS
|
|
#define COMPILER_SUPPORTS_QUAD_PASS 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_DUAL_SOURCE_BLENDING_SLOT_DECORATION
|
|
#define COMPILER_SUPPORTS_DUAL_SOURCE_BLENDING_SLOT_DECORATION 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_PRIMITIVE_SHADERS
|
|
#define COMPILER_SUPPORTS_PRIMITIVE_SHADERS 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_BARYCENTRIC_INTRINSICS
|
|
#define COMPILER_SUPPORTS_BARYCENTRIC_INTRINSICS 0
|
|
#endif
|
|
|
|
#ifndef PLATFORM_SUPPORTS_SRV_UB
|
|
#define PLATFORM_SUPPORTS_SRV_UB 0
|
|
#endif
|
|
|
|
#ifndef PLATFORM_SUPPORTS_ROV
|
|
#define PLATFORM_SUPPORTS_ROV 0
|
|
#define RasterizerOrderedTexture2D RWTexture2D
|
|
#endif
|
|
|
|
#ifndef SM6_PROFILE
|
|
#define SM6_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef SM5_PROFILE
|
|
#define SM5_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef OPENGL_PROFILE
|
|
#define OPENGL_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef ES3_1_PROFILE
|
|
#define ES3_1_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef METAL_PROFILE
|
|
#define METAL_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef METAL_MRT_PROFILE
|
|
#define METAL_MRT_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef METAL_SM5_PROFILE
|
|
#define METAL_SM5_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_VULKAN
|
|
#define COMPILER_VULKAN 0
|
|
#endif
|
|
|
|
#ifndef VULKAN_PROFILE
|
|
#define VULKAN_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef VULKAN_PROFILE_SM5
|
|
#define VULKAN_PROFILE_SM5 0
|
|
#endif
|
|
|
|
#ifndef IOS
|
|
#define IOS 0
|
|
#endif
|
|
|
|
#ifndef MAC
|
|
#define MAC 0
|
|
#endif
|
|
|
|
#ifndef VECTORVM_PROFILE
|
|
#define VECTORVM_PROFILE 0
|
|
#endif
|
|
|
|
#ifndef IR_LANGUAGE_DXBC
|
|
#define IR_LANGUAGE_DXBC 0
|
|
#endif
|
|
|
|
// 'static' asserts
|
|
#if COMPILER_GLSL || COMPILER_GLSL_ES3_1 || COMPILER_VULKAN || COMPILER_METAL
|
|
#if !COMPILER_HLSLCC
|
|
#error "Missing COMPILER_HLSLCC define!"
|
|
#endif
|
|
#endif
|
|
|
|
|
|
#if PLATFORM_SUPPORTS_SRV_UB
|
|
#define PLATFORM_SUPPORTS_SRV_UB_MACRO(...) __VA_ARGS__
|
|
#else
|
|
#define PLATFORM_SUPPORTS_SRV_UB_MACRO(...)
|
|
#endif
|
|
|
|
#ifndef PLATFORM_SUPPORTS_CALLABLE_SHADERS
|
|
#define PLATFORM_SUPPORTS_CALLABLE_SHADERS 0
|
|
#endif
|
|
|
|
// Whether the platforms support official SM6 wave intrinsics
|
|
// https://github.com/Microsoft/DirectXShaderCompiler/wiki/Wave-Intrinsics
|
|
#ifndef PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
|
|
#define PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS 0
|
|
#endif
|
|
|
|
#ifndef PLATFORM_SUPPORTS_REAL_TYPES
|
|
#define PLATFORM_SUPPORTS_REAL_TYPES 0
|
|
#elif FORCE_FLOATS
|
|
#error Collision between FORCE_FLOATS and PLATFORM_SUPPORTS_REAL_TYPES
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_PACK_INTRINSICS
|
|
#define COMPILER_SUPPORTS_PACK_INTRINSICS 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_WAVE_32_64_MODE
|
|
#define COMPILER_SUPPORTS_WAVE_32_64_MODE 0
|
|
#endif
|
|
|
|
//Platforms that don't run the editor shouldn't need editor features in the shaders.
|
|
#ifndef PLATFORM_SUPPORTS_EDITOR_SHADERS
|
|
#define PLATFORM_SUPPORTS_EDITOR_SHADERS 1
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_HLSL2021
|
|
#define COMPILER_SUPPORTS_HLSL2021 0
|
|
#endif
|
|
|
|
#ifndef PLATFORM_SUPPORTS_UB_STRUCT
|
|
#define PLATFORM_SUPPORTS_UB_STRUCT 0
|
|
#endif
|
|
|
|
#if PLATFORM_SUPPORTS_UB_STRUCT
|
|
#define UB_CB_NAME(X) UniformBufferConstants_##X
|
|
#else
|
|
#define UB_CB_NAME(X) X
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Alternative floating point types
|
|
|
|
#ifndef FORCE_FLOATS
|
|
#define FORCE_FLOATS 0
|
|
#endif
|
|
|
|
#if ((!(ES3_1_PROFILE || METAL_PROFILE)) && !PLATFORM_SUPPORTS_REAL_TYPES) || FORCE_FLOATS
|
|
// Always use floats when using the ES3/METAL compiler, because platforms not optimized for lower precision,
|
|
// And we don't want potential side effects on other platforms
|
|
#define half float
|
|
#define half1 float1
|
|
#define half2 float2
|
|
#define half3 float3
|
|
#define half4 float4
|
|
#define half3x3 float3x3
|
|
#define half4x4 float4x4
|
|
#define half4x3 float4x3
|
|
#define fixed float
|
|
#define fixed1 float1
|
|
#define fixed2 float2
|
|
#define fixed3 float3
|
|
#define fixed4 float4
|
|
#define fixed3x3 float3x3
|
|
#define fixed4x4 float4x4
|
|
#define fixed4x3 float4x3
|
|
#elif (VULKAN_PROFILE) || (COMPILER_GLSL_ES3_1 && !(COMPILER_HLSLCC && COMPILER_HLSLCC == 1))
|
|
// For VULKAN and OPENGL ES31 use RelaxedPrecision for half floats
|
|
#define half min16float
|
|
#define half2 min16float2
|
|
#define half3 min16float3
|
|
#define half4 min16float4
|
|
#define half3x3 min16float3x3
|
|
#define half3x4 min16float3x4
|
|
#define half4x4 min16float4x4
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Profile config
|
|
|
|
|
|
#if SM6_PROFILE
|
|
// SM6 = full dx12 features (high end UE5 rendering)
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM6
|
|
|
|
#elif SM5_PROFILE
|
|
// SM5 = full dx11 features (high end UE4 rendering)
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
|
|
#elif SWITCH_PROFILE || SWITCH_PROFILE_FORWARD
|
|
#undef ES3_1_PROFILE
|
|
|
|
#if SWITCH_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
#else
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
|
|
// @todo switch: maybe all uses of this should check feature level not profile?
|
|
#define ES3_1_PROFILE 1
|
|
#endif
|
|
|
|
#elif VULKAN_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
|
|
// @todo: replace usage of ES3_1_PROFILE with FEATURE_LEVEL where appropriate
|
|
#undef ES3_1_PROFILE
|
|
#define ES3_1_PROFILE 1
|
|
|
|
#elif VULKAN_PROFILE_SM5
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
#define STENCIL_COMPONENT_SWIZZLE .x
|
|
|
|
#elif METAL_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
|
|
// @todo metal: remove this and make sure all uses handle METAL_PROFILE
|
|
#undef ES3_1_PROFILE
|
|
#define ES3_1_PROFILE 1
|
|
#define FCOLOR_COMPONENT_SWIZZLE .rgba
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#define STENCIL_COMPONENT_SWIZZLE .x
|
|
|
|
#elif METAL_MRT_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
#define FCOLOR_COMPONENT_SWIZZLE .rgba
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#define STENCIL_COMPONENT_SWIZZLE .x
|
|
|
|
#elif METAL_SM5_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
#define FCOLOR_COMPONENT_SWIZZLE .rgba
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#define STENCIL_COMPONENT_SWIZZLE .x
|
|
|
|
#elif ES3_1_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
|
|
|
|
#if COMPILER_GLSL_ES3_1
|
|
#define FCOLOR_COMPONENT_SWIZZLE .bgra
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#else
|
|
#define FCOLOR_COMPONENT_SWIZZLE .rgba
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#endif
|
|
|
|
#if COMPILER_GLSL || COMPILER_GLSL_ES3_1
|
|
// A8 textures when sampled have their component in R
|
|
#define A8_SAMPLE_MASK .r
|
|
#endif
|
|
#elif VECTORVM_PROFILE
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
|
|
#endif
|
|
|
|
#ifndef FEATURE_LEVEL
|
|
#error FEATURE_LEVEL has not been defined for this platform. Add it to Platform.ush or in the Common.ush file for this platform
|
|
|
|
#define FEATURE_LEVEL FEATURE_LEVEL_MAX
|
|
|
|
#endif
|
|
|
|
#if COMPILER_METAL
|
|
// Metal does not allow writes to A8 textures so we are faking it by making them all R8.
|
|
// WARNING: If this changes or the type in MetalRHI changes both must be updated!
|
|
#define A8_SAMPLE_MASK .r
|
|
#endif
|
|
|
|
|
|
// ---------------------------------------------------- Swizzle defaults
|
|
|
|
// If we didn't request color component swizzling, just make it empty
|
|
#ifndef FCOLOR_COMPONENT_SWIZZLE
|
|
#define FCOLOR_COMPONENT_SWIZZLE .rgba
|
|
#endif
|
|
|
|
#ifndef FMANUALFETCH_COLOR_COMPONENT_SWIZZLE
|
|
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
|
|
#endif
|
|
|
|
#ifndef STENCIL_COMPONENT_SWIZZLE
|
|
#define STENCIL_COMPONENT_SWIZZLE .g
|
|
#endif
|
|
|
|
#ifndef A8_SAMPLE_MASK
|
|
#define A8_SAMPLE_MASK .a
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Platform dependent supports
|
|
|
|
// On mobile use interpolator for ClipDistance as it does not work/supported on all mobile platforms
|
|
#if (FEATURE_LEVEL == FEATURE_LEVEL_ES3_1)
|
|
#define SV_ClipDistance OUTCLIPDIST
|
|
#endif
|
|
|
|
// non-editor platforms generally never want development/editor features.
|
|
#ifndef PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS
|
|
#define PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS 1
|
|
#endif
|
|
|
|
#ifndef MOBILE_EMULATION
|
|
#define MOBILE_EMULATION 0
|
|
#endif
|
|
|
|
// Whether the platform supports independent texture and samplers
|
|
// When enabled, different texture lookups can share samplers to allow more artist samplers in the base pass
|
|
// Ideally this would just be enabled for all SM4 and above feature level platforms
|
|
// @todo metal mrt: No reason this can't work with Metal, once cross compiler is fixed
|
|
#ifndef SUPPORTS_INDEPENDENT_SAMPLERS
|
|
#define SUPPORTS_INDEPENDENT_SAMPLERS (SM6_PROFILE || SM5_PROFILE || METAL_MRT_PROFILE || METAL_SM5_PROFILE || VULKAN_PROFILE_SM5 || VULKAN_PROFILE)
|
|
#endif
|
|
|
|
// Whether the platform support pixel coverage on MSAA targets (SV_Coverage).
|
|
#define SUPPORTS_PIXEL_COVERAGE (FEATURE_LEVEL >= FEATURE_LEVEL_SM5 && !COMPILER_GLSL && !MOBILE_EMULATION)
|
|
|
|
// Must match C++ RHISupports4ComponentUAVReadWrite
|
|
// D3D11 does not support multi-component loads from a UAV: "error X3676: typed UAV loads are only allowed for single-component 32-bit element types"
|
|
#ifndef PLATFORM_SUPPORTS_4COMPONENT_UAV_READ_WRITE
|
|
#define PLATFORM_SUPPORTS_4COMPONENT_UAV_READ_WRITE (XBOXONE_PROFILE || COMPILER_METAL)
|
|
#endif
|
|
|
|
|
|
// ---------------------------------------------------- Compiler specific defaults and fallbacks
|
|
|
|
#if !defined(PLATFORM_BREAK)
|
|
#define PLATFORM_BREAK()
|
|
#endif
|
|
|
|
#if !defined(PLATFORM_ASSERT)
|
|
#define PLATFORM_ASSERT(condition, assert_id)
|
|
#define PLATFORM_ASSERT1(condition, assert_id, a)
|
|
#define PLATFORM_ASSERT2(condition, assert_id, a, b)
|
|
#define PLATFORM_ASSERT3(condition, assert_id, a, b, c)
|
|
#define PLATFORM_ASSERT4(condition, assert_id, a, b, c, d)
|
|
#endif
|
|
|
|
#if !defined(PLATFORM_SUPPORTS_SHADER_TIMESTAMP)
|
|
#define PLATFORM_SUPPORTS_SHADER_TIMESTAMP 0
|
|
#endif
|
|
|
|
// Hlslcc platforms ignore the uniform keyword as it can't properly optimize flow
|
|
#if COMPILER_HLSLCC
|
|
#define uniform
|
|
#endif
|
|
|
|
#if PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
|
|
#define COMPILER_SUPPORTS_WAVE_ONCE 1
|
|
#define COMPILER_SUPPORTS_WAVE_VOTE 1
|
|
#define COMPILER_SUPPORTS_WAVE_MINMAX 1
|
|
#define COMPILER_SUPPORTS_WAVE_BIT_ORAND 1
|
|
#endif
|
|
|
|
// If compiler lane management in a wave.
|
|
// WaveGetLaneCount()
|
|
// WaveGetLaneIndex()
|
|
// if (WaveIsFirstLane()) { ... }
|
|
#ifndef COMPILER_SUPPORTS_WAVE_ONCE
|
|
#define COMPILER_SUPPORTS_WAVE_ONCE 0
|
|
#endif
|
|
|
|
// Whether the compiler exposes voting on all lanes:
|
|
// WaveActiveAnyTrue(MyBool)
|
|
// WaveActiveAnyTrue(MyBool)
|
|
// WaveActiveAllEqual(MyBool)
|
|
#ifndef COMPILER_SUPPORTS_WAVE_VOTE
|
|
#define COMPILER_SUPPORTS_WAVE_VOTE 0
|
|
#endif
|
|
|
|
// Whether the compiler exposes min max instructions across all lane of the wave.
|
|
// WaveActiveMin(MyFloat)
|
|
// WaveActiveMin(MyInt)
|
|
// WaveActiveMin(MyUint)
|
|
// WaveActiveMax(MyFloat)
|
|
// WaveActiveMax(MyInt)
|
|
// WaveActiveMax(MyUint)
|
|
#ifndef COMPILER_SUPPORTS_WAVE_MINMAX
|
|
#define COMPILER_SUPPORTS_WAVE_MINMAX 0
|
|
#endif
|
|
|
|
// Whether the compiler exposes OR and AND bit operation all lanes:
|
|
// WaveActiveBitAnd(MyMask)
|
|
// WaveActiveBitOr(MyMask)
|
|
#ifndef COMPILER_SUPPORTS_WAVE_BIT_ORAND
|
|
#define COMPILER_SUPPORTS_WAVE_BIT_ORAND 0
|
|
#endif
|
|
|
|
// Whether the compiler exposes GCN's ds_swizzle_b32 instruction.
|
|
// float WaveLaneSwizzleGCN(float x, const uint and_mask, const uint or_mask, const uint xor_mask)
|
|
#ifndef COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN
|
|
#define COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN 0
|
|
#endif
|
|
|
|
// Mirrors GRHISupportsPrimitiveShaders.
|
|
#ifndef COMPILER_SUPPORTS_PRIMITIVE_SHADERS
|
|
#define COMPILER_SUPPORTS_PRIMITIVE_SHADERS 0
|
|
#endif
|
|
|
|
// Mirrors GRHISupportsRectTopology.
|
|
#ifndef PLATFORM_SUPPORTS_RECT_LIST
|
|
#define PLATFORM_SUPPORTS_RECT_LIST 0
|
|
#endif
|
|
|
|
// Mirrors GRHISupportsAtomicUInt64.
|
|
#ifndef PLATFORM_SUPPORTS_ATOMIC_UINT64
|
|
#define PLATFORM_SUPPORTS_ATOMIC_UINT64 0
|
|
#endif
|
|
|
|
// Support for depth test running both before and after pixel shader
|
|
#ifndef COMPILER_SUPPORTS_DEPTHSTENCIL_EARLYTEST_LATEWRITE
|
|
#define COMPILER_SUPPORTS_DEPTHSTENCIL_EARLYTEST_LATEWRITE 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_SHADER_YIELD
|
|
#define COMPILER_SUPPORTS_SHADER_YIELD 0
|
|
// No-op fallback used when the compiler exposes no native shader-yield hint
// (COMPILER_SUPPORTS_SHADER_YIELD == 0).
void ShaderYield()
{
	// Intentionally empty.
}
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_GATHER_LOD_RED
|
|
#define COMPILER_SUPPORTS_GATHER_LOD_RED 0
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_GATHER_UINT
|
|
#define COMPILER_SUPPORTS_GATHER_UINT 0
|
|
#endif
|
|
|
|
#if ES3_1_PROFILE && !METAL_PROFILE
|
|
#define HALF_TYPE half
|
|
#define HALF2_TYPE half2
|
|
#define HALF3_TYPE half3
|
|
#define HALF4_TYPE half4
|
|
#else
|
|
#define HALF_TYPE float
|
|
#define HALF2_TYPE float2
|
|
#define HALF3_TYPE float3
|
|
#define HALF4_TYPE float4
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Compiler attributes
|
|
|
|
#if SM6_PROFILE || SM5_PROFILE || COMPILER_SUPPORTS_ATTRIBUTES
|
|
|
|
/** Avoids flow control constructs. */
|
|
#define UNROLL [unroll]
|
|
#define UNROLL_N(N) [unroll(N)]
|
|
|
|
/** Gives preference to flow control constructs. */
|
|
#define LOOP [loop]
|
|
|
|
/** Performs branching by using control flow instructions like jmp and label. */
|
|
#define BRANCH [branch]
|
|
|
|
/** Performs branching by using the cnd instructions. */
|
|
#define FLATTEN [flatten]
|
|
|
|
/** Allows a compute shader loop termination condition to be based off of a UAV read. The loop must not contain synchronization intrinsics. */
|
|
#define ALLOW_UAV_CONDITION [allow_uav_condition]
|
|
|
|
#endif // SM6_PROFILE || SM5_PROFILE || COMPILER_SUPPORTS_ATTRIBUTES
|
|
|
|
#if SM6_PROFILE || SM5_PROFILE || METAL_MRT_PROFILE || METAL_SM5_PROFILE || ES3_1_PROFILE || VULKAN_PROFILE_SM5
|
|
#define EARLYDEPTHSTENCIL [earlydepthstencil]
|
|
#endif
|
|
|
|
#if COMPILER_SUPPORTS_DUAL_SOURCE_BLENDING_SLOT_DECORATION
|
|
#define DUAL_SOURCE_BLENDING_SLOT(SLOT) [[vk::location(0), vk::index(SLOT)]]
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Compiler attribute fallbacks
|
|
|
|
#ifndef UNROLL
|
|
#define UNROLL
|
|
#endif
|
|
|
|
#ifndef UNROLL_N
|
|
#define UNROLL_N(N)
|
|
#endif
|
|
|
|
#ifndef LOOP
|
|
#define LOOP
|
|
#endif
|
|
|
|
#ifndef BRANCH
|
|
#define BRANCH
|
|
#endif
|
|
|
|
#ifndef FLATTEN
|
|
#define FLATTEN
|
|
#endif
|
|
|
|
#ifndef ALLOW_UAV_CONDITION
|
|
#define ALLOW_UAV_CONDITION
|
|
#endif
|
|
|
|
#ifndef INVARIANT
|
|
#define INVARIANT(X) (X)
|
|
#endif
|
|
|
|
#ifndef ENABLE_RE_Z
|
|
#define ENABLE_RE_Z
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_NOINLINE
|
|
#define COMPILER_SUPPORTS_NOINLINE 0
|
|
#endif
|
|
|
|
// Informs compiler we want a subroutine created, which can be used to
|
|
// decrease register pressure in certain situations. Code is kept separate,
|
|
// and a set number of registers are used on each call. Should only be used
|
|
// with extensive profiling, as the default inlining behavior is usually best.
|
|
// DXIL: https://github.com/microsoft/DirectXShaderCompiler/blob/master/tools/clang/test/HLSLFileCheck/hlsl/functions/attribute/noinline.hlsl
|
|
// SPIRV: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html (DontInline)
|
|
#if COMPILER_SUPPORTS_NOINLINE
|
|
#define NOINLINE [noinline]
|
|
#else
|
|
#define NOINLINE
|
|
#endif
|
|
|
|
#ifndef EARLYDEPTHSTENCIL
|
|
#define EARLYDEPTHSTENCIL
|
|
#endif
|
|
|
|
#ifndef DUAL_SOURCE_BLENDING_SLOT
|
|
#define DUAL_SOURCE_BLENDING_SLOT(SLOT)
|
|
#endif
|
|
|
|
#ifndef DEPTHSTENCIL_EARLYTEST_LATEWRITE
|
|
#define DEPTHSTENCIL_EARLYTEST_LATEWRITE
|
|
#endif
|
|
|
|
#ifndef STRONG_TYPE
|
|
#define STRONG_TYPE
|
|
#endif
|
|
|
|
#ifndef StrongTypedBuffer
|
|
#define StrongTypedBuffer Buffer
|
|
#endif
|
|
|
|
#ifndef RWCoherentBuffer
|
|
#define RWCoherentBuffer(TYPE) RWBuffer<TYPE>
|
|
#endif
|
|
|
|
#ifndef RWCoherentStructuredBuffer
|
|
#define RWCoherentStructuredBuffer(TYPE) RWStructuredBuffer<TYPE>
|
|
#endif
|
|
|
|
#ifndef RWCoherentByteAddressBuffer
|
|
#define RWCoherentByteAddressBuffer RWByteAddressBuffer
|
|
#endif
|
|
|
|
#ifndef ISOLATE
|
|
#define ISOLATE
|
|
#endif
|
|
|
|
#ifndef HOIST_DESCRIPTORS
|
|
#define HOIST_DESCRIPTORS
|
|
#endif
|
|
|
|
#ifndef CALL_SITE_DEBUGLOC
|
|
#define CALL_SITE_DEBUGLOC
|
|
#endif
|
|
|
|
#ifndef SCHEDULER_MIN_PRESSURE
|
|
#define SCHEDULER_MIN_PRESSURE
|
|
#endif
|
|
|
|
#ifndef MAX_OCCUPENCY
|
|
#define MAX_OCCUPENCY
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Interpolator attribute fallbacks
|
|
|
|
#ifndef COMPRESSED_16_FLOAT
|
|
#define COMPRESSED_16_FLOAT
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_16_UNORM
|
|
#define COMPRESSED_16_UNORM
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_16_SNORM
|
|
#define COMPRESSED_16_SNORM
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_16_UINT
|
|
#define COMPRESSED_16_UINT
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_16_INT
|
|
#define COMPRESSED_16_INT
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_8_UNORM
|
|
#define COMPRESSED_8_UNORM
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_8_SNORM
|
|
#define COMPRESSED_8_SNORM
|
|
#endif
|
|
|
|
#ifndef COMPRESSED_8_UINT
|
|
#define COMPRESSED_8_UINT
|
|
#endif
|
|
|
|
#ifndef CUSTOM_INTERPOLATION
|
|
#define CUSTOM_INTERPOLATION nointerpolation
|
|
#endif
|
|
|
|
// ---------------------------------------------------- Global uses
|
|
|
|
#define USE_DEVELOPMENT_SHADERS (COMPILE_SHADERS_FOR_DEVELOPMENT && PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS)
|
|
|
|
|
|
// ---------------------------------------------------- Standard sizes of the indirect parameter structs
|
|
|
|
// sizeof(FRHIDispatchIndirectParameters) / sizeof(uint)
|
|
#define DISPATCH_INDIRECT_UINT_COUNT 3
|
|
|
|
// sizeof(FRHIDrawIndirectParameters) / sizeof(uint)
|
|
#define DRAW_INDIRECT_UINT_COUNT 4
|
|
|
|
// sizeof(FRHIDrawIndexedIndirectParameters) / sizeof(uint)
|
|
#define DRAW_INDEXED_INDIRECT_UINT_COUNT 5
|
|
|
|
|
|
// ---------------------------------------------------- Compiler missing implementations
|
|
|
|
#if COMPILER_SWITCH
|
|
|
|
// Determinant fallback for the Switch compiler, which lacks the intrinsic.
// Cofactor expansion along the first column (equivalent to row expansion,
// since det(M) == det(M^T)).
float determinant(float3x3 M)
{
	float CofactorX = M[1][1] * M[2][2] - M[1][2] * M[2][1];
	float CofactorY = M[0][2] * M[2][1] - M[0][1] * M[2][2];
	float CofactorZ = M[0][1] * M[1][2] - M[0][2] * M[1][1];
	return M[0][0] * CofactorX + M[1][0] * CofactorY + M[2][0] * CofactorZ;
}
|
|
|
|
#endif
|
|
|
|
#if COMPILER_HLSLCC
|
|
// Base-10 logarithm fallback for hlslcc targets. The whole expansion is
// parenthesized so the macro behaves like a function call in any expression
// context (without the outer parens, `a / log10(x)` would wrongly expand to
// `a / log((x)) / log(10.0)`).
#define log10(x) (log((x)) / log(10.0))
|
|
#endif
|
|
|
|
|
|
#if !COMPILER_SUPPORTS_MINMAX3
|
|
|
|
// 3-operand min/max fallbacks (COMPILER_SUPPORTS_MINMAX3 == 0).
// HLSL min()/max() already operate componentwise, so every vector overload
// reduces to two nested calls; hardware with native med3/min3/max3 overrides
// these via the platform header.

float min3( float a, float b, float c )
{
	return min( min( a, b ), c );
}

float max3( float a, float b, float c )
{
	return max( max( a, b ), c );
}

float2 min3( float2 a, float2 b, float2 c )
{
	return min( a, min( b, c ) );
}

float2 max3( float2 a, float2 b, float2 c )
{
	return max( a, max( b, c ) );
}

float3 min3( float3 a, float3 b, float3 c )
{
	return min( a, min( b, c ) );
}

float3 max3( float3 a, float3 b, float3 c )
{
	return max( a, max( b, c ) );
}

float4 min3( float4 a, float4 b, float4 c )
{
	return min( a, min( b, c ) );
}

float4 max3( float4 a, float4 b, float4 c )
{
	return max( a, max( b, c ) );
}
|
|
|
|
#if PLATFORM_SUPPORTS_REAL_TYPES
|
|
|
|
// Half-precision overloads of the min3/max3 fallbacks, compiled only when
// PLATFORM_SUPPORTS_REAL_TYPES provides true 16-bit float types.
// As with the float versions, min()/max() are componentwise, so the vector
// overloads are just nested 2-operand calls.

half min3( half a, half b, half c )
{
	return min( min( a, b ), c );
}

half max3( half a, half b, half c )
{
	return max( max( a, b ), c );
}

half2 min3( half2 a, half2 b, half2 c )
{
	return min( a, min( b, c ) );
}

half2 max3( half2 a, half2 b, half2 c )
{
	return max( a, max( b, c ) );
}

half3 min3( half3 a, half3 b, half3 c )
{
	return min( a, min( b, c ) );
}

half3 max3( half3 a, half3 b, half3 c )
{
	return max( a, max( b, c ) );
}

half4 min3( half4 a, half4 b, half4 c )
{
	return min( a, min( b, c ) );
}

half4 max3( half4 a, half4 b, half4 c )
{
	return max( a, max( b, c ) );
}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
// https://devblogs.microsoft.com/directx/announcing-hlsl-2021/
|
|
// HLSL 2021 supports Logical Operator Short Circuiting. To do vector bool operations, need to use and() or() select()
|
|
// Sadly the HLSL2021 standard does not overload select() very well...
|
|
// select() maps onto an overload set so that scalar conditions, vector
// conditions, and mixed scalar/vector operands all work, unlike the limited
// overloading of the HLSL 2021 built-in.
#define select(cond,a,b) select_internal(cond,a,b)

// Generates the full select_internal overload set for one element type:
// every combination of {scalar, vector} condition and {scalar, vector}
// operands, for widths 1 through 4.
#define DEFINE_SELECT(TYPE) \
	/* Scalar condition, scalar operands. */ \
	TYPE select_internal(bool c, TYPE a, TYPE b) { return TYPE (c ? a.x : b.x); } \
	\
	/* Width 2. */ \
	TYPE##2 select_internal(bool c, TYPE a, TYPE##2 b) { return TYPE##2(c ? a : b.x, c ? a : b.y); } \
	TYPE##2 select_internal(bool c, TYPE##2 a, TYPE b) { return TYPE##2(c ? a.x : b , c ? a.y : b ); } \
	TYPE##2 select_internal(bool c, TYPE##2 a, TYPE##2 b) { return TYPE##2(c ? a.x : b.x, c ? a.y : b.y); } \
	TYPE##2 select_internal(bool##2 c, TYPE a, TYPE b) { return TYPE##2(c.x ? a : b , c.y ? a : b ); } \
	TYPE##2 select_internal(bool##2 c, TYPE a, TYPE##2 b) { return TYPE##2(c.x ? a : b.x, c.y ? a : b.y); } \
	TYPE##2 select_internal(bool##2 c, TYPE##2 a, TYPE b) { return TYPE##2(c.x ? a.x : b , c.y ? a.y : b ); } \
	TYPE##2 select_internal(bool##2 c, TYPE##2 a, TYPE##2 b) { return TYPE##2(c.x ? a.x : b.x, c.y ? a.y : b.y); } \
	\
	/* Width 3. */ \
	TYPE##3 select_internal(bool c, TYPE a, TYPE##3 b) { return TYPE##3(c ? a : b.x, c ? a : b.y, c ? a : b.z); } \
	TYPE##3 select_internal(bool c, TYPE##3 a, TYPE b) { return TYPE##3(c ? a.x : b , c ? a.y : b , c ? a.z : b ); } \
	TYPE##3 select_internal(bool c, TYPE##3 a, TYPE##3 b) { return TYPE##3(c ? a.x : b.x, c ? a.y : b.y, c ? a.z : b.z); } \
	TYPE##3 select_internal(bool##3 c, TYPE a, TYPE b) { return TYPE##3(c.x ? a : b , c.y ? a : b , c.z ? a : b ); } \
	TYPE##3 select_internal(bool##3 c, TYPE a, TYPE##3 b) { return TYPE##3(c.x ? a : b.x, c.y ? a : b.y, c.z ? a : b.z); } \
	TYPE##3 select_internal(bool##3 c, TYPE##3 a, TYPE b) { return TYPE##3(c.x ? a.x : b , c.y ? a.y : b , c.z ? a.z : b ); } \
	TYPE##3 select_internal(bool##3 c, TYPE##3 a, TYPE##3 b) { return TYPE##3(c.x ? a.x : b.x, c.y ? a.y : b.y, c.z ? a.z : b.z); } \
	\
	/* Width 4. */ \
	TYPE##4 select_internal(bool c, TYPE a, TYPE##4 b) { return TYPE##4(c ? a : b.x, c ? a : b.y, c ? a : b.z, c ? a : b.w); } \
	TYPE##4 select_internal(bool c, TYPE##4 a, TYPE b) { return TYPE##4(c ? a.x : b , c ? a.y : b , c ? a.z : b , c ? a.w : b ); } \
	TYPE##4 select_internal(bool c, TYPE##4 a, TYPE##4 b) { return TYPE##4(c ? a.x : b.x, c ? a.y : b.y, c ? a.z : b.z, c ? a.w : b.w); } \
	TYPE##4 select_internal(bool##4 c, TYPE a, TYPE b) { return TYPE##4(c.x ? a : b , c.y ? a : b , c.z ? a : b , c.w ? a : b ); } \
	TYPE##4 select_internal(bool##4 c, TYPE a, TYPE##4 b) { return TYPE##4(c.x ? a : b.x, c.y ? a : b.y, c.z ? a : b.z, c.w ? a : b.w); } \
	TYPE##4 select_internal(bool##4 c, TYPE##4 a, TYPE b) { return TYPE##4(c.x ? a.x : b , c.y ? a.y : b , c.z ? a.z : b , c.w ? a.w : b ); } \
	TYPE##4 select_internal(bool##4 c, TYPE##4 a, TYPE##4 b) { return TYPE##4(c.x ? a.x : b.x, c.y ? a.y : b.y, c.z ? a.z : b.z, c.w ? a.w : b.w); } \

DEFINE_SELECT(bool)
DEFINE_SELECT(uint)
DEFINE_SELECT(int)
DEFINE_SELECT(float)
#if PLATFORM_SUPPORTS_REAL_TYPES
DEFINE_SELECT(half)
DEFINE_SELECT(uint16_t)
DEFINE_SELECT(int16_t)
#endif
#undef DEFINE_SELECT
|
|
|
|
// Works around bug in the spirv for the missing implementation of the and() and or() intrinsics.
|
|
// HLSL 2021 short-circuits && / || and disallows them on vector operands;
// and()/or() restore componentwise boolean logic. Implemented as real
// functions to work around missing and()/or() intrinsics in the SPIR-V path.
bool  and_internal(bool  LHS, bool  RHS) { return bool (LHS && RHS); }
bool2 and_internal(bool2 LHS, bool2 RHS) { return bool2(LHS.x && RHS.x, LHS.y && RHS.y); }
bool3 and_internal(bool3 LHS, bool3 RHS) { return bool3(LHS.x && RHS.x, LHS.y && RHS.y, LHS.z && RHS.z); }
bool4 and_internal(bool4 LHS, bool4 RHS) { return bool4(LHS.x && RHS.x, LHS.y && RHS.y, LHS.z && RHS.z, LHS.w && RHS.w); }

bool  or_internal(bool  LHS, bool  RHS) { return bool (LHS || RHS); }
bool2 or_internal(bool2 LHS, bool2 RHS) { return bool2(LHS.x || RHS.x, LHS.y || RHS.y); }
bool3 or_internal(bool3 LHS, bool3 RHS) { return bool3(LHS.x || RHS.x, LHS.y || RHS.y, LHS.z || RHS.z); }
bool4 or_internal(bool4 LHS, bool4 RHS) { return bool4(LHS.x || RHS.x, LHS.y || RHS.y, LHS.z || RHS.z, LHS.w || RHS.w); }

#define and(a, b) and_internal(a, b)
#define or(a, b) or_internal(a, b)
|
|
|
|
#if PLATFORM_SUPPORTS_REAL_TYPES && !defined(COMPILER_SUPPORTS_PACK_B32_B16)
|
|
|
|
// Function that explicitly use RDNA's v_pack_b32_f16 on supported platform. Note that RDNA's documentation call this instruction v_pack_b32_f16
|
|
// but really is a v_pack_b32_b16.
|
|
// Portable fallback for RDNA's v_pack_b32_f16 (despite the name, a raw
// 16-bit pack): merges two 16-bit values into the low/high halves of a
// 32-bit register. Here the pack is just vector construction; platforms
// with the native instruction define COMPILER_SUPPORTS_PACK_B32_B16.
half2 v_pack_b32_b16(half Lo, half Hi)
{
	return half2(Lo, Hi);
}

int16_t2 v_pack_b32_b16(int16_t Lo, int16_t Hi)
{
	return int16_t2(Lo, Hi);
}

uint16_t2 v_pack_b32_b16(uint16_t Lo, uint16_t Hi)
{
	return uint16_t2(Lo, Hi);
}
|
|
|
|
#endif
|
|
|
|
#if !defined(COMPILER_SUPPORTS_COND_MASK)
|
|
|
|
// Explicit conditional-select helper: returns Src0 when Cond is true,
// Src1 otherwise. This fallback is a plain ternary; platforms with a native
// conditional-mask instruction provide their own via COMPILER_SUPPORTS_COND_MASK.

// float variants.
float  CondMask(bool Cond, float  Src0, float  Src1) { return Cond ? Src0 : Src1; }
float2 CondMask(bool Cond, float2 Src0, float2 Src1) { return Cond ? Src0 : Src1; }
float3 CondMask(bool Cond, float3 Src0, float3 Src1) { return Cond ? Src0 : Src1; }
float4 CondMask(bool Cond, float4 Src0, float4 Src1) { return Cond ? Src0 : Src1; }

// int variants.
int  CondMask(bool Cond, int  Src0, int  Src1) { return Cond ? Src0 : Src1; }
int2 CondMask(bool Cond, int2 Src0, int2 Src1) { return Cond ? Src0 : Src1; }
int3 CondMask(bool Cond, int3 Src0, int3 Src1) { return Cond ? Src0 : Src1; }
int4 CondMask(bool Cond, int4 Src0, int4 Src1) { return Cond ? Src0 : Src1; }

// uint variants.
uint  CondMask(bool Cond, uint  Src0, uint  Src1) { return Cond ? Src0 : Src1; }
uint2 CondMask(bool Cond, uint2 Src0, uint2 Src1) { return Cond ? Src0 : Src1; }
uint3 CondMask(bool Cond, uint3 Src0, uint3 Src1) { return Cond ? Src0 : Src1; }
uint4 CondMask(bool Cond, uint4 Src0, uint4 Src1) { return Cond ? Src0 : Src1; }
|
|
|
|
#endif
|
|
|
|
#if !defined(COMPILER_SUPPORTS_UNPACKBYTEN)
|
|
// Extract byte N (0 = least significant) of a packed 32-bit word, returned
// as a float in [0, 255]. Fallback when the compiler has no unpack intrinsics.
float UnpackByte0(uint v) { return float( v         & 0xffu); }
float UnpackByte1(uint v) { return float((v >>  8u) & 0xffu); }
float UnpackByte2(uint v) { return float((v >> 16u) & 0xffu); }
float UnpackByte3(uint v) { return float( v >> 24u         ); }
|
|
#endif // !COMPILER_SUPPORTS_UNPACKBYTEN
|
|
|
|
#if !defined(COMPILER_SUPPORTS_BITFIELD_INTRINSICS)
|
|
#define COMPILER_SUPPORTS_BITFIELD_INTRINSICS 0
|
|
|
|
// Software emulation using SM5/GCN semantics.
|
|
// Fast as long as shifts, sizes and offsets are compile-time constant.
|
|
// TODO: Should we consider weaker semantics to allow for a more efficient implementation in the dynamic case?
|
|
|
|
// Merge two words bitwise: take bits from Preserve where Mask is set, and
// from Enable where it is clear.
uint BitFieldInsertU32(uint Mask, uint Preserve, uint Enable)
{
	return (Preserve & Mask) | (Enable & ~Mask);
}

// Extract Size bits starting at Offset, zero-extended (GCN ubfe semantics).
uint BitFieldExtractU32(uint Data, uint Size, uint Offset)
{
	Size &= 31u;
	Offset &= 31u;

	if (Size == 0u)
	{
		return 0u;
	}

	if (Offset + Size < 32u)
	{
		// Shift the field against the MSB, then back down; the unsigned
		// right shift zero-fills the high bits.
		return (Data << (32u - Size - Offset)) >> (32u - Size);
	}

	// Field reaches the top of the word: a single shift suffices.
	return Data >> Offset;
}

// Extract Size bits starting at Offset, sign-extended (GCN sbfe semantics).
int BitFieldExtractI32(int Data, uint Size, uint Offset)
{
	Size &= 31u;
	Offset &= 31u;

	if (Size == 0u)
	{
		return 0;
	}

	if (Offset + Size < 32u)
	{
		// Signed right shift replicates the field's top bit downwards.
		return (Data << (32u - Size - Offset)) >> (32u - Size);
	}

	return Data >> Offset;
}

// Build a contiguous mask of MaskWidth set bits starting at MaskLocation.
uint BitFieldMaskU32(uint MaskWidth, uint MaskLocation)
{
	MaskWidth &= 31u;
	MaskLocation &= 31u;

	return ((1u << MaskWidth) - 1u) << MaskLocation;
}
|
|
#endif
|
|
|
|
#if !defined(COMPILER_SUPPORTS_BITALIGN)
|
|
#define COMPILER_SUPPORTS_BITALIGN 0
|
|
// Funnel shift right (software v_alignbit_b32): returns the 32-bit window
// starting Shift bits into the 64-bit value (High:Low), i.e. (High:Low) >> Shift.
// Shift behaves as if only its low 5 bits were supplied.
uint BitAlignU32(uint High, uint Low, uint Shift)
{
	Shift &= 31u;

	uint Result = Low >> Shift;
	// Guard Shift == 0: a 32-bit shift of High would be undefined/zero-only by luck.
	if (Shift > 0u)
	{
		Result |= High << (32u - Shift);
	}
	return Result;
}
|
|
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_BYTEALIGN
#define COMPILER_SUPPORTS_BYTEALIGN 0

// Byte-granularity funnel shift: returns the 32-bit window starting Shift BYTES
// into the 64-bit value (High:Low). Implemented on the bit-granularity
// BitAlignU32 above (hence the * 8 conversion from bytes to bits).
uint ByteAlignU32(uint High, uint Low, uint Shift)
{
	return BitAlignU32(High, Low, Shift * 8);
}

#endif // #ifndef COMPILER_SUPPORTS_BYTEALIGN
|
|
|
|
#if COMPILER_HLSLCC
// HLSLCC targets have no fine-derivative intrinsics; map them to the coarse
// ddx/ddy, which may share one derivative across a 2x2 pixel quad.
#define ddx_fine(x) ddx(x)
#define ddy_fine(y) ddy(y)
#endif
|
|
|
|
#ifndef COMPILER_SUPPORTS_ULONG_TYPES

// Emulated 64-bit unsigned integer for platforms without a native ulong type:
// represented as a uint2 pair of 32-bit words.
// NOTE(review): which lane holds the low word is a convention of the callers and
// of platforms that define UlongType natively — confirm before relying on it here.
#define UlongType uint2

// Identity on the emulated path; native platforms repack uint2 into their 64-bit type.
UlongType PackUlongType(uint2 Value)
{
	return Value;
}

// Identity on the emulated path; native platforms split their 64-bit type into uint2.
uint2 UnpackUlongType(UlongType Value)
{
	return Value;
}

#endif
|
|
|
|
// Prefix sum of Bits masked to the bits lower than Index: counts the set bits of
// the 64-bit mask Bits (.x = word selected when Index < 32) strictly below bit Index.
uint MaskedBitCount( uint2 Bits, uint Index )
{
	// Kept branchless: the ternary selects avoid divergent control flow on GPU targets.
	const bool bLowWord = Index < 32;

	// Mask of the bits below Index within its own 32-bit word.
	uint LowerMask = 1u << ( Index - ( bLowWord ? 0 : 32 ) );
	LowerMask -= 1;

	// First word contributes its masked bits when Index < 32, otherwise all of them.
	uint Count = countbits( Bits.x & ( bLowWord ? LowerMask : ~0u ) );
	// Second word contributes only when Index >= 32.
	Count += countbits( Bits.y & ( bLowWord ? 0 : LowerMask ) );
	return Count;
}
|
|
|
|
// Lock a critical region of code within a pixel shader and guarantees no concurrent execution for the same pixel
#ifndef RASTER_ORDERED_VIEW_LOCK
// No-op fallback: platform headers that support rasterizer-ordered views
// override this with a real lock before this file is included.
#define RASTER_ORDERED_VIEW_LOCK()
#endif

// Unlock a critical region of code within a pixel shader.
#ifndef RASTER_ORDERED_VIEW_UNLOCK
// No-op fallback matching RASTER_ORDERED_VIEW_LOCK above.
#define RASTER_ORDERED_VIEW_UNLOCK()
#endif
|
|
|
|
#if PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS

#ifndef COMPILER_SUPPORTS_TO_SCALAR_MEMORY
#define COMPILER_SUPPORTS_TO_SCALAR_MEMORY 1
// Hint that a value is wave-uniform: reading it from the first lane lets the
// compiler keep it in scalar registers/memory.
#define ToScalarMemory(x) WaveReadLaneFirst(x)
#endif

#ifndef COMPILER_SUPPORTS_MASKED_BIT_COUNT
#define COMPILER_SUPPORTS_MASKED_BIT_COUNT 1
// Per-lane prefix bit count: number of set bits in Bits below the calling
// lane's own index (see MaskedBitCount(uint2, uint)).
uint MaskedBitCount( uint2 Bits )
{
	return MaskedBitCount( Bits, WaveGetLaneIndex() );
}
#endif

#if COMPILER_DXC
// Ballot returning only the low two words of the active-lane mask.
// NOTE(review): truncates to 64 lanes via .xy — presumably no supported wave is
// wider; confirm against platform wave sizes.
uint2 WaveBallot( bool Expr )
{
	return WaveActiveBallot( Expr ).xy;
}
#endif

#ifndef WaveReadLaneLast
// Index of the highest currently-active lane, derived from the ballot of the
// active mask (uses only .xy, i.e. up to 64 lanes).
uint WaveGetActiveLaneIndexLast()
{
	uint2 ActiveMask = WaveActiveBallot( true ).xy;
	// If any lane >= 32 is active, the answer lives in the high word (+32 bias).
	return firstbithigh( ActiveMask.y ? ActiveMask.y : ActiveMask.x ) + ( ActiveMask.y ? 32 : 0 );
}

// Reads x from the highest active lane of the wave.
#define WaveReadLaneLast(x) WaveReadLaneAt( x, WaveGetActiveLaneIndexLast() )
#endif

#endif // PLATFORM_SUPPORTS_SM6_0_WAVE_OPERATIONS
|
|
|
|
// Give hint to compiler to move one value to scalar unit.
#if !defined(ToScalarMemory) && !defined(COMPILER_SUPPORTS_TO_SCALAR_MEMORY)
// No scalarization support on this platform: pass the value through unchanged.
#define ToScalarMemory(x) (x)
#endif

#if FEATURE_LEVEL < FEATURE_LEVEL_ES3_1 && !COMPILER_METAL
// DX11 (feature levels >= 10) feature sets natively supports uints in shaders; we just use floats on other platforms.
// NOTE(review): the alias below maps uint4 to int4 (not float) — shader code on
// these targets must avoid relying on unsigned wrap/compare semantics.
#define uint4 int4
#endif
|
|
|
|
#ifndef SNORM
#if COMPILER_HLSLCC
// HLSLCC does not understand the snorm/unorm type modifiers; compile them away.
#define SNORM
#define UNORM
#else
#define SNORM snorm
#define UNORM unorm
#endif
#endif

#ifndef INFINITE_FLOAT
#if COMPILER_HLSLCC
// HLSLCC cannot emit an infinity literal; substitute the largest finite float
// (FLT_MAX). Note this is NOT a true infinity: arithmetic on it can overflow.
#define INFINITE_FLOAT 3.402823e+38
#else
#define INFINITE_FLOAT 1.#INF
#endif
#endif
|
|
|
|
#ifndef RWTextureCube
// Platforms without a writable cube-texture type address cubes as 2D arrays
// (one slice per face).
#define RWTextureCube RWTexture2DArray
#endif

#ifndef PLATFORM_NEEDS_PRECISE_SHADOW_DEPTH
// Default off; platform headers set this to 1 when shadow-depth math must be
// marked precise — presumably to keep depth invariant across passes (TODO confirm).
#define PLATFORM_NEEDS_PRECISE_SHADOW_DEPTH 0
#endif
|
|
|
|
// Little tools to help with packing scalars arrays
#ifndef CALC_SCALAR_ARRAY_SIZE
// Number of 4-component vectors needed to hold ElementCount scalars (rounds up).
#define CALC_SCALAR_ARRAY_SIZE(ElementCount) ((ElementCount+3)/4)
#endif

#ifndef DECLARE_SCALAR_ARRAY
// Declares ElementCount scalars packed four-per-vector (ScalarType##4), avoiding
// one-register-per-scalar layouts for constant-buffer arrays.
#define DECLARE_SCALAR_ARRAY(ScalarType, ScalarName, ElementCount) ScalarType##4 ScalarName[CALC_SCALAR_ARRAY_SIZE(ElementCount)]
#endif

#ifndef GET_SCALAR_ARRAY_ELEMENT
// Fetches packed scalar ElementIndex: vector ElementIndex/4, component ElementIndex%4.
#define GET_SCALAR_ARRAY_ELEMENT(PackedArray, ElementIndex) PackedArray[ElementIndex>>2u][ElementIndex&3u]
#endif
|